From e73f48a18b678a4e53c53b677ff791f8f7f532b6 Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Mon, 10 Dec 2018 11:58:21 +0000
Subject: [PATCH 1/3] Add failing test

Test fails as fetching a repeated object is too slow:

```
Head check took 124301 microseconds checking list of length 5000
Head check took 112286 microseconds checking list of length 5000
Head check took 1336512 microseconds checking list of length 5
2018-12-10T11:54:41.342 B0013 <0.2459.0> Long running task took 260788 microseconds with task of type pcl_head
2018-12-10T11:54:41.618 B0013 <0.2459.0> Long running task took 276508 microseconds with task of type pcl_head
2018-12-10T11:54:41.894 B0013 <0.2459.0> Long running task took 275225 microseconds with task of type pcl_head
2018-12-10T11:54:42.173 B0013 <0.2459.0> Long running task took 278836 microseconds with task of type pcl_head
2018-12-10T11:54:42.477 B0013 <0.2459.0> Long running task took 304524 microseconds with task of type pcl_head
```

It takes twice as long to check for one repeated object as it does to check
for 5K non-repeated objects.
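The timing lines above come from HEAD-checking each object in a list and
reporting the elapsed time. A minimal sketch of such a check, assuming a
started bookie Book and the public leveled_bookie:book_head/4 API
(timed_head_check/2 and the {Bucket, Key} list shape are illustrative; the
suite's actual helper is testutil:checkhead_forlist/2):

```
%% Illustrative timing harness (an assumption, not the suite's code):
%% HEAD-check every {Bucket, Key} pair and log the elapsed microseconds,
%% in the style of the "Head check took ..." lines above.
%% ?RIAK_TAG is defined in include/leveled.hrl.
timed_head_check(Book, BKList) ->
    SW = os:timestamp(),
    lists:foreach(
        fun({B, K}) ->
            {ok, _Head} = leveled_bookie:book_head(Book, B, K, ?RIAK_TAG)
        end,
        BKList),
    io:format("Head check took ~w microseconds checking list of length ~w~n",
                [timer:now_diff(os:timestamp(), SW), length(BKList)]).
```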
---
 src/leveled_bookie.erl             |   2 +-
 test/end_to_end/recovery_SUITE.erl | 103 ++++++++++++++++++++++++++++-
 2 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl
index 0550436..51b783d 100644
--- a/src/leveled_bookie.erl
+++ b/src/leveled_bookie.erl
@@ -2028,7 +2028,7 @@ addto_ledgercache({H, SQN, KeyChanges}, Cache) ->
         -> ledger_cache().
 %% @doc
 %% Add a set of changes associated witha single sequence number (journal
-%% update) to the ledger cache. This is used explicitly when laoding the
+%% update) to the ledger cache. This is used explicitly when loading the
 %% ledger from the Journal (i.e. at startup) - and in this case the ETS insert
 %% can be bypassed, as all changes will be flushed to the Penciller before the
 %% load is complete.

diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl
index eb748cc..23595b1 100644
--- a/test/end_to_end/recovery_SUITE.erl
+++ b/test/end_to_end/recovery_SUITE.erl
@@ -2,7 +2,9 @@
 -include_lib("common_test/include/ct.hrl").
 -include("include/leveled.hrl").
 -export([all/0]).
--export([hot_backup_simple/1,
+-export([
+            recovery_with_samekeyupdates/1,
+            hot_backup_simple/1,
             hot_backup_changes/1,
             retain_strategy/1,
             recovr_strategy/1,
@@ -12,6 +14,7 @@
             ]).
 
 all() -> [
+            recovery_with_samekeyupdates,
             hot_backup_simple,
             hot_backup_changes,
             retain_strategy,
@@ -22,6 +25,104 @@ all() -> [
             ].
 
+recovery_with_samekeyupdates(_Config) ->
+    % Setup to prove https://github.com/martinsumner/leveled/issues/229
+    % run a test that involves many updates to the same key, and check that
+    % this doesn't cause performance to flatline in either the normal "PUT"
+    % case, or in the case of the recovery from a lost keystore
+    AcceptableDuration = 300, % 5 minutes
+    E2E_SW = os:timestamp(), % Used to track time for overall job
+
+    RootPath = testutil:reset_filestructure(),
+    BackupPath = testutil:reset_filestructure("backupSKU"),
+    BookOpts = [{root_path, RootPath},
+                    {cache_size, 2000},
+                    {max_journalsize, 20000000},
+                    {sync_strategy, testutil:sync_strategy()}],
+    {ok, Book1} = leveled_bookie:book_start(BookOpts),
+
+    % Load in 5K different keys
+    % Load 5 keys 5K times each
+    % Load in 5K more different keys
+    io:format("Commence object generation and load~n"),
+    ObjectGen = testutil:get_compressiblevalue_andinteger(),
+    IndexGen = fun() -> [] end,
+    ObjL1 = testutil:generate_objects(5000,
+                                        {fixed_binary, 1},
+                                        [],
+                                        ObjectGen,
+                                        IndexGen,
+                                        <<"Bucket1">>),
+    testutil:riakload(Book1, ObjL1),
+    RepeatedLoadFun =
+        fun(_I, _Acc) ->
+            ObjRL =
+                testutil:generate_objects(5,
+                                            {fixed_binary, 5001},
+                                            [],
+                                            ObjectGen,
+                                            IndexGen,
+                                            <<"Bucket1">>),
+            testutil:riakload(Book1, ObjRL),
+            ObjRL
+        end,
+    FinalObjRL = lists:foldl(RepeatedLoadFun, [], lists:seq(1, 5000)),
+    ObjL2 = testutil:generate_objects(5000,
+                                        {fixed_binary, 6001},
+                                        [],
+                                        ObjectGen,
+                                        IndexGen,
+                                        <<"Bucket1">>),
+    testutil:riakload(Book1, ObjL2),
+
+    % Fetch all of ObjL1
+    io:format("Check for presence of unique objects~n"),
+    ok = testutil:checkhead_forlist(Book1, ObjL1),
+    % Fetch all of ObjL2
+    ok = testutil:checkhead_forlist(Book1, ObjL2),
+    io:format("Check for presence of repeated objects~n"),
+    % Fetch repeated objects 200 times each
+    CheckFun1 =
+        fun(_I) -> ok = testutil:checkhead_forlist(Book1, FinalObjRL) end,
+    lists:foreach(CheckFun1, lists:seq(1, 200)),
+    io:format("Checks complete~n"),
+
+    io:format("Backup - get journal with no ledger~n"),
+    {async, BackupFun} = leveled_bookie:book_hotbackup(Book1),
+    ok = BackupFun(BackupPath),
+
+    io:format("Restarting without key store~n"),
+    ok = leveled_bookie:book_close(Book1),
+
+    BookOptsBackup = [{root_path, BackupPath},
+                        {cache_size, 2000},
+                        {max_journalsize, 20000000},
+                        {sync_strategy, testutil:sync_strategy()}],
+    {ok, Book2} = leveled_bookie:book_start(BookOptsBackup),
+
+    % Fetch all of ObjL1
+    io:format("Check for presence of unique objects~n"),
+    ok = testutil:checkhead_forlist(Book2, ObjL1),
+    % Fetch all of ObjL2
+    ok = testutil:checkhead_forlist(Book2, ObjL2),
+    io:format("Check for presence of repeated objects~n"),
+    % Fetch repeated objects 200 times each
+    CheckFun2 =
+        fun(_I) -> ok = testutil:checkhead_forlist(Book2, FinalObjRL) end,
+    lists:foreach(CheckFun2, lists:seq(1, 200)),
+    io:format("Checks complete from backup~n"),
+
+    DurationOfTest = timer:now_diff(os:timestamp(), E2E_SW)/(1000 * 1000),
+    io:format("Duration of test was ~w s~n", [DurationOfTest]),
+    true = DurationOfTest < AcceptableDuration,
+
+    ok = leveled_bookie:book_close(Book2),
+    testutil:reset_filestructure(BackupPath),
+    testutil:reset_filestructure().
+
+
+
 hot_backup_simple(_Config) ->
     % The journal may have a hot backup.  This allows for an online Bookie
     % to be sent a message to prepare a backup function, which an asynchronous
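The slowdown sits in leveled_pmem, the Penciller's in-memory cache of recent
ledger changes, where candidate slot positions are found by scanning a binary
index of hash entries. Each matching entry appended the slot again, so a key
updated N times produced a position list with N duplicates, and the same slot
was then checked N times per head request. A toy reproduction (scan/4 and
duplicate_positions/1 are illustrative stand-ins, modelled on the find_pos/4
clauses changed in PATCH 2/3 below):

```
%% Pre-fix scan, modelled on the find_pos/4 clauses changed in PATCH 2/3:
%% a matching entry (<<1:1, Hash:23>>) appends the slot unconditionally.
scan(<<>>, _Hash, PosList, _SlotID) ->
    PosList;
scan(<<1:1/integer, Hash:23/integer, T/binary>>, Hash, PosList, SlotID) ->
    scan(T, Hash, PosList ++ [SlotID], SlotID);
scan(<<1:1/integer, _Miss:23/integer, T/binary>>, Hash, PosList, SlotID) ->
    scan(T, Hash, PosList, SlotID);
scan(<<0:1/integer, NxtSlot:7/integer, T/binary>>, Hash, PosList, _SlotID) ->
    scan(T, Hash, PosList, NxtSlot).

%% N updates to the same key leave N matching entries in the index, so
%% the returned position list holds N copies of the same slot.
duplicate_positions(N) ->
    Hash = 12345, % any 23-bit value
    Index = list_to_binary(
                lists:duplicate(N, <<1:1/integer, Hash:23/integer>>)),
    N = length(scan(Index, Hash, [], 0)),
    ok.
```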
From b38d845c3ab9c04c88bb3d9a8586fb4b422f6e3a Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Mon, 10 Dec 2018 12:01:11 +0000
Subject: [PATCH 2/3] Update src/leveled_pmem.erl

Simple change to check for the presence of the slot position in the list
before adding it:

```
Head check took 124416 microseconds checking list of length 5000
Head check took 130114 microseconds checking list of length 5000
Check for presence of repeated objects
Head check took 1725 microseconds checking list of length 5
```
---
 src/leveled_pmem.erl | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl
index 171021f..3bb8cd8 100644
--- a/src/leveled_pmem.erl
+++ b/src/leveled_pmem.erl
@@ -200,7 +200,12 @@ merge_trees(StartKey, EndKey, TreeList, LevelMinus1) ->
 find_pos(<<>>, _Hash, PosList, _SlotID) ->
     PosList;
 find_pos(<<1:1/integer, Hash:23/integer, T/binary>>, Hash, PosList, SlotID) ->
-    find_pos(T, Hash, PosList ++ [SlotID], SlotID);
+    case lists:member(SlotID, PosList) of
+        true ->
+            find_pos(T, Hash, PosList, SlotID);
+        false ->
+            find_pos(T, Hash, PosList ++ [SlotID], SlotID)
+    end;
 find_pos(<<1:1/integer, _Miss:23/integer, T/binary>>, Hash, PosList, SlotID) ->
     find_pos(T, Hash, PosList, SlotID);
 find_pos(<<0:1/integer, NxtSlot:7/integer, T/binary>>, Hash, PosList, _SlotID) ->

From f274d2a63affd36240fa82214588d1872672ec5f Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Mon, 10 Dec 2018 13:23:39 +0000
Subject: [PATCH 3/3] Tighten acceptable duration; even with cover, this passes
 in 30s

---
 test/end_to_end/recovery_SUITE.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl
index 23595b1..c9306fd 100644
--- a/test/end_to_end/recovery_SUITE.erl
+++ b/test/end_to_end/recovery_SUITE.erl
@@ -30,7 +30,7 @@ recovery_with_samekeyupdates(_Config) ->
     % run a test that involves many updates to the same key, and check that
     % this doesn't cause performance to flatline in either the normal "PUT"
     % case, or in the case of the recovery from a lost keystore
-    AcceptableDuration = 300, % 5 minutes
+    AcceptableDuration = 180, % 3 minutes
     E2E_SW = os:timestamp(), % Used to track time for overall job
 
     RootPath = testutil:reset_filestructure(),
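With the guard from PATCH 2/3 in place, the same scan yields each slot at most
once, which is what collapses the repeated-object head check from ~1.3s to
under 2ms in the quoted runs. A sanity check against an illustrative stand-in
for the patched clause (scan_dedup/4 mirrors the new find_pos/4 but is not the
module's private function itself):

```
%% Post-fix scan: only append the slot if it is not already present.
scan_dedup(<<>>, _Hash, PosList, _SlotID) ->
    PosList;
scan_dedup(<<1:1/integer, Hash:23/integer, T/binary>>, Hash, PosList, SlotID) ->
    case lists:member(SlotID, PosList) of
        true -> scan_dedup(T, Hash, PosList, SlotID);
        false -> scan_dedup(T, Hash, PosList ++ [SlotID], SlotID)
    end;
scan_dedup(<<1:1/integer, _Miss:23/integer, T/binary>>, Hash, PosList, SlotID) ->
    scan_dedup(T, Hash, PosList, SlotID);
scan_dedup(<<0:1/integer, NxtSlot:7/integer, T/binary>>, Hash, PosList, _SlotID) ->
    scan_dedup(T, Hash, PosList, NxtSlot).

scan_dedup_test() ->
    Hash = 12345,
    Index = list_to_binary(
                lists:duplicate(5000, <<1:1/integer, Hash:23/integer>>)),
    %% 5000 repeated updates now resolve to a single position
    [0] = scan_dedup(Index, Hash, [], 0),
    ok.
```

lists:member/2 is linear, but slot markers carry 7-bit IDs, so the
deduplicated list is bounded and the membership check stays cheap; the pre-fix
PosList ++ [SlotID] append was itself linear in a list that grew with every
update to the same key.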