From 4d550ef2a1a2b7ff6f6dd2dc28e3b1aed44cf21d Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 20 Nov 2019 10:33:51 +0000 Subject: [PATCH 01/26] Bump version for new release --- src/leveled.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled.app.src b/src/leveled.app.src index add537e..11c190a 100644 --- a/src/leveled.app.src +++ b/src/leveled.app.src @@ -1,7 +1,7 @@ {application, leveled, [ {description, "Key Value store based on LSM-Tree and designed for larger values"}, - {vsn, "0.9.18"}, + {vsn, "0.9.20"}, {registered, []}, {applications, [ kernel, From 02155558df013500eb0557c5fe9ffbab6cbacc40 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 12 Feb 2020 09:26:53 +0000 Subject: [PATCH 02/26] Tag for Riak 2.9.1 --- src/leveled.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled.app.src b/src/leveled.app.src index add537e..5940dd1 100644 --- a/src/leveled.app.src +++ b/src/leveled.app.src @@ -1,7 +1,7 @@ {application, leveled, [ {description, "Key Value store based on LSM-Tree and designed for larger values"}, - {vsn, "0.9.18"}, + {vsn, "riak_kv-2.9.1"}, {registered, []}, {applications, [ kernel, From 60e29f2ff084e35e5e72164ced44857389fbd805 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 6 Mar 2020 11:29:25 +0000 Subject: [PATCH 03/26] (slightly) less random reads on journal compaction --- src/leveled_cdb.erl | 54 ++++++++++++++++++++++++------------------ src/leveled_iclerk.erl | 4 ++++ src/leveled_log.erl | 6 ++++- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 49418ff..f40a17b 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -130,6 +130,7 @@ -define(DELETE_TIMEOUT, 10000). -define(TIMING_SAMPLECOUNTDOWN, 5000). -define(TIMING_SAMPLESIZE, 100). +-define(GETPOS_FACTOR, 8). -define(MAX_OBJECT_SIZE, 1000000000). % 1GB but really should be much smaller than this @@ -266,29 +267,36 @@ cdb_getpositions(Pid, SampleSize) -> % requests waiting for this to complete, loop over each of the 256 indexes % outside of the FSM processing loop - to allow for other messages to be % interleaved - case SampleSize of - all -> - FoldFun = - fun(Index, Acc) -> - cdb_getpositions_fromidx(Pid, all, Index, Acc) - end, - IdxList = lists:seq(0, 255), - lists:foldl(FoldFun, [], IdxList); - S0 -> - FoldFun = - fun({_R, Index}, Acc) -> - case length(Acc) of - S0 -> - Acc; - L when L < S0 -> - cdb_getpositions_fromidx(Pid, S0, Index, Acc) - end - end, - RandFun = fun(X) -> {leveled_rand:uniform(), X} end, - SeededL = lists:map(RandFun, lists:seq(0, 255)), - SortedL = lists:keysort(1, SeededL), - lists:foldl(FoldFun, [], SortedL) - end. + SW = os:timestamp(), + PosList = + case SampleSize of + all -> + FoldFun = + fun(Index, Acc) -> + cdb_getpositions_fromidx(Pid, all, Index, Acc) + end, + IdxList = lists:seq(0, 255), + lists:foldl(FoldFun, [], IdxList); + S0 -> + FC = ?GETPOS_FACTOR * S0, + FoldFun = + fun({_R, Index}, Acc) -> + case length(Acc) of + FC -> + Acc; + L when L < FC -> + cdb_getpositions_fromidx(Pid, FC, Index, Acc) + end + end, + RandFun = fun(X) -> {leveled_rand:uniform(), X} end, + SeededL = lists:map(RandFun, lists:seq(0, 255)), + SortedL = lists:keysort(1, SeededL), + PosList0 = lists:foldl(FoldFun, [], SortedL), + P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)), + lists:sublist(lists:sort(PosList0), P1, S0) + end, + leveled_log:log_timer("CDB22", [length(PosList)], SW), + PosList. 
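The rewritten cdb_getpositions/2 above over-fetches by ?GETPOS_FACTOR across the shuffled hash-table indexes and then returns one random contiguous window of the sorted positions, so the clerk's follow-up reads move through the file in offset order rather than jumping randomly. A minimal illustrative sketch of that windowing step, not part of the patch (stdlib rand is used here in place of leveled_rand):

%% Sketch only: pick one contiguous run of SampleSize offsets from an
%% over-fetched position list, so subsequent reads are near-sequential.
contiguous_sample(Positions, SampleSize) ->
    Sorted = lists:sort(Positions),
    Start = rand:uniform(max(1, length(Sorted) - SampleSize)),
    lists:sublist(Sorted, Start, SampleSize).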
cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) -> gen_fsm:sync_send_event(Pid, diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index fb8a088..21ed1ba 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -508,6 +508,10 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) -> FN = leveled_cdb:cdb_filename(CDB), PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize), + AvgJump = + (lists:last(PositionList) - lists:nth(1, PositionList)) + div length(PositionList), + leveled_log:log("IC014", [AvgJump]), KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []), Score = size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN), diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 27344dc..cfc2791 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -356,6 +356,8 @@ {"IC013", {warn, "File with name ~s to be ignored in manifest as scanning for " ++ "first key returned empty - maybe corrupted"}}, + {"IC014", + {info, "Fetching position list with average byte jump ~p"}}, {"CDB01", {info, "Opening file for writing with filename ~s"}}, @@ -404,7 +406,9 @@ {"CDB20", {warn, "Error ~w caught when safe reading a file to length ~w"}}, {"CDB21", - {warn, "File ~s to be deleted but already gone"}} + {warn, "File ~s to be deleted but already gone"}}, + {"CDB22", + {info, "Positions ~w fetch"}} ]). From 156e7b064d424c9d3eedd670d29abab2d6f804cb Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 9 Mar 2020 15:12:48 +0000 Subject: [PATCH 04/26] Compaction, retain and recovery Change the penciller check so that it returns current/replaced/missing not just true/false. Reduce unnecessary penciller checks for non-standard keys that will always be retained - and remove redunandt code. Expand tests of retain and recover to make sure that compaction on delete is well covered. Also move the SQN number laong during initial loads - to stop aggressive loop to find starting SQN every file. --- src/leveled_bookie.erl | 2 +- src/leveled_codec.erl | 15 ++-- src/leveled_iclerk.erl | 112 +++++++++++++++++------------ src/leveled_inker.erl | 36 ++++++---- src/leveled_penciller.erl | 38 +++++----- src/leveled_runner.erl | 2 +- test/end_to_end/recovery_SUITE.erl | 108 +++++++++++++++++++++++++++- 7 files changed, 223 insertions(+), 90 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 63b22f1..74707dc 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -2278,7 +2278,7 @@ maybe_withjitter(_CacheSize, _MaxCacheSize) -> -spec get_loadfun(book_state()) -> fun(). %% @doc -%% The LoadFun will be sued by the Inker when walking across the Journal to +%% The LoadFun will be used by the Inker when walking across the Journal to %% load the Penciller at startup get_loadfun(State) -> PrepareFun = diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index b5cee4f..91386fc 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -35,7 +35,7 @@ from_inkerkv/2, from_journalkey/1, revert_to_keydeltas/2, - is_compaction_candidate/1, + is_full_journalentry/1, split_inkvalue/1, check_forinkertype/2, get_tagstrategy/2, @@ -410,11 +410,12 @@ to_inkerkv(LedgerKey, SQN, Object, KeyChanges, PressMethod, Compress) -> %% If we wish to retain key deltas when an object in the Journal has been %% replaced - then this converts a Journal Key and Value into one which has no %% object body just the key deltas. 
+%% Only called if retain strategy and has passed +%% leveled_codec:is_full_journalentry/1 - so no need to consider other key +%% types revert_to_keydeltas({SQN, ?INKT_STND, LedgerKey}, InkerV) -> {_V, KeyDeltas} = revert_value_from_journal(InkerV), - {{SQN, ?INKT_KEYD, LedgerKey}, {null, KeyDeltas}}; -revert_to_keydeltas(JournalKey, InkerV) -> - {JournalKey, InkerV}. + {{SQN, ?INKT_KEYD, LedgerKey}, {null, KeyDeltas}}. %% Used when fetching objects, so only handles standard, hashable entries from_inkerkv(Object) -> @@ -560,12 +561,12 @@ check_forinkertype(_LedgerKey, head_only) -> check_forinkertype(_LedgerKey, _Object) -> ?INKT_STND. --spec is_compaction_candidate(journal_key()) -> boolean(). +-spec is_full_journalentry(journal_key()) -> boolean(). %% @doc %% Only journal keys with standard objects should be scored for compaction -is_compaction_candidate({_SQN, ?INKT_STND, _LK}) -> +is_full_journalentry({_SQN, ?INKT_STND, _LK}) -> true; -is_compaction_candidate(_OtherJKType) -> +is_full_journalentry(_OtherJKType) -> false. diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index fb8a088..95ee191 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -519,17 +519,17 @@ size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> fun(KS, {ActSize, RplSize}) -> case KS of {{SQN, Type, PK}, Size} -> - MayScore = - leveled_codec:is_compaction_candidate({SQN, Type, PK}), - case MayScore of + IsJournalEntry = + leveled_codec:is_full_journalentry({SQN, Type, PK}), + case IsJournalEntry of false -> {ActSize + Size - ?CRC_SIZE, RplSize}; true -> Check = FilterFun(FilterServer, PK, SQN), case {Check, SQN > MaxSQN} of - {true, _} -> + {current, _} -> {ActSize + Size - ?CRC_SIZE, RplSize}; - {false, true} -> + {_, true} -> {ActSize + Size - ?CRC_SIZE, RplSize}; _ -> {ActSize, RplSize + Size - ?CRC_SIZE} @@ -542,7 +542,7 @@ size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> % expected format of the key {ActSize, RplSize} end - end, + end, R0 = lists:foldl(FoldFunForSizeCompare, {0, 0}, KeySizeList), {ActiveSize, ReplacedSize} = R0, @@ -757,39 +757,51 @@ split_positions_into_batches(Positions, Journal, Batches) -> %% a `skip` strategy. filter_output(KVCs, FilterFun, FilterServer, MaxSQN, ReloadStrategy) -> FoldFun = - fun(KVC0, Acc) -> - case KVC0 of - {_InkKey, crc_wonky, false} -> - % Bad entry, disregard, don't check - Acc; - {JK, JV, _Check} -> - {SQN, LK} = - leveled_codec:from_journalkey(JK), - CompactStrategy = - leveled_codec:get_tagstrategy(LK, ReloadStrategy), - KeyValid = FilterFun(FilterServer, LK, SQN), - IsInMemory = SQN > MaxSQN, - case {KeyValid or IsInMemory, CompactStrategy} of - {true, _} -> - % This entry may still be required regardless of - % strategy - [KVC0|Acc]; - {false, retain} -> - % If we have a retain strategy, it can't be - % discarded - but the value part is no longer - % required as this version has been replaced - {JK0, JV0} = - leveled_codec:revert_to_keydeltas(JK, JV), - [{JK0, JV0, null}|Acc]; - {false, _} -> - % This is out of date and not retained - discard - Acc - end - end - end, + filter_output_fun(FilterFun, FilterServer, MaxSQN, ReloadStrategy), lists:reverse(lists:foldl(FoldFun, [], KVCs)). 
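Earlier in this patch revert_to_keydeltas/2 loses its pass-through clause for other key types, so callers are expected to test with leveled_codec:is_full_journalentry/1 first, as the new comment states. An illustrative sketch of that guard, not code from the patch:

%% Sketch only: guard the key-delta conversion on the journal key type.
maybe_revert_to_keydeltas(JournalKey, JournalValue) ->
    case leveled_codec:is_full_journalentry(JournalKey) of
        true  -> leveled_codec:revert_to_keydeltas(JournalKey, JournalValue);
        false -> {JournalKey, JournalValue}
    end.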
+filter_output_fun(FilterFun, FilterServer, MaxSQN, ReloadStrategy) -> + fun(KVC0, Acc) -> + case KVC0 of + {_InkKey, crc_wonky, false} -> + % Bad entry, disregard, don't check + Acc; + {JK, JV, _Check} -> + {SQN, LK} = + leveled_codec:from_journalkey(JK), + CompactStrategy = + leveled_codec:get_tagstrategy(LK, ReloadStrategy), + IsJournalEntry = + leveled_codec:is_full_journalentry(JK), + case {CompactStrategy, IsJournalEntry} of + {retain, false} -> + [KVC0|Acc]; + _ -> + KeyCurrent = FilterFun(FilterServer, LK, SQN), + IsInMemory = SQN > MaxSQN, + case {KeyCurrent, IsInMemory, CompactStrategy} of + {KC, InMem, _} when KC == current; InMem -> + % This entry may still be required + % regardless of strategy + [KVC0|Acc]; + {_, _, retain} -> + % If we have a retain strategy, it can't be + % discarded - but the value part is no + % longer required as this version has been + % replaced + {JK0, JV0} = + leveled_codec:revert_to_keydeltas(JK, JV), + [{JK0, JV0, null}|Acc]; + {_, _, _} -> + % This is out of date and not retained so + % discard + Acc + end + end + end + end. + write_values([], _CDBopts, Journal0, ManSlice0, _PressMethod) -> {Journal0, ManSlice0}; write_values(KVCList, CDBopts, Journal0, ManSlice0, PressMethod) -> @@ -985,13 +997,13 @@ check_single_file_test() -> LedgerFun1 = fun(Srv, Key, ObjSQN) -> case lists:keyfind(ObjSQN, 1, Srv) of {ObjSQN, Key} -> - true; + current; _ -> - false + replaced end end, Score1 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 4), ?assertMatch(37.5, Score1), - LedgerFun2 = fun(_Srv, _Key, _ObjSQN) -> true end, + LedgerFun2 = fun(_Srv, _Key, _ObjSQN) -> current end, Score2 = check_single_file(CDB, LedgerFun2, LedgerSrv1, 9, 8, 4), ?assertMatch(100.0, Score2), Score3 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 3), @@ -1016,9 +1028,9 @@ compact_single_file_setup() -> LedgerFun1 = fun(Srv, Key, ObjSQN) -> case lists:keyfind(ObjSQN, 1, Srv) of {ObjSQN, Key} -> - true; + current; _ -> - false + replaced end end, CompactFP = leveled_inker:filepath(RP, journal_compact_dir), ok = filelib:ensure_dir(CompactFP), @@ -1119,7 +1131,7 @@ compact_empty_file_test() -> LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}}, {2, {o, "Bucket", "Key2", null}}, {3, {o, "Bucket", "Key3", null}}], - LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> false end, + LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> replaced end, Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4), ?assertMatch(100.0, Score1), ok = leveled_cdb:cdb_deletepending(CDB2), @@ -1161,7 +1173,13 @@ compact_singlefile_totwosmallfiles_testto() -> filename=leveled_cdb:cdb_filename(CDBr), journal=CDBr, compaction_perc=50.0}], - FakeFilterFun = fun(_FS, _LK, SQN) -> SQN rem 2 == 0 end, + FakeFilterFun = + fun(_FS, _LK, SQN) -> + case SQN rem 2 of + 0 -> current; + _ -> replaced + end + end, ManifestSlice = compact_files(BestRun1, CDBoptsSmall, @@ -1190,7 +1208,13 @@ size_score_test() -> {{7, ?INKT_STND, "Key7"}, 184}], MaxSQN = 6, CurrentList = ["Key1", "Key4", "Key5", "Key6"], - FilterFun = fun(L, K, _SQN) -> lists:member(K, L) end, + FilterFun = + fun(L, K, _SQN) -> + case lists:member(K, L) of + true -> current; + false -> replaced + end + end, Score = size_comparison_score(KeySizeList, FilterFun, CurrentList, MaxSQN), ?assertMatch(true, Score > 69.0), ?assertMatch(true, Score < 70.0). 
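For a full journal entry, the branching in filter_output_fun/4 above reduces to a small decision over the penciller check result, whether the entry is newer than the ledger's MaxSQN, and the tag's reload strategy. Restated as a standalone sketch (keep_entry/3 is illustrative, not a function in the patch):

%% Sketch only: fate of one sampled full journal entry during compaction.
keep_entry(current, _NewerThanLedger, _Strategy) ->
    keep;            % still the live version
keep_entry(_Check, true, _Strategy) ->
    keep;            % not yet reflected in the ledger, keep regardless
keep_entry(_Check, _NewerThanLedger, retain) ->
    keep_keydelta;   % drop the value, retain the key changes for reload
keep_entry(_Check, _NewerThanLedger, _RecalcOrRecovr) ->
    discard.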
diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index 32ee376..aa9d14d 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -1142,18 +1142,18 @@ fold_from_sequence(_MinSQN, _FoldFuns, Acc, []) -> Acc; fold_from_sequence(MinSQN, FoldFuns, Acc, [{LowSQN, FN, Pid, _LK}|Rest]) when LowSQN >= MinSQN -> - Acc0 = foldfile_between_sequence(MinSQN, - MinSQN + ?LOADING_BATCH, - FoldFuns, - Acc, - Pid, - undefined, - FN), - fold_from_sequence(MinSQN, FoldFuns, Acc0, Rest); + {NextMinSQN, Acc0} = foldfile_between_sequence(MinSQN, + MinSQN + ?LOADING_BATCH, + FoldFuns, + Acc, + Pid, + undefined, + FN), + fold_from_sequence(NextMinSQN, FoldFuns, Acc0, Rest); fold_from_sequence(MinSQN, FoldFuns, Acc, [{_LowSQN, FN, Pid, _LK}|Rest]) -> % If this file has a LowSQN less than the minimum, we can skip it if the % next file also has a LowSQN below the minimum - Acc0 = + {NextMinSQN, Acc0} = case Rest of [] -> foldfile_between_sequence(MinSQN, @@ -1172,9 +1172,9 @@ fold_from_sequence(MinSQN, FoldFuns, Acc, [{_LowSQN, FN, Pid, _LK}|Rest]) -> undefined, FN); _ -> - Acc + {MinSQN, Acc} end, - fold_from_sequence(MinSQN, FoldFuns, Acc0, Rest). + fold_from_sequence(NextMinSQN, FoldFuns, Acc0, Rest). foldfile_between_sequence(MinSQN, MaxSQN, FoldFuns, Acc, CDBpid, StartPos, FN) -> @@ -1182,8 +1182,8 @@ foldfile_between_sequence(MinSQN, MaxSQN, FoldFuns, InitBatchAcc = {MinSQN, MaxSQN, InitAccFun(FN, MinSQN)}, case leveled_cdb:cdb_scan(CDBpid, FilterFun, InitBatchAcc, StartPos) of - {eof, {_AccMinSQN, _AccMaxSQN, BatchAcc}} -> - FoldFun(BatchAcc, Acc); + {eof, {AccMinSQN, _AccMaxSQN, BatchAcc}} -> + {AccMinSQN, FoldFun(BatchAcc, Acc)}; {LastPosition, {_AccMinSQN, _AccMaxSQN, BatchAcc}} -> UpdAcc = FoldFun(BatchAcc, Acc), NextSQN = MaxSQN + 1, @@ -1452,7 +1452,10 @@ compact_journal_testto(WRP, ExpectedFiles) -> fun(X) -> {X, 55} end, fun(_F) -> ok end, fun(L, K, SQN) -> - lists:member({SQN, K}, L) + case lists:member({SQN, K}, L) of + true -> current; + false -> replaced + end end, 5000), timer:sleep(1000), @@ -1464,7 +1467,10 @@ compact_journal_testto(WRP, ExpectedFiles) -> fun(X) -> {X, 55} end, fun(_F) -> ok end, fun(L, K, SQN) -> - lists:member({SQN, K}, L) + case lists:member({SQN, K}, L) of + true -> current; + false -> replaced + end end, 5000), timer:sleep(1000), diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 4d572bd..3373246 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -305,8 +305,9 @@ list(leveled_codec:ledger_kv()|leveled_sst:expandable_pointer())}. -type iterator() :: list(iterator_entry()). -type bad_ledgerkey() :: list(). +-type sqn_check() :: current|replaced|missing. --export_type([levelzero_cacheentry/0]). +-export_type([levelzero_cacheentry/0, sqn_check/0]). %%%============================================================================ %%% API @@ -480,14 +481,13 @@ pcl_fetchnextkey(Pid, StartKey, EndKey, AccFun, InitAcc) -> -spec pcl_checksequencenumber(pid(), leveled_codec:ledger_key()|bad_ledgerkey(), - integer()) -> boolean(). + integer()) -> sqn_check(). %% @doc %% Check if the sequence number of the passed key is not replaced by a change -%% after the passed sequence number. Will return true if the Key is present -%% and either is equal to, or prior to the passed SQN. -%% -%% If the key is not present, it will be assumed that a higher sequence number -%% tombstone once existed, and false will be returned. +%% after the passed sequence number. 
Will return: +%% - current +%% - replaced +%% - missing pcl_checksequencenumber(Pid, Key, SQN) -> Hash = leveled_codec:segment_hash(Key), if @@ -1487,7 +1487,7 @@ log_slowfetch(T0, R, PID, Level, FetchTolerance) -> end. --spec compare_to_sqn(tuple()|not_present, integer()) -> boolean(). +-spec compare_to_sqn(tuple()|not_present, integer()) -> sqn_check(). %% @doc %% Check to see if the SQN in the penciller is after the SQN expected for an %% object (used to allow the journal to check compaction status from a cache @@ -1495,19 +1495,19 @@ log_slowfetch(T0, R, PID, Level, FetchTolerance) -> compare_to_sqn(Obj, SQN) -> case Obj of not_present -> - false; + missing; Obj -> SQNToCompare = leveled_codec:strip_to_seqonly(Obj), if SQNToCompare > SQN -> - false; + replaced; true -> % Normally we would expect the SQN to be equal here, but % this also allows for the Journal to have a more advanced % value. We return true here as we wouldn't want to % compact thta more advanced value, but this may cause % confusion in snapshots. - true + current end end. @@ -2097,25 +2097,25 @@ simple_server_test() -> ?assertMatch(Key2, pcl_fetch(PclSnap, {o,"Bucket0002", "Key0002", null})), ?assertMatch(Key3, pcl_fetch(PclSnap, {o,"Bucket0003", "Key0003", null})), ?assertMatch(Key4, pcl_fetch(PclSnap, {o,"Bucket0004", "Key0004", null})), - ?assertMatch(true, pcl_checksequencenumber(PclSnap, + ?assertMatch(current, pcl_checksequencenumber(PclSnap, {o, "Bucket0001", "Key0001", null}, 1)), - ?assertMatch(true, pcl_checksequencenumber(PclSnap, + ?assertMatch(current, pcl_checksequencenumber(PclSnap, {o, "Bucket0002", "Key0002", null}, 1002)), - ?assertMatch(true, pcl_checksequencenumber(PclSnap, + ?assertMatch(current, pcl_checksequencenumber(PclSnap, {o, "Bucket0003", "Key0003", null}, 2003)), - ?assertMatch(true, pcl_checksequencenumber(PclSnap, + ?assertMatch(current, pcl_checksequencenumber(PclSnap, {o, "Bucket0004", "Key0004", @@ -2132,7 +2132,7 @@ simple_server_test() -> KL1A = generate_randomkeys({2000, 4006}), ok = maybe_pause_push(PCLr, [Key1A]), ok = maybe_pause_push(PCLr, KL1A), - ?assertMatch(true, pcl_checksequencenumber(PclSnap, + ?assertMatch(current, pcl_checksequencenumber(PclSnap, {o, "Bucket0001", "Key0001", @@ -2148,19 +2148,19 @@ simple_server_test() -> undefined, false), - ?assertMatch(false, pcl_checksequencenumber(PclSnap2, + ?assertMatch(replaced, pcl_checksequencenumber(PclSnap2, {o, "Bucket0001", "Key0001", null}, 1)), - ?assertMatch(true, pcl_checksequencenumber(PclSnap2, + ?assertMatch(current, pcl_checksequencenumber(PclSnap2, {o, "Bucket0001", "Key0001", null}, 4005)), - ?assertMatch(true, pcl_checksequencenumber(PclSnap2, + ?assertMatch(current, pcl_checksequencenumber(PclSnap2, {o, "Bucket0002", "Key0002", diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index 412ad60..a51dacc 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -373,7 +373,7 @@ foldobjects_allkeys(SnapFun, Tag, FoldObjectsFun, sqn_order) -> SQN), % Need to check that we have not folded past the point % at which the snapshot was taken - (JournalSQN >= SQN) and CheckSQN + (JournalSQN >= SQN) and (CheckSQN == current) end, diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index a3890f8..640b688 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -238,12 +238,12 @@ retain_strategy(_Config) -> RootPath = testutil:reset_filestructure(), BookOpts = [{root_path, RootPath}, {cache_size, 1000}, - {max_journalsize, 5000000}, + 
{max_journalobjectcount, 5000}, {sync_strategy, testutil:sync_strategy()}, {reload_strategy, [{?RIAK_TAG, retain}]}], BookOptsAlt = [{root_path, RootPath}, {cache_size, 1000}, - {max_journalsize, 100000}, + {max_journalobjectcount, 2000}, {sync_strategy, testutil:sync_strategy()}, {reload_strategy, [{?RIAK_TAG, retain}]}, {max_run_length, 8}], @@ -260,6 +260,58 @@ retain_strategy(_Config) -> {"Bucket4", Spcl4, LastV4}, {"Bucket5", Spcl5, LastV5}, {"Bucket6", Spcl6, LastV6}]), + + {ok, Book1} = leveled_bookie:book_start(BookOpts), + compact_and_wait(Book1), + compact_and_wait(Book1), + ok = leveled_bookie:book_close(Book1), + + ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}, + {"Bucket4", Spcl4, LastV4}, + {"Bucket5", Spcl5, LastV5}, + {"Bucket6", Spcl6, LastV6}]), + + {ok, Book2} = leveled_bookie:book_start(BookOptsAlt), + + {KSpcL2, _V2} = testutil:put_indexed_objects(Book2, "AltBucket6", 3000), + Q2 = fun(RT) -> {index_query, + "AltBucket6", + {fun testutil:foldkeysfun/3, []}, + {"idx1_bin", "#", "|"}, + {RT, undefined}} + end, + {async, KFolder2A} = leveled_bookie:book_returnfolder(Book2, Q2(false)), + KeyList2A = lists:usort(KFolder2A()), + true = length(KeyList2A) == 3000, + + DeleteFun = + fun({DK, [{add, DIdx, DTerm}]}) -> + ok = testutil:book_riakdelete(Book2, + "AltBucket6", + DK, + [{remove, DIdx, DTerm}]) + end, + lists:foreach(DeleteFun, KSpcL2), + + {async, KFolder2AD} = leveled_bookie:book_returnfolder(Book2, Q2(false)), + KeyList2AD = lists:usort(KFolder2AD()), + true = length(KeyList2AD) == 0, + + ok = leveled_bookie:book_close(Book2), + + {ok, Book3} = leveled_bookie:book_start(BookOptsAlt), + + io:format("Compact after deletions~n"), + + compact_and_wait(Book3), + compact_and_wait(Book3), + + {async, KFolder3AD} = leveled_bookie:book_returnfolder(Book3, Q2(false)), + KeyList3AD = lists:usort(KFolder3AD()), + true = length(KeyList3AD) == 0, + + ok = leveled_bookie:book_close(Book3), + testutil:reset_filestructure(). 
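Each delete in the test above removes the object together with its index entry; the test then restarts the store, compacts, and confirms the index remains empty, so a journal reloaded under the retain strategy still replays those removals. A sketch of that pairing, using only the helper the test already calls:

%% Sketch only: delete an indexed object, removing its index entry in the
%% same call, as DeleteFun does above for every key/spec pair.
delete_indexed(Bookie, Bucket, Key, IdxField, IdxTerm) ->
    ok = testutil:book_riakdelete(Bookie, Bucket, Key,
                                  [{remove, IdxField, IdxTerm}]).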
@@ -267,7 +319,7 @@ recovr_strategy(_Config) -> RootPath = testutil:reset_filestructure(), BookOpts = [{root_path, RootPath}, {cache_size, 1000}, - {max_journalsize, 5000000}, + {max_journalobjectcount, 8000}, {sync_strategy, testutil:sync_strategy()}, {reload_strategy, [{?RIAK_TAG, recovr}]}], @@ -309,7 +361,57 @@ recovr_strategy(_Config) -> true = length(KeyList) == 6400, true = length(KeyList) < length(KeyTermList), true = length(KeyTermList) < 25600, + ok = leveled_bookie:book_close(Book1), + + RevisedOpts = [{root_path, RootPath}, + {cache_size, 1000}, + {max_journalobjectcount, 2000}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{?RIAK_TAG, recovr}]}], + + {ok, Book2} = leveled_bookie:book_start(RevisedOpts), + + {KSpcL2, _V2} = testutil:put_indexed_objects(Book2, "AltBucket6", 3000), + {async, KFolder2} = leveled_bookie:book_returnfolder(Book2, Q(false)), + KeyList2 = lists:usort(KFolder2()), + true = length(KeyList2) == 6400, + + Q2 = fun(RT) -> {index_query, + "AltBucket6", + {fun testutil:foldkeysfun/3, []}, + {"idx1_bin", "#", "|"}, + {RT, undefined}} + end, + {async, KFolder2A} = leveled_bookie:book_returnfolder(Book2, Q2(false)), + KeyList2A = lists:usort(KFolder2A()), + true = length(KeyList2A) == 3000, + + DeleteFun = + fun({DK, [{add, DIdx, DTerm}]}) -> + ok = testutil:book_riakdelete(Book2, + "AltBucket6", + DK, + [{remove, DIdx, DTerm}]) + end, + lists:foreach(DeleteFun, KSpcL2), + + {async, KFolder2AD} = leveled_bookie:book_returnfolder(Book2, Q2(false)), + KeyList2AD = lists:usort(KFolder2AD()), + true = length(KeyList2AD) == 0, + + compact_and_wait(Book2), + compact_and_wait(Book2), + + ok = leveled_bookie:book_close(Book2), + + {ok, Book3} = leveled_bookie:book_start(RevisedOpts), + {async, KFolder3AD} = leveled_bookie:book_returnfolder(Book3, Q2(false)), + KeyList3AD = lists:usort(KFolder3AD()), + true = length(KeyList3AD) == 0, + + ok = leveled_bookie:book_close(Book3), + testutil:reset_filestructure(). 
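Patch 04 turns the penciller check into the three-valued sqn_check() type (current | replaced | missing); a caller that only needs the old boolean view, such as the CheckSQN == current comparison in leveled_runner above, can collapse it as in this sketch (is_current/1 is illustrative only):

%% Sketch only: reduce the sqn_check() result to 'is this the live version'.
is_current(current)  -> true;
is_current(replaced) -> false;
is_current(missing)  -> false.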
From 6b3328f4a38a4b1b56c2ad82dcd722be3fc7cb8a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 9 Mar 2020 17:45:06 +0000 Subject: [PATCH 05/26] Rationalise logging in commit Also: Sort the output from an 'all' fetch one loop at a time Make sure the test of scoring na empty file is scoring an empty file If it is an emtpy file we want to compact the fragment away - in which case it should score 0.0 not 100.0 --- src/leveled_cdb.erl | 64 ++++++++++++++++++++---------------------- src/leveled_iclerk.erl | 22 +++++++++------ src/leveled_log.erl | 6 ++-- 3 files changed, 45 insertions(+), 47 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index f40a17b..3de322b 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -267,36 +267,33 @@ cdb_getpositions(Pid, SampleSize) -> % requests waiting for this to complete, loop over each of the 256 indexes % outside of the FSM processing loop - to allow for other messages to be % interleaved - SW = os:timestamp(), - PosList = - case SampleSize of - all -> - FoldFun = - fun(Index, Acc) -> - cdb_getpositions_fromidx(Pid, all, Index, Acc) - end, - IdxList = lists:seq(0, 255), - lists:foldl(FoldFun, [], IdxList); - S0 -> - FC = ?GETPOS_FACTOR * S0, - FoldFun = - fun({_R, Index}, Acc) -> - case length(Acc) of - FC -> - Acc; - L when L < FC -> - cdb_getpositions_fromidx(Pid, FC, Index, Acc) - end - end, - RandFun = fun(X) -> {leveled_rand:uniform(), X} end, - SeededL = lists:map(RandFun, lists:seq(0, 255)), - SortedL = lists:keysort(1, SeededL), - PosList0 = lists:foldl(FoldFun, [], SortedL), - P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)), - lists:sublist(lists:sort(PosList0), P1, S0) - end, - leveled_log:log_timer("CDB22", [length(PosList)], SW), - PosList. + case SampleSize of + all -> + FoldFun = + fun(Index, Acc) -> + PosList = cdb_getpositions_fromidx(Pid, all, Index, []), + lists:merge(Acc, lists:sort(PosList)) + end, + IdxList = lists:seq(0, 255), + lists:foldl(FoldFun, [], IdxList); + S0 -> + FC = ?GETPOS_FACTOR * S0, + FoldFun = + fun({_R, Index}, Acc) -> + case length(Acc) of + FC -> + Acc; + L when L < FC -> + cdb_getpositions_fromidx(Pid, FC, Index, Acc) + end + end, + RandFun = fun(X) -> {leveled_rand:uniform(), X} end, + SeededL = lists:map(RandFun, lists:seq(0, 255)), + SortedL = lists:keysort(1, SeededL), + PosList0 = lists:foldl(FoldFun, [], SortedL), + P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)), + lists:sublist(lists:sort(PosList0), P1, S0) + end. cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) -> gen_fsm:sync_send_event(Pid, @@ -1234,10 +1231,9 @@ scan_index_returnpositions(Handle, Position, Count, PosList0) -> [HPosition|PosList] end end, - PosList = lists:foldl(AddPosFun, - PosList0, - read_next_n_integerpairs(Handle, Count)), - lists:reverse(PosList). + lists:foldl(AddPosFun, + PosList0, + read_next_n_integerpairs(Handle, Count)). %% Take an active file and write the hash details necessary to close that diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 21ed1ba..1c969f6 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -507,17 +507,22 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> %% calls. 
check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) -> FN = leveled_cdb:cdb_filename(CDB), + SW = os:timestamp(), PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize), - AvgJump = - (lists:last(PositionList) - lists:nth(1, PositionList)) - div length(PositionList), - leveled_log:log("IC014", [AvgJump]), KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []), Score = size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN), - leveled_log:log("IC004", [FN, Score]), + safely_log_filescore(PositionList, FN, Score, SW), Score. +safely_log_filescore([], FN, Score, SW) -> + leveled_log:log_timer("IC004", [Score, empty, FN], SW); +safely_log_filescore(PositionList, FN, Score, SW) -> + AvgJump = + (lists:last(PositionList) - lists:nth(1, PositionList)) + div length(PositionList), + leveled_log:log_timer("IC004", [Score, AvgJump, FN], SW). + size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> FoldFunForSizeCompare = fun(KS, {ActSize, RplSize}) -> @@ -546,13 +551,13 @@ size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> % expected format of the key {ActSize, RplSize} end - end, + end, R0 = lists:foldl(FoldFunForSizeCompare, {0, 0}, KeySizeList), {ActiveSize, ReplacedSize} = R0, case ActiveSize + ReplacedSize of 0 -> - 100.0; + 0.0; _ -> 100 * ActiveSize / (ActiveSize + ReplacedSize) end. @@ -1117,7 +1122,6 @@ compact_empty_file_test() -> FN1 = leveled_inker:filepath(RP, 1, new_journal), CDBopts = #cdb_options{binary_mode=true}, {ok, CDB1} = leveled_cdb:cdb_open_writer(FN1, CDBopts), - ok = leveled_cdb:cdb_put(CDB1, {1, stnd, test_ledgerkey("Key1")}, <<>>), {ok, FN2} = leveled_cdb:cdb_complete(CDB1), {ok, CDB2} = leveled_cdb:cdb_open_reader(FN2), LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}}, @@ -1125,7 +1129,7 @@ compact_empty_file_test() -> {3, {o, "Bucket", "Key3", null}}], LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> false end, Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4), - ?assertMatch(100.0, Score1), + ?assertMatch(0.0, Score1), ok = leveled_cdb:cdb_deletepending(CDB2), ok = leveled_cdb:cdb_destroy(CDB2). diff --git a/src/leveled_log.erl b/src/leveled_log.erl index cfc2791..ddc624d 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -336,7 +336,7 @@ {info, "Scoring of compaction runs complete with highest score=~w " ++ "with run of run_length=~w"}}, {"IC004", - {info, "Score for filename ~s is ~w"}}, + {info, "Score=~w with mean_byte_jump=~w for filename ~s"}}, {"IC005", {info, "Compaction to be performed on ~w files with score of ~w"}}, {"IC006", @@ -406,9 +406,7 @@ {"CDB20", {warn, "Error ~w caught when safe reading a file to length ~w"}}, {"CDB21", - {warn, "File ~s to be deleted but already gone"}}, - {"CDB22", - {info, "Positions ~w fetch"}} + {warn, "File ~s to be deleted but already gone"}} ]). 
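Patch 05 also changes what an empty sample scores: the percentage-active calculation now returns 0.0 rather than 100.0 when nothing was sampled, so an empty file fragment becomes a compaction candidate instead of being protected. The calculation restated as a sketch (score/2 is illustrative, not a function in the module):

%% Sketch only: percentage of sampled bytes still active; lower scores make
%% better compaction candidates, and an empty sample now scores 0.0.
score(ActiveSize, ReplacedSize) when ActiveSize + ReplacedSize == 0 ->
    0.0;
score(ActiveSize, ReplacedSize) ->
    100 * ActiveSize / (ActiveSize + ReplacedSize).

For example, score(600, 1400) returns 30.0, meaning only 30% of the sampled bytes are still current.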
From 207aeb8b99420c44a575f46aaed810ff1a3d4bb7 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 9 Mar 2020 20:42:48 +0000 Subject: [PATCH 06/26] Remove additional log --- src/leveled_log.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index ddc624d..51072fa 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -356,8 +356,6 @@ {"IC013", {warn, "File with name ~s to be ignored in manifest as scanning for " ++ "first key returned empty - maybe corrupted"}}, - {"IC014", - {info, "Fetching position list with average byte jump ~p"}}, {"CDB01", {info, "Opening file for writing with filename ~s"}}, From 694d2c39f878cad2c534189fa04f4fdacea8b9a7 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sun, 15 Mar 2020 22:14:42 +0000 Subject: [PATCH 07/26] Support for recalc Initial test included for running with recallc, and also transition from retain to recalc. Moves all logic for startup fold into leveled_bookie - avoid the Inker requiring any direct knowledge about implementation of the Penciller. --- src/leveled_bookie.erl | 228 +++++++++++++++++++++-------- src/leveled_codec.erl | 13 +- src/leveled_head.erl | 162 +++++++++++++++++++- src/leveled_iclerk.erl | 106 ++++++++++---- src/leveled_inker.erl | 38 +---- test/end_to_end/recovery_SUITE.erl | 80 ++++++---- test/end_to_end/testutil.erl | 82 ++++++----- 7 files changed, 522 insertions(+), 187 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 74707dc..bcbf4ef 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -93,8 +93,6 @@ ]). -export([empty_ledgercache/0, - loadqueue_ledgercache/1, - push_ledgercache/2, snapshot_store/6, fetch_value/2, journal_notfound/4]). @@ -105,6 +103,7 @@ -include_lib("eunit/include/eunit.hrl"). +-define(LOADING_PAUSE, 1000). -define(CACHE_SIZE, 2500). -define(MIN_CACHE_SIZE, 100). -define(MIN_PCL_CACHE_SIZE, 400). @@ -166,7 +165,7 @@ -record(state, {inker :: pid() | undefined, penciller :: pid() | undefined, cache_size :: integer() | undefined, - ledger_cache = #ledger_cache{}, + ledger_cache = #ledger_cache{} :: ledger_cache(), is_snapshot :: boolean() | undefined, slow_offer = false :: boolean(), @@ -315,7 +314,7 @@ % Journal, to recalculate the index changes based on the current % state of the Ledger and the object metadata. % - % reload_strategy ptions are a list - to map from a tag to the + % reload_strategy options are a list - to map from a tag to the % strategy (recovr|retain|recalc). Defualt strategies are: % [{?RIAK_TAG, retain}, {?STD_TAG, retain}] {max_pencillercachesize, pos_integer()|undefined} | @@ -378,7 +377,16 @@ % true ]. +-type initial_loadfun() :: + fun((leveled_codec:journal_key(), + any(), + non_neg_integer(), + {non_neg_integer(), non_neg_integer(), ledger_cache()}, + fun((any()) -> {binary(), non_neg_integer()})) -> + {loop|stop, + {non_neg_integer(), non_neg_integer(), ledger_cache()}}). +-export_type([initial_loadfun/0]). 
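The initial_loadfun() type exported above fixes the contract for the fun the Inker applies to each journal entry at startup: loop while inside the SQN window, stop once past it, carrying {MinSQN, MaxSQN, ledger_cache()} as the accumulator. A skeleton of that control shape (illustrative only; the real fun built by get_loadfun/3 also extracts the value, builds ledger changes, and stops after processing SQN == MaxSQN):

%% Sketch only: the loop/stop shape an initial_loadfun() follows.
skeleton_loadfun({SQN, _Tag, _PK}, _Val, _Pos, {MinSQN, MaxSQN, LC}, _ExtractFun)
        when SQN < MinSQN ->
    {loop, {MinSQN, MaxSQN, LC}};
skeleton_loadfun({SQN, _Tag, _PK}, _Val, _Pos, {MinSQN, MaxSQN, LC}, _ExtractFun)
        when SQN > MaxSQN ->
    {stop, {MinSQN, MaxSQN, LC}};
skeleton_loadfun({_SQN, _Tag, _PK}, _Val, _Pos, {MinSQN, MaxSQN, LC}, _ExtractFun) ->
    %% the real implementation updates LC with the prepared or recalculated
    %% ledger changes at this point
    {loop, {MinSQN, MaxSQN, LC}}.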
%%%============================================================================ %%% API @@ -1250,8 +1258,7 @@ handle_call({put, Bucket, Key, Object, IndexSpecs, Tag, TTL}, From, State) SQN, Object, ObjSize, - {IndexSpecs, TTL}, - State), + {IndexSpecs, TTL}), Cache0 = addto_ledgercache(Changes, State#state.ledger_cache), {_SW2, Timings2} = update_timings(SW1, {put, mem}, Timings1), @@ -1288,8 +1295,7 @@ handle_call({mput, ObjectSpecs, TTL}, From, State) Changes = preparefor_ledgercache(?INKT_MPUT, ?DUMMY, SQN, null, length(ObjectSpecs), - {ObjectSpecs, TTL}, - State), + {ObjectSpecs, TTL}), Cache0 = addto_ledgercache(Changes, State#state.ledger_cache), case State#state.slow_offer of true -> @@ -1537,6 +1543,23 @@ code_change(_OldVsn, State, _Extra) -> empty_ledgercache() -> #ledger_cache{mem = ets:new(empty, [ordered_set])}. + +-spec push_to_penciller(pid(), ledger_cache()) -> ok. +%% @doc +%% The push to penciller must start as a tree to correctly de-duplicate +%% the list by order before becoming a de-duplicated list for loading +push_to_penciller(Penciller, LedgerCache) -> + push_to_penciller_loop(Penciller, loadqueue_ledgercache(LedgerCache)). + +push_to_penciller_loop(Penciller, LedgerCache) -> + case push_ledgercache(Penciller, LedgerCache) of + returned -> + timer:sleep(?LOADING_PAUSE), + push_to_penciller_loop(Penciller, LedgerCache); + ok -> + ok + end. + -spec push_ledgercache(pid(), ledger_cache()) -> ok|returned. %% @doc %% Push the ledgercache to the Penciller - which should respond ok or @@ -1642,10 +1665,22 @@ startup(InkerOpts, PencillerOpts, State) -> {ok, Penciller} = leveled_penciller:pcl_start(PencillerOpts), LedgerSQN = leveled_penciller:pcl_getstartupsequencenumber(Penciller), leveled_log:log("B0005", [LedgerSQN]), + ReloadStrategy = InkerOpts#inker_options.reload_strategy, + LoadFun = get_loadfun(ReloadStrategy, Penciller, State), + BatchFun = + fun(BatchAcc, _Acc) -> + push_to_penciller(Penciller, BatchAcc) + end, + InitAccFun = + fun(FN, CurrentMinSQN) -> + leveled_log:log("I0014", [FN, CurrentMinSQN]), + empty_ledgercache() + end, ok = leveled_inker:ink_loadpcl(Inker, LedgerSQN + 1, - get_loadfun(State), - Penciller), + LoadFun, + InitAccFun, + BatchFun), ok = leveled_inker:ink_checksqn(Inker, LedgerSQN), {Inker, Penciller}. @@ -2161,30 +2196,26 @@ check_notfound(CheckFrequency, CheckFun) -> -spec preparefor_ledgercache(leveled_codec:journal_key_tag()|null, leveled_codec:ledger_key()|?DUMMY, - integer(), any(), integer(), - leveled_codec:journal_keychanges(), - book_state()) - -> {integer()|no_lookup, - integer(), + non_neg_integer(), any(), integer(), + leveled_codec:journal_keychanges()) + -> {leveled_codec:segment_hash(), + non_neg_integer(), list(leveled_codec:ledger_kv())}. %% @doc %% Prepare an object and its related key changes for addition to the Ledger %% via the Ledger Cache. 
preparefor_ledgercache(?INKT_MPUT, - ?DUMMY, SQN, _O, _S, {ObjSpecs, TTL}, - _State) -> + ?DUMMY, SQN, _O, _S, {ObjSpecs, TTL}) -> ObjChanges = leveled_codec:obj_objectspecs(ObjSpecs, SQN, TTL), {no_lookup, SQN, ObjChanges}; preparefor_ledgercache(?INKT_KEYD, - LedgerKey, SQN, _Obj, _Size, {IdxSpecs, TTL}, - _State) -> + LedgerKey, SQN, _Obj, _Size, {IdxSpecs, TTL}) -> {Bucket, Key} = leveled_codec:from_ledgerkey(LedgerKey), KeyChanges = leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL), {no_lookup, SQN, KeyChanges}; preparefor_ledgercache(_InkTag, - LedgerKey, SQN, Obj, Size, {IdxSpecs, TTL}, - _State) -> + LedgerKey, SQN, Obj, Size, {IdxSpecs, TTL}) -> {Bucket, Key, MetaValue, {KeyH, _ObjH}, _LastMods} = leveled_codec:generate_ledgerkv(LedgerKey, SQN, Obj, Size, TTL), KeyChanges = @@ -2193,8 +2224,58 @@ preparefor_ledgercache(_InkTag, {KeyH, SQN, KeyChanges}. --spec addto_ledgercache({integer()|no_lookup, - integer(), +-spec recalcfor_ledgercache(leveled_codec:journal_key_tag()|null, + leveled_codec:ledger_key()|?DUMMY, + non_neg_integer(), any(), integer(), + leveled_codec:journal_keychanges(), + ledger_cache(), + pid()) + -> {leveled_codec:segment_hash(), + non_neg_integer(), + list(leveled_codec:ledger_kv())}. +%% @doc +%% When loading from the journal to the ledger, may hit a key which has the +%% `recalc` strategy. Such a key needs to recalculate the key changes by +%% comparison with the current state of the ledger, assuming it is a full +%% journal entry (i.e. KeyDeltas which may be a result of previously running +%% with a retain strategy should be ignored). +recalcfor_ledgercache(InkTag, + _LedgerKey, SQN, _Obj, _Size, {_IdxSpecs, _TTL}, + _LedgerCache, + _Penciller) + when InkTag == ?INKT_MPUT; InkTag == ?INKT_KEYD -> + {no_lookup, SQN, []}; +recalcfor_ledgercache(_InkTag, + LK, SQN, Obj, Size, {_IgnoreJournalIdxSpecs, TTL}, + LedgerCache, + Penciller) -> + {Bucket, Key, MetaValue, {KeyH, _ObjH}, _LastMods} = + leveled_codec:generate_ledgerkv(LK, SQN, Obj, Size, TTL), + OldObject = + case check_in_ledgercache(LK, KeyH, LedgerCache, loader) of + false -> + leveled_penciller:pcl_fetch(Penciller, LK, KeyH, true); + {value, KV} -> + KV + end, + OldMetadata = + case OldObject of + not_present -> + not_present; + {LK, LV} -> + leveled_codec:get_metadata(LV) + end, + UpdMetadata = leveled_codec:get_metadata(MetaValue), + IdxSpecs = + leveled_head:diff_indexspecs(element(1, LK), UpdMetadata, OldMetadata), + {KeyH, + SQN, + [{LK, MetaValue}] + ++ leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL)}. + + +-spec addto_ledgercache({leveled_codec:segment_hash(), + non_neg_integer(), list(leveled_codec:ledger_kv())}, ledger_cache()) -> ledger_cache(). @@ -2230,6 +2311,32 @@ addto_ledgercache({H, SQN, KeyChanges}, Cache, loader) -> max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}. +-spec check_in_ledgercache(leveled_codec:ledger_key(), + leveled_codec:segment_hash(), + ledger_cache(), + loader) -> + false | {value, leveled_codec:ledger_kv()}. +%% @doc +%% Check the ledger cache for a Key, when the ledger cache is in loader mode +%% and so is populating a queue not an ETS table +check_in_ledgercache(PK, Hash, Cache, loader) -> + case leveled_pmem:check_index(Hash, Cache#ledger_cache.index) of + [] -> + false; + _ -> + search(fun({K,_V}) -> K == PK end, + lists:reverse(Cache#ledger_cache.load_queue)) + end. + +-spec search(fun((any()) -> boolean()), list()) -> {value, any()}|false. 
+search(Pred, [Hd|Tail]) -> + case Pred(Hd) of + true -> {value, Hd}; + false -> search(Pred, Tail) + end; +search(Pred, []) when is_function(Pred, 1) -> + false. + -spec maybepush_ledgercache(integer(), ledger_cache(), pid()) -> {ok|returned, ledger_cache()}. %% @doc @@ -2276,44 +2383,47 @@ maybe_withjitter(_CacheSize, _MaxCacheSize) -> false. --spec get_loadfun(book_state()) -> fun(). +-spec get_loadfun(leveled_codec:compaction_strategy(), pid(), book_state()) + -> initial_loadfun(). %% @doc %% The LoadFun will be used by the Inker when walking across the Journal to -%% load the Penciller at startup -get_loadfun(State) -> - PrepareFun = - fun(Tag, PK, SQN, Obj, VS, IdxSpecs) -> - preparefor_ledgercache(Tag, PK, SQN, Obj, VS, IdxSpecs, State) - end, - LoadFun = - fun(KeyInJournal, ValueInJournal, _Pos, Acc0, ExtractFun) -> - {MinSQN, MaxSQN, OutputTree} = Acc0, - {SQN, InkTag, PK} = KeyInJournal, - % VBin may already be a term - {VBin, VSize} = ExtractFun(ValueInJournal), - {Obj, IdxSpecs} = leveled_codec:split_inkvalue(VBin), - case SQN of - SQN when SQN < MinSQN -> - {loop, Acc0}; - SQN when SQN < MaxSQN -> - Chngs = PrepareFun(InkTag, PK, SQN, Obj, VSize, IdxSpecs), - {loop, - {MinSQN, - MaxSQN, - addto_ledgercache(Chngs, OutputTree, loader)}}; - MaxSQN -> - leveled_log:log("B0006", [SQN]), - Chngs = PrepareFun(InkTag, PK, SQN, Obj, VSize, IdxSpecs), - {stop, - {MinSQN, - MaxSQN, - addto_ledgercache(Chngs, OutputTree, loader)}}; - SQN when SQN > MaxSQN -> - leveled_log:log("B0007", [MaxSQN, SQN]), - {stop, Acc0} - end - end, - LoadFun. +%% load the Penciller at startup. +get_loadfun(ReloadStrat, Penciller, _State) -> + fun(KeyInJournal, ValueInJournal, _Pos, Acc0, ExtractFun) -> + {MinSQN, MaxSQN, LedgerCache} = Acc0, + {SQN, InkTag, PK} = KeyInJournal, + case SQN of + SQN when SQN < MinSQN -> + {loop, Acc0}; + SQN when SQN > MaxSQN -> + leveled_log:log("B0007", [MaxSQN, SQN]), + {stop, Acc0}; + _ -> + {VBin, ValSize} = ExtractFun(ValueInJournal), + % VBin may already be a term + {Obj, IdxSpecs} = leveled_codec:split_inkvalue(VBin), + Chngs = + case leveled_codec:get_tagstrategy(PK, ReloadStrat) of + recalc -> + recalcfor_ledgercache(InkTag, PK, SQN, + Obj, ValSize, IdxSpecs, + LedgerCache, + Penciller); + _ -> + preparefor_ledgercache(InkTag, PK, SQN, + Obj, ValSize, IdxSpecs) + end, + case SQN of + MaxSQN -> + leveled_log:log("B0006", [SQN]), + LC0 = addto_ledgercache(Chngs, LedgerCache, loader), + {stop, {MinSQN, MaxSQN, LC0}}; + _ -> + LC0 = addto_ledgercache(Chngs, LedgerCache, loader), + {loop, {MinSQN, MaxSQN, LC0}} + end + end + end. delete_path(DirPath) -> diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 91386fc..95e894e 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -49,7 +49,8 @@ segment_hash/1, to_lookup/1, next_key/1, - return_proxy/4]). + return_proxy/4, + get_metadata/1]). -define(LMD_FORMAT, "~4..0w~2..0w~2..0w~2..0w~2..0w"). -define(NRT_IDX, "$aae."). @@ -243,6 +244,10 @@ strip_to_indexdetails({_, V}) when tuple_size(V) > 4 -> striphead_to_v1details(V) -> {element(1, V), element(2, V), element(3, V), element(4, V)}. +-spec get_metadata(ledger_value()) -> metadata(). +get_metadata(LV) -> + element(4, LV). + -spec key_dominates(ledger_kv(), ledger_kv()) -> left_hand_first|right_hand_first|left_hand_dominant|right_hand_dominant. %% @doc @@ -358,7 +363,7 @@ endkey_passed(EndKey, CheckingKey) -> -spec inker_reload_strategy(compaction_strategy()) -> compaction_strategy(). 
%% @doc -%% Take the default startegy for compaction, and override the approach for any +%% Take the default strategy for compaction, and override the approach for any %% tags passed in inker_reload_strategy(AltList) -> ReloadStrategy0 = @@ -371,11 +376,13 @@ inker_reload_strategy(AltList) -> AltList). --spec get_tagstrategy(ledger_key(), compaction_strategy()) +-spec get_tagstrategy(ledger_key()|tag()|dummy, compaction_strategy()) -> skip|retain|recalc. %% @doc %% Work out the compaction strategy for the key get_tagstrategy({Tag, _, _, _}, Strategy) -> + get_tagstrategy(Tag, Strategy); +get_tagstrategy(Tag, Strategy) -> case lists:keyfind(Tag, 1, Strategy) of {Tag, TagStrat} -> TagStrat; diff --git a/src/leveled_head.erl b/src/leveled_head.erl index db94074..0669ec5 100644 --- a/src/leveled_head.erl +++ b/src/leveled_head.erl @@ -22,7 +22,8 @@ -export([key_to_canonicalbinary/1, build_head/2, - extract_metadata/3 + extract_metadata/3, + diff_indexspecs/3 ]). -export([get_size/2, @@ -71,12 +72,15 @@ -type object_metadata() :: riak_metadata()|std_metadata()|head_metadata(). -type appdefinable_function() :: - key_to_canonicalbinary | build_head | extract_metadata. + key_to_canonicalbinary | build_head | extract_metadata | diff_indexspecs. % Functions for which default behaviour can be over-written for the % application's own tags -type appdefinable_function_tuple() :: {appdefinable_function(), fun()}. +-type index_op() :: add | remove. +-type index_value() :: integer() | binary(). + -type head() :: binary()|tuple(). % TODO: @@ -174,6 +178,41 @@ default_extract_metadata(_Tag, SizeAsStoredInJournal, Obj) -> {{standard_hash(Obj), SizeAsStoredInJournal, undefined}, []}. +-spec diff_indexspecs(object_tag(), + object_metadata(), + object_metadata()|not_present) + -> leveled_codec:index_specs(). +%% @doc +%% Take an object metadata part from within the journal, and an object metadata +%% part from the ledger (which should have a lower SQN), and generate index +%% specs by determining the difference between the index specs on the object +%% to be loaded and that on object already stored. +%% +%% This is only relevant where the journal compaction strategy of `recalc` is +%% used, the Keychanges will be used when `retain` is the compaction strategy +diff_indexspecs(?RIAK_TAG, UpdatedMetadata, OldMetadata) -> + UpdIndexes = + get_indexes_from_siblingmetabin(element(1, UpdatedMetadata), []), + OldIndexes = + case OldMetadata of + not_present -> + []; + _ -> + get_indexes_from_siblingmetabin(element(1, OldMetadata), []) + end, + diff_index_data(OldIndexes, UpdIndexes); +diff_indexspecs(?STD_TAG, UpdatedMetadata, CurrentMetadata) -> + default_diff_indexspecs(?STD_TAG, UpdatedMetadata, CurrentMetadata); +diff_indexspecs(Tag, UpdatedMetadata, CurrentMetadata) -> + OverrideFun = + get_appdefined_function(diff_indexspecs, + fun default_diff_indexspecs/3, + 3), + OverrideFun(Tag, UpdatedMetadata, CurrentMetadata). + +default_diff_indexspecs(_Tag, _UpdatedMetadata, _CurrentMetadata) -> + []. + %%%============================================================================ %%% Standard External Functions %%%============================================================================ @@ -190,7 +229,7 @@ defined_objecttags() -> leveled_codec:compaction_method()}. %% @doc %% State the compaction_method to be used when reloading the Ledger from the -%% journal for each object tag. Note, no compaction startegy required for +%% journal for each object tag. 
Note, no compaction strategy required for %% head_only tag default_reload_strategy(Tag) -> {Tag, retain}. @@ -317,3 +356,120 @@ get_metadata_from_siblings(<>, MetaLen:32/integer, MetaBin:MetaLen/binary>>, [LastMod|LastMods]). + + +get_indexes_from_siblingmetabin(<<0:32/integer, + MetaLen:32/integer, + MetaBin:MetaLen/binary, + RestBin/binary>>, + Indexes) -> + UpdIndexes = lists:umerge(get_indexes_frommetabin(MetaBin), Indexes), + get_indexes_from_siblingmetabin(RestBin, UpdIndexes); +get_indexes_from_siblingmetabin(<>, + Indexes) when SibCount > 0 -> + get_indexes_from_siblingmetabin(RestBin, Indexes); +get_indexes_from_siblingmetabin(_, Indexes) -> + Indexes. + + +%% @doc +%% Parse the metabinary for an individual sibling and return a list of index +%% entries. +get_indexes_frommetabin(<<_LMD1:32/integer, _LMD2:32/integer, _LMD3:32/integer, + VTagLen:8/integer, _VTag:VTagLen/binary, + Deleted:1/binary-unit:8, + MetaRestBin/binary>>) when Deleted /= <<1>> -> + lists:usort(indexes_of_metabinary(MetaRestBin)); +get_indexes_frommetabin(_) -> + []. + + +indexes_of_metabinary(<<>>) -> + []; +indexes_of_metabinary(<>) -> + Key = decode_maybe_binary(KeyBin), + case Key of + <<"index">> -> + Value = decode_maybe_binary(ValueBin), + Value; + _ -> + indexes_of_metabinary(Rest) + end. + + +decode_maybe_binary(<<1, Bin/binary>>) -> + Bin; +decode_maybe_binary(<<0, Bin/binary>>) -> + binary_to_term(Bin); +decode_maybe_binary(<<_Other:8, Bin/binary>>) -> + Bin. + +-spec diff_index_data([{binary(), index_value()}], + [{binary(), index_value()}]) -> + [{index_op(), binary(), index_value()}]. +diff_index_data(OldIndexes, AllIndexes) -> + OldIndexSet = ordsets:from_list(OldIndexes), + AllIndexSet = ordsets:from_list(AllIndexes), + diff_specs_core(AllIndexSet, OldIndexSet). + + +diff_specs_core(AllIndexSet, OldIndexSet) -> + NewIndexSet = ordsets:subtract(AllIndexSet, OldIndexSet), + RemoveIndexSet = + ordsets:subtract(OldIndexSet, AllIndexSet), + NewIndexSpecs = + assemble_index_specs(ordsets:subtract(NewIndexSet, OldIndexSet), + add), + RemoveIndexSpecs = + assemble_index_specs(RemoveIndexSet, + remove), + NewIndexSpecs ++ RemoveIndexSpecs. + +%% @doc Assemble a list of index specs in the +%% form of triplets of the form +%% {IndexOperation, IndexField, IndexValue}. +-spec assemble_index_specs([{binary(), binary()}], index_op()) -> + [{index_op(), binary(), binary()}]. +assemble_index_specs(Indexes, IndexOp) -> + [{IndexOp, Index, Value} || {Index, Value} <- Indexes]. + + +%%%============================================================================ +%%% Test +%%%============================================================================ + +-ifdef(TEST). 
+ + +index_extract_test() -> + SibMetaBin = <<0,0,0,1,0,0,0,0,0,0,0,221,0,0,6,48,0,4,130,247,0,1,250,134, + 1,101,0,0,0,0,4,1,77,68,75,0,0,0,44,0,131,107,0,39,77,68, + 86,101,49,55,52,55,48,50,55,45,54,50,99,49,45,52,48,57,55, + 45,97,53,102,50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0, + 0,6,1,105,110,100,101,120,0,0,0,79,0,131,108,0,0,0,2,104,2, + 107,0,8,105,100,120,49,95,98,105,110,107,0,20,50,49,53,50, + 49,49,48,55,50,51,49,55,51,48,83,111,112,104,105,97,104,2, + 107,0,8,105,100,120,49,95,98,105,110,107,0,19,50,49,56,50, + 48,53,49,48,49,51,48,49,52,54,65,118,101,114,121,106,0,0,0, + 5,1,77,68,75,50,0,0,0,44,0,131,107,0,39,77,68,86,101,49,55, + 52,55,48,50,55,45,54,50,99,49,45,52,48,57,55,45,97,53,102, + 50,45,53,54,98,51,98,97,57,57,99,55,56,50>>, + Indexes = get_indexes_from_siblingmetabin(SibMetaBin, []), + ExpIndexes = [{"idx1_bin","21521107231730Sophia"}, + {"idx1_bin","21820510130146Avery"}], + ?assertMatch(ExpIndexes, Indexes). + +diff_index_test() -> + UpdIndexes = + [{<<"idx1_bin">>,<<"20840930001702Zoe">>}, + {<<"idx1_bin">>,<<"20931011172606Emily">>}], + OldIndexes = + [{<<"idx1_bin">>,<<"20231126131808Madison">>}, + {<<"idx1_bin">>,<<"20931011172606Emily">>}], + IdxSpecs = diff_index_data(OldIndexes, UpdIndexes), + ?assertMatch([{add, <<"idx1_bin">>, <<"20840930001702Zoe">>}, + {remove, <<"idx1_bin">>,<<"20231126131808Madison">>}], IdxSpecs). + +-endif. \ No newline at end of file diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index e996c15..afc2c8c 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -145,6 +145,15 @@ % released from a compaction run of a single file to make it a run % worthwhile of compaction (released space is 100.0 - target e.g. 70.0 % means that 30.0% should be released) +-type key_size() :: + {{non_neg_integer(), + leveled_codec:journal_key_tag(), + leveled_codec:ledger_key()}, non_neg_integer()}. +-type corrupted_test_key_size() :: + {{non_neg_integer(), + leveled_codec:journal_key_tag(), + leveled_codec:ledger_key(), + null}, non_neg_integer()}. %%%============================================================================ %%% API @@ -315,7 +324,8 @@ handle_cast({score_filelist, [Entry|Tail]}, State) -> ScoringState#scoring_state.filter_server, ScoringState#scoring_state.max_sqn, ?SAMPLE_SIZE, - ?BATCH_SIZE), + ?BATCH_SIZE, + State#state.reload_strategy), Candidate = #candidate{low_sqn = LowSQN, filename = FN, @@ -493,7 +503,10 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> %%% Internal functions %%%============================================================================ - +-spec check_single_file(pid(), fun(), any(), non_neg_integer(), + non_neg_integer(), non_neg_integer(), + leveled_codec:compaction_strategy()) -> + float(). %% @doc %% Get a score for a single CDB file in the journal. This will pull out a bunch %% of keys and sizes at random in an efficient way (by scanning the hashtable @@ -505,13 +518,19 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> %% %% The score is based on a random sample - so will not be consistent between %% calls. 
-check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) -> +check_single_file(CDB, FilterFun, FilterServer, MaxSQN, + SampleSize, BatchSize, + ReloadStrategy) -> FN = leveled_cdb:cdb_filename(CDB), SW = os:timestamp(), PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize), KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []), Score = - size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN), + size_comparison_score(KeySizeList, + FilterFun, + FilterServer, + MaxSQN, + ReloadStrategy), safely_log_filescore(PositionList, FN, Score, SW), Score. @@ -523,7 +542,15 @@ safely_log_filescore(PositionList, FN, Score, SW) -> div length(PositionList), leveled_log:log_timer("IC004", [Score, AvgJump, FN], SW). -size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> +-spec size_comparison_score(list(key_size() | corrupted_test_key_size()), + fun(), + any(), + non_neg_integer(), + leveled_codec:compaction_strategy()) -> + float(). +size_comparison_score(KeySizeList, + FilterFun, FilterServer, MaxSQN, + RS) -> FoldFunForSizeCompare = fun(KS, {ActSize, RplSize}) -> case KS of @@ -532,7 +559,18 @@ size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> leveled_codec:is_full_journalentry({SQN, Type, PK}), case IsJournalEntry of false -> - {ActSize + Size - ?CRC_SIZE, RplSize}; + TS = leveled_codec:get_tagstrategy(PK, RS), + % If the strategy is to retain key deltas, then + % scoring must reflect that. Key deltas are + % possible even if strategy does not allow as + % there is support for changing strategy from + % retain to recalc + case TS of + retain -> + {ActSize + Size - ?CRC_SIZE, RplSize}; + _ -> + {ActSize, RplSize + Size - ?CRC_SIZE} + end; true -> Check = FilterFun(FilterServer, PK, SQN), case {Check, SQN > MaxSQN} of @@ -567,12 +605,13 @@ fetch_inbatches([], _BatchSize, CDB, CheckedList) -> ok = leveled_cdb:cdb_clerkcomplete(CDB), CheckedList; fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) -> - {Batch, Tail} = if - length(PositionList) >= BatchSize -> - lists:split(BatchSize, PositionList); - true -> - {PositionList, []} - end, + {Batch, Tail} = + if + length(PositionList) >= BatchSize -> + lists:split(BatchSize, PositionList); + true -> + {PositionList, []} + end, KL_List = leveled_cdb:cdb_directfetch(CDB, Batch, key_size), fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List). 
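With this change, a sampled entry that is not a full journal entry (for example a key delta left behind by an earlier retain-strategy compaction) is scored according to the tag's current reload strategy: it must survive compaction, and so counts as active, only when the strategy is still retain. The accounting as a sketch (the ?CRC_SIZE adjustment is omitted and the function name is illustrative):

%% Sketch only: contribution of one sampled non-full entry of Size bytes to
%% the {ActiveSize, ReplacedSize} accumulator.
count_nonfull_entry(retain, Size, {Active, Replaced}) ->
    {Active + Size, Replaced};
count_nonfull_entry(_RecalcOrRecovr, Size, {Active, Replaced}) ->
    {Active, Replaced + Size}.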
@@ -998,6 +1037,7 @@ fetch_testcdb(RP) -> check_single_file_test() -> RP = "test/test_area/", + RS = leveled_codec:inker_reload_strategy([]), ok = filelib:ensure_dir(leveled_inker:filepath(RP, journal_dir)), {ok, CDB} = fetch_testcdb(RP), LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}}, @@ -1010,14 +1050,14 @@ check_single_file_test() -> _ -> replaced end end, - Score1 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 4), + Score1 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 4, RS), ?assertMatch(37.5, Score1), LedgerFun2 = fun(_Srv, _Key, _ObjSQN) -> current end, - Score2 = check_single_file(CDB, LedgerFun2, LedgerSrv1, 9, 8, 4), + Score2 = check_single_file(CDB, LedgerFun2, LedgerSrv1, 9, 8, 4, RS), ?assertMatch(100.0, Score2), - Score3 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 3), + Score3 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 3, RS), ?assertMatch(37.5, Score3), - Score4 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 4, 8, 4), + Score4 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 4, 8, 4, RS), ?assertMatch(75.0, Score4), ok = leveled_cdb:cdb_deletepending(CDB), ok = leveled_cdb:cdb_destroy(CDB). @@ -1132,6 +1172,7 @@ compact_empty_file_test() -> RP = "test/test_area/", ok = filelib:ensure_dir(leveled_inker:filepath(RP, journal_dir)), FN1 = leveled_inker:filepath(RP, 1, new_journal), + RS = leveled_codec:inker_reload_strategy([]), CDBopts = #cdb_options{binary_mode=true}, {ok, CDB1} = leveled_cdb:cdb_open_writer(FN1, CDBopts), {ok, FN2} = leveled_cdb:cdb_complete(CDB1), @@ -1140,7 +1181,7 @@ compact_empty_file_test() -> {2, {o, "Bucket", "Key2", null}}, {3, {o, "Bucket", "Key3", null}}], LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> replaced end, - Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4), + Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4, RS), ?assertMatch(0.0, Score1), ok = leveled_cdb:cdb_deletepending(CDB2), ok = leveled_cdb:cdb_destroy(CDB2). @@ -1207,15 +1248,22 @@ compact_singlefile_totwosmallfiles_testto() -> size_score_test() -> KeySizeList = - [{{1, ?INKT_STND, "Key1"}, 104}, - {{2, ?INKT_STND, "Key2"}, 124}, - {{3, ?INKT_STND, "Key3"}, 144}, - {{4, ?INKT_STND, "Key4"}, 154}, - {{5, ?INKT_STND, "Key5", "Subk1"}, 164}, - {{6, ?INKT_STND, "Key6"}, 174}, - {{7, ?INKT_STND, "Key7"}, 184}], + [{{1, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key1">>, null}}, 104}, + {{2, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key2">>, null}}, 124}, + {{3, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key3">>, null}}, 144}, + {{4, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key4">>, null}}, 154}, + {{5, + ?INKT_STND, + {?STD_TAG, <<"B">>, <<"Key5">>, <<"Subk1">>}, null}, + 164}, + {{6, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key6">>, null}}, 174}, + {{7, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key7">>, null}}, 184}], MaxSQN = 6, - CurrentList = ["Key1", "Key4", "Key5", "Key6"], + CurrentList = + [{?STD_TAG, <<"B">>, <<"Key1">>, null}, + {?STD_TAG, <<"B">>, <<"Key4">>, null}, + {?STD_TAG, <<"B">>, <<"Key5">>, <<"Subk1">>}, + {?STD_TAG, <<"B">>, <<"Key6">>, null}], FilterFun = fun(L, K, _SQN) -> case lists:member(K, L) of @@ -1223,7 +1271,13 @@ size_score_test() -> false -> replaced end end, - Score = size_comparison_score(KeySizeList, FilterFun, CurrentList, MaxSQN), + Score = + size_comparison_score(KeySizeList, + FilterFun, + CurrentList, + MaxSQN, + leveled_codec:inker_reload_strategy([])), + io:format("Score ~w", [Score]), ?assertMatch(true, Score > 69.0), ?assertMatch(true, Score < 70.0). 
diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index aa9d14d..ab26ca7 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -101,7 +101,7 @@ ink_fetch/3, ink_keycheck/3, ink_fold/4, - ink_loadpcl/4, + ink_loadpcl/5, ink_registersnapshot/2, ink_confirmdelete/2, ink_compactjournal/3, @@ -133,7 +133,6 @@ -define(WASTE_FP, "waste"). -define(JOURNAL_FILEX, "cdb"). -define(PENDING_FILEX, "pnd"). --define(LOADING_PAUSE, 1000). -define(LOADING_BATCH, 1000). -define(TEST_KC, {[], infinity}). @@ -321,7 +320,11 @@ ink_fold(Pid, MinSQN, FoldFuns, Acc) -> {fold, MinSQN, FoldFuns, Acc, by_runner}, infinity). --spec ink_loadpcl(pid(), integer(), fun(), pid()) -> ok. +-spec ink_loadpcl(pid(), + integer(), + leveled_bookie:initial_loadfun(), + fun((string(), non_neg_integer()) -> any()), + fun((any(), any()) -> ok)) -> ok. %% %% Function to prompt load of the Ledger at startup. The Penciller should %% have determined the lowest SQN not present in the Ledger, and the inker @@ -330,20 +333,11 @@ ink_fold(Pid, MinSQN, FoldFuns, Acc) -> %% %% The load fun should be a five arity function like: %% load_fun(KeyInJournal, ValueInJournal, _Position, Acc0, ExtractFun) -ink_loadpcl(Pid, MinSQN, FilterFun, Penciller) -> - BatchFun = - fun(BatchAcc, _Acc) -> - push_to_penciller(Penciller, BatchAcc) - end, - InitAccFun = - fun(FN, CurrentMinSQN) -> - leveled_log:log("I0014", [FN, CurrentMinSQN]), - leveled_bookie:empty_ledgercache() - end, +ink_loadpcl(Pid, MinSQN, LoadFun, InitAccFun, BatchFun) -> gen_server:call(Pid, {fold, MinSQN, - {FilterFun, InitAccFun, BatchFun}, + {LoadFun, InitAccFun, BatchFun}, ok, as_ink}, infinity). @@ -1195,22 +1189,6 @@ foldfile_between_sequence(MinSQN, MaxSQN, FoldFuns, LastPosition, FN) end. - - -push_to_penciller(Penciller, LedgerCache) -> - % The push to penciller must start as a tree to correctly de-duplicate - % the list by order before becoming a de-duplicated list for loading - LC0 = leveled_bookie:loadqueue_ledgercache(LedgerCache), - push_to_penciller_loop(Penciller, LC0). - -push_to_penciller_loop(Penciller, LedgerCache) -> - case leveled_bookie:push_ledgercache(Penciller, LedgerCache) of - returned -> - timer:sleep(?LOADING_PAUSE), - push_to_penciller_loop(Penciller, LedgerCache); - ok -> - ok - end. sequencenumbers_fromfilenames(Filenames, Regex, IntName) -> diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 640b688..3538cea 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -7,6 +7,8 @@ hot_backup_simple/1, hot_backup_changes/1, retain_strategy/1, + recalc_strategy/1, + recalc_transition_strategy/1, recovr_strategy/1, aae_missingjournal/1, aae_bustedjournal/1, @@ -21,6 +23,8 @@ all() -> [ hot_backup_simple, hot_backup_changes, retain_strategy, + recalc_strategy, + recalc_transition_strategy, recovr_strategy, aae_missingjournal, aae_bustedjournal, @@ -233,85 +237,97 @@ hot_backup_changes(_Config) -> testutil:reset_filestructure(). - retain_strategy(_Config) -> + rotate_wipe_compact(retain, retain). + +recalc_strategy(_Config) -> + rotate_wipe_compact(recalc, recalc). + +recalc_transition_strategy(_Config) -> + rotate_wipe_compact(retain, recalc). 
+ + +rotate_wipe_compact(Strategy1, Strategy2) -> RootPath = testutil:reset_filestructure(), BookOpts = [{root_path, RootPath}, {cache_size, 1000}, {max_journalobjectcount, 5000}, {sync_strategy, testutil:sync_strategy()}, - {reload_strategy, [{?RIAK_TAG, retain}]}], + {reload_strategy, [{?RIAK_TAG, Strategy1}]}], BookOptsAlt = [{root_path, RootPath}, {cache_size, 1000}, {max_journalobjectcount, 2000}, {sync_strategy, testutil:sync_strategy()}, - {reload_strategy, [{?RIAK_TAG, retain}]}, + {reload_strategy, [{?RIAK_TAG, Strategy2}]}, {max_run_length, 8}], - {ok, Spcl3, LastV3} = rotating_object_check(BookOpts, "Bucket3", 800), + {ok, Spcl3, LastV3} = rotating_object_check(BookOpts, "Bucket3", 400), ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}]), - {ok, Spcl4, LastV4} = rotating_object_check(BookOpts, "Bucket4", 1600), + {ok, Spcl4, LastV4} = rotating_object_check(BookOpts, "Bucket4", 800), ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}, {"Bucket4", Spcl4, LastV4}]), - {ok, Spcl5, LastV5} = rotating_object_check(BookOpts, "Bucket5", 3200), - ok = restart_from_blankledger(BookOptsAlt, [{"Bucket3", Spcl3, LastV3}, - {"Bucket5", Spcl5, LastV5}]), - {ok, Spcl6, LastV6} = rotating_object_check(BookOpts, "Bucket6", 6400), + {ok, Spcl5, LastV5} = rotating_object_check(BookOpts, "Bucket5", 1600), ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}, - {"Bucket4", Spcl4, LastV4}, - {"Bucket5", Spcl5, LastV5}, - {"Bucket6", Spcl6, LastV6}]), + {"Bucket5", Spcl5, LastV5}]), + {ok, Spcl6, LastV6} = rotating_object_check(BookOpts, "Bucket6", 3200), {ok, Book1} = leveled_bookie:book_start(BookOpts), compact_and_wait(Book1), - compact_and_wait(Book1), ok = leveled_bookie:book_close(Book1), - ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}, + ok = restart_from_blankledger(BookOptsAlt, [{"Bucket3", Spcl3, LastV3}, {"Bucket4", Spcl4, LastV4}, {"Bucket5", Spcl5, LastV5}, {"Bucket6", Spcl6, LastV6}]), {ok, Book2} = leveled_bookie:book_start(BookOptsAlt), + compact_and_wait(Book2), + ok = leveled_bookie:book_close(Book2), - {KSpcL2, _V2} = testutil:put_indexed_objects(Book2, "AltBucket6", 3000), + ok = restart_from_blankledger(BookOptsAlt, [{"Bucket3", Spcl3, LastV3}, + {"Bucket4", Spcl4, LastV4}, + {"Bucket5", Spcl5, LastV5}, + {"Bucket6", Spcl6, LastV6}]), + + {ok, Book3} = leveled_bookie:book_start(BookOptsAlt), + + {KSpcL2, _V2} = testutil:put_indexed_objects(Book3, "AltBucket6", 3000), Q2 = fun(RT) -> {index_query, "AltBucket6", {fun testutil:foldkeysfun/3, []}, {"idx1_bin", "#", "|"}, {RT, undefined}} end, - {async, KFolder2A} = leveled_bookie:book_returnfolder(Book2, Q2(false)), + {async, KFolder2A} = leveled_bookie:book_returnfolder(Book3, Q2(false)), KeyList2A = lists:usort(KFolder2A()), true = length(KeyList2A) == 3000, DeleteFun = fun({DK, [{add, DIdx, DTerm}]}) -> - ok = testutil:book_riakdelete(Book2, + ok = testutil:book_riakdelete(Book3, "AltBucket6", DK, [{remove, DIdx, DTerm}]) end, lists:foreach(DeleteFun, KSpcL2), - {async, KFolder2AD} = leveled_bookie:book_returnfolder(Book2, Q2(false)), - KeyList2AD = lists:usort(KFolder2AD()), - true = length(KeyList2AD) == 0, - - ok = leveled_bookie:book_close(Book2), - - {ok, Book3} = leveled_bookie:book_start(BookOptsAlt), - - io:format("Compact after deletions~n"), - - compact_and_wait(Book3), - compact_and_wait(Book3), - {async, KFolder3AD} = leveled_bookie:book_returnfolder(Book3, Q2(false)), KeyList3AD = lists:usort(KFolder3AD()), true = length(KeyList3AD) == 0, ok = 
leveled_bookie:book_close(Book3), + {ok, Book4} = leveled_bookie:book_start(BookOptsAlt), + + io:format("Compact after deletions~n"), + + compact_and_wait(Book4), + + {async, KFolder4AD} = leveled_bookie:book_returnfolder(Book4, Q2(false)), + KeyList4AD = lists:usort(KFolder4AD()), + true = length(KeyList4AD) == 0, + + ok = leveled_bookie:book_close(Book4), + testutil:reset_filestructure(). @@ -845,6 +861,10 @@ rotating_object_check(BookOpts, B, NumberOfObjects) -> B, KSpcL2, false), + ok = testutil:check_indexed_objects(Book1, + B, + KSpcL1 ++ KSpcL2 ++ KSpcL3, + V3), ok = leveled_bookie:book_close(Book1), {ok, Book2} = leveled_bookie:book_start(BookOpts), ok = testutil:check_indexed_objects(Book2, diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index e2918dc..94ed966 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -68,6 +68,7 @@ -define(MD_VTAG, <<"X-Riak-VTag">>). -define(MD_LASTMOD, <<"X-Riak-Last-Modified">>). -define(MD_DELETED, <<"X-Riak-Deleted">>). +-define(MD_INDEX, <<"index">>). -define(EMPTY_VTAG_BIN, <<"e">>). -define(ROOT_PATH, "test"). @@ -517,23 +518,30 @@ set_object(Bucket, Key, Value, IndexGen) -> set_object(Bucket, Key, Value, IndexGen, []). set_object(Bucket, Key, Value, IndexGen, Indexes2Remove) -> - + set_object(Bucket, Key, Value, IndexGen, Indexes2Remove, []). + +set_object(Bucket, Key, Value, IndexGen, Indexes2Remove, IndexesNotToRemove) -> + IdxSpecs = IndexGen(), + Indexes = + lists:map(fun({add, IdxF, IdxV}) -> {IdxF, IdxV} end, + IdxSpecs ++ IndexesNotToRemove), Obj = {Bucket, Key, Value, - IndexGen() ++ lists:map(fun({add, IdxF, IdxV}) -> - {remove, IdxF, IdxV} end, - Indexes2Remove), - [{"MDK", "MDV" ++ Key}, - {"MDK2", "MDV" ++ Key}, - {?MD_LASTMOD, os:timestamp()}]}, + IdxSpecs ++ + lists:map(fun({add, IdxF, IdxV}) -> {remove, IdxF, IdxV} end, + Indexes2Remove), + [{<<"MDK">>, "MDV" ++ Key}, + {<<"MDK2">>, "MDV" ++ Key}, + {?MD_LASTMOD, os:timestamp()}, + {?MD_INDEX, Indexes}]}, {B1, K1, V1, Spec1, MD} = Obj, Content = #r_content{metadata=dict:from_list(MD), value=V1}, {#r_object{bucket=B1, key=K1, contents=[Content], vclock=generate_vclock()}, - Spec1}. + Spec1 ++ IndexesNotToRemove}. get_value_from_objectlistitem({_Int, Obj, _Spc}) -> [Content] = Obj#r_object.contents, @@ -762,26 +770,28 @@ put_altered_indexed_objects(Book, Bucket, KSpecL) -> put_altered_indexed_objects(Book, Bucket, KSpecL, true). put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i) -> - IndexGen = testutil:get_randomindexes_generator(1), - V = testutil:get_compressiblevalue(), - RplKSpecL = lists:map(fun({K, Spc}) -> - AddSpc = if - RemoveOld2i == true -> - [lists:keyfind(add, 1, Spc)]; - RemoveOld2i == false -> - [] - end, - {O, AltSpc} = testutil:set_object(Bucket, - K, - V, - IndexGen, - AddSpc), - case book_riakput(Book, O, AltSpc) of - ok -> ok; - pause -> timer:sleep(?SLOWOFFER_DELAY) - end, - {K, AltSpc} end, - KSpecL), + IndexGen = get_randomindexes_generator(1), + V = get_compressiblevalue(), + FindAdditionFun = fun(SpcItem) -> element(1, SpcItem) == add end, + MapFun = + fun({K, Spc}) -> + {RemoveSpc, AddSpc} = + case RemoveOld2i of + true -> + {lists:filter(FindAdditionFun, Spc), []}; + false -> + {[], lists:filter(FindAdditionFun, Spc)} + end, + {O, AltSpc} = + set_object(Bucket, K, V, + IndexGen, RemoveSpc, AddSpc), + case book_riakput(Book, O, AltSpc) of + ok -> ok; + pause -> timer:sleep(?SLOWOFFER_DELAY) + end, + {K, AltSpc} + end, + RplKSpecL = lists:map(MapFun, KSpecL), {RplKSpecL, V}. 
rotating_object_check(RootPath, B, NumberOfObjects) -> @@ -790,16 +800,16 @@ rotating_object_check(RootPath, B, NumberOfObjects) -> {max_journalsize, 5000000}, {sync_strategy, sync_strategy()}], {ok, Book1} = leveled_bookie:book_start(BookOpts), - {KSpcL1, V1} = testutil:put_indexed_objects(Book1, B, NumberOfObjects), - ok = testutil:check_indexed_objects(Book1, B, KSpcL1, V1), - {KSpcL2, V2} = testutil:put_altered_indexed_objects(Book1, B, KSpcL1), - ok = testutil:check_indexed_objects(Book1, B, KSpcL2, V2), - {KSpcL3, V3} = testutil:put_altered_indexed_objects(Book1, B, KSpcL2), + {KSpcL1, V1} = put_indexed_objects(Book1, B, NumberOfObjects), + ok = check_indexed_objects(Book1, B, KSpcL1, V1), + {KSpcL2, V2} = put_altered_indexed_objects(Book1, B, KSpcL1), + ok = check_indexed_objects(Book1, B, KSpcL2, V2), + {KSpcL3, V3} = put_altered_indexed_objects(Book1, B, KSpcL2), ok = leveled_bookie:book_close(Book1), {ok, Book2} = leveled_bookie:book_start(BookOpts), - ok = testutil:check_indexed_objects(Book2, B, KSpcL3, V3), - {KSpcL4, V4} = testutil:put_altered_indexed_objects(Book2, B, KSpcL3), - ok = testutil:check_indexed_objects(Book2, B, KSpcL4, V4), + ok = check_indexed_objects(Book2, B, KSpcL3, V3), + {KSpcL4, V4} = put_altered_indexed_objects(Book2, B, KSpcL3), + ok = check_indexed_objects(Book2, B, KSpcL4, V4), Query = {keylist, ?RIAK_TAG, B, {fun foldkeysfun/3, []}}, {async, BList} = leveled_bookie:book_returnfolder(Book2, Query), true = NumberOfObjects == length(BList()), From 706ba8a6745aa923ef1ff8d186f59fd334283fb8 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sun, 15 Mar 2020 23:15:09 +0000 Subject: [PATCH 08/26] Resolve issues with passing specs around --- test/end_to_end/testutil.erl | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 94ed966..52db533 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -535,13 +535,13 @@ set_object(Bucket, Key, Value, IndexGen, Indexes2Remove, IndexesNotToRemove) -> {<<"MDK2">>, "MDV" ++ Key}, {?MD_LASTMOD, os:timestamp()}, {?MD_INDEX, Indexes}]}, - {B1, K1, V1, Spec1, MD} = Obj, + {B1, K1, V1, DeltaSpecs, MD} = Obj, Content = #r_content{metadata=dict:from_list(MD), value=V1}, {#r_object{bucket=B1, key=K1, contents=[Content], vclock=generate_vclock()}, - Spec1 ++ IndexesNotToRemove}. + DeltaSpecs}. get_value_from_objectlistitem({_Int, Obj, _Spc}) -> [Content] = Obj#r_object.contents, @@ -775,21 +775,32 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i) -> FindAdditionFun = fun(SpcItem) -> element(1, SpcItem) == add end, MapFun = fun({K, Spc}) -> + OldSpecs = lists:filter(FindAdditionFun, Spc), {RemoveSpc, AddSpc} = case RemoveOld2i of true -> - {lists:filter(FindAdditionFun, Spc), []}; + {OldSpecs, []}; false -> - {[], lists:filter(FindAdditionFun, Spc)} + {[], OldSpecs} end, - {O, AltSpc} = + {O, DeltaSpecs} = set_object(Bucket, K, V, IndexGen, RemoveSpc, AddSpc), - case book_riakput(Book, O, AltSpc) of + % DeltaSpecs should be new indexes added, and any old indexes which + % have been removed by this change where RemoveOld2i is true. + % + % The actual indexes within the object should reflect any history + % of indexes i.e. when RemoveOld2i is false. 
+ % + % The [{Key, SpecL}] returned should accrue additions over loops if + % RemoveOld2i is false + case book_riakput(Book, O, DeltaSpecs) of ok -> ok; pause -> timer:sleep(?SLOWOFFER_DELAY) end, - {K, AltSpc} + % Note that order in the SpecL is important, as + % check_indexed_objects, needs to find the latest item added + {K, DeltaSpecs ++ AddSpc} end, RplKSpecL = lists:map(MapFun, KSpecL), {RplKSpecL, V}. From 9d92ca0773020d2103d8366fb482a0e40913dbc8 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 16 Mar 2020 12:51:14 +0000 Subject: [PATCH 09/26] Add tests for appDefined functions --- src/leveled_bookie.erl | 4 + src/leveled_codec.erl | 10 +- src/leveled_head.erl | 9 ++ test/end_to_end/appdefined_SUITE.erl | 134 ++++++++++++++++++++++++++- test/end_to_end/recovery_SUITE.erl | 81 +++++++++++++++- test/end_to_end/testutil.erl | 77 ++++++++++++--- 6 files changed, 289 insertions(+), 26 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index bcbf4ef..27c158d 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -3276,6 +3276,10 @@ sqnorder_mutatefold_test() -> ok = book_destroy(Bookie1). +search_test() -> + ?assertMatch({value, 5}, search(fun(X) -> X == 5 end, lists:seq(1, 10))), + ?assertMatch(false, search(fun(X) -> X == 55 end, lists:seq(1, 10))). + check_notfound_test() -> ProbablyFun = fun() -> probably end, MissingFun = fun() -> missing end, diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 95e894e..35ab5d1 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -366,14 +366,12 @@ endkey_passed(EndKey, CheckingKey) -> %% Take the default strategy for compaction, and override the approach for any %% tags passed in inker_reload_strategy(AltList) -> - ReloadStrategy0 = + DefaultList = lists:map(fun leveled_head:default_reload_strategy/1, leveled_head:defined_objecttags()), - lists:foldl(fun({X, Y}, SList) -> - lists:keyreplace(X, 1, SList, {X, Y}) - end, - ReloadStrategy0, - AltList). + lists:ukeymerge(1, + lists:ukeysort(1, AltList), + lists:ukeysort(1, DefaultList)). -spec get_tagstrategy(ledger_key()|tag()|dummy, compaction_strategy()) diff --git a/src/leveled_head.erl b/src/leveled_head.erl index 0669ec5..ad4bbfa 100644 --- a/src/leveled_head.erl +++ b/src/leveled_head.erl @@ -472,4 +472,13 @@ diff_index_test() -> ?assertMatch([{add, <<"idx1_bin">>, <<"20840930001702Zoe">>}, {remove, <<"idx1_bin">>,<<"20231126131808Madison">>}], IdxSpecs). +decode_test() -> + Bin = <<"999">>, + BinTerm = term_to_binary("999"), + ?assertMatch("999", binary_to_list( + decode_maybe_binary(<<1:8/integer, Bin/binary>>))), + ?assertMatch("999", decode_maybe_binary(<<0:8/integer, BinTerm/binary>>)), + ?assertMatch("999", binary_to_list( + decode_maybe_binary(<<2:8/integer, Bin/binary>>))). + -endif. \ No newline at end of file diff --git a/test/end_to_end/appdefined_SUITE.erl b/test/end_to_end/appdefined_SUITE.erl index 79301de..c041844 100644 --- a/test/end_to_end/appdefined_SUITE.erl +++ b/test/end_to_end/appdefined_SUITE.erl @@ -2,11 +2,14 @@ -include_lib("common_test/include/ct.hrl"). -include("include/leveled.hrl"). -export([all/0]). --export([application_defined_tag/1 +-export([ + application_defined_tag/1, + bespoketag_recalc/1 ]). all() -> [ - application_defined_tag + % application_defined_tag, + bespoketag_recalc ]. 
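
The leveled_codec:inker_reload_strategy/1 rewrite above depends on lists:ukeymerge/3 keeping the tuple from the first list when keys collide, so caller-supplied overrides win over the defaults. A small illustrative sketch of that behaviour (the tag atoms are placeholders):

AltList  = lists:ukeysort(1, [{o_rkv, recalc}]),
Defaults = lists:ukeysort(1, [{o, retain}, {o_rkv, retain}]),
%% The override is kept for o_rkv, while the default survives for o
[{o, retain}, {o_rkv, recalc}] = lists:ukeymerge(1, AltList, Defaults).
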
@@ -62,6 +65,8 @@ application_defined_tag_tester(KeyCount, Tag, Functions, ExpectMD) -> StartOpts1 = [{root_path, RootPath}, {sync_strategy, testutil:sync_strategy()}, {log_level, warn}, + {reload_strategy, + [{bespoke_tag1, retain}, {bespoke_tag2, retain}]}, {override_functions, Functions}], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), Value = leveled_rand:rand_bytes(512), @@ -107,8 +112,6 @@ application_defined_tag_tester(KeyCount, Tag, Functions, ExpectMD) -> ok = leveled_bookie:book_close(Bookie2). - - object_generator(Count, V) -> Hash = erlang:phash2({count, V}), @@ -118,4 +121,125 @@ object_generator(Count, V) -> {Bucket, Key, [{hash, Hash}, {shard, Count rem 10}, - {random, Random}, {value, V}]}. \ No newline at end of file + {random, Random}, {value, V}]}. + + + +bespoketag_recalc(_Config) -> + %% Get a sensible behaviour using the recalc compaction strategy with a + %% bespoke tag + + RootPath = testutil:reset_filestructure(), + B0 = <<"B0">>, + KeyCount = 7000, + + ExtractMDFun = + fun(bespoke_tag, Size, Obj) -> + [{index, IL}, {value, _V}] = Obj, + {{erlang:phash2(term_to_binary(Obj)), + Size, + {index, IL}}, + [os:timestamp()]} + end, + CalcIndexFun = + fun(bespoke_tag, UpdMeta, PrvMeta) -> + % io:format("UpdMeta ~w PrvMeta ~w~n", [UpdMeta, PrvMeta]), + {index, UpdIndexes} = element(3, UpdMeta), + IndexDeltas = + case PrvMeta of + not_present -> + UpdIndexes; + PrvMeta when is_tuple(PrvMeta) -> + {index, PrvIndexes} = element(3, PrvMeta), + lists:subtract(UpdIndexes, PrvIndexes) + end, + lists:map(fun(I) -> {add, <<"temp_int">>, I} end, IndexDeltas) + end, + + BookOpts = [{root_path, RootPath}, + {cache_size, 1000}, + {max_journalobjectcount, 6000}, + {max_pencillercachesize, 8000}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{bespoke_tag, recalc}]}, + {override_functions, + [{extract_metadata, ExtractMDFun}, + {diff_indexspecs, CalcIndexFun}]}], + + {ok, Book1} = leveled_bookie:book_start(BookOpts), + LoadFun = + fun(Book, MustFind) -> + fun(I) -> + testutil:stdload_object(Book, + B0, list_to_binary(["A"|integer_to_list(I rem KeyCount)]), + I, erlang:phash2({value, I}), + infinity, bespoke_tag, false, MustFind) + end + end, + lists:foreach(LoadFun(Book1, false), lists:seq(1, KeyCount)), + lists:foreach(LoadFun(Book1, true), lists:seq(KeyCount + 1, KeyCount * 2)), + + FoldFun = + fun(_B0, {IV0, _K0}, Acc) -> + case IV0 - 1 of + Acc -> + Acc + 1; + _Unexpected -> + % io:format("Eh? 
- ~w ~w~n", [Unexpected, Acc]), + Acc + 1 + end + end, + + CountFold = + fun(Book, CurrentCount) -> + leveled_bookie:book_indexfold(Book, + B0, + {FoldFun, 0}, + {<<"temp_int">>, 0, CurrentCount}, + {true, undefined}) + end, + + {async, FolderA} = CountFold(Book1, 2 * KeyCount), + CountA = FolderA(), + io:format("Counted double index entries ~w - everything loaded OK~n", + [CountA]), + true = 2 * KeyCount == CountA, + + io:format("Before close looking for Key 999 ~w~n", + [leveled_bookie:book_head(Book1, B0, <<"A999">>, bespoke_tag)]), + + ok = leveled_bookie:book_close(Book1), + + {ok, Book2} = leveled_bookie:book_start(BookOpts), + io:format("After opening looking for Key 999 ~w~n", + [leveled_bookie:book_head(Book2, B0, <<"A999">>, bespoke_tag)]), + + lists:foreach(LoadFun(Book2, true), lists:seq(KeyCount * 2 + 1, KeyCount * 3)), + + io:format("After fresh load looking for Key 999 ~w~n", + [leveled_bookie:book_head(Book2, B0, <<"A999">>, bespoke_tag)]), + + {async, FolderB} = CountFold(Book2, 3 * KeyCount), + CountB = FolderB(), + io:format("Counted triple index entries ~w - everything re-loaded~n", + [CountB]), + true = 3 * KeyCount == CountB, + + testutil:compact_and_wait(Book2), + ok = leveled_bookie:book_close(Book2), + + io:format("Restart from blank ledger~n"), + + leveled_penciller:clean_testdir(proplists:get_value(root_path, BookOpts) ++ + "/ledger"), + {ok, Book3} = leveled_bookie:book_start(BookOpts), + + {async, FolderC} = CountFold(Book3, 3 * KeyCount), + CountC = FolderC(), + io:format("All index entries ~w present - recalc ok~n", + [CountC]), + true = 3 * KeyCount == CountC, + + ok = leveled_bookie:book_close(Book3), + + testutil:reset_filestructure(). \ No newline at end of file diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 3538cea..44d9418 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -10,6 +10,7 @@ recalc_strategy/1, recalc_transition_strategy/1, recovr_strategy/1, + stdtag_recalc/1, aae_missingjournal/1, aae_bustedjournal/1, journal_compaction_bustedjournal/1, @@ -31,7 +32,8 @@ all() -> [ journal_compaction_bustedjournal, close_duringcompaction, allkeydelta_journal_multicompact, - recompact_keydeltas + recompact_keydeltas, + stdtag_recalc ]. @@ -149,8 +151,6 @@ recovery_with_samekeyupdates(_Config) -> testutil:reset_filestructure(). - - hot_backup_simple(_Config) -> % The journal may have a hot backup. This allows for an online Bookie % to be sent a message to prepare a backup function, which an asynchronous @@ -331,6 +331,81 @@ rotate_wipe_compact(Strategy1, Strategy2) -> testutil:reset_filestructure(). +stdtag_recalc(_Config) -> + %% Setting the ?STD_TAG to do recalc, should result in the ?STD_TAG + %% behaving like recovr - as no recalc is done for ?STD_TAG + + %% NOTE -This is a test to confirm bad things happen! 
+ + RootPath = testutil:reset_filestructure(), + B0 = <<"B0">>, + KeyCount = 7000, + BookOpts = [{root_path, RootPath}, + {cache_size, 1000}, + {max_journalobjectcount, 5000}, + {max_pencillercachesize, 10000}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{?STD_TAG, recalc}]}], + {ok, Book1} = leveled_bookie:book_start(BookOpts), + LoadFun = + fun(Book) -> + fun(I) -> + testutil:stdload_object(Book, + B0, erlang:phash2(I rem KeyCount), + I, erlang:phash2({value, I}), + infinity, ?STD_TAG, false, false) + end + end, + lists:foreach(LoadFun(Book1), lists:seq(1, KeyCount)), + lists:foreach(LoadFun(Book1), lists:seq(KeyCount + 1, KeyCount * 2)), + + CountFold = + fun(Book, CurrentCount) -> + leveled_bookie:book_indexfold(Book, + B0, + {fun(_BF, _KT, Acc) -> Acc + 1 end, + 0}, + {<<"temp_int">>, 0, CurrentCount}, + {true, undefined}) + end, + + {async, FolderA} = CountFold(Book1, 2 * KeyCount), + CountA = FolderA(), + io:format("Counted double index entries ~w - everything loaded OK~n", + [CountA]), + true = 2 * KeyCount == CountA, + + ok = leveled_bookie:book_close(Book1), + + {ok, Book2} = leveled_bookie:book_start(BookOpts), + lists:foreach(LoadFun(Book2), lists:seq(KeyCount * 2 + 1, KeyCount * 3)), + + {async, FolderB} = CountFold(Book2, 3 * KeyCount), + CountB = FolderB(), + io:format("Maybe counted less index entries ~w - everything not loaded~n", + [CountB]), + true = 3 * KeyCount >= CountB, + + compact_and_wait(Book2), + ok = leveled_bookie:book_close(Book2), + + io:format("Restart from blank ledger"), + + leveled_penciller:clean_testdir(proplists:get_value(root_path, BookOpts) ++ + "/ledger"), + {ok, Book3} = leveled_bookie:book_start(BookOpts), + + {async, FolderC} = CountFold(Book3, 3 * KeyCount), + CountC = FolderC(), + io:format("Missing index entries ~w - recalc not supported on ?STD_TAG~n", + [CountC]), + true = 3 * KeyCount > CountC, + + ok = leveled_bookie:book_close(Book3), + + testutil:reset_filestructure(). + + recovr_strategy(_Config) -> RootPath = testutil:reset_filestructure(), BookOpts = [{root_path, RootPath}, diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 52db533..b84d757 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -10,6 +10,7 @@ stdload/2, stdload_expiring/3, stdload_object/6, + stdload_object/9, reset_filestructure/0, reset_filestructure/1, check_bucket_stats/2, @@ -59,7 +60,8 @@ get_value_from_objectlistitem/1, numbered_key/1, fixed_bin_key/1, - convert_to_seconds/1]). + convert_to_seconds/1, + compact_and_wait/1]). -define(RETURN_TERMS, {true, undefined}). -define(SLOWOFFER_DELAY, 5). @@ -241,17 +243,46 @@ stdload_expiring(Book, KeyCount, TTL, V, Acc) -> stdload_expiring(Book, KeyCount - 1, TTL, V, [{I, B, K}|Acc]). stdload_object(Book, B, K, I, V, TTL) -> - Obj = [{index, I}, {value, V}], - IdxSpecs = - case leveled_bookie:book_get(Book, B, K) of - {ok, PrevObj} -> - {index, OldI} = lists:keyfind(index, 1, PrevObj), - io:format("Remove index ~w for ~w~n", [OldI, I]), - [{remove, <<"temp_int">>, OldI}, {add, <<"temp_int">>, I}]; - not_found -> - [{add, <<"temp_int">>, I}] + stdload_object(Book, B, K, I, V, TTL, ?STD_TAG, true, false). 
+ +stdload_object(Book, B, K, I, V, TTL, Tag, RemovePrev2i, MustFind) -> + Obj = [{index, [I]}, {value, V}], + {IdxSpecs, Obj0} = + case {leveled_bookie:book_get(Book, B, K, Tag), MustFind} of + {{ok, PrevObj}, _} -> + {index, PrevIs} = lists:keyfind(index, 1, PrevObj), + case RemovePrev2i of + true -> + MapFun = + fun(OldI) -> {remove, <<"temp_int">>, OldI} end, + {[{add, <<"temp_int">>, I}|lists:map(MapFun, PrevIs)], + Obj}; + false -> + {[{add, <<"temp_int">>, I}], + [{index, [I|PrevIs]}, {value, V}]} + end; + {not_found, false} -> + {[{add, <<"temp_int">>, I}], Obj}; + {not_found, true} -> + HR = leveled_bookie:book_head(Book, B, K, Tag), + io:format("Unexpected not_found for key=~w I=~w HR=~w~n ", + [K, I, HR]), + {[{add, <<"temp_int">>, I}], Obj} end, - R = leveled_bookie:book_tempput(Book, B, K, Obj, IdxSpecs, ?STD_TAG, TTL), + R = + case TTL of + infinity -> + leveled_bookie:book_put(Book, B, K, Obj0, IdxSpecs, Tag); + TTL when is_integer(TTL) -> + leveled_bookie:book_tempput(Book, B, K, Obj0, + IdxSpecs, Tag, TTL) + end, + case K of + <<57, 57, 57>> -> + io:format("K ~w I ~w R ~w~n", [K, I, R]); + _ -> + ok + end, case R of ok -> ok; @@ -262,6 +293,7 @@ stdload_object(Book, B, K, I, V, TTL) -> + reset_filestructure() -> reset_filestructure(0, ?ROOT_PATH). @@ -883,4 +915,25 @@ get_aae_segment(Obj) -> get_aae_segment({Type, Bucket}, Key) -> leveled_tictac:keyto_segment32(<>); get_aae_segment(Bucket, Key) -> - leveled_tictac:keyto_segment32(<>). \ No newline at end of file + leveled_tictac:keyto_segment32(<>). + +compact_and_wait(Book) -> + compact_and_wait(Book, 20000). + +compact_and_wait(Book, WaitForDelete) -> + ok = leveled_bookie:book_compactjournal(Book, 30000), + F = fun leveled_bookie:book_islastcompactionpending/1, + lists:foldl(fun(X, Pending) -> + case Pending of + false -> + false; + true -> + io:format("Loop ~w waiting for journal " + ++ "compaction to complete~n", [X]), + timer:sleep(20000), + F(Book) + end end, + true, + lists:seq(1, 15)), + io:format("Waiting for journal deletes~n"), + timer:sleep(WaitForDelete). From 6350302ea8cfa462c98b5d698d5ae8868b345323 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 16 Mar 2020 13:32:52 +0000 Subject: [PATCH 10/26] Uncomment test --- test/end_to_end/appdefined_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/end_to_end/appdefined_SUITE.erl b/test/end_to_end/appdefined_SUITE.erl index c041844..8f44560 100644 --- a/test/end_to_end/appdefined_SUITE.erl +++ b/test/end_to_end/appdefined_SUITE.erl @@ -8,7 +8,7 @@ ]). all() -> [ - % application_defined_tag, + application_defined_tag, bespoketag_recalc ]. From dbceda876cea052ea4899114af1e0e1dbfe68870 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 16 Mar 2020 16:35:06 +0000 Subject: [PATCH 11/26] Issue with tag order https://github.com/martinsumner/leveled/issues/309 Resolve issue, and remove test log entries used when discovering issue. 
--- src/leveled_sst.erl | 4 ++-- test/end_to_end/appdefined_SUITE.erl | 13 +------------ test/end_to_end/testutil.erl | 11 ----------- 3 files changed, 3 insertions(+), 25 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 2eddd34..330d1d4 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1511,7 +1511,7 @@ accumulate_positions({K, V}, {PosBinAcc, NoHashCount, HashAcc, LMDAcc}) -> NHC:7/integer, PosBinAcc/binary>>, 0, - HashAcc, + [H1|HashAcc], LMDAcc0} end; false -> @@ -2304,7 +2304,7 @@ split_lists(KVList1, SlotLists, N, PressMethod, IdxModDate) -> -spec merge_lists(list(), list(), tuple(), sst_options(), boolean()) -> {list(), list(), list(tuple()), tuple()|null}. %% @doc -%% Merge lists when merging across more thna one file. KVLists that are +%% Merge lists when merging across more than one file. KVLists that are %% provided may include pointers to fetch more Keys/Values from the source %% file merge_lists(KVList1, KVList2, LevelInfo, SSTOpts, IndexModDate) -> diff --git a/test/end_to_end/appdefined_SUITE.erl b/test/end_to_end/appdefined_SUITE.erl index 8f44560..9a8cf39 100644 --- a/test/end_to_end/appdefined_SUITE.erl +++ b/test/end_to_end/appdefined_SUITE.erl @@ -171,7 +171,7 @@ bespoketag_recalc(_Config) -> fun(Book, MustFind) -> fun(I) -> testutil:stdload_object(Book, - B0, list_to_binary(["A"|integer_to_list(I rem KeyCount)]), + B0, integer_to_binary(I rem KeyCount), I, erlang:phash2({value, I}), infinity, bespoke_tag, false, MustFind) end @@ -205,24 +205,13 @@ bespoketag_recalc(_Config) -> [CountA]), true = 2 * KeyCount == CountA, - io:format("Before close looking for Key 999 ~w~n", - [leveled_bookie:book_head(Book1, B0, <<"A999">>, bespoke_tag)]), - ok = leveled_bookie:book_close(Book1), {ok, Book2} = leveled_bookie:book_start(BookOpts), - io:format("After opening looking for Key 999 ~w~n", - [leveled_bookie:book_head(Book2, B0, <<"A999">>, bespoke_tag)]), - lists:foreach(LoadFun(Book2, true), lists:seq(KeyCount * 2 + 1, KeyCount * 3)), - io:format("After fresh load looking for Key 999 ~w~n", - [leveled_bookie:book_head(Book2, B0, <<"A999">>, bespoke_tag)]), - {async, FolderB} = CountFold(Book2, 3 * KeyCount), CountB = FolderB(), - io:format("Counted triple index entries ~w - everything re-loaded~n", - [CountB]), true = 3 * KeyCount == CountB, testutil:compact_and_wait(Book2), diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index b84d757..00ad9ad 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -262,11 +262,6 @@ stdload_object(Book, B, K, I, V, TTL, Tag, RemovePrev2i, MustFind) -> [{index, [I|PrevIs]}, {value, V}]} end; {not_found, false} -> - {[{add, <<"temp_int">>, I}], Obj}; - {not_found, true} -> - HR = leveled_bookie:book_head(Book, B, K, Tag), - io:format("Unexpected not_found for key=~w I=~w HR=~w~n ", - [K, I, HR]), {[{add, <<"temp_int">>, I}], Obj} end, R = @@ -277,12 +272,6 @@ stdload_object(Book, B, K, I, V, TTL, Tag, RemovePrev2i, MustFind) -> leveled_bookie:book_tempput(Book, B, K, Obj0, IdxSpecs, Tag, TTL) end, - case K of - <<57, 57, 57>> -> - io:format("K ~w I ~w R ~w~n", [K, I, R]); - _ -> - ok - end, case R of ok -> ok; From b49a5ff53d5cd964a1f1e7cf9925ebc785f20a4b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 16 Mar 2020 17:35:38 +0000 Subject: [PATCH 12/26] Additional unit tests of MetaBin handling --- src/leveled_head.erl | 51 +++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/src/leveled_head.erl 
b/src/leveled_head.erl index ad4bbfa..2db0f72 100644 --- a/src/leveled_head.erl +++ b/src/leveled_head.erl @@ -444,22 +444,47 @@ assemble_index_specs(Indexes, IndexOp) -> index_extract_test() -> - SibMetaBin = <<0,0,0,1,0,0,0,0,0,0,0,221,0,0,6,48,0,4,130,247,0,1,250,134, - 1,101,0,0,0,0,4,1,77,68,75,0,0,0,44,0,131,107,0,39,77,68, - 86,101,49,55,52,55,48,50,55,45,54,50,99,49,45,52,48,57,55, - 45,97,53,102,50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0, - 0,6,1,105,110,100,101,120,0,0,0,79,0,131,108,0,0,0,2,104,2, - 107,0,8,105,100,120,49,95,98,105,110,107,0,20,50,49,53,50, - 49,49,48,55,50,51,49,55,51,48,83,111,112,104,105,97,104,2, - 107,0,8,105,100,120,49,95,98,105,110,107,0,19,50,49,56,50, - 48,53,49,48,49,51,48,49,52,54,65,118,101,114,121,106,0,0,0, - 5,1,77,68,75,50,0,0,0,44,0,131,107,0,39,77,68,86,101,49,55, - 52,55,48,50,55,45,54,50,99,49,45,52,48,57,55,45,97,53,102, - 50,45,53,54,98,51,98,97,57,57,99,55,56,50>>, + SibMetaBin = + <<0,0,0,1,0,0,0,0,0,0,0,221,0,0,6,48,0,4,130,247,0,1,250,134, + 1,101,0,0,0,0,4,1,77,68,75,0,0,0,44,0,131,107,0,39,77,68, + 86,101,49,55,52,55,48,50,55,45,54,50,99,49,45,52,48,57,55, + 45,97,53,102,50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0, + 0,6,1,105,110,100,101,120,0,0,0,79,0,131,108,0,0,0,2,104,2, + 107,0,8,105,100,120,49,95,98,105,110,107,0,20,50,49,53,50, + 49,49,48,55,50,51,49,55,51,48,83,111,112,104,105,97,104,2, + 107,0,8,105,100,120,49,95,98,105,110,107,0,19,50,49,56,50, + 48,53,49,48,49,51,48,49,52,54,65,118,101,114,121,106,0,0,0, + 5,1,77,68,75,50,0,0,0,44,0,131,107,0,39,77,68,86,101,49,55, + 52,55,48,50,55,45,54,50,99,49,45,52,48,57,55,45,97,53,102, + 50,45,53,54,98,51,98,97,57,57,99,55,56,50>>, Indexes = get_indexes_from_siblingmetabin(SibMetaBin, []), ExpIndexes = [{"idx1_bin","21521107231730Sophia"}, {"idx1_bin","21820510130146Avery"}], - ?assertMatch(ExpIndexes, Indexes). + ?assertMatch(ExpIndexes, Indexes), + SibMetaBinNoIdx = + <<0,0,0,1,0,0,0,0,0,0,0,128,0,0,6,48,0,4,130,247,0,1,250,134, + 1,101,0,0,0,0,4,1,77,68,75,0,0,0,44,0,131,107,0,39,77,68, + 86,101,49,55,52,55,48,50,55,45,54,50,99,49,45,52,48,57,55, + 45,97,53,102,50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0,0, + 5,1,77,68,75,50,0,0,0,44,0,131,107,0,39,77,68,86,101,49,55, + 52,55,48,50,55,45,54,50,99,49,45,52,48,57,55,45,97,53,102, + 50,45,53,54,98,51,98,97,57,57,99,55,56,50>>, + ?assertMatch([], get_indexes_from_siblingmetabin(SibMetaBinNoIdx, [])), + SibMetaBinOverhang = + <<0,0,0,1,0,0,0,0,0,0,0,221,0,0,6,48,0,4,130,247,0,1,250,134, + 1,101,0,0,0,0,4,1,77,68,75,0,0,0,44,0,131,107,0,39,77,68, + 86,101,49,55,52,55,48,50,55,45,54,50,99,49,45,52,48,57,55, + 45,97,53,102,50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0, + 0,6,1,105,110,100,101,120,0,0,0,79,0,131,108,0,0,0,2,104,2, + 107,0,8,105,100,120,49,95,98,105,110,107,0,20,50,49,53,50, + 49,49,48,55,50,51,49,55,51,48,83,111,112,104,105,97,104,2, + 107,0,8,105,100,120,49,95,98,105,110,107,0,19,50,49,56,50, + 48,53,49,48,49,51,48,49,52,54,65,118,101,114,121,106,0,0,0, + 5,1,77,68,75,50,0,0,0,44,0,131,107,0,39,77,68,86,101,49,55, + 52,55,48,50,55,45,54,50,99,49,45,52,48,57,55,45,97,53,102, + 50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0,0,0>>, + ?assertMatch(ExpIndexes, + get_indexes_from_siblingmetabin(SibMetaBinOverhang, [])). 
diff_index_test() -> UpdIndexes = From 5f7d261a87d19fc9378dd35018822dbe07519b52 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 16 Mar 2020 18:53:40 +0000 Subject: [PATCH 13/26] Improve test Genuine overhang --- src/leveled_head.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/leveled_head.erl b/src/leveled_head.erl index 2db0f72..3ee66c2 100644 --- a/src/leveled_head.erl +++ b/src/leveled_head.erl @@ -482,7 +482,8 @@ index_extract_test() -> 48,53,49,48,49,51,48,49,52,54,65,118,101,114,121,106,0,0,0, 5,1,77,68,75,50,0,0,0,44,0,131,107,0,39,77,68,86,101,49,55, 52,55,48,50,55,45,54,50,99,49,45,52,48,57,55,45,97,53,102, - 50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0,0,0>>, + 50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0,0,0,0,0,0,4, + 0,0,0,0>>, ?assertMatch(ExpIndexes, get_indexes_from_siblingmetabin(SibMetaBinOverhang, [])). From 808a858d0953330f4c1d0b3f22d97a059611d0b9 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 16 Mar 2020 21:41:47 +0000 Subject: [PATCH 14/26] Don't score a rolling file In giving an empty file a score of 0, a race condition was exposed. A file might not be active, but might still be rolling - and then cna get scored as 0, and immediately compacted. It will then be removed from the journal manifest. Check each file is not rolling before making it a candidate for rolling. --- src/leveled_iclerk.erl | 6 +++++- test/end_to_end/riak_SUITE.erl | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index afc2c8c..d69dd92 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -307,7 +307,11 @@ handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Manifest0}, % Don't want to process a queued call waiting on an old manifest [_Active|Manifest] = Manifest0, {FilterServer, MaxSQN} = InitiateFun(Checker), - ok = clerk_scorefilelist(self(), Manifest), + NotRollingFun = + fun({_LowSQN, _FN, Pid, _LK}) -> + not leveled_cdb:cdb_isrolling(Pid) + end, + ok = clerk_scorefilelist(self(), lists:filter(NotRollingFun, Manifest)), ScoringState = #scoring_state{filter_fun = FilterFun, filter_server = FilterServer, diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index 5e82ce7..da51127 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -905,7 +905,7 @@ handoff(_Config) -> {sync_strategy, sync}], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), - % Add some noe Riak objects in - which should be ignored in folds. + % Add some none Riak objects in - which should be ignored in folds. 
Hashes = testutil:stdload(Bookie1, 1000), % Generate 200K objects to be used within the test, and load them into % the first store (outputting the generated objects as a list of lists) From 5b4edfebb6c4102de4a41fd0aaf7774ef6b12625 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 17 Mar 2020 14:20:57 +0000 Subject: [PATCH 15/26] Coverage cheat Very rarely, this line in the tests this line is not covered - so cheating here to consistently pass coverage --- src/leveled_sst.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 330d1d4..56de69e 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -3319,6 +3319,7 @@ simple_persisted_tester(SSTNewFun) -> Acc end end, + true = [] == MapFun({FirstKey, "V"}, []), % coverage cheat within MapFun KVList3 = lists:foldl(MapFun, [], KVList2), SW2 = os:timestamp(), lists:foreach(fun({K, H, _V}) -> From 50cb98ecdd37b88a32465c51d1de97130d82b0de Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 17 Mar 2020 17:29:59 +0000 Subject: [PATCH 16/26] Resolve intermittent test failure the previous regex filter still allowed files with cdb in the body of the name (which can be true as filenames are guid based) --- test/end_to_end/testutil.erl | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 00ad9ad..0fe9a66 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -881,16 +881,9 @@ restore_topending(RootPath, FileName) -> find_journals(RootPath) -> {ok, FNsA_J} = file:list_dir(RootPath ++ "/journal/journal_files"), - {ok, Regex} = re:compile(".*\.cdb"), - CDBFiles = lists:foldl(fun(FN, Acc) -> case re:run(FN, Regex) of - nomatch -> - Acc; - _ -> - [FN|Acc] - end - end, - [], - FNsA_J), + % Must not return a file with the .pnd extension + CDBFiles = + lists:filter(fun(FN) -> filename:extension(FN) == ".cdb" end, FNsA_J), CDBFiles. 
convert_to_seconds({MegaSec, Seconds, _MicroSec}) -> From 8a9db9e75ed4bd9508bea0abd7d2fc3b2f54e9a9 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 23 Mar 2020 16:45:28 +0000 Subject: [PATCH 17/26] Add log of startegy when clerk starts compaction --- src/leveled_iclerk.erl | 2 ++ src/leveled_log.erl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index d69dd92..0551a31 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -297,6 +297,8 @@ handle_call(stop, _From, State) -> handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Manifest0}, State) -> + leveled_log:log("IC014", [State#state.reload_strategy, + State#state.max_run_length]), % Empty the waste folder clear_waste(State), SW = os:timestamp(), diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 51072fa..ea84b51 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -356,6 +356,8 @@ {"IC013", {warn, "File with name ~s to be ignored in manifest as scanning for " ++ "first key returned empty - maybe corrupted"}}, + {"IC014", + {info, "Compaction to be run with strategy ~w and max_run_length ~w"}}, {"CDB01", {info, "Opening file for writing with filename ~s"}}, From 20a7a2257108f4a743584528471dc03f32da5a3a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 24 Mar 2020 20:21:44 +0000 Subject: [PATCH 18/26] Add documentation for recalc option --- docs/DESIGN.md | 2 +- docs/STARTUP_OPTIONS.md | 4 ++-- src/leveled_bookie.erl | 12 ++++++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/DESIGN.md b/docs/DESIGN.md index 8e8c142..d095821 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -120,7 +120,7 @@ Three potential recovery strategies are supported to provide some flexibility fo - retain - on compaction KeyDeltas are retained in the Journal, only values are removed. -- recalc (not yet implemented) - the compaction rules assume that on recovery the key changes will be recalculated by comparing the change with the current database state. In recovery the key changes will be recalculated by comparing the change with the current database state. +- recalc - the compaction rules assume that on recovery the key changes will be recalculated by comparing the change with the current database state. In recovery the key changes will be recalculated by comparing the change with the current database state. A user-defined function should be passed in at startup to achieve this recalculation (to override `leveled_head:diff_indexspeacs/3`). ### Hot Backups diff --git a/docs/STARTUP_OPTIONS.md b/docs/STARTUP_OPTIONS.md index e73a021..c802f73 100644 --- a/docs/STARTUP_OPTIONS.md +++ b/docs/STARTUP_OPTIONS.md @@ -77,11 +77,11 @@ However, what if the Ledger had been erased? This could happen due to some corr The are three potential strategies: - - `skip` - don't worry about this scenario, require the Ledger to be backed up; + - `recovr` - don't worry about this scenario, require the Ledger to be backed up; - `retain` - discard the object itself on compaction but keep the key changes; - `recalc` - recalculate the indexes on reload by comparing the information on the object with the current state of the Ledger (as would be required by the PUT process when comparing IndexSpecs at PUT time). -There is no code for `recalc` at present it is simply a logical possibility. So to set a reload strategy there should be an entry like `{reload_strategy, [{TagName, skip|retain}]}`. By default tags are pre-set to `retain`. 
If there is no need to handle a corrupted Ledger, then all tags could be set to `skip`. +To set a reload strategy requires a list of tuples to match tag names to strategy `{reload_strategy, [{TagName, recovr|retain|recalc}]}`. By default tags are pre-set to `retain`. If there is no need to handle a corrupted Ledger, then all tags could be set to `recovr` - this assumes that either the ledger files are protected by some other means from corruption, or an external anti-entropy mechanism will recover the lost data. ## Compression Method diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 27c158d..1aeea16 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -309,10 +309,14 @@ % resilience outside of the store), or retain (retain a history of % key changes, even when the object value has been compacted). % - % There is a third, theoretical and untested strategy, which is - % recalc - which would require when reloading the Ledger from the - % Journal, to recalculate the index changes based on the current - % state of the Ledger and the object metadata. + % There is a third strategy, which is recalc, where on reloading + % the Ledger from the Journal, the key changes are recalculated by + % comparing the extracted metadata from the Journal object, with the + % extracted metadata from the current Ledger object it is set to + % replace (should one be present). Implementing the recalc + % strategy requires a override function for + % `leveled_head:diff_indexspecs/3`. + % A function for the ?RIAK_TAG is provided and tested. % % reload_strategy options are a list - to map from a tag to the % strategy (recovr|retain|recalc). Defualt strategies are: From 4ef0f4006da19e11e50512374e0010ea383bb354 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 26 Mar 2020 14:18:57 +0000 Subject: [PATCH 19/26] Extend mergefile_selector for strategy Strategy only applied below L1, and only random strategy supported --- src/leveled_pclerk.erl | 2 +- src/leveled_pmanifest.erl | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 3e697a3..c7d199f 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -183,7 +183,7 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) -> leveled_log:log("PC023", [SrcLevel + 1, FCnt, AvgMem, MaxFN, MaxP, MaxMem]) end, - Src = leveled_pmanifest:mergefile_selector(Manifest, SrcLevel), + Src = leveled_pmanifest:mergefile_selector(Manifest, SrcLevel, random), NewSQN = leveled_pmanifest:get_manifest_sqn(Manifest) + 1, SinkList = leveled_pmanifest:merge_lookup(Manifest, SrcLevel + 1, diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 91bacaa..43b2a07 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -34,7 +34,7 @@ remove_manifest_entry/4, replace_manifest_entry/5, switch_manifest_entry/4, - mergefile_selector/2, + mergefile_selector/3, add_snapshot/3, release_snapshot/2, merge_snapshot/2, @@ -82,6 +82,7 @@ -type manifest() :: #manifest{}. -type manifest_entry() :: #manifest_entry{}. -type manifest_owner() :: pid()|list(). +-type selector_strategy() :: random. -export_type([manifest/0, manifest_entry/0, manifest_owner/0]). @@ -429,7 +430,8 @@ merge_lookup(Manifest, LevelIdx, StartKey, EndKey) -> range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun). --spec mergefile_selector(manifest(), integer()) -> manifest_entry(). +-spec mergefile_selector(manifest(), integer(), selector_strategy()) + -> manifest_entry(). 
%% @doc %% An algorithm for discovering which files to merge .... %% We can find the most optimal file: @@ -441,10 +443,10 @@ merge_lookup(Manifest, LevelIdx, StartKey, EndKey) -> %% genuinely better - eventually every file has to be compacted. %% %% Hence, the initial implementation is to select files to merge at random -mergefile_selector(Manifest, LevelIdx) when LevelIdx =< 1 -> +mergefile_selector(Manifest, LevelIdx, _Strategy) when LevelIdx =< 1 -> Level = array:get(LevelIdx, Manifest#manifest.levels), lists:nth(leveled_rand:uniform(length(Level)), Level); -mergefile_selector(Manifest, LevelIdx) -> +mergefile_selector(Manifest, LevelIdx, random) -> Level = leveled_tree:to_list(array:get(LevelIdx, Manifest#manifest.levels)), {_SK, ME} = lists:nth(leveled_rand:uniform(length(Level)), Level), @@ -1057,10 +1059,10 @@ changeup_setup(Man6) -> random_select_test() -> ManTuple = initial_setup(), LastManifest = element(7, ManTuple), - L1File = mergefile_selector(LastManifest, 1), + L1File = mergefile_selector(LastManifest, 1, random), % This blows up if the function is not prepared for the different format % https://github.com/martinsumner/leveled/issues/43 - _L2File = mergefile_selector(LastManifest, 2), + _L2File = mergefile_selector(LastManifest, 2, random), Level1 = array:get(1, LastManifest#manifest.levels), ?assertMatch(true, lists:member(L1File, Level1)). From e175948378ac8ec7385d58c3dfcb3c4f54bf942b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 26 Mar 2020 14:25:09 +0000 Subject: [PATCH 20/26] Remove references ot 'skip' strategy Now called `recovr` --- docs/DESIGN.md | 2 +- src/leveled_bookie.erl | 2 +- src/leveled_codec.erl | 4 ++-- src/leveled_iclerk.erl | 5 +++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/DESIGN.md b/docs/DESIGN.md index d095821..7adfd57 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -120,7 +120,7 @@ Three potential recovery strategies are supported to provide some flexibility fo - retain - on compaction KeyDeltas are retained in the Journal, only values are removed. -- recalc - the compaction rules assume that on recovery the key changes will be recalculated by comparing the change with the current database state. In recovery the key changes will be recalculated by comparing the change with the current database state. A user-defined function should be passed in at startup to achieve this recalculation (to override `leveled_head:diff_indexspeacs/3`). +- recalc - the compaction rules assume that on recovery the key changes will be recalculated by comparing the change with the current database state. In recovery the key changes will be recalculated by comparing the change with the current database state. A user-defined function should be passed in at startup to achieve this recalculation (to override `leveled_head:diff_indexspecs/3`). ### Hot Backups diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 1aeea16..e1a2728 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -1109,7 +1109,7 @@ book_destroy(Pid) -> %% to store the backup. %% %% Backup files are hard-linked. Does not work in head_only mode, or if -%% index changes are used with a `skip` compaction/reload strategy +%% index changes are used with a `recovr` compaction/reload strategy book_hotbackup(Pid) -> gen_server:call(Pid, hot_backup, infinity). 
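
For reference, a minimal sketch of configuring these strategies per tag at startup, in the same shape as the options used by the recovery tests in this series (RootPath and the tag-to-strategy choices are illustrative; recalc for a bespoke tag also needs a diff_indexspecs override):

BookOpts =
    [{root_path, RootPath},
     {reload_strategy, [{?RIAK_TAG, recalc}, {?STD_TAG, retain}]}],
{ok, Bookie} = leveled_bookie:book_start(BookOpts),
ok = leveled_bookie:book_close(Bookie).
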
diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 35ab5d1..d040723 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -89,7 +89,7 @@ -type ledger_kv() :: {ledger_key(), ledger_value()}. -type compaction_method() :: - retain|skip|recalc. + retain|recovr|recalc. -type compaction_strategy() :: list({tag(), compaction_method()}). -type journal_key_tag() :: @@ -375,7 +375,7 @@ inker_reload_strategy(AltList) -> -spec get_tagstrategy(ledger_key()|tag()|dummy, compaction_strategy()) - -> skip|retain|recalc. + -> compaction_method(). %% @doc %% Work out the compaction strategy for the key get_tagstrategy({Tag, _, _, _}, Strategy) -> diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 0551a31..9f1256d 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -805,10 +805,11 @@ split_positions_into_batches(Positions, Journal, Batches) -> %% recalculating the KeyChanges by looking at the object when we reload. So %% old objects can be discarded. %% -%% If the strategy is skip, we don't care about KeyDeltas. Note though, that +%% If the strategy is recovr, we don't care about KeyDeltas. Note though, that %% if the ledger is deleted it may not be possible to safely rebuild a KeyStore %% if it contains index entries. The hot_backup approach is also not safe with -%% a `skip` strategy. +%% a `recovr` strategy. The recovr strategy assumes faults in the ledger will +%% be resolved via application-level anti-entropy filter_output(KVCs, FilterFun, FilterServer, MaxSQN, ReloadStrategy) -> FoldFun = filter_output_fun(FilterFun, FilterServer, MaxSQN, ReloadStrategy), From aca945a17151dd62fd1a568583fff6c50b0fd624 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 27 Mar 2020 10:20:10 +0000 Subject: [PATCH 21/26] Add counting of tombstones to new SST files .. and that old-style SST files cna still be created, and opened, with a return of 'not_counted' --- src/leveled_pclerk.erl | 6 +- src/leveled_sst.erl | 314 +++++++++++++++++++++++++------- test/end_to_end/basic_SUITE.erl | 2 +- 3 files changed, 249 insertions(+), 73 deletions(-) diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index c7d199f..f5d60b1 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -260,9 +260,9 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) -> length(Additions)), leveled_log:log("PC012", [NewSQN, FileName, SinkB]), TS1 = os:timestamp(), - case leveled_sst:sst_new(RP, FileName, - KL1, KL2, SinkB, SinkLevel, MaxSQN, - OptsSST) of + case leveled_sst:sst_newmerge(RP, FileName, + KL1, KL2, SinkB, SinkLevel, MaxSQN, + OptsSST) of empty -> leveled_log:log("PC013", [FileName]), do_merge([], [], diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 2eddd34..f9a6c5a 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -92,6 +92,7 @@ -define(FLIPPER32, 4294967295). -define(COMPRESS_AT_LEVEL, 1). -define(INDEX_MODDATE, true). +-define(TOMB_COUNT, true). -define(USE_SET_FOR_SPEED, 64). -define(STARTUP_TIMEOUT, 10000). @@ -111,7 +112,7 @@ delete_pending/3]). -export([sst_new/6, - sst_new/8, + sst_newmerge/8, sst_newlevelzero/7, sst_open/4, sst_get/2, @@ -123,6 +124,7 @@ sst_checkready/1, sst_switchlevels/2, sst_deleteconfirmed/1, + sst_gettombcount/1, sst_close/1]). -export([tune_seglist/1, extract_hash/1, member_check/2]). @@ -158,7 +160,7 @@ range_endpoint(), range_endpoint()}. -type expandable_pointer() - :: slot_pointer()|sst_closed_pointer(). + :: slot_pointer()|sst_pointer()|sst_closed_pointer(). 
-type expanded_pointer() :: leveled_codec:ledger_kv()|expandable_pointer(). -type binaryslot_element() @@ -168,9 +170,13 @@ {sets, sets:set(non_neg_integer())}| {list, list(non_neg_integer())}. -type sst_options() - :: #sst_options{}. + :: #sst_options{}. +-type binary_slot() + :: {binary(), binary(), list(integer()), leveled_codec:ledger_key()}. +-type sst_summary() + :: #summary{}. -%% yield_blockquery is used to detemrine if the work necessary to process a +%% yield_blockquery is used to determine if the work necessary to process a %% range query beyond the fetching the slot should be managed from within %% this process, or should be handled by the calling process. %% Handling within the calling process may lead to extra binary heap garbage @@ -193,7 +199,9 @@ fetch_cache = array:new([{size, ?CACHE_SIZE}]), new_slots :: list()|undefined, deferred_startup_tuple :: tuple()|undefined, - level :: non_neg_integer()|undefined}). + level :: non_neg_integer()|undefined, + tomb_count = not_counted + :: non_neg_integer()|not_counted}). -record(sst_timings, {sample_count = 0 :: integer(), @@ -265,7 +273,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> {ok, Pid} = gen_fsm:start_link(?MODULE, [], []), PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method), OptsSST0 = OptsSST#sst_options{press_method = PressMethod0}, - {[], [], SlotList, FK} = + {[], [], SlotList, FK, _CountOfTombs} = merge_lists(KVList, OptsSST0, IndexModDate), case gen_fsm:sync_send_event(Pid, {sst_new, @@ -276,17 +284,18 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> MaxSQN, OptsSST0, IndexModDate, + not_counted, self()}, infinity) of {ok, {SK, EK}, Bloom} -> {ok, Pid, {SK, EK}, Bloom} end. --spec sst_new(string(), string(), - list(leveled_codec:ledger_kv()|sst_pointer()), - list(leveled_codec:ledger_kv()|sst_pointer()), - boolean(), integer(), - integer(), sst_options()) +-spec sst_newmerge(string(), string(), + list(leveled_codec:ledger_kv()|sst_pointer()), + list(leveled_codec:ledger_kv()|sst_pointer()), + boolean(), integer(), + integer(), sst_options()) -> empty|{ok, pid(), {{list(leveled_codec:ledger_kv()), list(leveled_codec:ledger_kv())}, @@ -302,23 +311,23 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> %% The remainder of the lists is returned along with the StartKey and EndKey %% so that the remainder cna be used in the next file in the merge. It might %% be that the merge_lists returns nothing (for example when a basement file is -%% all tombstones) - and the atome empty is returned in this case so that the +%% all tombstones) - and the atom empty is returned in this case so that the %% file is not added to the manifest. -sst_new(RootPath, Filename, +sst_newmerge(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, OptsSST) -> - sst_new(RootPath, Filename, + sst_newmerge(RootPath, Filename, KVL1, KVL2, IsBasement, Level, - MaxSQN, OptsSST, ?INDEX_MODDATE). + MaxSQN, OptsSST, ?INDEX_MODDATE, ?TOMB_COUNT). 
-sst_new(RootPath, Filename, +sst_newmerge(RootPath, Filename, KVL1, KVL2, IsBasement, Level, - MaxSQN, OptsSST, IndexModDate) -> + MaxSQN, OptsSST, IndexModDate, TombCount) -> PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method), OptsSST0 = OptsSST#sst_options{press_method = PressMethod0}, - {Rem1, Rem2, SlotList, FK} = - merge_lists(KVL1, KVL2, {IsBasement, Level}, - OptsSST0, IndexModDate), + {Rem1, Rem2, SlotList, FK, CountOfTombs} = + merge_lists(KVL1, KVL2, {IsBasement, Level}, OptsSST0, + IndexModDate, TombCount), case SlotList of [] -> empty; @@ -333,6 +342,7 @@ sst_new(RootPath, Filename, MaxSQN, OptsSST0, IndexModDate, + CountOfTombs, self()}, infinity) of {ok, {SK, EK}, Bloom} -> @@ -428,6 +438,15 @@ sst_expandpointer(Pointer, MorePointers, ScanWidth, SegmentList, LowLastMod) -> sst_setfordelete(Pid, Penciller) -> gen_fsm:sync_send_event(Pid, {set_for_delete, Penciller}, infinity). +-spec sst_gettombcount(pid()) -> non_neg_integer()|not_counted. +%% @doc +%% Get the count of tomb stones in this SST file, returning not_counted if this +%% file was created with a version which did not support tombstone counting, or +%% could also be because the file is L0 (which aren't counted as being chosen +%% for merge is inevitable) +sst_gettombcount(Pid) -> + gen_fsm:sync_send_event(Pid, get_tomb_count, infinity). + -spec sst_clear(pid()) -> ok. %% @doc %% For this file to be closed and deleted @@ -497,17 +516,18 @@ starting({sst_open, RootPath, Filename, OptsSST, Level}, _From, State) -> starting({sst_new, RootPath, Filename, Level, {SlotList, FirstKey}, MaxSQN, - OptsSST, IdxModDate, StartingPID}, _From, State) -> + OptsSST, IdxModDate, CountOfTombs, StartingPID}, _From, State) -> SW = os:timestamp(), leveled_log:save(OptsSST#sst_options.log_options), PressMethod = OptsSST#sst_options.press_method, {Length, SlotIndex, BlockIndex, SlotsBin, Bloom} = build_all_slots(SlotList), SummaryBin = - build_table_summary(SlotIndex, Level, FirstKey, Length, MaxSQN, Bloom), + build_table_summary(SlotIndex, Level, FirstKey, Length, + MaxSQN, Bloom, CountOfTombs), ActualFilename = write_file(RootPath, Filename, SummaryBin, SlotsBin, - PressMethod, IdxModDate), + PressMethod, IdxModDate, CountOfTombs), YBQ = Level =< 2, {UpdState, Bloom} = read_file(ActualFilename, @@ -530,7 +550,8 @@ starting({sst_newlevelzero, RootPath, Filename, Penciller, MaxSQN, OptsSST, IdxModDate}, _From, State) -> DeferredStartupTuple = - {RootPath, Filename, Penciller, MaxSQN, OptsSST, IdxModDate}, + {RootPath, Filename, Penciller, MaxSQN, OptsSST, + IdxModDate}, {reply, ok, starting, State#state{deferred_startup_tuple = DeferredStartupTuple, level = 0}}; starting(close, _From, State) -> @@ -551,7 +572,7 @@ starting(complete_l0startup, State) -> Time0 = timer:now_diff(os:timestamp(), SW0), SW1 = os:timestamp(), - {[], [], SlotList, FirstKey} = + {[], [], SlotList, FirstKey, _CountOfTombs} = merge_lists(KVList, OptsSST, IdxModDate), Time1 = timer:now_diff(os:timestamp(), SW1), @@ -562,13 +583,14 @@ starting(complete_l0startup, State) -> SW3 = os:timestamp(), SummaryBin = - build_table_summary(SlotIndex, 0, FirstKey, SlotCount, MaxSQN, Bloom), + build_table_summary(SlotIndex, 0, FirstKey, SlotCount, + MaxSQN, Bloom, not_counted), Time3 = timer:now_diff(os:timestamp(), SW3), SW4 = os:timestamp(), ActualFilename = write_file(RootPath, Filename, SummaryBin, SlotsBin, - PressMethod, IdxModDate), + PressMethod, IdxModDate, not_counted), {UpdState, Bloom} = read_file(ActualFilename, State#state{root_path=RootPath, @@ 
-716,6 +738,8 @@ reader(background_complete, _From, State) ->
                  Summary#summary.last_key},
        reader,
        State};
+reader(get_tomb_count, _From, State) ->
+    {reply, State#state.tomb_count, reader, State};
reader(close, _From, State) ->
    ok = file:close(State#state.handle),
    {stop, normal, ok, State}.
@@ -1196,11 +1220,11 @@ compress_level(_Level, PressMethod) ->
    PressMethod.

write_file(RootPath, Filename, SummaryBin, SlotsBin,
-            PressMethod, IdxModDate) ->
+            PressMethod, IdxModDate, CountOfTombs) ->
    SummaryLength = byte_size(SummaryBin),
    SlotsLength = byte_size(SlotsBin),
    {PendingName, FinalName} = generate_filenames(Filename),
-    FileVersion = gen_fileversion(PressMethod, IdxModDate),
+    FileVersion = gen_fileversion(PressMethod, IdxModDate, CountOfTombs),
    case filelib:is_file(filename:join(RootPath, FinalName)) of
        true ->
            AltName = filename:join(RootPath, filename:basename(FinalName))
@@ -1210,6 +1234,7 @@ write_file(RootPath, Filename, SummaryBin, SlotsBin,
        false ->
            ok
    end,
+
    ok = leveled_util:safe_rename(filename:join(RootPath, PendingName),
                                  filename:join(RootPath, FinalName),
                                  <
        open_reader(filename:join(State#state.root_path, Filename),
                    LoadPageCache),
    UpdState0 = imp_fileversion(FileVersion, State),
-    {Summary, Bloom, SlotList} = read_table_summary(SummaryBin),
+    {Summary, Bloom, SlotList, TombCount} =
+        read_table_summary(SummaryBin, UpdState0#state.tomb_count),
    BlockIndexCache = array:new([{size, Summary#summary.size},
                                 {default, none}]),
    UpdState1 = UpdState0#state{blockindex_cache = BlockIndexCache},
@@ -1235,11 +1261,12 @@ read_file(Filename, State, LoadPageCache) ->
                    Summary#summary.size,
                    Summary#summary.max_sqn]),
    {UpdState1#state{summary = UpdSummary,
-                     handle = Handle,
-                     filename = Filename},
+                     handle = Handle,
+                     filename = Filename,
+                     tomb_count = TombCount},
     Bloom}.

-gen_fileversion(PressMethod, IdxModDate) ->
+gen_fileversion(PressMethod, IdxModDate, CountOfTombs) ->
    % Native or none can be treated the same once written, as reader
    % does not need to know as compression info will be in header of the
    % block
@@ -1256,7 +1283,14 @@ gen_fileversion(PressMethod, IdxModDate) ->
        false ->
            0
    end,
-    Bit1+ Bit2.
+    Bit3 =
+        case CountOfTombs of
+            not_counted ->
+                0;
+            _ ->
+                4
+        end,
+    Bit1 + Bit2 + Bit3.

imp_fileversion(VersionInt, State) ->
    UpdState0 =
@@ -1273,7 +1307,12 @@ imp_fileversion(VersionInt, State) ->
        2 ->
            UpdState0#state{index_moddate = true}
    end,
-    UpdState1.
+    case VersionInt band 4 of
+        0 ->
+            UpdState1;
+        4 ->
+            UpdState1#state{tomb_count = 0}
+    end.

open_reader(Filename, LoadPageCache) ->
    {ok, Handle} = file:open(Filename, [binary, raw, read]),
@@ -1290,25 +1329,50 @@ open_reader(Filename, LoadPageCache) ->
    {ok, SummaryBin} = file:pread(Handle, SlotsLength + 9, SummaryLength),
    {Handle, FileVersion, SummaryBin}.

-build_table_summary(SlotIndex, _Level, FirstKey, SlotCount, MaxSQN, Bloom) ->
+build_table_summary(SlotIndex, _Level, FirstKey,
+                    SlotCount, MaxSQN, Bloom, CountOfTombs) ->
    [{LastKey, _LastV}|_Rest] = SlotIndex,
    Summary = #summary{first_key = FirstKey,
                       last_key = LastKey,
                       size = SlotCount,
                       max_sqn = MaxSQN},
-    SummBin =
+    SummBin0 =
        term_to_binary({Summary, Bloom, lists:reverse(SlotIndex)},
                        ?BINARY_SETTINGS),
+
+    SummBin =
+        case CountOfTombs of
+            not_counted ->
+                SummBin0;
+            I ->
+                <<I:32/integer, SummBin0/binary>>
+        end,
+
    SummCRC = hmac(SummBin),
    <<SummCRC:32/integer, SummBin/binary>>.

-read_table_summary(BinWithCheck) ->
+-spec read_table_summary(binary(), not_counted|non_neg_integer()) ->
+                            {sst_summary(),
+                             leveled_ebloom:bloom(),
+                             list(tuple()),
+                             not_counted|non_neg_integer()}.
+%% @doc
+%% Read the table summary - format varies depending on file version (presence
+%5 of tomb count)
+read_table_summary(BinWithCheck, TombCount) ->
    <<SummCRC:32/integer, SummBin/binary>> = BinWithCheck,
    CRCCheck = hmac(SummBin),
    if
        CRCCheck == SummCRC ->
            % If not might it might be possible to rebuild from all the slots
-            binary_to_term(SummBin)
+            case TombCount of
+                not_counted ->
+                    erlang:append_element(binary_to_term(SummBin),
+                                            not_counted);
+                _ ->
+                    <<I:32/integer, SummBin0/binary>> = SummBin,
+                    erlang:append_element(binary_to_term(SummBin0), I)
+            end
    end.
@@ -1535,10 +1599,7 @@ take_max_lastmoddate(LMD, LMDAcc) ->
                        press_method(),
                        boolean(),
                        build_timings()) ->
-                            {{binary(),
-                              binary(),
-                              list(integer()),
-                              leveled_codec:ledger_key()},
+                            {binary_slot(),
                             build_timings()}.
%% @doc
%% Generate the serialised slot to be used when storing this sublist of keys
@@ -2258,7 +2319,7 @@ revert_position(Pos) ->
%% large numbers of index keys are present - as well as improving compression
%% ratios in the Ledger.
%%
-%% The outcome of merge_lists/3 and merge_lists/5 should be an list of slots.
+%% The outcome of merge_lists/3 and merge_lists/6 should be an list of slots.
%% Each slot should be ordered by Key and be of the form {Flag, KVList}, where
%% Flag can either be lookup or no-lookup. The list of slots should also be
%% ordered by Key (i.e. the first key in the slot)
@@ -2275,17 +2336,19 @@ revert_position(Pos) ->
%% any lower sequence numbers should be compacted out of existence

-spec merge_lists(list(), sst_options(), boolean())
-                    -> {list(), list(), list(tuple()), tuple()|null}.
+                    -> {list(), list(), list(binary_slot()),
+                        tuple()|null, non_neg_integer()|not_counted}.
%% @doc
%%
-%% Merge from asingle list (i.e. at Level 0)
+%% Merge from a single list (i.e. at Level 0)
merge_lists(KVList1, SSTOpts, IdxModDate) ->
-    SlotCount = length(KVList1) div ?LOOK_SLOTSIZE,
+    SlotCount = length(KVList1) div ?LOOK_SLOTSIZE,
    {[],
     [],
     split_lists(KVList1, [],
                    SlotCount, SSTOpts#sst_options.press_method, IdxModDate),
-     element(1, lists:nth(1, KVList1))}.
+     element(1, lists:nth(1, KVList1)),
+     not_counted}.

split_lists([], SlotLists, 0, _PressMethod, _IdxModDate) ->
@@ -2301,35 +2364,57 @@ split_lists(KVList1, SlotLists, N, PressMethod, IdxModDate) ->
    split_lists(KVListRem, [SlotD|SlotLists], N - 1, PressMethod, IdxModDate).

--spec merge_lists(list(), list(), tuple(), sst_options(), boolean()) ->
-                    {list(), list(), list(tuple()), tuple()|null}.
+-spec merge_lists(list(), list(), tuple(), sst_options(),
+                    boolean(), boolean()) ->
+                    {list(), list(), list(binary_slot()), tuple()|null,
+                        non_neg_integer()}.
%% @doc
-%% Merge lists when merging across more thna one file. KVLists that are
+%% Merge lists when merging across more than one file. KVLists that are
%% provided may include pointers to fetch more Keys/Values from the source
%% file
-merge_lists(KVList1, KVList2, LevelInfo, SSTOpts, IndexModDate) ->
+merge_lists(KVList1, KVList2, LevelInfo, SSTOpts,
+                IndexModDate, SaveTombCount) ->
+    InitTombCount =
+        case SaveTombCount of true -> 0; false -> not_counted end,
    merge_lists(KVList1, KVList2,
                LevelInfo,
                [], null, 0,
                SSTOpts#sst_options.max_sstslots,
                SSTOpts#sst_options.press_method,
                IndexModDate,
+                InitTombCount,
                #build_timings{}).
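%% Illustrative sketch (not part of the patch): a minimal model of the summary
%% framing used by build_table_summary/7 and read_table_summary/2 above. When
%% a tombstone count is kept it is written as a 32-bit prefix in front of the
%% summary term_to_binary, and the CRC then goes in front of whichever binary
%% results. The names below are hypothetical, and erlang:crc32/1 stands in for
%% the module's internal hmac/1.
frame_summary(SummaryTerm, not_counted) ->
    add_crc(term_to_binary(SummaryTerm));
frame_summary(SummaryTerm, TombCount) when is_integer(TombCount) ->
    add_crc(<<TombCount:32/integer, (term_to_binary(SummaryTerm))/binary>>).

add_crc(SummBin) ->
    CRC = erlang:crc32(SummBin),
    <<CRC:32/integer, SummBin/binary>>.

unframe_summary(<<CRC:32/integer, SummBin/binary>>, TombCountSupported) ->
    CRC = erlang:crc32(SummBin),    % badmatch here means a corrupt summary
    case TombCountSupported of
        false ->
            {binary_to_term(SummBin), not_counted};
        true ->
            <<TombCount:32/integer, Inner/binary>> = SummBin,
            {binary_to_term(Inner), TombCount}
    end.

%% e.g. {summary, 42} = unframe_summary(frame_summary(summary, 42), true).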
+-spec merge_lists( + list(expanded_pointer()), + list(expanded_pointer()), + {boolean(), non_neg_integer()}, + list(binary_slot()), + leveled_codec:ledger_key()|null, + non_neg_integer(), + non_neg_integer(), + press_method(), + boolean(), + non_neg_integer()|not_counted, + build_timings()) -> + {list(expanded_pointer()), list(expanded_pointer()), + list(binary_slot()), leveled_codec:ledger_key()|null, + non_neg_integer()|not_counted}. + merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, MaxSlots, MaxSlots, - _PressMethod, _IdxModDate, T0) -> + _PressMethod, _IdxModDate, CountOfTombs, T0) -> % This SST file is full, move to complete file, and return the % remainder log_buildtimings(T0, LI), - {KVL1, KVL2, lists:reverse(SlotList), FirstKey}; + {KVL1, KVL2, lists:reverse(SlotList), FirstKey, CountOfTombs}; merge_lists([], [], LI, SlotList, FirstKey, _SlotCount, _MaxSlots, - _PressMethod, _IdxModDate, T0) -> + _PressMethod, _IdxModDate, CountOfTombs, T0) -> % the source files are empty, complete the file log_buildtimings(T0, LI), - {[], [], lists:reverse(SlotList), FirstKey}; + {[], [], lists:reverse(SlotList), FirstKey, CountOfTombs}; merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, SlotCount, MaxSlots, - PressMethod, IdxModDate, T0) -> + PressMethod, IdxModDate, CountOfTombs, T0) -> % Form a slot by merging the two lists until the next 128 K/V pairs have % been determined SW = os:timestamp(), @@ -2348,6 +2433,7 @@ merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, SlotCount, MaxSlots, MaxSlots, PressMethod, IdxModDate, + CountOfTombs, T1); {Lookup, KVL} -> % Convert the list of KVs for the slot into a binary, and related @@ -2363,9 +2449,42 @@ merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, SlotCount, MaxSlots, MaxSlots, PressMethod, IdxModDate, + count_tombs(KVL, CountOfTombs), T2) end. + +-spec count_tombs(list(leveled_codec:ledger_kv()), + non_neg_integer()|not_counted) -> + non_neg_integer()|not_counted. +%% @doc +%% Count the tombstones in a list of KVs +count_tombs(_KVL, not_counted) -> + not_counted; +count_tombs(KVL, InitCount) -> + FoldFun = + fun(KV, Count) -> + case leveled_codec:strip_to_statusonly(KV) of + tomb -> + Count + 1; + _ -> + Count + end + end, + lists:foldl(FoldFun, InitCount, KVL). + +-spec form_slot(list(expanded_pointer()), + list(expanded_pointer()), + {boolean(), non_neg_integer()}, + lookup|no_lookup, + non_neg_integer(), + list(leveled_codec:ledger_kv()), + leveled_codec:ledger_key()|null) -> + {list(expanded_pointer()), list(expanded_pointer()), + {lookup|no_lookup, list(leveled_codec:ledger_kv())}, + leveled_codec:ledger_key()}. +%% @doc +%% Merge together Key Value lists to provide an ordered slot of KVs form_slot([], [], _LI, Type, _Size, Slot, FK) -> {[], [], {Type, lists:reverse(Slot)}, FK}; form_slot(KVList1, KVList2, _LI, lookup, ?LOOK_SLOTSIZE, Slot, FK) -> @@ -2644,8 +2763,8 @@ testsst_new(RootPath, Filename, OptsSST = #sst_options{press_method=PressMethod, log_options=leveled_log:get_opts()}, - sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, - OptsSST, false). + sst_newmerge(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, + OptsSST, false, false). generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> generate_randomkeys(Seqn, @@ -2695,11 +2814,61 @@ generate_indexkey(Term, Count) -> infinity). 
+tombcount_test() -> + N = 1600, + KL1 = generate_randomkeys(N div 2 + 1, N, 1, 4), + KL2 = generate_indexkeys(N div 2), + FlipToTombFun = + fun({K, V}) -> + case leveled_rand:uniform(10) of + X when X > 5 -> + {K, setelement(2, V, tomb)}; + _ -> + {K, V} + end + end, + KVL1 = lists:map(FlipToTombFun, KL1), + KVL2 = lists:map(FlipToTombFun, KL2), + CountTombFun = + fun({_K, V}, Acc) -> + case element(2, V) of + tomb -> + Acc + 1; + _ -> + Acc + end + end, + ExpectedCount = lists:foldl(CountTombFun, 0, KVL1 ++ KVL2), + + {RP, Filename} = {?TEST_AREA, "tombcount_test"}, + OptsSST = + #sst_options{press_method=native, + log_options=leveled_log:get_opts()}, + {ok, SST1, _KD, _BB} = sst_newmerge(RP, Filename, + KVL1, KVL2, false, 2, + N, OptsSST, false, false), + ?assertMatch(not_counted, sst_gettombcount(SST1)), + ok = sst_close(SST1), + ok = file:delete(filename:join(RP, Filename ++ ".sst")), + + {ok, SST2, _KD, _BB} = sst_newmerge(RP, Filename, + KVL1, KVL2, false, 2, + N, OptsSST, false, true), + + ?assertMatch(ExpectedCount, sst_gettombcount(SST2)), + ok = sst_close(SST2), + ok = file:delete(filename:join(RP, Filename ++ ".sst")). + + + form_slot_test() -> % If a skip key happens, mustn't switch to loookup by accident as could be % over the expected size - SkippingKV = {{o, "B1", "K9999", null}, {9999, tomb, 1234567, {}}}, - Slot = [{{o, "B1", "K5", null}, {5, active, 99234567, {}}}], + SkippingKV = + {{o, "B1", "K9999", null}, {9999, tomb, {1234568, 1234567}, {}}}, + Slot = + [{{o, "B1", "K5", null}, + {5, {active, infinity}, {99234568, 99234567}, {}}}], R1 = form_slot([SkippingKV], [], {true, 99999999}, no_lookup, @@ -2709,19 +2878,26 @@ form_slot_test() -> ?assertMatch({[], [], {no_lookup, Slot}, {o, "B1", "K5", null}}, R1). merge_tombstonelist_test() -> - % Merge lists with nothing but tombstones - SkippingKV1 = {{o, "B1", "K9995", null}, {9995, tomb, 1234567, {}}}, - SkippingKV2 = {{o, "B1", "K9996", null}, {9996, tomb, 1234567, {}}}, - SkippingKV3 = {{o, "B1", "K9997", null}, {9997, tomb, 1234567, {}}}, - SkippingKV4 = {{o, "B1", "K9998", null}, {9998, tomb, 1234567, {}}}, - SkippingKV5 = {{o, "B1", "K9999", null}, {9999, tomb, 1234567, {}}}, + % Merge lists with nothing but tombstones, and file at basement level + SkippingKV1 = + {{o, "B1", "K9995", null}, {9995, tomb, {1234568, 1234567}, {}}}, + SkippingKV2 = + {{o, "B1", "K9996", null}, {9996, tomb, {1234568, 1234567}, {}}}, + SkippingKV3 = + {{o, "B1", "K9997", null}, {9997, tomb, {1234568, 1234567}, {}}}, + SkippingKV4 = + {{o, "B1", "K9998", null}, {9998, tomb, {1234568, 1234567}, {}}}, + SkippingKV5 = + {{o, "B1", "K9999", null}, {9999, tomb, {1234568, 1234567}, {}}}, R = merge_lists([SkippingKV1, SkippingKV3, SkippingKV5], [SkippingKV2, SkippingKV4], {true, 9999999}, #sst_options{press_method = native, max_sstslots = 256}, - ?INDEX_MODDATE), - ?assertMatch({[], [], [], null}, R). + ?INDEX_MODDATE, + true), + + ?assertMatch({[], [], [], null, 0}, R). 
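%% Illustrative sketch (not part of the patch): the file-version byte read as
%% flags. gen_fileversion/3 earlier in this patch adds 4 when a tombstone
%% count is stored and imp_fileversion/2 tests `band 4` on read; bit 2 is the
%% index-moddate flag and bit 1 the compression-method flag per the
%% surrounding code. decode_fileversion/1 is a hypothetical helper.
decode_fileversion(VersionInt) when is_integer(VersionInt) ->
    #{tomb_count_stored => VersionInt band 4 == 4,
      index_moddate => VersionInt band 2 == 2,
      press_flag => VersionInt band 1 == 1}.

%% e.g. #{tomb_count_stored := true} = decode_fileversion(7),
%%      #{tomb_count_stored := false} = decode_fileversion(3).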
indexed_list_test() -> io:format(user, "~nIndexed list timing test:~n", []), diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 9591fee..2495eb8 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -196,7 +196,7 @@ bigsst_littlesst(_Config) -> {compression_point, on_compact}], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), ObjL1 = - testutil:generate_objects(60000, 1, [], + testutil:generate_objects(80000, 1, [], leveled_rand:rand_bytes(100), fun() -> [] end, <<"B">>), testutil:riakload(Bookie1, ObjL1), From da97d65a23d1aa38c579d04fa5ad75b2bb7904ca Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 27 Mar 2020 15:09:48 +0000 Subject: [PATCH 22/26] Add grooming compactions Make half of LSM-tree compactions grooming compactions i.e. compactions biased towards merging files with large numbers of tombstones. --- src/leveled_log.erl | 2 + src/leveled_pclerk.erl | 106 +++++++++++++++++++++++++++++++++++++- src/leveled_pmanifest.erl | 21 +++++++- src/leveled_sst.erl | 6 +++ 4 files changed, 131 insertions(+), 4 deletions(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 51072fa..d9c5aff 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -230,6 +230,8 @@ {"PC023", {info, "At level=~w file_count=~w avg_mem=~w " ++ "file with most memory fn=~s p=~w mem=~w"}}, + {"PC024", + {info, "Grooming compaction picked file with tomb_count=~w"}}, {"PM002", {info, "Completed dump of L0 cache to list of l0cache_size=~w"}}, diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index f5d60b1..9f47df0 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -49,6 +49,7 @@ -define(MAX_TIMEOUT, 2000). -define(MIN_TIMEOUT, 200). +-define(GROOMING_PERC, 50). -record(state, {owner :: pid() | undefined, root_path :: string() | undefined, @@ -56,6 +57,8 @@ sst_options :: #sst_options{} }). +-type manifest_entry() :: #manifest_entry{}. + %%%============================================================================ %%% API %%%============================================================================ @@ -183,7 +186,15 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) -> leveled_log:log("PC023", [SrcLevel + 1, FCnt, AvgMem, MaxFN, MaxP, MaxMem]) end, - Src = leveled_pmanifest:mergefile_selector(Manifest, SrcLevel, random), + SelectMethod = + case leveled_rand:uniform(100) of + R when R < ?GROOMING_PERC -> + {grooming, fun grooming_scorer/1}; + _ -> + random + end, + Src = + leveled_pmanifest:mergefile_selector(Manifest, SrcLevel, SelectMethod), NewSQN = leveled_pmanifest:get_manifest_sqn(Manifest) + 1, SinkList = leveled_pmanifest:merge_lookup(Manifest, SrcLevel + 1, @@ -285,6 +296,18 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) -> Additions ++ [Entry]) end. +-spec grooming_scorer(list(manifest_entry())) -> manifest_entry(). +grooming_scorer(Sample) -> + ScoringFun = + fun(ME) -> + TombCount = leveled_sst:sst_gettombcount(ME#manifest_entry.owner), + {TombCount, ME} + end, + ScoredSample = + lists:reverse(lists:ukeysort(1, lists:map(ScoringFun, Sample))), + [{HighestTC, BestME}|_Rest] = ScoredSample, + leveled_log:log("PC024", [HighestTC]), + BestME. return_deletions(ManifestSQN, PendingDeletionD) -> % The returning of deletions had been seperated out as a failure to fetch @@ -325,6 +348,82 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) -> generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange). 
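%% Illustrative sketch (not part of the patch): the grooming selection above
%% in one pure function - take a random sample of manifest entries, score each
%% by leveled_sst:sst_gettombcount/1, and keep the highest scorer. Entries
%% reporting not_counted (legacy file format) win outright, because in Erlang
%% term ordering any atom compares greater than any integer - the behaviour
%% asserted in grooming_score_test/0 below. pick_grooming_candidate/2 is a
%% hypothetical name, not part of leveled.
pick_grooming_candidate(ManifestEntries, SampleSize) ->
    Shuffled = [E || {_R, E} <- lists:sort([{leveled_rand:uniform(), E}
                                                || E <- ManifestEntries])],
    Sample = lists:sublist(Shuffled, SampleSize),
    Scored = [{leveled_sst:sst_gettombcount(E#manifest_entry.owner), E}
                || E <- Sample],
    {_TopScore, Best} = lists:last(lists:keysort(1, Scored)),
    Best.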
+grooming_score_test() -> + ok = filelib:ensure_dir("test/test_area/ledger_files/"), + KL1_L3 = lists:sort(generate_randomkeys(2000, 0, 100)), + KL2_L3 = lists:sort(generate_randomkeys(2000, 101, 250)), + KL3_L3 = lists:sort(generate_randomkeys(2000, 251, 300)), + KL4_L3 = lists:sort(generate_randomkeys(2000, 301, 400)), + [{HeadK, HeadV}|RestKL2] = KL2_L3, + + {ok, PidL3_1, _, _} = + leveled_sst:sst_newmerge("test/test_area/ledger_files/", + "1_L3.sst", + KL1_L3, + [{HeadK, setelement(2, HeadV, tomb)} + |RestKL2], + false, + 3, + 999999, + #sst_options{}, + true, + true), + {ok, PidL3_1B, _, _} = + leveled_sst:sst_newmerge("test/test_area/ledger_files/", + "1B_L3.sst", + KL1_L3, + [{HeadK, setelement(2, HeadV, tomb)} + |RestKL2], + true, + 3, + 999999, + #sst_options{}, + true, + true), + + {ok, PidL3_2, _, _} = + leveled_sst:sst_newmerge("test/test_area/ledger_files/", + "2_L3.sst", + KL3_L3, + KL4_L3, + false, + 3, + 999999, + #sst_options{}, + true, + true), + {ok, PidL3_2NC, _, _} = + leveled_sst:sst_newmerge("test/test_area/ledger_files/", + "2NC_L3.sst", + KL3_L3, + KL4_L3, + false, + 3, + 999999, + #sst_options{}, + true, + false), + + ME1 = #manifest_entry{owner=PidL3_1}, + ME1B = #manifest_entry{owner=PidL3_1B}, + ME2 = #manifest_entry{owner=PidL3_2}, + ME2NC = #manifest_entry{owner=PidL3_2NC}, + ?assertMatch(ME1, grooming_scorer([ME1, ME2])), + ?assertMatch(ME1, grooming_scorer([ME2, ME1])), + % prefer the file with the tombstone + ?assertMatch(ME2NC, grooming_scorer([ME1, ME2NC])), + ?assertMatch(ME2NC, grooming_scorer([ME2NC, ME1])), + % not_counted > 1 - we will merge files in unexpected (i.e. legacy) + % format first + ?assertMatch(ME1B, grooming_scorer([ME1B, ME2])), + ?assertMatch(ME2, grooming_scorer([ME2, ME1B])), + % If the file with the tombstone is in the basement, it will have + % no tombstone so the first file will be chosen + + lists:foreach(fun(P) -> leveled_sst:sst_clear(P) end, + [PidL3_1, PidL3_1B, PidL3_2, PidL3_2NC]). + + merge_file_test() -> ok = filelib:ensure_dir("test/test_area/ledger_files/"), KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)), @@ -401,7 +500,10 @@ merge_file_test() -> "test/test_area/ledger_files/", 3, #sst_options{}), - ?assertMatch(3, leveled_pmanifest:get_manifest_sqn(Man6)). + ?assertMatch(3, leveled_pmanifest:get_manifest_sqn(Man6)), + + lists:foreach(fun(P) -> leveled_sst:sst_clear(P) end, + [PidL1_1, PidL2_1, PidL2_2, PidL2_3, PidL2_4]). coverage_cheat_test() -> {ok, _State1} = diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 43b2a07..94c9c77 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -60,6 +60,7 @@ -define(TREE_WIDTH, 8). -define(PHANTOM_PID, r2d_fail). -define(MANIFESTS_TO_RETAIN, 5). +-define(GROOM_SAMPLE, 8). -record(manifest, {levels, % an array of lists or trees representing the manifest @@ -82,7 +83,8 @@ -type manifest() :: #manifest{}. -type manifest_entry() :: #manifest_entry{}. -type manifest_owner() :: pid()|list(). --type selector_strategy() :: random. +-type selector_strategy() :: + random|{grooming, fun((list(manifest_entry())) -> manifest_entry())}. -export_type([manifest/0, manifest_entry/0, manifest_owner/0]). @@ -450,7 +452,21 @@ mergefile_selector(Manifest, LevelIdx, random) -> Level = leveled_tree:to_list(array:get(LevelIdx, Manifest#manifest.levels)), {_SK, ME} = lists:nth(leveled_rand:uniform(length(Level)), Level), - ME. 
+ ME; +mergefile_selector(Manifest, LevelIdx, {grooming, ScoringFun}) -> + Level = leveled_tree:to_list(array:get(LevelIdx, + Manifest#manifest.levels)), + SelectorFun = + fun(_I, Acc) -> + {_SK, ME} = lists:nth(leveled_rand:uniform(length(Level)), Level), + [ME|Acc] + end, + Sample = + lists:usort(lists:foldl(SelectorFun, [], lists:seq(1, ?GROOM_SAMPLE))), + % Note that Entries may be less than GROOM_SAMPLE, if same one picked + % multiple times + ScoringFun(Sample). + -spec merge_snapshot(manifest(), manifest()) -> manifest(). %% @doc @@ -609,6 +625,7 @@ check_bloom(Manifest, FP, Hash) -> %%% Internal Functions %%%============================================================================ + -spec get_manifest_entry({tuple(), manifest_entry()}|manifest_entry()) -> manifest_entry(). %% @doc diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index f9a6c5a..4eb7292 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -127,6 +127,12 @@ sst_gettombcount/1, sst_close/1]). +-ifdef(TEST). + +-export([sst_newmerge/10]). + +-endif. + -export([tune_seglist/1, extract_hash/1, member_check/2]). -export([in_range/3]). From 28c88ef8b8b10e510441d575ffe4490b73dde9f4 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 27 Mar 2020 20:09:03 +0000 Subject: [PATCH 23/26] Typo --- src/leveled_sst.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 4eb7292..399576b 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1364,7 +1364,7 @@ build_table_summary(SlotIndex, _Level, FirstKey, not_counted|non_neg_integer()}. %% @doc %% Read the table summary - format varies depending on file version (presence -%5 of tomb count) +%% of tomb count) read_table_summary(BinWithCheck, TombCount) -> <> = BinWithCheck, CRCCheck = hmac(SummBin), From 9838e255d2712ce3f12f131d81b645f7b7e645c8 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sun, 29 Mar 2020 20:02:21 +0100 Subject: [PATCH 24/26] Address review comments More efficient traversal of list to score. --- src/leveled_pclerk.erl | 25 +++++++++++++++---------- src/leveled_pmanifest.erl | 3 ++- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 9f47df0..53f8cae 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -188,7 +188,7 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) -> end, SelectMethod = case leveled_rand:uniform(100) of - R when R < ?GROOMING_PERC -> + R when R =< ?GROOMING_PERC -> {grooming, fun grooming_scorer/1}; _ -> random @@ -297,18 +297,23 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) -> end. -spec grooming_scorer(list(manifest_entry())) -> manifest_entry(). -grooming_scorer(Sample) -> - ScoringFun = - fun(ME) -> - TombCount = leveled_sst:sst_gettombcount(ME#manifest_entry.owner), - {TombCount, ME} - end, - ScoredSample = - lists:reverse(lists:ukeysort(1, lists:map(ScoringFun, Sample))), - [{HighestTC, BestME}|_Rest] = ScoredSample, +grooming_scorer([ME | MEs]) -> + InitTombCount = leveled_sst:sst_gettombcount(ME#manifest_entry.owner), + {HighestTC, BestME} = grooming_scorer(InitTombCount, ME, MEs), leveled_log:log("PC024", [HighestTC]), BestME. 
+grooming_scorer(HighestTC, BestME, []) -> + {HighestTC, BestME}; +grooming_scorer(HighestTC, BestME, [ME | MEs]) -> + TombCount = leveled_sst:sst_gettombcount(ME#manifest_entry.owner), + case TombCount > HighestTC of + true -> + grooming_scorer(TombCount, ME, MEs); + false -> + grooming_scorer(HighestTC, BestME, MEs) + end. + return_deletions(ManifestSQN, PendingDeletionD) -> % The returning of deletions had been seperated out as a failure to fetch % here had caased crashes of the clerk. The root cause of the failure to diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 94c9c77..34df448 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -464,7 +464,8 @@ mergefile_selector(Manifest, LevelIdx, {grooming, ScoringFun}) -> Sample = lists:usort(lists:foldl(SelectorFun, [], lists:seq(1, ?GROOM_SAMPLE))), % Note that Entries may be less than GROOM_SAMPLE, if same one picked - % multiple times + % multiple times. Level cannot be empty, as otherwise a merge would not + % have been chosen at this level ScoringFun(Sample). From d05a5fdd4610ef4cfdd62f52b10573f4e4659241 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 30 Mar 2020 20:07:48 +0100 Subject: [PATCH 25/26] Make grooming more accurate Check more files to optimise grooming choices --- src/leveled_pmanifest.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 34df448..3c7c0c7 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -60,7 +60,7 @@ -define(TREE_WIDTH, 8). -define(PHANTOM_PID, r2d_fail). -define(MANIFESTS_TO_RETAIN, 5). --define(GROOM_SAMPLE, 8). +-define(GROOM_SAMPLE, 16). -record(manifest, {levels, % an array of lists or trees representing the manifest From 312fc52832e13c61b3e137777e5cd51c0d50f39e Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 31 Mar 2020 09:33:50 +0100 Subject: [PATCH 26/26] Extend test to make it highly likely a "garbage" merge file choice is made --- test/end_to_end/riak_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index da51127..17b02d0 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -26,7 +26,7 @@ all() -> [ basic_riak(_Config) -> - basic_riak_tester(<<"B0">>, 120000), + basic_riak_tester(<<"B0">>, 640000), basic_riak_tester({<<"Type0">>, <<"B0">>}, 80000).