Add grooming compactions

Make half of LSM-tree compactions grooming compactions, i.e. compactions biased towards merging files with large numbers of tombstones.
This commit is contained in:
Martin Sumner 2020-03-27 15:09:48 +00:00
parent aca945a171
commit da97d65a23
4 changed files with 131 additions and 4 deletions

View file

@ -230,6 +230,8 @@
{"PC023", {"PC023",
{info, "At level=~w file_count=~w avg_mem=~w " {info, "At level=~w file_count=~w avg_mem=~w "
++ "file with most memory fn=~s p=~w mem=~w"}}, ++ "file with most memory fn=~s p=~w mem=~w"}},
{"PC024",
{info, "Grooming compaction picked file with tomb_count=~w"}},
{"PM002", {"PM002",
{info, "Completed dump of L0 cache to list of l0cache_size=~w"}}, {info, "Completed dump of L0 cache to list of l0cache_size=~w"}},

View file

@ -49,6 +49,7 @@
-define(MAX_TIMEOUT, 2000). -define(MAX_TIMEOUT, 2000).
-define(MIN_TIMEOUT, 200). -define(MIN_TIMEOUT, 200).
-define(GROOMING_PERC, 50).
-record(state, {owner :: pid() | undefined, -record(state, {owner :: pid() | undefined,
root_path :: string() | undefined, root_path :: string() | undefined,
@ -56,6 +57,8 @@
sst_options :: #sst_options{} sst_options :: #sst_options{}
}). }).
-type manifest_entry() :: #manifest_entry{}.
%%%============================================================================ %%%============================================================================
%%% API %%% API
%%%============================================================================ %%%============================================================================
@ -183,7 +186,15 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) ->
leveled_log:log("PC023", leveled_log:log("PC023",
[SrcLevel + 1, FCnt, AvgMem, MaxFN, MaxP, MaxMem]) [SrcLevel + 1, FCnt, AvgMem, MaxFN, MaxP, MaxMem])
end, end,
Src = leveled_pmanifest:mergefile_selector(Manifest, SrcLevel, random), SelectMethod =
case leveled_rand:uniform(100) of
R when R < ?GROOMING_PERC ->
{grooming, fun grooming_scorer/1};
_ ->
random
end,
Src =
leveled_pmanifest:mergefile_selector(Manifest, SrcLevel, SelectMethod),
NewSQN = leveled_pmanifest:get_manifest_sqn(Manifest) + 1, NewSQN = leveled_pmanifest:get_manifest_sqn(Manifest) + 1,
SinkList = leveled_pmanifest:merge_lookup(Manifest, SinkList = leveled_pmanifest:merge_lookup(Manifest,
SrcLevel + 1, SrcLevel + 1,
@ -285,6 +296,18 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) ->
Additions ++ [Entry]) Additions ++ [Entry])
end. end.
-spec grooming_scorer(list(manifest_entry())) -> manifest_entry().
%% @doc
%% Return the manifest entry from the sample whose file reports the highest
%% tombstone count, logging that count under PC024.  Where more than one
%% entry shares the highest count, the one appearing earliest in the sample
%% is returned.
grooming_scorer(Sample) ->
    Scored =
        [{leveled_sst:sst_gettombcount(ME#manifest_entry.owner), ME}
            || ME <- Sample],
    % An empty sample is not expected, and will fail the match here
    [FirstScored|OtherScored] = Scored,
    KeepHigherFun =
        fun({TC, _} = Candidate, {LeadTC, _} = Leader) ->
            % Only a strictly higher count displaces the current leader, so
            % that ties are resolved in favour of the earlier entry
            case TC > LeadTC of
                true ->
                    Candidate;
                false ->
                    Leader
            end
        end,
    {HighestTC, BestME} =
        lists:foldl(KeepHigherFun, FirstScored, OtherScored),
    leveled_log:log("PC024", [HighestTC]),
    BestME.
return_deletions(ManifestSQN, PendingDeletionD) -> return_deletions(ManifestSQN, PendingDeletionD) ->
% The returning of deletions had been separated out as a failure to fetch % The returning of deletions had been separated out as a failure to fetch
@ -325,6 +348,82 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) ->
generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange). generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange).
grooming_score_test() ->
    % Build four level 3 files from random keys, then check which file the
    % grooming scorer prefers for each pairing (in both sample orders)
    RootPath = "test/test_area/ledger_files/",
    ok = filelib:ensure_dir(RootPath),
    KL1_L3 = lists:sort(generate_randomkeys(2000, 0, 100)),
    KL2_L3 = lists:sort(generate_randomkeys(2000, 101, 250)),
    KL3_L3 = lists:sort(generate_randomkeys(2000, 251, 300)),
    KL4_L3 = lists:sort(generate_randomkeys(2000, 301, 400)),
    % Replace the second element of the first value in KL2_L3 with tomb, so
    % that files merged from this list hold a single tombstone
    [{HeadK, HeadV}|RestKL2] = KL2_L3,
    TombKL2_L3 = [{HeadK, setelement(2, HeadV, tomb)}|RestKL2],
    % The files differ only in filename, key lists, and the fifth and tenth
    % arguments to sst_newmerge/10 (presumably the is-basement and the
    % count-tombstones flags - confirm against leveled_sst)
    NewFileFun =
        fun(FileName, KLa, KLb, Arg5, Arg10) ->
            {ok, Pid, _, _} =
                leveled_sst:sst_newmerge(RootPath,
                                            FileName,
                                            KLa,
                                            KLb,
                                            Arg5,
                                            3,
                                            999999,
                                            #sst_options{},
                                            true,
                                            Arg10),
            Pid
        end,
    PidL3_1 = NewFileFun("1_L3.sst", KL1_L3, TombKL2_L3, false, true),
    PidL3_1B = NewFileFun("1B_L3.sst", KL1_L3, TombKL2_L3, true, true),
    PidL3_2 = NewFileFun("2_L3.sst", KL3_L3, KL4_L3, false, true),
    PidL3_2NC = NewFileFun("2NC_L3.sst", KL3_L3, KL4_L3, false, false),

    ME1 = #manifest_entry{owner=PidL3_1},
    ME1B = #manifest_entry{owner=PidL3_1B},
    ME2 = #manifest_entry{owner=PidL3_2},
    ME2NC = #manifest_entry{owner=PidL3_2NC},

    % prefer the file with the tombstone
    ?assertMatch(ME1, grooming_scorer([ME1, ME2])),
    ?assertMatch(ME1, grooming_scorer([ME2, ME1])),

    % not_counted > 1 - we will merge files in unexpected (i.e. legacy)
    % format first
    ?assertMatch(ME2NC, grooming_scorer([ME1, ME2NC])),
    ?assertMatch(ME2NC, grooming_scorer([ME2NC, ME1])),

    % If the file with the tombstone is in the basement, it will have
    % no tombstone so the first file will be chosen
    ?assertMatch(ME1B, grooming_scorer([ME1B, ME2])),
    ?assertMatch(ME2, grooming_scorer([ME2, ME1B])),

    lists:foreach(fun leveled_sst:sst_clear/1,
                    [PidL3_1, PidL3_1B, PidL3_2, PidL3_2NC]).
merge_file_test() -> merge_file_test() ->
ok = filelib:ensure_dir("test/test_area/ledger_files/"), ok = filelib:ensure_dir("test/test_area/ledger_files/"),
KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)), KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)),
@ -401,7 +500,10 @@ merge_file_test() ->
"test/test_area/ledger_files/", "test/test_area/ledger_files/",
3, #sst_options{}), 3, #sst_options{}),
?assertMatch(3, leveled_pmanifest:get_manifest_sqn(Man6)). ?assertMatch(3, leveled_pmanifest:get_manifest_sqn(Man6)),
lists:foreach(fun(P) -> leveled_sst:sst_clear(P) end,
[PidL1_1, PidL2_1, PidL2_2, PidL2_3, PidL2_4]).
coverage_cheat_test() -> coverage_cheat_test() ->
{ok, _State1} = {ok, _State1} =

View file

@ -60,6 +60,7 @@
-define(TREE_WIDTH, 8). -define(TREE_WIDTH, 8).
-define(PHANTOM_PID, r2d_fail). -define(PHANTOM_PID, r2d_fail).
-define(MANIFESTS_TO_RETAIN, 5). -define(MANIFESTS_TO_RETAIN, 5).
-define(GROOM_SAMPLE, 8).
-record(manifest, {levels, -record(manifest, {levels,
% an array of lists or trees representing the manifest % an array of lists or trees representing the manifest
@ -82,7 +83,8 @@
-type manifest() :: #manifest{}. -type manifest() :: #manifest{}.
-type manifest_entry() :: #manifest_entry{}. -type manifest_entry() :: #manifest_entry{}.
-type manifest_owner() :: pid()|list(). -type manifest_owner() :: pid()|list().
-type selector_strategy() :: random. -type selector_strategy() ::
random|{grooming, fun((list(manifest_entry())) -> manifest_entry())}.
-export_type([manifest/0, manifest_entry/0, manifest_owner/0]). -export_type([manifest/0, manifest_entry/0, manifest_owner/0]).
@ -450,7 +452,21 @@ mergefile_selector(Manifest, LevelIdx, random) ->
Level = leveled_tree:to_list(array:get(LevelIdx, Level = leveled_tree:to_list(array:get(LevelIdx,
Manifest#manifest.levels)), Manifest#manifest.levels)),
{_SK, ME} = lists:nth(leveled_rand:uniform(length(Level)), Level), {_SK, ME} = lists:nth(leveled_rand:uniform(length(Level)), Level),
ME. ME;
mergefile_selector(Manifest, LevelIdx, {grooming, ScoringFun}) ->
    % Grooming selection: draw ?GROOM_SAMPLE entries at random (with
    % replacement) from the level at LevelIdx, and return whichever entry
    % ScoringFun chooses from the de-duplicated sample
    Level = leveled_tree:to_list(array:get(LevelIdx,
                                            Manifest#manifest.levels)),
    % length/1 walks the whole list, and the level does not change between
    % draws, so calculate the size once rather than once per draw
    LevelSize = length(Level),
    SelectorFun =
        fun(_I, Acc) ->
            {_SK, ME} = lists:nth(leveled_rand:uniform(LevelSize), Level),
            [ME|Acc]
        end,
    Sample =
        lists:usort(lists:foldl(SelectorFun, [], lists:seq(1, ?GROOM_SAMPLE))),
    % Note that Sample may hold fewer than ?GROOM_SAMPLE entries, as
    % lists:usort/1 removes any entry that was picked more than once
    ScoringFun(Sample).
-spec merge_snapshot(manifest(), manifest()) -> manifest(). -spec merge_snapshot(manifest(), manifest()) -> manifest().
%% @doc %% @doc
@ -609,6 +625,7 @@ check_bloom(Manifest, FP, Hash) ->
%%% Internal Functions %%% Internal Functions
%%%============================================================================ %%%============================================================================
-spec get_manifest_entry({tuple(), manifest_entry()}|manifest_entry()) -spec get_manifest_entry({tuple(), manifest_entry()}|manifest_entry())
-> manifest_entry(). -> manifest_entry().
%% @doc %% @doc

View file

@ -127,6 +127,12 @@
sst_gettombcount/1, sst_gettombcount/1,
sst_close/1]). sst_close/1]).
-ifdef(TEST).
-export([sst_newmerge/10]).
-endif.
-export([tune_seglist/1, extract_hash/1, member_check/2]). -export([tune_seglist/1, extract_hash/1, member_check/2]).
-export([in_range/3]). -export([in_range/3]).