Add grooming compactions
Make half of LSM-tree compactions grooming compactions, i.e. compactions biased towards merging files with large numbers of tombstones.
This commit is contained in:
parent
aca945a171
commit
da97d65a23
4 changed files with 131 additions and 4 deletions
|
@ -230,6 +230,8 @@
|
||||||
{"PC023",
|
{"PC023",
|
||||||
{info, "At level=~w file_count=~w avg_mem=~w "
|
{info, "At level=~w file_count=~w avg_mem=~w "
|
||||||
++ "file with most memory fn=~s p=~w mem=~w"}},
|
++ "file with most memory fn=~s p=~w mem=~w"}},
|
||||||
|
{"PC024",
|
||||||
|
{info, "Grooming compaction picked file with tomb_count=~w"}},
|
||||||
{"PM002",
|
{"PM002",
|
||||||
{info, "Completed dump of L0 cache to list of l0cache_size=~w"}},
|
{info, "Completed dump of L0 cache to list of l0cache_size=~w"}},
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,7 @@
|
||||||
|
|
||||||
-define(MAX_TIMEOUT, 2000).
|
-define(MAX_TIMEOUT, 2000).
|
||||||
-define(MIN_TIMEOUT, 200).
|
-define(MIN_TIMEOUT, 200).
|
||||||
|
-define(GROOMING_PERC, 50).
|
||||||
|
|
||||||
-record(state, {owner :: pid() | undefined,
|
-record(state, {owner :: pid() | undefined,
|
||||||
root_path :: string() | undefined,
|
root_path :: string() | undefined,
|
||||||
|
@ -56,6 +57,8 @@
|
||||||
sst_options :: #sst_options{}
|
sst_options :: #sst_options{}
|
||||||
}).
|
}).
|
||||||
|
|
||||||
|
-type manifest_entry() :: #manifest_entry{}.
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% API
|
%%% API
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
@ -183,7 +186,15 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) ->
|
||||||
leveled_log:log("PC023",
|
leveled_log:log("PC023",
|
||||||
[SrcLevel + 1, FCnt, AvgMem, MaxFN, MaxP, MaxMem])
|
[SrcLevel + 1, FCnt, AvgMem, MaxFN, MaxP, MaxMem])
|
||||||
end,
|
end,
|
||||||
Src = leveled_pmanifest:mergefile_selector(Manifest, SrcLevel, random),
|
SelectMethod =
|
||||||
|
case leveled_rand:uniform(100) of
|
||||||
|
R when R < ?GROOMING_PERC ->
|
||||||
|
{grooming, fun grooming_scorer/1};
|
||||||
|
_ ->
|
||||||
|
random
|
||||||
|
end,
|
||||||
|
Src =
|
||||||
|
leveled_pmanifest:mergefile_selector(Manifest, SrcLevel, SelectMethod),
|
||||||
NewSQN = leveled_pmanifest:get_manifest_sqn(Manifest) + 1,
|
NewSQN = leveled_pmanifest:get_manifest_sqn(Manifest) + 1,
|
||||||
SinkList = leveled_pmanifest:merge_lookup(Manifest,
|
SinkList = leveled_pmanifest:merge_lookup(Manifest,
|
||||||
SrcLevel + 1,
|
SrcLevel + 1,
|
||||||
|
@ -285,6 +296,18 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) ->
|
||||||
Additions ++ [Entry])
|
Additions ++ [Entry])
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
%% @doc
%% Pick the manifest entry to merge from a sample of candidate entries.
%% Every entry is scored with the value leveled_sst:sst_gettombcount/1
%% returns for the entry's owner, and the entry with the highest score (by
%% standard Erlang term ordering) is returned.  When several entries share
%% the highest score, the one appearing earliest in Sample wins.  The
%% winning score is logged as PC024.  An empty Sample is not supported and
%% fails with a badmatch.
-spec grooming_scorer(list(manifest_entry())) -> manifest_entry().
grooming_scorer(Sample) ->
    Scored =
        [{leveled_sst:sst_gettombcount(ME#manifest_entry.owner), ME}
            || ME <- Sample],
    [First|Others] = Scored,
    % Only a strictly higher score displaces the current best, so ties keep
    % the earlier entry.
    KeepHigher =
        fun({TC, _ME} = Candidate, {BestTC, _BestME}) when TC > BestTC ->
                Candidate;
           (_Candidate, Best) ->
                Best
        end,
    {TopTC, Chosen} = lists:foldl(KeepHigher, First, Others),
    leveled_log:log("PC024", [TopTC]),
    Chosen.
|
||||||
|
|
||||||
return_deletions(ManifestSQN, PendingDeletionD) ->
|
return_deletions(ManifestSQN, PendingDeletionD) ->
|
||||||
% The returning of deletions had been separated out as a failure to fetch
|
% The returning of deletions had been separated out as a failure to fetch
|
||||||
|
@ -325,6 +348,82 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) ->
|
||||||
generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange).
|
generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange).
|
||||||
|
|
||||||
|
|
||||||
|
%% Unit test for grooming_scorer/1.  Builds four level-3 SST files - two
%% holding a single tombstone (one of those written with the fifth
%% sst_newmerge argument set to true) and two holding none (one of those
%% written with the final sst_newmerge argument set to false) - and checks
%% which manifest entry the scorer prefers for each pairing, in both orders.
grooming_score_test() ->
    RootPath = "test/test_area/ledger_files/",
    ok = filelib:ensure_dir(RootPath),
    KL1_L3 = lists:sort(generate_randomkeys(2000, 0, 100)),
    KL2_L3 = lists:sort(generate_randomkeys(2000, 101, 250)),
    KL3_L3 = lists:sort(generate_randomkeys(2000, 251, 300)),
    KL4_L3 = lists:sort(generate_randomkeys(2000, 301, 400)),

    % Turn the first entry of the second key list into a tombstone
    [{HeadK, HeadV}|RestKL2] = KL2_L3,
    TombKL2 = [{HeadK, setelement(2, HeadV, tomb)}|RestKL2],

    % All four files share level, max SQN, options and the ninth argument;
    % only the file name, key lists and two flags vary.
    % NOTE(review): the flags are presumably "is basement" (5th argument)
    % and "count tombstones" (10th argument) - confirm against
    % leveled_sst:sst_newmerge/10.
    NewSST =
        fun(FileName, KLa, KLb, BasementFlag, TombCountFlag) ->
            {ok, Pid, _, _} =
                leveled_sst:sst_newmerge(RootPath,
                                            FileName,
                                            KLa,
                                            KLb,
                                            BasementFlag,
                                            3,
                                            999999,
                                            #sst_options{},
                                            true,
                                            TombCountFlag),
            Pid
        end,
    PidL3_1 = NewSST("1_L3.sst", KL1_L3, TombKL2, false, true),
    PidL3_1B = NewSST("1B_L3.sst", KL1_L3, TombKL2, true, true),
    PidL3_2 = NewSST("2_L3.sst", KL3_L3, KL4_L3, false, true),
    PidL3_2NC = NewSST("2NC_L3.sst", KL3_L3, KL4_L3, false, false),

    ME1 = #manifest_entry{owner=PidL3_1},
    ME1B = #manifest_entry{owner=PidL3_1B},
    ME2 = #manifest_entry{owner=PidL3_2},
    ME2NC = #manifest_entry{owner=PidL3_2NC},

    % The file holding the tombstone is preferred, whatever the sample order
    ?assertMatch(ME1, grooming_scorer([ME1, ME2])),
    ?assertMatch(ME1, grooming_scorer([ME2, ME1])),

    % not_counted > 1 in term order, so a file in the unexpected (i.e.
    % legacy) format that reports no count is merged first
    ?assertMatch(ME2NC, grooming_scorer([ME1, ME2NC])),
    ?assertMatch(ME2NC, grooming_scorer([ME2NC, ME1])),

    % When the file given the tombstone is written to the basement it ends
    % up with no tombstone, so the scores tie and the first entry in the
    % sample is chosen
    ?assertMatch(ME1B, grooming_scorer([ME1B, ME2])),
    ?assertMatch(ME2, grooming_scorer([ME2, ME1B])),

    lists:foreach(fun leveled_sst:sst_clear/1,
                    [PidL3_1, PidL3_1B, PidL3_2, PidL3_2NC]).
|
||||||
|
|
||||||
|
|
||||||
merge_file_test() ->
|
merge_file_test() ->
|
||||||
ok = filelib:ensure_dir("test/test_area/ledger_files/"),
|
ok = filelib:ensure_dir("test/test_area/ledger_files/"),
|
||||||
KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)),
|
KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)),
|
||||||
|
@ -401,7 +500,10 @@ merge_file_test() ->
|
||||||
"test/test_area/ledger_files/",
|
"test/test_area/ledger_files/",
|
||||||
3, #sst_options{}),
|
3, #sst_options{}),
|
||||||
|
|
||||||
?assertMatch(3, leveled_pmanifest:get_manifest_sqn(Man6)).
|
?assertMatch(3, leveled_pmanifest:get_manifest_sqn(Man6)),
|
||||||
|
|
||||||
|
lists:foreach(fun(P) -> leveled_sst:sst_clear(P) end,
|
||||||
|
[PidL1_1, PidL2_1, PidL2_2, PidL2_3, PidL2_4]).
|
||||||
|
|
||||||
coverage_cheat_test() ->
|
coverage_cheat_test() ->
|
||||||
{ok, _State1} =
|
{ok, _State1} =
|
||||||
|
|
|
@ -60,6 +60,7 @@
|
||||||
-define(TREE_WIDTH, 8).
|
-define(TREE_WIDTH, 8).
|
||||||
-define(PHANTOM_PID, r2d_fail).
|
-define(PHANTOM_PID, r2d_fail).
|
||||||
-define(MANIFESTS_TO_RETAIN, 5).
|
-define(MANIFESTS_TO_RETAIN, 5).
|
||||||
|
-define(GROOM_SAMPLE, 8).
|
||||||
|
|
||||||
-record(manifest, {levels,
|
-record(manifest, {levels,
|
||||||
% an array of lists or trees representing the manifest
|
% an array of lists or trees representing the manifest
|
||||||
|
@ -82,7 +83,8 @@
|
||||||
-type manifest() :: #manifest{}.
|
-type manifest() :: #manifest{}.
|
||||||
-type manifest_entry() :: #manifest_entry{}.
|
-type manifest_entry() :: #manifest_entry{}.
|
||||||
-type manifest_owner() :: pid()|list().
|
-type manifest_owner() :: pid()|list().
|
||||||
-type selector_strategy() :: random.
|
-type selector_strategy() ::
|
||||||
|
random|{grooming, fun((list(manifest_entry())) -> manifest_entry())}.
|
||||||
|
|
||||||
-export_type([manifest/0, manifest_entry/0, manifest_owner/0]).
|
-export_type([manifest/0, manifest_entry/0, manifest_owner/0]).
|
||||||
|
|
||||||
|
@ -450,7 +452,21 @@ mergefile_selector(Manifest, LevelIdx, random) ->
|
||||||
Level = leveled_tree:to_list(array:get(LevelIdx,
|
Level = leveled_tree:to_list(array:get(LevelIdx,
|
||||||
Manifest#manifest.levels)),
|
Manifest#manifest.levels)),
|
||||||
{_SK, ME} = lists:nth(leveled_rand:uniform(length(Level)), Level),
|
{_SK, ME} = lists:nth(leveled_rand:uniform(length(Level)), Level),
|
||||||
ME.
|
ME;
|
||||||
|
mergefile_selector(Manifest, LevelIdx, {grooming, ScoringFun}) ->
|
||||||
|
Level = leveled_tree:to_list(array:get(LevelIdx,
|
||||||
|
Manifest#manifest.levels)),
|
||||||
|
SelectorFun =
|
||||||
|
fun(_I, Acc) ->
|
||||||
|
{_SK, ME} = lists:nth(leveled_rand:uniform(length(Level)), Level),
|
||||||
|
[ME|Acc]
|
||||||
|
end,
|
||||||
|
Sample =
|
||||||
|
lists:usort(lists:foldl(SelectorFun, [], lists:seq(1, ?GROOM_SAMPLE))),
|
||||||
|
% Note that Entries may be less than GROOM_SAMPLE, if same one picked
|
||||||
|
% multiple times
|
||||||
|
ScoringFun(Sample).
|
||||||
|
|
||||||
|
|
||||||
-spec merge_snapshot(manifest(), manifest()) -> manifest().
|
-spec merge_snapshot(manifest(), manifest()) -> manifest().
|
||||||
%% @doc
|
%% @doc
|
||||||
|
@ -609,6 +625,7 @@ check_bloom(Manifest, FP, Hash) ->
|
||||||
%%% Internal Functions
|
%%% Internal Functions
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
||||||
|
|
||||||
-spec get_manifest_entry({tuple(), manifest_entry()}|manifest_entry())
|
-spec get_manifest_entry({tuple(), manifest_entry()}|manifest_entry())
|
||||||
-> manifest_entry().
|
-> manifest_entry().
|
||||||
%% @doc
|
%% @doc
|
||||||
|
|
|
@ -127,6 +127,12 @@
|
||||||
sst_gettombcount/1,
|
sst_gettombcount/1,
|
||||||
sst_close/1]).
|
sst_close/1]).
|
||||||
|
|
||||||
|
-ifdef(TEST).
|
||||||
|
|
||||||
|
-export([sst_newmerge/10]).
|
||||||
|
|
||||||
|
-endif.
|
||||||
|
|
||||||
-export([tune_seglist/1, extract_hash/1, member_check/2]).
|
-export([tune_seglist/1, extract_hash/1, member_check/2]).
|
||||||
|
|
||||||
-export([in_range/3]).
|
-export([in_range/3]).
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue