Change default compaction settings

Need to allow specific settings to be passed into unit tests.

Also, too much journal compaction may lead to intermittent failures on
the basic_SUITE space_clear_ondelete test. I think this is because
there are fewer “deletes” to reload on startup to trigger the cascade
down and clear up.
Martin Sumner 2017-06-02 08:37:57 +01:00
parent 39fd3e5c87
commit 15c52ae118
2 changed files with 68 additions and 49 deletions
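
As a rough sketch (not part of the commit), the shell snippet below shows what the new defaults mean for the compaction target calculated in score_run/2 further down. The TargetFor fun and its variable names are illustrative only; the formula and the values come straight from the hunks that follow.

%% The target for a run of N files is
%% SF_CT + ((MR_CT - SF_CT) / (MaxRunLength - 1)) * (N - 1), so raising
%% MAX_COMPACTION_RUN from 6 to 8 and MAXRUN_COMPACTION_TARGET from 60.0
%% to 70.0 stretches the sliding scale across longer runs while leaving
%% the single-file target at 40.0.
TargetFor =
    fun(RunLength, {MaxRunLength, MR_CT, SF_CT}) ->
        SF_CT + ((MR_CT - SF_CT) / (MaxRunLength - 1)) * (RunLength - 1)
    end,
OldTarget = TargetFor(4, {6, 60.0, 40.0}),  % 52.0 under the old defaults
NewTarget = TargetFor(4, {8, 70.0, 40.0}).  % ~52.86 under the new defaults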


@@ -95,10 +95,10 @@
 -define(BATCH_SIZE, 32).
 -define(BATCHES_TO_CHECK, 8).
 %% How many consecutive files to compact in one run
--define(MAX_COMPACTION_RUN, 6).
+-define(MAX_COMPACTION_RUN, 8).
 %% Sliding scale to allow preference of longer runs up to maximum
 -define(SINGLEFILE_COMPACTION_TARGET, 40.0).
--define(MAXRUN_COMPACTION_TARGET, 60.0).
+-define(MAXRUN_COMPACTION_TARGET, 70.0).
 -define(CRC_SIZE, 4).
 -define(DEFAULT_RELOAD_STRATEGY, leveled_codec:inker_reload_strategy([])).
 -define(DEFAULT_WASTE_RETENTION_PERIOD, 86400).
@@ -383,47 +383,55 @@ fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) ->
     KL_List = leveled_cdb:cdb_directfetch(CDB, Batch, key_size),
     fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List).
 
-assess_candidates(AllCandidates, MaxRunLength) ->
-    NaiveBestRun = assess_candidates(AllCandidates, MaxRunLength, [], []),
+assess_candidates(AllCandidates, MaxRunLength) when is_integer(MaxRunLength) ->
+    % This will take the defaults for other params.
+    % Unit tests should pass tuple as params including tested defaults
+    assess_candidates(AllCandidates,
+                        {MaxRunLength,
+                            ?MAXRUN_COMPACTION_TARGET,
+                            ?SINGLEFILE_COMPACTION_TARGET});
+assess_candidates(AllCandidates, Params) ->
+    NaiveBestRun = assess_candidates(AllCandidates, Params, [], []),
+    MaxRunLength = element(1, Params),
     case length(AllCandidates) of
         L when L > MaxRunLength, MaxRunLength > 1 ->
             %% Assess with different offsets from the start
-            SqL = lists:seq(1, MaxRunLength - 1),
-            lists:foldl(fun(Counter, BestRun) ->
-                            SubList = lists:nthtail(Counter,
-                                                        AllCandidates),
-                            assess_candidates(SubList,
-                                                MaxRunLength,
-                                                [],
-                                                BestRun)
-                            end,
-                        NaiveBestRun,
-                        SqL);
+            AssessFold =
+                fun(Counter, BestRun) ->
+                    SubList = lists:nthtail(Counter, AllCandidates),
+                    assess_candidates(SubList, Params, [], BestRun)
+                end,
+
+            lists:foldl(AssessFold,
+                            NaiveBestRun,
+                            lists:seq(1, MaxRunLength - 1));
         _ ->
             NaiveBestRun
     end.
 
-assess_candidates([], _MaxRunLength, _CurrentRun0, BestAssessment) ->
+assess_candidates([], _Params, _CurrentRun0, BestAssessment) ->
     BestAssessment;
-assess_candidates([HeadC|Tail], MaxRunLength, CurrentRun0, BestAssessment) ->
+assess_candidates([HeadC|Tail], Params, CurrentRun0, BestAssessment) ->
     CurrentRun1 = choose_best_assessment(CurrentRun0 ++ [HeadC],
                                             [HeadC],
-                                            MaxRunLength),
+                                            Params),
     assess_candidates(Tail,
-                        MaxRunLength,
+                        Params,
                         CurrentRun1,
                         choose_best_assessment(CurrentRun1,
                                                 BestAssessment,
-                                                MaxRunLength)).
+                                                Params)).
 
-choose_best_assessment(RunToAssess, BestRun, MaxRunLength) ->
+choose_best_assessment(RunToAssess, BestRun, Params) ->
+    {MaxRunLength, _MR_CT, _SF_CT} = Params,
     case length(RunToAssess) of
         LR1 when LR1 > MaxRunLength ->
             BestRun;
         _ ->
-            AssessScore = score_run(RunToAssess, MaxRunLength),
-            BestScore = score_run(BestRun, MaxRunLength),
+            AssessScore = score_run(RunToAssess, Params),
+            BestScore = score_run(BestRun, Params),
             if
                 AssessScore > BestScore ->
                     RunToAssess;
@@ -431,19 +439,23 @@ choose_best_assessment(RunToAssess, BestRun, MaxRunLength) ->
                     BestRun
             end
     end.
 
-score_run([], _MaxRunLength) ->
+score_run(Run, MaxRunLength) when is_integer(MaxRunLength) ->
+    Params = {MaxRunLength,
+                ?MAXRUN_COMPACTION_TARGET,
+                ?SINGLEFILE_COMPACTION_TARGET},
+    score_run(Run, Params);
+score_run([], _Params) ->
     0.0;
-score_run(Run, MaxRunLength) ->
-    TargetIncr = case MaxRunLength of
-                        1 ->
-                            0.0;
-                        MaxRunSize ->
-                            (?MAXRUN_COMPACTION_TARGET
-                                - ?SINGLEFILE_COMPACTION_TARGET)
-                                    / (MaxRunSize - 1)
-                    end,
-    Target = ?SINGLEFILE_COMPACTION_TARGET + TargetIncr * (length(Run) - 1),
+score_run(Run, {MaxRunLength, MR_CT, SF_CT}) ->
+    TargetIncr =
+        case MaxRunLength of
+            1 ->
+                0.0;
+            MaxRunSize ->
+                (MR_CT - SF_CT) / (MaxRunSize - 1)
+        end,
+    Target = SF_CT + TargetIncr * (length(Run) - 1),
     RunTotal = lists:foldl(fun(Cand, Acc) ->
                                 Acc + Cand#candidate.compaction_perc end,
                             0.0,
@@ -645,7 +657,7 @@ simple_score_test() ->
                 #candidate{compaction_perc = 75.0},
                 #candidate{compaction_perc = 76.0},
                 #candidate{compaction_perc = 70.0}],
-    ?assertMatch(-14.0, score_run(Run1, 4)),
+    ?assertMatch(-4.0, score_run(Run1, 4)),
     Run2 = [#candidate{compaction_perc = 75.0}],
     ?assertMatch(-35.0, score_run(Run2, 4)),
     ?assertMatch(0.0, score_run([], 4)),
@@ -657,10 +669,16 @@ score_compare_test() ->
                 #candidate{compaction_perc = 55.0},
                 #candidate{compaction_perc = 56.0},
                 #candidate{compaction_perc = 50.0}],
-    ?assertMatch(6.0, score_run(Run1, 4)),
+    ?assertMatch(16.0, score_run(Run1, 4)),
     Run2 = [#candidate{compaction_perc = 55.0}],
-    ?assertMatch(Run1, choose_best_assessment(Run1, Run2, 4)),
-    ?assertMatch(Run2, choose_best_assessment(Run1 ++ Run2, Run2, 4)).
+    ?assertMatch(Run1,
+                    choose_best_assessment(Run1,
+                                            Run2,
+                                            {4, 60.0, 40.0})),
+    ?assertMatch(Run2,
+                    choose_best_assessment(Run1 ++ Run2,
+                                            Run2,
+                                            {4, 60.0, 40.0})).
 
 file_gc_test() ->
     State = #state{waste_path="test/waste/",
@@ -683,6 +701,7 @@ find_bestrun_test() ->
 %% -define(SINGLEFILE_COMPACTION_TARGET, 40.0).
 %% -define(MAXRUN_COMPACTION_TARGET, 60.0).
 %% Tested first with blocks significant as no back-tracking
+    Params = {4, 60.0, 40.0},
     Block1 = [#candidate{compaction_perc = 55.0},
                 #candidate{compaction_perc = 65.0},
                 #candidate{compaction_perc = 42.0},
@@ -697,39 +716,39 @@ find_bestrun_test() ->
                 #candidate{compaction_perc = 100.0}],
     Block4 = [#candidate{compaction_perc = 55.0},
                 #candidate{compaction_perc = 56.0},
-                #candidate{compaction_perc = 56.0},
+                #candidate{compaction_perc = 57.0},
                 #candidate{compaction_perc = 40.0}],
     Block5 = [#candidate{compaction_perc = 60.0},
                 #candidate{compaction_perc = 60.0}],
     CList0 = Block1 ++ Block2 ++ Block3 ++ Block4 ++ Block5,
-    ?assertMatch(Block4, assess_candidates(CList0, 4, [], [])),
+    ?assertMatch(Block4, assess_candidates(CList0, Params, [], [])),
     CList1 = CList0 ++ [#candidate{compaction_perc = 20.0}],
     ?assertMatch([#candidate{compaction_perc = 20.0}],
-                    assess_candidates(CList1, 4, [], [])),
+                    assess_candidates(CList1, Params, [], [])),
     CList2 = Block4 ++ Block3 ++ Block2 ++ Block1 ++ Block5,
-    ?assertMatch(Block4, assess_candidates(CList2, 4, [], [])),
+    ?assertMatch(Block4, assess_candidates(CList2, Params, [], [])),
     CList3 = Block5 ++ Block1 ++ Block2 ++ Block3 ++ Block4,
     ?assertMatch([#candidate{compaction_perc = 42.0},
                     #candidate{compaction_perc = 50.0},
                     #candidate{compaction_perc = 38.0}],
-                    assess_candidates(CList3, 4, [], [])),
+                    assess_candidates(CList3, Params)),
     %% Now do some back-tracking to get a genuinely optimal solution without
     %% needing to re-order
     ?assertMatch([#candidate{compaction_perc = 42.0},
                     #candidate{compaction_perc = 50.0},
                     #candidate{compaction_perc = 38.0}],
-                    assess_candidates(CList0, 4)),
+                    assess_candidates(CList0, Params)),
     ?assertMatch([#candidate{compaction_perc = 42.0},
                     #candidate{compaction_perc = 50.0},
                     #candidate{compaction_perc = 38.0}],
-                    assess_candidates(CList0, 5)),
+                    assess_candidates(CList0, setelement(1, Params, 5))),
     ?assertMatch([#candidate{compaction_perc = 42.0},
                     #candidate{compaction_perc = 50.0},
                     #candidate{compaction_perc = 38.0},
                     #candidate{compaction_perc = 75.0},
                     #candidate{compaction_perc = 75.0},
                     #candidate{compaction_perc = 45.0}],
-                    assess_candidates(CList0, 6)).
+                    assess_candidates(CList0, setelement(1, Params, 6))).
 
 test_ledgerkey(Key) ->
     {o, "Bucket", Key, null}.


@@ -467,7 +467,7 @@ load_and_count_withdelete(_Config) ->
 space_clear_ondelete(_Config) ->
     RootPath = testutil:reset_filestructure(),
     StartOpts1 = [{root_path, RootPath},
-                    {max_journalsize, 20000000},
+                    {max_journalsize, 10000000},
                     {sync_strategy, testutil:sync_strategy()}],
     {ok, Book1} = leveled_bookie:book_start(StartOpts1),
     G2 = fun testutil:generate_compressibleobjects/2,
@@ -586,7 +586,7 @@ space_clear_ondelete(_Config) ->
     io:format("FNsD - Bookie has ~w ledger files " ++
                     "after second close~n", [length(FNsD_L)]),
     lists:foreach(fun(FN) ->
-                        io:format("FNsD - Ledger file if ~s~n", [FN])
+                        io:format("FNsD - Ledger file is ~s~n", [FN])
                     end,
                     FNsD_L),
     true = PointB_Journals < length(FNsA_J),