Change score assessment
Wans't actually calculating scores correctly previously.
This commit is contained in:
parent
908f762a48
commit
d9229c9782
1 changed files with 68 additions and 108 deletions
|
@ -118,6 +118,7 @@
|
||||||
compaction_perc :: float() | undefined}).
|
compaction_perc :: float() | undefined}).
|
||||||
|
|
||||||
-type iclerk_options() :: #iclerk_options{}.
|
-type iclerk_options() :: #iclerk_options{}.
|
||||||
|
-type candidate():: #candidate{}.
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% API
|
%%% API
|
||||||
|
@ -229,9 +230,9 @@ handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Inker, _TO},
|
||||||
{MaxRunLength,
|
{MaxRunLength,
|
||||||
State#state.maxrunlength_compactionperc,
|
State#state.maxrunlength_compactionperc,
|
||||||
State#state.singlefile_compactionperc},
|
State#state.singlefile_compactionperc},
|
||||||
BestRun0 = assess_candidates(Candidates, ScoreParams),
|
{BestRun0, Score} = assess_candidates(Candidates, ScoreParams),
|
||||||
case score_run(BestRun0, ScoreParams) of
|
case Score > 0.0 of
|
||||||
Score when Score > 0.0 ->
|
true ->
|
||||||
BestRun1 = sort_run(BestRun0),
|
BestRun1 = sort_run(BestRun0),
|
||||||
print_compaction_run(BestRun1, ScoreParams),
|
print_compaction_run(BestRun1, ScoreParams),
|
||||||
ManifestSlice = compact_files(BestRun1,
|
ManifestSlice = compact_files(BestRun1,
|
||||||
|
@ -257,7 +258,7 @@ handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Inker, _TO},
|
||||||
ok = CloseFun(FilterServer),
|
ok = CloseFun(FilterServer),
|
||||||
{noreply, State}
|
{noreply, State}
|
||||||
end;
|
end;
|
||||||
Score ->
|
false ->
|
||||||
leveled_log:log("IC003", [Score]),
|
leveled_log:log("IC003", [Score]),
|
||||||
ok = leveled_inker:ink_compactioncomplete(Inker),
|
ok = leveled_inker:ink_compactioncomplete(Inker),
|
||||||
ok = CloseFun(FilterServer),
|
ok = CloseFun(FilterServer),
|
||||||
|
@ -457,54 +458,42 @@ fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) ->
|
||||||
fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List).
|
fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List).
|
||||||
|
|
||||||
|
|
||||||
|
-spec assess_candidates(list(candidate()), tuple())
|
||||||
|
-> {list(candidate()), float()}.
|
||||||
|
%% @doc
|
||||||
|
%% For each run length we need to assess all the runs of candidates
|
||||||
|
%% The best scoring run should emerge as the winner
|
||||||
|
%% Although this requires many loops over the list of the candidate, as the
|
||||||
|
%% file scores have already been calculated the cost per loop should not be
|
||||||
|
%% a high burden. Reducing the maximum run length, will reduce the cost of
|
||||||
|
%% this exercise should be a problem
|
||||||
assess_candidates(AllCandidates, Params) ->
|
assess_candidates(AllCandidates, Params) ->
|
||||||
NaiveBestRun = assess_candidates(AllCandidates, Params, [], []),
|
MaxRunLength = min(element(1, Params), length(AllCandidates)),
|
||||||
MaxRunLength = element(1, Params),
|
NaiveBestRun = lists:sublist(AllCandidates, MaxRunLength),
|
||||||
case length(AllCandidates) of
|
FoldFun =
|
||||||
L when L > MaxRunLength, MaxRunLength > 1 ->
|
fun(RunLength, Best) ->
|
||||||
%% Assess with different offsets from the start
|
assess_for_runlength(RunLength, AllCandidates, Params, Best)
|
||||||
AssessFold =
|
end,
|
||||||
fun(Counter, BestRun) ->
|
|
||||||
SubList = lists:nthtail(Counter, AllCandidates),
|
% Check all run lengths to find the best candidate. Reverse the list of
|
||||||
assess_candidates(SubList, Params, [], BestRun)
|
% run lengths, so that longer runs win on equality of score
|
||||||
end,
|
lists:foldl(FoldFun,
|
||||||
|
{NaiveBestRun, score_run(NaiveBestRun, Params)},
|
||||||
lists:foldl(AssessFold,
|
lists:reverse(lists:seq(1, MaxRunLength))).
|
||||||
NaiveBestRun,
|
|
||||||
lists:seq(1, MaxRunLength - 1));
|
|
||||||
_ ->
|
|
||||||
NaiveBestRun
|
|
||||||
end.
|
|
||||||
|
|
||||||
assess_candidates([], _Params, _CurrentRun0, BestAssessment) ->
|
|
||||||
BestAssessment;
|
|
||||||
assess_candidates([HeadC|Tail], Params, CurrentRun0, BestAssessment) ->
|
|
||||||
CurrentRun1 = choose_best_assessment(CurrentRun0 ++ [HeadC],
|
|
||||||
[HeadC],
|
|
||||||
Params),
|
|
||||||
assess_candidates(Tail,
|
|
||||||
Params,
|
|
||||||
CurrentRun1,
|
|
||||||
choose_best_assessment(CurrentRun1,
|
|
||||||
BestAssessment,
|
|
||||||
Params)).
|
|
||||||
|
|
||||||
|
|
||||||
choose_best_assessment(RunToAssess, BestRun, Params) ->
|
assess_for_runlength(RunLength, AllCandidates, Params, Best) ->
|
||||||
{MaxRunLength, _MR_CT, _SF_CT} = Params,
|
NumberOfRuns = 1 + length(AllCandidates) - RunLength,
|
||||||
case length(RunToAssess) of
|
FoldFun =
|
||||||
LR1 when LR1 > MaxRunLength ->
|
fun(Offset, {BestRun, BestScore}) ->
|
||||||
BestRun;
|
Run = lists:sublist(AllCandidates, Offset, RunLength),
|
||||||
_ ->
|
Score = score_run(Run, Params),
|
||||||
AssessScore = score_run(RunToAssess, Params),
|
case Score > BestScore of
|
||||||
BestScore = score_run(BestRun, Params),
|
true -> {Run, Score};
|
||||||
if
|
false -> {BestRun, BestScore}
|
||||||
AssessScore > BestScore ->
|
|
||||||
RunToAssess;
|
|
||||||
true ->
|
|
||||||
BestRun
|
|
||||||
end
|
end
|
||||||
end.
|
end,
|
||||||
|
lists:foldl(FoldFun, Best, lists:seq(1, NumberOfRuns)).
|
||||||
|
|
||||||
|
|
||||||
score_run([], _Params) ->
|
score_run([], _Params) ->
|
||||||
|
@ -740,22 +729,6 @@ simple_score_test() ->
|
||||||
Run3 = [#candidate{compaction_perc = 100.0}],
|
Run3 = [#candidate{compaction_perc = 100.0}],
|
||||||
?assertMatch(-60.0, score_run(Run3, {4, 70.0, 40.0})).
|
?assertMatch(-60.0, score_run(Run3, {4, 70.0, 40.0})).
|
||||||
|
|
||||||
score_compare_test() ->
|
|
||||||
Run1 = [#candidate{compaction_perc = 55.0},
|
|
||||||
#candidate{compaction_perc = 55.0},
|
|
||||||
#candidate{compaction_perc = 56.0},
|
|
||||||
#candidate{compaction_perc = 50.0}],
|
|
||||||
?assertMatch(16.0, score_run(Run1, {4, 70.0, 40.0})),
|
|
||||||
Run2 = [#candidate{compaction_perc = 55.0}],
|
|
||||||
?assertMatch(Run1,
|
|
||||||
choose_best_assessment(Run1,
|
|
||||||
Run2,
|
|
||||||
{4, 60.0, 40.0})),
|
|
||||||
?assertMatch(Run2,
|
|
||||||
choose_best_assessment(Run1 ++ Run2,
|
|
||||||
Run2,
|
|
||||||
{4, 60.0, 40.0})).
|
|
||||||
|
|
||||||
file_gc_test() ->
|
file_gc_test() ->
|
||||||
State = #state{waste_path="test/waste/",
|
State = #state{waste_path="test/waste/",
|
||||||
waste_retention_period=1},
|
waste_retention_period=1},
|
||||||
|
@ -771,6 +744,11 @@ file_gc_test() ->
|
||||||
{ok, ClearedJournals2} = file:list_dir(State#state.waste_path),
|
{ok, ClearedJournals2} = file:list_dir(State#state.waste_path),
|
||||||
?assertMatch([], ClearedJournals2).
|
?assertMatch([], ClearedJournals2).
|
||||||
|
|
||||||
|
|
||||||
|
check_bestrun(CandidateList, Params) ->
|
||||||
|
{BestRun, _Score} = assess_candidates(CandidateList, Params),
|
||||||
|
lists:map(fun(C) -> C#candidate.filename end, BestRun).
|
||||||
|
|
||||||
find_bestrun_test() ->
|
find_bestrun_test() ->
|
||||||
%% Tests dependent on these defaults
|
%% Tests dependent on these defaults
|
||||||
%% -define(MAX_COMPACTION_RUN, 4).
|
%% -define(MAX_COMPACTION_RUN, 4).
|
||||||
|
@ -778,53 +756,35 @@ find_bestrun_test() ->
|
||||||
%% -define(MAXRUNLENGTH_COMPACTION_TARGET, 60.0).
|
%% -define(MAXRUNLENGTH_COMPACTION_TARGET, 60.0).
|
||||||
%% Tested first with blocks significant as no back-tracking
|
%% Tested first with blocks significant as no back-tracking
|
||||||
Params = {4, 60.0, 40.0},
|
Params = {4, 60.0, 40.0},
|
||||||
Block1 = [#candidate{compaction_perc = 55.0},
|
Block1 = [#candidate{compaction_perc = 55.0, filename = "a"},
|
||||||
#candidate{compaction_perc = 65.0},
|
#candidate{compaction_perc = 65.0, filename = "b"},
|
||||||
#candidate{compaction_perc = 42.0},
|
#candidate{compaction_perc = 42.0, filename = "c"},
|
||||||
#candidate{compaction_perc = 50.0}],
|
#candidate{compaction_perc = 50.0, filename = "d"}],
|
||||||
Block2 = [#candidate{compaction_perc = 38.0},
|
Block2 = [#candidate{compaction_perc = 38.0, filename = "e"},
|
||||||
#candidate{compaction_perc = 75.0},
|
#candidate{compaction_perc = 75.0, filename = "f"},
|
||||||
#candidate{compaction_perc = 75.0},
|
#candidate{compaction_perc = 75.0, filename = "g"},
|
||||||
#candidate{compaction_perc = 45.0}],
|
#candidate{compaction_perc = 45.0, filename = "h"}],
|
||||||
Block3 = [#candidate{compaction_perc = 70.0},
|
Block3 = [#candidate{compaction_perc = 70.0, filename = "i"},
|
||||||
#candidate{compaction_perc = 100.0},
|
#candidate{compaction_perc = 100.0, filename = "j"},
|
||||||
#candidate{compaction_perc = 100.0},
|
#candidate{compaction_perc = 100.0, filename = "k"},
|
||||||
#candidate{compaction_perc = 100.0}],
|
#candidate{compaction_perc = 100.0, filename = "l"}],
|
||||||
Block4 = [#candidate{compaction_perc = 55.0},
|
Block4 = [#candidate{compaction_perc = 55.0, filename = "m"},
|
||||||
#candidate{compaction_perc = 56.0},
|
#candidate{compaction_perc = 56.0, filename = "n"},
|
||||||
#candidate{compaction_perc = 57.0},
|
#candidate{compaction_perc = 57.0, filename = "o"},
|
||||||
#candidate{compaction_perc = 40.0}],
|
#candidate{compaction_perc = 40.0, filename = "p"}],
|
||||||
Block5 = [#candidate{compaction_perc = 60.0},
|
Block5 = [#candidate{compaction_perc = 60.0, filename = "q"},
|
||||||
#candidate{compaction_perc = 60.0}],
|
#candidate{compaction_perc = 60.0, filename = "r"}],
|
||||||
CList0 = Block1 ++ Block2 ++ Block3 ++ Block4 ++ Block5,
|
CList0 = Block1 ++ Block2 ++ Block3 ++ Block4 ++ Block5,
|
||||||
?assertMatch(Block4, assess_candidates(CList0, Params, [], [])),
|
?assertMatch(["b", "c", "d", "e"], check_bestrun(CList0, Params)),
|
||||||
CList1 = CList0 ++ [#candidate{compaction_perc = 20.0}],
|
CList1 = CList0 ++ [#candidate{compaction_perc = 20.0, filename="s"}],
|
||||||
?assertMatch([#candidate{compaction_perc = 20.0}],
|
?assertMatch(["s"], check_bestrun(CList1, Params)),
|
||||||
assess_candidates(CList1, Params, [], [])),
|
|
||||||
CList2 = Block4 ++ Block3 ++ Block2 ++ Block1 ++ Block5,
|
CList2 = Block4 ++ Block3 ++ Block2 ++ Block1 ++ Block5,
|
||||||
?assertMatch(Block4, assess_candidates(CList2, Params, [], [])),
|
?assertMatch(["h", "a", "b", "c"], check_bestrun(CList2, Params)),
|
||||||
CList3 = Block5 ++ Block1 ++ Block2 ++ Block3 ++ Block4,
|
CList3 = Block5 ++ Block1 ++ Block2 ++ Block3 ++ Block4,
|
||||||
?assertMatch([#candidate{compaction_perc = 42.0},
|
?assertMatch(["b", "c", "d", "e"],check_bestrun(CList3, Params)).
|
||||||
#candidate{compaction_perc = 50.0},
|
|
||||||
#candidate{compaction_perc = 38.0}],
|
handle_emptycandidatelist_test() ->
|
||||||
assess_candidates(CList3, Params)),
|
?assertMatch({[], 0.0}, assess_candidates([], {4, 60.0, 40.0})).
|
||||||
%% Now do some back-tracking to get a genuinely optimal solution without
|
|
||||||
%% needing to re-order
|
|
||||||
?assertMatch([#candidate{compaction_perc = 42.0},
|
|
||||||
#candidate{compaction_perc = 50.0},
|
|
||||||
#candidate{compaction_perc = 38.0}],
|
|
||||||
assess_candidates(CList0, Params)),
|
|
||||||
?assertMatch([#candidate{compaction_perc = 42.0},
|
|
||||||
#candidate{compaction_perc = 50.0},
|
|
||||||
#candidate{compaction_perc = 38.0}],
|
|
||||||
assess_candidates(CList0, setelement(1, Params, 5))),
|
|
||||||
?assertMatch([#candidate{compaction_perc = 42.0},
|
|
||||||
#candidate{compaction_perc = 50.0},
|
|
||||||
#candidate{compaction_perc = 38.0},
|
|
||||||
#candidate{compaction_perc = 75.0},
|
|
||||||
#candidate{compaction_perc = 75.0},
|
|
||||||
#candidate{compaction_perc = 45.0}],
|
|
||||||
assess_candidates(CList0, setelement(1, Params, 6))).
|
|
||||||
|
|
||||||
test_ledgerkey(Key) ->
|
test_ledgerkey(Key) ->
|
||||||
{o, "Bucket", Key, null}.
|
{o, "Bucket", Key, null}.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue