diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index ed7a8f3..3c4f7e5 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -118,6 +118,7 @@ compaction_perc :: float() | undefined}). -type iclerk_options() :: #iclerk_options{}. +-type candidate():: #candidate{}. %%%============================================================================ %%% API @@ -229,9 +230,9 @@ handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Inker, _TO}, {MaxRunLength, State#state.maxrunlength_compactionperc, State#state.singlefile_compactionperc}, - BestRun0 = assess_candidates(Candidates, ScoreParams), - case score_run(BestRun0, ScoreParams) of - Score when Score > 0.0 -> + {BestRun0, Score} = assess_candidates(Candidates, ScoreParams), + case Score > 0.0 of + true -> BestRun1 = sort_run(BestRun0), print_compaction_run(BestRun1, ScoreParams), ManifestSlice = compact_files(BestRun1, @@ -257,7 +258,7 @@ handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Inker, _TO}, ok = CloseFun(FilterServer), {noreply, State} end; - Score -> + false -> leveled_log:log("IC003", [Score]), ok = leveled_inker:ink_compactioncomplete(Inker), ok = CloseFun(FilterServer), @@ -457,54 +458,42 @@ fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) -> fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List). +-spec assess_candidates(list(candidate()), tuple()) + -> {list(candidate()), float()}. +%% @doc +%% For each run length we need to assess all the runs of candidates +%% The best scoring run should emerge as the winner +%% Although this requires many loops over the list of the candidate, as the +%% file scores have already been calculated the cost per loop should not be +%% a high burden. Reducing the maximum run length, will reduce the cost of +%% this exercise should be a problem assess_candidates(AllCandidates, Params) -> - NaiveBestRun = assess_candidates(AllCandidates, Params, [], []), - MaxRunLength = element(1, Params), - case length(AllCandidates) of - L when L > MaxRunLength, MaxRunLength > 1 -> - %% Assess with different offsets from the start - AssessFold = - fun(Counter, BestRun) -> - SubList = lists:nthtail(Counter, AllCandidates), - assess_candidates(SubList, Params, [], BestRun) - end, - - lists:foldl(AssessFold, - NaiveBestRun, - lists:seq(1, MaxRunLength - 1)); - _ -> - NaiveBestRun - end. - -assess_candidates([], _Params, _CurrentRun0, BestAssessment) -> - BestAssessment; -assess_candidates([HeadC|Tail], Params, CurrentRun0, BestAssessment) -> - CurrentRun1 = choose_best_assessment(CurrentRun0 ++ [HeadC], - [HeadC], - Params), - assess_candidates(Tail, - Params, - CurrentRun1, - choose_best_assessment(CurrentRun1, - BestAssessment, - Params)). + MaxRunLength = min(element(1, Params), length(AllCandidates)), + NaiveBestRun = lists:sublist(AllCandidates, MaxRunLength), + FoldFun = + fun(RunLength, Best) -> + assess_for_runlength(RunLength, AllCandidates, Params, Best) + end, + + % Check all run lengths to find the best candidate. Reverse the list of + % run lengths, so that longer runs win on equality of score + lists:foldl(FoldFun, + {NaiveBestRun, score_run(NaiveBestRun, Params)}, + lists:reverse(lists:seq(1, MaxRunLength))). -choose_best_assessment(RunToAssess, BestRun, Params) -> - {MaxRunLength, _MR_CT, _SF_CT} = Params, - case length(RunToAssess) of - LR1 when LR1 > MaxRunLength -> - BestRun; - _ -> - AssessScore = score_run(RunToAssess, Params), - BestScore = score_run(BestRun, Params), - if - AssessScore > BestScore -> - RunToAssess; - true -> - BestRun +assess_for_runlength(RunLength, AllCandidates, Params, Best) -> + NumberOfRuns = 1 + length(AllCandidates) - RunLength, + FoldFun = + fun(Offset, {BestRun, BestScore}) -> + Run = lists:sublist(AllCandidates, Offset, RunLength), + Score = score_run(Run, Params), + case Score > BestScore of + true -> {Run, Score}; + false -> {BestRun, BestScore} end - end. + end, + lists:foldl(FoldFun, Best, lists:seq(1, NumberOfRuns)). score_run([], _Params) -> @@ -740,22 +729,6 @@ simple_score_test() -> Run3 = [#candidate{compaction_perc = 100.0}], ?assertMatch(-60.0, score_run(Run3, {4, 70.0, 40.0})). -score_compare_test() -> - Run1 = [#candidate{compaction_perc = 55.0}, - #candidate{compaction_perc = 55.0}, - #candidate{compaction_perc = 56.0}, - #candidate{compaction_perc = 50.0}], - ?assertMatch(16.0, score_run(Run1, {4, 70.0, 40.0})), - Run2 = [#candidate{compaction_perc = 55.0}], - ?assertMatch(Run1, - choose_best_assessment(Run1, - Run2, - {4, 60.0, 40.0})), - ?assertMatch(Run2, - choose_best_assessment(Run1 ++ Run2, - Run2, - {4, 60.0, 40.0})). - file_gc_test() -> State = #state{waste_path="test/waste/", waste_retention_period=1}, @@ -771,6 +744,11 @@ file_gc_test() -> {ok, ClearedJournals2} = file:list_dir(State#state.waste_path), ?assertMatch([], ClearedJournals2). + +check_bestrun(CandidateList, Params) -> + {BestRun, _Score} = assess_candidates(CandidateList, Params), + lists:map(fun(C) -> C#candidate.filename end, BestRun). + find_bestrun_test() -> %% Tests dependent on these defaults %% -define(MAX_COMPACTION_RUN, 4). @@ -778,53 +756,35 @@ find_bestrun_test() -> %% -define(MAXRUNLENGTH_COMPACTION_TARGET, 60.0). %% Tested first with blocks significant as no back-tracking Params = {4, 60.0, 40.0}, - Block1 = [#candidate{compaction_perc = 55.0}, - #candidate{compaction_perc = 65.0}, - #candidate{compaction_perc = 42.0}, - #candidate{compaction_perc = 50.0}], - Block2 = [#candidate{compaction_perc = 38.0}, - #candidate{compaction_perc = 75.0}, - #candidate{compaction_perc = 75.0}, - #candidate{compaction_perc = 45.0}], - Block3 = [#candidate{compaction_perc = 70.0}, - #candidate{compaction_perc = 100.0}, - #candidate{compaction_perc = 100.0}, - #candidate{compaction_perc = 100.0}], - Block4 = [#candidate{compaction_perc = 55.0}, - #candidate{compaction_perc = 56.0}, - #candidate{compaction_perc = 57.0}, - #candidate{compaction_perc = 40.0}], - Block5 = [#candidate{compaction_perc = 60.0}, - #candidate{compaction_perc = 60.0}], + Block1 = [#candidate{compaction_perc = 55.0, filename = "a"}, + #candidate{compaction_perc = 65.0, filename = "b"}, + #candidate{compaction_perc = 42.0, filename = "c"}, + #candidate{compaction_perc = 50.0, filename = "d"}], + Block2 = [#candidate{compaction_perc = 38.0, filename = "e"}, + #candidate{compaction_perc = 75.0, filename = "f"}, + #candidate{compaction_perc = 75.0, filename = "g"}, + #candidate{compaction_perc = 45.0, filename = "h"}], + Block3 = [#candidate{compaction_perc = 70.0, filename = "i"}, + #candidate{compaction_perc = 100.0, filename = "j"}, + #candidate{compaction_perc = 100.0, filename = "k"}, + #candidate{compaction_perc = 100.0, filename = "l"}], + Block4 = [#candidate{compaction_perc = 55.0, filename = "m"}, + #candidate{compaction_perc = 56.0, filename = "n"}, + #candidate{compaction_perc = 57.0, filename = "o"}, + #candidate{compaction_perc = 40.0, filename = "p"}], + Block5 = [#candidate{compaction_perc = 60.0, filename = "q"}, + #candidate{compaction_perc = 60.0, filename = "r"}], CList0 = Block1 ++ Block2 ++ Block3 ++ Block4 ++ Block5, - ?assertMatch(Block4, assess_candidates(CList0, Params, [], [])), - CList1 = CList0 ++ [#candidate{compaction_perc = 20.0}], - ?assertMatch([#candidate{compaction_perc = 20.0}], - assess_candidates(CList1, Params, [], [])), + ?assertMatch(["b", "c", "d", "e"], check_bestrun(CList0, Params)), + CList1 = CList0 ++ [#candidate{compaction_perc = 20.0, filename="s"}], + ?assertMatch(["s"], check_bestrun(CList1, Params)), CList2 = Block4 ++ Block3 ++ Block2 ++ Block1 ++ Block5, - ?assertMatch(Block4, assess_candidates(CList2, Params, [], [])), + ?assertMatch(["h", "a", "b", "c"], check_bestrun(CList2, Params)), CList3 = Block5 ++ Block1 ++ Block2 ++ Block3 ++ Block4, - ?assertMatch([#candidate{compaction_perc = 42.0}, - #candidate{compaction_perc = 50.0}, - #candidate{compaction_perc = 38.0}], - assess_candidates(CList3, Params)), - %% Now do some back-tracking to get a genuinely optimal solution without - %% needing to re-order - ?assertMatch([#candidate{compaction_perc = 42.0}, - #candidate{compaction_perc = 50.0}, - #candidate{compaction_perc = 38.0}], - assess_candidates(CList0, Params)), - ?assertMatch([#candidate{compaction_perc = 42.0}, - #candidate{compaction_perc = 50.0}, - #candidate{compaction_perc = 38.0}], - assess_candidates(CList0, setelement(1, Params, 5))), - ?assertMatch([#candidate{compaction_perc = 42.0}, - #candidate{compaction_perc = 50.0}, - #candidate{compaction_perc = 38.0}, - #candidate{compaction_perc = 75.0}, - #candidate{compaction_perc = 75.0}, - #candidate{compaction_perc = 45.0}], - assess_candidates(CList0, setelement(1, Params, 6))). + ?assertMatch(["b", "c", "d", "e"],check_bestrun(CList3, Params)). + +handle_emptycandidatelist_test() -> + ?assertMatch({[], 0.0}, assess_candidates([], {4, 60.0, 40.0})). test_ledgerkey(Key) -> {o, "Bucket", Key, null}.