From 15c52ae118266f7f0f8dca0c2d3d7593234c71a4 Mon Sep 17 00:00:00 2001
From: Martin Sumner <martin.sumner@adaptip.co.uk>
Date: Fri, 2 Jun 2017 08:37:57 +0100
Subject: [PATCH] Change default compaction settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unit tests need to be able to pass in specific compaction settings,
rather than depend on the defaults being changed here.

Also, too much journal compaction may lead to intermittent failures in
the basic_SUITE space_clear_ondelete test.  I think this is because
there are fewer “deletes” to reload on startup to trigger the cascade
down and clear up.
---
 src/leveled_iclerk.erl          | 113 +++++++++++++++++++-------------
 test/end_to_end/basic_SUITE.erl |   4 +-
 2 files changed, 68 insertions(+), 49 deletions(-)

diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl
index 07c6aeb..40211ea 100644
--- a/src/leveled_iclerk.erl
+++ b/src/leveled_iclerk.erl
@@ -95,10 +95,10 @@
 -define(BATCH_SIZE, 32).
 -define(BATCHES_TO_CHECK, 8).
 %% How many consecutive files to compact in one run
--define(MAX_COMPACTION_RUN, 6).
+-define(MAX_COMPACTION_RUN, 8).
 %% Sliding scale to allow preference of longer runs up to maximum
 -define(SINGLEFILE_COMPACTION_TARGET, 40.0).
--define(MAXRUN_COMPACTION_TARGET, 60.0).
+-define(MAXRUN_COMPACTION_TARGET, 70.0).
 -define(CRC_SIZE, 4).
 -define(DEFAULT_RELOAD_STRATEGY, leveled_codec:inker_reload_strategy([])).
 -define(DEFAULT_WASTE_RETENTION_PERIOD, 86400).
@@ -383,47 +383,55 @@ fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) ->
     KL_List = leveled_cdb:cdb_directfetch(CDB, Batch, key_size),
     fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List).
 
-assess_candidates(AllCandidates, MaxRunLength) ->
-    NaiveBestRun = assess_candidates(AllCandidates, MaxRunLength, [], []),
+
+assess_candidates(AllCandidates, MaxRunLength) when is_integer(MaxRunLength) ->
+    % This will take the defaults for other params.
+    % Unit tests should pass tuple as params including tested defaults
+    assess_candidates(AllCandidates, 
+                        {MaxRunLength,
+                            ?MAXRUN_COMPACTION_TARGET,
+                            ?SINGLEFILE_COMPACTION_TARGET});
+assess_candidates(AllCandidates, Params) ->
+    NaiveBestRun = assess_candidates(AllCandidates, Params, [], []),
+    MaxRunLength = element(1, Params),
     case length(AllCandidates) of
         L when L > MaxRunLength, MaxRunLength > 1 ->
             %% Assess with different offsets from the start
-            SqL = lists:seq(1, MaxRunLength - 1),
-            lists:foldl(fun(Counter, BestRun) ->
-                                SubList = lists:nthtail(Counter,
-                                                            AllCandidates),
-                                assess_candidates(SubList,
-                                                    MaxRunLength,
-                                                    [],
-                                                    BestRun)
-                                end,
-                            NaiveBestRun,
-                            SqL);
+            AssessFold = 
+                fun(Counter, BestRun) ->
+                    SubList = lists:nthtail(Counter, AllCandidates),
+                    assess_candidates(SubList, Params, [], BestRun)
+                end,
+
+            lists:foldl(AssessFold, 
+                            NaiveBestRun, 
+                            lists:seq(1, MaxRunLength - 1));
         _ ->
             NaiveBestRun
     end.
 
-assess_candidates([], _MaxRunLength, _CurrentRun0, BestAssessment) ->
+assess_candidates([], _Params, _CurrentRun0, BestAssessment) ->
     BestAssessment;
-assess_candidates([HeadC|Tail], MaxRunLength, CurrentRun0, BestAssessment) ->
+assess_candidates([HeadC|Tail], Params, CurrentRun0, BestAssessment) ->
     CurrentRun1 = choose_best_assessment(CurrentRun0 ++ [HeadC],
                                             [HeadC],
-                                            MaxRunLength),
+                                            Params),
     assess_candidates(Tail,
-                        MaxRunLength,
+                        Params,
                         CurrentRun1,
                         choose_best_assessment(CurrentRun1,
                                                 BestAssessment,
-                                                MaxRunLength)).
+                                                Params)).
 
 
-choose_best_assessment(RunToAssess, BestRun, MaxRunLength) ->
+choose_best_assessment(RunToAssess, BestRun, Params) ->
+    {MaxRunLength, _MR_CT, _SF_CT} = Params,
     case length(RunToAssess) of
         LR1 when LR1 > MaxRunLength ->
             BestRun;
         _ ->
-            AssessScore = score_run(RunToAssess, MaxRunLength),
-            BestScore = score_run(BestRun, MaxRunLength),
+            AssessScore = score_run(RunToAssess, Params),
+            BestScore = score_run(BestRun, Params),
             if
                 AssessScore > BestScore ->
                     RunToAssess;
@@ -431,19 +439,23 @@ choose_best_assessment(RunToAssess, BestRun, MaxRunLength) ->
                     BestRun
             end
     end.    
-        
-score_run([], _MaxRunLength) ->
+
+score_run(Run, MaxRunLength) when is_integer(MaxRunLength) ->
+    Params = {MaxRunLength, 
+                    ?MAXRUN_COMPACTION_TARGET,
+                    ?SINGLEFILE_COMPACTION_TARGET},
+    score_run(Run, Params);
+score_run([], _Params) ->
     0.0;
-score_run(Run, MaxRunLength) ->
-    TargetIncr = case MaxRunLength of
-                        1 ->
-                            0.0;
-                        MaxRunSize ->
-                            (?MAXRUN_COMPACTION_TARGET
-                                - ?SINGLEFILE_COMPACTION_TARGET)
-                                    / (MaxRunSize - 1)
-                    end,
-    Target = ?SINGLEFILE_COMPACTION_TARGET +  TargetIncr * (length(Run) - 1),
+score_run(Run, {MaxRunLength, MR_CT, SF_CT}) ->
+    TargetIncr = 
+        case MaxRunLength of
+            1 ->
+                0.0;
+            MaxRunSize ->
+                (MR_CT - SF_CT) / (MaxRunSize - 1)
+        end,
+    Target = SF_CT +  TargetIncr * (length(Run) - 1),
     RunTotal = lists:foldl(fun(Cand, Acc) ->
                                 Acc + Cand#candidate.compaction_perc end,
                             0.0,
@@ -645,7 +657,7 @@ simple_score_test() ->
             #candidate{compaction_perc = 75.0},
             #candidate{compaction_perc = 76.0},
             #candidate{compaction_perc = 70.0}],
-    ?assertMatch(-14.0, score_run(Run1, 4)),
+    ?assertMatch(-4.0, score_run(Run1, 4)),
     Run2 = [#candidate{compaction_perc = 75.0}],
     ?assertMatch(-35.0, score_run(Run2, 4)),
     ?assertMatch(0.0, score_run([], 4)),
@@ -657,10 +669,16 @@ score_compare_test() ->
             #candidate{compaction_perc = 55.0},
             #candidate{compaction_perc = 56.0},
             #candidate{compaction_perc = 50.0}],
-    ?assertMatch(6.0, score_run(Run1, 4)),
+    ?assertMatch(16.0, score_run(Run1, 4)),
     Run2 = [#candidate{compaction_perc = 55.0}],
-    ?assertMatch(Run1, choose_best_assessment(Run1, Run2, 4)),
-    ?assertMatch(Run2, choose_best_assessment(Run1 ++ Run2, Run2, 4)).
+    ?assertMatch(Run1, 
+                    choose_best_assessment(Run1, 
+                                            Run2, 
+                                            {4, 60.0, 40.0})),
+    ?assertMatch(Run2, 
+                    choose_best_assessment(Run1 ++ Run2, 
+                                            Run2, 
+                                            {4, 60.0, 40.0})).
 
 file_gc_test() ->
     State = #state{waste_path="test/waste/",
@@ -683,6 +701,7 @@ find_bestrun_test() ->
 %% -define(SINGLEFILE_COMPACTION_TARGET, 40.0).
 %% -define(MAXRUN_COMPACTION_TARGET, 60.0).
 %% Tested first with blocks significant as no back-tracking
+    Params = {4, 60.0, 40.0},
     Block1 = [#candidate{compaction_perc = 55.0},
                 #candidate{compaction_perc = 65.0},
                 #candidate{compaction_perc = 42.0},
@@ -697,39 +716,39 @@ find_bestrun_test() ->
                 #candidate{compaction_perc = 100.0}],
     Block4 = [#candidate{compaction_perc = 55.0},
                 #candidate{compaction_perc = 56.0},
-                #candidate{compaction_perc = 56.0},
+                #candidate{compaction_perc = 57.0},
                 #candidate{compaction_perc = 40.0}],
     Block5 = [#candidate{compaction_perc = 60.0},
                 #candidate{compaction_perc = 60.0}],
     CList0 = Block1 ++ Block2 ++ Block3 ++ Block4 ++ Block5,
-    ?assertMatch(Block4, assess_candidates(CList0, 4, [], [])),
+    ?assertMatch(Block4, assess_candidates(CList0, Params, [], [])),
     CList1 = CList0 ++ [#candidate{compaction_perc = 20.0}],
     ?assertMatch([#candidate{compaction_perc = 20.0}],
-                    assess_candidates(CList1, 4, [], [])),
+                    assess_candidates(CList1, Params, [], [])),
     CList2 = Block4 ++ Block3 ++ Block2 ++ Block1 ++ Block5,
-    ?assertMatch(Block4, assess_candidates(CList2, 4, [], [])),
+    ?assertMatch(Block4, assess_candidates(CList2, Params, [], [])),
     CList3 = Block5 ++ Block1 ++ Block2 ++ Block3 ++ Block4,
     ?assertMatch([#candidate{compaction_perc = 42.0},
                         #candidate{compaction_perc = 50.0},
                         #candidate{compaction_perc = 38.0}],
-                    assess_candidates(CList3, 4, [], [])),
+                    assess_candidates(CList3, Params)),
     %% Now do some back-tracking to get a genuinely optimal solution without
     %% needing to re-order
     ?assertMatch([#candidate{compaction_perc = 42.0},
                         #candidate{compaction_perc = 50.0},
                         #candidate{compaction_perc = 38.0}],
-                    assess_candidates(CList0, 4)),
+                    assess_candidates(CList0, Params)),
     ?assertMatch([#candidate{compaction_perc = 42.0},
                         #candidate{compaction_perc = 50.0},
                         #candidate{compaction_perc = 38.0}],
-                    assess_candidates(CList0, 5)),
+                    assess_candidates(CList0, setelement(1, Params, 5))),
     ?assertMatch([#candidate{compaction_perc = 42.0},
                         #candidate{compaction_perc = 50.0},
                         #candidate{compaction_perc = 38.0},
                         #candidate{compaction_perc = 75.0},
                         #candidate{compaction_perc = 75.0},
                         #candidate{compaction_perc = 45.0}], 
-                    assess_candidates(CList0, 6)).
+                    assess_candidates(CList0, setelement(1, Params, 6))).
 
 test_ledgerkey(Key) ->
     {o, "Bucket", Key, null}.
diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl
index 932e062..2f3337d 100644
--- a/test/end_to_end/basic_SUITE.erl
+++ b/test/end_to_end/basic_SUITE.erl
@@ -467,7 +467,7 @@ load_and_count_withdelete(_Config) ->
 space_clear_ondelete(_Config) ->
     RootPath = testutil:reset_filestructure(),
     StartOpts1 = [{root_path, RootPath},
-                    {max_journalsize, 20000000},
+                    {max_journalsize, 10000000},
                     {sync_strategy, testutil:sync_strategy()}],
     {ok, Book1} = leveled_bookie:book_start(StartOpts1),
     G2 = fun testutil:generate_compressibleobjects/2,
@@ -586,7 +586,7 @@ space_clear_ondelete(_Config) ->
     io:format("FNsD - Bookie has ~w ledger files " ++
                     "after second close~n", [length(FNsD_L)]),
     lists:foreach(fun(FN) -> 
-                        io:format("FNsD - Ledger file if ~s~n", [FN]) 
+                        io:format("FNsD - Ledger file is ~s~n", [FN]) 
                     end, 
                     FNsD_L),
     true = PointB_Journals < length(FNsA_J),