Mas d34 leveled.i459 partialmerge (#460)

* Add test to replicate issue 459. Nothing actually crashes due to the issue - but looking at the logs, the polarised stats associated with the issue are visible. When merging into L3, you would normally expect to merge into 4 files - but actually we see FileCounter occasionally spiking.
* Add partial merge support. There is a `max_mergebelow` size which can be a positive integer, or infinity. It defaults to 32. If a merge from Level N covers fewer than `max_mergebelow` files in level N + 1 - the merge will proceed as before. If it covers >= `max_mergebelow` files, the merge will be curtailed when `max_mergebelow div 2` files have been created at that level. The remainder for Level N will then be written, as well as for Level N + 1 up to the next whole file that has not yet been touched by the merge. The backlog that prompted the merge will still exist - as the files in Level N have not been changed. However, it is likely the next file picked will not be the same one, and it will probably have a lower number of files to merge (as the average is =< 8). This will stop progress from being halted by long merge jobs, as they will exit out in a safe way after partial completion. In the case where the majority of files covered do not require a merge, those files will be skipped the next time the remainder file is picked up for merge at Level N.
2024-11-30 13:16:13 +00:00 · 2024-11-30 13:16:13 +00:00 · 69e8b29d1f
commit 69e8b29d1f
parent c642575caa
8 changed files with 360 additions and 66 deletions
--- a/test/end_to_end/riak_SUITE.erl
+++ b/test/end_to_end/riak_SUITE.erl
@ -2,8 +2,9 @@

 -include("leveled.hrl").

-export([all/0, init_per_suite/1, end_per_suite/1]).
+-export([all/0, init_per_suite/1, end_per_suite/1, suite/0]).
 -export([
+        test_large_lsm_merge/1,
        basic_riak/1,
        fetchclocks_modifiedbetween/1,
        crossbucket_aae/1,
@ -14,6 +15,8 @@
        summarisable_sstindex/1
            ]).

+suite() -> [{timetrap, {hours, 2}}].
+
 all() -> [
            basic_riak,
            fetchclocks_modifiedbetween,
@ -22,7 +25,8 @@ all() -> [
            dollar_bucket_index,
            dollar_key_index,
            bigobject_memorycheck,
-            summarisable_sstindex
+            summarisable_sstindex,
+            test_large_lsm_merge
            ].

 -define(MAGIC, 53). % riak_kv -> riak_object
@ -34,11 +38,160 @@ init_per_suite(Config) ->
 end_per_suite(Config) ->
    testutil:end_per_suite(Config).

+
+test_large_lsm_merge(_Config) ->
+    lsm_merge_tester(24).
+
+lsm_merge_tester(LoopsPerBucket) ->
+    RootPath = testutil:reset_filestructure("lsmMerge"),
+    PutsPerLoop = 32000,
+    SampleOneIn = 100,
+    StartOpts1 =
+        [
+            {root_path, RootPath},
+            {max_pencillercachesize, 16000},
+            {max_sstslots, 96},
+                % Make SST files smaller, to accelerate merges
+            {max_mergebelow, 24},
+            {sync_strategy, testutil:sync_strategy()},
+            {log_level, warn},
+            {compression_method, zstd},
+            {
+                forced_logs,
+                [
+                    b0015, b0016, b0017, b0018, p0032, sst12,
+                    pc008, pc010, pc011, pc026,
+                    p0018, p0024
+                ]
+            }
+        ],
+    {ok, Bookie1} = leveled_bookie:book_start(StartOpts1),
+
+    LoadBucketFun =
+        fun(Book, Bucket, Loops) ->
+            V = testutil:get_compressiblevalue(),
+            lists:foreach(
+                fun(_I) ->
+                    {_, V} =
+                        testutil:put_indexed_objects(
+                            Book,
+                            Bucket,
+                            PutsPerLoop,
+                            V
+                        )
+                end,
+                lists:seq(1, Loops)
+            ),
+        V
+        end,
+
+    V1 = LoadBucketFun(Bookie1, <<"B1">>, LoopsPerBucket),
+    io:format("Completed load of ~s~n", [<<"B1">>]),
+    V2 = LoadBucketFun(Bookie1, <<"B2">>, LoopsPerBucket),
+    io:format("Completed load of ~s~n", [<<"B2">>]),
+    ValueMap = #{<<"B1">> => V1, <<"B2">> => V2},
+
+    CheckBucketFun =
+        fun(Book) ->
+            BookHeadFoldFun =
+                fun(B, K, _Hd, {SampleKeys, CountAcc}) ->
+                    UpdCntAcc =
+                        maps:update_with(B, fun(C) -> C + 1 end, 1, CountAcc),
+                    case rand:uniform(SampleOneIn) of
+                        R when R == 1 ->
+                            {[{B, K}|SampleKeys], UpdCntAcc};
+                        _ ->
+                            {SampleKeys, UpdCntAcc}
+                    end
+                end,
+            {async, HeadFolder} =
+                leveled_bookie:book_headfold(
+                    Book, 
+                    ?RIAK_TAG,
+                    {BookHeadFoldFun, {[], maps:new()}},
+                    true,
+                    false,
+                    false
+                ), 
+            {Time, R} = timer:tc(HeadFolder),
+            io:format(
+                "CheckBucketFold returned counts ~w in ~w ms~n",
+                [element(2, R), Time div 1000]
+            ),
+            R
+        end,
+
+    {SampleKeysF1, CountMapF1} = CheckBucketFun(Bookie1),
+    true = (LoopsPerBucket * PutsPerLoop) == maps:get(<<"B1">>, CountMapF1),
+    true = (LoopsPerBucket * PutsPerLoop) == maps:get(<<"B2">>, CountMapF1),
+
+    TestSampleKeyFun =
+        fun(Book, Values) ->
+            fun({B, K}) ->
+                ExpectedV = maps:get(B, Values),
+                {ok, Obj} = testutil:book_riakget(Book, B, K),
+                true = ExpectedV == testutil:get_value(Obj)
+            end
+        end,
+
+    {GT1, ok} =
+        timer:tc(
+            fun() ->
+                lists:foreach(TestSampleKeyFun(Bookie1, ValueMap), SampleKeysF1)
+            end
+        ),
+    io:format(
+        "Returned ~w sample gets in ~w ms~n",
+        [length(SampleKeysF1), GT1 div 1000]
+    ),
+
+    ok = leveled_bookie:book_close(Bookie1),
+    {ok, Bookie2} =
+        leveled_bookie:book_start(
+            lists:ukeysort(1, [{max_sstslots, 64}|StartOpts1])
+        ),
+
+    {SampleKeysF2, CountMapF2} = CheckBucketFun(Bookie2),
+    true = (LoopsPerBucket * PutsPerLoop) == maps:get(<<"B1">>, CountMapF2),
+    true = (LoopsPerBucket * PutsPerLoop) == maps:get(<<"B2">>, CountMapF2),
+
+    {GT2, ok} =
+        timer:tc(
+            fun() ->
+                lists:foreach(TestSampleKeyFun(Bookie2, ValueMap), SampleKeysF2)
+            end
+        ),
+    io:format(
+        "Returned ~w sample gets in ~w ms~n",
+        [length(SampleKeysF2), GT2 div 1000]
+    ),
+
+    V3 = LoadBucketFun(Bookie2, <<"B3">>, LoopsPerBucket),
+    io:format("Completed load of ~s~n", [<<"B3">>]),
+    UpdValueMap = #{<<"B1">> => V1, <<"B2">> => V2, <<"B3">> => V3},
+
+    {SampleKeysF3, CountMapF3} = CheckBucketFun(Bookie2),
+    true = (LoopsPerBucket * PutsPerLoop) == maps:get(<<"B1">>, CountMapF3),
+    true = (LoopsPerBucket * PutsPerLoop) == maps:get(<<"B2">>, CountMapF3),
+    true = (LoopsPerBucket * PutsPerLoop) == maps:get(<<"B3">>, CountMapF3),
+
+    {GT3, ok} =
+        timer:tc(
+            fun() ->
+                lists:foreach(TestSampleKeyFun(Bookie2, UpdValueMap), SampleKeysF3)
+            end
+        ),
+    io:format(
+        "Returned ~w sample gets in ~w ms~n",
+        [length(SampleKeysF3), GT3 div 1000]
+    ),
+
+    ok = leveled_bookie:book_destroy(Bookie2).
+
 basic_riak(_Config) ->
    basic_riak_tester(<<"B0">>, 640000),
    basic_riak_tester({<<"Type0">>, <<"B0">>}, 80000).

-
 basic_riak_tester(Bucket, KeyCount) ->
    % Key Count should be > 10K and divisible by 5
    io:format("Basic riak test with Bucket ~w KeyCount ~w~n",