Mas i340 doublel3 d31 (#347)

* Double size of L4 files, and double the max efficient size of leveled_ebloom
* Revert penciller shape, but expand file size at L3
* More concise version following code review
* OTP 24 dialyzer fix: bindings intended to match - so don't use underscore
* Allow eqc tests to work from `rebar3 as eqc shell`, then `eqc:quickcheck(leveled_statemeqc:prop_db()).` Plus markdown tidy
parent 507bf63e22 · commit a0e9ac737c
5 changed files with 146 additions and 34 deletions
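As a quick illustration of the eqc shell workflow described in the commit message (a sketch only; it assumes a Quickcheck licence and the `eqc` profile from rebar.config below):

```
$ rebar3 as eqc shell
1> eqc:quickcheck(leveled_statemeqc:prop_db()).
```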
@@ -78,8 +78,8 @@ In order to contribute to leveled, fork the repository, make a branch for your c
 To have rebar3 execute the full set of tests, run:
 
-`rebar3 as test do xref, dialyzer, cover --reset, eunit --cover, ct --cover, cover --verbose`
+```rebar3 as test do xref, dialyzer, cover --reset, eunit --cover, ct --cover, cover --verbose```
 
 For those with a Quickcheck license, property-based tests can also be run using:
 
-`rebar3 as eqc do eunit --module=leveled_simpleeqc, eunit --module=leveled_statemeqc`
+```rebar3 as eqc do eunit --module=leveled_simpleeqc, eunit --module=leveled_statemeqc```
@@ -19,7 +19,7 @@
 {profiles,
   [{eqc, [{deps, [meck, fqc]},
           {erl_opts, [debug_info, {d, 'EQC'}]},
-          {extra_src_dirs, ["test/property"]},
+          {extra_src_dirs, ["test/property", "test/end_to_end"]},
           {shell, [{apps, [lz4]}]},
           {plugins, [rebar_eqc]}
          ]},
@@ -29,13 +29,32 @@
 %%% API
 %%%============================================================================
 
--spec create_bloom(list(integer())) -> bloom().
+-spec create_bloom(list(leveled_codec:segment_hash())) -> bloom().
 %% @doc
-%% Create a binary bloom filter from alist of hashes
+%% Create a binary bloom filter from a list of hashes
 create_bloom(HashList) ->
     case length(HashList) of
         0 ->
             <<>>;
+        L when L > 32768 ->
+            {HL0, HL1} =
+                lists:partition(fun({_, Hash}) -> Hash band 32 == 0 end,
+                                HashList),
+            Bin1 =
+                add_hashlist(HL0,
+                             32,
+                             0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0),
+            Bin2 =
+                add_hashlist(HL1,
+                             32,
+                             0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0),
+            <<Bin1/binary, Bin2/binary>>;
         L when L > 16384 ->
             add_hashlist(HashList,
                          32,
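The change above doubles the hash count the bloom handles efficiently: past 32768 hashes the list is partitioned on one bit of the hash and two blooms are built and concatenated. A minimal usage sketch, not part of the commit — the `{SegmentID, Hash}` tuples below are stand-ins for `leveled_codec:segment_hash/1` output:

```
Hashes = [{I band 65535, erlang:phash2({key, I})} || I <- lists:seq(1, 40000)],
Bloom = leveled_ebloom:create_bloom(Hashes),
%% A hash that was added must always be found (no false negatives)
true = leveled_ebloom:check_hash(hd(Hashes), Bloom).
```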
@@ -55,7 +74,7 @@ create_bloom(HashList) ->
     end.
 
 
--spec check_hash(integer(), bloom()) -> boolean().
+-spec check_hash(leveled_codec:segment_hash(), bloom()) -> boolean().
 %% @doc
 %% Check for the presence of a given hash within a bloom
 check_hash(_Hash, <<>>) ->
@@ -548,15 +567,17 @@ empty_bloom_test() ->
     check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0})).
 
 bloom_test_() ->
-    {timeout, 60, fun bloom_test_ranges/0}.
+    {timeout, 120, fun bloom_test_ranges/0}.
 
 bloom_test_ranges() ->
-    test_bloom(40000, 2),
-    test_bloom(128 * 256, 10),
-    test_bloom(20000, 2),
-    test_bloom(10000, 2),
-    test_bloom(5000, 2),
-    test_bloom(2000, 2).
+    test_bloom(80000, 4),
+    test_bloom(60000, 4),
+    test_bloom(40000, 4),
+    test_bloom(128 * 256, 4),
+    test_bloom(20000, 4),
+    test_bloom(10000, 4),
+    test_bloom(5000, 4),
+    test_bloom(2000, 4).
 
 test_bloom(N, Runs) ->
     ListOfHashLists =
@@ -307,17 +307,20 @@
 -type iterator() :: list(iterator_entry()).
 -type bad_ledgerkey() :: list().
 -type sqn_check() :: current|replaced|missing.
--type pclacc_fun() ::
-        fun((leveled_codec:ledger_key(),
-             leveled_codec:ledger_value(),
-             any()) -> any()).
 -type sst_fetchfun() ::
         fun((pid(),
              leveled_codec:ledger_key(),
              leveled_codec:segment_hash(),
-             non_neg_integer()) -> leveled_codec:ledger_kv()|not_present).
+             non_neg_integer()) ->
+                 leveled_codec:ledger_kv()|not_present).
+-type levelzero_returnfun() :: fun((levelzero_cacheentry()) -> ok).
+-type pclacc_fun() ::
+        fun((leveled_codec:ledger_key(),
+             leveled_codec:ledger_value(),
+             any()) -> any()).
 
--export_type([levelzero_cacheentry/0, sqn_check/0]).
+-export_type([levelzero_cacheentry/0, levelzero_returnfun/0, sqn_check/0]).
 
 %%%============================================================================
 %%% API
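For reference, a sketch (not from the commit) of funs shaped like the types above — the newly exported levelzero_returnfun() and the relocated pclacc_fun():

```
%% Shaped like levelzero_returnfun(): takes a levelzero_cacheentry()
%% and returns ok once the cache entry has been handed over.
ReturnFun = fun(_LevelZeroCacheEntry) -> ok end,
%% Shaped like pclacc_fun(): fold each ledger key/value into an accumulator.
AccFun = fun(LedgerKey, _LedgerValue, Acc) -> [LedgerKey|Acc] end,
{ReturnFun, AccFun}.
```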
@@ -91,6 +91,7 @@
 -define(LMD_LENGTH, 4).
 -define(FLIPPER32, 4294967295).
 -define(COMPRESS_AT_LEVEL, 1).
+-define(DOUBLESIZE_LEVEL, 3).
 -define(INDEX_MODDATE, true).
 -define(TOMB_COUNT, true).
 -define(USE_SET_FOR_SPEED, 64).
@@ -281,7 +282,11 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST) ->
 sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) ->
     {ok, Pid} = gen_fsm:start_link(?MODULE, [], []),
     PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method),
-    OptsSST0 = OptsSST#sst_options{press_method = PressMethod0},
+    MaxSlots0 = maxslots_level(Level, OptsSST#sst_options.max_sstslots),
+    OptsSST0 =
+        OptsSST#sst_options{press_method = PressMethod0,
+                            max_sstslots = MaxSlots0},
+
     {[], [], SlotList, FK, _CountOfTombs} =
         merge_lists(KVList, OptsSST0, IndexModDate),
     case gen_fsm:sync_send_event(Pid,
@@ -318,7 +323,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) ->
 %% deleted.
 %%
 %% The remainder of the lists is returned along with the StartKey and EndKey
-%% so that the remainder cna be used in the next file in the merge. It might
+%% so that the remainder can be used in the next file in the merge. It might
 %% be that the merge_lists returns nothing (for example when a basement file is
 %% all tombstones) - and the atom empty is returned in this case so that the
 %% file is not added to the manifest.
@@ -333,7 +338,10 @@ sst_newmerge(RootPath, Filename,
              KVL1, KVL2, IsBasement, Level,
              MaxSQN, OptsSST, IndexModDate, TombCount) ->
     PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method),
-    OptsSST0 = OptsSST#sst_options{press_method = PressMethod0},
+    MaxSlots0 = maxslots_level(Level, OptsSST#sst_options.max_sstslots),
+    OptsSST0 =
+        OptsSST#sst_options{press_method = PressMethod0,
+                            max_sstslots = MaxSlots0},
     {Rem1, Rem2, SlotList, FK, CountOfTombs} =
         merge_lists(KVL1, KVL2, {IsBasement, Level}, OptsSST0,
                     IndexModDate, TombCount),
@@ -360,7 +368,13 @@ sst_newmerge(RootPath, Filename,
 end.
 
 -spec sst_newlevelzero(string(), string(),
-                       integer(), fun()|list(), pid()|undefined, integer(),
+                       integer(),
+                       fun((pos_integer(),
+                            leveled_penciller:levelzero_returnfun())
+                             -> ok)|
+                           list(),
+                       pid()|undefined,
+                       integer(),
                        sst_options()) ->
                            {ok, pid(), noreply}.
 %% @doc
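The widened spec above admits either a list of slots or a two-argument fetcher fun. A sketch of the fun shape only — `fetch_slot_batch/1` and the `SlotN` naming are hypothetical, not part of leveled:

```
%% Given a position and the penciller's return fun, deliver a
%% levelzero_cacheentry() and return ok, per the spec above.
Fetcher =
    fun(SlotN, ReturnFun) ->
        ok = ReturnFun(fetch_slot_batch(SlotN)),
        ok
    end.
```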
@@ -371,7 +385,10 @@ sst_newlevelzero(RootPath, Filename,
                  Slots, Fetcher, Penciller,
                  MaxSQN, OptsSST) ->
     PressMethod0 = compress_level(0, OptsSST#sst_options.press_method),
-    OptsSST0 = OptsSST#sst_options{press_method = PressMethod0},
+    MaxSlots0 = maxslots_level(0, OptsSST#sst_options.max_sstslots),
+    OptsSST0 =
+        OptsSST#sst_options{press_method = PressMethod0,
+                            max_sstslots = MaxSlots0},
     {ok, Pid} = gen_fsm:start_link(?MODULE, [], []),
     % Initiate the file into the "starting" state
     ok = gen_fsm:sync_send_event(Pid,
@@ -1325,6 +1342,12 @@ compress_level(Level, _PressMethod) when Level < ?COMPRESS_AT_LEVEL ->
 compress_level(_Level, PressMethod) ->
     PressMethod.
 
+-spec maxslots_level(non_neg_integer(), pos_integer()) -> pos_integer().
+maxslots_level(Level, MaxSlotCount) when Level < ?DOUBLESIZE_LEVEL ->
+    MaxSlotCount;
+maxslots_level(_Level, MaxSlotCount) ->
+    2 * MaxSlotCount.
+
 write_file(RootPath, Filename, SummaryBin, SlotsBin,
            PressMethod, IdxModDate, CountOfTombs) ->
     SummaryLength = byte_size(SummaryBin),
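The new maxslots_level/2 helper is what expands file size at L3 and below: levels shallower than ?DOUBLESIZE_LEVEL keep the configured slot count, L3 and deeper get twice as many slots per file. Illustrative only — 256 is an assumed configured max_sstslots, not a value taken from this diff:

```
256 = maxslots_level(1, 256),
256 = maxslots_level(2, 256),
512 = maxslots_level(3, 256),
512 = maxslots_level(4, 256).
```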
@@ -2921,8 +2944,13 @@ generate_indexkey(Term, Count) ->
                       Count,
                       infinity).
 
 
 tombcount_test() ->
+    tombcount_tester(1),
+    tombcount_tester(2),
+    tombcount_tester(3),
+    tombcount_tester(4).
+
+tombcount_tester(Level) ->
     N = 1600,
     KL1 = generate_randomkeys(N div 2 + 1, N, 1, 4),
     KL2 = generate_indexkeys(N div 2),
@@ -2952,23 +2980,22 @@ tombcount_test() ->
     OptsSST =
         #sst_options{press_method=native,
                      log_options=leveled_log:get_opts()},
-    {ok, SST1, KD, BB} = sst_newmerge(RP, Filename,
-                                      KVL1, KVL2, false, 2,
-                                      N, OptsSST, false, false),
+    {ok, SST1, KD, BB} = sst_newmerge(RP, Filename,
+                                      KVL1, KVL2, false, Level,
+                                      N, OptsSST, false, false),
     ?assertMatch(not_counted, sst_gettombcount(SST1)),
     ok = sst_close(SST1),
     ok = file:delete(filename:join(RP, Filename ++ ".sst")),
 
-    {ok, SST2, KD, BB} = sst_newmerge(RP, Filename,
-                                      KVL1, KVL2, false, 2,
-                                      N, OptsSST, false, true),
-
+    {ok, SST2, KD, BB} = sst_newmerge(RP, Filename,
+                                      KVL1, KVL2, false, Level,
+                                      N, OptsSST, false, true),
+
     ?assertMatch(ExpectedCount, sst_gettombcount(SST2)),
     ok = sst_close(SST2),
     ok = file:delete(filename:join(RP, Filename ++ ".sst")).
 
 
 
 form_slot_test() ->
     % If a skip key happens, mustn't switch to loookup by accident as could be
     % over the expected size
@@ -3275,7 +3302,68 @@ test_binary_slot(FullBin, Key, Hash, ExpectedValue) ->
     % io:format(user, "Fetch success in ~w microseconds ~n",
     %           [timer:now_diff(os:timestamp(), SW)]).
 
 
+doublesize_test_() ->
+    {timeout, 300, fun doublesize_tester/0}.
+
+doublesize_tester() ->
+    io:format(user, "~nPreparing key lists for test~n", []),
+    Contents = lists:ukeysort(1, generate_randomkeys(1, 65000, 1, 6)),
+    SplitFun =
+        fun({K, V}, {L1, L2}) ->
+            case length(L1) > length(L2) of
+                true ->
+                    {L1, [{K, V}|L2]};
+                _ ->
+                    {[{K, V}|L1], L2}
+            end
+        end,
+    {KVL1, KVL2} = lists:foldr(SplitFun, {[], []}, Contents),
+
+    io:format(user, "Running tests over different sizes:~n", []),
+
+    size_tester(lists:sublist(KVL1, 4000), lists:sublist(KVL2, 4000), 8000),
+    size_tester(lists:sublist(KVL1, 16000), lists:sublist(KVL2, 16000), 32000),
+    size_tester(lists:sublist(KVL1, 24000), lists:sublist(KVL2, 24000), 48000),
+    size_tester(lists:sublist(KVL1, 32000), lists:sublist(KVL2, 32000), 64000).
+
+size_tester(KVL1, KVL2, N) ->
+    io:format(user, "~nStarting ... test with ~w keys ~n", [N]),
+
+    {RP, Filename} = {?TEST_AREA, "doublesize_test"},
+    OptsSST =
+        #sst_options{press_method=native,
+                     log_options=leveled_log:get_opts()},
+    {ok, SST1, _KD, _BB} = sst_newmerge(RP, Filename,
+                                        KVL1, KVL2, false, ?DOUBLESIZE_LEVEL,
+                                        N, OptsSST, false, false),
+    ok = sst_close(SST1),
+    {ok, SST2, _SKEK, Bloom} =
+        sst_open(RP, Filename ++ ".sst", OptsSST, ?DOUBLESIZE_LEVEL),
+    FetchFun =
+        fun({K, V}) ->
+            {K0, V0} = sst_get(SST2, K),
+            ?assertMatch(K, K0),
+            ?assertMatch(V, V0)
+        end,
+    lists:foreach(FetchFun, KVL1 ++ KVL2),
+
+    CheckBloomFun =
+        fun({K, _V}) ->
+            leveled_ebloom:check_hash(leveled_codec:segment_hash(K), Bloom)
+        end,
+    KBIn = length(lists:filter(CheckBloomFun, KVL1 ++ KVL2)),
+    KBOut =
+        length(lists:filter(CheckBloomFun,
+                            generate_randomkeys(1, 1000, 7, 9))),
+
+    ?assertMatch(N, KBIn),
+
+    io:format(user, "~w false positives in 1000~n", [KBOut]),
+
+    ok = sst_close(SST2),
+    ok = file:delete(filename:join(RP, Filename ++ ".sst")).
+
+
 merge_test() ->
     filelib:ensure_dir(?TEST_AREA),
     merge_tester(fun testsst_new/6, fun testsst_new/8).
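To run only the new double-size test rather than the whole leveled_sst suite, something like the following should work from a `rebar3 as test shell` (a sketch; it relies on eunit auto-exporting the *_test_ generator):

```
eunit:test({generator, fun leveled_sst:doublesize_test_/0}, [verbose]).
```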