From a0e9ac737c9e0a63d0b66d554e20bac44bcbeba1 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 23 Aug 2021 17:18:45 +0100 Subject: [PATCH] Mas i340 doublel3 d31 (#347) * Double size of L4 files And double max efficient size of leveled_ebloom * Revert penciller shape But expand file size at L3 * More concise version Following code review * OTP 24 dialyzer fix Bindings intended to match - so don't use underscore * Allow eqc tests to work from `rebar3 as eqc shell` Then `eqc:quickcheck(leveled_statemeqc:prop_db()).` Plus markdown tidy --- README.md | 4 +- rebar.config | 2 +- src/leveled_ebloom.erl | 41 +++++++++---- src/leveled_penciller.erl | 15 +++-- src/leveled_sst.erl | 118 +++++++++++++++++++++++++++++++++----- 5 files changed, 146 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index be0c60f..f71b605 100644 --- a/README.md +++ b/README.md @@ -78,8 +78,8 @@ In order to contribute to leveled, fork the repository, make a branch for your c To have rebar3 execute the full set of tests, run: - `rebar3 as test do xref, dialyzer, cover --reset, eunit --cover, ct --cover, cover --verbose` +```rebar3 as test do xref, dialyzer, cover --reset, eunit --cover, ct --cover, cover --verbose``` For those with a Quickcheck license, property-based tests can also be run using: - `rebar3 as eqc do eunit --module=leveled_simpleeqc, eunit --module=leveled_statemeqc` +```rebar3 as eqc do eunit --module=leveled_simpleeqc, eunit --module=leveled_statemeqc``` diff --git a/rebar.config b/rebar.config index 1c6298b..d8aeaa3 100644 --- a/rebar.config +++ b/rebar.config @@ -19,7 +19,7 @@ {profiles, [{eqc, [{deps, [meck, fqc]}, {erl_opts, [debug_info, {d, 'EQC'}]}, - {extra_src_dirs, ["test/property"]}, + {extra_src_dirs, ["test/property", "test/end_to_end"]}, {shell, [{apps, [lz4]}]}, {plugins, [rebar_eqc]} ]}, diff --git a/src/leveled_ebloom.erl b/src/leveled_ebloom.erl index 9612f72..6484891 100644 --- a/src/leveled_ebloom.erl +++ b/src/leveled_ebloom.erl @@ -29,13 +29,32 @@ %%% API %%%============================================================================ --spec create_bloom(list(integer())) -> bloom(). +-spec create_bloom(list(leveled_codec:segment_hash())) -> bloom(). %% @doc -%% Create a binary bloom filter from alist of hashes +%% Create a binary bloom filter from a list of hashes create_bloom(HashList) -> case length(HashList) of 0 -> <<>>; + L when L > 32768 -> + {HL0, HL1} = + lists:partition(fun({_, Hash}) -> Hash band 32 == 0 end, + HashList), + Bin1 = + add_hashlist(HL0, + 32, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0), + Bin2 = + add_hashlist(HL1, + 32, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0), + <>; L when L > 16384 -> add_hashlist(HashList, 32, @@ -55,7 +74,7 @@ create_bloom(HashList) -> end. --spec check_hash(integer(), bloom()) -> boolean(). +-spec check_hash(leveled_codec:segment_hash(), bloom()) -> boolean(). %% @doc %% Check for the presence of a given hash within a bloom check_hash(_Hash, <<>>) -> @@ -548,15 +567,17 @@ empty_bloom_test() -> check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0})). bloom_test_() -> - {timeout, 60, fun bloom_test_ranges/0}. + {timeout, 120, fun bloom_test_ranges/0}. bloom_test_ranges() -> - test_bloom(40000, 2), - test_bloom(128 * 256, 10), - test_bloom(20000, 2), - test_bloom(10000, 2), - test_bloom(5000, 2), - test_bloom(2000, 2). 
+ test_bloom(80000, 4), + test_bloom(60000, 4), + test_bloom(40000, 4), + test_bloom(128 * 256, 4), + test_bloom(20000, 4), + test_bloom(10000, 4), + test_bloom(5000, 4), + test_bloom(2000, 4). test_bloom(N, Runs) -> ListOfHashLists = diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 4e972df..a756ad3 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -307,17 +307,20 @@ -type iterator() :: list(iterator_entry()). -type bad_ledgerkey() :: list(). -type sqn_check() :: current|replaced|missing. --type pclacc_fun() :: - fun((leveled_codec:ledger_key(), - leveled_codec:ledger_value(), - any()) -> any()). -type sst_fetchfun() :: fun((pid(), leveled_codec:ledger_key(), leveled_codec:segment_hash(), - non_neg_integer()) -> leveled_codec:ledger_kv()|not_present). + non_neg_integer()) -> + leveled_codec:ledger_kv()|not_present). +-type levelzero_returnfun() :: fun((levelzero_cacheentry()) -> ok). +-type pclacc_fun() :: + fun((leveled_codec:ledger_key(), + leveled_codec:ledger_value(), + any()) -> any()). --export_type([levelzero_cacheentry/0, sqn_check/0]). + +-export_type([levelzero_cacheentry/0, levelzero_returnfun/0, sqn_check/0]). %%%============================================================================ %%% API diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index cff1799..2545559 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -91,6 +91,7 @@ -define(LMD_LENGTH, 4). -define(FLIPPER32, 4294967295). -define(COMPRESS_AT_LEVEL, 1). +-define(DOUBLESIZE_LEVEL, 3). -define(INDEX_MODDATE, true). -define(TOMB_COUNT, true). -define(USE_SET_FOR_SPEED, 64). @@ -281,7 +282,11 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST) -> sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> {ok, Pid} = gen_fsm:start_link(?MODULE, [], []), PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method), - OptsSST0 = OptsSST#sst_options{press_method = PressMethod0}, + MaxSlots0 = maxslots_level(Level, OptsSST#sst_options.max_sstslots), + OptsSST0 = + OptsSST#sst_options{press_method = PressMethod0, + max_sstslots = MaxSlots0}, + {[], [], SlotList, FK, _CountOfTombs} = merge_lists(KVList, OptsSST0, IndexModDate), case gen_fsm:sync_send_event(Pid, @@ -318,7 +323,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> %% deleted. %% %% The remainder of the lists is returned along with the StartKey and EndKey -%% so that the remainder cna be used in the next file in the merge. It might +%% so that the remainder can be used in the next file in the merge. It might %% be that the merge_lists returns nothing (for example when a basement file is %% all tombstones) - and the atom empty is returned in this case so that the %% file is not added to the manifest. @@ -333,7 +338,10 @@ sst_newmerge(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, OptsSST, IndexModDate, TombCount) -> PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method), - OptsSST0 = OptsSST#sst_options{press_method = PressMethod0}, + MaxSlots0 = maxslots_level(Level, OptsSST#sst_options.max_sstslots), + OptsSST0 = + OptsSST#sst_options{press_method = PressMethod0, + max_sstslots = MaxSlots0}, {Rem1, Rem2, SlotList, FK, CountOfTombs} = merge_lists(KVL1, KVL2, {IsBasement, Level}, OptsSST0, IndexModDate, TombCount), @@ -360,7 +368,13 @@ sst_newmerge(RootPath, Filename, end. 
-spec sst_newlevelzero(string(), string(), - integer(), fun()|list(), pid()|undefined, integer(), + integer(), + fun((pos_integer(), + leveled_penciller:levelzero_returnfun()) + -> ok)| + list(), + pid()|undefined, + integer(), sst_options()) -> {ok, pid(), noreply}. %% @doc @@ -371,7 +385,10 @@ sst_newlevelzero(RootPath, Filename, Slots, Fetcher, Penciller, MaxSQN, OptsSST) -> PressMethod0 = compress_level(0, OptsSST#sst_options.press_method), - OptsSST0 = OptsSST#sst_options{press_method = PressMethod0}, + MaxSlots0 = maxslots_level(0, OptsSST#sst_options.max_sstslots), + OptsSST0 = + OptsSST#sst_options{press_method = PressMethod0, + max_sstslots = MaxSlots0}, {ok, Pid} = gen_fsm:start_link(?MODULE, [], []), % Initiate the file into the "starting" state ok = gen_fsm:sync_send_event(Pid, @@ -1325,6 +1342,12 @@ compress_level(Level, _PressMethod) when Level < ?COMPRESS_AT_LEVEL -> compress_level(_Level, PressMethod) -> PressMethod. +-spec maxslots_level(non_neg_integer(), pos_integer()) -> pos_integer(). +maxslots_level(Level, MaxSlotCount) when Level < ?DOUBLESIZE_LEVEL -> + MaxSlotCount; +maxslots_level(_Level, MaxSlotCount) -> + 2 * MaxSlotCount. + write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod, IdxModDate, CountOfTombs) -> SummaryLength = byte_size(SummaryBin), @@ -2921,8 +2944,13 @@ generate_indexkey(Term, Count) -> Count, infinity). - tombcount_test() -> + tombcount_tester(1), + tombcount_tester(2), + tombcount_tester(3), + tombcount_tester(4). + +tombcount_tester(Level) -> N = 1600, KL1 = generate_randomkeys(N div 2 + 1, N, 1, 4), KL2 = generate_indexkeys(N div 2), @@ -2952,23 +2980,22 @@ tombcount_test() -> OptsSST = #sst_options{press_method=native, log_options=leveled_log:get_opts()}, - {ok, SST1, KD, BB} = sst_newmerge(RP, Filename, - KVL1, KVL2, false, 2, - N, OptsSST, false, false), + {ok, SST1, KD, BB} = sst_newmerge(RP, Filename, + KVL1, KVL2, false, Level, + N, OptsSST, false, false), ?assertMatch(not_counted, sst_gettombcount(SST1)), ok = sst_close(SST1), ok = file:delete(filename:join(RP, Filename ++ ".sst")), - {ok, SST2, KD, BB} = sst_newmerge(RP, Filename, - KVL1, KVL2, false, 2, - N, OptsSST, false, true), - + {ok, SST2, KD, BB} = sst_newmerge(RP, Filename, + KVL1, KVL2, false, Level, + N, OptsSST, false, true), + ?assertMatch(ExpectedCount, sst_gettombcount(SST2)), ok = sst_close(SST2), ok = file:delete(filename:join(RP, Filename ++ ".sst")). - form_slot_test() -> % If a skip key happens, mustn't switch to loookup by accident as could be % over the expected size @@ -3275,7 +3302,68 @@ test_binary_slot(FullBin, Key, Hash, ExpectedValue) -> % io:format(user, "Fetch success in ~w microseconds ~n", % [timer:now_diff(os:timestamp(), SW)]). - +doublesize_test_() -> + {timeout, 300, fun doublesize_tester/0}. + +doublesize_tester() -> + io:format(user, "~nPreparing key lists for test~n", []), + Contents = lists:ukeysort(1, generate_randomkeys(1, 65000, 1, 6)), + SplitFun = + fun({K, V}, {L1, L2}) -> + case length(L1) > length(L2) of + true -> + {L1, [{K, V}|L2]}; + _ -> + {[{K, V}|L1], L2} + end + end, + {KVL1, KVL2} = lists:foldr(SplitFun, {[], []}, Contents), + + io:format(user, "Running tests over different sizes:~n", []), + + size_tester(lists:sublist(KVL1, 4000), lists:sublist(KVL2, 4000), 8000), + size_tester(lists:sublist(KVL1, 16000), lists:sublist(KVL2, 16000), 32000), + size_tester(lists:sublist(KVL1, 24000), lists:sublist(KVL2, 24000), 48000), + size_tester(lists:sublist(KVL1, 32000), lists:sublist(KVL2, 32000), 64000). 
+ +size_tester(KVL1, KVL2, N) -> + io:format(user, "~nStarting ... test with ~w keys ~n", [N]), + + {RP, Filename} = {?TEST_AREA, "doublesize_test"}, + OptsSST = + #sst_options{press_method=native, + log_options=leveled_log:get_opts()}, + {ok, SST1, _KD, _BB} = sst_newmerge(RP, Filename, + KVL1, KVL2, false, ?DOUBLESIZE_LEVEL, + N, OptsSST, false, false), + ok = sst_close(SST1), + {ok, SST2, _SKEK, Bloom} = + sst_open(RP, Filename ++ ".sst", OptsSST, ?DOUBLESIZE_LEVEL), + FetchFun = + fun({K, V}) -> + {K0, V0} = sst_get(SST2, K), + ?assertMatch(K, K0), + ?assertMatch(V, V0) + end, + lists:foreach(FetchFun, KVL1 ++ KVL2), + + CheckBloomFun = + fun({K, _V}) -> + leveled_ebloom:check_hash(leveled_codec:segment_hash(K), Bloom) + end, + KBIn = length(lists:filter(CheckBloomFun, KVL1 ++ KVL2)), + KBOut = + length(lists:filter(CheckBloomFun, + generate_randomkeys(1, 1000, 7, 9))), + + ?assertMatch(N, KBIn), + + io:format(user, "~w false positives in 1000~n", [KBOut]), + + ok = sst_close(SST2), + ok = file:delete(filename:join(RP, Filename ++ ".sst")). + + merge_test() -> filelib:ensure_dir(?TEST_AREA), merge_tester(fun testsst_new/6, fun testsst_new/8).
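
The sketch below pulls the two sizing ideas in this patch out of the diff for reference: `maxslots_level/2` keeps the configured slot budget for levels shallower than `?DOUBLESIZE_LEVEL` and doubles it from level 3 downwards, while `create_bloom/1` copes with the larger files by partitioning an oversized hash list on a single hash bit so each half is built into its own fixed-size bloom. It is a minimal, standalone illustration rather than the leveled implementation: the module name `doublesize_sketch`, the function `split_for_bloom/1`, and the `?MAX_EFFICIENT_HASHES` macro are invented for the example, and it assumes each hash entry is a two-element tuple whose second element is the integer hash, as in the patch's partition fun.

```
%% doublesize_sketch.erl - illustrative only; apart from maxslots_level/2
%% the names here are not part of leveled.
-module(doublesize_sketch).

-export([maxslots_level/2, split_for_bloom/1]).

-define(DOUBLESIZE_LEVEL, 3).
%% Assumed threshold, mirroring the `L when L > 32768` clause in create_bloom/1.
-define(MAX_EFFICIENT_HASHES, 32768).

%% Levels shallower than ?DOUBLESIZE_LEVEL keep the configured slot budget;
%% deeper levels get twice as many slots per file, so each file holds
%% roughly twice as many keys.
-spec maxslots_level(non_neg_integer(), pos_integer()) -> pos_integer().
maxslots_level(Level, MaxSlotCount) when Level < ?DOUBLESIZE_LEVEL ->
    MaxSlotCount;
maxslots_level(_Level, MaxSlotCount) ->
    2 * MaxSlotCount.

%% Once a file can hold more hashes than a single bloom handles efficiently,
%% partition the hash list on one bit of the hash.  Each half can then be
%% built into its own fixed-size bloom and the two binaries concatenated;
%% a lookup inspects the same bit to decide which half to check.
-spec split_for_bloom([{term(), non_neg_integer()}]) ->
        single_bloom | {split_bloom, list(), list()}.
split_for_bloom(HashList) when length(HashList) =< ?MAX_EFFICIENT_HASHES ->
    single_bloom;
split_for_bloom(HashList) ->
    {HL0, HL1} =
        lists:partition(fun({_Seg, Hash}) -> Hash band 32 == 0 end, HashList),
    {split_bloom, HL0, HL1}.
```

If the configured budget were, say, 256 slots of 128 keys, `maxslots_level(2, 256)` returns 256 while `maxslots_level(3, 256)` returns 512 - which would line up with `create_bloom/1` gaining a branch for hash lists longer than 32768 and with `doublesize_tester/0` exercising key counts up to 64000.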