Mas i340 doublel3 d31 (#347)

* Double size of L4 files

And double max efficient size of leveled_ebloom

* Revert penciller shape

But expand file size at L3

* More concise version

Following code review

* OTP 24 dialyzer fix

Bindings intended to match - so don't use underscore

* Allow eqc tests to work from `rebar3 as eqc shell`

Then `eqc:quickcheck(leveled_statemeqc:prop_db()).`

Plus markdown tidy
This commit is contained in:
Martin Sumner 2021-08-23 17:18:45 +01:00 committed by GitHub
parent 507bf63e22
commit a0e9ac737c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 146 additions and 34 deletions

View file

@ -78,8 +78,8 @@ In order to contribute to leveled, fork the repository, make a branch for your c
To have rebar3 execute the full set of tests, run: To have rebar3 execute the full set of tests, run:
`rebar3 as test do xref, dialyzer, cover --reset, eunit --cover, ct --cover, cover --verbose` ```rebar3 as test do xref, dialyzer, cover --reset, eunit --cover, ct --cover, cover --verbose```
For those with a Quickcheck license, property-based tests can also be run using: For those with a Quickcheck license, property-based tests can also be run using:
`rebar3 as eqc do eunit --module=leveled_simpleeqc, eunit --module=leveled_statemeqc` ```rebar3 as eqc do eunit --module=leveled_simpleeqc, eunit --module=leveled_statemeqc```

View file

@ -19,7 +19,7 @@
{profiles, {profiles,
[{eqc, [{deps, [meck, fqc]}, [{eqc, [{deps, [meck, fqc]},
{erl_opts, [debug_info, {d, 'EQC'}]}, {erl_opts, [debug_info, {d, 'EQC'}]},
{extra_src_dirs, ["test/property"]}, {extra_src_dirs, ["test/property", "test/end_to_end"]},
{shell, [{apps, [lz4]}]}, {shell, [{apps, [lz4]}]},
{plugins, [rebar_eqc]} {plugins, [rebar_eqc]}
]}, ]},

View file

@ -29,13 +29,32 @@
%%% API %%% API
%%%============================================================================ %%%============================================================================
-spec create_bloom(list(integer())) -> bloom(). -spec create_bloom(list(leveled_codec:segment_hash())) -> bloom().
%% @doc %% @doc
%% Create a binary bloom filter from alist of hashes %% Create a binary bloom filter from a list of hashes
create_bloom(HashList) -> create_bloom(HashList) ->
case length(HashList) of case length(HashList) of
0 -> 0 ->
<<>>; <<>>;
L when L > 32768 ->
{HL0, HL1} =
lists:partition(fun({_, Hash}) -> Hash band 32 == 0 end,
HashList),
Bin1 =
add_hashlist(HL0,
32,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0),
Bin2 =
add_hashlist(HL1,
32,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0),
<<Bin1/binary, Bin2/binary>>;
L when L > 16384 -> L when L > 16384 ->
add_hashlist(HashList, add_hashlist(HashList,
32, 32,
@ -55,7 +74,7 @@ create_bloom(HashList) ->
end. end.
-spec check_hash(integer(), bloom()) -> boolean(). -spec check_hash(leveled_codec:segment_hash(), bloom()) -> boolean().
%% @doc %% @doc
%% Check for the presence of a given hash within a bloom %% Check for the presence of a given hash within a bloom
check_hash(_Hash, <<>>) -> check_hash(_Hash, <<>>) ->
@ -548,15 +567,17 @@ empty_bloom_test() ->
check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0})). check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0})).
bloom_test_() -> bloom_test_() ->
{timeout, 60, fun bloom_test_ranges/0}. {timeout, 120, fun bloom_test_ranges/0}.
bloom_test_ranges() -> bloom_test_ranges() ->
test_bloom(40000, 2), test_bloom(80000, 4),
test_bloom(128 * 256, 10), test_bloom(60000, 4),
test_bloom(20000, 2), test_bloom(40000, 4),
test_bloom(10000, 2), test_bloom(128 * 256, 4),
test_bloom(5000, 2), test_bloom(20000, 4),
test_bloom(2000, 2). test_bloom(10000, 4),
test_bloom(5000, 4),
test_bloom(2000, 4).
test_bloom(N, Runs) -> test_bloom(N, Runs) ->
ListOfHashLists = ListOfHashLists =

View file

@ -307,17 +307,20 @@
-type iterator() :: list(iterator_entry()). -type iterator() :: list(iterator_entry()).
-type bad_ledgerkey() :: list(). -type bad_ledgerkey() :: list().
-type sqn_check() :: current|replaced|missing. -type sqn_check() :: current|replaced|missing.
-type pclacc_fun() ::
fun((leveled_codec:ledger_key(),
leveled_codec:ledger_value(),
any()) -> any()).
-type sst_fetchfun() :: -type sst_fetchfun() ::
fun((pid(), fun((pid(),
leveled_codec:ledger_key(), leveled_codec:ledger_key(),
leveled_codec:segment_hash(), leveled_codec:segment_hash(),
non_neg_integer()) -> leveled_codec:ledger_kv()|not_present). non_neg_integer()) ->
leveled_codec:ledger_kv()|not_present).
-type levelzero_returnfun() :: fun((levelzero_cacheentry()) -> ok).
-type pclacc_fun() ::
fun((leveled_codec:ledger_key(),
leveled_codec:ledger_value(),
any()) -> any()).
-export_type([levelzero_cacheentry/0, sqn_check/0]).
-export_type([levelzero_cacheentry/0, levelzero_returnfun/0, sqn_check/0]).
%%%============================================================================ %%%============================================================================
%%% API %%% API

View file

@ -91,6 +91,7 @@
-define(LMD_LENGTH, 4). -define(LMD_LENGTH, 4).
-define(FLIPPER32, 4294967295). -define(FLIPPER32, 4294967295).
-define(COMPRESS_AT_LEVEL, 1). -define(COMPRESS_AT_LEVEL, 1).
-define(DOUBLESIZE_LEVEL, 3).
-define(INDEX_MODDATE, true). -define(INDEX_MODDATE, true).
-define(TOMB_COUNT, true). -define(TOMB_COUNT, true).
-define(USE_SET_FOR_SPEED, 64). -define(USE_SET_FOR_SPEED, 64).
@ -281,7 +282,11 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST) ->
sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) ->
{ok, Pid} = gen_fsm:start_link(?MODULE, [], []), {ok, Pid} = gen_fsm:start_link(?MODULE, [], []),
PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method), PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method),
OptsSST0 = OptsSST#sst_options{press_method = PressMethod0}, MaxSlots0 = maxslots_level(Level, OptsSST#sst_options.max_sstslots),
OptsSST0 =
OptsSST#sst_options{press_method = PressMethod0,
max_sstslots = MaxSlots0},
{[], [], SlotList, FK, _CountOfTombs} = {[], [], SlotList, FK, _CountOfTombs} =
merge_lists(KVList, OptsSST0, IndexModDate), merge_lists(KVList, OptsSST0, IndexModDate),
case gen_fsm:sync_send_event(Pid, case gen_fsm:sync_send_event(Pid,
@ -318,7 +323,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) ->
%% deleted. %% deleted.
%% %%
%% The remainder of the lists is returned along with the StartKey and EndKey %% The remainder of the lists is returned along with the StartKey and EndKey
%% so that the remainder cna be used in the next file in the merge. It might %% so that the remainder can be used in the next file in the merge. It might
%% be that the merge_lists returns nothing (for example when a basement file is %% be that the merge_lists returns nothing (for example when a basement file is
%% all tombstones) - and the atom empty is returned in this case so that the %% all tombstones) - and the atom empty is returned in this case so that the
%% file is not added to the manifest. %% file is not added to the manifest.
@ -333,7 +338,10 @@ sst_newmerge(RootPath, Filename,
KVL1, KVL2, IsBasement, Level, KVL1, KVL2, IsBasement, Level,
MaxSQN, OptsSST, IndexModDate, TombCount) -> MaxSQN, OptsSST, IndexModDate, TombCount) ->
PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method), PressMethod0 = compress_level(Level, OptsSST#sst_options.press_method),
OptsSST0 = OptsSST#sst_options{press_method = PressMethod0}, MaxSlots0 = maxslots_level(Level, OptsSST#sst_options.max_sstslots),
OptsSST0 =
OptsSST#sst_options{press_method = PressMethod0,
max_sstslots = MaxSlots0},
{Rem1, Rem2, SlotList, FK, CountOfTombs} = {Rem1, Rem2, SlotList, FK, CountOfTombs} =
merge_lists(KVL1, KVL2, {IsBasement, Level}, OptsSST0, merge_lists(KVL1, KVL2, {IsBasement, Level}, OptsSST0,
IndexModDate, TombCount), IndexModDate, TombCount),
@ -360,7 +368,13 @@ sst_newmerge(RootPath, Filename,
end. end.
-spec sst_newlevelzero(string(), string(), -spec sst_newlevelzero(string(), string(),
integer(), fun()|list(), pid()|undefined, integer(), integer(),
fun((pos_integer(),
leveled_penciller:levelzero_returnfun())
-> ok)|
list(),
pid()|undefined,
integer(),
sst_options()) -> sst_options()) ->
{ok, pid(), noreply}. {ok, pid(), noreply}.
%% @doc %% @doc
@ -371,7 +385,10 @@ sst_newlevelzero(RootPath, Filename,
Slots, Fetcher, Penciller, Slots, Fetcher, Penciller,
MaxSQN, OptsSST) -> MaxSQN, OptsSST) ->
PressMethod0 = compress_level(0, OptsSST#sst_options.press_method), PressMethod0 = compress_level(0, OptsSST#sst_options.press_method),
OptsSST0 = OptsSST#sst_options{press_method = PressMethod0}, MaxSlots0 = maxslots_level(0, OptsSST#sst_options.max_sstslots),
OptsSST0 =
OptsSST#sst_options{press_method = PressMethod0,
max_sstslots = MaxSlots0},
{ok, Pid} = gen_fsm:start_link(?MODULE, [], []), {ok, Pid} = gen_fsm:start_link(?MODULE, [], []),
% Initiate the file into the "starting" state % Initiate the file into the "starting" state
ok = gen_fsm:sync_send_event(Pid, ok = gen_fsm:sync_send_event(Pid,
@ -1325,6 +1342,12 @@ compress_level(Level, _PressMethod) when Level < ?COMPRESS_AT_LEVEL ->
compress_level(_Level, PressMethod) -> compress_level(_Level, PressMethod) ->
PressMethod. PressMethod.
-spec maxslots_level(non_neg_integer(), pos_integer()) -> pos_integer().
%% @doc
%% Return the maximum slot count to use for an SST file at the given level.
%% Files at levels below ?DOUBLESIZE_LEVEL keep the configured maximum;
%% deeper levels (>= ?DOUBLESIZE_LEVEL) are allowed double-sized files, so
%% the configured maximum is doubled for them.
maxslots_level(Level, MaxSlotCount) when Level < ?DOUBLESIZE_LEVEL ->
MaxSlotCount;
maxslots_level(_Level, MaxSlotCount) ->
2 * MaxSlotCount.
write_file(RootPath, Filename, SummaryBin, SlotsBin, write_file(RootPath, Filename, SummaryBin, SlotsBin,
PressMethod, IdxModDate, CountOfTombs) -> PressMethod, IdxModDate, CountOfTombs) ->
SummaryLength = byte_size(SummaryBin), SummaryLength = byte_size(SummaryBin),
@ -2921,8 +2944,13 @@ generate_indexkey(Term, Count) ->
Count, Count,
infinity). infinity).
tombcount_test() -> tombcount_test() ->
tombcount_tester(1),
tombcount_tester(2),
tombcount_tester(3),
tombcount_tester(4).
tombcount_tester(Level) ->
N = 1600, N = 1600,
KL1 = generate_randomkeys(N div 2 + 1, N, 1, 4), KL1 = generate_randomkeys(N div 2 + 1, N, 1, 4),
KL2 = generate_indexkeys(N div 2), KL2 = generate_indexkeys(N div 2),
@ -2953,14 +2981,14 @@ tombcount_test() ->
#sst_options{press_method=native, #sst_options{press_method=native,
log_options=leveled_log:get_opts()}, log_options=leveled_log:get_opts()},
{ok, SST1, KD, BB} = sst_newmerge(RP, Filename, {ok, SST1, KD, BB} = sst_newmerge(RP, Filename,
KVL1, KVL2, false, 2, KVL1, KVL2, false, Level,
N, OptsSST, false, false), N, OptsSST, false, false),
?assertMatch(not_counted, sst_gettombcount(SST1)), ?assertMatch(not_counted, sst_gettombcount(SST1)),
ok = sst_close(SST1), ok = sst_close(SST1),
ok = file:delete(filename:join(RP, Filename ++ ".sst")), ok = file:delete(filename:join(RP, Filename ++ ".sst")),
{ok, SST2, KD, BB} = sst_newmerge(RP, Filename, {ok, SST2, KD, BB} = sst_newmerge(RP, Filename,
KVL1, KVL2, false, 2, KVL1, KVL2, false, Level,
N, OptsSST, false, true), N, OptsSST, false, true),
?assertMatch(ExpectedCount, sst_gettombcount(SST2)), ?assertMatch(ExpectedCount, sst_gettombcount(SST2)),
@ -2968,7 +2996,6 @@ tombcount_test() ->
ok = file:delete(filename:join(RP, Filename ++ ".sst")). ok = file:delete(filename:join(RP, Filename ++ ".sst")).
form_slot_test() -> form_slot_test() ->
% If a skip key happens, mustn't switch to lookup by accident as could be % If a skip key happens, mustn't switch to lookup by accident as could be
% over the expected size % over the expected size
@ -3275,6 +3302,67 @@ test_binary_slot(FullBin, Key, Hash, ExpectedValue) ->
% io:format(user, "Fetch success in ~w microseconds ~n", % io:format(user, "Fetch success in ~w microseconds ~n",
% [timer:now_diff(os:timestamp(), SW)]). % [timer:now_diff(os:timestamp(), SW)]).
%% EUnit test generator: wrap the double-size file test in an extended
%% timeout (300s) as it builds and probes files of up to 64000 keys.
doublesize_test_() ->
{timeout, 300, fun doublesize_tester/0}.
%% Drive size_tester/3 across a range of file sizes.  A single sorted,
%% de-duplicated key list is generated once, then split into two roughly
%% equal-length lists (alternating assignment via foldr) to act as the two
%% merge inputs expected by sst_newmerge.
doublesize_tester() ->
io:format(user, "~nPreparing key lists for test~n", []),
Contents = lists:ukeysort(1, generate_randomkeys(1, 65000, 1, 6)),
SplitFun =
fun({K, V}, {L1, L2}) ->
% Alternate keys between the two accumulators, always appending
% to the currently-shorter list so the split stays balanced.
case length(L1) > length(L2) of
true ->
{L1, [{K, V}|L2]};
_ ->
{[{K, V}|L1], L2}
end
end,
{KVL1, KVL2} = lists:foldr(SplitFun, {[], []}, Contents),
io:format(user, "Running tests over different sizes:~n", []),
% Third argument is the expected total key count (|KVL1| + |KVL2|).
size_tester(lists:sublist(KVL1, 4000), lists:sublist(KVL2, 4000), 8000),
size_tester(lists:sublist(KVL1, 16000), lists:sublist(KVL2, 16000), 32000),
size_tester(lists:sublist(KVL1, 24000), lists:sublist(KVL2, 24000), 48000),
size_tester(lists:sublist(KVL1, 32000), lists:sublist(KVL2, 32000), 64000).
%% Build a double-size (?DOUBLESIZE_LEVEL) SST file by merging KVL1 and
%% KVL2 (N keys in total), close and re-open it, then confirm:
%% - every key fetches back its original value via sst_get/2;
%% - every stored key is reported present by the bloom filter (no false
%%   negatives - asserted exactly as N hits);
%% - false positives for 1000 keys from a disjoint key range are only
%%   reported, not asserted.
%% The file is deleted on completion.
size_tester(KVL1, KVL2, N) ->
io:format(user, "~nStarting ... test with ~w keys ~n", [N]),
{RP, Filename} = {?TEST_AREA, "doublesize_test"},
OptsSST =
#sst_options{press_method=native,
log_options=leveled_log:get_opts()},
{ok, SST1, _KD, _BB} = sst_newmerge(RP, Filename,
KVL1, KVL2, false, ?DOUBLESIZE_LEVEL,
N, OptsSST, false, false),
ok = sst_close(SST1),
% Re-open from disk to exercise the read path on a persisted file.
{ok, SST2, _SKEK, Bloom} =
sst_open(RP, Filename ++ ".sst", OptsSST, ?DOUBLESIZE_LEVEL),
FetchFun =
fun({K, V}) ->
{K0, V0} = sst_get(SST2, K),
?assertMatch(K, K0),
?assertMatch(V, V0)
end,
lists:foreach(FetchFun, KVL1 ++ KVL2),
CheckBloomFun =
fun({K, _V}) ->
leveled_ebloom:check_hash(leveled_codec:segment_hash(K), Bloom)
end,
KBIn = length(lists:filter(CheckBloomFun, KVL1 ++ KVL2)),
% Keys drawn from bucket range 7-9 are disjoint from the stored range
% (1-6), so any bloom hit here is a false positive.
KBOut =
length(lists:filter(CheckBloomFun,
generate_randomkeys(1, 1000, 7, 9))),
?assertMatch(N, KBIn),
io:format(user, "~w false positives in 1000~n", [KBOut]),
ok = sst_close(SST2),
ok = file:delete(filename:join(RP, Filename ++ ".sst")).
merge_test() -> merge_test() ->
filelib:ensure_dir(?TEST_AREA), filelib:ensure_dir(?TEST_AREA),