Switch to using bloom at penciller

Previously the tinybloom was used within the SST file as an extra check to remove false fetches.

However, the SST file already has a low-FPR check in the slot_index.  If the new ebloom is used (which is no longer per slot, but per SST file), it can be shared with the penciller, and the penciller can then check it and avoid the message pass altogether.

The message pass may be blocked by a 2i query or a slot fetch request for a merge, so checking the bloom before messaging should make performance within the Penciller snappier.
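
A minimal sketch of the intended flow (simplified from the fetch/5 change in the penciller diff below - the not_present handling is omitted):

fetch(Key, Hash, Manifest, Level, FetchFun) ->
    case leveled_pmanifest:key_lookup(Manifest, Level, Key) of
        false ->
            fetch(Key, Hash, Manifest, Level + 1, FetchFun);
        FP ->
            case leveled_pmanifest:check_bloom(Manifest, FP, Hash) of
                true ->  % possible hit - pay for the message pass
                    FetchFun(FP, Key, Hash, Level);
                false -> % certain miss - move to the next level for free
                    fetch(Key, Hash, Manifest, Level + 1, FetchFun)
            end
    end.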

This is a result of taking sst_timings within a volume test - where there was an average of +100 microseconds for each level dropped down.  Given the bloom/slot checks themselves accounted for < 20 microseconds, around 80 microseconds per level remains unexplained - most likely delay in the message pass itself.

The bloom is a binary of > 64 bytes - so it is held as a reference-counted binary, and passing it around should not require a copy.
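
An illustrative sketch (hypothetical bindings - create_bloom/1 is the constructor used later in this diff):

%% Binaries over 64 bytes are reference-counted ("refc") binaries held
%% off-heap, so sending one in a message shares the payload rather than
%% copying it.
Bloom = leveled_ebloom:create_bloom(HashList),
true = byte_size(Bloom) > 64.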
Martin Sumner 2017-11-28 01:19:30 +00:00
parent 467ad50cd1
commit c2f19d8825
8 changed files with 367 additions and 572 deletions


@ -33,7 +33,8 @@
{start_key :: tuple() | undefined,
end_key :: tuple() | undefined,
owner :: pid()|list(),
filename :: string() | undefined}).
filename :: string() | undefined,
bloom :: binary() | none}).
-record(cdb_options,
{max_size :: integer() | undefined,


@ -226,9 +226,9 @@
{"SST12",
{info, "SST Timings for sample_count=~w"
++ " at timing points index_query_time=~w"
++ " tiny_bloom_time=~w slot_index_time=~w slot_fetch_time=~w"
++ " lookup_cache_time=~w slot_index_time=~w slot_fetch_time=~w"
++ " noncached_block_fetch_time=~w"
++ " exiting at points tiny_bloom=~w slot_index=~w"
++ " exiting at points slot_index=~w"
++ " slot_fetch=~w noncached_block_fetch=~w"}},


@ -223,12 +223,13 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, CM, Additions) ->
RP, NewSQN, MaxSQN,
CM,
Additions);
{ok, Pid, Reply} ->
{ok, Pid, Reply, Bloom} ->
{{KL1Rem, KL2Rem}, SmallestKey, HighestKey} = Reply,
Entry = #manifest_entry{start_key=SmallestKey,
end_key=HighestKey,
owner=Pid,
filename=FileName},
filename=FileName,
bloom=Bloom},
leveled_log:log_timer("PC015", [], TS1),
do_merge(KL1Rem, KL2Rem,
SinkLevel, SinkB,
@ -275,35 +276,40 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) ->
merge_file_test() ->
KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)),
{ok, PidL1_1, _} = leveled_sst:sst_new("../test/",
{ok, PidL1_1, _, _} =
leveled_sst:sst_new("../test/",
"KL1_L1.sst",
1,
KL1_L1,
999999,
native),
KL1_L2 = lists:sort(generate_randomkeys(8000, 0, 250)),
{ok, PidL2_1, _} = leveled_sst:sst_new("../test/",
{ok, PidL2_1, _, _} =
leveled_sst:sst_new("../test/",
"KL1_L2.sst",
2,
KL1_L2,
999999,
native),
KL2_L2 = lists:sort(generate_randomkeys(8000, 250, 250)),
{ok, PidL2_2, _} = leveled_sst:sst_new("../test/",
{ok, PidL2_2, _, _} =
leveled_sst:sst_new("../test/",
"KL2_L2.sst",
2,
KL2_L2,
999999,
lz4),
KL3_L2 = lists:sort(generate_randomkeys(8000, 500, 250)),
{ok, PidL2_3, _} = leveled_sst:sst_new("../test/",
{ok, PidL2_3, _, _} =
leveled_sst:sst_new("../test/",
"KL3_L2.sst",
2,
KL3_L2,
999999,
lz4),
KL4_L2 = lists:sort(generate_randomkeys(8000, 750, 250)),
{ok, PidL2_4, _} = leveled_sst:sst_new("../test/",
{ok, PidL2_4, _, _} =
leveled_sst:sst_new("../test/",
"KL4_L2.sst",
2,
KL4_L2,


@ -181,7 +181,7 @@
pcl_checksequencenumber/3,
pcl_workforclerk/1,
pcl_manifestchange/2,
pcl_confirml0complete/4,
pcl_confirml0complete/5,
pcl_confirmdelete/3,
pcl_close/1,
pcl_doom/1,
@ -439,14 +439,14 @@ pcl_workforclerk(Pid) ->
pcl_manifestchange(Pid, Manifest) ->
gen_server:cast(Pid, {manifest_change, Manifest}).
-spec pcl_confirml0complete(pid(), string(), tuple(), tuple()) -> ok.
-spec pcl_confirml0complete(pid(), string(), tuple(), tuple(), binary()) -> ok.
%% @doc
%% Allows a SST writer that has written a L0 file to confirm that the file
%% is now complete, so the filename and key ranges can be added to the
%% manifest and the file can be used in place of the in-memory levelzero
%% cache.
pcl_confirml0complete(Pid, FN, StartKey, EndKey) ->
gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey}).
pcl_confirml0complete(Pid, FN, StartKey, EndKey, Bloom) ->
gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey, Bloom}).
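%% Illustrative (not part of this commit): the level-zero constructor now
%% calls back with the bloom as a fifth argument, e.g.
%%   pcl_confirml0complete(Penciller, FN, FirstKey, LastKey, Bloom)
%% so the L0 manifest entry can carry the bloom from the moment it is added.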
-spec pcl_confirmdelete(pid(), string(), pid()) -> ok.
%% @doc
@ -759,12 +759,13 @@ handle_cast({confirm_delete, Filename, FilePid}, State=#state{is_snapshot=Snap})
% from the Clerk
{noreply, State}
end;
handle_cast({levelzero_complete, FN, StartKey, EndKey}, State) ->
handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) ->
leveled_log:log("P0029", []),
ManEntry = #manifest_entry{start_key=StartKey,
end_key=EndKey,
owner=State#state.levelzero_constructor,
filename=FN},
filename=FN,
bloom=Bloom},
ManifestSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1,
UpdMan = leveled_pmanifest:insert_manifest_entry(State#state.manifest,
ManifestSQN,
@ -837,7 +838,7 @@ terminate(Reason, State) ->
L0_Left = State#state.levelzero_size > 0,
case {State#state.levelzero_pending, L0_Present, L0_Left} of
{false, false, true} ->
L0Pid = roll_memory(State, true),
{L0Pid, _L0Bloom} = roll_memory(State, true),
ok = leveled_sst:sst_close(L0Pid);
StatusTuple ->
leveled_log:log("P0010", [StatusTuple])
@ -911,11 +912,9 @@ start_from_file(PCLopts) ->
Manifest0 = leveled_pmanifest:open_manifest(RootPath),
OpenFun =
fun(FN) ->
{ok,
Pid,
{_FK, _LK}} =
{ok, Pid, {_FK, _LK}, Bloom} =
leveled_sst:sst_open(sst_rootpath(RootPath), FN),
Pid
{Pid, Bloom}
end,
SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1,
{MaxSQN, Manifest1, FileList} =
@ -930,12 +929,13 @@ start_from_file(PCLopts) ->
true ->
leveled_log:log("P0015", [L0FN]),
L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN),
{ok, L0Pid, {L0StartKey, L0EndKey}} = L0Open,
{ok, L0Pid, {L0StartKey, L0EndKey}, Bloom} = L0Open,
L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid),
L0Entry = #manifest_entry{start_key = L0StartKey,
end_key = L0EndKey,
filename = L0FN,
owner = L0Pid},
owner = L0Pid,
bloom = Bloom},
Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1,
ManSQN + 1,
0,
@ -1025,7 +1025,7 @@ update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN},
JitterCheck = RandomFactor or CacheMuchTooBig,
case {CacheTooBig, L0Free, JitterCheck, NoPendingManifestChange} of
{true, true, true, true} ->
L0Constructor = roll_memory(UpdState, false),
{L0Constructor, none} = roll_memory(UpdState, false),
leveled_log:log_timer("P0031", [true, true], SW),
UpdState#state{levelzero_pending=true,
levelzero_constructor=L0Constructor};
@ -1063,7 +1063,7 @@ roll_memory(State, false) ->
State#state.ledger_sqn,
State#state.compression_method),
{ok, Constructor, _} = R,
Constructor;
{Constructor, none};
roll_memory(State, true) ->
ManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1,
RootPath = sst_rootpath(State#state.root_path),
@ -1077,8 +1077,8 @@ roll_memory(State, true) ->
KVList,
State#state.ledger_sqn,
State#state.compression_method),
{ok, Constructor, _} = R,
Constructor.
{ok, Constructor, _, Bloom} = R,
{Constructor, Bloom}.
timed_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, Timings) ->
SW = os:timestamp(),
@ -1107,11 +1107,16 @@ fetch(Key, Hash, Manifest, Level, FetchFun) ->
false ->
fetch(Key, Hash, Manifest, Level + 1, FetchFun);
FP ->
case leveled_pmanifest:check_bloom(Manifest, FP, Hash) of
true ->
case FetchFun(FP, Key, Hash, Level) of
not_present ->
fetch(Key, Hash, Manifest, Level + 1, FetchFun);
ObjectFound ->
{ObjectFound, Level}
end;
false ->
fetch(Key, Hash, Manifest, Level + 1, FetchFun)
end
end.


@ -41,7 +41,8 @@
ready_to_delete/2,
check_for_work/2,
is_basement/2,
levelzero_present/1
levelzero_present/1,
check_bloom/3
]).
-export([
@ -69,8 +70,10 @@
pending_deletes, % OTP16 does not like defining type
% a dictionary mapping keys (filenames) to SQN when the
% deletion was made, and the original Manifest Entry
basement :: integer()
basement :: integer(),
% Currently the lowest level (the largest number)
blooms :: dict:dict()
% A dictionary mapping PIDs to bloom filters
}).
-type manifest() :: #manifest{}.
@ -100,7 +103,8 @@ new_manifest() ->
manifest_sqn = 0,
snapshots = [],
pending_deletes = dict:new(),
basement = 0
basement = 0,
blooms = dict:new()
}.
-spec open_manifest(string()) -> manifest().
@ -143,17 +147,23 @@ copy_manifest(Manifest) ->
%% manifest. The PidFun should be able to return the Pid of a file process
%% (having started one). The SQNFun will return the max sequence number
%% of that file, if passed the Pid that owns it.
load_manifest(Manifest, PidFun, SQNFun) ->
load_manifest(Manifest, LoadFun, SQNFun) ->
UpdateLevelFun =
fun(LevelIdx, {AccMaxSQN, AccMan, AccFL}) ->
L0 = array:get(LevelIdx, AccMan#manifest.levels),
{L1, SQN1, FileList} = load_level(LevelIdx, L0, PidFun, SQNFun),
{L1, SQN1, FileList, LvlBloom} =
load_level(LevelIdx, L0, LoadFun, SQNFun),
UpdLevels = array:set(LevelIdx, L1, AccMan#manifest.levels),
UpdBlooms =
dict:merge(fun(_K, V, V) -> V end,
AccMan#manifest.blooms,
LvlBloom),
{max(AccMaxSQN, SQN1),
AccMan#manifest{levels = UpdLevels},
AccMan#manifest{levels = UpdLevels, blooms = UpdBlooms},
AccFL ++ FileList}
end,
lists:foldl(UpdateLevelFun, {0, Manifest, []},
lists:foldl(UpdateLevelFun,
{0, Manifest, []},
lists:seq(0, Manifest#manifest.basement)).
-spec close_manifest(manifest(), fun()) -> ok.
@ -182,7 +192,8 @@ save_manifest(Manifest, RootPath) ->
FP = filepath(RootPath, Manifest#manifest.manifest_sqn, current_manifest),
ManBin = term_to_binary(Manifest#manifest{snapshots = [],
pending_deletes = dict:new(),
min_snapshot_sqn = 0}),
min_snapshot_sqn = 0,
blooms = dict:new()}),
CRC = erlang:crc32(ManBin),
ok = file:write_file(FP, <<CRC:32/integer, ManBin/binary>>).
@ -198,9 +209,12 @@ save_manifest(Manifest, RootPath) ->
replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) ->
Levels = Manifest#manifest.levels,
Level = array:get(LevelIdx, Levels),
UpdLevel = replace_entry(LevelIdx, Level, Removals, Additions),
{UpdBlooms, StrippedAdditions} =
update_blooms(Removals, Additions, Manifest#manifest.blooms),
UpdLevel = replace_entry(LevelIdx, Level, Removals, StrippedAdditions),
leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]),
PendingDeletes = update_pendingdeletes(ManSQN,
PendingDeletes =
update_pendingdeletes(ManSQN,
Removals,
Manifest#manifest.pending_deletes),
UpdLevels = array:set(LevelIdx, UpdLevel, Levels),
@ -209,13 +223,15 @@ replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) ->
Manifest#manifest{levels = UpdLevels,
basement = get_basement(UpdLevels),
manifest_sqn = ManSQN,
pending_deletes = PendingDeletes};
pending_deletes = PendingDeletes,
blooms = UpdBlooms};
false ->
Basement = max(LevelIdx, Manifest#manifest.basement),
Manifest#manifest{levels = UpdLevels,
basement = Basement,
manifest_sqn = ManSQN,
pending_deletes = PendingDeletes}
pending_deletes = PendingDeletes,
blooms = UpdBlooms}
end.
-spec insert_manifest_entry(manifest(), integer(), integer(),
@ -226,12 +242,15 @@ replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) ->
insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
Levels = Manifest#manifest.levels,
Level = array:get(LevelIdx, Levels),
UpdLevel = add_entry(LevelIdx, Level, Entry),
{UpdBlooms, UpdEntry} =
update_blooms([], Entry, Manifest#manifest.blooms),
UpdLevel = add_entry(LevelIdx, Level, UpdEntry),
leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]),
Basement = max(LevelIdx, Manifest#manifest.basement),
Manifest#manifest{levels = array:set(LevelIdx, UpdLevel, Levels),
basement = Basement,
manifest_sqn = ManSQN}.
manifest_sqn = ManSQN,
blooms = UpdBlooms}.
-spec remove_manifest_entry(manifest(), integer(), integer(),
list()|manifest_entry()) -> manifest().
@ -240,6 +259,8 @@ insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
Levels = Manifest#manifest.levels,
Level = array:get(LevelIdx, Levels),
{UpdBlooms, []} =
update_blooms(Entry, [], Manifest#manifest.blooms),
UpdLevel = remove_entry(LevelIdx, Level, Entry),
leveled_log:log("PC019", ["remove", LevelIdx, UpdLevel]),
PendingDeletes = update_pendingdeletes(ManSQN,
@ -251,11 +272,13 @@ remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
Manifest#manifest{levels = UpdLevels,
basement = get_basement(UpdLevels),
manifest_sqn = ManSQN,
pending_deletes = PendingDeletes};
pending_deletes = PendingDeletes,
blooms = UpdBlooms};
false ->
Manifest#manifest{levels = UpdLevels,
manifest_sqn = ManSQN,
pending_deletes = PendingDeletes}
pending_deletes = PendingDeletes,
blooms = UpdBlooms}
end.
-spec switch_manifest_entry(manifest(), integer(), integer(),
@ -479,6 +502,20 @@ is_basement(Manifest, Level) ->
levelzero_present(Manifest) ->
not is_empty(0, array:get(0, Manifest#manifest.levels)).
-spec check_bloom(manifest(), string(), {integer(), integer()}) -> boolean().
%% @doc
%% Check to see if a hash is present in a manifest entry by using the exported
%% bloom filter
check_bloom(Manifest, FP, Hash) ->
case dict:find(FP, Manifest#manifest.blooms) of
{ok, Bloom} when is_binary(Bloom) ->
leveled_ebloom:check_hash(Hash, Bloom);
_ ->
true
end.
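%% Illustrative sketch (not part of this commit) of the fail-open contract
%% above: false is definitive, while true covers both "probably present"
%% and "no bloom held for this file", so fetches degrade safely.
%%
%% case check_bloom(Manifest, FP, Hash) of
%%     false -> skip_to_next_level;
%%     true  -> pay_for_the_fetch    % includes the no-bloom fallback
%% end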
%%%============================================================================
%%% Internal Functions
%%%============================================================================
@ -489,35 +526,39 @@ levelzero_present(Manifest) ->
%% future branches may make lower levels trees or skiplists to improve fetch
%% efficiency
load_level(LevelIdx, Level, PidFun, SQNFun) ->
load_level(LevelIdx, Level, LoadFun, SQNFun) ->
HigherLevelLoadFun =
fun(ME, {L_Out, L_MaxSQN, FileList}) ->
fun(ME, {L_Out, L_MaxSQN, FileList, BloomD}) ->
FN = ME#manifest_entry.filename,
P = PidFun(FN),
{P, Bloom} = LoadFun(FN),
SQN = SQNFun(P),
{[ME#manifest_entry{owner=P}|L_Out],
max(SQN, L_MaxSQN),
[FN|FileList]}
[FN|FileList],
dict:store(FN, Bloom, BloomD)}
end,
LowerLevelLoadFun =
fun({EK, ME}, {L_Out, L_MaxSQN, FileList}) ->
fun({EK, ME}, {L_Out, L_MaxSQN, FileList, BloomD}) ->
FN = ME#manifest_entry.filename,
P = PidFun(FN),
{P, Bloom} = LoadFun(FN),
SQN = SQNFun(P),
{[{EK, ME#manifest_entry{owner=P}}|L_Out],
max(SQN, L_MaxSQN),
[FN|FileList]}
[FN|FileList],
dict:store(FN, Bloom, BloomD)}
end,
case LevelIdx =< 1 of
true ->
lists:foldr(HigherLevelLoadFun, {[], 0, []}, Level);
lists:foldr(HigherLevelLoadFun, {[], 0, [], dict:new()}, Level);
false ->
{L0, MaxSQN, Flist} = lists:foldr(LowerLevelLoadFun,
{[], 0, []},
{L0, MaxSQN, Flist, UpdBloomD} =
lists:foldr(LowerLevelLoadFun,
{[], 0, [], dict:new()},
leveled_tree:to_list(Level)),
{leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH),
MaxSQN,
Flist}
Flist,
UpdBloomD}
end.
close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 ->
@ -567,9 +608,7 @@ add_entry(LevelIdx, Level, Entries) when is_list(Entries) ->
leveled_tree:from_orderedlist(lists:append([LHS, Entries0, RHS]),
?TREE_TYPE,
?TREE_WIDTH)
end;
add_entry(LevelIdx, Level, Entry) ->
add_entry(LevelIdx, Level, [Entry]).
end.
remove_entry(LevelIdx, Level, Entries) ->
% We're assuming we're removing a sorted sublist
@ -608,12 +647,7 @@ replace_entry(LevelIdx, Level, Removals, Additions) when LevelIdx =< 1 ->
FirstEntry#manifest_entry.end_key),
{LHS, RHS} = lists:splitwith(PredFun, Level),
Post = lists:nthtail(SectionLength, RHS),
case is_list(Additions) of
true ->
lists:append([LHS, Additions, Post]);
false ->
lists:append([LHS, [Additions], Post])
end;
replace_entry(LevelIdx, Level, Removals, Additions) ->
{SectionLength, FirstEntry} = measure_removals(Removals),
PredFun = pred_fun(LevelIdx,
@ -627,21 +661,11 @@ replace_entry(LevelIdx, Level, Removals, Additions) ->
_ ->
lists:nthtail(SectionLength, RHS)
end,
UpdList =
case is_list(Additions) of
true ->
MapFun =
fun(ME) ->
{ME#manifest_entry.end_key, ME}
end,
Additions0 = lists:map(MapFun, Additions),
lists:append([LHS, Additions0, Post]);
false ->
lists:append([LHS,
[{Additions#manifest_entry.end_key,
Additions}],
Post])
end,
UpdList = lists:append([LHS, lists:map(MapFun, Additions), Post]),
leveled_tree:from_orderedlist(UpdList, ?TREE_TYPE, ?TREE_WIDTH).
@ -661,6 +685,46 @@ update_pendingdeletes(ManSQN, Removals, PendingDeletes) ->
end,
lists:foldl(DelFun, PendingDeletes, Entries).
-spec update_blooms(list()|manifest_entry(),
list()|manifest_entry(),
dict:dict())
-> {dict:dict(), list()}.
%% @doc
%%
%% The manifest holds a Pid -> Bloom mapping for every file owner, and this needs to
%% be updated to represent the changes. However, the bloom would bloat out
%% the stored manifest, so the bloom must be stripped from the manifest entry
%% as part of this process
update_blooms(Removals, Additions, Blooms) ->
Additions0 =
case is_list(Additions) of
true -> Additions;
false -> [Additions]
end,
Removals0 =
case is_list(Removals) of
true -> Removals;
false -> [Removals]
end,
RemFun =
fun(R, BloomD) ->
dict:erase(R#manifest_entry.owner, BloomD)
end,
AddFun =
fun(A, BloomD) ->
dict:store(A#manifest_entry.owner, A#manifest_entry.bloom, BloomD)
end,
StripFun =
fun(A) ->
A#manifest_entry{bloom = none}
end,
Blooms0 = lists:foldl(RemFun, Blooms, Removals0),
Blooms1 = lists:foldl(AddFun, Blooms0, Additions0),
{Blooms1, lists:map(StripFun, Additions0)}.
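%% Illustrative round trip (hypothetical bindings, not part of this commit):
%% the bloom migrates from the entry into the dictionary, keyed by owner,
%% and only the stripped entry reaches the level structure.
%%
%% E0 = #manifest_entry{owner = Pid, bloom = BloomBin},
%% {Blooms1, [E1]} = update_blooms([], E0, dict:new()),
%% none = E1#manifest_entry.bloom,
%% {ok, BloomBin} = dict:find(Pid, Blooms1).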
key_lookup_level(LevelIdx, [], _Key) when LevelIdx =< 1 ->
false;
key_lookup_level(LevelIdx, [Entry|Rest], Key) when LevelIdx =< 1 ->
@ -782,27 +846,33 @@ initial_setup() ->
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
filename="Z1",
owner="pid_z1"},
owner="pid_z1",
bloom=none},
E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
end_key={o, "Bucket1", "K71", null},
filename="Z2",
owner="pid_z2"},
owner="pid_z2",
bloom=none},
E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
end_key={o, "Bucket1", "K993", null},
filename="Z3",
owner="pid_z3"},
owner="pid_z3",
bloom=none},
E4 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld7"}, "K93"},
filename="Z4",
owner="pid_z4"},
owner="pid_z4",
bloom=none},
E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"},
end_key={o, "Bucket1", "K78", null},
filename="Z5",
owner="pid_z5"},
owner="pid_z5",
bloom=none},
E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null},
end_key={o, "Bucket1", "K996", null},
filename="Z6",
owner="pid_z6"},
owner="pid_z6",
bloom=none},
Man0 = new_manifest(),
@ -819,32 +889,39 @@ changeup_setup(Man6) ->
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
filename="Z1",
owner="pid_z1"},
owner="pid_z1",
bloom=none},
E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
end_key={o, "Bucket1", "K71", null},
filename="Z2",
owner="pid_z2"},
owner="pid_z2",
bloom=none},
E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
end_key={o, "Bucket1", "K993", null},
filename="Z3",
owner="pid_z3"},
owner="pid_z3",
bloom=none},
E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"},
owner="pid_y1",
filename="Y1"},
filename="Y1",
bloom=none},
E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
end_key={o, "Bucket1", "K45", null},
owner="pid_y2",
filename="Y2"},
filename="Y2",
bloom=none},
E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null},
end_key={o, "Bucket1", "K812", null},
owner="pid_y3",
filename="Y3"},
filename="Y3",
bloom=none},
E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null},
end_key={o, "Bucket1", "K998", null},
owner="pid_y4",
filename="Y4"},
filename="Y4",
bloom=none},
Man7 = remove_manifest_entry(Man6, 2, 1, E1),
Man8 = remove_manifest_entry(Man7, 2, 1, E2),
@ -949,32 +1026,39 @@ ext_keylookup_manifest_test() ->
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
filename="Z1",
owner="pid_z1"},
owner="pid_z1",
bloom=none},
E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
end_key={o, "Bucket1", "K71", null},
filename="Z2",
owner="pid_z2"},
owner="pid_z2",
bloom=none},
E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
end_key={o, "Bucket1", "K993", null},
filename="Z3",
owner="pid_z3"},
owner="pid_z3",
bloom=none},
E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"},
owner="pid_y1",
filename="Y1"},
filename="Y1",
bloom=none},
E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
end_key={o, "Bucket1", "K45", null},
owner="pid_y2",
filename="Y2"},
filename="Y2",
bloom=none},
E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null},
end_key={o, "Bucket1", "K812", null},
owner="pid_y3",
filename="Y3"},
filename="Y3",
bloom=none},
E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null},
end_key={o, "Bucket1", "K998", null},
owner="pid_y4",
filename="Y4"},
filename="Y4",
bloom=none},
Man8 = replace_manifest_entry(ManOpen2, 2, 1, E1, E1_2),
Man9 = remove_manifest_entry(Man8, 2, 1, [E2, E3]),
@ -988,21 +1072,18 @@ ext_keylookup_manifest_test() ->
E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"},
end_key={o, "Bucket1", "K78", null},
filename="Z5",
owner="pid_z5"},
owner="pid_z5",
bloom=none},
E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null},
end_key={o, "Bucket1", "K996", null},
filename="Z6",
owner="pid_z6"},
owner="pid_z6",
bloom=none},
Man11 = remove_manifest_entry(Man10, 3, 2, [E5, E6]),
?assertMatch(3, get_manifest_sqn(Man11)),
?assertMatch(false, key_lookup(Man11, 2, LK1_4)),
E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
end_key={o, "Bucket1", "K45", null},
owner="pid_y2",
filename="Y2"},
Man12 = replace_manifest_entry(Man11, 4, 2, E2_2, E5),
?assertMatch(4, get_manifest_sqn(Man12)),
?assertMatch("pid_z5", key_lookup(Man12, 2, LK1_4)).
@ -1057,7 +1138,8 @@ levelzero_present_test() ->
E0 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={o, "Bucket1", "Key996", null},
filename="Z0",
owner="pid_z0"},
owner="pid_z0",
bloom=none},
Man0 = new_manifest(),
?assertMatch(false, levelzero_present(Man0)),
@ -1070,15 +1152,18 @@ snapshot_release_test() ->
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
filename="Z1",
owner="pid_z1"},
owner="pid_z1",
bloom=none},
E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
end_key={o, "Bucket1", "K71", null},
filename="Z2",
owner="pid_z2"},
owner="pid_z2",
bloom=none},
E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
end_key={o, "Bucket1", "K993", null},
filename="Z3",
owner="pid_z3"},
owner="pid_z3",
bloom=none},
Man7 = add_snapshot(Man6, pid_a1, 3600),
Man8 = remove_manifest_entry(Man7, 2, 1, E1),
@ -1134,18 +1219,18 @@ potential_issue_test() ->
{[],
[{manifest_entry,{o_rkv,"Bucket","Key10",null},
{o_rkv,"Bucket","Key12949",null},
"<0.313.0>","./16_1_0.sst"},
"<0.313.0>","./16_1_0.sst", none},
{manifest_entry,{o_rkv,"Bucket","Key129490",null},
{o_rkv,"Bucket","Key158981",null},
"<0.315.0>","./16_1_1.sst"},
"<0.315.0>","./16_1_1.sst", none},
{manifest_entry,{o_rkv,"Bucket","Key158982",null},
{o_rkv,"Bucket","Key188472",null},
"<0.316.0>","./16_1_2.sst"}],
"<0.316.0>","./16_1_2.sst", none}],
{idxt,1,
{{[{{o_rkv,"Bucket1","Key1",null},
{manifest_entry,{o_rkv,"Bucket","Key9083",null},
{o_rkv,"Bucket1","Key1",null},
"<0.320.0>","./16_1_6.sst"}}]},
"<0.320.0>","./16_1_6.sst", none}}]},
{1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}},
{idxt,0,{{},{0,nil}}},
{idxt,0,{{},{0,nil}}},
@ -1158,7 +1243,8 @@ potential_issue_test() ->
{dict,0,16,16,8,80,48,
{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
{{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}},
2},
2,
dict:new()},
Range1 = range_lookup(Manifest,
1,
{o_rkv, "Bucket", null, null},


@ -115,8 +115,7 @@
-record(slot_index_value, {slot_id :: integer(),
start_position :: integer(),
length :: integer(),
bloom :: binary()}).
length :: integer()}).
-record(summary, {first_key :: tuple(),
last_key :: tuple(),
@ -148,11 +147,11 @@
-record(sst_timings,
{sample_count = 0 :: integer(),
index_query_time = 0 :: integer(),
tiny_bloom_time = 0 :: integer(),
lookup_cache_time = 0 :: integer(),
slot_index_time = 0 :: integer(),
slot_fetch_time = 0 :: integer(),
noncached_block_time = 0 :: integer(),
tiny_bloom_count = 0 :: integer(),
lookup_cache_count = 0 :: integer(),
slot_index_count = 0 :: integer(),
slot_fetch_count = 0 :: integer(),
noncached_block_count = 0 :: integer()}).
@ -164,7 +163,8 @@
%%% API
%%%============================================================================
-spec sst_open(string(), string()) -> {ok, pid(), {tuple(), tuple()}}.
-spec sst_open(string(), string()) ->
{ok, pid(), {tuple(), tuple()}, binary()}.
%% @doc
%% Open an SST file at a given path and filename. The first and last keys
%% are returned in response to the request - so that those keys can be used
@ -178,13 +178,13 @@ sst_open(RootPath, Filename) ->
case gen_fsm:sync_send_event(Pid,
{sst_open, RootPath, Filename},
infinity) of
{ok, {SK, EK}} ->
{ok, Pid, {SK, EK}}
{ok, {SK, EK}, Bloom} ->
{ok, Pid, {SK, EK}, Bloom}
end.
-spec sst_new(string(), string(), integer(),
list(), integer(), press_methods()) ->
{ok, pid(), {tuple(), tuple()}}.
{ok, pid(), {tuple(), tuple()}, binary()}.
%% @doc
%% Start a new SST file at the assigned level passing in a list of Key, Value
%% pairs. This should not be used for basement levels or unexpanded Key/Value
@ -201,13 +201,13 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) ->
MaxSQN,
PressMethod},
infinity) of
{ok, {SK, EK}} ->
{ok, Pid, {SK, EK}}
{ok, {SK, EK}, Bloom} ->
{ok, Pid, {SK, EK}, Bloom}
end.
-spec sst_new(string(), string(), list(), list(),
boolean(), integer(), integer(), press_methods()) ->
empty|{ok, pid(), {{list(), list()}, tuple(), tuple()}}.
empty|{ok, pid(), {{list(), list()}, tuple(), tuple()}, binary()}.
%% @doc
%% Start a new SST file at the assigned level passing in a two lists of
%% {Key, Value} pairs to be merged. The merge_lists function will use the
@ -238,8 +238,8 @@ sst_new(RootPath, Filename,
MaxSQN,
PressMethod},
infinity) of
{ok, {SK, EK}} ->
{ok, Pid, {{Rem1, Rem2}, SK, EK}}
{ok, {SK, EK}, Bloom} ->
{ok, Pid, {{Rem1, Rem2}, SK, EK}, Bloom}
end
end.
@ -399,10 +399,11 @@ init([]) ->
{ok, starting, #state{}}.
starting({sst_open, RootPath, Filename}, _From, State) ->
UpdState = read_file(Filename, State#state{root_path=RootPath}),
{UpdState, Bloom} =
read_file(Filename, State#state{root_path=RootPath}),
Summary = UpdState#state.summary,
{reply,
{ok, {Summary#summary.first_key, Summary#summary.last_key}},
{ok, {Summary#summary.first_key, Summary#summary.last_key}, Bloom},
reader,
UpdState};
starting({sst_new,
@ -413,24 +414,22 @@ starting({sst_new,
{Length,
SlotIndex,
BlockIndex,
SlotsBin} = build_all_slots(SlotList, PressMethod),
SummaryBin = build_table_summary(SlotIndex,
Level,
FirstKey,
Length,
MaxSQN),
SlotsBin,
Bloom} = build_all_slots(SlotList, PressMethod),
SummaryBin =
build_table_summary(SlotIndex, Level, FirstKey, Length, MaxSQN, Bloom),
ActualFilename =
write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod),
YBQ = Level =< 2,
UpdState = read_file(ActualFilename,
State#state{root_path=RootPath,
yield_blockquery=YBQ}),
{UpdState, Bloom} =
read_file(ActualFilename,
State#state{root_path=RootPath, yield_blockquery=YBQ}),
Summary = UpdState#state.summary,
leveled_log:log_timer("SST08",
[ActualFilename, Level, Summary#summary.max_sqn],
SW),
{reply,
{ok, {Summary#summary.first_key, Summary#summary.last_key}},
{ok, {Summary#summary.first_key, Summary#summary.last_key}, Bloom},
reader,
UpdState#state{blockindex_cache = BlockIndex}}.
@ -449,23 +448,21 @@ starting({sst_newlevelzero, RootPath, Filename,
{SlotCount,
SlotIndex,
BlockIndex,
SlotsBin} = build_all_slots(SlotList, PressMethod),
SlotsBin,
Bloom} = build_all_slots(SlotList, PressMethod),
Time2 = timer:now_diff(os:timestamp(), SW2),
SW3 = os:timestamp(),
SummaryBin = build_table_summary(SlotIndex,
0,
FirstKey,
SlotCount,
MaxSQN),
SummaryBin =
build_table_summary(SlotIndex, 0, FirstKey, SlotCount, MaxSQN, Bloom),
Time3 = timer:now_diff(os:timestamp(), SW3),
SW4 = os:timestamp(),
ActualFilename =
write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod),
UpdState = read_file(ActualFilename,
State#state{root_path = RootPath,
yield_blockquery = true}),
{UpdState, Bloom} =
read_file(ActualFilename,
State#state{root_path=RootPath, yield_blockquery=true}),
Summary = UpdState#state.summary,
Time4 = timer:now_diff(os:timestamp(), SW4),
@ -483,7 +480,8 @@ starting({sst_newlevelzero, RootPath, Filename,
leveled_penciller:pcl_confirml0complete(Penciller,
UpdState#state.filename,
Summary#summary.first_key,
Summary#summary.last_key),
Summary#summary.last_key,
Bloom),
{next_state,
reader,
UpdState#state{blockindex_cache = BlockIndex}}
@ -646,26 +644,18 @@ fetch(LedgerKey, Hash, State, Timings0) ->
{SW1, Timings1} = update_timings(SW0, Timings0, index_query, true),
SlotID = Slot#slot_index_value.slot_id,
Bloom = Slot#slot_index_value.bloom,
case leveled_tinybloom:check_hash(Hash, Bloom) of
false ->
{_SW2, Timings2} =
update_timings(SW1, Timings1, tiny_bloom, false),
{not_present, State, Timings2};
true ->
{SW2, Timings2} =
update_timings(SW1, Timings1, tiny_bloom, true),
CachedBlockIdx =
array:get(SlotID - 1, State#state.blockindex_cache),
{SW2, Timings2} = update_timings(SW1, Timings1, lookup_cache, true),
CachedBlockIdx = array:get(SlotID - 1,
State#state.blockindex_cache),
case CachedBlockIdx of
none ->
SlotBin = read_slot(State#state.handle, Slot),
{Result, BlockLengths, BlockIdx} =
binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod),
BlockIndexCache = array:set(SlotID - 1,
<<BlockLengths/binary,
BlockIdx/binary>>,
BlockIndexCache =
array:set(SlotID - 1,
<<BlockLengths/binary, BlockIdx/binary>>,
State#state.blockindex_cache),
{_SW3, Timings3} =
update_timings(SW2, Timings2, noncached_block, false),
@ -673,24 +663,15 @@ fetch(LedgerKey, Hash, State, Timings0) ->
State#state{blockindex_cache = BlockIndexCache},
Timings3};
<<BlockLengths:24/binary, BlockIdx/binary>> ->
PosList = find_pos(BlockIdx,
extra_hash(Hash),
[],
0),
PosList = find_pos(BlockIdx, extra_hash(Hash), [], 0),
case PosList of
[] ->
{_SW3, Timings3} =
update_timings(SW2,
Timings2,
slot_index,
false),
update_timings(SW2, Timings2, slot_index, false),
{not_present, State, Timings3};
_ ->
{SW3, Timings3} =
update_timings(SW2,
Timings2,
slot_index,
true),
update_timings(SW2, Timings2, slot_index, true),
StartPos = Slot#slot_index_value.start_position,
Result =
check_blocks(PosList,
@ -701,13 +682,9 @@ fetch(LedgerKey, Hash, State, Timings0) ->
PressMethod,
not_present),
{_SW4, Timings4} =
update_timings(SW3,
Timings3,
slot_fetch,
false),
update_timings(SW3, Timings3, slot_fetch, false),
{Result, State, Timings4}
end
end
end.
@ -808,7 +785,7 @@ read_file(Filename, State) ->
{Handle, FileVersion, SummaryBin} =
open_reader(filename:join(State#state.root_path, Filename)),
UpdState0 = imp_fileversion(FileVersion, State),
{Summary, SlotList} = read_table_summary(SummaryBin),
{Summary, Bloom, SlotList} = read_table_summary(SummaryBin),
BlockIndexCache = array:new([{size, Summary#summary.size},
{default, none}]),
UpdState1 = UpdState0#state{blockindex_cache = BlockIndexCache},
@ -817,9 +794,10 @@ read_file(Filename, State) ->
leveled_log:log("SST03", [Filename,
Summary#summary.size,
Summary#summary.max_sqn]),
UpdState1#state{summary = UpdSummary,
{UpdState1#state{summary = UpdSummary,
handle = Handle,
filename = Filename}.
filename = Filename},
Bloom}.
gen_fileversion(PressMethod) ->
Bit1 =
@ -848,13 +826,14 @@ open_reader(Filename) ->
{ok, SummaryBin} = file:pread(Handle, SlotsLength + 9, SummaryLength),
{Handle, FileVersion, SummaryBin}.
build_table_summary(SlotIndex, _Level, FirstKey, SlotCount, MaxSQN) ->
build_table_summary(SlotIndex, _Level, FirstKey, SlotCount, MaxSQN, Bloom) ->
[{LastKey, _LastV}|_Rest] = SlotIndex,
Summary = #summary{first_key = FirstKey,
last_key = LastKey,
size = SlotCount,
max_sqn = MaxSQN},
SummBin = term_to_binary({Summary, lists:reverse(SlotIndex)},
SummBin =
term_to_binary({Summary, Bloom, lists:reverse(SlotIndex)},
?BINARY_SETTINGS),
SummCRC = erlang:crc32(SummBin),
<<SummCRC:32/integer, SummBin/binary>>.
@ -878,30 +857,31 @@ build_all_slots(SlotList, PressMethod) ->
array:new([{size, SlotCount},
{default, none}]),
<<>>,
[],
PressMethod),
{SlotIndex, BlockIndex, SlotsBin} = BuildResponse,
{SlotCount, SlotIndex, BlockIndex, SlotsBin}.
{SlotIndex, BlockIndex, SlotsBin, HashLists} = BuildResponse,
Bloom = leveled_ebloom:create_bloom(HashLists),
{SlotCount, SlotIndex, BlockIndex, SlotsBin, Bloom}.
build_all_slots([], _Pos, _SlotID,
SlotIdxAcc, BlockIdxAcc, SlotBinAcc,
SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists,
_PressMethod) ->
{SlotIdxAcc, BlockIdxAcc, SlotBinAcc};
{SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists};
build_all_slots([SlotD|Rest], Pos, SlotID,
SlotIdxAcc, BlockIdxAcc, SlotBinAcc,
SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists,
PressMethod) ->
{BlockIdx, SlotBin, HashList, LastKey} = SlotD,
Length = byte_size(SlotBin),
Bloom = leveled_tinybloom:create_bloom(HashList),
SlotIndexV = #slot_index_value{slot_id = SlotID,
start_position = Pos,
length = Length,
bloom = Bloom},
length = Length},
build_all_slots(Rest,
Pos + Length,
SlotID + 1,
[{LastKey, SlotIndexV}|SlotIdxAcc],
array:set(SlotID - 1, BlockIdx, BlockIdxAcc),
<<SlotBinAcc/binary, SlotBin/binary>>,
lists:append(HashLists, HashList),
PressMethod).
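%% Net effect (illustrative, not part of this commit): the hash lists are
%% now appended across slots and one bloom is built per file, rather than
%% one tinybloom per slot, e.g.
%%
%% Bloom = leveled_ebloom:create_bloom(AllHashes),
%% true = leveled_ebloom:check_hash(hd(AllHashes), Bloom).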
@ -1828,11 +1808,10 @@ log_timings(no_timing) ->
log_timings(Timings) ->
leveled_log:log("SST12", [Timings#sst_timings.sample_count,
Timings#sst_timings.index_query_time,
Timings#sst_timings.tiny_bloom_time,
Timings#sst_timings.lookup_cache_time,
Timings#sst_timings.slot_index_time,
Timings#sst_timings.slot_fetch_time,
Timings#sst_timings.noncached_block_time,
Timings#sst_timings.tiny_bloom_count,
Timings#sst_timings.slot_index_count,
Timings#sst_timings.slot_fetch_count,
Timings#sst_timings.noncached_block_count]).
@ -1847,9 +1826,9 @@ update_timings(SW, Timings, Stage, Continue) ->
index_query ->
IQT = Timings#sst_timings.index_query_time,
Timings#sst_timings{index_query_time = IQT + Timer};
tiny_bloom ->
TBT = Timings#sst_timings.tiny_bloom_time,
Timings#sst_timings{tiny_bloom_time = TBT + Timer};
lookup_cache ->
TBT = Timings#sst_timings.lookup_cache_time,
Timings#sst_timings{lookup_cache_time = TBT + Timer};
slot_index ->
SIT = Timings#sst_timings.slot_index_time,
Timings#sst_timings{slot_index_time = SIT + Timer};
@ -1866,9 +1845,6 @@ update_timings(SW, Timings, Stage, Continue) ->
false ->
Timings1 =
case Stage of
tiny_bloom ->
TBC = Timings#sst_timings.tiny_bloom_count,
Timings0#sst_timings{tiny_bloom_count = TBC + 1};
slot_index ->
SIC = Timings#sst_timings.slot_index_count,
Timings0#sst_timings{slot_index_count = SIC + 1};
@ -2149,9 +2125,9 @@ merge_test() ->
KVL2 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 20)),
KVL3 = lists:ukeymerge(1, KVL1, KVL2),
SW0 = os:timestamp(),
{ok, P1, {FK1, LK1}} =
{ok, P1, {FK1, LK1}, _Bloom1} =
sst_new("../test/", "level1_src", 1, KVL1, 6000, native),
{ok, P2, {FK2, LK2}} =
{ok, P2, {FK2, LK2}, _Bloom2} =
sst_new("../test/", "level2_src", 2, KVL2, 3000, native),
ExpFK1 = element(1, lists:nth(1, KVL1)),
ExpLK1 = element(1, lists:last(KVL1)),
@ -2165,7 +2141,7 @@ merge_test() ->
ML2 = [{next, #manifest_entry{owner = P2}, FK2}],
NewR =
sst_new("../test/", "level2_merge", ML1, ML2, false, 2, N * 2, native),
{ok, P3, {{Rem1, Rem2}, FK3, LK3}} = NewR,
{ok, P3, {{Rem1, Rem2}, FK3, LK3}, _Bloom3} = NewR,
?assertMatch([], Rem1),
?assertMatch([], Rem2),
?assertMatch(true, FK3 == min(FK1, FK2)),
@ -2198,7 +2174,7 @@ simple_persisted_range_test() ->
KVList1 = lists:ukeysort(1, KVList0),
[{FirstKey, _FV}|_Rest] = KVList1,
{LastKey, _LV} = lists:last(KVList1),
{ok, Pid, {FirstKey, LastKey}} =
{ok, Pid, {FirstKey, LastKey}, _Bloom} =
sst_new(RP, Filename, 1, KVList1, length(KVList1), native),
{o, B, K, null} = LastKey,
@ -2248,7 +2224,7 @@ additional_range_test() ->
[],
lists:seq(?NOLOOK_SLOTSIZE + Gap + 1,
2 * ?NOLOOK_SLOTSIZE + Gap)),
{ok, P1, {{Rem1, Rem2}, SK, EK}} =
{ok, P1, {{Rem1, Rem2}, SK, EK}, _Bloom1} =
sst_new("../test/", "range1_src", IK1, IK2, false, 1, 9999, native),
?assertMatch([], Rem1),
?assertMatch([], Rem2),
@ -2306,7 +2282,7 @@ simple_persisted_slotsize_test() ->
?LOOK_SLOTSIZE),
[{FirstKey, _FV}|_Rest] = KVList1,
{LastKey, _LV} = lists:last(KVList1),
{ok, Pid, {FirstKey, LastKey}} =
{ok, Pid, {FirstKey, LastKey}, _Bloom} =
sst_new(RP, Filename, 1, KVList1, length(KVList1), native),
lists:foreach(fun({K, V}) ->
?assertMatch({K, V}, sst_get(Pid, K))
@ -2321,7 +2297,7 @@ simple_persisted_test() ->
KVList1 = lists:ukeysort(1, KVList0),
[{FirstKey, _FV}|_Rest] = KVList1,
{LastKey, _LV} = lists:last(KVList1),
{ok, Pid, {FirstKey, LastKey}} =
{ok, Pid, {FirstKey, LastKey}, _Bloom} =
sst_new(RP, Filename, 1, KVList1, length(KVList1), native),
SW0 = os:timestamp(),
lists:foreach(fun({K, V}) ->
@ -2534,16 +2510,15 @@ check_segment_match(PosBinIndex1, KVL, TreeSize) ->
timings_test() ->
SW = os:timestamp(),
timer:sleep(1),
{no_timing, T0} = update_timings(SW, #sst_timings{}, tiny_bloom, false),
{no_timing, T1} = update_timings(SW, T0, slot_index, false),
{no_timing, T1} = update_timings(SW, #sst_timings{}, slot_index, false),
{no_timing, T2} = update_timings(SW, T1, slot_fetch, false),
{no_timing, T3} = update_timings(SW, T2, noncached_block, false),
timer:sleep(1),
{_, T4} = update_timings(SW, T3, tiny_bloom, true),
?assertMatch(4, T4#sst_timings.sample_count),
?assertMatch(1, T4#sst_timings.tiny_bloom_count),
?assertMatch(true, T4#sst_timings.tiny_bloom_time >
T3#sst_timings.tiny_bloom_time).
{_, T4} = update_timings(SW, T3, slot_fetch, true),
?assertMatch(3, T4#sst_timings.sample_count),
?assertMatch(1, T4#sst_timings.slot_fetch_count),
?assertMatch(true, T4#sst_timings.slot_fetch_time >
T3#sst_timings.slot_fetch_time).
-endif.


@ -1,278 +0,0 @@
%% -------- TinyBloom ---------
%%
%% A fixed-size bloom that supports 128 keys only, made to try to minimise
%% the cost of producing the bloom
%%
-module(leveled_tinybloom).
-include("include/leveled.hrl").
-include_lib("eunit/include/eunit.hrl").
-export([
create_bloom/1,
check_hash/2
]).
-define(BLOOM_SIZE_BYTES, 16).
-define(INTEGER_SIZE, 128).
-define(BAND_MASK, ?INTEGER_SIZE - 1).
%%%============================================================================
%%% API
%%%============================================================================
-spec create_bloom(list(integer())) -> binary().
%% @doc
%% Create a binary bloom filter from a list of hashes
create_bloom(HashList) ->
case length(HashList) of
0 ->
<<>>;
L when L > 32 ->
add_hashlist(HashList,
7,
0, 0, 0, 0, 0, 0, 0, 0);
L when L > 16 ->
add_hashlist(HashList, 3, 0, 0, 0, 0);
_ ->
add_hashlist(HashList, 1, 0, 0)
end.
-spec check_hash(integer(), binary()) -> boolean().
%% @doc
%% Check for the presence of a given hash within a bloom
check_hash(_Hash, <<>>) ->
false;
check_hash({_SegHash, Hash}, BloomBin) ->
SlotSplit = (byte_size(BloomBin) div ?BLOOM_SIZE_BYTES) - 1,
{Slot, Hashes} = split_hash(Hash, SlotSplit),
Mask = get_mask(Hashes),
Pos = Slot * ?BLOOM_SIZE_BYTES,
IntSize = ?INTEGER_SIZE,
<<_H:Pos/binary, CheckInt:IntSize/integer, _T/binary>> = BloomBin,
case CheckInt band Mask of
Mask ->
true;
_ ->
false
end.
%%%============================================================================
%%% Internal Functions
%%%============================================================================
split_hash(Hash, SlotSplit) ->
Slot = Hash band SlotSplit,
H0 = (Hash bsr 4) band (?BAND_MASK),
H1 = (Hash bsr 11) band (?BAND_MASK),
H2 = (Hash bsr 18) band (?BAND_MASK),
H3 = (Hash bsr 25) band (?BAND_MASK),
{Slot, [H0, H1, H2, H3]}.
get_mask([H0, H1, H2, H3]) ->
(1 bsl H0) bor (1 bsl H1) bor (1 bsl H2) bor (1 bsl H3).
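%% Worked example (illustrative): for Hash = 16#12345678 and SlotSplit = 1,
%% Slot = Hash band 1 = 0 and the four slices give H0 = 103, H1 = 10,
%% H2 = 13 and H3 = 9, so get_mask sets bits 9, 10, 13 and 103 of the
%% 128-bit integer for slot 0.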
%% This looks ugly and clunky, but in tests it was quicker than modifying an
%% Erlang term like an array as it is passed around the loop
add_hashlist([], _S, S0, S1) ->
IntSize = ?INTEGER_SIZE,
<<S0:IntSize/integer, S1:IntSize/integer>>;
add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1) ->
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
Mask = get_mask(Hashes),
case Slot of
0 ->
add_hashlist(T, SlotSplit, S0 bor Mask, S1);
1 ->
add_hashlist(T, SlotSplit, S0, S1 bor Mask)
end.
add_hashlist([], _S, S0, S1, S2, S3) ->
IntSize = ?INTEGER_SIZE,
<<S0:IntSize/integer, S1:IntSize/integer,
S2:IntSize/integer, S3:IntSize/integer>>;
add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1, S2, S3) ->
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
Mask = get_mask(Hashes),
case Slot of
0 ->
add_hashlist(T, SlotSplit, S0 bor Mask, S1, S2, S3);
1 ->
add_hashlist(T, SlotSplit, S0, S1 bor Mask, S2, S3);
2 ->
add_hashlist(T, SlotSplit, S0, S1, S2 bor Mask, S3);
3 ->
add_hashlist(T, SlotSplit, S0, S1, S2, S3 bor Mask)
end.
add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7) ->
IntSize = ?INTEGER_SIZE,
<<S0:IntSize/integer, S1:IntSize/integer,
S2:IntSize/integer, S3:IntSize/integer,
S4:IntSize/integer, S5:IntSize/integer,
S6:IntSize/integer, S7:IntSize/integer>>;
add_hashlist([{_SegHash, TopHash}|T],
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7) ->
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
Mask = get_mask(Hashes),
case Slot of
0 ->
add_hashlist(T,
SlotSplit,
S0 bor Mask, S1, S2, S3, S4, S5, S6, S7);
1 ->
add_hashlist(T,
SlotSplit,
S0, S1 bor Mask, S2, S3, S4, S5, S6, S7);
2 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2 bor Mask, S3, S4, S5, S6, S7);
3 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3 bor Mask, S4, S5, S6, S7);
4 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4 bor Mask, S5, S6, S7);
5 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5 bor Mask, S6, S7);
6 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6 bor Mask, S7);
7 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7 bor Mask)
end.
%%%============================================================================
%%% Test
%%%============================================================================
-ifdef(TEST).
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
generate_randomkeys(Seqn,
Count,
[],
BucketRangeLow,
BucketRangeHigh).
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BRand = leveled_rand:uniform(BRange),
BNumber = string:right(integer_to_list(BucketLow + BRand), 4, $0),
KNumber = string:right(integer_to_list(leveled_rand:uniform(10000)), 6, $0),
LK = leveled_codec:to_ledgerkey("Bucket" ++ BNumber, "Key" ++ KNumber, o),
Chunk = leveled_rand:rand_bytes(16),
{_B, _K, MV, _H, _LMs} =
leveled_codec:generate_ledgerkv(LK, Seqn, Chunk, 64, infinity),
generate_randomkeys(Seqn + 1,
Count - 1,
[{LK, MV}|Acc],
BucketLow,
BRange).
get_hashlist(N) ->
KVL0 = lists:ukeysort(1, generate_randomkeys(1, N * 2, 1, 20)),
KVL = lists:sublist(KVL0, N),
HashFun =
fun({K, _V}) ->
leveled_codec:segment_hash(K)
end,
lists:map(HashFun, KVL).
check_all_hashes(BloomBin, HashList) ->
CheckFun =
fun(Hash) ->
?assertMatch(true, check_hash(Hash, BloomBin))
end,
lists:foreach(CheckFun, HashList).
check_neg_hashes(BloomBin, HashList, Counters) ->
CheckFun =
fun(Hash, {AccT, AccF}) ->
case check_hash(Hash, BloomBin) of
true ->
{AccT + 1, AccF};
false ->
{AccT, AccF + 1}
end
end,
lists:foldl(CheckFun, Counters, HashList).
empty_bloom_test() ->
BloomBin0 = create_bloom([]),
?assertMatch({0, 4},
check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0})).
bloom_test_() ->
{timeout, 20, fun bloom_test_ranges/0}.
bloom_test_ranges() ->
test_bloom(128, 256),
test_bloom(64, 100),
test_bloom(32, 100),
test_bloom(16, 100),
test_bloom(8, 100).
test_bloom(N, Runs) ->
ListOfHashLists =
lists:map(fun(_X) -> get_hashlist(N) end, lists:seq(1, Runs)),
SWa = os:timestamp(),
ListOfBlooms =
lists:map(fun(HL) -> create_bloom(HL) end, ListOfHashLists),
TSa = timer:now_diff(os:timestamp(), SWa),
SWb = os:timestamp(),
lists:foreach(fun(Nth) ->
HL = lists:nth(Nth, ListOfHashLists),
BB = lists:nth(Nth, ListOfBlooms),
check_all_hashes(BB, HL)
end,
lists:seq(1, Runs)),
TSb = timer:now_diff(os:timestamp(), SWb),
HashPool = get_hashlist(N * 2),
ListOfMisses =
lists:map(fun(HL) ->
lists:sublist(lists:subtract(HashPool, HL), N)
end,
ListOfHashLists),
SWc = os:timestamp(),
{Pos, Neg} =
lists:foldl(fun(Nth, Acc) ->
HL = lists:nth(Nth, ListOfMisses),
BB = lists:nth(Nth, ListOfBlooms),
check_neg_hashes(BB, HL, Acc)
end,
{0, 0},
lists:seq(1, Runs)),
FPR = Pos / (Pos + Neg),
TSc = timer:now_diff(os:timestamp(), SWc),
io:format(user,
"Test with size ~w has microsecond timings: -"
++ " build ~w check ~w neg_check ~w and fpr ~w~n",
[N, TSa, TSb, TSc, FPR]).
-endif.


@ -879,7 +879,7 @@ search_range_idx_test() ->
{{[{{o_rkv,"Bucket1","Key1",null},
{manifest_entry,{o_rkv,"Bucket","Key9083",null},
{o_rkv,"Bucket1","Key1",null},
"<0.320.0>","./16_1_6.sst"}}]},
"<0.320.0>","./16_1_6.sst", none}}]},
{1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}},
StartKeyFun =
fun(ME) ->