Switch to using bloom at penciller

Previously the tinybloom was used within the SST file as an extra check to remove false fetches.

However, the SST already has a low-FPR check in the slot_index.  If the new ebloom is used (which is no longer per slot, but per SST), it can be shared with the penciller, and the penciller can then use it to avoid the message pass.

The message pass may be blocked by a 2i query or a slot fetch request for a merge, so this should make performance within the Penciller snappier.

This is as a result of taking sst_timings within a volume test - where there was an average of +100 microseconds for each level that was dropped down.  Given that the bloom/slot checks were < 20 microseconds, there seems to be some further delay.

The bloom is a binary of > 64 bytes - so passing it around should not require a copy.
This commit is contained in:
Martin Sumner 2017-11-28 01:19:30 +00:00
parent 467ad50cd1
commit c2f19d8825
8 changed files with 367 additions and 572 deletions

View file

@ -33,7 +33,8 @@
{start_key :: tuple() | undefined, {start_key :: tuple() | undefined,
end_key :: tuple() | undefined, end_key :: tuple() | undefined,
owner :: pid()|list(), owner :: pid()|list(),
filename :: string() | undefined}). filename :: string() | undefined,
bloom :: binary() | none}).
-record(cdb_options, -record(cdb_options,
{max_size :: integer() | undefined, {max_size :: integer() | undefined,

View file

@ -226,9 +226,9 @@
{"SST12", {"SST12",
{info, "SST Timings for sample_count=~w" {info, "SST Timings for sample_count=~w"
++ " at timing points index_query_time=~w" ++ " at timing points index_query_time=~w"
++ " tiny_bloom_time=~w slot_index_time=~w slot_fetch_time=~w" ++ " lookup_cache_time=~w slot_index_time=~w slot_fetch_time=~w"
++ " noncached_block_fetch_time=~w" ++ " noncached_block_fetch_time=~w"
++ " exiting at points tiny_bloom=~w slot_index=~w" ++ " exiting at points slot_index=~w"
++ " slot_fetch=~w noncached_block_fetch=~w"}}, ++ " slot_fetch=~w noncached_block_fetch=~w"}},

View file

@ -223,12 +223,13 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, CM, Additions) ->
RP, NewSQN, MaxSQN, RP, NewSQN, MaxSQN,
CM, CM,
Additions); Additions);
{ok, Pid, Reply} -> {ok, Pid, Reply, Bloom} ->
{{KL1Rem, KL2Rem}, SmallestKey, HighestKey} = Reply, {{KL1Rem, KL2Rem}, SmallestKey, HighestKey} = Reply,
Entry = #manifest_entry{start_key=SmallestKey, Entry = #manifest_entry{start_key=SmallestKey,
end_key=HighestKey, end_key=HighestKey,
owner=Pid, owner=Pid,
filename=FileName}, filename=FileName,
bloom=Bloom},
leveled_log:log_timer("PC015", [], TS1), leveled_log:log_timer("PC015", [], TS1),
do_merge(KL1Rem, KL2Rem, do_merge(KL1Rem, KL2Rem,
SinkLevel, SinkB, SinkLevel, SinkB,
@ -275,35 +276,40 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) ->
merge_file_test() -> merge_file_test() ->
KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)), KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)),
{ok, PidL1_1, _} = leveled_sst:sst_new("../test/", {ok, PidL1_1, _, _} =
leveled_sst:sst_new("../test/",
"KL1_L1.sst", "KL1_L1.sst",
1, 1,
KL1_L1, KL1_L1,
999999, 999999,
native), native),
KL1_L2 = lists:sort(generate_randomkeys(8000, 0, 250)), KL1_L2 = lists:sort(generate_randomkeys(8000, 0, 250)),
{ok, PidL2_1, _} = leveled_sst:sst_new("../test/", {ok, PidL2_1, _, _} =
leveled_sst:sst_new("../test/",
"KL1_L2.sst", "KL1_L2.sst",
2, 2,
KL1_L2, KL1_L2,
999999, 999999,
native), native),
KL2_L2 = lists:sort(generate_randomkeys(8000, 250, 250)), KL2_L2 = lists:sort(generate_randomkeys(8000, 250, 250)),
{ok, PidL2_2, _} = leveled_sst:sst_new("../test/", {ok, PidL2_2, _, _} =
leveled_sst:sst_new("../test/",
"KL2_L2.sst", "KL2_L2.sst",
2, 2,
KL2_L2, KL2_L2,
999999, 999999,
lz4), lz4),
KL3_L2 = lists:sort(generate_randomkeys(8000, 500, 250)), KL3_L2 = lists:sort(generate_randomkeys(8000, 500, 250)),
{ok, PidL2_3, _} = leveled_sst:sst_new("../test/", {ok, PidL2_3, _, _} =
leveled_sst:sst_new("../test/",
"KL3_L2.sst", "KL3_L2.sst",
2, 2,
KL3_L2, KL3_L2,
999999, 999999,
lz4), lz4),
KL4_L2 = lists:sort(generate_randomkeys(8000, 750, 250)), KL4_L2 = lists:sort(generate_randomkeys(8000, 750, 250)),
{ok, PidL2_4, _} = leveled_sst:sst_new("../test/", {ok, PidL2_4, _, _} =
leveled_sst:sst_new("../test/",
"KL4_L2.sst", "KL4_L2.sst",
2, 2,
KL4_L2, KL4_L2,

View file

@ -181,7 +181,7 @@
pcl_checksequencenumber/3, pcl_checksequencenumber/3,
pcl_workforclerk/1, pcl_workforclerk/1,
pcl_manifestchange/2, pcl_manifestchange/2,
pcl_confirml0complete/4, pcl_confirml0complete/5,
pcl_confirmdelete/3, pcl_confirmdelete/3,
pcl_close/1, pcl_close/1,
pcl_doom/1, pcl_doom/1,
@ -439,14 +439,14 @@ pcl_workforclerk(Pid) ->
pcl_manifestchange(Pid, Manifest) -> pcl_manifestchange(Pid, Manifest) ->
gen_server:cast(Pid, {manifest_change, Manifest}). gen_server:cast(Pid, {manifest_change, Manifest}).
-spec pcl_confirml0complete(pid(), string(), tuple(), tuple()) -> ok. -spec pcl_confirml0complete(pid(), string(), tuple(), tuple(), binary()) -> ok.
%% @doc %% @doc
%% Allows a SST writer that has written a L0 file to confirm that the file %% Allows a SST writer that has written a L0 file to confirm that the file
%% is now complete, so the filename and key ranges can be added to the %% is now complete, so the filename and key ranges can be added to the
%% manifest and the file can be used in place of the in-memory levelzero %% manifest and the file can be used in place of the in-memory levelzero
%% cache. %% cache.
pcl_confirml0complete(Pid, FN, StartKey, EndKey) -> pcl_confirml0complete(Pid, FN, StartKey, EndKey, Bloom) ->
gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey}). gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey, Bloom}).
-spec pcl_confirmdelete(pid(), string(), pid()) -> ok. -spec pcl_confirmdelete(pid(), string(), pid()) -> ok.
%% @doc %% @doc
@ -759,12 +759,13 @@ handle_cast({confirm_delete, Filename, FilePid}, State=#state{is_snapshot=Snap})
% from the Clerk % from the Clerk
{noreply, State} {noreply, State}
end; end;
handle_cast({levelzero_complete, FN, StartKey, EndKey}, State) -> handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) ->
leveled_log:log("P0029", []), leveled_log:log("P0029", []),
ManEntry = #manifest_entry{start_key=StartKey, ManEntry = #manifest_entry{start_key=StartKey,
end_key=EndKey, end_key=EndKey,
owner=State#state.levelzero_constructor, owner=State#state.levelzero_constructor,
filename=FN}, filename=FN,
bloom=Bloom},
ManifestSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1, ManifestSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1,
UpdMan = leveled_pmanifest:insert_manifest_entry(State#state.manifest, UpdMan = leveled_pmanifest:insert_manifest_entry(State#state.manifest,
ManifestSQN, ManifestSQN,
@ -837,7 +838,7 @@ terminate(Reason, State) ->
L0_Left = State#state.levelzero_size > 0, L0_Left = State#state.levelzero_size > 0,
case {State#state.levelzero_pending, L0_Present, L0_Left} of case {State#state.levelzero_pending, L0_Present, L0_Left} of
{false, false, true} -> {false, false, true} ->
L0Pid = roll_memory(State, true), {L0Pid, _L0Bloom} = roll_memory(State, true),
ok = leveled_sst:sst_close(L0Pid); ok = leveled_sst:sst_close(L0Pid);
StatusTuple -> StatusTuple ->
leveled_log:log("P0010", [StatusTuple]) leveled_log:log("P0010", [StatusTuple])
@ -911,11 +912,9 @@ start_from_file(PCLopts) ->
Manifest0 = leveled_pmanifest:open_manifest(RootPath), Manifest0 = leveled_pmanifest:open_manifest(RootPath),
OpenFun = OpenFun =
fun(FN) -> fun(FN) ->
{ok, {ok, Pid, {_FK, _LK}, Bloom} =
Pid,
{_FK, _LK}} =
leveled_sst:sst_open(sst_rootpath(RootPath), FN), leveled_sst:sst_open(sst_rootpath(RootPath), FN),
Pid {Pid, Bloom}
end, end,
SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1, SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1,
{MaxSQN, Manifest1, FileList} = {MaxSQN, Manifest1, FileList} =
@ -930,12 +929,13 @@ start_from_file(PCLopts) ->
true -> true ->
leveled_log:log("P0015", [L0FN]), leveled_log:log("P0015", [L0FN]),
L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN), L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN),
{ok, L0Pid, {L0StartKey, L0EndKey}} = L0Open, {ok, L0Pid, {L0StartKey, L0EndKey}, Bloom} = L0Open,
L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid), L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid),
L0Entry = #manifest_entry{start_key = L0StartKey, L0Entry = #manifest_entry{start_key = L0StartKey,
end_key = L0EndKey, end_key = L0EndKey,
filename = L0FN, filename = L0FN,
owner = L0Pid}, owner = L0Pid,
bloom = Bloom},
Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1, Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1,
ManSQN + 1, ManSQN + 1,
0, 0,
@ -1025,7 +1025,7 @@ update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN},
JitterCheck = RandomFactor or CacheMuchTooBig, JitterCheck = RandomFactor or CacheMuchTooBig,
case {CacheTooBig, L0Free, JitterCheck, NoPendingManifestChange} of case {CacheTooBig, L0Free, JitterCheck, NoPendingManifestChange} of
{true, true, true, true} -> {true, true, true, true} ->
L0Constructor = roll_memory(UpdState, false), {L0Constructor, none} = roll_memory(UpdState, false),
leveled_log:log_timer("P0031", [true, true], SW), leveled_log:log_timer("P0031", [true, true], SW),
UpdState#state{levelzero_pending=true, UpdState#state{levelzero_pending=true,
levelzero_constructor=L0Constructor}; levelzero_constructor=L0Constructor};
@ -1063,7 +1063,7 @@ roll_memory(State, false) ->
State#state.ledger_sqn, State#state.ledger_sqn,
State#state.compression_method), State#state.compression_method),
{ok, Constructor, _} = R, {ok, Constructor, _} = R,
Constructor; {Constructor, none};
roll_memory(State, true) -> roll_memory(State, true) ->
ManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1, ManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1,
RootPath = sst_rootpath(State#state.root_path), RootPath = sst_rootpath(State#state.root_path),
@ -1077,8 +1077,8 @@ roll_memory(State, true) ->
KVList, KVList,
State#state.ledger_sqn, State#state.ledger_sqn,
State#state.compression_method), State#state.compression_method),
{ok, Constructor, _} = R, {ok, Constructor, _, Bloom} = R,
Constructor. {Constructor, Bloom}.
timed_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, Timings) -> timed_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, Timings) ->
SW = os:timestamp(), SW = os:timestamp(),
@ -1107,11 +1107,16 @@ fetch(Key, Hash, Manifest, Level, FetchFun) ->
false -> false ->
fetch(Key, Hash, Manifest, Level + 1, FetchFun); fetch(Key, Hash, Manifest, Level + 1, FetchFun);
FP -> FP ->
case leveled_pmanifest:check_bloom(Manifest, FP, Hash) of
true ->
case FetchFun(FP, Key, Hash, Level) of case FetchFun(FP, Key, Hash, Level) of
not_present -> not_present ->
fetch(Key, Hash, Manifest, Level + 1, FetchFun); fetch(Key, Hash, Manifest, Level + 1, FetchFun);
ObjectFound -> ObjectFound ->
{ObjectFound, Level} {ObjectFound, Level}
end;
false ->
fetch(Key, Hash, Manifest, Level + 1, FetchFun)
end end
end. end.

View file

@ -41,7 +41,8 @@
ready_to_delete/2, ready_to_delete/2,
check_for_work/2, check_for_work/2,
is_basement/2, is_basement/2,
levelzero_present/1 levelzero_present/1,
check_bloom/3
]). ]).
-export([ -export([
@ -69,8 +70,10 @@
pending_deletes, % OTP16 does not like defining type pending_deletes, % OTP16 does not like defining type
% a dictionary mapping keys (filenames) to SQN when the % a dictionary mapping keys (filenames) to SQN when the
% deletion was made, and the original Manifest Entry % deletion was made, and the original Manifest Entry
basement :: integer() basement :: integer(),
% Currently the lowest level (the largest number) % Currently the lowest level (the largest number)
blooms :: dict:dict()
% A dictionary mapping PIDs to bloom filters
}). }).
-type manifest() :: #manifest{}. -type manifest() :: #manifest{}.
@ -100,7 +103,8 @@ new_manifest() ->
manifest_sqn = 0, manifest_sqn = 0,
snapshots = [], snapshots = [],
pending_deletes = dict:new(), pending_deletes = dict:new(),
basement = 0 basement = 0,
blooms = dict:new()
}. }.
-spec open_manifest(string()) -> manifest(). -spec open_manifest(string()) -> manifest().
@ -143,17 +147,23 @@ copy_manifest(Manifest) ->
%% manifest. The PidFun should be able to return the Pid of a file process %% manifest. The PidFun should be able to return the Pid of a file process
%% (having started one). The SQNFun will return the max sequence number %% (having started one). The SQNFun will return the max sequence number
%% of that file, if passed the Pid that owns it. %% of that file, if passed the Pid that owns it.
load_manifest(Manifest, PidFun, SQNFun) -> load_manifest(Manifest, LoadFun, SQNFun) ->
UpdateLevelFun = UpdateLevelFun =
fun(LevelIdx, {AccMaxSQN, AccMan, AccFL}) -> fun(LevelIdx, {AccMaxSQN, AccMan, AccFL}) ->
L0 = array:get(LevelIdx, AccMan#manifest.levels), L0 = array:get(LevelIdx, AccMan#manifest.levels),
{L1, SQN1, FileList} = load_level(LevelIdx, L0, PidFun, SQNFun), {L1, SQN1, FileList, LvlBloom} =
load_level(LevelIdx, L0, LoadFun, SQNFun),
UpdLevels = array:set(LevelIdx, L1, AccMan#manifest.levels), UpdLevels = array:set(LevelIdx, L1, AccMan#manifest.levels),
UpdBlooms =
dict:merge(fun(_K, V, V) -> V end,
AccMan#manifest.blooms,
LvlBloom),
{max(AccMaxSQN, SQN1), {max(AccMaxSQN, SQN1),
AccMan#manifest{levels = UpdLevels}, AccMan#manifest{levels = UpdLevels, blooms = UpdBlooms},
AccFL ++ FileList} AccFL ++ FileList}
end, end,
lists:foldl(UpdateLevelFun, {0, Manifest, []}, lists:foldl(UpdateLevelFun,
{0, Manifest, []},
lists:seq(0, Manifest#manifest.basement)). lists:seq(0, Manifest#manifest.basement)).
-spec close_manifest(manifest(), fun()) -> ok. -spec close_manifest(manifest(), fun()) -> ok.
@ -182,7 +192,8 @@ save_manifest(Manifest, RootPath) ->
FP = filepath(RootPath, Manifest#manifest.manifest_sqn, current_manifest), FP = filepath(RootPath, Manifest#manifest.manifest_sqn, current_manifest),
ManBin = term_to_binary(Manifest#manifest{snapshots = [], ManBin = term_to_binary(Manifest#manifest{snapshots = [],
pending_deletes = dict:new(), pending_deletes = dict:new(),
min_snapshot_sqn = 0}), min_snapshot_sqn = 0,
blooms = dict:new()}),
CRC = erlang:crc32(ManBin), CRC = erlang:crc32(ManBin),
ok = file:write_file(FP, <<CRC:32/integer, ManBin/binary>>). ok = file:write_file(FP, <<CRC:32/integer, ManBin/binary>>).
@ -198,9 +209,12 @@ save_manifest(Manifest, RootPath) ->
replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) -> replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) ->
Levels = Manifest#manifest.levels, Levels = Manifest#manifest.levels,
Level = array:get(LevelIdx, Levels), Level = array:get(LevelIdx, Levels),
UpdLevel = replace_entry(LevelIdx, Level, Removals, Additions), {UpdBlooms, StrippedAdditions} =
update_blooms(Removals, Additions, Manifest#manifest.blooms),
UpdLevel = replace_entry(LevelIdx, Level, Removals, StrippedAdditions),
leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]), leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]),
PendingDeletes = update_pendingdeletes(ManSQN, PendingDeletes =
update_pendingdeletes(ManSQN,
Removals, Removals,
Manifest#manifest.pending_deletes), Manifest#manifest.pending_deletes),
UpdLevels = array:set(LevelIdx, UpdLevel, Levels), UpdLevels = array:set(LevelIdx, UpdLevel, Levels),
@ -209,13 +223,15 @@ replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) ->
Manifest#manifest{levels = UpdLevels, Manifest#manifest{levels = UpdLevels,
basement = get_basement(UpdLevels), basement = get_basement(UpdLevels),
manifest_sqn = ManSQN, manifest_sqn = ManSQN,
pending_deletes = PendingDeletes}; pending_deletes = PendingDeletes,
blooms = UpdBlooms};
false -> false ->
Basement = max(LevelIdx, Manifest#manifest.basement), Basement = max(LevelIdx, Manifest#manifest.basement),
Manifest#manifest{levels = UpdLevels, Manifest#manifest{levels = UpdLevels,
basement = Basement, basement = Basement,
manifest_sqn = ManSQN, manifest_sqn = ManSQN,
pending_deletes = PendingDeletes} pending_deletes = PendingDeletes,
blooms = UpdBlooms}
end. end.
-spec insert_manifest_entry(manifest(), integer(), integer(), -spec insert_manifest_entry(manifest(), integer(), integer(),
@ -226,12 +242,15 @@ replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) ->
insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
Levels = Manifest#manifest.levels, Levels = Manifest#manifest.levels,
Level = array:get(LevelIdx, Levels), Level = array:get(LevelIdx, Levels),
UpdLevel = add_entry(LevelIdx, Level, Entry), {UpdBlooms, UpdEntry} =
update_blooms([], Entry, Manifest#manifest.blooms),
UpdLevel = add_entry(LevelIdx, Level, UpdEntry),
leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]), leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]),
Basement = max(LevelIdx, Manifest#manifest.basement), Basement = max(LevelIdx, Manifest#manifest.basement),
Manifest#manifest{levels = array:set(LevelIdx, UpdLevel, Levels), Manifest#manifest{levels = array:set(LevelIdx, UpdLevel, Levels),
basement = Basement, basement = Basement,
manifest_sqn = ManSQN}. manifest_sqn = ManSQN,
blooms = UpdBlooms}.
-spec remove_manifest_entry(manifest(), integer(), integer(), -spec remove_manifest_entry(manifest(), integer(), integer(),
list()|manifest_entry()) -> manifest(). list()|manifest_entry()) -> manifest().
@ -240,6 +259,8 @@ insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
Levels = Manifest#manifest.levels, Levels = Manifest#manifest.levels,
Level = array:get(LevelIdx, Levels), Level = array:get(LevelIdx, Levels),
{UpdBlooms, []} =
update_blooms(Entry, [], Manifest#manifest.blooms),
UpdLevel = remove_entry(LevelIdx, Level, Entry), UpdLevel = remove_entry(LevelIdx, Level, Entry),
leveled_log:log("PC019", ["remove", LevelIdx, UpdLevel]), leveled_log:log("PC019", ["remove", LevelIdx, UpdLevel]),
PendingDeletes = update_pendingdeletes(ManSQN, PendingDeletes = update_pendingdeletes(ManSQN,
@ -251,11 +272,13 @@ remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
Manifest#manifest{levels = UpdLevels, Manifest#manifest{levels = UpdLevels,
basement = get_basement(UpdLevels), basement = get_basement(UpdLevels),
manifest_sqn = ManSQN, manifest_sqn = ManSQN,
pending_deletes = PendingDeletes}; pending_deletes = PendingDeletes,
blooms = UpdBlooms};
false -> false ->
Manifest#manifest{levels = UpdLevels, Manifest#manifest{levels = UpdLevels,
manifest_sqn = ManSQN, manifest_sqn = ManSQN,
pending_deletes = PendingDeletes} pending_deletes = PendingDeletes,
blooms = UpdBlooms}
end. end.
-spec switch_manifest_entry(manifest(), integer(), integer(), -spec switch_manifest_entry(manifest(), integer(), integer(),
@ -479,6 +502,20 @@ is_basement(Manifest, Level) ->
levelzero_present(Manifest) -> levelzero_present(Manifest) ->
not is_empty(0, array:get(0, Manifest#manifest.levels)). not is_empty(0, array:get(0, Manifest#manifest.levels)).
-spec check_bloom(manifest(), string(), {integer(), integer()}) -> boolean().
%% @doc
%% Check to see if a hash is present in a manifest entry by using the exported
%% bloom filter.  Returns true ("may be present") when no binary bloom is
%% stored against this file reference, so a missing bloom can never cause a
%% false negative - only the extra cost of a fetch that may miss.
%% NOTE(review): update_blooms/3 keys the blooms dict by the entry's owner
%% pid, whereas load_level/4 stores by filename - confirm the type of FP
%% against callers (the -spec above says string()).
check_bloom(Manifest, FP, Hash) ->
    case dict:find(FP, Manifest#manifest.blooms) of
        {ok, Bloom} when is_binary(Bloom) ->
            leveled_ebloom:check_hash(Hash, Bloom);
        _ ->
            true
    end.
%%%============================================================================ %%%============================================================================
%%% Internal Functions %%% Internal Functions
%%%============================================================================ %%%============================================================================
@ -489,35 +526,39 @@ levelzero_present(Manifest) ->
%% future branches may make lower levels trees or skiplists to improve fetch %% future branches may make lower levels trees or skiplists to improve fetch
%% efficiency %% efficiency
load_level(LevelIdx, Level, PidFun, SQNFun) -> load_level(LevelIdx, Level, LoadFun, SQNFun) ->
HigherLevelLoadFun = HigherLevelLoadFun =
fun(ME, {L_Out, L_MaxSQN, FileList}) -> fun(ME, {L_Out, L_MaxSQN, FileList, BloomD}) ->
FN = ME#manifest_entry.filename, FN = ME#manifest_entry.filename,
P = PidFun(FN), {P, Bloom} = LoadFun(FN),
SQN = SQNFun(P), SQN = SQNFun(P),
{[ME#manifest_entry{owner=P}|L_Out], {[ME#manifest_entry{owner=P}|L_Out],
max(SQN, L_MaxSQN), max(SQN, L_MaxSQN),
[FN|FileList]} [FN|FileList],
dict:store(FN, Bloom, BloomD)}
end, end,
LowerLevelLoadFun = LowerLevelLoadFun =
fun({EK, ME}, {L_Out, L_MaxSQN, FileList}) -> fun({EK, ME}, {L_Out, L_MaxSQN, FileList, BloomD}) ->
FN = ME#manifest_entry.filename, FN = ME#manifest_entry.filename,
P = PidFun(FN), {P, Bloom} = LoadFun(FN),
SQN = SQNFun(P), SQN = SQNFun(P),
{[{EK, ME#manifest_entry{owner=P}}|L_Out], {[{EK, ME#manifest_entry{owner=P}}|L_Out],
max(SQN, L_MaxSQN), max(SQN, L_MaxSQN),
[FN|FileList]} [FN|FileList],
dict:store(FN, Bloom, BloomD)}
end, end,
case LevelIdx =< 1 of case LevelIdx =< 1 of
true -> true ->
lists:foldr(HigherLevelLoadFun, {[], 0, []}, Level); lists:foldr(HigherLevelLoadFun, {[], 0, [], dict:new()}, Level);
false -> false ->
{L0, MaxSQN, Flist} = lists:foldr(LowerLevelLoadFun, {L0, MaxSQN, Flist, UpdBloomD} =
{[], 0, []}, lists:foldr(LowerLevelLoadFun,
{[], 0, [], dict:new()},
leveled_tree:to_list(Level)), leveled_tree:to_list(Level)),
{leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH),
MaxSQN, MaxSQN,
Flist} Flist,
UpdBloomD}
end. end.
close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 -> close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 ->
@ -567,9 +608,7 @@ add_entry(LevelIdx, Level, Entries) when is_list(Entries) ->
leveled_tree:from_orderedlist(lists:append([LHS, Entries0, RHS]), leveled_tree:from_orderedlist(lists:append([LHS, Entries0, RHS]),
?TREE_TYPE, ?TREE_TYPE,
?TREE_WIDTH) ?TREE_WIDTH)
end; end.
add_entry(LevelIdx, Level, Entry) ->
add_entry(LevelIdx, Level, [Entry]).
remove_entry(LevelIdx, Level, Entries) -> remove_entry(LevelIdx, Level, Entries) ->
% We're assuming we're removing a sorted sublist % We're assuming we're removing a sorted sublist
@ -608,12 +647,7 @@ replace_entry(LevelIdx, Level, Removals, Additions) when LevelIdx =< 1 ->
FirstEntry#manifest_entry.end_key), FirstEntry#manifest_entry.end_key),
{LHS, RHS} = lists:splitwith(PredFun, Level), {LHS, RHS} = lists:splitwith(PredFun, Level),
Post = lists:nthtail(SectionLength, RHS), Post = lists:nthtail(SectionLength, RHS),
case is_list(Additions) of
true ->
lists:append([LHS, Additions, Post]); lists:append([LHS, Additions, Post]);
false ->
lists:append([LHS, [Additions], Post])
end;
replace_entry(LevelIdx, Level, Removals, Additions) -> replace_entry(LevelIdx, Level, Removals, Additions) ->
{SectionLength, FirstEntry} = measure_removals(Removals), {SectionLength, FirstEntry} = measure_removals(Removals),
PredFun = pred_fun(LevelIdx, PredFun = pred_fun(LevelIdx,
@ -627,21 +661,11 @@ replace_entry(LevelIdx, Level, Removals, Additions) ->
_ -> _ ->
lists:nthtail(SectionLength, RHS) lists:nthtail(SectionLength, RHS)
end, end,
UpdList =
case is_list(Additions) of
true ->
MapFun = MapFun =
fun(ME) -> fun(ME) ->
{ME#manifest_entry.end_key, ME} {ME#manifest_entry.end_key, ME}
end, end,
Additions0 = lists:map(MapFun, Additions), UpdList = lists:append([LHS, lists:map(MapFun, Additions), Post]),
lists:append([LHS, Additions0, Post]);
false ->
lists:append([LHS,
[{Additions#manifest_entry.end_key,
Additions}],
Post])
end,
leveled_tree:from_orderedlist(UpdList, ?TREE_TYPE, ?TREE_WIDTH). leveled_tree:from_orderedlist(UpdList, ?TREE_TYPE, ?TREE_WIDTH).
@ -661,6 +685,46 @@ update_pendingdeletes(ManSQN, Removals, PendingDeletes) ->
end, end,
lists:foldl(DelFun, PendingDeletes, Entries). lists:foldl(DelFun, PendingDeletes, Entries).
-spec update_blooms(list()|manifest_entry(),
                        list()|manifest_entry(),
                        dict:dict())
                                                    -> {dict:dict(), list()}.
%% @doc
%% The manifest holds an owner -> bloom mapping for each file, and this needs
%% to be updated to represent the changes.  However, the blooms would bloat
%% out the stored manifest, so each bloom must be stripped from its manifest
%% entry as part of this process.
%% Removals and Additions may each be a single manifest_entry or a list of
%% entries; both are normalised to lists before folding.  Returns the updated
%% bloom dictionary and the additions with their bloom fields reset to none.
update_blooms(Removals, Additions, Blooms) ->
    % Normalise a bare manifest_entry to a singleton list
    Additions0 =
        case is_list(Additions) of
            true -> Additions;
            false -> [Additions]
        end,
    Removals0 =
        case is_list(Removals) of
            true -> Removals;
            false -> [Removals]
        end,
    RemFun =
        fun(R, BloomD) ->
            dict:erase(R#manifest_entry.owner, BloomD)
        end,
    AddFun =
        fun(A, BloomD) ->
            dict:store(A#manifest_entry.owner, A#manifest_entry.bloom, BloomD)
        end,
    % Strip the bloom so it is not persisted when the manifest is saved
    StripFun =
        fun(A) ->
            A#manifest_entry{bloom = none}
        end,
    % Erase removals first so a re-added owner keeps its new bloom
    Blooms0 = lists:foldl(RemFun, Blooms, Removals0),
    Blooms1 = lists:foldl(AddFun, Blooms0, Additions0),
    {Blooms1, lists:map(StripFun, Additions0)}.
key_lookup_level(LevelIdx, [], _Key) when LevelIdx =< 1 -> key_lookup_level(LevelIdx, [], _Key) when LevelIdx =< 1 ->
false; false;
key_lookup_level(LevelIdx, [Entry|Rest], Key) when LevelIdx =< 1 -> key_lookup_level(LevelIdx, [Entry|Rest], Key) when LevelIdx =< 1 ->
@ -782,27 +846,33 @@ initial_setup() ->
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"}, E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"}, end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
filename="Z1", filename="Z1",
owner="pid_z1"}, owner="pid_z1",
bloom=none},
E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"}, E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
end_key={o, "Bucket1", "K71", null}, end_key={o, "Bucket1", "K71", null},
filename="Z2", filename="Z2",
owner="pid_z2"}, owner="pid_z2",
bloom=none},
E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null}, E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
end_key={o, "Bucket1", "K993", null}, end_key={o, "Bucket1", "K993", null},
filename="Z3", filename="Z3",
owner="pid_z3"}, owner="pid_z3",
bloom=none},
E4 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"}, E4 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld7"}, "K93"}, end_key={i, "Bucket1", {"Idx1", "Fld7"}, "K93"},
filename="Z4", filename="Z4",
owner="pid_z4"}, owner="pid_z4",
bloom=none},
E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"}, E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"},
end_key={o, "Bucket1", "K78", null}, end_key={o, "Bucket1", "K78", null},
filename="Z5", filename="Z5",
owner="pid_z5"}, owner="pid_z5",
bloom=none},
E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null}, E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null},
end_key={o, "Bucket1", "K996", null}, end_key={o, "Bucket1", "K996", null},
filename="Z6", filename="Z6",
owner="pid_z6"}, owner="pid_z6",
bloom=none},
Man0 = new_manifest(), Man0 = new_manifest(),
@ -819,32 +889,39 @@ changeup_setup(Man6) ->
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"}, E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"}, end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
filename="Z1", filename="Z1",
owner="pid_z1"}, owner="pid_z1",
bloom=none},
E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"}, E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
end_key={o, "Bucket1", "K71", null}, end_key={o, "Bucket1", "K71", null},
filename="Z2", filename="Z2",
owner="pid_z2"}, owner="pid_z2",
bloom=none},
E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null}, E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
end_key={o, "Bucket1", "K993", null}, end_key={o, "Bucket1", "K993", null},
filename="Z3", filename="Z3",
owner="pid_z3"}, owner="pid_z3",
bloom=none},
E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"}, E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"}, end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"},
owner="pid_y1", owner="pid_y1",
filename="Y1"}, filename="Y1",
bloom=none},
E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"}, E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
end_key={o, "Bucket1", "K45", null}, end_key={o, "Bucket1", "K45", null},
owner="pid_y2", owner="pid_y2",
filename="Y2"}, filename="Y2",
bloom=none},
E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null}, E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null},
end_key={o, "Bucket1", "K812", null}, end_key={o, "Bucket1", "K812", null},
owner="pid_y3", owner="pid_y3",
filename="Y3"}, filename="Y3",
bloom=none},
E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null}, E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null},
end_key={o, "Bucket1", "K998", null}, end_key={o, "Bucket1", "K998", null},
owner="pid_y4", owner="pid_y4",
filename="Y4"}, filename="Y4",
bloom=none},
Man7 = remove_manifest_entry(Man6, 2, 1, E1), Man7 = remove_manifest_entry(Man6, 2, 1, E1),
Man8 = remove_manifest_entry(Man7, 2, 1, E2), Man8 = remove_manifest_entry(Man7, 2, 1, E2),
@ -949,32 +1026,39 @@ ext_keylookup_manifest_test() ->
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"}, E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"}, end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
filename="Z1", filename="Z1",
owner="pid_z1"}, owner="pid_z1",
bloom=none},
E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"}, E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
end_key={o, "Bucket1", "K71", null}, end_key={o, "Bucket1", "K71", null},
filename="Z2", filename="Z2",
owner="pid_z2"}, owner="pid_z2",
bloom=none},
E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null}, E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
end_key={o, "Bucket1", "K993", null}, end_key={o, "Bucket1", "K993", null},
filename="Z3", filename="Z3",
owner="pid_z3"}, owner="pid_z3",
bloom=none},
E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"}, E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"}, end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"},
owner="pid_y1", owner="pid_y1",
filename="Y1"}, filename="Y1",
bloom=none},
E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"}, E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
end_key={o, "Bucket1", "K45", null}, end_key={o, "Bucket1", "K45", null},
owner="pid_y2", owner="pid_y2",
filename="Y2"}, filename="Y2",
bloom=none},
E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null}, E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null},
end_key={o, "Bucket1", "K812", null}, end_key={o, "Bucket1", "K812", null},
owner="pid_y3", owner="pid_y3",
filename="Y3"}, filename="Y3",
bloom=none},
E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null}, E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null},
end_key={o, "Bucket1", "K998", null}, end_key={o, "Bucket1", "K998", null},
owner="pid_y4", owner="pid_y4",
filename="Y4"}, filename="Y4",
bloom=none},
Man8 = replace_manifest_entry(ManOpen2, 2, 1, E1, E1_2), Man8 = replace_manifest_entry(ManOpen2, 2, 1, E1, E1_2),
Man9 = remove_manifest_entry(Man8, 2, 1, [E2, E3]), Man9 = remove_manifest_entry(Man8, 2, 1, [E2, E3]),
@ -988,21 +1072,18 @@ ext_keylookup_manifest_test() ->
E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"}, E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"},
end_key={o, "Bucket1", "K78", null}, end_key={o, "Bucket1", "K78", null},
filename="Z5", filename="Z5",
owner="pid_z5"}, owner="pid_z5",
bloom=none},
E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null}, E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null},
end_key={o, "Bucket1", "K996", null}, end_key={o, "Bucket1", "K996", null},
filename="Z6", filename="Z6",
owner="pid_z6"}, owner="pid_z6",
bloom=none},
Man11 = remove_manifest_entry(Man10, 3, 2, [E5, E6]), Man11 = remove_manifest_entry(Man10, 3, 2, [E5, E6]),
?assertMatch(3, get_manifest_sqn(Man11)), ?assertMatch(3, get_manifest_sqn(Man11)),
?assertMatch(false, key_lookup(Man11, 2, LK1_4)), ?assertMatch(false, key_lookup(Man11, 2, LK1_4)),
E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
end_key={o, "Bucket1", "K45", null},
owner="pid_y2",
filename="Y2"},
Man12 = replace_manifest_entry(Man11, 4, 2, E2_2, E5), Man12 = replace_manifest_entry(Man11, 4, 2, E2_2, E5),
?assertMatch(4, get_manifest_sqn(Man12)), ?assertMatch(4, get_manifest_sqn(Man12)),
?assertMatch("pid_z5", key_lookup(Man12, 2, LK1_4)). ?assertMatch("pid_z5", key_lookup(Man12, 2, LK1_4)).
@ -1057,7 +1138,8 @@ levelzero_present_test() ->
E0 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"}, E0 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={o, "Bucket1", "Key996", null}, end_key={o, "Bucket1", "Key996", null},
filename="Z0", filename="Z0",
owner="pid_z0"}, owner="pid_z0",
bloom=none},
Man0 = new_manifest(), Man0 = new_manifest(),
?assertMatch(false, levelzero_present(Man0)), ?assertMatch(false, levelzero_present(Man0)),
@ -1070,15 +1152,18 @@ snapshot_release_test() ->
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"}, E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"}, end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
filename="Z1", filename="Z1",
owner="pid_z1"}, owner="pid_z1",
bloom=none},
E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"}, E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
end_key={o, "Bucket1", "K71", null}, end_key={o, "Bucket1", "K71", null},
filename="Z2", filename="Z2",
owner="pid_z2"}, owner="pid_z2",
bloom=none},
E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null}, E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
end_key={o, "Bucket1", "K993", null}, end_key={o, "Bucket1", "K993", null},
filename="Z3", filename="Z3",
owner="pid_z3"}, owner="pid_z3",
bloom=none},
Man7 = add_snapshot(Man6, pid_a1, 3600), Man7 = add_snapshot(Man6, pid_a1, 3600),
Man8 = remove_manifest_entry(Man7, 2, 1, E1), Man8 = remove_manifest_entry(Man7, 2, 1, E1),
@ -1134,18 +1219,18 @@ potential_issue_test() ->
{[], {[],
[{manifest_entry,{o_rkv,"Bucket","Key10",null}, [{manifest_entry,{o_rkv,"Bucket","Key10",null},
{o_rkv,"Bucket","Key12949",null}, {o_rkv,"Bucket","Key12949",null},
"<0.313.0>","./16_1_0.sst"}, "<0.313.0>","./16_1_0.sst", none},
{manifest_entry,{o_rkv,"Bucket","Key129490",null}, {manifest_entry,{o_rkv,"Bucket","Key129490",null},
{o_rkv,"Bucket","Key158981",null}, {o_rkv,"Bucket","Key158981",null},
"<0.315.0>","./16_1_1.sst"}, "<0.315.0>","./16_1_1.sst", none},
{manifest_entry,{o_rkv,"Bucket","Key158982",null}, {manifest_entry,{o_rkv,"Bucket","Key158982",null},
{o_rkv,"Bucket","Key188472",null}, {o_rkv,"Bucket","Key188472",null},
"<0.316.0>","./16_1_2.sst"}], "<0.316.0>","./16_1_2.sst", none}],
{idxt,1, {idxt,1,
{{[{{o_rkv,"Bucket1","Key1",null}, {{[{{o_rkv,"Bucket1","Key1",null},
{manifest_entry,{o_rkv,"Bucket","Key9083",null}, {manifest_entry,{o_rkv,"Bucket","Key9083",null},
{o_rkv,"Bucket1","Key1",null}, {o_rkv,"Bucket1","Key1",null},
"<0.320.0>","./16_1_6.sst"}}]}, "<0.320.0>","./16_1_6.sst", none}}]},
{1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}}, {1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}},
{idxt,0,{{},{0,nil}}}, {idxt,0,{{},{0,nil}}},
{idxt,0,{{},{0,nil}}}, {idxt,0,{{},{0,nil}}},
@ -1158,7 +1243,8 @@ potential_issue_test() ->
{dict,0,16,16,8,80,48, {dict,0,16,16,8,80,48,
{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}, {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
{{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}}, {{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}},
2}, 2,
dict:new()},
Range1 = range_lookup(Manifest, Range1 = range_lookup(Manifest,
1, 1,
{o_rkv, "Bucket", null, null}, {o_rkv, "Bucket", null, null},

View file

@ -115,8 +115,7 @@
-record(slot_index_value, {slot_id :: integer(), -record(slot_index_value, {slot_id :: integer(),
start_position :: integer(), start_position :: integer(),
length :: integer(), length :: integer()}).
bloom :: binary()}).
-record(summary, {first_key :: tuple(), -record(summary, {first_key :: tuple(),
last_key :: tuple(), last_key :: tuple(),
@ -148,11 +147,11 @@
-record(sst_timings, -record(sst_timings,
{sample_count = 0 :: integer(), {sample_count = 0 :: integer(),
index_query_time = 0 :: integer(), index_query_time = 0 :: integer(),
tiny_bloom_time = 0 :: integer(), lookup_cache_time = 0 :: integer(),
slot_index_time = 0 :: integer(), slot_index_time = 0 :: integer(),
slot_fetch_time = 0 :: integer(), slot_fetch_time = 0 :: integer(),
noncached_block_time = 0 :: integer(), noncached_block_time = 0 :: integer(),
tiny_bloom_count = 0 :: integer(), lookup_cache_count = 0 :: integer(),
slot_index_count = 0 :: integer(), slot_index_count = 0 :: integer(),
slot_fetch_count = 0 :: integer(), slot_fetch_count = 0 :: integer(),
noncached_block_count = 0 :: integer()}). noncached_block_count = 0 :: integer()}).
@ -164,7 +163,8 @@
%%% API %%% API
%%%============================================================================ %%%============================================================================
-spec sst_open(string(), string()) -> {ok, pid(), {tuple(), tuple()}}. -spec sst_open(string(), string()) ->
{ok, pid(), {tuple(), tuple()}, binary()}.
%% @doc %% @doc
%% Open an SST file at a given path and filename. The first and last keys %% Open an SST file at a given path and filename. The first and last keys
%% are returned in response to the request - so that those keys can be used %% are returned in response to the request - so that those keys can be used
@ -178,13 +178,13 @@ sst_open(RootPath, Filename) ->
case gen_fsm:sync_send_event(Pid, case gen_fsm:sync_send_event(Pid,
{sst_open, RootPath, Filename}, {sst_open, RootPath, Filename},
infinity) of infinity) of
{ok, {SK, EK}} -> {ok, {SK, EK}, Bloom} ->
{ok, Pid, {SK, EK}} {ok, Pid, {SK, EK}, Bloom}
end. end.
-spec sst_new(string(), string(), integer(), -spec sst_new(string(), string(), integer(),
list(), integer(), press_methods()) -> list(), integer(), press_methods()) ->
{ok, pid(), {tuple(), tuple()}}. {ok, pid(), {tuple(), tuple()}, binary()}.
%% @doc %% @doc
%% Start a new SST file at the assigned level passing in a list of Key, Value %% Start a new SST file at the assigned level passing in a list of Key, Value
%% pairs. This should not be used for basement levels or unexpanded Key/Value %% pairs. This should not be used for basement levels or unexpanded Key/Value
@ -201,13 +201,13 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) ->
MaxSQN, MaxSQN,
PressMethod}, PressMethod},
infinity) of infinity) of
{ok, {SK, EK}} -> {ok, {SK, EK}, Bloom} ->
{ok, Pid, {SK, EK}} {ok, Pid, {SK, EK}, Bloom}
end. end.
-spec sst_new(string(), string(), list(), list(), -spec sst_new(string(), string(), list(), list(),
boolean(), integer(), integer(), press_methods()) -> boolean(), integer(), integer(), press_methods()) ->
empty|{ok, pid(), {{list(), list()}, tuple(), tuple()}}. empty|{ok, pid(), {{list(), list()}, tuple(), tuple()}, binary()}.
%% @doc %% @doc
%% Start a new SST file at the assigned level passing in a two lists of %% Start a new SST file at the assigned level passing in a two lists of
%% {Key, Value} pairs to be merged. The merge_lists function will use the %% {Key, Value} pairs to be merged. The merge_lists function will use the
@ -238,8 +238,8 @@ sst_new(RootPath, Filename,
MaxSQN, MaxSQN,
PressMethod}, PressMethod},
infinity) of infinity) of
{ok, {SK, EK}} -> {ok, {SK, EK}, Bloom} ->
{ok, Pid, {{Rem1, Rem2}, SK, EK}} {ok, Pid, {{Rem1, Rem2}, SK, EK}, Bloom}
end end
end. end.
@ -399,10 +399,11 @@ init([]) ->
{ok, starting, #state{}}. {ok, starting, #state{}}.
starting({sst_open, RootPath, Filename}, _From, State) -> starting({sst_open, RootPath, Filename}, _From, State) ->
UpdState = read_file(Filename, State#state{root_path=RootPath}), {UpdState, Bloom} =
read_file(Filename, State#state{root_path=RootPath}),
Summary = UpdState#state.summary, Summary = UpdState#state.summary,
{reply, {reply,
{ok, {Summary#summary.first_key, Summary#summary.last_key}}, {ok, {Summary#summary.first_key, Summary#summary.last_key}, Bloom},
reader, reader,
UpdState}; UpdState};
starting({sst_new, starting({sst_new,
@ -413,24 +414,22 @@ starting({sst_new,
{Length, {Length,
SlotIndex, SlotIndex,
BlockIndex, BlockIndex,
SlotsBin} = build_all_slots(SlotList, PressMethod), SlotsBin,
SummaryBin = build_table_summary(SlotIndex, Bloom} = build_all_slots(SlotList, PressMethod),
Level, SummaryBin =
FirstKey, build_table_summary(SlotIndex, Level, FirstKey, Length, MaxSQN, Bloom),
Length,
MaxSQN),
ActualFilename = ActualFilename =
write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod), write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod),
YBQ = Level =< 2, YBQ = Level =< 2,
UpdState = read_file(ActualFilename, {UpdState, Bloom} =
State#state{root_path=RootPath, read_file(ActualFilename,
yield_blockquery=YBQ}), State#state{root_path=RootPath, yield_blockquery=YBQ}),
Summary = UpdState#state.summary, Summary = UpdState#state.summary,
leveled_log:log_timer("SST08", leveled_log:log_timer("SST08",
[ActualFilename, Level, Summary#summary.max_sqn], [ActualFilename, Level, Summary#summary.max_sqn],
SW), SW),
{reply, {reply,
{ok, {Summary#summary.first_key, Summary#summary.last_key}}, {ok, {Summary#summary.first_key, Summary#summary.last_key}, Bloom},
reader, reader,
UpdState#state{blockindex_cache = BlockIndex}}. UpdState#state{blockindex_cache = BlockIndex}}.
@ -449,23 +448,21 @@ starting({sst_newlevelzero, RootPath, Filename,
{SlotCount, {SlotCount,
SlotIndex, SlotIndex,
BlockIndex, BlockIndex,
SlotsBin} = build_all_slots(SlotList, PressMethod), SlotsBin,
Bloom} = build_all_slots(SlotList, PressMethod),
Time2 = timer:now_diff(os:timestamp(), SW2), Time2 = timer:now_diff(os:timestamp(), SW2),
SW3 = os:timestamp(), SW3 = os:timestamp(),
SummaryBin = build_table_summary(SlotIndex, SummaryBin =
0, build_table_summary(SlotIndex, 0, FirstKey, SlotCount, MaxSQN, Bloom),
FirstKey,
SlotCount,
MaxSQN),
Time3 = timer:now_diff(os:timestamp(), SW3), Time3 = timer:now_diff(os:timestamp(), SW3),
SW4 = os:timestamp(), SW4 = os:timestamp(),
ActualFilename = ActualFilename =
write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod), write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod),
UpdState = read_file(ActualFilename, {UpdState, Bloom} =
State#state{root_path = RootPath, read_file(ActualFilename,
yield_blockquery = true}), State#state{root_path=RootPath, yield_blockquery=true}),
Summary = UpdState#state.summary, Summary = UpdState#state.summary,
Time4 = timer:now_diff(os:timestamp(), SW4), Time4 = timer:now_diff(os:timestamp(), SW4),
@ -483,7 +480,8 @@ starting({sst_newlevelzero, RootPath, Filename,
leveled_penciller:pcl_confirml0complete(Penciller, leveled_penciller:pcl_confirml0complete(Penciller,
UpdState#state.filename, UpdState#state.filename,
Summary#summary.first_key, Summary#summary.first_key,
Summary#summary.last_key), Summary#summary.last_key,
Bloom),
{next_state, {next_state,
reader, reader,
UpdState#state{blockindex_cache = BlockIndex}} UpdState#state{blockindex_cache = BlockIndex}}
@ -646,26 +644,18 @@ fetch(LedgerKey, Hash, State, Timings0) ->
{SW1, Timings1} = update_timings(SW0, Timings0, index_query, true), {SW1, Timings1} = update_timings(SW0, Timings0, index_query, true),
SlotID = Slot#slot_index_value.slot_id, SlotID = Slot#slot_index_value.slot_id,
Bloom = Slot#slot_index_value.bloom, CachedBlockIdx =
case leveled_tinybloom:check_hash(Hash, Bloom) of array:get(SlotID - 1, State#state.blockindex_cache),
false -> {SW2, Timings2} = update_timings(SW1, Timings1, lookup_cache, true),
{_SW2, Timings2} =
update_timings(SW1, Timings1, tiny_bloom, false),
{not_present, State, Timings2};
true ->
{SW2, Timings2} =
update_timings(SW1, Timings1, tiny_bloom, true),
CachedBlockIdx = array:get(SlotID - 1,
State#state.blockindex_cache),
case CachedBlockIdx of case CachedBlockIdx of
none -> none ->
SlotBin = read_slot(State#state.handle, Slot), SlotBin = read_slot(State#state.handle, Slot),
{Result, BlockLengths, BlockIdx} = {Result, BlockLengths, BlockIdx} =
binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod), binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod),
BlockIndexCache = array:set(SlotID - 1, BlockIndexCache =
<<BlockLengths/binary, array:set(SlotID - 1,
BlockIdx/binary>>, <<BlockLengths/binary, BlockIdx/binary>>,
State#state.blockindex_cache), State#state.blockindex_cache),
{_SW3, Timings3} = {_SW3, Timings3} =
update_timings(SW2, Timings2, noncached_block, false), update_timings(SW2, Timings2, noncached_block, false),
@ -673,24 +663,15 @@ fetch(LedgerKey, Hash, State, Timings0) ->
State#state{blockindex_cache = BlockIndexCache}, State#state{blockindex_cache = BlockIndexCache},
Timings3}; Timings3};
<<BlockLengths:24/binary, BlockIdx/binary>> -> <<BlockLengths:24/binary, BlockIdx/binary>> ->
PosList = find_pos(BlockIdx, PosList = find_pos(BlockIdx, extra_hash(Hash), [], 0),
extra_hash(Hash),
[],
0),
case PosList of case PosList of
[] -> [] ->
{_SW3, Timings3} = {_SW3, Timings3} =
update_timings(SW2, update_timings(SW2, Timings2, slot_index, false),
Timings2,
slot_index,
false),
{not_present, State, Timings3}; {not_present, State, Timings3};
_ -> _ ->
{SW3, Timings3} = {SW3, Timings3} =
update_timings(SW2, update_timings(SW2, Timings2, slot_index, true),
Timings2,
slot_index,
true),
StartPos = Slot#slot_index_value.start_position, StartPos = Slot#slot_index_value.start_position,
Result = Result =
check_blocks(PosList, check_blocks(PosList,
@ -701,13 +682,9 @@ fetch(LedgerKey, Hash, State, Timings0) ->
PressMethod, PressMethod,
not_present), not_present),
{_SW4, Timings4} = {_SW4, Timings4} =
update_timings(SW3, update_timings(SW3, Timings3, slot_fetch, false),
Timings3,
slot_fetch,
false),
{Result, State, Timings4} {Result, State, Timings4}
end end
end
end. end.
@ -808,7 +785,7 @@ read_file(Filename, State) ->
{Handle, FileVersion, SummaryBin} = {Handle, FileVersion, SummaryBin} =
open_reader(filename:join(State#state.root_path, Filename)), open_reader(filename:join(State#state.root_path, Filename)),
UpdState0 = imp_fileversion(FileVersion, State), UpdState0 = imp_fileversion(FileVersion, State),
{Summary, SlotList} = read_table_summary(SummaryBin), {Summary, Bloom, SlotList} = read_table_summary(SummaryBin),
BlockIndexCache = array:new([{size, Summary#summary.size}, BlockIndexCache = array:new([{size, Summary#summary.size},
{default, none}]), {default, none}]),
UpdState1 = UpdState0#state{blockindex_cache = BlockIndexCache}, UpdState1 = UpdState0#state{blockindex_cache = BlockIndexCache},
@ -817,9 +794,10 @@ read_file(Filename, State) ->
leveled_log:log("SST03", [Filename, leveled_log:log("SST03", [Filename,
Summary#summary.size, Summary#summary.size,
Summary#summary.max_sqn]), Summary#summary.max_sqn]),
UpdState1#state{summary = UpdSummary, {UpdState1#state{summary = UpdSummary,
handle = Handle, handle = Handle,
filename = Filename}. filename = Filename},
Bloom}.
gen_fileversion(PressMethod) -> gen_fileversion(PressMethod) ->
Bit1 = Bit1 =
@ -848,13 +826,14 @@ open_reader(Filename) ->
{ok, SummaryBin} = file:pread(Handle, SlotsLength + 9, SummaryLength), {ok, SummaryBin} = file:pread(Handle, SlotsLength + 9, SummaryLength),
{Handle, FileVersion, SummaryBin}. {Handle, FileVersion, SummaryBin}.
build_table_summary(SlotIndex, _Level, FirstKey, SlotCount, MaxSQN) -> build_table_summary(SlotIndex, _Level, FirstKey, SlotCount, MaxSQN, Bloom) ->
[{LastKey, _LastV}|_Rest] = SlotIndex, [{LastKey, _LastV}|_Rest] = SlotIndex,
Summary = #summary{first_key = FirstKey, Summary = #summary{first_key = FirstKey,
last_key = LastKey, last_key = LastKey,
size = SlotCount, size = SlotCount,
max_sqn = MaxSQN}, max_sqn = MaxSQN},
SummBin = term_to_binary({Summary, lists:reverse(SlotIndex)}, SummBin =
term_to_binary({Summary, Bloom, lists:reverse(SlotIndex)},
?BINARY_SETTINGS), ?BINARY_SETTINGS),
SummCRC = erlang:crc32(SummBin), SummCRC = erlang:crc32(SummBin),
<<SummCRC:32/integer, SummBin/binary>>. <<SummCRC:32/integer, SummBin/binary>>.
@ -878,30 +857,31 @@ build_all_slots(SlotList, PressMethod) ->
array:new([{size, SlotCount}, array:new([{size, SlotCount},
{default, none}]), {default, none}]),
<<>>, <<>>,
[],
PressMethod), PressMethod),
{SlotIndex, BlockIndex, SlotsBin} = BuildResponse, {SlotIndex, BlockIndex, SlotsBin, HashLists} = BuildResponse,
{SlotCount, SlotIndex, BlockIndex, SlotsBin}. Bloom = leveled_ebloom:create_bloom(HashLists),
{SlotCount, SlotIndex, BlockIndex, SlotsBin, Bloom}.
build_all_slots([], _Pos, _SlotID, build_all_slots([], _Pos, _SlotID,
SlotIdxAcc, BlockIdxAcc, SlotBinAcc, SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists,
_PressMethod) -> _PressMethod) ->
{SlotIdxAcc, BlockIdxAcc, SlotBinAcc}; {SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists};
build_all_slots([SlotD|Rest], Pos, SlotID, build_all_slots([SlotD|Rest], Pos, SlotID,
SlotIdxAcc, BlockIdxAcc, SlotBinAcc, SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists,
PressMethod) -> PressMethod) ->
{BlockIdx, SlotBin, HashList, LastKey} = SlotD, {BlockIdx, SlotBin, HashList, LastKey} = SlotD,
Length = byte_size(SlotBin), Length = byte_size(SlotBin),
Bloom = leveled_tinybloom:create_bloom(HashList),
SlotIndexV = #slot_index_value{slot_id = SlotID, SlotIndexV = #slot_index_value{slot_id = SlotID,
start_position = Pos, start_position = Pos,
length = Length, length = Length},
bloom = Bloom},
build_all_slots(Rest, build_all_slots(Rest,
Pos + Length, Pos + Length,
SlotID + 1, SlotID + 1,
[{LastKey, SlotIndexV}|SlotIdxAcc], [{LastKey, SlotIndexV}|SlotIdxAcc],
array:set(SlotID - 1, BlockIdx, BlockIdxAcc), array:set(SlotID - 1, BlockIdx, BlockIdxAcc),
<<SlotBinAcc/binary, SlotBin/binary>>, <<SlotBinAcc/binary, SlotBin/binary>>,
lists:append(HashLists, HashList),
PressMethod). PressMethod).
@ -1828,11 +1808,10 @@ log_timings(no_timing) ->
log_timings(Timings) -> log_timings(Timings) ->
leveled_log:log("SST12", [Timings#sst_timings.sample_count, leveled_log:log("SST12", [Timings#sst_timings.sample_count,
Timings#sst_timings.index_query_time, Timings#sst_timings.index_query_time,
Timings#sst_timings.tiny_bloom_time, Timings#sst_timings.lookup_cache_time,
Timings#sst_timings.slot_index_time, Timings#sst_timings.slot_index_time,
Timings#sst_timings.slot_fetch_time, Timings#sst_timings.slot_fetch_time,
Timings#sst_timings.noncached_block_time, Timings#sst_timings.noncached_block_time,
Timings#sst_timings.tiny_bloom_count,
Timings#sst_timings.slot_index_count, Timings#sst_timings.slot_index_count,
Timings#sst_timings.slot_fetch_count, Timings#sst_timings.slot_fetch_count,
Timings#sst_timings.noncached_block_count]). Timings#sst_timings.noncached_block_count]).
@ -1847,9 +1826,9 @@ update_timings(SW, Timings, Stage, Continue) ->
index_query -> index_query ->
IQT = Timings#sst_timings.index_query_time, IQT = Timings#sst_timings.index_query_time,
Timings#sst_timings{index_query_time = IQT + Timer}; Timings#sst_timings{index_query_time = IQT + Timer};
tiny_bloom -> lookup_cache ->
TBT = Timings#sst_timings.tiny_bloom_time, TBT = Timings#sst_timings.lookup_cache_time,
Timings#sst_timings{tiny_bloom_time = TBT + Timer}; Timings#sst_timings{lookup_cache_time = TBT + Timer};
slot_index -> slot_index ->
SIT = Timings#sst_timings.slot_index_time, SIT = Timings#sst_timings.slot_index_time,
Timings#sst_timings{slot_index_time = SIT + Timer}; Timings#sst_timings{slot_index_time = SIT + Timer};
@ -1866,9 +1845,6 @@ update_timings(SW, Timings, Stage, Continue) ->
false -> false ->
Timings1 = Timings1 =
case Stage of case Stage of
tiny_bloom ->
TBC = Timings#sst_timings.tiny_bloom_count,
Timings0#sst_timings{tiny_bloom_count = TBC + 1};
slot_index -> slot_index ->
SIC = Timings#sst_timings.slot_index_count, SIC = Timings#sst_timings.slot_index_count,
Timings0#sst_timings{slot_index_count = SIC + 1}; Timings0#sst_timings{slot_index_count = SIC + 1};
@ -2149,9 +2125,9 @@ merge_test() ->
KVL2 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 20)), KVL2 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 20)),
KVL3 = lists:ukeymerge(1, KVL1, KVL2), KVL3 = lists:ukeymerge(1, KVL1, KVL2),
SW0 = os:timestamp(), SW0 = os:timestamp(),
{ok, P1, {FK1, LK1}} = {ok, P1, {FK1, LK1}, _Bloom1} =
sst_new("../test/", "level1_src", 1, KVL1, 6000, native), sst_new("../test/", "level1_src", 1, KVL1, 6000, native),
{ok, P2, {FK2, LK2}} = {ok, P2, {FK2, LK2}, _Bloom2} =
sst_new("../test/", "level2_src", 2, KVL2, 3000, native), sst_new("../test/", "level2_src", 2, KVL2, 3000, native),
ExpFK1 = element(1, lists:nth(1, KVL1)), ExpFK1 = element(1, lists:nth(1, KVL1)),
ExpLK1 = element(1, lists:last(KVL1)), ExpLK1 = element(1, lists:last(KVL1)),
@ -2165,7 +2141,7 @@ merge_test() ->
ML2 = [{next, #manifest_entry{owner = P2}, FK2}], ML2 = [{next, #manifest_entry{owner = P2}, FK2}],
NewR = NewR =
sst_new("../test/", "level2_merge", ML1, ML2, false, 2, N * 2, native), sst_new("../test/", "level2_merge", ML1, ML2, false, 2, N * 2, native),
{ok, P3, {{Rem1, Rem2}, FK3, LK3}} = NewR, {ok, P3, {{Rem1, Rem2}, FK3, LK3}, _Bloom3} = NewR,
?assertMatch([], Rem1), ?assertMatch([], Rem1),
?assertMatch([], Rem2), ?assertMatch([], Rem2),
?assertMatch(true, FK3 == min(FK1, FK2)), ?assertMatch(true, FK3 == min(FK1, FK2)),
@ -2198,7 +2174,7 @@ simple_persisted_range_test() ->
KVList1 = lists:ukeysort(1, KVList0), KVList1 = lists:ukeysort(1, KVList0),
[{FirstKey, _FV}|_Rest] = KVList1, [{FirstKey, _FV}|_Rest] = KVList1,
{LastKey, _LV} = lists:last(KVList1), {LastKey, _LV} = lists:last(KVList1),
{ok, Pid, {FirstKey, LastKey}} = {ok, Pid, {FirstKey, LastKey}, _Bloom} =
sst_new(RP, Filename, 1, KVList1, length(KVList1), native), sst_new(RP, Filename, 1, KVList1, length(KVList1), native),
{o, B, K, null} = LastKey, {o, B, K, null} = LastKey,
@ -2248,7 +2224,7 @@ additional_range_test() ->
[], [],
lists:seq(?NOLOOK_SLOTSIZE + Gap + 1, lists:seq(?NOLOOK_SLOTSIZE + Gap + 1,
2 * ?NOLOOK_SLOTSIZE + Gap)), 2 * ?NOLOOK_SLOTSIZE + Gap)),
{ok, P1, {{Rem1, Rem2}, SK, EK}} = {ok, P1, {{Rem1, Rem2}, SK, EK}, _Bloom1} =
sst_new("../test/", "range1_src", IK1, IK2, false, 1, 9999, native), sst_new("../test/", "range1_src", IK1, IK2, false, 1, 9999, native),
?assertMatch([], Rem1), ?assertMatch([], Rem1),
?assertMatch([], Rem2), ?assertMatch([], Rem2),
@ -2306,7 +2282,7 @@ simple_persisted_slotsize_test() ->
?LOOK_SLOTSIZE), ?LOOK_SLOTSIZE),
[{FirstKey, _FV}|_Rest] = KVList1, [{FirstKey, _FV}|_Rest] = KVList1,
{LastKey, _LV} = lists:last(KVList1), {LastKey, _LV} = lists:last(KVList1),
{ok, Pid, {FirstKey, LastKey}} = {ok, Pid, {FirstKey, LastKey}, _Bloom} =
sst_new(RP, Filename, 1, KVList1, length(KVList1), native), sst_new(RP, Filename, 1, KVList1, length(KVList1), native),
lists:foreach(fun({K, V}) -> lists:foreach(fun({K, V}) ->
?assertMatch({K, V}, sst_get(Pid, K)) ?assertMatch({K, V}, sst_get(Pid, K))
@ -2321,7 +2297,7 @@ simple_persisted_test() ->
KVList1 = lists:ukeysort(1, KVList0), KVList1 = lists:ukeysort(1, KVList0),
[{FirstKey, _FV}|_Rest] = KVList1, [{FirstKey, _FV}|_Rest] = KVList1,
{LastKey, _LV} = lists:last(KVList1), {LastKey, _LV} = lists:last(KVList1),
{ok, Pid, {FirstKey, LastKey}} = {ok, Pid, {FirstKey, LastKey}, _Bloom} =
sst_new(RP, Filename, 1, KVList1, length(KVList1), native), sst_new(RP, Filename, 1, KVList1, length(KVList1), native),
SW0 = os:timestamp(), SW0 = os:timestamp(),
lists:foreach(fun({K, V}) -> lists:foreach(fun({K, V}) ->
@ -2534,16 +2510,15 @@ check_segment_match(PosBinIndex1, KVL, TreeSize) ->
timings_test() -> timings_test() ->
SW = os:timestamp(), SW = os:timestamp(),
timer:sleep(1), timer:sleep(1),
{no_timing, T0} = update_timings(SW, #sst_timings{}, tiny_bloom, false), {no_timing, T1} = update_timings(SW, #sst_timings{}, slot_index, false),
{no_timing, T1} = update_timings(SW, T0, slot_index, false),
{no_timing, T2} = update_timings(SW, T1, slot_fetch, false), {no_timing, T2} = update_timings(SW, T1, slot_fetch, false),
{no_timing, T3} = update_timings(SW, T2, noncached_block, false), {no_timing, T3} = update_timings(SW, T2, noncached_block, false),
timer:sleep(1), timer:sleep(1),
{_, T4} = update_timings(SW, T3, tiny_bloom, true), {_, T4} = update_timings(SW, T3, slot_fetch, true),
?assertMatch(4, T4#sst_timings.sample_count), ?assertMatch(3, T4#sst_timings.sample_count),
?assertMatch(1, T4#sst_timings.tiny_bloom_count), ?assertMatch(1, T4#sst_timings.slot_fetch_count),
?assertMatch(true, T4#sst_timings.tiny_bloom_time > ?assertMatch(true, T4#sst_timings.slot_fetch_time >
T3#sst_timings.tiny_bloom_time). T3#sst_timings.slot_fetch_time).
-endif. -endif.

View file

@ -1,278 +0,0 @@
%% -------- TinyBloom ---------
%%
%% A fixed size bloom that supports 128 keys only, made to try and minimise
%% the cost of producing the bloom
%%
-module(leveled_tinybloom).
-include("include/leveled.hrl").
-include_lib("eunit/include/eunit.hrl").
-export([
create_bloom/1,
check_hash/2
]).
-define(BLOOM_SIZE_BYTES, 16).
-define(INTEGER_SIZE, 128).
-define(BAND_MASK, ?INTEGER_SIZE - 1).
%%%============================================================================
%%% API
%%%============================================================================
-spec create_bloom(list(integer())) -> binary().
%% @doc
%% Create a binary bloom filter from alist of hashes
create_bloom(HashList) ->
case length(HashList) of
0 ->
<<>>;
L when L > 32 ->
add_hashlist(HashList,
7,
0, 0, 0, 0, 0, 0, 0, 0);
L when L > 16 ->
add_hashlist(HashList, 3, 0, 0, 0, 0);
_ ->
add_hashlist(HashList, 1, 0, 0)
end.
-spec check_hash(integer(), binary()) -> boolean().
%% @doc
%% Check for the presence of a given hash within a bloom
check_hash(_Hash, <<>>) ->
false;
check_hash({_SegHash, Hash}, BloomBin) ->
SlotSplit = (byte_size(BloomBin) div ?BLOOM_SIZE_BYTES) - 1,
{Slot, Hashes} = split_hash(Hash, SlotSplit),
Mask = get_mask(Hashes),
Pos = Slot * ?BLOOM_SIZE_BYTES,
IntSize = ?INTEGER_SIZE,
<<_H:Pos/binary, CheckInt:IntSize/integer, _T/binary>> = BloomBin,
case CheckInt band Mask of
Mask ->
true;
_ ->
false
end.
%%%============================================================================
%%% Internal Functions
%%%============================================================================
split_hash(Hash, SlotSplit) ->
Slot = Hash band SlotSplit,
H0 = (Hash bsr 4) band (?BAND_MASK),
H1 = (Hash bsr 11) band (?BAND_MASK),
H2 = (Hash bsr 18) band (?BAND_MASK),
H3 = (Hash bsr 25) band (?BAND_MASK),
{Slot, [H0, H1, H2, H3]}.
get_mask([H0, H1, H2, H3]) ->
(1 bsl H0) bor (1 bsl H1) bor (1 bsl H2) bor (1 bsl H3).
%% This looks ugly and clunky, but in tests it was quicker than modifying an
%% Erlang term like an array as it is passed around the loop
add_hashlist([], _S, S0, S1) ->
IntSize = ?INTEGER_SIZE,
<<S0:IntSize/integer, S1:IntSize/integer>>;
add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1) ->
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
Mask = get_mask(Hashes),
case Slot of
0 ->
add_hashlist(T, SlotSplit, S0 bor Mask, S1);
1 ->
add_hashlist(T, SlotSplit, S0, S1 bor Mask)
end.
add_hashlist([], _S, S0, S1, S2, S3) ->
IntSize = ?INTEGER_SIZE,
<<S0:IntSize/integer, S1:IntSize/integer,
S2:IntSize/integer, S3:IntSize/integer>>;
add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1, S2, S3) ->
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
Mask = get_mask(Hashes),
case Slot of
0 ->
add_hashlist(T, SlotSplit, S0 bor Mask, S1, S2, S3);
1 ->
add_hashlist(T, SlotSplit, S0, S1 bor Mask, S2, S3);
2 ->
add_hashlist(T, SlotSplit, S0, S1, S2 bor Mask, S3);
3 ->
add_hashlist(T, SlotSplit, S0, S1, S2, S3 bor Mask)
end.
add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7) ->
IntSize = ?INTEGER_SIZE,
<<S0:IntSize/integer, S1:IntSize/integer,
S2:IntSize/integer, S3:IntSize/integer,
S4:IntSize/integer, S5:IntSize/integer,
S6:IntSize/integer, S7:IntSize/integer>>;
add_hashlist([{_SegHash, TopHash}|T],
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7) ->
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
Mask = get_mask(Hashes),
case Slot of
0 ->
add_hashlist(T,
SlotSplit,
S0 bor Mask, S1, S2, S3, S4, S5, S6, S7);
1 ->
add_hashlist(T,
SlotSplit,
S0, S1 bor Mask, S2, S3, S4, S5, S6, S7);
2 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2 bor Mask, S3, S4, S5, S6, S7);
3 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3 bor Mask, S4, S5, S6, S7);
4 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4 bor Mask, S5, S6, S7);
5 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5 bor Mask, S6, S7);
6 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6 bor Mask, S7);
7 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7 bor Mask)
end.
%%%============================================================================
%%% Test
%%%============================================================================
-ifdef(TEST).
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
generate_randomkeys(Seqn,
Count,
[],
BucketRangeLow,
BucketRangeHigh).
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BRand = leveled_rand:uniform(BRange),
BNumber = string:right(integer_to_list(BucketLow + BRand), 4, $0),
KNumber = string:right(integer_to_list(leveled_rand:uniform(10000)), 6, $0),
LK = leveled_codec:to_ledgerkey("Bucket" ++ BNumber, "Key" ++ KNumber, o),
Chunk = leveled_rand:rand_bytes(16),
{_B, _K, MV, _H, _LMs} =
leveled_codec:generate_ledgerkv(LK, Seqn, Chunk, 64, infinity),
generate_randomkeys(Seqn + 1,
Count - 1,
[{LK, MV}|Acc],
BucketLow,
BRange).
get_hashlist(N) ->
KVL0 = lists:ukeysort(1, generate_randomkeys(1, N * 2, 1, 20)),
KVL = lists:sublist(KVL0, N),
HashFun =
fun({K, _V}) ->
leveled_codec:segment_hash(K)
end,
lists:map(HashFun, KVL).
check_all_hashes(BloomBin, HashList) ->
CheckFun =
fun(Hash) ->
?assertMatch(true, check_hash(Hash, BloomBin))
end,
lists:foreach(CheckFun, HashList).
check_neg_hashes(BloomBin, HashList, Counters) ->
CheckFun =
fun(Hash, {AccT, AccF}) ->
case check_hash(Hash, BloomBin) of
true ->
{AccT + 1, AccF};
false ->
{AccT, AccF + 1}
end
end,
lists:foldl(CheckFun, Counters, HashList).
empty_bloom_test() ->
BloomBin0 = create_bloom([]),
?assertMatch({0, 4},
check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0})).
bloom_test_() ->
{timeout, 20, fun bloom_test_ranges/0}.
bloom_test_ranges() ->
test_bloom(128, 256),
test_bloom(64, 100),
test_bloom(32, 100),
test_bloom(16, 100),
test_bloom(8, 100).
test_bloom(N, Runs) ->
ListOfHashLists =
lists:map(fun(_X) -> get_hashlist(N) end, lists:seq(1, Runs)),
SWa = os:timestamp(),
ListOfBlooms =
lists:map(fun(HL) -> create_bloom(HL) end, ListOfHashLists),
TSa = timer:now_diff(os:timestamp(), SWa),
SWb = os:timestamp(),
lists:foreach(fun(Nth) ->
HL = lists:nth(Nth, ListOfHashLists),
BB = lists:nth(Nth, ListOfBlooms),
check_all_hashes(BB, HL)
end,
lists:seq(1, Runs)),
TSb = timer:now_diff(os:timestamp(), SWb),
HashPool = get_hashlist(N * 2),
ListOfMisses =
lists:map(fun(HL) ->
lists:sublist(lists:subtract(HashPool, HL), N)
end,
ListOfHashLists),
SWc = os:timestamp(),
{Pos, Neg} =
lists:foldl(fun(Nth, Acc) ->
HL = lists:nth(Nth, ListOfMisses),
BB = lists:nth(Nth, ListOfBlooms),
check_neg_hashes(BB, HL, Acc)
end,
{0, 0},
lists:seq(1, Runs)),
FPR = Pos / (Pos + Neg),
TSc = timer:now_diff(os:timestamp(), SWc),
io:format(user,
"Test with size ~w has microsecond timings: -"
++ " build ~w check ~w neg_check ~w and fpr ~w~n",
[N, TSa, TSb, TSc, FPR]).
-endif.

View file

@ -879,7 +879,7 @@ search_range_idx_test() ->
{{[{{o_rkv,"Bucket1","Key1",null}, {{[{{o_rkv,"Bucket1","Key1",null},
{manifest_entry,{o_rkv,"Bucket","Key9083",null}, {manifest_entry,{o_rkv,"Bucket","Key9083",null},
{o_rkv,"Bucket1","Key1",null}, {o_rkv,"Bucket1","Key1",null},
"<0.320.0>","./16_1_6.sst"}}]}, "<0.320.0>","./16_1_6.sst", none}}]},
{1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}}, {1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}},
StartKeyFun = StartKeyFun =
fun(ME) -> fun(ME) ->