Add tiny bloom to Penciller Manifest

This is an attempt to save on unnecessary message transfers, and on the
slightly more expensive GCS checks in the SFT file itself.
This commit is contained in:
martinsumner 2016-12-11 04:48:50 +00:00
parent ea8f3c07a7
commit 523716e8f2
5 changed files with 153 additions and 90 deletions

View file

@@ -41,6 +41,7 @@
{start_key :: tuple(), {start_key :: tuple(),
end_key :: tuple(), end_key :: tuple(),
owner :: pid(), owner :: pid(),
bloom,
filename :: string()}). filename :: string()}).
-record(cdb_options, -record(cdb_options,

View file

@@ -320,6 +320,7 @@ do_merge(KL1, KL2, {SrcLevel, IsB}, {Filepath, MSN}, FileCounter, OutList) ->
KL1, KL1,
KL2, KL2,
LevelR), LevelR),
{ok, Bloom} = leveled_sft:sft_getbloom(Pid),
case Reply of case Reply of
{{[], []}, null, _} -> {{[], []}, null, _} ->
leveled_log:log("PC013", [FileName]), leveled_log:log("PC013", [FileName]),
@@ -331,6 +332,7 @@ do_merge(KL1, KL2, {SrcLevel, IsB}, {Filepath, MSN}, FileCounter, OutList) ->
[#manifest_entry{start_key=SmallestKey, [#manifest_entry{start_key=SmallestKey,
end_key=HighestKey, end_key=HighestKey,
owner=Pid, owner=Pid,
bloom=Bloom,
filename=FileName}]), filename=FileName}]),
leveled_log:log_timer("PC015", [], TS1), leveled_log:log_timer("PC015", [], TS1),
do_merge(KL1Rem, KL2Rem, do_merge(KL1Rem, KL2Rem,

View file

@@ -175,7 +175,7 @@
pcl_checksequencenumber/4, pcl_checksequencenumber/4,
pcl_workforclerk/1, pcl_workforclerk/1,
pcl_promptmanifestchange/2, pcl_promptmanifestchange/2,
pcl_confirml0complete/4, pcl_confirml0complete/5,
pcl_confirmdelete/2, pcl_confirmdelete/2,
pcl_close/1, pcl_close/1,
pcl_doom/1, pcl_doom/1,
@@ -285,8 +285,8 @@ pcl_workforclerk(Pid) ->
pcl_promptmanifestchange(Pid, WI) -> pcl_promptmanifestchange(Pid, WI) ->
gen_server:cast(Pid, {manifest_change, WI}). gen_server:cast(Pid, {manifest_change, WI}).
pcl_confirml0complete(Pid, FN, StartKey, EndKey) -> pcl_confirml0complete(Pid, FN, StartKey, EndKey, Bloom) ->
gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey}). gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey, Bloom}).
pcl_confirmdelete(Pid, FileName) -> pcl_confirmdelete(Pid, FileName) ->
gen_server:cast(Pid, {confirm_delete, FileName}). gen_server:cast(Pid, {confirm_delete, FileName}).
@@ -454,10 +454,11 @@ handle_cast({confirm_delete, FileName}, State=#state{is_snapshot=Snap})
_ -> _ ->
{noreply, State} {noreply, State}
end; end;
handle_cast({levelzero_complete, FN, StartKey, EndKey}, State) -> handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) ->
leveled_log:log("P0029", []), leveled_log:log("P0029", []),
ManEntry = #manifest_entry{start_key=StartKey, ManEntry = #manifest_entry{start_key=StartKey,
end_key=EndKey, end_key=EndKey,
bloom=Bloom,
owner=State#state.levelzero_constructor, owner=State#state.levelzero_constructor,
filename=FN}, filename=FN},
UpdMan = lists:keystore(0, 1, State#state.manifest, {0, [ManEntry]}), UpdMan = lists:keystore(0, 1, State#state.manifest, {0, [ManEntry]}),
@@ -721,34 +722,40 @@ fetch_mem(Key, Hash, Manifest, L0Cache) ->
L0Check = leveled_pmem:check_levelzero(Key, Hash, L0Cache), L0Check = leveled_pmem:check_levelzero(Key, Hash, L0Cache),
case L0Check of case L0Check of
{false, not_found} -> {false, not_found} ->
fetch(Key, Manifest, 0, fun leveled_sft:sft_get/2); fetch(Key, Hash, Manifest, 0, fun leveled_sft:sft_get/2);
{true, KV} -> {true, KV} ->
KV KV
end. end.
fetch(_Key, _Manifest, ?MAX_LEVELS + 1, _FetchFun) -> fetch(_Key, _Hash, _Manifest, ?MAX_LEVELS + 1, _FetchFun) ->
not_present; not_present;
fetch(Key, Manifest, Level, FetchFun) -> fetch(Key, Hash, Manifest, Level, FetchFun) ->
LevelManifest = get_item(Level, Manifest, []), LevelManifest = get_item(Level, Manifest, []),
case lists:foldl(fun(File, Acc) -> case lists:foldl(fun(File, Acc) ->
case Acc of case Acc of
not_present when not_present when
Key >= File#manifest_entry.start_key, Key >= File#manifest_entry.start_key,
File#manifest_entry.end_key >= Key -> File#manifest_entry.end_key >= Key ->
File#manifest_entry.owner; {File#manifest_entry.owner,
PidFound -> File#manifest_entry.bloom};
PidFound FoundDetails ->
FoundDetails
end end, end end,
not_present, not_present,
LevelManifest) of LevelManifest) of
not_present -> not_present ->
fetch(Key, Manifest, Level + 1, FetchFun); fetch(Key, Hash, Manifest, Level + 1, FetchFun);
FileToCheck -> {FileToCheck, Bloom} ->
case leveled_tinybloom:check({hash, Hash}, Bloom) of
true ->
case FetchFun(FileToCheck, Key) of case FetchFun(FileToCheck, Key) of
not_present -> not_present ->
fetch(Key, Manifest, Level + 1, FetchFun); fetch(Key, Hash, Manifest, Level + 1, FetchFun);
ObjectFound -> ObjectFound ->
ObjectFound ObjectFound
end;
false ->
fetch(Key, Hash, Manifest, Level + 1, FetchFun)
end end
end. end.

View file

@@ -161,6 +161,7 @@
sft_newfroml0cache/4, sft_newfroml0cache/4,
sft_open/1, sft_open/1,
sft_get/2, sft_get/2,
sft_getbloom/1,
sft_getkvrange/4, sft_getkvrange/4,
sft_close/1, sft_close/1,
sft_clear/1, sft_clear/1,
@@ -189,6 +190,7 @@
-define(HEADER_LEN, 56). -define(HEADER_LEN, 56).
-define(ITERATOR_SCANWIDTH, 1). -define(ITERATOR_SCANWIDTH, 1).
-define(MERGE_SCANWIDTH, 32). -define(MERGE_SCANWIDTH, 32).
-define(BLOOM_WIDTH, 48).
-define(DELETE_TIMEOUT, 10000). -define(DELETE_TIMEOUT, 10000).
-define(MAX_KEYS, ?SLOT_COUNT * ?BLOCK_COUNT * ?BLOCK_SIZE). -define(MAX_KEYS, ?SLOT_COUNT * ?BLOCK_COUNT * ?BLOCK_SIZE).
-define(DISCARD_EXT, ".discarded"). -define(DISCARD_EXT, ".discarded").
@@ -211,7 +213,8 @@
handle :: file:fd(), handle :: file:fd(),
background_complete = false :: boolean(), background_complete = false :: boolean(),
oversized_file = false :: boolean(), oversized_file = false :: boolean(),
penciller :: pid()}). penciller :: pid(),
bloom}).
%%%============================================================================ %%%============================================================================
@@ -268,6 +271,9 @@ sft_open(Filename) ->
sft_setfordelete(Pid, Penciller) -> sft_setfordelete(Pid, Penciller) ->
gen_fsm:sync_send_event(Pid, {set_for_delete, Penciller}, infinity). gen_fsm:sync_send_event(Pid, {set_for_delete, Penciller}, infinity).
sft_getbloom(Pid) ->
gen_fsm:sync_send_event(Pid, get_bloom, infinity).
sft_get(Pid, Key) -> sft_get(Pid, Key) ->
gen_fsm:sync_send_event(Pid, {get_kv, Key}, infinity). gen_fsm:sync_send_event(Pid, {get_kv, Key}, infinity).
@@ -342,8 +348,9 @@ starting({sft_newfroml0cache, Filename, Slots, FetchFun, PCL}, _State) ->
leveled_penciller:pcl_confirml0complete(PCL, leveled_penciller:pcl_confirml0complete(PCL,
State#state.filename, State#state.filename,
State#state.smallest_key, State#state.smallest_key,
State#state.highest_key), State#state.highest_key,
{next_state, reader, State} State#state.bloom),
{next_state, reader, State#state{bloom=none}}
end. end.
@@ -378,6 +385,12 @@ reader(background_complete, _From, State) ->
reader, reader,
State} State}
end; end;
reader(get_bloom, _From, State) ->
Bloom = State#state.bloom,
if
Bloom /= none ->
{reply, {ok, Bloom}, reader, State#state{bloom=none}}
end;
reader(close, _From, State) -> reader(close, _From, State) ->
ok = file:close(State#state.handle), ok = file:close(State#state.handle),
{stop, normal, ok, State}. {stop, normal, ok, State}.
@@ -510,7 +523,7 @@ open_file(FileMD) ->
Slen:32/integer>> = HeaderLengths, Slen:32/integer>> = HeaderLengths,
{ok, SummaryBin} = file:pread(Handle, {ok, SummaryBin} = file:pread(Handle,
?HEADER_LEN + Blen + Ilen + Flen, Slen), ?HEADER_LEN + Blen + Ilen + Flen, Slen),
{{LowSQN, HighSQN}, {LowKey, HighKey}} = binary_to_term(SummaryBin), {{LowSQN, HighSQN}, {LowKey, HighKey}, Bloom} = binary_to_term(SummaryBin),
{ok, SlotIndexBin} = file:pread(Handle, ?HEADER_LEN + Blen, Ilen), {ok, SlotIndexBin} = file:pread(Handle, ?HEADER_LEN + Blen, Ilen),
SlotIndex = binary_to_term(SlotIndexBin), SlotIndex = binary_to_term(SlotIndexBin),
{Handle, FileMD#state{slot_index=SlotIndex, {Handle, FileMD#state{slot_index=SlotIndex,
@@ -523,7 +536,8 @@ open_file(FileMD) ->
filter_pointer=?HEADER_LEN + Blen + Ilen, filter_pointer=?HEADER_LEN + Blen + Ilen,
summ_pointer=?HEADER_LEN + Blen + Ilen + Flen, summ_pointer=?HEADER_LEN + Blen + Ilen + Flen,
summ_length=Slen, summ_length=Slen,
handle=Handle}}. handle=Handle,
bloom=Bloom}}.
%% Take a file handle with a previously created header and complete it based on %% Take a file handle with a previously created header and complete it based on
%% the two key lists KL1 and KL2 %% the two key lists KL1 and KL2
@@ -531,10 +545,11 @@ complete_file(Handle, FileMD, KL1, KL2, LevelR) ->
complete_file(Handle, FileMD, KL1, KL2, LevelR, false). complete_file(Handle, FileMD, KL1, KL2, LevelR, false).
complete_file(Handle, FileMD, KL1, KL2, LevelR, Rename) -> complete_file(Handle, FileMD, KL1, KL2, LevelR, Rename) ->
EmptyBloom = leveled_tinybloom:empty(?BLOOM_WIDTH),
{ok, KeyRemainders} = write_keys(Handle, {ok, KeyRemainders} = write_keys(Handle,
maybe_expand_pointer(KL1), maybe_expand_pointer(KL1),
maybe_expand_pointer(KL2), maybe_expand_pointer(KL2),
[], <<>>, [], <<>>, EmptyBloom,
LevelR, LevelR,
fun sftwrite_function/2), fun sftwrite_function/2),
{ReadHandle, UpdFileMD} = case Rename of {ReadHandle, UpdFileMD} = case Rename of
@@ -769,12 +784,12 @@ get_nextkeyaftermatch([_KTuple|T], KeyToFind, PrevV) ->
write_keys(Handle, write_keys(Handle,
KL1, KL2, KL1, KL2,
SlotIndex, SerialisedSlots, SlotIndex, SerialisedSlots, InitialBloom,
LevelR, WriteFun) -> LevelR, WriteFun) ->
write_keys(Handle, write_keys(Handle,
KL1, KL2, KL1, KL2,
{0, 0}, {0, 0},
SlotIndex, SerialisedSlots, SlotIndex, SerialisedSlots, InitialBloom,
{infinity, 0}, null, {last, null}, {infinity, 0}, null, {last, null},
LevelR, WriteFun). LevelR, WriteFun).
@@ -782,7 +797,7 @@ write_keys(Handle,
write_keys(Handle, write_keys(Handle,
KL1, KL2, KL1, KL2,
{SlotCount, SlotTotal}, {SlotCount, SlotTotal},
SlotIndex, SerialisedSlots, SlotIndex, SerialisedSlots, Bloom,
{LSN, HSN}, LowKey, LastKey, {LSN, HSN}, LowKey, LastKey,
LevelR, WriteFun) LevelR, WriteFun)
when SlotCount =:= ?SLOT_GROUPWRITE_COUNT -> when SlotCount =:= ?SLOT_GROUPWRITE_COUNT ->
@@ -791,26 +806,27 @@ write_keys(Handle,
reached -> reached ->
{complete_keywrite(UpdHandle, {complete_keywrite(UpdHandle,
SlotIndex, SlotIndex,
{LSN, HSN}, {LowKey, LastKey}, {{LSN, HSN}, {LowKey, LastKey}, Bloom},
WriteFun), WriteFun),
{KL1, KL2}}; {KL1, KL2}};
continue -> continue ->
write_keys(UpdHandle, write_keys(UpdHandle,
KL1, KL2, KL1, KL2,
{0, SlotTotal}, {0, SlotTotal},
SlotIndex, <<>>, SlotIndex, <<>>, Bloom,
{LSN, HSN}, LowKey, LastKey, {LSN, HSN}, LowKey, LastKey,
LevelR, WriteFun) LevelR, WriteFun)
end; end;
write_keys(Handle, write_keys(Handle,
KL1, KL2, KL1, KL2,
{SlotCount, SlotTotal}, {SlotCount, SlotTotal},
SlotIndex, SerialisedSlots, SlotIndex, SerialisedSlots, Bloom,
{LSN, HSN}, LowKey, LastKey, {LSN, HSN}, LowKey, LastKey,
LevelR, WriteFun) -> LevelR, WriteFun) ->
SlotOutput = create_slot(KL1, KL2, LevelR), SlotOutput = create_slot(KL1, KL2, LevelR, Bloom),
{{LowKey_Slot, SegFilter, SerialisedSlot, LengthList}, {{LowKey_Slot, SegFilter, SerialisedSlot, LengthList},
{{LSN_Slot, HSN_Slot}, LastKey_Slot, Status}, {{LSN_Slot, HSN_Slot}, LastKey_Slot, Status},
UpdBloom,
KL1rem, KL2rem} = SlotOutput, KL1rem, KL2rem} = SlotOutput,
UpdSlotIndex = lists:append(SlotIndex, UpdSlotIndex = lists:append(SlotIndex,
[{LowKey_Slot, SegFilter, LengthList}]), [{LowKey_Slot, SegFilter, LengthList}]),
@@ -829,34 +845,34 @@ write_keys(Handle,
UpdHandle = WriteFun(slots , {Handle, UpdSlots}), UpdHandle = WriteFun(slots , {Handle, UpdSlots}),
{complete_keywrite(UpdHandle, {complete_keywrite(UpdHandle,
UpdSlotIndex, UpdSlotIndex,
SNExtremes, {FirstKey, FinalKey}, {SNExtremes, {FirstKey, FinalKey}, UpdBloom},
WriteFun), WriteFun),
{KL1rem, KL2rem}}; {KL1rem, KL2rem}};
full -> full ->
write_keys(Handle, write_keys(Handle,
KL1rem, KL2rem, KL1rem, KL2rem,
{SlotCount + 1, SlotTotal + 1}, {SlotCount + 1, SlotTotal + 1},
UpdSlotIndex, UpdSlots, UpdSlotIndex, UpdSlots, UpdBloom,
SNExtremes, FirstKey, FinalKey, SNExtremes, FirstKey, FinalKey,
LevelR, WriteFun); LevelR, WriteFun);
complete -> complete ->
UpdHandle = WriteFun(slots , {Handle, UpdSlots}), UpdHandle = WriteFun(slots , {Handle, UpdSlots}),
{complete_keywrite(UpdHandle, {complete_keywrite(UpdHandle,
UpdSlotIndex, UpdSlotIndex,
SNExtremes, {FirstKey, FinalKey}, {SNExtremes, {FirstKey, FinalKey}, UpdBloom},
WriteFun), WriteFun),
{KL1rem, KL2rem}} {KL1rem, KL2rem}}
end. end.
complete_keywrite(Handle, SlotIndex, complete_keywrite(Handle,
SNExtremes, {FirstKey, FinalKey}, SlotIndex,
{SNExtremes, {FirstKey, FinalKey}, Bloom},
WriteFun) -> WriteFun) ->
ConvSlotIndex = convert_slotindex(SlotIndex), ConvSlotIndex = convert_slotindex(SlotIndex),
WriteFun(finalise, {Handle, WriteFun(finalise, {Handle,
ConvSlotIndex, ConvSlotIndex,
SNExtremes, {SNExtremes, {FirstKey, FinalKey}, Bloom}}).
{FirstKey, FinalKey}}).
%% Take a slot index, and remove the SegFilters replacing with pointers %% Take a slot index, and remove the SegFilters replacing with pointers
@@ -885,15 +901,14 @@ sftwrite_function(slots, {Handle, SerialisedSlots}) ->
sftwrite_function(finalise, sftwrite_function(finalise,
{Handle, {Handle,
{SlotFilters, PointerIndex}, {SlotFilters, PointerIndex},
SNExtremes, {SNExtremes, KeyExtremes, Bloom}}) ->
KeyExtremes}) ->
{ok, Position} = file:position(Handle, cur), {ok, Position} = file:position(Handle, cur),
BlocksLength = Position - ?HEADER_LEN, BlocksLength = Position - ?HEADER_LEN,
Index = term_to_binary(PointerIndex), Index = term_to_binary(PointerIndex),
IndexLength = byte_size(Index), IndexLength = byte_size(Index),
FilterLength = byte_size(SlotFilters), FilterLength = byte_size(SlotFilters),
Summary = term_to_binary({SNExtremes, KeyExtremes}), Summary = term_to_binary({SNExtremes, KeyExtremes, Bloom}),
SummaryLength = byte_size(Summary), SummaryLength = byte_size(Summary),
%% Write Index, Filter and Summary %% Write Index, Filter and Summary
ok = file:write(Handle, <<Index/binary, ok = file:write(Handle, <<Index/binary,
@@ -947,39 +962,47 @@ maxslots_bylevel(SlotTotal, _Level) ->
%% Also this should return a partial block if the KeyLists have been exhausted %% Also this should return a partial block if the KeyLists have been exhausted
%% but the block is full %% but the block is full
create_block(KeyList1, KeyList2, LevelR) -> create_block(KeyList1, KeyList2, LevelR, Bloom) ->
create_block(KeyList1, KeyList2, [], {infinity, 0}, [], LevelR). create_block(KeyList1, KeyList2, [], {infinity, 0}, [], LevelR, Bloom).
create_block(KeyList1, KeyList2, create_block(KeyList1, KeyList2,
BlockKeyList, {LSN, HSN}, SegmentList, _LevelR) BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom)
when length(BlockKeyList)==?BLOCK_SIZE -> when length(BlockKeyList)==?BLOCK_SIZE ->
case {KeyList1, KeyList2} of case {KeyList1, KeyList2} of
{[], []} -> {[], []} ->
{BlockKeyList, complete, {LSN, HSN}, SegmentList, [], []}; {BlockKeyList, complete, {LSN, HSN}, SegmentList,
Bloom,
[], []};
_ -> _ ->
{BlockKeyList, full, {LSN, HSN}, SegmentList, KeyList1, KeyList2} {BlockKeyList, full, {LSN, HSN}, SegmentList,
Bloom,
KeyList1, KeyList2}
end; end;
create_block([], [], create_block([], [], BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom) ->
BlockKeyList, {LSN, HSN}, SegmentList, _LevelR) -> {BlockKeyList, partial, {LSN, HSN}, SegmentList,
{BlockKeyList, partial, {LSN, HSN}, SegmentList, [], []}; Bloom,
[], []};
create_block(KeyList1, KeyList2, create_block(KeyList1, KeyList2,
BlockKeyList, {LSN, HSN}, SegmentList, LevelR) -> BlockKeyList, {LSN, HSN}, SegmentList, LevelR, Bloom) ->
case key_dominates(KeyList1, case key_dominates(KeyList1,
KeyList2, KeyList2,
{LevelR#level.is_basement, LevelR#level.timestamp}) of {LevelR#level.is_basement, LevelR#level.timestamp}) of
{{next_key, TopKey}, Rem1, Rem2} -> {{next_key, TopKey}, Rem1, Rem2} ->
{UpdLSN, UpdHSN} = update_sequencenumbers(TopKey, LSN, HSN), {_K, V} = TopKey,
{SQN, _St, MH, _MD} = leveled_codec:striphead_to_details(V),
{UpdLSN, UpdHSN} = update_sequencenumbers(SQN, LSN, HSN),
UpdBloom = leveled_tinybloom:enter({hash, MH}, Bloom),
NewBlockKeyList = lists:append(BlockKeyList, NewBlockKeyList = lists:append(BlockKeyList,
[TopKey]), [TopKey]),
NewSegmentList = lists:append(SegmentList, NewSegmentList = lists:append(SegmentList,
[hash_for_segmentid(TopKey)]), [hash_for_segmentid(TopKey)]),
create_block(Rem1, Rem2, create_block(Rem1, Rem2,
NewBlockKeyList, {UpdLSN, UpdHSN}, NewBlockKeyList, {UpdLSN, UpdHSN},
NewSegmentList, LevelR); NewSegmentList, LevelR, UpdBloom);
{skipped_key, Rem1, Rem2} -> {skipped_key, Rem1, Rem2} ->
create_block(Rem1, Rem2, create_block(Rem1, Rem2,
BlockKeyList, {LSN, HSN}, BlockKeyList, {LSN, HSN},
SegmentList, LevelR) SegmentList, LevelR, Bloom)
end. end.
@@ -996,33 +1019,43 @@ create_block(KeyList1, KeyList2,
%% - Remainder of any KeyLists used to make the slot %% - Remainder of any KeyLists used to make the slot
create_slot(KeyList1, KeyList2, Level) -> create_slot(KeyList1, KeyList2, Level, Bloom) ->
create_slot(KeyList1, KeyList2, Level, ?BLOCK_COUNT, [], <<>>, [], create_slot(KeyList1, KeyList2, Level, ?BLOCK_COUNT, Bloom,
[], <<>>, [],
{null, infinity, 0, null, full}). {null, infinity, 0, null, full}).
%% Keep adding blocks to the slot until either the block count is reached or %% Keep adding blocks to the slot until either the block count is reached or
%% there is a partial block %% there is a partial block
create_slot(KL1, KL2, _, 0, SegLists, SerialisedSlot, LengthList, create_slot(KL1, KL2, _, 0, Bloom,
SegLists, SerialisedSlot, LengthList,
{LowKey, LSN, HSN, LastKey, Status}) -> {LowKey, LSN, HSN, LastKey, Status}) ->
{{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList}, {{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList},
{{LSN, HSN}, LastKey, Status}, {{LSN, HSN}, LastKey, Status},
Bloom,
KL1, KL2}; KL1, KL2};
create_slot(KL1, KL2, _, _, SegLists, SerialisedSlot, LengthList, create_slot(KL1, KL2, _, _, Bloom,
SegLists, SerialisedSlot, LengthList,
{LowKey, LSN, HSN, LastKey, partial}) -> {LowKey, LSN, HSN, LastKey, partial}) ->
{{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList}, {{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList},
{{LSN, HSN}, LastKey, partial}, {{LSN, HSN}, LastKey, partial},
Bloom,
KL1, KL2}; KL1, KL2};
create_slot(KL1, KL2, _, _, SegLists, SerialisedSlot, LengthList, create_slot(KL1, KL2, _, _, Bloom,
SegLists, SerialisedSlot, LengthList,
{LowKey, LSN, HSN, LastKey, complete}) -> {LowKey, LSN, HSN, LastKey, complete}) ->
{{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList}, {{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList},
{{LSN, HSN}, LastKey, partial}, {{LSN, HSN}, LastKey, partial},
Bloom,
KL1, KL2}; KL1, KL2};
create_slot(KL1, KL2, LevelR, BlockCount, SegLists, SerialisedSlot, LengthList, create_slot(KL1, KL2, LevelR, BlockCount, Bloom,
SegLists, SerialisedSlot, LengthList,
{LowKey, LSN, HSN, LastKey, _Status}) -> {LowKey, LSN, HSN, LastKey, _Status}) ->
{BlockKeyList, Status, {BlockKeyList, Status,
{LSNb, HSNb}, {LSNb, HSNb},
SegmentList, KL1b, KL2b} = create_block(KL1, KL2, LevelR), SegmentList,
UpdBloom,
KL1b, KL2b} = create_block(KL1, KL2, LevelR, Bloom),
TrackingMetadata = case {LowKey, BlockKeyList} of TrackingMetadata = case {LowKey, BlockKeyList} of
{null, []} -> {null, []} ->
{null, LSN, HSN, LastKey, Status}; {null, LSN, HSN, LastKey, Status};
@@ -1043,8 +1076,9 @@ create_slot(KL1, KL2, LevelR, BlockCount, SegLists, SerialisedSlot, LengthList,
SerialisedBlock = serialise_block(BlockKeyList), SerialisedBlock = serialise_block(BlockKeyList),
BlockLength = byte_size(SerialisedBlock), BlockLength = byte_size(SerialisedBlock),
SerialisedSlot2 = <<SerialisedSlot/binary, SerialisedBlock/binary>>, SerialisedSlot2 = <<SerialisedSlot/binary, SerialisedBlock/binary>>,
create_slot(KL1b, KL2b, LevelR, BlockCount - 1, SegLists ++ [SegmentList], SegList2 = SegLists ++ [SegmentList],
SerialisedSlot2, LengthList ++ [BlockLength], create_slot(KL1b, KL2b, LevelR, BlockCount - 1, UpdBloom,
SegList2, SerialisedSlot2, LengthList ++ [BlockLength],
TrackingMetadata). TrackingMetadata).
serialise_block(BlockKeyList) -> serialise_block(BlockKeyList) ->
@@ -1133,8 +1167,6 @@ pointer_append_queryresults(Results, QueryPid) ->
%% Update the sequence numbers %% Update the sequence numbers
update_sequencenumbers(Item, LSN, HSN) when is_tuple(Item) ->
update_sequencenumbers(leveled_codec:strip_to_seqonly(Item), LSN, HSN);
update_sequencenumbers(SN, infinity, 0) -> update_sequencenumbers(SN, infinity, 0) ->
{SN, SN}; {SN, SN};
update_sequencenumbers(SN, LSN, HSN) when SN < LSN -> update_sequencenumbers(SN, LSN, HSN) when SN < LSN ->
@@ -1433,9 +1465,11 @@ simple_create_block_test() ->
{2, {active, infinity}, no_lookup, null}}], {2, {active, infinity}, no_lookup, null}}],
KeyList2 = [{{o, "Bucket1", "Key2", null}, KeyList2 = [{{o, "Bucket1", "Key2", null},
{3, {active, infinity}, no_lookup, null}}], {3, {active, infinity}, no_lookup, null}}],
{MergedKeyList, ListStatus, SN, _, _, _} = create_block(KeyList1, BlockOutput = create_block(KeyList1,
KeyList2, KeyList2,
#level{level=1}), #level{level=1},
leveled_tinybloom:empty(4)),
{MergedKeyList, ListStatus, SN, _, _, _, _} = BlockOutput,
?assertMatch(partial, ListStatus), ?assertMatch(partial, ListStatus),
[H1|T1] = MergedKeyList, [H1|T1] = MergedKeyList,
?assertMatch({{o, "Bucket1", "Key1", null}, ?assertMatch({{o, "Bucket1", "Key1", null},
@@ -1454,9 +1488,11 @@ dominate_create_block_test() ->
{2, {active, infinity}, no_lookup, null}}], {2, {active, infinity}, no_lookup, null}}],
KeyList2 = [{{o, "Bucket1", "Key2", null}, KeyList2 = [{{o, "Bucket1", "Key2", null},
{3, {tomb, infinity}, no_lookup, null}}], {3, {tomb, infinity}, no_lookup, null}}],
{MergedKeyList, ListStatus, SN, _, _, _} = create_block(KeyList1, BlockOutput = create_block(KeyList1,
KeyList2, KeyList2,
#level{level=1}), #level{level=1},
leveled_tinybloom:empty(4)),
{MergedKeyList, ListStatus, SN, _, _, _, _} = BlockOutput,
?assertMatch(partial, ListStatus), ?assertMatch(partial, ListStatus),
[K1, K2] = MergedKeyList, [K1, K2] = MergedKeyList,
?assertMatch(K1, lists:nth(1, KeyList1)), ?assertMatch(K1, lists:nth(1, KeyList1)),
@@ -1502,9 +1538,11 @@ sample_keylist() ->
alternating_create_block_test() -> alternating_create_block_test() ->
{KeyList1, KeyList2} = sample_keylist(), {KeyList1, KeyList2} = sample_keylist(),
{MergedKeyList, ListStatus, _, _, _, _} = create_block(KeyList1, BlockOutput = create_block(KeyList1,
KeyList2, KeyList2,
#level{level=1}), #level{level=1},
leveled_tinybloom:empty(4)),
{MergedKeyList, ListStatus, _SN, _, _, _, _} = BlockOutput,
BlockSize = length(MergedKeyList), BlockSize = length(MergedKeyList),
?assertMatch(BlockSize, 32), ?assertMatch(BlockSize, 32),
?assertMatch(ListStatus, complete), ?assertMatch(ListStatus, complete),
@@ -1515,10 +1553,11 @@ alternating_create_block_test() ->
K32 = lists:nth(32, MergedKeyList), K32 = lists:nth(32, MergedKeyList),
?assertMatch(K32, {{o, "Bucket4", "Key1", null}, {1, {active, infinity}, 0, null}}), ?assertMatch(K32, {{o, "Bucket4", "Key1", null}, {1, {active, infinity}, 0, null}}),
HKey = {{o, "Bucket1", "Key0", null}, {1, {active, infinity}, 0, null}}, HKey = {{o, "Bucket1", "Key0", null}, {1, {active, infinity}, 0, null}},
{_, ListStatus2, _, _, _, _} = create_block([HKey|KeyList1], {_, LStatus2, _, _, _, _, _} = create_block([HKey|KeyList1],
KeyList2, KeyList2,
#level{level=1}), #level{level=1},
?assertMatch(ListStatus2, full). leveled_tinybloom:empty(4)),
?assertMatch(full, LStatus2).
merge_seglists_test() -> merge_seglists_test() ->
@@ -1655,9 +1694,13 @@ merge_seglists_test() ->
createslot_stage1_test() -> createslot_stage1_test() ->
{KeyList1, KeyList2} = sample_keylist(), {KeyList1, KeyList2} = sample_keylist(),
Out = create_slot(KeyList1, KeyList2, #level{level=1}), Out = create_slot(KeyList1,
KeyList2,
#level{level=1},
leveled_tinybloom:empty(4)),
{{LowKey, SegFilter, _SerialisedSlot, _LengthList}, {{LowKey, SegFilter, _SerialisedSlot, _LengthList},
{{LSN, HSN}, LastKey, Status}, {{LSN, HSN}, LastKey, Status},
_UpdBloom,
KL1, KL2} = Out, KL1, KL2} = Out,
?assertMatch(LowKey, {o, "Bucket1", "Key1", null}), ?assertMatch(LowKey, {o, "Bucket1", "Key1", null}),
?assertMatch(LastKey, {o, "Bucket4", "Key1", null}), ?assertMatch(LastKey, {o, "Bucket4", "Key1", null}),
@@ -1678,9 +1721,11 @@ createslot_stage1_test() ->
createslot_stage2_test() -> createslot_stage2_test() ->
Out = create_slot(lists:sort(generate_randomkeys(100)), Out = create_slot(lists:sort(generate_randomkeys(100)),
lists:sort(generate_randomkeys(100)), lists:sort(generate_randomkeys(100)),
#level{level=1}), #level{level=1},
leveled_tinybloom:empty(4)),
{{_LowKey, _SegFilter, SerialisedSlot, LengthList}, {{_LowKey, _SegFilter, SerialisedSlot, LengthList},
{{_LSN, _HSN}, _LastKey, Status}, {{_LSN, _HSN}, _LastKey, Status},
_UpdBloom,
_KL1, _KL2} = Out, _KL1, _KL2} = Out,
?assertMatch(Status, full), ?assertMatch(Status, full),
Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList), Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList),
@@ -1691,9 +1736,11 @@ createslot_stage2_test() ->
createslot_stage3_test() -> createslot_stage3_test() ->
Out = create_slot(lists:sort(generate_sequentialkeys(100, 1)), Out = create_slot(lists:sort(generate_sequentialkeys(100, 1)),
lists:sort(generate_sequentialkeys(100, 101)), lists:sort(generate_sequentialkeys(100, 101)),
#level{level=1}), #level{level=1},
leveled_tinybloom:empty(4)),
{{LowKey, SegFilter, SerialisedSlot, LengthList}, {{LowKey, SegFilter, SerialisedSlot, LengthList},
{{_LSN, _HSN}, LastKey, Status}, {{_LSN, _HSN}, LastKey, Status},
_UpdBloom,
KL1, KL2} = Out, KL1, KL2} = Out,
?assertMatch(Status, full), ?assertMatch(Status, full),
Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList), Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList),
@@ -1729,17 +1776,19 @@ createslot_stage3_test() ->
testwrite_function(slots, {Handle, SerialisedSlots}) -> testwrite_function(slots, {Handle, SerialisedSlots}) ->
lists:append(Handle, [SerialisedSlots]); lists:append(Handle, [SerialisedSlots]);
testwrite_function(finalise, {Handle, C_SlotIndex, SNExtremes, KeyExtremes}) -> testwrite_function(finalise,
{Handle, C_SlotIndex, SNExtremes, KeyExtremes}. {Handle, C_SlotIndex, {SNExtremes, KeyExtremes, Bloom}}) ->
{Handle, C_SlotIndex, SNExtremes, KeyExtremes, Bloom}.
writekeys_stage1_test() -> writekeys_stage1_test() ->
{KL1, KL2} = sample_keylist(), {KL1, KL2} = sample_keylist(),
{FunOut, {_KL1Rem, _KL2Rem}} = write_keys([], {FunOut, {_KL1Rem, _KL2Rem}} = write_keys([],
KL1, KL2, KL1, KL2,
[], <<>>, [], <<>>,
leveled_tinybloom:empty(4),
#level{level=1}, #level{level=1},
fun testwrite_function/2), fun testwrite_function/2),
{Handle, {_, PointerIndex}, SNExtremes, KeyExtremes} = FunOut, {Handle, {_, PointerIndex}, SNExtremes, KeyExtremes, _Bloom} = FunOut,
?assertMatch(SNExtremes, {1,3}), ?assertMatch(SNExtremes, {1,3}),
?assertMatch(KeyExtremes, {{o, "Bucket1", "Key1", null}, ?assertMatch(KeyExtremes, {{o, "Bucket1", "Key1", null},
{o, "Bucket4", "Key1", null}}), {o, "Bucket4", "Key1", null}}),

View file

@@ -33,6 +33,8 @@ empty(Width) when Width =< 256 ->
FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end, FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end,
lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)). lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)).
enter({hash, no_lookup}, Bloom) ->
Bloom;
enter({hash, Hash}, Bloom) -> enter({hash, Hash}, Bloom) ->
{H0, Bit1, Bit2} = split_hash(Hash), {H0, Bit1, Bit2} = split_hash(Hash),
Slot = H0 rem dict:size(Bloom), Slot = H0 rem dict:size(Bloom),
@@ -45,6 +47,8 @@ enter(Key, Bloom) ->
Hash = leveled_codec:magic_hash(Key), Hash = leveled_codec:magic_hash(Key),
enter({hash, Hash}, Bloom). enter({hash, Hash}, Bloom).
check({hash, _Hash}, undefined) ->
true;
check({hash, Hash}, Bloom) -> check({hash, Hash}, Bloom) ->
{H0, Bit1, Bit2} = split_hash(Hash), {H0, Bit1, Bit2} = split_hash(Hash),
Slot = H0 rem dict:size(Bloom), Slot = H0 rem dict:size(Bloom),