Refactor writing SFT Files

Previously the code had involved very high arity functions which were
hard to follow.  This has been simplified somewhat with the addition of
a writer record to make things easier to track, as well as a general
refactoring to better logically separate the building of things.
This commit is contained in:
martinsumner 2016-12-12 16:12:31 +00:00
parent addd4c89d0
commit 1f56501499

View file

@ -214,6 +214,13 @@
oversized_file = false :: boolean(), oversized_file = false :: boolean(),
penciller :: pid()}). penciller :: pid()}).
%% Helper object when writing a file to keep track of various accumulators
-record(writer, {slot_index = [] :: list(),
slot_binary = <<>> :: binary(),
bloom = leveled_tinybloom:empty(?BLOOM_WIDTH),
min_sqn = infinity :: integer()|infinity,
max_sqn = 0 :: integer(),
last_key = {last, null}}).
%%%============================================================================ %%%============================================================================
%%% API %%% API
@ -532,13 +539,12 @@ complete_file(Handle, FileMD, KL1, KL2, LevelR) ->
complete_file(Handle, FileMD, KL1, KL2, LevelR, false). complete_file(Handle, FileMD, KL1, KL2, LevelR, false).
complete_file(Handle, FileMD, KL1, KL2, LevelR, Rename) -> complete_file(Handle, FileMD, KL1, KL2, LevelR, Rename) ->
EmptyBloom = leveled_tinybloom:empty(?BLOOM_WIDTH),
{ok, KeyRemainders} = write_keys(Handle, {ok, KeyRemainders} = write_keys(Handle,
maybe_expand_pointer(KL1), maybe_expand_pointer(KL1),
maybe_expand_pointer(KL2), maybe_expand_pointer(KL2),
[], <<>>, EmptyBloom,
LevelR, LevelR,
fun sftwrite_function/2), fun sftwrite_function/2,
#writer{}),
{ReadHandle, UpdFileMD} = case Rename of {ReadHandle, UpdFileMD} = case Rename of
false -> false ->
open_file(FileMD); open_file(FileMD);
@ -570,27 +576,33 @@ rename_file(OldName, NewName) ->
%% A key out of range may fail %% A key out of range may fail
fetch_keyvalue(Handle, FileMD, Key) -> fetch_keyvalue(Handle, FileMD, Key) ->
{_NearestKey, {FilterLen, PointerF}, case get_nearestkey(FileMD#state.slot_index, Key) of
{LengthList, PointerB}} = get_nearestkey(FileMD#state.slot_index, Key), not_found ->
not_present;
{_NearestKey, {FilterLen, PointerF}, {LengthList, PointerB}} ->
FilterPointer = PointerF + FileMD#state.filter_pointer,
{ok, SegFilter} = file:pread(Handle, {ok, SegFilter} = file:pread(Handle,
PointerF + FileMD#state.filter_pointer, FilterPointer,
FilterLen), FilterLen),
SegID = hash_for_segmentid({keyonly, Key}), SegID = hash_for_segmentid({keyonly, Key}),
case check_for_segments(SegFilter, [SegID], true) of case check_for_segments(SegFilter, [SegID], true) of
{maybe_present, BlockList} -> {maybe_present, BlockList} ->
BlockPointer = PointerB + FileMD#state.slots_pointer,
fetch_keyvalue_fromblock(BlockList, fetch_keyvalue_fromblock(BlockList,
Key, Key,
LengthList, LengthList,
Handle, Handle,
PointerB + FileMD#state.slots_pointer); BlockPointer);
not_present -> not_present ->
not_present; not_present;
error_so_maybe_present -> error_so_maybe_present ->
BlockPointer = PointerB + FileMD#state.slots_pointer,
fetch_keyvalue_fromblock(lists:seq(0,length(LengthList)), fetch_keyvalue_fromblock(lists:seq(0,length(LengthList)),
Key, Key,
LengthList, LengthList,
Handle, Handle,
PointerB + FileMD#state.slots_pointer) BlockPointer)
end
end. end.
%% Fetches a range of keys returning a list of {Key, SeqN} tuples %% Fetches a range of keys returning a list of {Key, SeqN} tuples
@ -767,100 +779,59 @@ get_nextkeyaftermatch([_KTuple|T], KeyToFind, PrevV) ->
%% write the Key lists to the file slot by slot. %% write the Key lists to the file slot by slot.
%% %%
%% Slots are created then written in bulk to improve I/O efficiency. Slots will %% Slots are created then written in bulk to improve I/O efficiency. Slots will
%% be written in groups of 32 %% be written in groups
write_keys(Handle, write_keys(Handle, KL1, KL2, LevelR, WriteFun, WriteState) ->
KL1, KL2, write_keys(Handle, KL1, KL2, LevelR, WriteFun, WriteState, {0, 0, []}).
SlotIndex, SerialisedSlots, InitialBloom,
LevelR, WriteFun) ->
write_keys(Handle,
KL1, KL2,
{0, 0},
SlotIndex, SerialisedSlots, InitialBloom,
{infinity, 0}, null, {last, null},
LevelR, WriteFun).
write_keys(Handle, KL1, KL2, LevelR, WriteFun, WState,
write_keys(Handle, {SlotC, SlotT, SlotLists})
KL1, KL2, when SlotC =:= ?SLOT_GROUPWRITE_COUNT ->
{SlotCount, SlotTotal}, WState0 = lists:foldl(fun finalise_slot/2, WState, SlotLists),
SlotIndex, SerialisedSlots, Bloom, Handle0 = WriteFun(slots, {Handle, WState0#writer.slot_binary}),
{LSN, HSN}, LowKey, LastKey, case maxslots_bylevel(SlotT, LevelR#level.level) of
LevelR, WriteFun)
when SlotCount =:= ?SLOT_GROUPWRITE_COUNT ->
UpdHandle = WriteFun(slots , {Handle, SerialisedSlots}),
case maxslots_bylevel(SlotTotal, LevelR#level.level) of
reached -> reached ->
{complete_keywrite(UpdHandle, {complete_keywrite(Handle0, WState0, WriteFun), {KL1, KL2}};
SlotIndex,
{{LSN, HSN}, {LowKey, LastKey}, Bloom},
WriteFun),
{KL1, KL2}};
continue -> continue ->
write_keys(UpdHandle, write_keys(Handle0, KL1, KL2, LevelR, WriteFun,
KL1, KL2, WState0#writer{slot_binary = <<>>}, {0, SlotT, []})
{0, SlotTotal},
SlotIndex, <<>>, Bloom,
{LSN, HSN}, LowKey, LastKey,
LevelR, WriteFun)
end; end;
write_keys(Handle, write_keys(Handle, KL1, KL2, LevelR, WriteFun, WState,
KL1, KL2, {SlotC, SlotT, SlotLists}) ->
{SlotCount, SlotTotal}, {Status, BlockKeyLists} = create_slot(KL1, KL2, LevelR),
SlotIndex, SerialisedSlots, Bloom,
{LSN, HSN}, LowKey, LastKey,
LevelR, WriteFun) ->
SlotOutput = create_slot(KL1, KL2, LevelR, Bloom),
{{LowKey_Slot, SegFilter, SerialisedSlot, LengthList},
{{LSN_Slot, HSN_Slot}, LastKey_Slot, Status},
UpdBloom,
KL1rem, KL2rem} = SlotOutput,
UpdSlotIndex = lists:append(SlotIndex,
[{LowKey_Slot, SegFilter, LengthList}]),
UpdSlots = <<SerialisedSlots/binary, SerialisedSlot/binary>>,
SNExtremes = {min(LSN_Slot, LSN), max(HSN_Slot, HSN)},
FinalKey = case LastKey_Slot of
null -> LastKey;
_ -> LastKey_Slot
end,
FirstKey = case LowKey of
null -> LowKey_Slot;
_ -> LowKey
end,
case Status of case Status of
partial -> S when S == complete; S == partial ->
UpdHandle = WriteFun(slots , {Handle, UpdSlots}), WState0 =
{complete_keywrite(UpdHandle, case BlockKeyLists of
UpdSlotIndex, [[]] ->
{SNExtremes, {FirstKey, FinalKey}, UpdBloom}, WState;
WriteFun), _ ->
{KL1rem, KL2rem}}; lists:foldl(fun finalise_slot/2,
full -> WState,
write_keys(Handle, SlotLists ++ [BlockKeyLists])
KL1rem, KL2rem, end,
{SlotCount + 1, SlotTotal + 1}, Handle0 = WriteFun(slots, {Handle, WState0#writer.slot_binary}),
UpdSlotIndex, UpdSlots, UpdBloom, {complete_keywrite(Handle0, WState0, WriteFun), {[], []}};
SNExtremes, FirstKey, FinalKey, {full, KL1Rem, KL2Rem} ->
LevelR, WriteFun); write_keys(Handle, KL1Rem, KL2Rem, LevelR, WriteFun, WState,
complete -> {SlotC + 1, SlotT, SlotLists ++ [BlockKeyLists]})
UpdHandle = WriteFun(slots , {Handle, UpdSlots}),
{complete_keywrite(UpdHandle,
UpdSlotIndex,
{SNExtremes, {FirstKey, FinalKey}, UpdBloom},
WriteFun),
{KL1rem, KL2rem}}
end. end.
complete_keywrite(Handle, complete_keywrite(Handle, WriteState, WriteFun) ->
SlotIndex, FirstKey =
{SNExtremes, {FirstKey, FinalKey}, Bloom}, case length(WriteState#writer.slot_index) of
WriteFun) -> 0 ->
ConvSlotIndex = convert_slotindex(SlotIndex), null;
_ ->
element(1, lists:nth(1, WriteState#writer.slot_index))
end,
ConvSlotIndex = convert_slotindex(WriteState#writer.slot_index),
WriteFun(finalise, {Handle, WriteFun(finalise, {Handle,
ConvSlotIndex, ConvSlotIndex,
{SNExtremes, {FirstKey, FinalKey}, Bloom}}). {{WriteState#writer.min_sqn, WriteState#writer.max_sqn},
{FirstKey, WriteState#writer.last_key},
WriteState#writer.bloom}}).
%% Take a slot index, and remove the SegFilters replacing with pointers %% Take a slot index, and remove the SegFilters replacing with pointers
%% Return a tuple of the accumulated slot filters, and a pointer-based %% Return a tuple of the accumulated slot filters, and a pointer-based
@ -877,9 +848,8 @@ convert_slotindex(SlotIndex) ->
{LengthList, PointerB}}]), {LengthList, PointerB}}]),
PointerF + FilterLen, PointerF + FilterLen,
PointerB + lists:sum(LengthList)} end, PointerB + lists:sum(LengthList)} end,
{SlotFilters, PointerIndex, _FLength, _BLength} = lists:foldl(SlotFun, {SlotFilters, PointerIndex, _FLength, _BLength} =
{<<>>, [], 0, 0}, lists:foldl(SlotFun, {<<>>, [], 0, 0}, SlotIndex),
SlotIndex),
{SlotFilters, PointerIndex}. {SlotFilters, PointerIndex}.
sftwrite_function(slots, {Handle, SerialisedSlots}) -> sftwrite_function(slots, {Handle, SerialisedSlots}) ->
@ -927,158 +897,115 @@ maxslots_bylevel(SlotTotal, _Level) ->
%% Take two potentially overlapping lists of keys and output a Block, %% Take two potentially overlapping lists of keys and produce a block size
%% together with: %% list of keys in the correct order. Outputs:
%% - block status (full, partial) %% - Status of
%% - the lowest and highest sequence numbers in the block %% - - all_complete (no more keys and block is complete)
%% - the list of segment IDs in the block %% - - partial (no more keys and block is not complete)
%% - the remainders of the lists %% - - {block_full, Rem1, Rem2} the block is complete but there is a remainder
%% The Key lists must be sorted in key order. The last key in a list may be %% of keys
%% a pointer to request more keys for the file (otherwise it is assumed there
%% are no more keys) create_block(KeyList1, KeyList2, LevelR) ->
%% create_block(KeyList1, KeyList2, LevelR, []).
%% Level also to be passed in
%% This is either an integer (to be ignored) of {floor, os:timestamp()}
%% if this is the basement level of the LevelDB database and expired keys
%% and tombstone should be reaped
%% Do we need to check here that KeyList1 and KeyList2 are not just a [pointer] create_block([], [], _LevelR, BlockKeyList)
%% Otherwise the pointer will never be expanded
%%
%% Also this should return a partial block if the KeyLists have been exhausted
%% but the block is full
create_block(KeyList1, KeyList2, LevelR, Bloom) ->
create_block(KeyList1, KeyList2, [], {infinity, 0}, [], LevelR, Bloom).
create_block(KeyList1, KeyList2,
BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom)
when length(BlockKeyList)==?BLOCK_SIZE -> when length(BlockKeyList)==?BLOCK_SIZE ->
case {KeyList1, KeyList2} of {all_complete, lists:reverse(BlockKeyList)};
{[], []} -> create_block([], [], _LevelR, BlockKeyList) ->
{lists:reverse(BlockKeyList), {partial, lists:reverse(BlockKeyList)};
complete, create_block(KeyList1, KeyList2, _LevelR, BlockKeyList)
{LSN, HSN}, when length(BlockKeyList)==?BLOCK_SIZE ->
SegmentList, {{block_full, KeyList1, KeyList2}, lists:reverse(BlockKeyList)};
Bloom, create_block(KeyList1, KeyList2, LevelR, BlockKeyList) ->
[], []}; case key_dominates(KeyList1, KeyList2,
_ ->
{lists:reverse(BlockKeyList),
full,
{LSN, HSN},
SegmentList,
Bloom,
KeyList1, KeyList2}
end;
create_block([], [], BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom) ->
{lists:reverse(BlockKeyList),
partial,
{LSN, HSN},
SegmentList,
Bloom,
[], []};
create_block(KeyList1, KeyList2,
BlockKeyList, {LSN, HSN}, SegmentList, LevelR, Bloom) ->
case key_dominates(KeyList1,
KeyList2,
{LevelR#level.is_basement, LevelR#level.timestamp}) of {LevelR#level.is_basement, LevelR#level.timestamp}) of
{{next_key, TopKey}, Rem1, Rem2} -> {{next_key, TopKey}, Rem1, Rem2} ->
{_K, V} = TopKey, create_block(Rem1, Rem2, LevelR, [TopKey|BlockKeyList]);
{SQN, _St, MH, _MD} = leveled_codec:striphead_to_details(V),
{UpdLSN, UpdHSN} = update_sequencenumbers(SQN, LSN, HSN),
UpdBloom = leveled_tinybloom:enter({hash, MH}, Bloom),
NewBlockKeyList = [TopKey|BlockKeyList],
NewSegmentList = [hash_for_segmentid(TopKey)|SegmentList],
create_block(Rem1, Rem2,
NewBlockKeyList, {UpdLSN, UpdHSN},
NewSegmentList, LevelR, UpdBloom);
{skipped_key, Rem1, Rem2} -> {skipped_key, Rem1, Rem2} ->
create_block(Rem1, Rem2, create_block(Rem1, Rem2, LevelR, BlockKeyList)
BlockKeyList, {LSN, HSN}, end.
SegmentList, LevelR, Bloom)
%% create_slot should simply output a list of BlockKeyLists no bigger than
%% the BlockCount, and the status (with key remainders if not complete)
create_slot(KL1, KL2, LevelR) ->
create_slot(KL1, KL2, LevelR, ?BLOCK_COUNT, []).
create_slot(KL1, KL2, LevelR, BlockCount, BlockKeyLists) ->
{Status, KeyList} = create_block(KL1, KL2, LevelR),
case {Status, BlockCount - 1} of
{partial, _N} ->
{partial, BlockKeyLists ++ [KeyList]};
{all_complete, 0} ->
{complete, BlockKeyLists ++ [KeyList]};
{all_complete, _N} ->
% From the perspective of the slot it is partially complete
{partial, BlockKeyLists ++ [KeyList]};
{{block_full, KL1Rem, KL2Rem}, 0} ->
{{full, KL1Rem, KL2Rem}, BlockKeyLists ++ [KeyList]};
{{block_full, KL1Rem, KL2Rem}, N} ->
create_slot(KL1Rem, KL2Rem, LevelR, N, BlockKeyLists ++ [KeyList])
end. end.
%% Should return an index entry in the Slot Index. Each entry consists of: %% Fold over the List of BlockKeys updating the writer record
%% - Start Key finalise_slot(BlockKeyLists, WriteState) ->
%% - SegmentIDFilter for the (will eventually be replaced with a pointer) BlockFolder =
%% - Serialised Slot (will eventually be replaced with a pointer) fun(KV, {AccMinSQN, AccMaxSQN, Bloom, SegmentIDList}) ->
%% - Length for each Block within the Serialised Slot {SQN, Hash} = leveled_codec:strip_to_seqnhashonly(KV),
%% Additional information will also be provided {min(AccMinSQN, SQN),
%% - {Low Seq Number, High Seq Number} within the slot max(AccMaxSQN, SQN),
%% - End Key leveled_tinybloom:enter({hash, Hash}, Bloom),
%% - Whether the slot is full or partially filled [hash_for_segmentid(KV)|SegmentIDList]}
%% - Remainder of any KeyLists used to make the slot
create_slot(KeyList1, KeyList2, Level, Bloom) ->
create_slot(KeyList1, KeyList2, Level, ?BLOCK_COUNT, Bloom,
[], <<>>, [],
{null, infinity, 0, null, full}).
%% Keep adding blocks to the slot until either the block count is reached or
%% there is a partial block
create_slot(KL1, KL2, _, 0, Bloom,
SegLists, SerialisedSlot, LengthList,
{LowKey, LSN, HSN, LastKey, Status}) ->
{{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList},
{{LSN, HSN}, LastKey, Status},
Bloom,
KL1, KL2};
create_slot(KL1, KL2, _, _, Bloom,
SegLists, SerialisedSlot, LengthList,
{LowKey, LSN, HSN, LastKey, partial}) ->
{{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList},
{{LSN, HSN}, LastKey, partial},
Bloom,
KL1, KL2};
create_slot(KL1, KL2, _, _, Bloom,
SegLists, SerialisedSlot, LengthList,
{LowKey, LSN, HSN, LastKey, complete}) ->
{{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList},
{{LSN, HSN}, LastKey, partial},
Bloom,
KL1, KL2};
create_slot(KL1, KL2, LevelR, BlockCount, Bloom,
SegLists, SerialisedSlot, LengthList,
{LowKey, LSN, HSN, LastKey, _Status}) ->
{BlockKeyList, Status,
{LSNb, HSNb},
SegmentList,
UpdBloom,
KL1b, KL2b} = create_block(KL1, KL2, LevelR, Bloom),
TrackingMetadata = case {LowKey, BlockKeyList} of
{null, []} ->
{null, LSN, HSN, LastKey, Status};
{null, _} ->
[NewLowKeyV|_] = BlockKeyList,
NewLastKey = last_key(BlockKeyList, {keyonly, LastKey}),
{leveled_codec:strip_to_keyonly(NewLowKeyV),
min(LSN, LSNb), max(HSN, HSNb),
leveled_codec:strip_to_keyonly(NewLastKey),
Status};
{_, _} ->
NewLastKey = last_key(BlockKeyList, {keyonly, LastKey}),
{LowKey,
min(LSN, LSNb), max(HSN, HSNb),
leveled_codec:strip_to_keyonly(NewLastKey),
Status}
end, end,
SlotFolder =
fun(BlockKeyList,
{MinSQN, MaxSQN, Bloom, SegLists, KVBinary, Lengths}) ->
{BlockMinSQN, BlockMaxSQN, UpdBloom, Segs} =
lists:foldr(BlockFolder,
{infinity, 0, Bloom, []},
BlockKeyList),
SerialisedBlock = serialise_block(BlockKeyList), SerialisedBlock = serialise_block(BlockKeyList),
BlockLength = byte_size(SerialisedBlock), {min(MinSQN, BlockMinSQN),
SerialisedSlot2 = <<SerialisedSlot/binary, SerialisedBlock/binary>>, max(MaxSQN, BlockMaxSQN),
SegList2 = SegLists ++ [SegmentList], UpdBloom,
create_slot(KL1b, KL2b, LevelR, BlockCount - 1, UpdBloom, SegLists ++ [Segs],
SegList2, SerialisedSlot2, LengthList ++ [BlockLength], <<KVBinary/binary, SerialisedBlock/binary>>,
TrackingMetadata). Lengths ++ [byte_size(SerialisedBlock)]}
end,
{SlotMinSQN,
SlotMaxSQN,
SlotUpdBloom,
SlotSegLists,
SlotBinary,
BlockLengths} =
lists:foldl(SlotFolder,
{WriteState#writer.min_sqn,
WriteState#writer.max_sqn,
WriteState#writer.bloom,
[],
WriteState#writer.slot_binary,
[]},
BlockKeyLists),
FirstSlotKey = leveled_codec:strip_to_keyonly(lists:nth(1,
lists:nth(1,
BlockKeyLists))),
LastSlotKV = lists:last(lists:last(BlockKeyLists)),
SegFilter = generate_segment_filter(SlotSegLists),
UpdSlotIndex = lists:append(WriteState#writer.slot_index,
[{FirstSlotKey, SegFilter, BlockLengths}]),
#writer{slot_index = UpdSlotIndex,
slot_binary = SlotBinary,
bloom = SlotUpdBloom,
min_sqn = SlotMinSQN,
max_sqn = SlotMaxSQN,
last_key = leveled_codec:strip_to_keyonly(LastSlotKV)}.
last_key([], LastKey) ->
LastKey;
last_key(BlockKeyList, _LastKey) ->
lists:last(BlockKeyList).
serialise_block(BlockKeyList) -> serialise_block(BlockKeyList) ->
term_to_binary(BlockKeyList, [{compressed, ?COMPRESSION_LEVEL}]). term_to_binary(BlockKeyList, [{compressed, ?COMPRESSION_LEVEL}]).
@ -1165,17 +1092,6 @@ pointer_append_queryresults(Results, QueryPid) ->
end. end.
%% Update the sequence numbers
update_sequencenumbers(SN, infinity, 0) ->
{SN, SN};
update_sequencenumbers(SN, LSN, HSN) when SN < LSN ->
{SN, HSN};
update_sequencenumbers(SN, LSN, HSN) when SN > HSN ->
{LSN, SN};
update_sequencenumbers(_SN, LSN, HSN) ->
{LSN, HSN}.
%% The Segment filter is a compressed filter representing the keys in a %% The Segment filter is a compressed filter representing the keys in a
%% given slot. The filter is delta-compressed list of integers using rice %% given slot. The filter is delta-compressed list of integers using rice
%% encoding extended by the reference to each integer having an extra two bits %% encoding extended by the reference to each integer having an extra two bits
@ -1464,21 +1380,18 @@ simple_create_block_test() ->
{2, {active, infinity}, no_lookup, null}}], {2, {active, infinity}, no_lookup, null}}],
KeyList2 = [{{o, "Bucket1", "Key2", null}, KeyList2 = [{{o, "Bucket1", "Key2", null},
{3, {active, infinity}, no_lookup, null}}], {3, {active, infinity}, no_lookup, null}}],
BlockOutput = create_block(KeyList1, {Status, BlockKeyList} = create_block(KeyList1,
KeyList2, KeyList2,
#level{level=1}, #level{level=1}),
leveled_tinybloom:empty(4)), ?assertMatch(partial, Status),
{MergedKeyList, ListStatus, SN, _, _, _, _} = BlockOutput, [H1|T1] = BlockKeyList,
?assertMatch(partial, ListStatus),
[H1|T1] = MergedKeyList,
?assertMatch({{o, "Bucket1", "Key1", null}, ?assertMatch({{o, "Bucket1", "Key1", null},
{1, {active, infinity}, no_lookup, null}}, H1), {1, {active, infinity}, no_lookup, null}}, H1),
[H2|T2] = T1, [H2|T2] = T1,
?assertMatch({{o, "Bucket1", "Key2", null}, ?assertMatch({{o, "Bucket1", "Key2", null},
{3, {active, infinity}, no_lookup, null}}, H2), {3, {active, infinity}, no_lookup, null}}, H2),
?assertMatch([{{o, "Bucket1", "Key3", null}, ?assertMatch([{{o, "Bucket1", "Key3", null},
{2, {active, infinity}, no_lookup, null}}], T2), {2, {active, infinity}, no_lookup, null}}], T2).
?assertMatch(SN, {1,3}).
dominate_create_block_test() -> dominate_create_block_test() ->
KeyList1 = [{{o, "Bucket1", "Key1", null}, KeyList1 = [{{o, "Bucket1", "Key1", null},
@ -1487,16 +1400,13 @@ dominate_create_block_test() ->
{2, {active, infinity}, no_lookup, null}}], {2, {active, infinity}, no_lookup, null}}],
KeyList2 = [{{o, "Bucket1", "Key2", null}, KeyList2 = [{{o, "Bucket1", "Key2", null},
{3, {tomb, infinity}, no_lookup, null}}], {3, {tomb, infinity}, no_lookup, null}}],
BlockOutput = create_block(KeyList1, {Status, BlockKeyList} = create_block(KeyList1,
KeyList2, KeyList2,
#level{level=1}, #level{level=1}),
leveled_tinybloom:empty(4)), ?assertMatch(partial, Status),
{MergedKeyList, ListStatus, SN, _, _, _, _} = BlockOutput, [K1, K2] = BlockKeyList,
?assertMatch(partial, ListStatus),
[K1, K2] = MergedKeyList,
?assertMatch(K1, lists:nth(1, KeyList1)), ?assertMatch(K1, lists:nth(1, KeyList1)),
?assertMatch(K2, lists:nth(1, KeyList2)), ?assertMatch(K2, lists:nth(1, KeyList2)).
?assertMatch(SN, {1,3}).
sample_keylist() -> sample_keylist() ->
KeyList1 = KeyList1 =
@ -1537,26 +1447,21 @@ sample_keylist() ->
alternating_create_block_test() -> alternating_create_block_test() ->
{KeyList1, KeyList2} = sample_keylist(), {KeyList1, KeyList2} = sample_keylist(),
BlockOutput = create_block(KeyList1, {Status, BlockKeyList} = create_block(KeyList1,
KeyList2, KeyList2,
#level{level=1}, #level{level=1}),
leveled_tinybloom:empty(4)), BlockSize = length(BlockKeyList),
{MergedKeyList, ListStatus, _SN, _, _, _, _} = BlockOutput,
BlockSize = length(MergedKeyList),
?assertMatch(BlockSize, 32), ?assertMatch(BlockSize, 32),
?assertMatch(ListStatus, complete), ?assertMatch(all_complete, Status),
K1 = lists:nth(1, MergedKeyList), K1 = lists:nth(1, BlockKeyList),
?assertMatch(K1, {{o, "Bucket1", "Key1", null}, {1, {active, infinity}, 0, null}}), ?assertMatch(K1, {{o, "Bucket1", "Key1", null}, {1, {active, infinity}, 0, null}}),
K11 = lists:nth(11, MergedKeyList), K11 = lists:nth(11, BlockKeyList),
?assertMatch(K11, {{o, "Bucket1", "Key9b", null}, {1, {active, infinity}, 0, null}}), ?assertMatch(K11, {{o, "Bucket1", "Key9b", null}, {1, {active, infinity}, 0, null}}),
K32 = lists:nth(32, MergedKeyList), K32 = lists:nth(32, BlockKeyList),
?assertMatch(K32, {{o, "Bucket4", "Key1", null}, {1, {active, infinity}, 0, null}}), ?assertMatch(K32, {{o, "Bucket4", "Key1", null}, {1, {active, infinity}, 0, null}}),
HKey = {{o, "Bucket1", "Key0", null}, {1, {active, infinity}, 0, null}}, HKey = {{o, "Bucket1", "Key0", null}, {1, {active, infinity}, 0, null}},
{_, LStatus2, _, _, _, _, _} = create_block([HKey|KeyList1], {Status2, _} = create_block([HKey|KeyList1], KeyList2, #level{level=1}),
KeyList2, ?assertMatch(block_full, element(1, Status2)).
#level{level=1},
leveled_tinybloom:empty(4)),
?assertMatch(full, LStatus2).
merge_seglists_test() -> merge_seglists_test() ->
@ -1693,113 +1598,78 @@ merge_seglists_test() ->
createslot_stage1_test() -> createslot_stage1_test() ->
{KeyList1, KeyList2} = sample_keylist(), {KeyList1, KeyList2} = sample_keylist(),
Out = create_slot(KeyList1, {Status, BlockKeyLists} = create_slot(KeyList1, KeyList2, #level{level=1}),
KeyList2, WState = finalise_slot(BlockKeyLists, #writer{}),
#level{level=1},
leveled_tinybloom:empty(4)), ?assertMatch({o, "Bucket4", "Key1", null}, WState#writer.last_key),
{{LowKey, SegFilter, _SerialisedSlot, _LengthList}, ?assertMatch(partial, Status),
{{LSN, HSN}, LastKey, Status},
_UpdBloom, %% Writer state has the SlotIndex which includes the segment filter
KL1, KL2} = Out, SegFilter = element(2, lists:nth(1, WState#writer.slot_index)),
?assertMatch(LowKey, {o, "Bucket1", "Key1", null}),
?assertMatch(LastKey, {o, "Bucket4", "Key1", null}),
?assertMatch(Status, partial),
?assertMatch(KL1, []),
?assertMatch(KL2, []),
R0 = check_for_segments(serialise_segment_filter(SegFilter), R0 = check_for_segments(serialise_segment_filter(SegFilter),
[hash_for_segmentid({keyonly, {o, "Bucket1", "Key1", null}})], [hash_for_segmentid({keyonly, {o, "Bucket1", "Key1", null}})],
true), true),
?assertMatch(R0, {maybe_present, [0]}), ?assertMatch({maybe_present, [0]}, R0),
R1 = check_for_segments(serialise_segment_filter(SegFilter), R1 = check_for_segments(serialise_segment_filter(SegFilter),
[hash_for_segmentid({keyonly, {o, "Bucket1", "Key99", null}})], [hash_for_segmentid({keyonly, {o, "Bucket1", "Key99", null}})],
true), true),
?assertMatch(R1, not_present), ?assertMatch(not_present, R1),
?assertMatch(LSN, 1), ?assertMatch(1, WState#writer.min_sqn),
?assertMatch(HSN, 3). ?assertMatch(3, WState#writer.max_sqn).
createslot_stage2_test() -> createslot_stage2_test() ->
Out = create_slot(lists:sort(generate_randomkeys(100)), {Status, BlockKeyLists} = create_slot(lists:sort(generate_randomkeys(100)),
lists:sort(generate_randomkeys(100)), lists:sort(generate_randomkeys(100)),
#level{level=1}, #level{level=1}),
leveled_tinybloom:empty(4)), WState = finalise_slot(BlockKeyLists, #writer{}),
{{_LowKey, _SegFilter, SerialisedSlot, LengthList}, LengthList = element(3, lists:nth(1, WState#writer.slot_index)),
{{_LSN, _HSN}, _LastKey, Status},
_UpdBloom, ?assertMatch(full, element(1, Status)),
_KL1, _KL2} = Out, Sum1 = lists:sum(LengthList),
?assertMatch(Status, full), Sum2 = byte_size(WState#writer.slot_binary),
Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList),
Sum2 = byte_size(SerialisedSlot),
?assertMatch(Sum1, Sum2). ?assertMatch(Sum1, Sum2).
createslot_stage3_test() -> createslot_stage3_test() ->
Out = create_slot(lists:sort(generate_sequentialkeys(100, 1)), {Status, BlockKeyLists} = create_slot(lists:sort(generate_sequentialkeys(100, 1)),
lists:sort(generate_sequentialkeys(100, 101)), lists:sort(generate_sequentialkeys(100, 101)),
#level{level=1}, #level{level=1}),
leveled_tinybloom:empty(4)), WState = finalise_slot(BlockKeyLists, #writer{}),
{{LowKey, SegFilter, SerialisedSlot, LengthList}, {FirstKey, SegFilter, LengthList} = lists:nth(1, WState#writer.slot_index),
{{_LSN, _HSN}, LastKey, Status},
_UpdBloom, ?assertMatch(full, element(1, Status)),
KL1, KL2} = Out, Sum1 = lists:sum(LengthList),
?assertMatch(Status, full), Sum2 = byte_size(WState#writer.slot_binary),
Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList),
Sum2 = byte_size(SerialisedSlot),
?assertMatch(Sum1, Sum2), ?assertMatch(Sum1, Sum2),
?assertMatch(LowKey, {o, "BucketSeq", "Key00000001", null}), ?assertMatch({o, "BucketSeq", "Key00000001", null}, FirstKey),
?assertMatch(LastKey, {o, "BucketSeq", "Key00000128", null}), ?assertMatch({o, "BucketSeq", "Key00000128", null}, WState#writer.last_key),
?assertMatch(KL1, []), ?assertMatch([], element(2, Status)),
Rem = length(KL2), Rem = length(element(3, Status)),
?assertMatch(Rem, 72), ?assertMatch(Rem, 72),
R0 = check_for_segments(serialise_segment_filter(SegFilter), R0 = check_for_segments(serialise_segment_filter(SegFilter),
[hash_for_segmentid({keyonly, [hash_for_segmentid({keyonly,
{o, "BucketSeq", "Key00000100", null}})], {o, "BucketSeq", "Key00000100", null}})],
true), true),
?assertMatch(R0, {maybe_present, [3]}), ?assertMatch({maybe_present, [3]}, R0),
R1 = check_for_segments(serialise_segment_filter(SegFilter), R1 = check_for_segments(serialise_segment_filter(SegFilter),
[hash_for_segmentid({keyonly, [hash_for_segmentid({keyonly,
{o, "Bucket1", "Key99", null}})], {o, "Bucket1", "Key99", null}})],
true), true),
?assertMatch(R1, not_present), ?assertMatch(not_present, R1),
R2 = check_for_segments(serialise_segment_filter(SegFilter), R2 = check_for_segments(serialise_segment_filter(SegFilter),
[hash_for_segmentid({keyonly, [hash_for_segmentid({keyonly,
{o, "BucketSeq", "Key00000040", null}})], {o, "BucketSeq", "Key00000040", null}})],
true), true),
?assertMatch(R2, {maybe_present, [1]}), ?assertMatch({maybe_present, [1]}, R2),
R3 = check_for_segments(serialise_segment_filter(SegFilter), R3 = check_for_segments(serialise_segment_filter(SegFilter),
[hash_for_segmentid({keyonly, [hash_for_segmentid({keyonly,
{o, "BucketSeq", "Key00000004", null}})], {o, "BucketSeq", "Key00000004", null}})],
true), true),
?assertMatch(R3, {maybe_present, [0]}). ?assertMatch({maybe_present, [0]}, R3).
testwrite_function(slots, {Handle, SerialisedSlots}) ->
lists:append(Handle, [SerialisedSlots]);
testwrite_function(finalise,
{Handle, C_SlotIndex, {SNExtremes, KeyExtremes, Bloom}}) ->
{Handle, C_SlotIndex, SNExtremes, KeyExtremes, Bloom}.
writekeys_stage1_test() ->
{KL1, KL2} = sample_keylist(),
{FunOut, {_KL1Rem, _KL2Rem}} = write_keys([],
KL1, KL2,
[], <<>>,
leveled_tinybloom:empty(4),
#level{level=1},
fun testwrite_function/2),
{Handle, {_, PointerIndex}, SNExtremes, KeyExtremes, _Bloom} = FunOut,
?assertMatch(SNExtremes, {1,3}),
?assertMatch(KeyExtremes, {{o, "Bucket1", "Key1", null},
{o, "Bucket4", "Key1", null}}),
[TopIndex|[]] = PointerIndex,
{TopKey, _SegFilter, {LengthList, _Total}} = TopIndex,
?assertMatch(TopKey, {o, "Bucket1", "Key1", null}),
TotalLength = lists:foldl(fun(X, Acc) -> Acc + X end,
0, LengthList),
ActualLength = lists:foldl(fun(X, Acc) -> Acc + byte_size(X) end,
0, Handle),
?assertMatch(TotalLength, ActualLength).
initial_create_header_test() -> initial_create_header_test() ->
Output = create_header(initial), Output = create_header(initial),
?assertMatch(?HEADER_LEN, byte_size(Output)). ?assertMatch(?HEADER_LEN, byte_size(Output)).
@ -1811,13 +1681,13 @@ initial_create_file_test() ->
{UpdHandle, UpdFileMD, {[], []}} = complete_file(Handle, FileMD, {UpdHandle, UpdFileMD, {[], []}} = complete_file(Handle, FileMD,
KL1, KL2, KL1, KL2,
#level{level=1}), #level{level=1}),
io:format("Slot Index of UpdFileMD ~w~n", [UpdFileMD#state.slot_index]),
Result1 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key8", null}), Result1 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key8", null}),
io:format("Result is ~w~n", [Result1]), ?assertMatch({{o, "Bucket1", "Key8", null},
?assertMatch(Result1, {{o, "Bucket1", "Key8", null}, {1, {active, infinity}, 0, null}}, Result1),
{1, {active, infinity}, 0, null}}),
Result2 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key88", null}), Result2 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key88", null}),
io:format("Result is ~w~n", [Result2]), ?assertMatch(not_present, Result2),
?assertMatch(Result2, not_present),
ok = file:close(UpdHandle), ok = file:close(UpdHandle),
ok = file:delete(Filename). ok = file:delete(Filename).
@ -1834,8 +1704,8 @@ big_create_file_test() ->
[{K2, {Sq2, St2, MH2, V2}}|_] = KL2, [{K2, {Sq2, St2, MH2, V2}}|_] = KL2,
Result1 = fetch_keyvalue(Handle, FileMD, K1), Result1 = fetch_keyvalue(Handle, FileMD, K1),
Result2 = fetch_keyvalue(Handle, FileMD, K2), Result2 = fetch_keyvalue(Handle, FileMD, K2),
?assertMatch(Result1, {K1, {Sq1, St1, MH1, V1}}), ?assertMatch({K1, {Sq1, St1, MH1, V1}}, Result1),
?assertMatch(Result2, {K2, {Sq2, St2, MH2, V2}}), ?assertMatch({K2, {Sq2, St2, MH2, V2}}, Result2),
SubList = lists:sublist(KL2, 1000), SubList = lists:sublist(KL2, 1000),
lists:foreach(fun(KV) -> lists:foreach(fun(KV) ->
{Kn, _} = KV, {Kn, _} = KV,
@ -1997,9 +1867,8 @@ big_iterator_test() ->
{o, "Bucket0000", "Key0000", null}, {o, "Bucket0000", "Key0000", null},
{o, "Bucket9999", "Key9999", null}, {o, "Bucket9999", "Key9999", null},
256), 256),
NumFoundKeys1 = length(Result1),
NumAddedKeys = 10000 - length(KL1Rem), NumAddedKeys = 10000 - length(KL1Rem),
?assertMatch(NumFoundKeys1, NumAddedKeys), ?assertMatch(NumAddedKeys, length(Result1)),
{partial, {partial,
Result2, Result2,
_} = fetch_range_keysonly(Handle, _} = fetch_range_keysonly(Handle,