%% This module provides functions for managing sft files - a modified version
%% of sst files, to be used in leveleddb.
%%
%% sft files are segment filtered tables in that they are guarded by a quick
%% access filter that checks for the presence of a key by segment id, with
%% the segment id being a hash in the range 0 - 1024 * 1024
%%
%% This filter has a dual purpose
%% - a memory efficient way of discovering non-presence with a low false
%% positive rate
%% - to make searching for all keys by hashtree segment more efficient (a
%% specific change to optimise behaviour for use with the incremental refresh
%% of riak hashtrees)
%%
%% Keys are not arbitrary in sft files - keys are only expected in a specific
%% series of formats
%% - {o, Bucket, Key} - Object Keys
%% - {i, Bucket, IndexName, IndexTerm, Key} - Postings
%% The {Bucket, Key} part of all types of keys are hashed for segment filters.
%% For Postings the {Bucket, IndexName, IndexTerm} is also hashed.  This
%% causes a false positive on lookup of a segment, but allows for the
%% presence of specific index terms to be checked
%%
%% The objects stored are a tuple of {Key, SequenceNumber, State, Value},
%% where (see the illustrative example below):
%% Key - as above
%% SequenceNumber - monotonically increasing counter of addition to the
%% nursery log
%% State - {active|tomb, ExpiryTimestamp | infinity}
%% Value - null (all postings) | [Object Metadata] (all object keys)
%% Keys should be unique in files.  If there is more than one candidate
%% version of a key for the same file, the version with the highest sequence
%% number should be chosen.  If the file is at the basement level of a
%% leveleddb database the objects with an ExpiryTimestamp in the past should
%% not be written, but at all other levels keys should not be ignored because
%% of a timestamp in the past.
%% tomb objects are written for deletions, and these tombstones may have an
%% ExpiryTimestamp which in effect is the time when the tombstone should be
%% reaped.
%%
%% sft files are broken into the following sections:
%% - Header (fixed width 80 bytes - containing pointers and metadata)
%% - Blocks (variable length)
%% - Slot Filter (variable length)
%% - Slot Index (variable length)
%% - Table Summary (variable length)
%% Each section should contain at the footer of the section a 4-byte CRC
%% which is to be checked only on the opening of the file
%%
%% The keys in the sft file are placed into the file in erlang term order.
%% There will normally be 256 slots of keys.  The Slot Index is a gb_tree
%% acting as a helper to find the right slot to check when searching for a
%% key or range of keys.
%% The Key in the Slot Index is the Key at the start of the Slot.
%% The Value in the Slot Index is a record indicating:
%% - The starting position of the Slot within the Blocks (relative to the
%% starting position of the Blocks)
%% - The (relative) starting position of the Slot Filter for this Slot
%% - The number of blocks within the Slot
%% - The length of each of the Blocks within the Slot
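%%
%% As a purely illustrative example of the stored object format described
%% above (the object metadata content here is hypothetical):
%% - an object key entry:
%%     {{o, "Bucket1", "Key1"}, 5, {active, infinity}, [{size, 123}]}
%% - a posting:
%%     {{i, "Bucket1", "idx1_bin", "someterm", "Key1"},
%%         5, {active, infinity}, null}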
%%
%% When checking for a Key in the sft file, the key should be hashed to the
%% segment, then the key should be looked-up in the Slot Index.  The segment
%% ID can then be checked against the Slot Filter which will either return
%% not_present or [BlockIDs]
%% If a list of BlockIDs (normally of length 1) is returned the block should
%% be fetched using the starting position and length of the Block to find the
%% actual key (or not, if the Slot Filter had returned a false positive)
%%
%% There will exist a Slot Filter for each entry in the Slot Index
%% The Slot Filter starts with some fixed length metadata
%% - 1 byte stating the expected number of keys in the block
%% - 1 byte stating the number of complete (i.e. containing the expected
%% number of keys) Blocks in the Slot
%% - 1 byte stating the number of keys in any incomplete Block (there can
%% only be 1 incomplete Block per Slot and it must be the last block)
%% - 3 bytes stating the largest segment ID in the Slot
%% - 1 byte stating the exponent used in the rice-encoding of the filter
%% The Filter itself is a rice-encoded list of Integers representing the
%% differences between the Segment IDs in the Slot, with each entry being
%% followed by the minimal number of bits to represent the Block ID in which
%% an entry for that segment can be found.  Where a segment exists more than
%% once then a delta of 0 will be used.
%% To use the filter, code should roll over the filter incrementing the
%% Segment ID by each difference, and counting the keys by Block ID.  This
%% should return one of:
%% mismatch - the final Segment Count didn't meet the largest Segment ID or
%% the per-block key counts don't add-up.  There could have been a bit-flip,
%% so don't rely on the filter
%% no_match - everything added up but the counter never equalled the queried
%% Segment ID
%% {match, [BlockIDs]} - everything added up and the Segment may be
%% represented in the given blocks
%%
%% The makeup of a block
%% - A block is a list of 32 {Key, Value} pairs in Erlang term order
%% - The block is stored using standard compression in term_to_binary
%% (see the round-trip sketch below)
%% May be improved by use of lz4 or schema-based binary_to_term
%%
%% The Table Summary may contain multiple summaries
%% The standard table summary contains:
%% - a count of keys by bucket and type of key (posting or object key)
%% - the total size of objects referred to by object keys
%% - the number of postings by index name
%% - the number of tombstones within the file
%% - the highest and lowest sequence number in the file
%% Summaries could be used for other summaries of table content in the
%% future, perhaps application-specific bloom filters
%%
%% The 80-byte header is made up of
%% - 1 byte version (major 5 bits, minor 3 bits) - default 0.1
%% - 1 byte options (currently undefined)
%% - 1 byte Block Size - the expected number of keys in each block
%% - 1 byte Block Count - the expected number of blocks in each slot
%% - 2 byte Slot Count - the maximum number of slots in the file
%% - 6 bytes - spare
%% - 4 bytes - Blocks position
%% - 4 bytes - Blocks length
%% - 4 bytes - Slot Index position
%% - 4 bytes - Slot Index length
%% - 4 bytes - Slot Filter position
%% - 4 bytes - Slot Filter length
%% - 4 bytes - Table Summary position
%% - 4 bytes - Table Summary length
%% - 24 bytes - spare
%% - 4 bytes - CRC32
%%
%% The file body is written in the same order of events as the header (i.e.
%% Blocks first)
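%%
%% As an aside, a minimal round-trip sketch for the block encoding described
%% above, assuming the ?COMPRESSION_LEVEL of 1 defined below (illustrative
%% only):
%%   Bin = term_to_binary(BlockKeyList, [{compressed, 1}]),
%%   BlockKeyList = binary_to_term(Bin)
%% An lz4 or schema-based encoding would replace term_to_binary here.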
%%
%% Once open the file can be in the following states
%% - writing, the file is still being created
%% - available, the file may be read, but never again must be modified
%% - pending_deletion, the file can be closed and deleted once all
%% outstanding Snapshots have been started beyond a certain sequence number
%%
%% Level managers should only be aware of files in the available state.
%% Iterators may be aware of files in either available or pending_deletion.
%% Level maintainers should control the file exclusively when in the writing
%% state, and send the event to trigger pending_deletion with a sequence
%% number equal to or higher than the sequence number at the point the file
%% was no longer active at any level.
%%
%% The format of the file is intended to support quick lookups, whilst
%% allowing for a new file to be written incrementally (so that all keys and
%% values need not be retained in memory) - perhaps n blocks at a time

-module(leveled_sft).

-export([create_file/1,
        generate_segment_filter/1,
        serialise_segment_filter/1,
        check_for_segments/3,
        speedtest_check_forsegment/4,
        generate_randomsegfilter/1,
        create_slot/3]).

-include_lib("eunit/include/eunit.hrl").

-define(WORD_SIZE, 4).
-define(DWORD_SIZE, 8).
-define(CURRENT_VERSION, {0,1}).
-define(SLOT_COUNT, 256).
-define(BLOCK_SIZE, 32).
-define(BLOCK_COUNT, 4).
-define(FOOTERPOS_HEADERPOS, 2).
-define(MAX_SEG_HASH, 1048576).
-define(DIVISOR_BITS, 13).
-define(DIVISOR, 8092).
-define(COMPRESSION_LEVEL, 1).

-record(state, {version = ?CURRENT_VERSION :: tuple(),
                slot_index = gb_trees:empty() :: gb_trees:tree(),
                next_position :: integer(),
                smallest_sqn :: integer(),
                largest_sqn :: integer()}).

%% Start a bare file with an initial header and no further details
%% Return the {Handle, metadata record}
create_file(FileName) when is_list(FileName) ->
    {ok, Handle} = file:open(FileName, [binary, raw, read, write]),
    create_file(Handle);
create_file(Handle) ->
    Header = create_header(initial),
    {ok, _} = file:position(Handle, bof),
    ok = file:write(Handle, Header),
    {ok, StartPos} = file:position(Handle, cur),
    FileMD = #state{next_position=StartPos},
    {Handle, FileMD}.

create_header(initial) ->
    {Major, Minor} = ?CURRENT_VERSION,
    Version = <<Major:5, Minor:3>>,
    Options = <<0:8>>, % Not thought of any options
    {BlSize, BlCount, SlCount} = {?BLOCK_SIZE, ?BLOCK_COUNT, ?SLOT_COUNT},
    Settings = <<BlSize:8, BlCount:8, SlCount:16>>,
    {SpareO, SpareL} = {<<0:48>>, <<0:192>>},
    Lengths = <<0:32, 0:32, 0:32, 0:32>>,
    H1 = <<Version/binary, Options/binary, Settings/binary, SpareO/binary,
            Lengths/binary, SpareL/binary>>,
    CRC32 = erlang:crc32(H1),
    <<H1/binary, CRC32:32/integer>>.
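%% Illustrative usage (the file name is hypothetical):
%%   {Handle, FileMD} = create_file("ledger_level1_1.sft")
%% On return the handle is positioned immediately after the written header,
%% and FileMD#state.next_position records that offset, ready for the first
%% Block to be appended.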
%% Take two potentially overlapping lists of keys and output a Block,
%% together with:
%% - block status (full, partial)
%% - the lowest and highest sequence numbers in the block
%% - the list of segment IDs in the block
%% - the remainders of the lists
%% The Key lists must be sorted in key order.  The last key in a list may be
%% a pointer to request more keys for the file (otherwise it is assumed there
%% are no more keys)
%%
%% Level also to be passed in
%% This is either an integer (to be ignored) or {basement, os:timestamp()}
%% if this is the basement level of the leveleddb database and expired keys
%% and tombstones should be reaped
%% Do we need to check here that KeyList1 and KeyList2 are not just a
%% [pointer]?  Otherwise the pointer will never be expanded
%%
%% Also this should return a partial block if the KeyLists have been
%% exhausted but the block is full

create_block(KeyList1, KeyList2, Level) ->
    create_block(KeyList1, KeyList2, [], {infinity, 0}, [], Level).

create_block(KeyList1, KeyList2,
                BlockKeyList, {LSN, HSN}, SegmentList, _)
                                    when length(BlockKeyList)==?BLOCK_SIZE ->
    {BlockKeyList, full, {LSN, HSN}, SegmentList, KeyList1, KeyList2};
create_block([], [], BlockKeyList, {LSN, HSN}, SegmentList, _) ->
    {BlockKeyList, partial, {LSN, HSN}, SegmentList, [], []};
create_block(KeyList1, KeyList2,
                BlockKeyList, {LSN, HSN}, SegmentList, Level) ->
    case key_dominates(KeyList1, KeyList2, Level) of
        {{next_key, TopKey}, Rem1, Rem2} ->
            {UpdLSN, UpdHSN} = update_sequencenumbers(TopKey, LSN, HSN),
            NewBlockKeyList = lists:append(BlockKeyList, [TopKey]),
            NewSegmentList = lists:append(SegmentList,
                                            [hash_for_segmentid(TopKey)]),
            create_block(Rem1, Rem2, NewBlockKeyList, {UpdLSN, UpdHSN},
                            NewSegmentList, Level);
        {skipped_key, Rem1, Rem2} ->
            create_block(Rem1, Rem2, BlockKeyList, {LSN, HSN},
                            SegmentList, Level)
    end.

%% Should return an index entry in the Slot Index.  Each entry consists of:
%% - Start Key
%% - SegmentIDFilter for the Slot (will eventually be replaced with a
%% pointer)
%% - Serialised Slot (will eventually be replaced with a pointer)
%% - Length for each Block within the Serialised Slot
%% Additional information will also be provided
%% - {Low Seq Number, High Seq Number} within the slot
%% - End Key
%% - Whether the slot is full or partially filled
%% - Remainder of any KeyLists used to make the slot

create_slot(KeyList1, KeyList2, Level) ->
    create_slot(KeyList1, KeyList2, Level, ?BLOCK_COUNT, [], <<>>, [],
                                    {null, infinity, 0, null, full}).
%% Keep adding blocks to the slot until either the block count is reached or
%% there is a partial block

create_slot(KL1, KL2, _, 0, SegLists, SerialisedSlot, LengthList,
                                {LowKey, LSN, HSN, LastKey, Status}) ->
    {{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList},
        {{LSN, HSN}, LastKey, Status},
        KL1, KL2};
create_slot(KL1, KL2, _, _, SegLists, SerialisedSlot, LengthList,
                                {LowKey, LSN, HSN, LastKey, partial}) ->
    {{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList},
        {{LSN, HSN}, LastKey, partial},
        KL1, KL2};
create_slot(KL1, KL2, Level, BlockCount, SegLists, SerialisedSlot, LengthList,
                                {LowKey, LSN, HSN, LastKey, _Status}) ->
    {BlockKeyList, Status,
        {LSNb, HSNb},
        SegmentList, KL1b, KL2b} = create_block(KL1, KL2, Level),
    TrackingMetadata = case LowKey of
        null ->
            [NewLowKeyV|_] = BlockKeyList,
            {strip_to_keyonly(NewLowKeyV),
                min(LSN, LSNb), max(HSN, HSNb),
                strip_to_keyonly(last(BlockKeyList, {last, LastKey})),
                Status};
        _ ->
            {LowKey,
                min(LSN, LSNb), max(HSN, HSNb),
                strip_to_keyonly(last(BlockKeyList, {last, LastKey})),
                Status}
    end,
    SerialisedBlock = serialise_block(BlockKeyList),
    BlockLength = bit_size(SerialisedBlock),
    SerialisedSlot2 = <<SerialisedSlot/bitstring,
                        SerialisedBlock/bitstring>>,
    create_slot(KL1b, KL2b, Level, BlockCount - 1, SegLists ++ [SegmentList],
                    SerialisedSlot2, LengthList ++ [BlockLength],
                    TrackingMetadata).

last([], {last, LastKey}) -> {keyonly, LastKey};
last([E|Es], PrevLast) -> last(E, Es, PrevLast).

last(_, [E|Es], PrevLast) -> last(E, Es, PrevLast);
last(E, [], _) -> E.

strip_to_keyonly({keyonly, K}) -> K;
strip_to_keyonly({K, _, _, _}) -> K.

serialise_block(BlockKeyList) ->
    term_to_binary(BlockKeyList, [{compressed, ?COMPRESSION_LEVEL}]).

%% Compare the keys at the head of the list, and either skip that "best" key
%% or identify it as the next key.
%%
%% The logic needs to change if the file is in the basement level, as keys
%% with expired timestamps need not be written at this level
%%
%% The best key is considered to be the lowest key in erlang term order.  If
%% there are matching keys then the highest sequence number must be chosen
%% and any lower sequence numbers should be compacted out of existence

key_dominates([H1|T1], [], Level) ->
    {_, _, St1, _} = H1,
    case maybe_reap_expiredkey(St1, Level) of
        true ->
            {skipped_key, maybe_expand_pointer(T1), []};
        false ->
            {{next_key, H1}, maybe_expand_pointer(T1), []}
    end;
key_dominates([], [H2|T2], Level) ->
    {_, _, St2, _} = H2,
    case maybe_reap_expiredkey(St2, Level) of
        true ->
            {skipped_key, [], maybe_expand_pointer(T2)};
        false ->
            {{next_key, H2}, [], maybe_expand_pointer(T2)}
    end;
key_dominates([H1|T1], [H2|T2], Level) ->
    {K1, Sq1, St1, _} = H1,
    {K2, Sq2, St2, _} = H2,
    case K1 of
        K2 ->
            case Sq1 > Sq2 of
                true ->
                    {skipped_key, [H1|T1], maybe_expand_pointer(T2)};
                false ->
                    {skipped_key, maybe_expand_pointer(T1), [H2|T2]}
            end;
        K1 when K1 < K2 ->
            case maybe_reap_expiredkey(St1, Level) of
                true ->
                    {skipped_key, maybe_expand_pointer(T1), [H2|T2]};
                false ->
                    {{next_key, H1}, maybe_expand_pointer(T1), [H2|T2]}
            end;
        _ ->
            case maybe_reap_expiredkey(St2, Level) of
                true ->
                    {skipped_key, [H1|T1], maybe_expand_pointer(T2)};
                false ->
                    {{next_key, H2}, [H1|T1], maybe_expand_pointer(T2)}
            end
    end.

maybe_reap_expiredkey({_, infinity}, _) ->
    false; % key is not set to expire
maybe_reap_expiredkey({_, TS}, {basement, CurrTS}) when CurrTS > TS ->
    true; % basement and ready to expire
maybe_reap_expiredkey(_, _) ->
    false.

%% Not worked out pointers yet
maybe_expand_pointer(Tail) ->
    Tail.
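%% For example (purely illustrative, with K standing for any key), where the
%% heads of the lists match, the higher sequence number survives:
%%   key_dominates([{K, 2, {active, infinity}, null}],
%%                 [{K, 1, {active, infinity}, null}],
%%                 1)
%% returns {skipped_key, [{K, 2, {active, infinity}, null}], []} - dropping
%% the dominated key from the second list; the following call will then
%% return {next_key, {K, 2, {active, infinity}, null}}.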
%% Update the sequence numbers, starting from the {infinity, 0} accumulator
%% passed in by create_block/6
update_sequencenumbers({_, SN, _, _}, infinity, 0) ->
    {SN, SN};
update_sequencenumbers({_, SN, _, _}, LSN, HSN) when SN < LSN ->
    {SN, HSN};
update_sequencenumbers({_, SN, _, _}, LSN, HSN) when SN > HSN ->
    {LSN, SN};
update_sequencenumbers({_, _, _, _}, LSN, HSN) ->
    {LSN, HSN}.

%% The Segment filter is a compressed filter representing the keys in a
%% given slot.  The filter is a delta-compressed list of integers using rice
%% encoding, extended by each integer carrying an extra two bits to indicate
%% the block - there are four blocks in each slot.
%%
%% So each delta is represented as
%% - a variable length exponent ending in 0,
%% with 0 representing the exponent of 0,
%% 10 -> 2 ^ 13,
%% 110 -> 2^14,
%% 1110 -> 2^15 etc
%% - a 13-bit fixed length remainder
%% - a 2-bit block number
%% This gives about 2 bytes per key, with a 1:8000 (approx) false positive
%% ratio (when checking the key by hashing to the segment ID)
%%
%% Before the delta list are four 20-bit integers representing the highest
%% integer in each block, plus two bytes to indicate how many hashes there
%% are in the slot
%%
%% To check for the presence of a segment in a slot, roll over the deltas
%% keeping a running total overall and the current highest segment ID seen
%% per block.  Roll all the way through even if matches are found or passed
%% over to confirm that the totals match the expected values (hence creating
%% a natural checksum)
%%
%% The end-result is a ~260-byte check for the presence of a key in a slot
%% returning the block in which the segment can be found, which may also be
%% used directly for checking for the presence of segments.
%%
%% This is more space efficient than the equivalent bloom filter and avoids
%% the calculation of many hash functions.

%% Pad any shorter list of segment lists out to the four blocks expected in
%% a slot
generate_segment_filter([SegL1]) ->
    generate_segment_filter({SegL1, [], [], []});
generate_segment_filter([SegL1, SegL2]) ->
    generate_segment_filter({SegL1, SegL2, [], []});
generate_segment_filter([SegL1, SegL2, SegL3]) ->
    generate_segment_filter({SegL1, SegL2, SegL3, []});
generate_segment_filter([SegL1, SegL2, SegL3, SegL4]) ->
    generate_segment_filter({SegL1, SegL2, SegL3, SegL4});
generate_segment_filter(SegLists) ->
    generate_segment_filter(merge_seglists(SegLists),
                            [],
                            [{0, 0}, {0, 1}, {0, 2}, {0, 3}]).

%% To generate the segment filter needs a sorted list of {Delta, Block}
%% pairs as DeltaList and a list of {TopHash, Block} pairs as TopHashes
generate_segment_filter([], DeltaList, TopHashes) ->
    {lists:reverse(DeltaList), TopHashes};
generate_segment_filter([NextSeg|SegTail], DeltaList, TopHashes) ->
    {TopHash, _} = lists:max(TopHashes),
    {NextSegHash, NextSegBlock} = NextSeg,
    DeltaList2 = [{NextSegHash - TopHash, NextSegBlock}|DeltaList],
    TopHashes2 = lists:keyreplace(NextSegBlock, 2, TopHashes,
                                    {NextSegHash, NextSegBlock}),
    generate_segment_filter(SegTail, DeltaList2, TopHashes2).

serialise_segment_filter({DeltaList, TopHashes}) ->
    TopHashesBin = lists:foldl(fun({X, _}, Acc) ->
                                    <<Acc/bitstring, X:20>> end,
                                <<>>, TopHashes),
    Length = length(DeltaList),
    HeaderBin = <<TopHashesBin/bitstring, Length:16/integer>>,
    {Divisor, Factor} = {?DIVISOR, ?DIVISOR_BITS},
    F = fun({Delta, Block}, Acc) ->
            Exponent = buildexponent(Delta div Divisor),
            Remainder = Delta rem Divisor,
            Block2Bit = Block,
            <<Acc/bitstring, Exponent/bitstring,
                Remainder:Factor/integer, Block2Bit:2/integer>> end,
    lists:foldl(F, HeaderBin, DeltaList).

buildexponent(Exponent) ->
    buildexponent(Exponent, <<0:1>>).

buildexponent(0, OutputBits) ->
    OutputBits;
buildexponent(Exponent, OutputBits) ->
    buildexponent(Exponent - 1, <<1:1, OutputBits/bitstring>>).
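%% As a worked example of the encoding above (consistent with the
%% merge_seglists_test below): a delta of 9800 falling in block 2, with the
%% ?DIVISOR of 8092, encodes as
%%   9800 div 8092 = 1     ->  exponent bits 10
%%   9800 rem 8092 = 1708  ->  13-bit remainder 1708
%%   block 2               ->  2-bit suffix 10
%% i.e. the bitstring <<2:2, 1708:13, 2:2>>.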
merge_seglists({SegList1, SegList2, SegList3, SegList4}) ->
    Stage1 = lists:foldl(fun(X, Acc) -> [{X, 0}|Acc] end, [], SegList1),
    Stage2 = lists:foldl(fun(X, Acc) -> [{X, 1}|Acc] end, Stage1, SegList2),
    Stage3 = lists:foldl(fun(X, Acc) -> [{X, 2}|Acc] end, Stage2, SegList3),
    Stage4 = lists:foldl(fun(X, Acc) -> [{X, 3}|Acc] end, Stage3, SegList4),
    lists:sort(Stage4).

hash_for_segmentid(KV) ->
    erlang:phash2(strip_to_keyonly(KV), ?MAX_SEG_HASH).

%% Check for a given list of segments in the filter, returning in normal
%% operations a TupleList of {SegmentID, [ListOfBlocks]} where the
%% ListOfBlocks are the block IDs which contain keys in that given segment
%%
%% If there is a failure - perhaps due to a bit flip of some sort - an error
%% will be returned (error_so_maybe_present) and all blocks should be
%% checked as the filter cannot be relied upon

check_for_segments(SegFilter, SegmentList, CRCCheck) ->
    case CRCCheck of
        true ->
            <<T0:20/integer, T1:20/integer, T2:20/integer, T3:20/integer,
                Count:16/integer, SegRem/bitstring>> = SegFilter,
            CheckSum = [T0, T1, T2, T3],
            case safecheck_for_segments(SegRem, SegmentList,
                                            [0, 0, 0, 0],
                                            0, Count, []) of
                {error_so_maybe_present, Reason} ->
                    io:format("Segment filter failed due to ~s~n", [Reason]),
                    error_so_maybe_present;
                {OutputCheck, BlockList} when OutputCheck == CheckSum,
                                                BlockList == [] ->
                    not_present;
                {OutputCheck, BlockList} when OutputCheck == CheckSum ->
                    {maybe_present, BlockList};
                {OutputCheck, _} ->
                    io:format("Segment filter failed due to CRC check~n"
                                " ~w did not match ~w~n",
                                [OutputCheck, CheckSum]),
                    error_so_maybe_present
            end;
        false ->
            <<_:80/bitstring, Count:16/integer, SegRem/bitstring>> = SegFilter,
            case quickcheck_for_segments(SegRem, SegmentList,
                                            lists:max(SegmentList),
                                            0, Count, []) of
                {error_so_maybe_present, Reason} ->
                    io:format("Segment filter failed due to ~s~n", [Reason]),
                    error_so_maybe_present;
                BlockList when BlockList == [] ->
                    not_present;
                BlockList ->
                    {maybe_present, BlockList}
            end
    end.

safecheck_for_segments(_, _, TopHashes, _, 0, BlockList) ->
    {TopHashes, BlockList};
safecheck_for_segments(Filter, SegmentList, TopHs, Acc, Count, BlockList) ->
    case findexponent(Filter) of
        {ok, Exp, FilterRem1} ->
            case findremainder(FilterRem1, ?DIVISOR_BITS) of
                {ok, Remainder, BlockID, FilterRem2} ->
                    {NextHash, BlockList2} =
                        checkhash_forsegments(Acc, Exp, Remainder,
                                                SegmentList,
                                                BlockList, BlockID),
                    TopHashes2 = setnth(BlockID, TopHs, NextHash),
                    safecheck_for_segments(FilterRem2, SegmentList,
                                            TopHashes2, NextHash,
                                            Count - 1, BlockList2);
                error ->
                    {error_so_maybe_present, "Remainder Check"}
            end;
        error ->
            {error_so_maybe_present, "Exponent Check"}
    end.

quickcheck_for_segments(_, _, _, _, 0, BlockList) ->
    BlockList;
quickcheck_for_segments(Filter, SegmentList, MaxSeg, Acc, Count, BlockList) ->
    case findexponent(Filter) of
        {ok, Exp, FilterRem1} ->
            case findremainder(FilterRem1, ?DIVISOR_BITS) of
                {ok, Remainder, BlockID, FilterRem2} ->
                    {NextHash, BlockList2} =
                        checkhash_forsegments(Acc, Exp, Remainder,
                                                SegmentList,
                                                BlockList, BlockID),
                    case NextHash > MaxSeg of
                        true ->
                            BlockList2;
                        false ->
                            quickcheck_for_segments(FilterRem2, SegmentList,
                                                    MaxSeg, NextHash,
                                                    Count - 1, BlockList2)
                    end;
                error ->
                    {error_so_maybe_present, "Remainder Check"}
            end;
        error ->
            {error_so_maybe_present, "Exponent Check"}
    end.

checkhash_forsegments(Acc, Exp, Remainder, SegmentList, BlockList, BlockID) ->
    NextHash = Acc + ?DIVISOR * Exp + Remainder,
    case lists:member(NextHash, SegmentList) of
        true ->
            {NextHash, [BlockID|BlockList]};
        false ->
            {NextHash, BlockList}
    end.

setnth(0, [_|Rest], New) -> [New|Rest];
setnth(I, [E|Rest], New) -> [E|setnth(I-1, Rest, New)].
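%% setnth is zero-indexed, e.g. (illustrative only):
%%   setnth(1, [a, b, c], x) returns [a, x, c]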
findexponent(BitStr) ->
    findexponent(BitStr, 0).

findexponent(<<>>, _) ->
    error;
findexponent(<<H:1/integer, T/bitstring>>, Acc) ->
    case H of
        1 -> findexponent(T, Acc + 1);
        0 -> {ok, Acc, T}
    end.

findremainder(BitStr, Factor) ->
    case BitStr of
        <<Remainder:Factor/integer, BlockID:2/integer, Tail/bitstring>> ->
            {ok, Remainder, BlockID, Tail};
        _ ->
            error
    end.


%%%%%%%%%%%%%%%%
% T E S T
%%%%%%%%%%%%%%%

speedtest_check_forsegment(_, 0, _, _) ->
    true;
speedtest_check_forsegment(SegFilter, LoopCount, CRCCheck, IDsToCheck) ->
    check_for_segments(SegFilter, gensegmentids(IDsToCheck), CRCCheck),
    speedtest_check_forsegment(SegFilter, LoopCount - 1, CRCCheck,
                                IDsToCheck).

gensegmentids(Count) ->
    gensegmentids([], Count).

gensegmentids(GeneratedIDs, 0) ->
    lists:sort(GeneratedIDs);
gensegmentids(GeneratedIDs, Count) ->
    gensegmentids([random:uniform(1024*1024)|GeneratedIDs], Count - 1).

generate_randomsegfilter(BlockSize) ->
    Block1 = gensegmentids(BlockSize),
    Block2 = gensegmentids(BlockSize),
    Block3 = gensegmentids(BlockSize),
    Block4 = gensegmentids(BlockSize),
    serialise_segment_filter(generate_segment_filter({Block1,
                                                        Block2,
                                                        Block3,
                                                        Block4})).

generate_randomkeys(Count) ->
    generate_randomkeys(Count, []).

generate_randomkeys(0, Acc) ->
    Acc;
generate_randomkeys(Count, Acc) ->
    RandKey = {{o,
                lists:concat(["Bucket", random:uniform(1024)]),
                lists:concat(["Key", random:uniform(1024)])},
                random:uniform(1024*1024),
                {active, infinity}, null},
    generate_randomkeys(Count - 1, [RandKey|Acc]).

generate_sequentialkeys(Count, Start) ->
    generate_sequentialkeys(Count + Start, Start, []).

generate_sequentialkeys(Target, Incr, Acc) when Incr =:= Target ->
    Acc;
generate_sequentialkeys(Target, Incr, Acc) ->
    KeyStr = string:right(integer_to_list(Incr), 8, $0),
    NextKey = {{o, "BucketSeq", lists:concat(["Key", KeyStr])},
                5, {active, infinity}, null},
    generate_sequentialkeys(Target, Incr + 1, [NextKey|Acc]).

simple_create_block_test() ->
    KeyList1 = [{{o, "Bucket1", "Key1"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key3"}, 2, {active, infinity}, null}],
    KeyList2 = [{{o, "Bucket1", "Key2"}, 3, {active, infinity}, null}],
    {MergedKeyList, ListStatus, SN, _, _, _} = create_block(KeyList1,
                                                            KeyList2,
                                                            1),
    ?assertMatch(partial, ListStatus),
    [H1|T1] = MergedKeyList,
    ?assertMatch(H1, {{o, "Bucket1", "Key1"}, 1, {active, infinity}, null}),
    [H2|T2] = T1,
    ?assertMatch(H2, {{o, "Bucket1", "Key2"}, 3, {active, infinity}, null}),
    ?assertMatch(T2, [{{o, "Bucket1", "Key3"}, 2, {active, infinity}, null}]),
    ?assertMatch(SN, {1,3}).

dominate_create_block_test() ->
    KeyList1 = [{{o, "Bucket1", "Key1"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key2"}, 2, {active, infinity}, null}],
    KeyList2 = [{{o, "Bucket1", "Key2"}, 3, {tomb, infinity}, null}],
    {MergedKeyList, ListStatus, SN, _, _, _} = create_block(KeyList1,
                                                            KeyList2,
                                                            1),
    ?assertMatch(partial, ListStatus),
    [K1, K2] = MergedKeyList,
    ?assertMatch(K1, {{o, "Bucket1", "Key1"}, 1, {active, infinity}, null}),
    ?assertMatch(K2, {{o, "Bucket1", "Key2"}, 3, {tomb, infinity}, null}),
    ?assertMatch(SN, {1,3}).
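%% A minimal additional sanity check (not part of the original suite) of the
%% bit-level helpers, assuming the encodings described above.
findexponent_findremainder_test() ->
    ?assertMatch({ok, 2, <<>>}, findexponent(<<1:1, 1:1, 0:1>>)),
    ?assertMatch(error, findexponent(<<>>)),
    ?assertMatch({ok, 1708, 2, <<>>},
                    findremainder(<<1708:13/integer, 2:2/integer>>,
                                    ?DIVISOR_BITS)),
    % Too few bits to hold a remainder plus a block ID
    ?assertMatch(error, findremainder(<<0:3>>, ?DIVISOR_BITS)).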
sample_keylist() ->
    KeyList1 = [{{o, "Bucket1", "Key1"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key3"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key5"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key7"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key9"}, 1, {active, infinity}, null},
                {{o, "Bucket2", "Key1"}, 1, {active, infinity}, null},
                {{o, "Bucket2", "Key3"}, 1, {active, infinity}, null},
                {{o, "Bucket2", "Key5"}, 1, {active, infinity}, null},
                {{o, "Bucket2", "Key7"}, 1, {active, infinity}, null},
                {{o, "Bucket2", "Key9"}, 1, {active, infinity}, null},
                {{o, "Bucket3", "Key1"}, 1, {active, infinity}, null},
                {{o, "Bucket3", "Key3"}, 1, {active, infinity}, null},
                {{o, "Bucket3", "Key5"}, 1, {active, infinity}, null},
                {{o, "Bucket3", "Key7"}, 1, {active, infinity}, null},
                {{o, "Bucket3", "Key9"}, 1, {active, infinity}, null},
                {{o, "Bucket4", "Key1"}, 1, {active, infinity}, null}],
    KeyList2 = [{{o, "Bucket1", "Key2"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key4"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key6"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key8"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key9a"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key9b"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key9c"}, 1, {active, infinity}, null},
                {{o, "Bucket1", "Key9d"}, 1, {active, infinity}, null},
                {{o, "Bucket2", "Key2"}, 1, {active, infinity}, null},
                {{o, "Bucket2", "Key4"}, 1, {active, infinity}, null},
                {{o, "Bucket2", "Key6"}, 1, {active, infinity}, null},
                {{o, "Bucket2", "Key8"}, 1, {active, infinity}, null},
                {{o, "Bucket3", "Key2"}, 1, {active, infinity}, null},
                {{o, "Bucket3", "Key4"}, 3, {active, infinity}, null},
                {{o, "Bucket3", "Key6"}, 2, {active, infinity}, null},
                {{o, "Bucket3", "Key8"}, 1, {active, infinity}, null}],
    {KeyList1, KeyList2}.

alternating_create_block_test() ->
    {KeyList1, KeyList2} = sample_keylist(),
    {MergedKeyList, ListStatus, _, _, _, _} = create_block(KeyList1,
                                                            KeyList2,
                                                            1),
    BlockSize = length(MergedKeyList),
    ?assertMatch(BlockSize, 32),
    ?assertMatch(ListStatus, full),
    K1 = lists:nth(1, MergedKeyList),
    ?assertMatch(K1, {{o, "Bucket1", "Key1"}, 1, {active, infinity}, null}),
    K11 = lists:nth(11, MergedKeyList),
    ?assertMatch(K11, {{o, "Bucket1", "Key9b"}, 1, {active, infinity}, null}),
    K32 = lists:nth(32, MergedKeyList),
    ?assertMatch(K32, {{o, "Bucket4", "Key1"}, 1, {active, infinity}, null}).
merge_seglists_test() ->
    SegList1 = [0, 100, 200],
    SegList2 = [50, 200],
    SegList3 = [75, 10000],
    SegList4 = [],
    MergedList = merge_seglists({SegList1, SegList2, SegList3, SegList4}),
    ?assertMatch(MergedList, [{0, 0}, {50, 1}, {75, 2}, {100, 0},
                                {200, 0}, {200, 1}, {10000, 2}]),
    SegTerm = generate_segment_filter({SegList1, SegList2,
                                        SegList3, SegList4}),
    ?assertMatch(SegTerm, {[{0, 0}, {50, 1}, {25, 2}, {25, 0},
                            {100, 0}, {0, 1}, {9800, 2}],
                            [{200, 0}, {200, 1}, {10000, 2}, {0, 3}]}),
    SegBin = serialise_segment_filter(SegTerm),
    ExpectedTopHashes = <<200:20, 200:20, 10000:20, 0:20>>,
    ExpectedDeltas = <<0:1, 0:13, 0:2,
                        0:1, 50:13, 1:2,
                        0:1, 25:13, 2:2,
                        0:1, 25:13, 0:2,
                        0:1, 100:13, 0:2,
                        0:1, 0:13, 1:2,
                        2:2, 1708:13, 2:2>>,
    ExpectedResult = <<ExpectedTopHashes/bitstring,
                        7:16/integer,
                        ExpectedDeltas/bitstring>>,
    ?assertMatch(SegBin, ExpectedResult),
    R1 = check_for_segments(SegBin, [100], true),
    ?assertMatch(R1, {maybe_present, [0]}),
    R2 = check_for_segments(SegBin, [900], true),
    ?assertMatch(R2, not_present),
    R3 = check_for_segments(SegBin, [200], true),
    ?assertMatch(R3, {maybe_present, [1, 0]}),
    R4 = check_for_segments(SegBin, [0, 900], true),
    ?assertMatch(R4, {maybe_present, [0]}),
    R5 = check_for_segments(SegBin, [100], false),
    ?assertMatch(R5, {maybe_present, [0]}),
    R6 = check_for_segments(SegBin, [900], false),
    ?assertMatch(R6, not_present),
    R7 = check_for_segments(SegBin, [200], false),
    ?assertMatch(R7, {maybe_present, [1, 0]}),
    R8 = check_for_segments(SegBin, [0, 900], false),
    ?assertMatch(R8, {maybe_present, [0]}),
    R9 = check_for_segments(SegBin, [1024*1024 - 1], false),
    ?assertMatch(R9, not_present).

createslot_stage1_test() ->
    {KeyList1, KeyList2} = sample_keylist(),
    Out = create_slot(KeyList1, KeyList2, 1),
    {{LowKey, SegFilter, _SerialisedSlot, _LengthList},
        {{LSN, HSN}, LastKey, Status},
        KL1, KL2} = Out,
    ?assertMatch(LowKey, {o, "Bucket1", "Key1"}),
    ?assertMatch(LastKey, {o, "Bucket4", "Key1"}),
    ?assertMatch(Status, partial),
    ?assertMatch(KL1, []),
    ?assertMatch(KL2, []),
    R0 = check_for_segments(serialise_segment_filter(SegFilter),
                            [hash_for_segmentid({keyonly,
                                                    {o, "Bucket1", "Key1"}})],
                            true),
    ?assertMatch(R0, {maybe_present, [0]}),
    R1 = check_for_segments(serialise_segment_filter(SegFilter),
                            [hash_for_segmentid({keyonly,
                                                    {o, "Bucket1", "Key99"}})],
                            true),
    ?assertMatch(R1, not_present),
    ?assertMatch(LSN, 1),
    ?assertMatch(HSN, 3).

createslot_stage2_test() ->
    Out = create_slot(lists:sort(generate_randomkeys(100)),
                        lists:sort(generate_randomkeys(100)),
                        1),
    {{_LowKey, _SegFilter, SerialisedSlot, LengthList},
        {{_LSN, _HSN}, _LastKey, Status},
        _KL1, _KL2} = Out,
    ?assertMatch(Status, full),
    Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList),
    Sum2 = bit_size(SerialisedSlot),
    ?assertMatch(Sum1, Sum2).
createslot_stage3_test() ->
    Out = create_slot(lists:sort(generate_sequentialkeys(100, 1)),
                        lists:sort(generate_sequentialkeys(100, 101)),
                        1),
    {{LowKey, SegFilter, SerialisedSlot, LengthList},
        {{_LSN, _HSN}, LastKey, Status},
        KL1, KL2} = Out,
    ?assertMatch(Status, full),
    Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList),
    Sum2 = bit_size(SerialisedSlot),
    ?assertMatch(Sum1, Sum2),
    ?assertMatch(LowKey, {o, "BucketSeq", "Key00000001"}),
    ?assertMatch(LastKey, {o, "BucketSeq", "Key00000128"}),
    ?assertMatch(KL1, []),
    Rem = length(KL2),
    ?assertMatch(Rem, 72),
    R0 = check_for_segments(serialise_segment_filter(SegFilter),
                            [hash_for_segmentid({keyonly,
                                        {o, "BucketSeq", "Key00000100"}})],
                            true),
    ?assertMatch(R0, {maybe_present, [3]}),
    R1 = check_for_segments(serialise_segment_filter(SegFilter),
                            [hash_for_segmentid({keyonly,
                                        {o, "Bucket1", "Key99"}})],
                            true),
    ?assertMatch(R1, not_present),
    R2 = check_for_segments(serialise_segment_filter(SegFilter),
                            [hash_for_segmentid({keyonly,
                                        {o, "BucketSeq", "Key00000040"}})],
                            true),
    ?assertMatch(R2, {maybe_present, [1]}),
    R3 = check_for_segments(serialise_segment_filter(SegFilter),
                            [hash_for_segmentid({keyonly,
                                        {o, "BucketSeq", "Key00000004"}})],
                            true),
    ?assertMatch(R3, {maybe_present, [0]}).
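
%% An additional sanity check (not part of the original suite) of the unary
%% exponent encoding used by serialise_segment_filter/1.
buildexponent_test() ->
    ?assertMatch(<<0:1>>, buildexponent(0)),
    ?assertMatch(<<1:1, 0:1>>, buildexponent(1)),
    ?assertMatch(<<1:1, 1:1, 1:1, 0:1>>, buildexponent(3)).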