Switch to CRC check at Block Level

Previously done at Slot Level - but Blocks were still read from disk after the Slot CRC had been checked.

This seems safer.  It requires an extra CRC check for every fetch.  However, CRC checking smaller binaries during the build process appears to be beneficial to performance.

It is hoped this will be an enabler for turning off compression at Levels 0 and 1 to improve performance (without having a compensating issue with reduced CRC performance)
This commit is contained in:
Martin Sumner 2017-12-01 14:15:13 +00:00
parent 7a99d060a3
commit 5bac389d0c

View file

@ -79,6 +79,8 @@
-define(TIMING_SAMPLECOUNTDOWN, 10000). -define(TIMING_SAMPLECOUNTDOWN, 10000).
-define(TIMING_SAMPLESIZE, 100). -define(TIMING_SAMPLESIZE, 100).
-define(CACHE_SIZE, 32). -define(CACHE_SIZE, 32).
-define(BLOCK_LENGTHS_LENGTH, 20).
-define(FLIPPER32, 4294967295).
-include_lib("eunit/include/eunit.hrl"). -include_lib("eunit/include/eunit.hrl").
@ -658,23 +660,22 @@ fetch(LedgerKey, Hash, State, Timings0) ->
CachedBlockIdx = CachedBlockIdx =
array:get(SlotID - 1, State#state.blockindex_cache), array:get(SlotID - 1, State#state.blockindex_cache),
{SW2, Timings2} = update_timings(SW1, Timings1, lookup_cache, true), {SW2, Timings2} = update_timings(SW1, Timings1, lookup_cache, true),
BL = ?BLOCK_LENGTHS_LENGTH,
case CachedBlockIdx of case CachedBlockIdx of
none -> none ->
SlotBin = read_slot(State#state.handle, Slot), SlotBin = read_slot(State#state.handle, Slot),
{Result, BlockLengths, BlockIdx} = {Result, Header} =
binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod), binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod),
BlockIndexCache = BlockIndexCache =
array:set(SlotID - 1, array:set(SlotID - 1, Header, State#state.blockindex_cache),
<<BlockLengths/binary, BlockIdx/binary>>,
State#state.blockindex_cache),
{_SW3, Timings3} = {_SW3, Timings3} =
update_timings(SW2, Timings2, noncached_block, false), update_timings(SW2, Timings2, noncached_block, false),
{Result, {Result,
State#state{blockindex_cache = BlockIndexCache}, State#state{blockindex_cache = BlockIndexCache},
Timings3}; Timings3};
<<BlockLengths:24/binary, BlockIdx/binary>> -> <<BlockLengths:BL/binary, PosBin/binary>> ->
PosList = find_pos(BlockIdx, extra_hash(Hash), [], 0), PosList = find_pos(PosBin, extra_hash(Hash), [], 0),
case PosList of case PosList of
[] -> [] ->
{_SW3, Timings3} = {_SW3, Timings3} =
@ -700,6 +701,7 @@ fetch(LedgerKey, Hash, State, Timings0) ->
State#state.handle, State#state.handle,
StartPos, StartPos,
BlockLengths, BlockLengths,
byte_size(PosBin),
LedgerKey, LedgerKey,
PressMethod, PressMethod,
not_present), not_present),
@ -865,12 +867,12 @@ build_table_summary(SlotIndex, _Level, FirstKey, SlotCount, MaxSQN, Bloom) ->
SummBin = SummBin =
term_to_binary({Summary, Bloom, lists:reverse(SlotIndex)}, term_to_binary({Summary, Bloom, lists:reverse(SlotIndex)},
?BINARY_SETTINGS), ?BINARY_SETTINGS),
SummCRC = erlang:crc32(SummBin), SummCRC = hmac(SummBin),
<<SummCRC:32/integer, SummBin/binary>>. <<SummCRC:32/integer, SummBin/binary>>.
read_table_summary(BinWithCheck) -> read_table_summary(BinWithCheck) ->
<<SummCRC:32/integer, SummBin/binary>> = BinWithCheck, <<SummCRC:32/integer, SummBin/binary>> = BinWithCheck,
CRCCheck = erlang:crc32(SummBin), CRCCheck = hmac(SummBin),
if if
CRCCheck == SummCRC -> CRCCheck == SummCRC ->
% If not might it might be possible to rebuild from all the slots % If not might it might be possible to rebuild from all the slots
@ -938,24 +940,46 @@ generate_filenames(RootFilename) ->
%% checks %% checks
serialise_block(Term, lz4) -> serialise_block(Term, lz4) ->
{ok, Bin} = lz4:pack(term_to_binary(Term)), {ok, Bin} = lz4:pack(term_to_binary(Term)),
Bin; CRC32 = hmac(Bin),
<<Bin/binary, CRC32:32/integer>>;
serialise_block(Term, native) -> serialise_block(Term, native) ->
term_to_binary(Term, ?BINARY_SETTINGS). Bin = term_to_binary(Term, ?BINARY_SETTINGS),
CRC32 = hmac(Bin),
<<Bin/binary, CRC32:32/integer>>.
-spec deserialise_block(binary(), press_methods()) -> any(). -spec deserialise_block(binary(), press_methods()) -> any().
%% @doc %% @doc
%% Convert binary to term %% Convert binary to term
%% Function split out to make it easier to experiment with different %% Function split out to make it easier to experiment with different
%% compression methods. Also, perhaps standardise applictaion of CRC %% compression methods.
%% checks %%
deserialise_block(Bin, lz4) -> %% If CRC check fails we treat all the data as missing
%% A serialised block is laid out as <<Payload/binary, CRC32:32/integer>>.
%% The trailing 4 bytes are compared with hmac(Payload); only a block that
%% passes is handed on to deserialise_checkedblock/2.
%%
%% Returns the deserialised term on success, or [] when the block cannot be
%% trusted.  That covers both a check value mismatch and a block too short to
%% hold a non-empty payload plus its 4-byte check value (e.g. a truncated
%% read), so that damaged data is always treated as missing rather than
%% crashing the caller.
deserialise_block(Bin, PressMethod) when byte_size(Bin) > 4 ->
    PayloadSize = byte_size(Bin) - 4,
    <<TermBin:PayloadSize/binary, CRC32:32/integer>> = Bin,
    case hmac(TermBin) of
        CRC32 ->
            deserialise_checkedblock(TermBin, PressMethod);
        _ ->
            % Check value mismatch - treat all the data as missing
            []
    end;
deserialise_block(Bin, _PressMethod) when is_binary(Bin) ->
    % Too short to contain a payload and a check value - treat as missing
    [].
deserialise_checkedblock(Bin, lz4) ->
{ok, Bin0} = lz4:unpack(Bin), {ok, Bin0} = lz4:unpack(Bin),
binary_to_term(Bin0); binary_to_term(Bin0);
deserialise_block(Bin, native) -> deserialise_checkedblock(Bin, native) ->
binary_to_term(Bin). binary_to_term(Bin).
-spec hmac(binary()|integer()) -> integer().
%% @doc
%% Produce the check value for an input.  For an integer the check value is
%% the input XOR-ed with ?FLIPPER32; for a binary it is the result of
%% erlang:crc32/1 on that binary.  Despite the name, no key is involved.
hmac(Int) when is_integer(Int) ->
    ?FLIPPER32 bxor Int;
hmac(Bin) when is_binary(Bin) ->
    erlang:crc32(Bin).
%%%============================================================================ %%%============================================================================
%%% SlotIndex Implementation %%% SlotIndex Implementation
%%%============================================================================ %%%============================================================================
@ -1130,45 +1154,45 @@ generate_binary_slot(Lookup, KVL, PressMethod, BuildTimings0) ->
BuildTimings2 = update_buildtimings(SW1, BuildTimings1, slot_serialise), BuildTimings2 = update_buildtimings(SW1, BuildTimings1, slot_serialise),
SW2 = os:timestamp(), SW2 = os:timestamp(),
B1P = byte_size(PosBinIndex), B1P = byte_size(PosBinIndex) + ?BLOCK_LENGTHS_LENGTH,
CheckB1P = hmac(B1P),
B1L = byte_size(B1), B1L = byte_size(B1),
B2L = byte_size(B2), B2L = byte_size(B2),
B3L = byte_size(B3), B3L = byte_size(B3),
B4L = byte_size(B4), B4L = byte_size(B4),
B5L = byte_size(B5), B5L = byte_size(B5),
Lengths = <<B1P:32/integer, Header = <<B1L:32/integer,
B1L:32/integer,
B2L:32/integer, B2L:32/integer,
B3L:32/integer, B3L:32/integer,
B4L:32/integer, B4L:32/integer,
B5L:32/integer>>, B5L:32/integer,
SlotBin = <<Lengths/binary, PosBinIndex/binary>>,
PosBinIndex/binary, CheckH = hmac(Header),
B1/binary, B2/binary, B3/binary, B4/binary, B5/binary>>, SlotBin = <<CheckB1P:32/integer, B1P:32/integer,
CRC32 = erlang:crc32(SlotBin), CheckH:32/integer, Header/binary,
FullBin = <<CRC32:32/integer, SlotBin/binary>>, B1/binary, B2/binary, B3/binary, B4/binary, B5/binary>>,
{LastKey, _LV} = lists:last(KVL), {LastKey, _LV} = lists:last(KVL),
BuildTimings3 = update_buildtimings(SW2, BuildTimings2, slot_finish), BuildTimings3 = update_buildtimings(SW2, BuildTimings2, slot_finish),
{{<<Lengths/binary, PosBinIndex/binary>>, FullBin, HashL, LastKey}, {{Header, SlotBin, HashL, LastKey}, BuildTimings3}.
BuildTimings3}.
% Acc should start as not_present if LedgerKey is a key, and a list if % Acc should start as not_present if LedgerKey is a key, and a list if
% LedgerKey is false % LedgerKey is false
check_blocks([], _Handle, _StartPos, _BlockLengths, check_blocks([], _Handle, _StartPos, _BlockLengths, _PosBinLength,
_LedgerKeyToCheck, _PressMethod, Acc) -> _LedgerKeyToCheck, _PressMethod, Acc) ->
Acc; Acc;
check_blocks([Pos|Rest], Handle, StartPos, check_blocks([Pos|Rest], Handle, StartPos, BlockLengths, PosBinLength,
BlockLengths, LedgerKeyToCheck, PressMethod, Acc) -> LedgerKeyToCheck, PressMethod, Acc) ->
{BlockNumber, BlockPos} = revert_position(Pos), {BlockNumber, BlockPos} = revert_position(Pos),
BlockBin = BlockBin =
read_block(Handle, read_block(Handle,
StartPos, StartPos,
BlockLengths, BlockLengths,
PosBinLength,
BlockNumber), BlockNumber),
BlockL = deserialise_block(BlockBin, PressMethod), BlockL = deserialise_block(BlockBin, PressMethod),
{K, V} = lists:nth(BlockPos, BlockL), {K, V} = lists:nth(BlockPos, BlockL),
@ -1180,20 +1204,22 @@ check_blocks([Pos|Rest], Handle, StartPos,
false -> false ->
Acc ++ [{K, V}]; Acc ++ [{K, V}];
_ -> _ ->
check_blocks(Rest, Handle, StartPos, BlockLengths, check_blocks(Rest, Handle, StartPos,
BlockLengths, PosBinLength,
LedgerKeyToCheck, PressMethod, Acc) LedgerKeyToCheck, PressMethod, Acc)
end end
end. end.
read_block(Handle, StartPos, BlockLengths, BlockID) -> read_block(Handle, StartPos, BlockLengths, PosBinLength, BlockID) ->
{BlockPos, Offset, Length} = block_offsetandlength(BlockLengths, BlockID), {Offset, Length} = block_offsetandlength(BlockLengths, BlockID),
{ok, BlockBin} = file:pread(Handle, {ok, BlockBin} = file:pread(Handle,
StartPos StartPos
+ BlockPos
+ Offset + Offset
+ 28, + PosBinLength
% 4-byte CRC, 4 byte pos, 5x4 byte lengths + 32,
% 4-byte CRC, 4-byte pos,
% 4-byte CRC, 5x4 byte lengths
Length), Length),
BlockBin. BlockBin.
@ -1260,6 +1286,7 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) ->
BinMapFun = BinMapFun =
fun(Pointer, Acc) -> fun(Pointer, Acc) ->
{SP, _L, ID, _SK, _EK} = pointer_mapfun(Pointer), {SP, _L, ID, _SK, _EK} = pointer_mapfun(Pointer),
BL = ?BLOCK_LENGTHS_LENGTH,
case array:get(ID - 1, BlockIndexCache) of case array:get(ID - 1, BlockIndexCache) of
none -> none ->
% If there is an attempt to use the seg list query and the % If there is an attempt to use the seg list query and the
@ -1270,8 +1297,8 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) ->
read_length_list(Handle, [LengthDetails]), read_length_list(Handle, [LengthDetails]),
MapFun = binarysplit_mapfun(MultiSlotBin, StartPos), MapFun = binarysplit_mapfun(MultiSlotBin, StartPos),
Acc ++ [MapFun(LengthDetails)]; Acc ++ [MapFun(LengthDetails)];
<<BlockLengths:24/binary, BlockIdx/binary>> -> <<BlockLengths:BL/binary, BlockIdx/binary>> ->
% If there is a BlockIndex cached then we cna use it to % If there is a BlockIndex cached then we can use it to
% check to see if any of the expected segments are % check to see if any of the expected segments are
% present without lifting the slot off disk. Also the % present without lifting the slot off disk. Also the
% fact that we know position can be used to filter out % fact that we know position can be used to filter out
@ -1279,9 +1306,14 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) ->
case find_pos(BlockIdx, SegList, [], 0) of case find_pos(BlockIdx, SegList, [], 0) of
[] -> [] ->
Acc; Acc;
PL -> PositionList ->
Acc ++ check_blocks(PL, Handle, SP, BlockLengths, Acc ++
false, PressMethod, []) check_blocks(PositionList,
Handle, SP,
BlockLengths,
byte_size(BlockIdx),
false, PressMethod,
[])
end end
end end
end, end,
@ -1319,19 +1351,17 @@ read_length_list(Handle, LengthList) ->
binaryslot_get(FullBin, Key, Hash, PressMethod) -> binaryslot_get(FullBin, Key, Hash, PressMethod) ->
case crc_check_slot(FullBin) of case crc_check_slot(FullBin) of
{BlockLengths, Rest} -> {Header, Blocks} ->
<<B1P:32/integer, _R/binary>> = BlockLengths, BL = ?BLOCK_LENGTHS_LENGTH,
<<PosBinIndex:B1P/binary, Blocks/binary>> = Rest, <<BlockLengths:BL/binary, PosBinIndex/binary>> = Header,
PosList = find_pos(PosBinIndex, PosList = find_pos(PosBinIndex,
extra_hash(Hash), extra_hash(Hash),
[], [],
0), 0),
{fetch_value(PosList, BlockLengths, Blocks, Key, PressMethod), {fetch_value(PosList, BlockLengths, Blocks, Key, PressMethod),
BlockLengths, Header};
PosBinIndex};
crc_wonky -> crc_wonky ->
{not_present, {not_present,
none,
none} none}
end. end.
@ -1349,14 +1379,13 @@ binaryslot_tolist(FullBin, PressMethod) ->
{Out, _Rem} = {Out, _Rem} =
case crc_check_slot(FullBin) of case crc_check_slot(FullBin) of
{BlockLengths, RestBin} -> {Header, Blocks} ->
<<B1P:32/integer, <<B1L:32/integer,
B1L:32/integer,
B2L:32/integer, B2L:32/integer,
B3L:32/integer, B3L:32/integer,
B4L:32/integer, B4L:32/integer,
B5L:32/integer>> = BlockLengths, B5L:32/integer,
<<_PosBinIndex:B1P/binary, Blocks/binary>> = RestBin, _PosBinIndex/binary>> = Header,
lists:foldl(BlockFetchFun, lists:foldl(BlockFetchFun,
{[], Blocks}, {[], Blocks},
[B1L, B2L, B3L, B4L, B5L]); [B1L, B2L, B3L, B4L, B5L]);
@ -1382,17 +1411,16 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
% scenario is hard to do in concise code % scenario is hard to do in concise code
BlocksToCheck = BlocksToCheck =
case crc_check_slot(FullBin) of case crc_check_slot(FullBin) of
{BlockLengths, RestBin} -> {Header, Blocks} ->
<<B1P:32/integer, <<B1L:32/integer,
B1L:32/integer,
B2L:32/integer, B2L:32/integer,
B3L:32/integer, B3L:32/integer,
B4L:32/integer, B4L:32/integer,
B5L:32/integer>> = BlockLengths, B5L:32/integer,
<<_PosBinIndex:B1P/binary, _PosBinIndex/binary>> = Header,
Block1:B1L/binary, Block2:B2L/binary, <<Block1:B1L/binary, Block2:B2L/binary,
MidBlock:B3L/binary, MidBlock:B3L/binary,
Block4:B4L/binary, Block5:B5L/binary>> = RestBin, Block4:B4L/binary, Block5:B5L/binary>> = Blocks,
case B3L of case B3L of
0 -> 0 ->
[Block1, Block2]; [Block1, Block2];
@ -1440,11 +1468,10 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
false -> false ->
Block Block
end, end,
{LastKey, _LV} = lists:last(BlockList), case fetchend_rawblock(BlockList) of
case StartKey > LastKey of {LastKey, _LV} when StartKey > LastKey ->
true ->
{Acc, true}; {Acc, true};
false -> {LastKey, _LV} ->
{_LDrop, RKeep} = lists:splitwith(LTrimFun, {_LDrop, RKeep} = lists:splitwith(LTrimFun,
BlockList), BlockList),
case leveled_codec:endkey_passed(EndKey, LastKey) of case leveled_codec:endkey_passed(EndKey, LastKey) of
@ -1453,7 +1480,9 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
{Acc ++ LKeep, false}; {Acc ++ LKeep, false};
false -> false ->
{Acc ++ RKeep, true} {Acc ++ RKeep, true}
end end;
_ ->
{Acc, true}
end; end;
{_ , false} -> {_ , false} ->
{Acc, false} {Acc, false}
@ -1465,46 +1494,47 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
crc_check_slot(FullBin) -> crc_check_slot(FullBin) ->
<<CRC32:32/integer, SlotBin/binary>> = FullBin, <<CRC32PBL:32/integer,
case erlang:crc32(SlotBin) of PosBL:32/integer,
CRC32 -> CRC32H:32/integer,
<<BlockLengths:24/binary, Rest/binary>> = SlotBin, Header:PosBL/binary,
{BlockLengths, Rest}; Blocks/binary>> = FullBin,
case {hmac(Header), hmac(PosBL)} of
{CRC32H, CRC32PBL} ->
{Header, Blocks};
_ -> _ ->
leveled_log:log("SST09", []), leveled_log:log("SST09", []),
crc_wonky crc_wonky
end. end.
block_offsetandlength(BlockLengths, BlockID) -> block_offsetandlength(BlockLengths, BlockID) ->
<<BlocksPos:32/integer, BlockLengths0:20/binary>> = BlockLengths,
case BlockID of case BlockID of
1 -> 1 ->
<<B1L:32/integer, _BR/binary>> = BlockLengths0, <<B1L:32/integer, _BR/binary>> = BlockLengths,
{BlocksPos, 0, B1L}; {0, B1L};
2 -> 2 ->
<<B1L:32/integer, B2L:32/integer, _BR/binary>> = BlockLengths0, <<B1L:32/integer, B2L:32/integer, _BR/binary>> = BlockLengths,
{BlocksPos, B1L, B2L}; {B1L, B2L};
3 -> 3 ->
<<B1L:32/integer, <<B1L:32/integer,
B2L:32/integer, B2L:32/integer,
B3L:32/integer, B3L:32/integer,
_BR/binary>> = BlockLengths0, _BR/binary>> = BlockLengths,
{BlocksPos, B1L + B2L, B3L}; {B1L + B2L, B3L};
4 -> 4 ->
<<B1L:32/integer, <<B1L:32/integer,
B2L:32/integer, B2L:32/integer,
B3L:32/integer, B3L:32/integer,
B4L:32/integer, B4L:32/integer,
_BR/binary>> = BlockLengths0, _BR/binary>> = BlockLengths,
{BlocksPos, B1L + B2L + B3L, B4L}; {B1L + B2L + B3L, B4L};
5 -> 5 ->
<<B1L:32/integer, <<B1L:32/integer,
B2L:32/integer, B2L:32/integer,
B3L:32/integer, B3L:32/integer,
B4L:32/integer, B4L:32/integer,
B5L:32/integer, B5L:32/integer>> = BlockLengths,
_BR/binary>> = BlockLengths0, {B1L + B2L + B3L + B4L, B5L}
{BlocksPos, B1L + B2L + B3L + B4L, B5L}
end. end.
extra_hash({SegHash, _ExtraHash}) when is_integer(SegHash) -> extra_hash({SegHash, _ExtraHash}) when is_integer(SegHash) ->
@ -1530,19 +1560,26 @@ fetch_value([], _BlockLengths, _Blocks, _Key, _PressMethod) ->
not_present; not_present;
fetch_value([Pos|Rest], BlockLengths, Blocks, Key, PressMethod) -> fetch_value([Pos|Rest], BlockLengths, Blocks, Key, PressMethod) ->
{BlockNumber, BlockPos} = revert_position(Pos), {BlockNumber, BlockPos} = revert_position(Pos),
{_BlockPos, {Offset, Length} = block_offsetandlength(BlockLengths, BlockNumber),
Offset,
Length} = block_offsetandlength(BlockLengths, BlockNumber),
<<_Pre:Offset/binary, Block:Length/binary, _Rest/binary>> = Blocks, <<_Pre:Offset/binary, Block:Length/binary, _Rest/binary>> = Blocks,
BlockL = deserialise_block(Block, PressMethod), RawBlock = deserialise_block(Block, PressMethod),
{K, V} = lists:nth(BlockPos, BlockL), case fetchfrom_rawblock(BlockPos, RawBlock) of
case K of {K, V} when K == Key ->
Key ->
{K, V}; {K, V};
_ -> _ ->
fetch_value(Rest, BlockLengths, Blocks, Key, PressMethod) fetch_value(Rest, BlockLengths, Blocks, Key, PressMethod)
end. end.
%% Return the element at position BlockPos (1-based, as per lists:nth/2) of a
%% deserialised block.  An empty block yields not_present, whatever position
%% was requested.
fetchfrom_rawblock(BlockPos, RawBlock) ->
    case RawBlock of
        [] ->
            not_present;
        _ ->
            lists:nth(BlockPos, RawBlock)
    end.
%% Return the final element of a deserialised block, or not_present when the
%% block is empty.
fetchend_rawblock(RawBlock) ->
    case RawBlock of
        [] ->
            not_present;
        _ ->
            lists:last(RawBlock)
    end.
revert_position(Pos) -> revert_position(Pos) ->
{SideBlockSize, MidBlockSize} = ?LOOK_BLOCKSIZE, {SideBlockSize, MidBlockSize} = ?LOOK_BLOCKSIZE,
@ -2123,7 +2160,7 @@ indexed_list_mixedkeys2_test() ->
IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)), IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)),
% this isn't actually ordered correctly % this isn't actually ordered correctly
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2, Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
{{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = {{_Header, FullBin, _HL, _LK}, no_timing} =
generate_binary_slot(lookup, Keys, native, no_timing), generate_binary_slot(lookup, Keys, native, no_timing),
lists:foreach(fun({K, V}) -> lists:foreach(fun({K, V}) ->
MH = leveled_codec:segment_hash(K), MH = leveled_codec:segment_hash(K),
@ -2134,10 +2171,10 @@ indexed_list_mixedkeys2_test() ->
indexed_list_allindexkeys_test() -> indexed_list_allindexkeys_test() ->
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
?LOOK_SLOTSIZE), ?LOOK_SLOTSIZE),
{{PosBinIndex1, FullBin, _HL, _LK}, no_timing} = {{Header, FullBin, _HL, _LK}, no_timing} =
generate_binary_slot(lookup, Keys, native, no_timing), generate_binary_slot(lookup, Keys, native, no_timing),
EmptySlotSize = ?LOOK_SLOTSIZE - 1, EmptySlotSize = ?LOOK_SLOTSIZE - 1,
?assertMatch(<<_BL:24/binary, EmptySlotSize:8/integer>>, PosBinIndex1), ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, Header),
% SW = os:timestamp(), % SW = os:timestamp(),
BinToList = binaryslot_tolist(FullBin, native), BinToList = binaryslot_tolist(FullBin, native),
% io:format(user, % io:format(user,
@ -2149,9 +2186,9 @@ indexed_list_allindexkeys_test() ->
indexed_list_allindexkeys_nolookup_test() -> indexed_list_allindexkeys_nolookup_test() ->
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)), Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)),
?NOLOOK_SLOTSIZE), ?NOLOOK_SLOTSIZE),
{{PosBinIndex1, FullBin, _HL, _LK}, no_timing} = {{Header, FullBin, _HL, _LK}, no_timing} =
generate_binary_slot(no_lookup, Keys, native, no_timing), generate_binary_slot(no_lookup, Keys, native, no_timing),
?assertMatch(<<_BL:24/binary, 127:8/integer>>, PosBinIndex1), ?assertMatch(<<_BL:20/binary, 127:8/integer>>, Header),
% SW = os:timestamp(), % SW = os:timestamp(),
BinToList = binaryslot_tolist(FullBin, native), BinToList = binaryslot_tolist(FullBin, native),
% io:format(user, % io:format(user,
@ -2163,10 +2200,10 @@ indexed_list_allindexkeys_nolookup_test() ->
indexed_list_allindexkeys_trimmed_test() -> indexed_list_allindexkeys_trimmed_test() ->
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
?LOOK_SLOTSIZE), ?LOOK_SLOTSIZE),
{{PosBinIndex1, FullBin, _HL, _LK}, no_timing} = {{Header, FullBin, _HL, _LK}, no_timing} =
generate_binary_slot(lookup, Keys, native, no_timing), generate_binary_slot(lookup, Keys, native, no_timing),
EmptySlotSize = ?LOOK_SLOTSIZE - 1, EmptySlotSize = ?LOOK_SLOTSIZE - 1,
?assertMatch(<<_BL:24/binary, EmptySlotSize:8/integer>>, PosBinIndex1), ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, Header),
?assertMatch(Keys, binaryslot_trimmedlist(FullBin, ?assertMatch(Keys, binaryslot_trimmedlist(FullBin,
{i, {i,
"Bucket", "Bucket",
@ -2204,29 +2241,64 @@ indexed_list_mixedkeys_bitflip_test() ->
KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)), KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)),
KVL1 = lists:sublist(KVL0, 33), KVL1 = lists:sublist(KVL0, 33),
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
{{_PosBinIndex1, FullBin, _HL, LK}, no_timing} = {{Header, SlotBin, _HL, LK}, no_timing} =
generate_binary_slot(lookup, Keys, native, no_timing), generate_binary_slot(lookup, Keys, native, no_timing),
?assertMatch(LK, element(1, lists:last(Keys))), ?assertMatch(LK, element(1, lists:last(Keys))),
FullBin0 = flip_byte(FullBin),
{TestK1, _TestV1} = lists:nth(20, KVL1),
MH1 = leveled_codec:segment_hash(TestK1),
test_binary_slot(FullBin0, TestK1, MH1, not_present), <<B1L:32/integer,
ToList = binaryslot_tolist(FullBin0, native), _B2L:32/integer,
?assertMatch([], ToList), _B3L:32/integer,
_B4L:32/integer,
_B5L:32/integer,
PosBin/binary>> = Header,
TestKey1 = element(1, lists:nth(1, KVL1)),
TestKey2 = element(1, lists:nth(33, KVL1)),
MH1 = leveled_codec:segment_hash(TestKey1),
MH2 = leveled_codec:segment_hash(TestKey2),
test_binary_slot(SlotBin, TestKey1, MH1, lists:nth(1, KVL1)),
test_binary_slot(SlotBin, TestKey2, MH2, lists:nth(33, KVL1)),
ToList = binaryslot_tolist(SlotBin, native),
?assertMatch(Keys, ToList),
[Pos1] = find_pos(PosBin, extra_hash(MH1), [], 0),
[Pos2] = find_pos(PosBin, extra_hash(MH2), [], 0),
{BN1, _BP1} = revert_position(Pos1),
{BN2, _BP2} = revert_position(Pos2),
{Offset1, Length1} = block_offsetandlength(Header, BN1),
{Offset2, Length2} = block_offsetandlength(Header, BN2),
SlotBin1 = flip_byte(SlotBin, byte_size(Header) + 12 + Offset1, Length1),
SlotBin2 = flip_byte(SlotBin, byte_size(Header) + 12 + Offset2, Length2),
test_binary_slot(SlotBin2, TestKey1, MH1, lists:nth(1, KVL1)),
test_binary_slot(SlotBin1, TestKey2, MH2, lists:nth(33, KVL1)),
test_binary_slot(SlotBin1, TestKey1, MH1, not_present),
test_binary_slot(SlotBin2, TestKey2, MH2, not_present),
ToList1 = binaryslot_tolist(SlotBin1, native),
ToList2 = binaryslot_tolist(SlotBin2, native),
?assertMatch(true, is_list(ToList1)),
?assertMatch(true, is_list(ToList2)),
?assertMatch(true, length(ToList1) > 0),
?assertMatch(true, length(ToList2) > 0),
?assertMatch(true, length(ToList1) < length(Keys)),
?assertMatch(true, length(ToList2) < length(Keys)),
SlotBin3 = flip_byte(SlotBin, byte_size(Header) + 12, B1L),
{SK1, _} = lists:nth(10, Keys), {SK1, _} = lists:nth(10, Keys),
{EK1, _} = lists:nth(50, Keys), {EK1, _} = lists:nth(20, Keys),
O1 = binaryslot_trimmedlist(FullBin0, SK1, EK1, native), O1 = binaryslot_trimmedlist(SlotBin3, SK1, EK1, native),
?assertMatch(0, length(O1)),
?assertMatch([], O1). ?assertMatch([], O1).
flip_byte(Binary) -> flip_byte(Binary, Offset, Length) ->
L = byte_size(Binary), Byte1 = leveled_rand:uniform(Length) + Offset - 1,
Byte1 = leveled_rand:uniform(L) - 1,
<<PreB1:Byte1/binary, A:8/integer, PostByte1/binary>> = Binary, <<PreB1:Byte1/binary, A:8/integer, PostByte1/binary>> = Binary,
case A of case A of
0 -> 0 ->
@ -2238,7 +2310,7 @@ flip_byte(Binary) ->
test_binary_slot(FullBin, Key, Hash, ExpectedValue) -> test_binary_slot(FullBin, Key, Hash, ExpectedValue) ->
% SW = os:timestamp(), % SW = os:timestamp(),
{ReturnedValue, _BLs, _Idx} = binaryslot_get(FullBin, Key, Hash, native), {ReturnedValue, _Header} = binaryslot_get(FullBin, Key, Hash, native),
?assertMatch(ExpectedValue, ReturnedValue). ?assertMatch(ExpectedValue, ReturnedValue).
% io:format(user, "Fetch success in ~w microseconds ~n", % io:format(user, "Fetch success in ~w microseconds ~n",
% [timer:now_diff(os:timestamp(), SW)]). % [timer:now_diff(os:timestamp(), SW)]).
@ -2589,7 +2661,7 @@ nonsense_coverage_test() ->
handle_sync_event("hello", self(), reader, #state{})), handle_sync_event("hello", self(), reader, #state{})),
SampleBin = <<0:128/integer>>, SampleBin = <<0:128/integer>>,
FlippedBin = flip_byte(SampleBin), FlippedBin = flip_byte(SampleBin, 0, 16),
?assertMatch(false, FlippedBin == SampleBin). ?assertMatch(false, FlippedBin == SampleBin).
hashmatching_bytreesize_test() -> hashmatching_bytreesize_test() ->