Switch to CRC check at Block Level
Previously done at Slot Level - but Blocks were still read from disk after the Slot CRC had been checked. This seems safer. It requires an extra CRC check for every fetch. However, CRC checking smaller binaries during the build process appears to be beneficial to performance. It is hoped this will be an enabler to turning off compression at Levels 0 and 1 to improve performance (without having a compensating issue with reduced CRC performance)
This commit is contained in:
parent
7a99d060a3
commit
5bac389d0c
1 changed files with 182 additions and 110 deletions
|
@ -79,6 +79,8 @@
|
||||||
-define(TIMING_SAMPLECOUNTDOWN, 10000).
|
-define(TIMING_SAMPLECOUNTDOWN, 10000).
|
||||||
-define(TIMING_SAMPLESIZE, 100).
|
-define(TIMING_SAMPLESIZE, 100).
|
||||||
-define(CACHE_SIZE, 32).
|
-define(CACHE_SIZE, 32).
|
||||||
|
-define(BLOCK_LENGTHS_LENGTH, 20).
|
||||||
|
-define(FLIPPER32, 4294967295).
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
|
||||||
|
@ -658,23 +660,22 @@ fetch(LedgerKey, Hash, State, Timings0) ->
|
||||||
CachedBlockIdx =
|
CachedBlockIdx =
|
||||||
array:get(SlotID - 1, State#state.blockindex_cache),
|
array:get(SlotID - 1, State#state.blockindex_cache),
|
||||||
{SW2, Timings2} = update_timings(SW1, Timings1, lookup_cache, true),
|
{SW2, Timings2} = update_timings(SW1, Timings1, lookup_cache, true),
|
||||||
|
BL = ?BLOCK_LENGTHS_LENGTH,
|
||||||
|
|
||||||
case CachedBlockIdx of
|
case CachedBlockIdx of
|
||||||
none ->
|
none ->
|
||||||
SlotBin = read_slot(State#state.handle, Slot),
|
SlotBin = read_slot(State#state.handle, Slot),
|
||||||
{Result, BlockLengths, BlockIdx} =
|
{Result, Header} =
|
||||||
binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod),
|
binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod),
|
||||||
BlockIndexCache =
|
BlockIndexCache =
|
||||||
array:set(SlotID - 1,
|
array:set(SlotID - 1, Header, State#state.blockindex_cache),
|
||||||
<<BlockLengths/binary, BlockIdx/binary>>,
|
|
||||||
State#state.blockindex_cache),
|
|
||||||
{_SW3, Timings3} =
|
{_SW3, Timings3} =
|
||||||
update_timings(SW2, Timings2, noncached_block, false),
|
update_timings(SW2, Timings2, noncached_block, false),
|
||||||
{Result,
|
{Result,
|
||||||
State#state{blockindex_cache = BlockIndexCache},
|
State#state{blockindex_cache = BlockIndexCache},
|
||||||
Timings3};
|
Timings3};
|
||||||
<<BlockLengths:24/binary, BlockIdx/binary>> ->
|
<<BlockLengths:BL/binary, PosBin/binary>> ->
|
||||||
PosList = find_pos(BlockIdx, extra_hash(Hash), [], 0),
|
PosList = find_pos(PosBin, extra_hash(Hash), [], 0),
|
||||||
case PosList of
|
case PosList of
|
||||||
[] ->
|
[] ->
|
||||||
{_SW3, Timings3} =
|
{_SW3, Timings3} =
|
||||||
|
@ -700,6 +701,7 @@ fetch(LedgerKey, Hash, State, Timings0) ->
|
||||||
State#state.handle,
|
State#state.handle,
|
||||||
StartPos,
|
StartPos,
|
||||||
BlockLengths,
|
BlockLengths,
|
||||||
|
byte_size(PosBin),
|
||||||
LedgerKey,
|
LedgerKey,
|
||||||
PressMethod,
|
PressMethod,
|
||||||
not_present),
|
not_present),
|
||||||
|
@ -865,12 +867,12 @@ build_table_summary(SlotIndex, _Level, FirstKey, SlotCount, MaxSQN, Bloom) ->
|
||||||
SummBin =
|
SummBin =
|
||||||
term_to_binary({Summary, Bloom, lists:reverse(SlotIndex)},
|
term_to_binary({Summary, Bloom, lists:reverse(SlotIndex)},
|
||||||
?BINARY_SETTINGS),
|
?BINARY_SETTINGS),
|
||||||
SummCRC = erlang:crc32(SummBin),
|
SummCRC = hmac(SummBin),
|
||||||
<<SummCRC:32/integer, SummBin/binary>>.
|
<<SummCRC:32/integer, SummBin/binary>>.
|
||||||
|
|
||||||
read_table_summary(BinWithCheck) ->
|
read_table_summary(BinWithCheck) ->
|
||||||
<<SummCRC:32/integer, SummBin/binary>> = BinWithCheck,
|
<<SummCRC:32/integer, SummBin/binary>> = BinWithCheck,
|
||||||
CRCCheck = erlang:crc32(SummBin),
|
CRCCheck = hmac(SummBin),
|
||||||
if
|
if
|
||||||
CRCCheck == SummCRC ->
|
CRCCheck == SummCRC ->
|
||||||
% If not might it might be possible to rebuild from all the slots
|
% If not might it might be possible to rebuild from all the slots
|
||||||
|
@ -938,24 +940,46 @@ generate_filenames(RootFilename) ->
|
||||||
%% checks
|
%% checks
|
||||||
serialise_block(Term, lz4) ->
|
serialise_block(Term, lz4) ->
|
||||||
{ok, Bin} = lz4:pack(term_to_binary(Term)),
|
{ok, Bin} = lz4:pack(term_to_binary(Term)),
|
||||||
Bin;
|
CRC32 = hmac(Bin),
|
||||||
|
<<Bin/binary, CRC32:32/integer>>;
|
||||||
serialise_block(Term, native) ->
|
serialise_block(Term, native) ->
|
||||||
term_to_binary(Term, ?BINARY_SETTINGS).
|
Bin = term_to_binary(Term, ?BINARY_SETTINGS),
|
||||||
|
CRC32 = hmac(Bin),
|
||||||
|
<<Bin/binary, CRC32:32/integer>>.
|
||||||
|
|
||||||
|
|
||||||
-spec deserialise_block(binary(), press_methods()) -> any().
|
-spec deserialise_block(binary(), press_methods()) -> any().
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Convert binary to term
|
%% Convert binary to term
|
||||||
%% Function split out to make it easier to experiment with different
|
%% Function split out to make it easier to experiment with different
|
||||||
%% compression methods. Also, perhaps standardise applictaion of CRC
|
%% compression methods.
|
||||||
%% checks
|
%%
|
||||||
deserialise_block(Bin, lz4) ->
|
%% If CRC check fails we treat all the data as missing
|
||||||
|
deserialise_block(Bin, PressMethod) ->
|
||||||
|
BinS = byte_size(Bin) - 4,
|
||||||
|
<<TermBin:BinS/binary, CRC32:32/integer>> = Bin,
|
||||||
|
case hmac(TermBin) of
|
||||||
|
CRC32 ->
|
||||||
|
deserialise_checkedblock(TermBin, PressMethod);
|
||||||
|
_ ->
|
||||||
|
[]
|
||||||
|
end.
|
||||||
|
|
||||||
|
deserialise_checkedblock(Bin, lz4) ->
|
||||||
{ok, Bin0} = lz4:unpack(Bin),
|
{ok, Bin0} = lz4:unpack(Bin),
|
||||||
binary_to_term(Bin0);
|
binary_to_term(Bin0);
|
||||||
deserialise_block(Bin, native) ->
|
deserialise_checkedblock(Bin, native) ->
|
||||||
binary_to_term(Bin).
|
binary_to_term(Bin).
|
||||||
|
|
||||||
|
|
||||||
|
-spec hmac(binary()|integer()) -> integer().
|
||||||
|
%% @doc
|
||||||
|
%% Perform a CRC check on an input
|
||||||
|
hmac(Bin) when is_binary(Bin) ->
|
||||||
|
erlang:crc32(Bin);
|
||||||
|
hmac(Int) when is_integer(Int) ->
|
||||||
|
Int bxor ?FLIPPER32.
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% SlotIndex Implementation
|
%%% SlotIndex Implementation
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
@ -1130,45 +1154,45 @@ generate_binary_slot(Lookup, KVL, PressMethod, BuildTimings0) ->
|
||||||
BuildTimings2 = update_buildtimings(SW1, BuildTimings1, slot_serialise),
|
BuildTimings2 = update_buildtimings(SW1, BuildTimings1, slot_serialise),
|
||||||
SW2 = os:timestamp(),
|
SW2 = os:timestamp(),
|
||||||
|
|
||||||
B1P = byte_size(PosBinIndex),
|
B1P = byte_size(PosBinIndex) + ?BLOCK_LENGTHS_LENGTH,
|
||||||
|
CheckB1P = hmac(B1P),
|
||||||
B1L = byte_size(B1),
|
B1L = byte_size(B1),
|
||||||
B2L = byte_size(B2),
|
B2L = byte_size(B2),
|
||||||
B3L = byte_size(B3),
|
B3L = byte_size(B3),
|
||||||
B4L = byte_size(B4),
|
B4L = byte_size(B4),
|
||||||
B5L = byte_size(B5),
|
B5L = byte_size(B5),
|
||||||
Lengths = <<B1P:32/integer,
|
Header = <<B1L:32/integer,
|
||||||
B1L:32/integer,
|
|
||||||
B2L:32/integer,
|
B2L:32/integer,
|
||||||
B3L:32/integer,
|
B3L:32/integer,
|
||||||
B4L:32/integer,
|
B4L:32/integer,
|
||||||
B5L:32/integer>>,
|
B5L:32/integer,
|
||||||
SlotBin = <<Lengths/binary,
|
PosBinIndex/binary>>,
|
||||||
PosBinIndex/binary,
|
CheckH = hmac(Header),
|
||||||
B1/binary, B2/binary, B3/binary, B4/binary, B5/binary>>,
|
SlotBin = <<CheckB1P:32/integer, B1P:32/integer,
|
||||||
CRC32 = erlang:crc32(SlotBin),
|
CheckH:32/integer, Header/binary,
|
||||||
FullBin = <<CRC32:32/integer, SlotBin/binary>>,
|
B1/binary, B2/binary, B3/binary, B4/binary, B5/binary>>,
|
||||||
|
|
||||||
{LastKey, _LV} = lists:last(KVL),
|
{LastKey, _LV} = lists:last(KVL),
|
||||||
|
|
||||||
BuildTimings3 = update_buildtimings(SW2, BuildTimings2, slot_finish),
|
BuildTimings3 = update_buildtimings(SW2, BuildTimings2, slot_finish),
|
||||||
|
|
||||||
{{<<Lengths/binary, PosBinIndex/binary>>, FullBin, HashL, LastKey},
|
{{Header, SlotBin, HashL, LastKey}, BuildTimings3}.
|
||||||
BuildTimings3}.
|
|
||||||
|
|
||||||
|
|
||||||
% Acc should start as not_present if LedgerKey is a key, and a list if
|
% Acc should start as not_present if LedgerKey is a key, and a list if
|
||||||
% LedgerKey is false
|
% LedgerKey is false
|
||||||
|
|
||||||
check_blocks([], _Handle, _StartPos, _BlockLengths,
|
check_blocks([], _Handle, _StartPos, _BlockLengths, _PosBinLength,
|
||||||
_LedgerKeyToCheck, _PressMethod, Acc) ->
|
_LedgerKeyToCheck, _PressMethod, Acc) ->
|
||||||
Acc;
|
Acc;
|
||||||
check_blocks([Pos|Rest], Handle, StartPos,
|
check_blocks([Pos|Rest], Handle, StartPos, BlockLengths, PosBinLength,
|
||||||
BlockLengths, LedgerKeyToCheck, PressMethod, Acc) ->
|
LedgerKeyToCheck, PressMethod, Acc) ->
|
||||||
{BlockNumber, BlockPos} = revert_position(Pos),
|
{BlockNumber, BlockPos} = revert_position(Pos),
|
||||||
BlockBin =
|
BlockBin =
|
||||||
read_block(Handle,
|
read_block(Handle,
|
||||||
StartPos,
|
StartPos,
|
||||||
BlockLengths,
|
BlockLengths,
|
||||||
|
PosBinLength,
|
||||||
BlockNumber),
|
BlockNumber),
|
||||||
BlockL = deserialise_block(BlockBin, PressMethod),
|
BlockL = deserialise_block(BlockBin, PressMethod),
|
||||||
{K, V} = lists:nth(BlockPos, BlockL),
|
{K, V} = lists:nth(BlockPos, BlockL),
|
||||||
|
@ -1180,20 +1204,22 @@ check_blocks([Pos|Rest], Handle, StartPos,
|
||||||
false ->
|
false ->
|
||||||
Acc ++ [{K, V}];
|
Acc ++ [{K, V}];
|
||||||
_ ->
|
_ ->
|
||||||
check_blocks(Rest, Handle, StartPos, BlockLengths,
|
check_blocks(Rest, Handle, StartPos,
|
||||||
|
BlockLengths, PosBinLength,
|
||||||
LedgerKeyToCheck, PressMethod, Acc)
|
LedgerKeyToCheck, PressMethod, Acc)
|
||||||
end
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
read_block(Handle, StartPos, BlockLengths, BlockID) ->
|
read_block(Handle, StartPos, BlockLengths, PosBinLength, BlockID) ->
|
||||||
{BlockPos, Offset, Length} = block_offsetandlength(BlockLengths, BlockID),
|
{Offset, Length} = block_offsetandlength(BlockLengths, BlockID),
|
||||||
{ok, BlockBin} = file:pread(Handle,
|
{ok, BlockBin} = file:pread(Handle,
|
||||||
StartPos
|
StartPos
|
||||||
+ BlockPos
|
|
||||||
+ Offset
|
+ Offset
|
||||||
+ 28,
|
+ PosBinLength
|
||||||
% 4-byte CRC, 4 byte pos, 5x4 byte lengths
|
+ 32,
|
||||||
|
% 4-byte CRC, 4-byte pos,
|
||||||
|
% 4-byte CRC, 5x4 byte lengths
|
||||||
Length),
|
Length),
|
||||||
BlockBin.
|
BlockBin.
|
||||||
|
|
||||||
|
@ -1260,6 +1286,7 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) ->
|
||||||
BinMapFun =
|
BinMapFun =
|
||||||
fun(Pointer, Acc) ->
|
fun(Pointer, Acc) ->
|
||||||
{SP, _L, ID, _SK, _EK} = pointer_mapfun(Pointer),
|
{SP, _L, ID, _SK, _EK} = pointer_mapfun(Pointer),
|
||||||
|
BL = ?BLOCK_LENGTHS_LENGTH,
|
||||||
case array:get(ID - 1, BlockIndexCache) of
|
case array:get(ID - 1, BlockIndexCache) of
|
||||||
none ->
|
none ->
|
||||||
% If there is an attempt to use the seg list query and the
|
% If there is an attempt to use the seg list query and the
|
||||||
|
@ -1270,8 +1297,8 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) ->
|
||||||
read_length_list(Handle, [LengthDetails]),
|
read_length_list(Handle, [LengthDetails]),
|
||||||
MapFun = binarysplit_mapfun(MultiSlotBin, StartPos),
|
MapFun = binarysplit_mapfun(MultiSlotBin, StartPos),
|
||||||
Acc ++ [MapFun(LengthDetails)];
|
Acc ++ [MapFun(LengthDetails)];
|
||||||
<<BlockLengths:24/binary, BlockIdx/binary>> ->
|
<<BlockLengths:BL/binary, BlockIdx/binary>> ->
|
||||||
% If there is a BlockIndex cached then we cna use it to
|
% If there is a BlockIndex cached then we can use it to
|
||||||
% check to see if any of the expected segments are
|
% check to see if any of the expected segments are
|
||||||
% present without lifting the slot off disk. Also the
|
% present without lifting the slot off disk. Also the
|
||||||
% fact that we know position can be used to filter out
|
% fact that we know position can be used to filter out
|
||||||
|
@ -1279,9 +1306,14 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) ->
|
||||||
case find_pos(BlockIdx, SegList, [], 0) of
|
case find_pos(BlockIdx, SegList, [], 0) of
|
||||||
[] ->
|
[] ->
|
||||||
Acc;
|
Acc;
|
||||||
PL ->
|
PositionList ->
|
||||||
Acc ++ check_blocks(PL, Handle, SP, BlockLengths,
|
Acc ++
|
||||||
false, PressMethod, [])
|
check_blocks(PositionList,
|
||||||
|
Handle, SP,
|
||||||
|
BlockLengths,
|
||||||
|
byte_size(BlockIdx),
|
||||||
|
false, PressMethod,
|
||||||
|
[])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end,
|
end,
|
||||||
|
@ -1319,19 +1351,17 @@ read_length_list(Handle, LengthList) ->
|
||||||
|
|
||||||
binaryslot_get(FullBin, Key, Hash, PressMethod) ->
|
binaryslot_get(FullBin, Key, Hash, PressMethod) ->
|
||||||
case crc_check_slot(FullBin) of
|
case crc_check_slot(FullBin) of
|
||||||
{BlockLengths, Rest} ->
|
{Header, Blocks} ->
|
||||||
<<B1P:32/integer, _R/binary>> = BlockLengths,
|
BL = ?BLOCK_LENGTHS_LENGTH,
|
||||||
<<PosBinIndex:B1P/binary, Blocks/binary>> = Rest,
|
<<BlockLengths:BL/binary, PosBinIndex/binary>> = Header,
|
||||||
PosList = find_pos(PosBinIndex,
|
PosList = find_pos(PosBinIndex,
|
||||||
extra_hash(Hash),
|
extra_hash(Hash),
|
||||||
[],
|
[],
|
||||||
0),
|
0),
|
||||||
{fetch_value(PosList, BlockLengths, Blocks, Key, PressMethod),
|
{fetch_value(PosList, BlockLengths, Blocks, Key, PressMethod),
|
||||||
BlockLengths,
|
Header};
|
||||||
PosBinIndex};
|
|
||||||
crc_wonky ->
|
crc_wonky ->
|
||||||
{not_present,
|
{not_present,
|
||||||
none,
|
|
||||||
none}
|
none}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -1349,14 +1379,13 @@ binaryslot_tolist(FullBin, PressMethod) ->
|
||||||
|
|
||||||
{Out, _Rem} =
|
{Out, _Rem} =
|
||||||
case crc_check_slot(FullBin) of
|
case crc_check_slot(FullBin) of
|
||||||
{BlockLengths, RestBin} ->
|
{Header, Blocks} ->
|
||||||
<<B1P:32/integer,
|
<<B1L:32/integer,
|
||||||
B1L:32/integer,
|
|
||||||
B2L:32/integer,
|
B2L:32/integer,
|
||||||
B3L:32/integer,
|
B3L:32/integer,
|
||||||
B4L:32/integer,
|
B4L:32/integer,
|
||||||
B5L:32/integer>> = BlockLengths,
|
B5L:32/integer,
|
||||||
<<_PosBinIndex:B1P/binary, Blocks/binary>> = RestBin,
|
_PosBinIndex/binary>> = Header,
|
||||||
lists:foldl(BlockFetchFun,
|
lists:foldl(BlockFetchFun,
|
||||||
{[], Blocks},
|
{[], Blocks},
|
||||||
[B1L, B2L, B3L, B4L, B5L]);
|
[B1L, B2L, B3L, B4L, B5L]);
|
||||||
|
@ -1382,17 +1411,16 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
|
||||||
% scenario is hard to do in concise code
|
% scenario is hard to do in concise code
|
||||||
BlocksToCheck =
|
BlocksToCheck =
|
||||||
case crc_check_slot(FullBin) of
|
case crc_check_slot(FullBin) of
|
||||||
{BlockLengths, RestBin} ->
|
{Header, Blocks} ->
|
||||||
<<B1P:32/integer,
|
<<B1L:32/integer,
|
||||||
B1L:32/integer,
|
|
||||||
B2L:32/integer,
|
B2L:32/integer,
|
||||||
B3L:32/integer,
|
B3L:32/integer,
|
||||||
B4L:32/integer,
|
B4L:32/integer,
|
||||||
B5L:32/integer>> = BlockLengths,
|
B5L:32/integer,
|
||||||
<<_PosBinIndex:B1P/binary,
|
_PosBinIndex/binary>> = Header,
|
||||||
Block1:B1L/binary, Block2:B2L/binary,
|
<<Block1:B1L/binary, Block2:B2L/binary,
|
||||||
MidBlock:B3L/binary,
|
MidBlock:B3L/binary,
|
||||||
Block4:B4L/binary, Block5:B5L/binary>> = RestBin,
|
Block4:B4L/binary, Block5:B5L/binary>> = Blocks,
|
||||||
case B3L of
|
case B3L of
|
||||||
0 ->
|
0 ->
|
||||||
[Block1, Block2];
|
[Block1, Block2];
|
||||||
|
@ -1440,11 +1468,10 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
|
||||||
false ->
|
false ->
|
||||||
Block
|
Block
|
||||||
end,
|
end,
|
||||||
{LastKey, _LV} = lists:last(BlockList),
|
case fetchend_rawblock(BlockList) of
|
||||||
case StartKey > LastKey of
|
{LastKey, _LV} when StartKey > LastKey ->
|
||||||
true ->
|
|
||||||
{Acc, true};
|
{Acc, true};
|
||||||
false ->
|
{LastKey, _LV} ->
|
||||||
{_LDrop, RKeep} = lists:splitwith(LTrimFun,
|
{_LDrop, RKeep} = lists:splitwith(LTrimFun,
|
||||||
BlockList),
|
BlockList),
|
||||||
case leveled_codec:endkey_passed(EndKey, LastKey) of
|
case leveled_codec:endkey_passed(EndKey, LastKey) of
|
||||||
|
@ -1453,7 +1480,9 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
|
||||||
{Acc ++ LKeep, false};
|
{Acc ++ LKeep, false};
|
||||||
false ->
|
false ->
|
||||||
{Acc ++ RKeep, true}
|
{Acc ++ RKeep, true}
|
||||||
end
|
end;
|
||||||
|
_ ->
|
||||||
|
{Acc, true}
|
||||||
end;
|
end;
|
||||||
{_ , false} ->
|
{_ , false} ->
|
||||||
{Acc, false}
|
{Acc, false}
|
||||||
|
@ -1465,46 +1494,47 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
|
||||||
|
|
||||||
|
|
||||||
crc_check_slot(FullBin) ->
|
crc_check_slot(FullBin) ->
|
||||||
<<CRC32:32/integer, SlotBin/binary>> = FullBin,
|
<<CRC32PBL:32/integer,
|
||||||
case erlang:crc32(SlotBin) of
|
PosBL:32/integer,
|
||||||
CRC32 ->
|
CRC32H:32/integer,
|
||||||
<<BlockLengths:24/binary, Rest/binary>> = SlotBin,
|
Header:PosBL/binary,
|
||||||
{BlockLengths, Rest};
|
Blocks/binary>> = FullBin,
|
||||||
|
case {hmac(Header), hmac(PosBL)} of
|
||||||
|
{CRC32H, CRC32PBL} ->
|
||||||
|
{Header, Blocks};
|
||||||
_ ->
|
_ ->
|
||||||
leveled_log:log("SST09", []),
|
leveled_log:log("SST09", []),
|
||||||
crc_wonky
|
crc_wonky
|
||||||
end.
|
end.
|
||||||
|
|
||||||
block_offsetandlength(BlockLengths, BlockID) ->
|
block_offsetandlength(BlockLengths, BlockID) ->
|
||||||
<<BlocksPos:32/integer, BlockLengths0:20/binary>> = BlockLengths,
|
|
||||||
case BlockID of
|
case BlockID of
|
||||||
1 ->
|
1 ->
|
||||||
<<B1L:32/integer, _BR/binary>> = BlockLengths0,
|
<<B1L:32/integer, _BR/binary>> = BlockLengths,
|
||||||
{BlocksPos, 0, B1L};
|
{0, B1L};
|
||||||
2 ->
|
2 ->
|
||||||
<<B1L:32/integer, B2L:32/integer, _BR/binary>> = BlockLengths0,
|
<<B1L:32/integer, B2L:32/integer, _BR/binary>> = BlockLengths,
|
||||||
{BlocksPos, B1L, B2L};
|
{B1L, B2L};
|
||||||
3 ->
|
3 ->
|
||||||
<<B1L:32/integer,
|
<<B1L:32/integer,
|
||||||
B2L:32/integer,
|
B2L:32/integer,
|
||||||
B3L:32/integer,
|
B3L:32/integer,
|
||||||
_BR/binary>> = BlockLengths0,
|
_BR/binary>> = BlockLengths,
|
||||||
{BlocksPos, B1L + B2L, B3L};
|
{B1L + B2L, B3L};
|
||||||
4 ->
|
4 ->
|
||||||
<<B1L:32/integer,
|
<<B1L:32/integer,
|
||||||
B2L:32/integer,
|
B2L:32/integer,
|
||||||
B3L:32/integer,
|
B3L:32/integer,
|
||||||
B4L:32/integer,
|
B4L:32/integer,
|
||||||
_BR/binary>> = BlockLengths0,
|
_BR/binary>> = BlockLengths,
|
||||||
{BlocksPos, B1L + B2L + B3L, B4L};
|
{B1L + B2L + B3L, B4L};
|
||||||
5 ->
|
5 ->
|
||||||
<<B1L:32/integer,
|
<<B1L:32/integer,
|
||||||
B2L:32/integer,
|
B2L:32/integer,
|
||||||
B3L:32/integer,
|
B3L:32/integer,
|
||||||
B4L:32/integer,
|
B4L:32/integer,
|
||||||
B5L:32/integer,
|
B5L:32/integer>> = BlockLengths,
|
||||||
_BR/binary>> = BlockLengths0,
|
{B1L + B2L + B3L + B4L, B5L}
|
||||||
{BlocksPos, B1L + B2L + B3L + B4L, B5L}
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
extra_hash({SegHash, _ExtraHash}) when is_integer(SegHash) ->
|
extra_hash({SegHash, _ExtraHash}) when is_integer(SegHash) ->
|
||||||
|
@ -1530,19 +1560,26 @@ fetch_value([], _BlockLengths, _Blocks, _Key, _PressMethod) ->
|
||||||
not_present;
|
not_present;
|
||||||
fetch_value([Pos|Rest], BlockLengths, Blocks, Key, PressMethod) ->
|
fetch_value([Pos|Rest], BlockLengths, Blocks, Key, PressMethod) ->
|
||||||
{BlockNumber, BlockPos} = revert_position(Pos),
|
{BlockNumber, BlockPos} = revert_position(Pos),
|
||||||
{_BlockPos,
|
{Offset, Length} = block_offsetandlength(BlockLengths, BlockNumber),
|
||||||
Offset,
|
|
||||||
Length} = block_offsetandlength(BlockLengths, BlockNumber),
|
|
||||||
<<_Pre:Offset/binary, Block:Length/binary, _Rest/binary>> = Blocks,
|
<<_Pre:Offset/binary, Block:Length/binary, _Rest/binary>> = Blocks,
|
||||||
BlockL = deserialise_block(Block, PressMethod),
|
RawBlock = deserialise_block(Block, PressMethod),
|
||||||
{K, V} = lists:nth(BlockPos, BlockL),
|
case fetchfrom_rawblock(BlockPos, RawBlock) of
|
||||||
case K of
|
{K, V} when K == Key ->
|
||||||
Key ->
|
|
||||||
{K, V};
|
{K, V};
|
||||||
_ ->
|
_ ->
|
||||||
fetch_value(Rest, BlockLengths, Blocks, Key, PressMethod)
|
fetch_value(Rest, BlockLengths, Blocks, Key, PressMethod)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
fetchfrom_rawblock(_BlockPos, []) ->
|
||||||
|
not_present;
|
||||||
|
fetchfrom_rawblock(BlockPos, RawBlock) ->
|
||||||
|
lists:nth(BlockPos, RawBlock).
|
||||||
|
|
||||||
|
fetchend_rawblock([]) ->
|
||||||
|
not_present;
|
||||||
|
fetchend_rawblock(RawBlock) ->
|
||||||
|
lists:last(RawBlock).
|
||||||
|
|
||||||
|
|
||||||
revert_position(Pos) ->
|
revert_position(Pos) ->
|
||||||
{SideBlockSize, MidBlockSize} = ?LOOK_BLOCKSIZE,
|
{SideBlockSize, MidBlockSize} = ?LOOK_BLOCKSIZE,
|
||||||
|
@ -2123,7 +2160,7 @@ indexed_list_mixedkeys2_test() ->
|
||||||
IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)),
|
IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)),
|
||||||
% this isn't actually ordered correctly
|
% this isn't actually ordered correctly
|
||||||
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
|
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
|
||||||
{{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} =
|
{{_Header, FullBin, _HL, _LK}, no_timing} =
|
||||||
generate_binary_slot(lookup, Keys, native, no_timing),
|
generate_binary_slot(lookup, Keys, native, no_timing),
|
||||||
lists:foreach(fun({K, V}) ->
|
lists:foreach(fun({K, V}) ->
|
||||||
MH = leveled_codec:segment_hash(K),
|
MH = leveled_codec:segment_hash(K),
|
||||||
|
@ -2134,10 +2171,10 @@ indexed_list_mixedkeys2_test() ->
|
||||||
indexed_list_allindexkeys_test() ->
|
indexed_list_allindexkeys_test() ->
|
||||||
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
|
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
|
||||||
?LOOK_SLOTSIZE),
|
?LOOK_SLOTSIZE),
|
||||||
{{PosBinIndex1, FullBin, _HL, _LK}, no_timing} =
|
{{Header, FullBin, _HL, _LK}, no_timing} =
|
||||||
generate_binary_slot(lookup, Keys, native, no_timing),
|
generate_binary_slot(lookup, Keys, native, no_timing),
|
||||||
EmptySlotSize = ?LOOK_SLOTSIZE - 1,
|
EmptySlotSize = ?LOOK_SLOTSIZE - 1,
|
||||||
?assertMatch(<<_BL:24/binary, EmptySlotSize:8/integer>>, PosBinIndex1),
|
?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, Header),
|
||||||
% SW = os:timestamp(),
|
% SW = os:timestamp(),
|
||||||
BinToList = binaryslot_tolist(FullBin, native),
|
BinToList = binaryslot_tolist(FullBin, native),
|
||||||
% io:format(user,
|
% io:format(user,
|
||||||
|
@ -2149,9 +2186,9 @@ indexed_list_allindexkeys_test() ->
|
||||||
indexed_list_allindexkeys_nolookup_test() ->
|
indexed_list_allindexkeys_nolookup_test() ->
|
||||||
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)),
|
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)),
|
||||||
?NOLOOK_SLOTSIZE),
|
?NOLOOK_SLOTSIZE),
|
||||||
{{PosBinIndex1, FullBin, _HL, _LK}, no_timing} =
|
{{Header, FullBin, _HL, _LK}, no_timing} =
|
||||||
generate_binary_slot(no_lookup, Keys, native, no_timing),
|
generate_binary_slot(no_lookup, Keys, native, no_timing),
|
||||||
?assertMatch(<<_BL:24/binary, 127:8/integer>>, PosBinIndex1),
|
?assertMatch(<<_BL:20/binary, 127:8/integer>>, Header),
|
||||||
% SW = os:timestamp(),
|
% SW = os:timestamp(),
|
||||||
BinToList = binaryslot_tolist(FullBin, native),
|
BinToList = binaryslot_tolist(FullBin, native),
|
||||||
% io:format(user,
|
% io:format(user,
|
||||||
|
@ -2163,10 +2200,10 @@ indexed_list_allindexkeys_nolookup_test() ->
|
||||||
indexed_list_allindexkeys_trimmed_test() ->
|
indexed_list_allindexkeys_trimmed_test() ->
|
||||||
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
|
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
|
||||||
?LOOK_SLOTSIZE),
|
?LOOK_SLOTSIZE),
|
||||||
{{PosBinIndex1, FullBin, _HL, _LK}, no_timing} =
|
{{Header, FullBin, _HL, _LK}, no_timing} =
|
||||||
generate_binary_slot(lookup, Keys, native, no_timing),
|
generate_binary_slot(lookup, Keys, native, no_timing),
|
||||||
EmptySlotSize = ?LOOK_SLOTSIZE - 1,
|
EmptySlotSize = ?LOOK_SLOTSIZE - 1,
|
||||||
?assertMatch(<<_BL:24/binary, EmptySlotSize:8/integer>>, PosBinIndex1),
|
?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, Header),
|
||||||
?assertMatch(Keys, binaryslot_trimmedlist(FullBin,
|
?assertMatch(Keys, binaryslot_trimmedlist(FullBin,
|
||||||
{i,
|
{i,
|
||||||
"Bucket",
|
"Bucket",
|
||||||
|
@ -2204,29 +2241,64 @@ indexed_list_mixedkeys_bitflip_test() ->
|
||||||
KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)),
|
KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)),
|
||||||
KVL1 = lists:sublist(KVL0, 33),
|
KVL1 = lists:sublist(KVL0, 33),
|
||||||
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
|
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
|
||||||
{{_PosBinIndex1, FullBin, _HL, LK}, no_timing} =
|
{{Header, SlotBin, _HL, LK}, no_timing} =
|
||||||
generate_binary_slot(lookup, Keys, native, no_timing),
|
generate_binary_slot(lookup, Keys, native, no_timing),
|
||||||
|
|
||||||
?assertMatch(LK, element(1, lists:last(Keys))),
|
?assertMatch(LK, element(1, lists:last(Keys))),
|
||||||
|
|
||||||
FullBin0 = flip_byte(FullBin),
|
|
||||||
|
|
||||||
{TestK1, _TestV1} = lists:nth(20, KVL1),
|
|
||||||
MH1 = leveled_codec:segment_hash(TestK1),
|
|
||||||
|
|
||||||
test_binary_slot(FullBin0, TestK1, MH1, not_present),
|
<<B1L:32/integer,
|
||||||
ToList = binaryslot_tolist(FullBin0, native),
|
_B2L:32/integer,
|
||||||
?assertMatch([], ToList),
|
_B3L:32/integer,
|
||||||
|
_B4L:32/integer,
|
||||||
|
_B5L:32/integer,
|
||||||
|
PosBin/binary>> = Header,
|
||||||
|
|
||||||
|
TestKey1 = element(1, lists:nth(1, KVL1)),
|
||||||
|
TestKey2 = element(1, lists:nth(33, KVL1)),
|
||||||
|
MH1 = leveled_codec:segment_hash(TestKey1),
|
||||||
|
MH2 = leveled_codec:segment_hash(TestKey2),
|
||||||
|
|
||||||
|
test_binary_slot(SlotBin, TestKey1, MH1, lists:nth(1, KVL1)),
|
||||||
|
test_binary_slot(SlotBin, TestKey2, MH2, lists:nth(33, KVL1)),
|
||||||
|
ToList = binaryslot_tolist(SlotBin, native),
|
||||||
|
?assertMatch(Keys, ToList),
|
||||||
|
|
||||||
|
[Pos1] = find_pos(PosBin, extra_hash(MH1), [], 0),
|
||||||
|
[Pos2] = find_pos(PosBin, extra_hash(MH2), [], 0),
|
||||||
|
{BN1, _BP1} = revert_position(Pos1),
|
||||||
|
{BN2, _BP2} = revert_position(Pos2),
|
||||||
|
{Offset1, Length1} = block_offsetandlength(Header, BN1),
|
||||||
|
{Offset2, Length2} = block_offsetandlength(Header, BN2),
|
||||||
|
|
||||||
|
SlotBin1 = flip_byte(SlotBin, byte_size(Header) + 12 + Offset1, Length1),
|
||||||
|
SlotBin2 = flip_byte(SlotBin, byte_size(Header) + 12 + Offset2, Length2),
|
||||||
|
|
||||||
|
test_binary_slot(SlotBin2, TestKey1, MH1, lists:nth(1, KVL1)),
|
||||||
|
test_binary_slot(SlotBin1, TestKey2, MH2, lists:nth(33, KVL1)),
|
||||||
|
|
||||||
|
test_binary_slot(SlotBin1, TestKey1, MH1, not_present),
|
||||||
|
test_binary_slot(SlotBin2, TestKey2, MH2, not_present),
|
||||||
|
|
||||||
|
ToList1 = binaryslot_tolist(SlotBin1, native),
|
||||||
|
ToList2 = binaryslot_tolist(SlotBin2, native),
|
||||||
|
|
||||||
|
?assertMatch(true, is_list(ToList1)),
|
||||||
|
?assertMatch(true, is_list(ToList2)),
|
||||||
|
?assertMatch(true, length(ToList1) > 0),
|
||||||
|
?assertMatch(true, length(ToList2) > 0),
|
||||||
|
?assertMatch(true, length(ToList1) < length(Keys)),
|
||||||
|
?assertMatch(true, length(ToList2) < length(Keys)),
|
||||||
|
|
||||||
|
SlotBin3 = flip_byte(SlotBin, byte_size(Header) + 12, B1L),
|
||||||
|
|
||||||
{SK1, _} = lists:nth(10, Keys),
|
{SK1, _} = lists:nth(10, Keys),
|
||||||
{EK1, _} = lists:nth(50, Keys),
|
{EK1, _} = lists:nth(20, Keys),
|
||||||
O1 = binaryslot_trimmedlist(FullBin0, SK1, EK1, native),
|
O1 = binaryslot_trimmedlist(SlotBin3, SK1, EK1, native),
|
||||||
?assertMatch(0, length(O1)),
|
|
||||||
?assertMatch([], O1).
|
?assertMatch([], O1).
|
||||||
|
|
||||||
|
|
||||||
flip_byte(Binary) ->
|
flip_byte(Binary, Offset, Length) ->
|
||||||
L = byte_size(Binary),
|
Byte1 = leveled_rand:uniform(Length) + Offset - 1,
|
||||||
Byte1 = leveled_rand:uniform(L) - 1,
|
|
||||||
<<PreB1:Byte1/binary, A:8/integer, PostByte1/binary>> = Binary,
|
<<PreB1:Byte1/binary, A:8/integer, PostByte1/binary>> = Binary,
|
||||||
case A of
|
case A of
|
||||||
0 ->
|
0 ->
|
||||||
|
@ -2238,7 +2310,7 @@ flip_byte(Binary) ->
|
||||||
|
|
||||||
test_binary_slot(FullBin, Key, Hash, ExpectedValue) ->
|
test_binary_slot(FullBin, Key, Hash, ExpectedValue) ->
|
||||||
% SW = os:timestamp(),
|
% SW = os:timestamp(),
|
||||||
{ReturnedValue, _BLs, _Idx} = binaryslot_get(FullBin, Key, Hash, native),
|
{ReturnedValue, _Header} = binaryslot_get(FullBin, Key, Hash, native),
|
||||||
?assertMatch(ExpectedValue, ReturnedValue).
|
?assertMatch(ExpectedValue, ReturnedValue).
|
||||||
% io:format(user, "Fetch success in ~w microseconds ~n",
|
% io:format(user, "Fetch success in ~w microseconds ~n",
|
||||||
% [timer:now_diff(os:timestamp(), SW)]).
|
% [timer:now_diff(os:timestamp(), SW)]).
|
||||||
|
@ -2589,7 +2661,7 @@ nonsense_coverage_test() ->
|
||||||
handle_sync_event("hello", self(), reader, #state{})),
|
handle_sync_event("hello", self(), reader, #state{})),
|
||||||
|
|
||||||
SampleBin = <<0:128/integer>>,
|
SampleBin = <<0:128/integer>>,
|
||||||
FlippedBin = flip_byte(SampleBin),
|
FlippedBin = flip_byte(SampleBin, 0, 16),
|
||||||
?assertMatch(false, FlippedBin == SampleBin).
|
?assertMatch(false, FlippedBin == SampleBin).
|
||||||
|
|
||||||
hashmatching_bytreesize_test() ->
|
hashmatching_bytreesize_test() ->
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue