Change hash algorithm for penciller
Switch from magic hash to md5 - to hopefully remove the need for some of the artificial jumps required to get expected false positive ratios. Also split the hash into two 16-bit integers. We assume that SegmentID (from the perspective of AAE merkle/tictac trees) will always be at least 16 bits. The idea is that hashes should be used in blooms and indexes such that some advantage can be gained from just knowing the SegmentID - in particular when folding over all the keys in a bucket. Performance testing has been difficult so far - I think due to “cloud” mysteries.
This commit is contained in:
parent
ede0982b2d
commit
a128dcdadf
7 changed files with 75 additions and 62 deletions
|
@ -947,7 +947,7 @@ fetch_head(Key, Penciller, LedgerCache) ->
|
||||||
[{Key, Head}] ->
|
[{Key, Head}] ->
|
||||||
Head;
|
Head;
|
||||||
[] ->
|
[] ->
|
||||||
Hash = leveled_codec:magic_hash(Key),
|
Hash = leveled_codec:segment_hash(Key),
|
||||||
case leveled_penciller:pcl_fetch(Penciller, Key, Hash) of
|
case leveled_penciller:pcl_fetch(Penciller, Key, Hash) of
|
||||||
{Key, Head} ->
|
{Key, Head} ->
|
||||||
maybe_longrunning(SW, pcl_head),
|
maybe_longrunning(SW, pcl_head),
|
||||||
|
|
|
@ -65,6 +65,7 @@
|
||||||
integer_now/0,
|
integer_now/0,
|
||||||
riak_extract_metadata/2,
|
riak_extract_metadata/2,
|
||||||
magic_hash/1,
|
magic_hash/1,
|
||||||
|
segment_hash/1,
|
||||||
to_lookup/1]).
|
to_lookup/1]).
|
||||||
|
|
||||||
-define(V1_VERS, 1).
|
-define(V1_VERS, 1).
|
||||||
|
@ -79,6 +80,20 @@
|
||||||
integer()|null, % Hash of vclock - non-exportable
|
integer()|null, % Hash of vclock - non-exportable
|
||||||
integer()}. % Size in bytes of real object
|
integer()}. % Size in bytes of real object
|
||||||
|
|
||||||
|
|
||||||
|
-spec segment_hash(any()) -> {integer(), integer()}.
|
||||||
|
%% @doc
|
||||||
|
%% Return two 16 bit integers - the segment ID and a second integer for spare
|
||||||
|
%% entropy. The hashes should be used in blooms or indexes such that some
|
||||||
|
%% speed can be gained if just the segment ID is known - but more can be
|
||||||
|
%% gained should the extended hash (with the second element) be known
|
||||||
|
segment_hash(Key) when is_binary(Key) ->
|
||||||
|
<<SegmentID:16/integer, ExtraHash:16/integer, _Rest/binary>> =
|
||||||
|
crypto:hash(md5, Key),
|
||||||
|
{SegmentID, ExtraHash};
|
||||||
|
segment_hash(Key) ->
|
||||||
|
segment_hash(term_to_binary(Key)).
|
||||||
|
|
||||||
-spec magic_hash(any()) -> integer().
|
-spec magic_hash(any()) -> integer().
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Use DJ Bernstein magic hash function. Note, this is more expensive than
|
%% Use DJ Bernstein magic hash function. Note, this is more expensive than
|
||||||
|
@ -87,10 +102,6 @@
|
||||||
%% Hash function contains mysterious constants, some explanation here as to
|
%% Hash function contains mysterious constants, some explanation here as to
|
||||||
%% what they are -
|
%% what they are -
|
||||||
%% http://stackoverflow.com/questions/10696223/reason-for-5381-number-in-djb-hash-function
|
%% http://stackoverflow.com/questions/10696223/reason-for-5381-number-in-djb-hash-function
|
||||||
magic_hash({?RIAK_TAG, Bucket, Key, _SubKey}) ->
|
|
||||||
magic_hash({Bucket, Key});
|
|
||||||
magic_hash({?STD_TAG, Bucket, Key, _SubKey}) ->
|
|
||||||
magic_hash({Bucket, Key});
|
|
||||||
magic_hash({binary, BinaryKey}) ->
|
magic_hash({binary, BinaryKey}) ->
|
||||||
H = 5381,
|
H = 5381,
|
||||||
hash1(H, BinaryKey) band 16#FFFFFFFF;
|
hash1(H, BinaryKey) band 16#FFFFFFFF;
|
||||||
|
@ -516,7 +527,9 @@ parse_date(LMD, UnitMins, LimitMins, Now) ->
|
||||||
|
|
||||||
-spec generate_ledgerkv(
|
-spec generate_ledgerkv(
|
||||||
tuple(), integer(), any(), integer(), tuple()|infinity) ->
|
tuple(), integer(), any(), integer(), tuple()|infinity) ->
|
||||||
{any(), any(), any(), {integer()|no_lookup, integer()}, list()}.
|
{any(), any(), any(),
|
||||||
|
{{integer(), integer()}|no_lookup, integer()},
|
||||||
|
list()}.
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Function to extract from an object the information necessary to populate
|
%% Function to extract from an object the information necessary to populate
|
||||||
%% the Penciller's ledger.
|
%% the Penciller's ledger.
|
||||||
|
@ -537,7 +550,7 @@ generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) ->
|
||||||
_ ->
|
_ ->
|
||||||
{active, TS}
|
{active, TS}
|
||||||
end,
|
end,
|
||||||
Hash = magic_hash(PrimaryKey),
|
Hash = segment_hash(PrimaryKey),
|
||||||
{MD, LastMods} = extract_metadata(Obj, Size, Tag),
|
{MD, LastMods} = extract_metadata(Obj, Size, Tag),
|
||||||
ObjHash = get_objhash(Tag, MD),
|
ObjHash = get_objhash(Tag, MD),
|
||||||
Value = {SQN,
|
Value = {SQN,
|
||||||
|
|
|
@ -254,7 +254,7 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) ->
|
||||||
K = {o, "Bucket" ++ BNumber, "Key" ++ KNumber},
|
K = {o, "Bucket" ++ BNumber, "Key" ++ KNumber},
|
||||||
RandKey = {K, {Count + 1,
|
RandKey = {K, {Count + 1,
|
||||||
{active, infinity},
|
{active, infinity},
|
||||||
leveled_codec:magic_hash(K),
|
leveled_codec:segment_hash(K),
|
||||||
null}},
|
null}},
|
||||||
generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange).
|
generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange).
|
||||||
|
|
||||||
|
|
|
@ -315,21 +315,22 @@ pcl_fetchlevelzero(Pid, Slot) ->
|
||||||
%% The Key needs to be hashable (i.e. have a tag which indicates that the key
|
%% The Key needs to be hashable (i.e. have a tag which indicates that the key
|
||||||
%% can be looked up) - index entries are not hashable for example.
|
%% can be looked up) - index entries are not hashable for example.
|
||||||
%%
|
%%
|
||||||
%% If the hash is already knonw, call pcl_fetch/3 as magic_hash is a
|
%% If the hash is already knonw, call pcl_fetch/3 as segment_hash is a
|
||||||
%% relatively expensive hash function
|
%% relatively expensive hash function
|
||||||
pcl_fetch(Pid, Key) ->
|
pcl_fetch(Pid, Key) ->
|
||||||
Hash = leveled_codec:magic_hash(Key),
|
Hash = leveled_codec:segment_hash(Key),
|
||||||
if
|
if
|
||||||
Hash /= no_lookup ->
|
Hash /= no_lookup ->
|
||||||
gen_server:call(Pid, {fetch, Key, Hash}, infinity)
|
gen_server:call(Pid, {fetch, Key, Hash}, infinity)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-spec pcl_fetch(pid(), tuple(), integer()) -> {tuple(), tuple()}|not_present.
|
-spec pcl_fetch(pid(), tuple(), {integer(), integer()}) ->
|
||||||
|
{tuple(), tuple()}|not_present.
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Fetch a key, return the first (highest SQN) occurrence of that Key along
|
%% Fetch a key, return the first (highest SQN) occurrence of that Key along
|
||||||
%% with the value.
|
%% with the value.
|
||||||
%%
|
%%
|
||||||
%% Hash should be result of leveled_codec:magic_hash(Key)
|
%% Hash should be result of leveled_codec:segment_hash(Key)
|
||||||
pcl_fetch(Pid, Key, Hash) ->
|
pcl_fetch(Pid, Key, Hash) ->
|
||||||
gen_server:call(Pid, {fetch, Key, Hash}, infinity).
|
gen_server:call(Pid, {fetch, Key, Hash}, infinity).
|
||||||
|
|
||||||
|
@ -367,7 +368,7 @@ pcl_fetchnextkey(Pid, StartKey, EndKey, AccFun, InitAcc) ->
|
||||||
%% If the key is not present, it will be assumed that a higher sequence number
|
%% If the key is not present, it will be assumed that a higher sequence number
|
||||||
%% tombstone once existed, and false will be returned.
|
%% tombstone once existed, and false will be returned.
|
||||||
pcl_checksequencenumber(Pid, Key, SQN) ->
|
pcl_checksequencenumber(Pid, Key, SQN) ->
|
||||||
Hash = leveled_codec:magic_hash(Key),
|
Hash = leveled_codec:segment_hash(Key),
|
||||||
if
|
if
|
||||||
Hash /= no_lookup ->
|
Hash /= no_lookup ->
|
||||||
gen_server:call(Pid, {check_sqn, Key, Hash, SQN}, infinity)
|
gen_server:call(Pid, {check_sqn, Key, Hash, SQN}, infinity)
|
||||||
|
@ -1317,7 +1318,7 @@ generate_randomkeys(Count, SQN, Acc) ->
|
||||||
RandKey = {K,
|
RandKey = {K,
|
||||||
{SQN,
|
{SQN,
|
||||||
{active, infinity},
|
{active, infinity},
|
||||||
leveled_codec:magic_hash(K),
|
leveled_codec:segment_hash(K),
|
||||||
null}},
|
null}},
|
||||||
generate_randomkeys(Count - 1, SQN + 1, [RandKey|Acc]).
|
generate_randomkeys(Count - 1, SQN + 1, [RandKey|Acc]).
|
||||||
|
|
||||||
|
@ -1347,7 +1348,7 @@ maybe_pause_push(PCL, KL) ->
|
||||||
T1 = lists:foldl(fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) ->
|
T1 = lists:foldl(fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) ->
|
||||||
UpdSL = [{K, V}|AccSL],
|
UpdSL = [{K, V}|AccSL],
|
||||||
SQN = leveled_codec:strip_to_seqonly({K, V}),
|
SQN = leveled_codec:strip_to_seqonly({K, V}),
|
||||||
H = leveled_codec:magic_hash(K),
|
H = leveled_codec:segment_hash(K),
|
||||||
UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H),
|
UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H),
|
||||||
{UpdSL, UpdIdx, min(SQN, MinSQN), max(SQN, MaxSQN)}
|
{UpdSL, UpdIdx, min(SQN, MinSQN), max(SQN, MaxSQN)}
|
||||||
end,
|
end,
|
||||||
|
@ -1366,7 +1367,7 @@ maybe_pause_push(PCL, KL) ->
|
||||||
|
|
||||||
%% old test data doesn't have the magic hash
|
%% old test data doesn't have the magic hash
|
||||||
add_missing_hash({K, {SQN, ST, MD}}) ->
|
add_missing_hash({K, {SQN, ST, MD}}) ->
|
||||||
{K, {SQN, ST, leveled_codec:magic_hash(K), MD}}.
|
{K, {SQN, ST, leveled_codec:segment_hash(K), MD}}.
|
||||||
|
|
||||||
|
|
||||||
clean_dir_test() ->
|
clean_dir_test() ->
|
||||||
|
|
|
@ -50,7 +50,8 @@
|
||||||
%%% API
|
%%% API
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
||||||
-spec prepare_for_index(index_array(), integer()|no_lookup) -> index_array().
|
-spec prepare_for_index(index_array(), {integer(), integer()}|no_lookup)
|
||||||
|
-> index_array().
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Add the hash of a key to the index. This is 'prepared' in the sense that
|
%% Add the hash of a key to the index. This is 'prepared' in the sense that
|
||||||
%% this index is not use until it is loaded into the main index.
|
%% this index is not use until it is loaded into the main index.
|
||||||
|
@ -95,7 +96,7 @@ new_index() ->
|
||||||
clear_index(_L0Index) ->
|
clear_index(_L0Index) ->
|
||||||
new_index().
|
new_index().
|
||||||
|
|
||||||
-spec check_index(integer(), index_array()) -> list(integer()).
|
-spec check_index({integer(), integer()}, index_array()) -> list(integer()).
|
||||||
%% @doc
|
%% @doc
|
||||||
%% return a list of positions in the list of cache arrays that may contain the
|
%% return a list of positions in the list of cache arrays that may contain the
|
||||||
%% key associated with the hash being checked
|
%% key associated with the hash being checked
|
||||||
|
@ -158,9 +159,9 @@ to_list(Slots, FetchFun) ->
|
||||||
%% checked (with the most recently received cache being checked first) until a
|
%% checked (with the most recently received cache being checked first) until a
|
||||||
%% match is found.
|
%% match is found.
|
||||||
check_levelzero(Key, PosList, TreeList) ->
|
check_levelzero(Key, PosList, TreeList) ->
|
||||||
check_levelzero(Key, leveled_codec:magic_hash(Key), PosList, TreeList).
|
check_levelzero(Key, leveled_codec:segment_hash(Key), PosList, TreeList).
|
||||||
|
|
||||||
-spec check_levelzero(tuple(), integer(), list(integer()), list())
|
-spec check_levelzero(tuple(), {integer(), integer()}, list(integer()), list())
|
||||||
-> {boolean(), tuple|not_found}.
|
-> {boolean(), tuple|not_found}.
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Check for the presence of a given Key in the Level Zero cache, with the
|
%% Check for the presence of a given Key in the Level Zero cache, with the
|
||||||
|
@ -204,10 +205,10 @@ find_pos(<<0:1/integer, NxtSlot:7/integer, T/binary>>, Hash, PosList, _SlotID) -
|
||||||
find_pos(T, Hash, PosList, NxtSlot).
|
find_pos(T, Hash, PosList, NxtSlot).
|
||||||
|
|
||||||
|
|
||||||
split_hash(Hash) ->
|
split_hash({SegmentID, ExtraHash}) ->
|
||||||
Slot = Hash band 255,
|
Slot = SegmentID band 255,
|
||||||
H0 = (Hash bsr 8) band 8388607,
|
H0 = (SegmentID bsr 8) bor (ExtraHash bsl 8),
|
||||||
{Slot, H0}.
|
{Slot, H0 band 8388607}.
|
||||||
|
|
||||||
check_slotlist(Key, _Hash, CheckList, TreeList) ->
|
check_slotlist(Key, _Hash, CheckList, TreeList) ->
|
||||||
SlotCheckFun =
|
SlotCheckFun =
|
||||||
|
@ -358,7 +359,7 @@ with_index_test_() ->
|
||||||
with_index_test2() ->
|
with_index_test2() ->
|
||||||
IndexPrepareFun =
|
IndexPrepareFun =
|
||||||
fun({K, _V}, Acc) ->
|
fun({K, _V}, Acc) ->
|
||||||
H = leveled_codec:magic_hash(K),
|
H = leveled_codec:segment_hash(K),
|
||||||
prepare_for_index(Acc, H)
|
prepare_for_index(Acc, H)
|
||||||
end,
|
end,
|
||||||
LoadFun =
|
LoadFun =
|
||||||
|
@ -382,7 +383,7 @@ with_index_test2() ->
|
||||||
|
|
||||||
CheckFun =
|
CheckFun =
|
||||||
fun({K, V}, {L0Idx, L0Cache}) ->
|
fun({K, V}, {L0Idx, L0Cache}) ->
|
||||||
H = leveled_codec:magic_hash(K),
|
H = leveled_codec:segment_hash(K),
|
||||||
PosList = check_index(H, L0Idx),
|
PosList = check_index(H, L0Idx),
|
||||||
?assertMatch({true, {K, V}},
|
?assertMatch({true, {K, V}},
|
||||||
check_slotlist(K, H, PosList, L0Cache)),
|
check_slotlist(K, H, PosList, L0Cache)),
|
||||||
|
|
|
@ -237,12 +237,12 @@ sst_newlevelzero(RootPath, Filename, Slots, FetchFun, Penciller, MaxSQN) ->
|
||||||
-spec sst_get(pid(), tuple()) -> tuple()|not_present.
|
-spec sst_get(pid(), tuple()) -> tuple()|not_present.
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Return a Key, Value pair matching a Key or not_present if the Key is not in
|
%% Return a Key, Value pair matching a Key or not_present if the Key is not in
|
||||||
%% the store. The magic_hash function is used to accelerate the seeking of
|
%% the store. The segment_hash function is used to accelerate the seeking of
|
||||||
%% keys, sst_get/3 should be used directly if this has already been calculated
|
%% keys, sst_get/3 should be used directly if this has already been calculated
|
||||||
sst_get(Pid, LedgerKey) ->
|
sst_get(Pid, LedgerKey) ->
|
||||||
sst_get(Pid, LedgerKey, leveled_codec:magic_hash(LedgerKey)).
|
sst_get(Pid, LedgerKey, leveled_codec:segment_hash(LedgerKey)).
|
||||||
|
|
||||||
-spec sst_get(pid(), tuple(), integer()) -> tuple()|not_present.
|
-spec sst_get(pid(), tuple(), {integer(), integer()}) -> tuple()|not_present.
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Return a Key, Value pair matching a Key or not_present if the Key is not in
|
%% Return a Key, Value pair matching a Key or not_present if the Key is not in
|
||||||
%% the store (with the magic hash precalculated).
|
%% the store (with the magic hash precalculated).
|
||||||
|
@ -554,7 +554,7 @@ fetch(LedgerKey, Hash, State) ->
|
||||||
State#state{blockindex_cache = BlockIndexCache}};
|
State#state{blockindex_cache = BlockIndexCache}};
|
||||||
<<BlockLengths:24/binary, BlockIdx/binary>> ->
|
<<BlockLengths:24/binary, BlockIdx/binary>> ->
|
||||||
PosList = find_pos(BlockIdx,
|
PosList = find_pos(BlockIdx,
|
||||||
double_hash(Hash, LedgerKey),
|
extra_hash(Hash),
|
||||||
[],
|
[],
|
||||||
0),
|
0),
|
||||||
case PosList of
|
case PosList of
|
||||||
|
@ -808,9 +808,9 @@ generate_binary_slot(Lookup, KVL) ->
|
||||||
fun({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->
|
fun({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->
|
||||||
|
|
||||||
{_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}),
|
{_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}),
|
||||||
case is_integer(H1) of
|
PosH1 = extra_hash(H1),
|
||||||
|
case is_integer(PosH1) of
|
||||||
true ->
|
true ->
|
||||||
PosH1 = double_hash(H1, K),
|
|
||||||
case NoHashCount of
|
case NoHashCount of
|
||||||
0 ->
|
0 ->
|
||||||
{<<1:1/integer,
|
{<<1:1/integer,
|
||||||
|
@ -1003,7 +1003,7 @@ binaryslot_get(FullBin, Key, Hash) ->
|
||||||
<<B1P:32/integer, _R/binary>> = BlockLengths,
|
<<B1P:32/integer, _R/binary>> = BlockLengths,
|
||||||
<<PosBinIndex:B1P/binary, Blocks/binary>> = Rest,
|
<<PosBinIndex:B1P/binary, Blocks/binary>> = Rest,
|
||||||
PosList = find_pos(PosBinIndex,
|
PosList = find_pos(PosBinIndex,
|
||||||
double_hash(Hash, Key),
|
extra_hash(Hash),
|
||||||
[],
|
[],
|
||||||
0),
|
0),
|
||||||
{fetch_value(PosList, BlockLengths, Blocks, Key),
|
{fetch_value(PosList, BlockLengths, Blocks, Key),
|
||||||
|
@ -1186,9 +1186,10 @@ block_offsetandlength(BlockLengths, BlockID) ->
|
||||||
{BlocksPos, B1L + B2L + B3L + B4L, B5L}
|
{BlocksPos, B1L + B2L + B3L + B4L, B5L}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
double_hash(Hash, Key) ->
|
extra_hash({_SegHash, ExtraHash}) when is_integer(ExtraHash) ->
|
||||||
H2 = erlang:phash2(Key),
|
ExtraHash band 32767;
|
||||||
(Hash bxor H2) band 32767.
|
extra_hash(NotHash) ->
|
||||||
|
NotHash.
|
||||||
|
|
||||||
fetch_value([], _BlockLengths, _Blocks, _Key) ->
|
fetch_value([], _BlockLengths, _Blocks, _Key) ->
|
||||||
not_present;
|
not_present;
|
||||||
|
@ -1548,15 +1549,15 @@ indexed_list_test() ->
|
||||||
[timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]),
|
[timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]),
|
||||||
|
|
||||||
{TestK1, TestV1} = lists:nth(20, KVL1),
|
{TestK1, TestV1} = lists:nth(20, KVL1),
|
||||||
MH1 = leveled_codec:magic_hash(TestK1),
|
MH1 = leveled_codec:segment_hash(TestK1),
|
||||||
{TestK2, TestV2} = lists:nth(40, KVL1),
|
{TestK2, TestV2} = lists:nth(40, KVL1),
|
||||||
MH2 = leveled_codec:magic_hash(TestK2),
|
MH2 = leveled_codec:segment_hash(TestK2),
|
||||||
{TestK3, TestV3} = lists:nth(60, KVL1),
|
{TestK3, TestV3} = lists:nth(60, KVL1),
|
||||||
MH3 = leveled_codec:magic_hash(TestK3),
|
MH3 = leveled_codec:segment_hash(TestK3),
|
||||||
{TestK4, TestV4} = lists:nth(80, KVL1),
|
{TestK4, TestV4} = lists:nth(80, KVL1),
|
||||||
MH4 = leveled_codec:magic_hash(TestK4),
|
MH4 = leveled_codec:segment_hash(TestK4),
|
||||||
{TestK5, TestV5} = lists:nth(100, KVL1),
|
{TestK5, TestV5} = lists:nth(100, KVL1),
|
||||||
MH5 = leveled_codec:magic_hash(TestK5),
|
MH5 = leveled_codec:segment_hash(TestK5),
|
||||||
|
|
||||||
test_binary_slot(FullBin, TestK1, MH1, {TestK1, TestV1}),
|
test_binary_slot(FullBin, TestK1, MH1, {TestK1, TestV1}),
|
||||||
test_binary_slot(FullBin, TestK2, MH2, {TestK2, TestV2}),
|
test_binary_slot(FullBin, TestK2, MH2, {TestK2, TestV2}),
|
||||||
|
@ -1573,15 +1574,15 @@ indexed_list_mixedkeys_test() ->
|
||||||
{_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
|
{_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
|
||||||
|
|
||||||
{TestK1, TestV1} = lists:nth(4, KVL1),
|
{TestK1, TestV1} = lists:nth(4, KVL1),
|
||||||
MH1 = leveled_codec:magic_hash(TestK1),
|
MH1 = leveled_codec:segment_hash(TestK1),
|
||||||
{TestK2, TestV2} = lists:nth(8, KVL1),
|
{TestK2, TestV2} = lists:nth(8, KVL1),
|
||||||
MH2 = leveled_codec:magic_hash(TestK2),
|
MH2 = leveled_codec:segment_hash(TestK2),
|
||||||
{TestK3, TestV3} = lists:nth(12, KVL1),
|
{TestK3, TestV3} = lists:nth(12, KVL1),
|
||||||
MH3 = leveled_codec:magic_hash(TestK3),
|
MH3 = leveled_codec:segment_hash(TestK3),
|
||||||
{TestK4, TestV4} = lists:nth(16, KVL1),
|
{TestK4, TestV4} = lists:nth(16, KVL1),
|
||||||
MH4 = leveled_codec:magic_hash(TestK4),
|
MH4 = leveled_codec:segment_hash(TestK4),
|
||||||
{TestK5, TestV5} = lists:nth(20, KVL1),
|
{TestK5, TestV5} = lists:nth(20, KVL1),
|
||||||
MH5 = leveled_codec:magic_hash(TestK5),
|
MH5 = leveled_codec:segment_hash(TestK5),
|
||||||
|
|
||||||
test_binary_slot(FullBin, TestK1, MH1, {TestK1, TestV1}),
|
test_binary_slot(FullBin, TestK1, MH1, {TestK1, TestV1}),
|
||||||
test_binary_slot(FullBin, TestK2, MH2, {TestK2, TestV2}),
|
test_binary_slot(FullBin, TestK2, MH2, {TestK2, TestV2}),
|
||||||
|
@ -1598,7 +1599,7 @@ indexed_list_mixedkeys2_test() ->
|
||||||
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
|
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
|
||||||
{_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
|
{_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
|
||||||
lists:foreach(fun({K, V}) ->
|
lists:foreach(fun({K, V}) ->
|
||||||
MH = leveled_codec:magic_hash(K),
|
MH = leveled_codec:segment_hash(K),
|
||||||
test_binary_slot(FullBin, K, MH, {K, V})
|
test_binary_slot(FullBin, K, MH, {K, V})
|
||||||
end,
|
end,
|
||||||
KVL1).
|
KVL1).
|
||||||
|
@ -1682,7 +1683,7 @@ indexed_list_mixedkeys_bitflip_test() ->
|
||||||
end,
|
end,
|
||||||
|
|
||||||
{TestK1, _TestV1} = lists:nth(20, KVL1),
|
{TestK1, _TestV1} = lists:nth(20, KVL1),
|
||||||
MH1 = leveled_codec:magic_hash(TestK1),
|
MH1 = leveled_codec:segment_hash(TestK1),
|
||||||
|
|
||||||
test_binary_slot(FullBin0, TestK1, MH1, not_present),
|
test_binary_slot(FullBin0, TestK1, MH1, not_present),
|
||||||
ToList = binaryslot_tolist(FullBin0),
|
ToList = binaryslot_tolist(FullBin0),
|
||||||
|
@ -1920,7 +1921,7 @@ simple_persisted_test() ->
|
||||||
In = lists:keymember(K, 1, KVList1),
|
In = lists:keymember(K, 1, KVList1),
|
||||||
case {K > FirstKey, LastKey > K, In} of
|
case {K > FirstKey, LastKey > K, In} of
|
||||||
{true, true, false} ->
|
{true, true, false} ->
|
||||||
[{K, leveled_codec:magic_hash(K), V}|Acc];
|
[{K, leveled_codec:segment_hash(K), V}|Acc];
|
||||||
_ ->
|
_ ->
|
||||||
Acc
|
Acc
|
||||||
end
|
end
|
||||||
|
|
|
@ -48,7 +48,7 @@ create_bloom(HashList) ->
|
||||||
%% Check for the presence of a given hash within a bloom
|
%% Check for the presence of a given hash within a bloom
|
||||||
check_hash(_Hash, <<>>) ->
|
check_hash(_Hash, <<>>) ->
|
||||||
false;
|
false;
|
||||||
check_hash(Hash, BloomBin) ->
|
check_hash({Hash, _ExtraHash}, BloomBin) ->
|
||||||
SlotSplit = (byte_size(BloomBin) div ?BITS_PER_KEY) - 1,
|
SlotSplit = (byte_size(BloomBin) div ?BITS_PER_KEY) - 1,
|
||||||
{Slot, H0, H1} = split_hash(Hash, SlotSplit),
|
{Slot, H0, H1} = split_hash(Hash, SlotSplit),
|
||||||
Mask = get_mask(H0, H1),
|
Mask = get_mask(H0, H1),
|
||||||
|
@ -66,14 +66,11 @@ check_hash(Hash, BloomBin) ->
|
||||||
%%% Internal Functions
|
%%% Internal Functions
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
||||||
split_hash(Hash, SlotSplit) ->
|
split_hash(SegHash, SlotSplit) ->
|
||||||
Slot = Hash band SlotSplit,
|
Slot = SegHash band SlotSplit,
|
||||||
H0 = (Hash bsr 4) band (?BAND_MASK),
|
H0 = (SegHash bsr 4) band (?BAND_MASK),
|
||||||
H1 = (Hash bsr 10) band (?BAND_MASK),
|
H1 = (SegHash bsr 10) band (?BAND_MASK),
|
||||||
H3 = (Hash bsr 16) band (?BAND_MASK),
|
{Slot, H0, H1}.
|
||||||
H4 = (Hash bsr 22) band (?BAND_MASK),
|
|
||||||
Slot0 = (Hash bsr 28) band SlotSplit,
|
|
||||||
{Slot bxor Slot0, H0 bxor H3, H1 bxor H4}.
|
|
||||||
|
|
||||||
get_mask(H0, H1) ->
|
get_mask(H0, H1) ->
|
||||||
case H0 == H1 of
|
case H0 == H1 of
|
||||||
|
@ -90,7 +87,7 @@ get_mask(H0, H1) ->
|
||||||
add_hashlist([], _S, S0, S1) ->
|
add_hashlist([], _S, S0, S1) ->
|
||||||
IntSize = ?INTEGER_SIZE,
|
IntSize = ?INTEGER_SIZE,
|
||||||
<<S0:IntSize/integer, S1:IntSize/integer>>;
|
<<S0:IntSize/integer, S1:IntSize/integer>>;
|
||||||
add_hashlist([TopHash|T], SlotSplit, S0, S1) ->
|
add_hashlist([{TopHash, _ExtraHash}|T], SlotSplit, S0, S1) ->
|
||||||
{Slot, H0, H1} = split_hash(TopHash, SlotSplit),
|
{Slot, H0, H1} = split_hash(TopHash, SlotSplit),
|
||||||
Mask = get_mask(H0, H1),
|
Mask = get_mask(H0, H1),
|
||||||
case Slot of
|
case Slot of
|
||||||
|
@ -104,7 +101,7 @@ add_hashlist([], _S, S0, S1, S2, S3) ->
|
||||||
IntSize = ?INTEGER_SIZE,
|
IntSize = ?INTEGER_SIZE,
|
||||||
<<S0:IntSize/integer, S1:IntSize/integer,
|
<<S0:IntSize/integer, S1:IntSize/integer,
|
||||||
S2:IntSize/integer, S3:IntSize/integer>>;
|
S2:IntSize/integer, S3:IntSize/integer>>;
|
||||||
add_hashlist([TopHash|T], SlotSplit, S0, S1, S2, S3) ->
|
add_hashlist([{TopHash, _ExtraHash}|T], SlotSplit, S0, S1, S2, S3) ->
|
||||||
{Slot, H0, H1} = split_hash(TopHash, SlotSplit),
|
{Slot, H0, H1} = split_hash(TopHash, SlotSplit),
|
||||||
Mask = get_mask(H0, H1),
|
Mask = get_mask(H0, H1),
|
||||||
case Slot of
|
case Slot of
|
||||||
|
@ -129,7 +126,7 @@ add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
|
||||||
SA:IntSize/integer, SB:IntSize/integer,
|
SA:IntSize/integer, SB:IntSize/integer,
|
||||||
SC:IntSize/integer, SD:IntSize/integer,
|
SC:IntSize/integer, SD:IntSize/integer,
|
||||||
SE:IntSize/integer, SF:IntSize/integer>>;
|
SE:IntSize/integer, SF:IntSize/integer>>;
|
||||||
add_hashlist([TopHash|T],
|
add_hashlist([{TopHash, _ExtraHash}|T],
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
|
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
|
||||||
SA, SB, SC, SD, SE, SF) ->
|
SA, SB, SC, SD, SE, SF) ->
|
||||||
|
@ -254,7 +251,7 @@ get_hashlist(N) ->
|
||||||
KVL = lists:sublist(KVL0, N),
|
KVL = lists:sublist(KVL0, N),
|
||||||
HashFun =
|
HashFun =
|
||||||
fun({K, _V}) ->
|
fun({K, _V}) ->
|
||||||
leveled_codec:magic_hash(K)
|
leveled_codec:segment_hash(K)
|
||||||
end,
|
end,
|
||||||
lists:map(HashFun, KVL).
|
lists:map(HashFun, KVL).
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue