Split hash - separate key hash for bxor with value

This commit is contained in:
Martin Sumner 2018-11-09 14:51:38 +00:00
parent 4756d402a0
commit a7773b148d
2 changed files with 28 additions and 12 deletions

View file

@ -176,7 +176,8 @@
%% speed can be gained if just the segment ID is known - but more can be %% speed can be gained if just the segment ID is known - but more can be
%% gained should the extended hash (with the second element) be known %% gained should the extended hash (with the second element) be known
segment_hash(Key) when is_binary(Key) -> segment_hash(Key) when is_binary(Key) ->
{segment_hash, SegmentID, ExtraHash} = leveled_tictac:keyto_segment48(Key), {segment_hash, SegmentID, ExtraHash, _AltHash}
= leveled_tictac:keyto_segment48(Key),
{SegmentID, ExtraHash}; {SegmentID, ExtraHash};
segment_hash({?RIAK_TAG, Bucket, Key, null}) segment_hash({?RIAK_TAG, Bucket, Key, null})
when is_binary(Bucket), is_binary(Key) -> when is_binary(Bucket), is_binary(Key) ->

View file

@ -66,7 +66,6 @@
fetch_leaves/2, fetch_leaves/2,
merge_trees/2, merge_trees/2,
get_segment/2, get_segment/2,
tictac_hash/2,
export_tree/1, export_tree/1,
import_tree/1, import_tree/1,
valid_size/1, valid_size/1,
@ -108,10 +107,14 @@
level2 :: any() % an array - but OTP compatibility level2 :: any() % an array - but OTP compatibility
}). }).
-type tictactree() :: #tictactree{}. -type tictactree() ::
-type segment48() :: {segment_hash, integer(), integer()}. #tictactree{}.
-type tree_extract() :: {binary(), integer(), integer(), integer(), binary()}. -type segment48() ::
-type tree_size() :: xxsmall|xsmall|small|medium|large|xlarge. {segment_hash, non_neg_integer(), non_neg_integer(), non_neg_integer()}.
-type tree_extract() ::
{binary(), integer(), integer(), integer(), binary()}.
-type tree_size() ::
xxsmall|xsmall|small|medium|large|xlarge.
-export_type([tictactree/0, segment48/0, tree_size/0]). -export_type([tictactree/0, segment48/0, tree_size/0]).
@ -327,7 +330,7 @@ get_segment(Hash, TreeSize) ->
%% erlang:phash2. If an exportable hash of the value is required this should %% erlang:phash2. If an exportable hash of the value is required this should
%% be managed through the add_kv ExtractFun providing a pre-prepared Hash. %% be managed through the add_kv ExtractFun providing a pre-prepared Hash.
tictac_hash(BinKey, Val) when is_binary(BinKey) -> tictac_hash(BinKey, Val) when is_binary(BinKey) ->
HashKey = keyto_segment32(BinKey), {HashKeyToSeg, AltHashKey} = keyto_doublesegment32(BinKey),
HashVal = HashVal =
case Val of case Val of
{is_hash, HashedVal} -> {is_hash, HashedVal} ->
@ -335,13 +338,23 @@ tictac_hash(BinKey, Val) when is_binary(BinKey) ->
_ -> _ ->
erlang:phash2(Val) erlang:phash2(Val)
end, end,
{HashKey, HashKey bxor HashVal}. {HashKeyToSeg, AltHashKey bxor HashVal}.
-spec keyto_doublesegment32(binary())
-> {non_neg_integer(), non_neg_integer()}.
%% @doc
%% Used in tictac_hash/2 to provide an alternative hash of the key to bxor with
%% the value, as well as the segment hash to locate the leaf of the tree to be
%% updated
keyto_doublesegment32(BinKey) when is_binary(BinKey) ->
Segment48 = keyto_segment48(BinKey),
{keyto_segment32(Segment48), element(4, Segment48)}.
-spec keyto_segment32(any()) -> integer(). -spec keyto_segment32(any()) -> integer().
%% @doc %% @doc
%% The first 16 bits of the segment hash used in the tictac tree should be %% The first 16 bits of the segment hash used in the tictac tree should be
%% made up of the segment ID part (which is used to accelerate queries) %% made up of the segment ID part (which is used to accelerate queries)
keyto_segment32({segment_hash, SegmentID, ExtraHash}) keyto_segment32({segment_hash, SegmentID, ExtraHash, _AltHash})
when is_integer(SegmentID), is_integer(ExtraHash) -> when is_integer(SegmentID), is_integer(ExtraHash) ->
(ExtraHash band 65535) bsl 16 + SegmentID; (ExtraHash band 65535) bsl 16 + SegmentID;
keyto_segment32(BinKey) when is_binary(BinKey) -> keyto_segment32(BinKey) when is_binary(BinKey) ->
@ -354,9 +367,11 @@ keyto_segment32(Key) ->
%% Produce a segment with an Extra Hash part - for tictac use most of the %% Produce a segment with an Extra Hash part - for tictac use most of the
%% ExtraHash will be discarded %% ExtraHash will be discarded
keyto_segment48(BinKey) -> keyto_segment48(BinKey) ->
<<SegmentID:16/integer, ExtraHash:32/integer, _Rest/binary>> = <<SegmentID:16/integer,
crypto:hash(md5, BinKey), ExtraHash:32/integer,
{segment_hash, SegmentID, ExtraHash}. AltHash:32/integer,
_Rest/binary>> = crypto:hash(md5, BinKey),
{segment_hash, SegmentID, ExtraHash, AltHash}.
-spec generate_segmentfilter_list(list(integer()), tree_size()) -spec generate_segmentfilter_list(list(integer()), tree_size())
-> false|list(integer()). -> false|list(integer()).