Attempt to standardise on segment hashes
This allows the segment hash that accelerates queries to be re-used in tictac tree related queries.
This commit is contained in:
parent
7763df3cef
commit
6bb7ceef0c
4 changed files with 77 additions and 44 deletions
|
@ -480,9 +480,9 @@ aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) ->
|
|||
Acc;
|
||||
{LMD1, TTL} ->
|
||||
TreeSize = AAE#recent_aae.tree_size,
|
||||
SegID32 = leveled_tictac:keyto_segment32(Key),
|
||||
SegID =
|
||||
leveled_tictac:get_segment(erlang:phash2(Key),
|
||||
TreeSize),
|
||||
leveled_tictac:get_segment(SegID32, TreeSize),
|
||||
IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin",
|
||||
IdxTrmStr =
|
||||
string:right(integer_to_list(SegID), 8, $0) ++
|
||||
|
|
|
@ -167,6 +167,15 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) ->
|
|||
{ok, LedgerSnap, JournalSnap} = SnapFun(),
|
||||
% The start key and end key will vary depending on whether the
|
||||
% fold is to fold over an index or a key range
|
||||
EnsureKeyBinaryFun =
|
||||
fun(K, T) ->
|
||||
case is_binary(K) of
|
||||
true ->
|
||||
{K, T};
|
||||
false ->
|
||||
{term_to_binary(K), T}
|
||||
end
|
||||
end,
|
||||
{StartKey, EndKey, ExtractFun} =
|
||||
case Tag of
|
||||
?IDX_TAG ->
|
||||
|
@ -174,12 +183,15 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) ->
|
|||
KeyDefFun = fun leveled_codec:to_ledgerkey/5,
|
||||
{KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, StartIdx),
|
||||
KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, EndIdx),
|
||||
fun(K, T) -> {K, T} end};
|
||||
EnsureKeyBinaryFun};
|
||||
_ ->
|
||||
{StartOKey, EndOKey} = Query,
|
||||
{leveled_codec:to_ledgerkey(Bucket, StartOKey, Tag),
|
||||
leveled_codec:to_ledgerkey(Bucket, EndOKey, Tag),
|
||||
fun(K, H) -> {K, {is_hash, H}} end}
|
||||
fun(K, H) ->
|
||||
V = {is_hash, H},
|
||||
EnsureKeyBinaryFun(K, V)
|
||||
end}
|
||||
end,
|
||||
AccFun =
|
||||
accumulate_tree(Filter, JournalCheck, JournalSnap, ExtractFun),
|
||||
|
@ -363,7 +375,7 @@ accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) ->
|
|||
fun(B, K, H, Tree) ->
|
||||
case FilterFun(B, K) of
|
||||
accumulate ->
|
||||
leveled_tictac:add_kv(Tree, K, H, HashFun, false);
|
||||
leveled_tictac:add_kv(Tree, K, H, HashFun);
|
||||
pass ->
|
||||
Tree
|
||||
end
|
||||
|
|
|
@ -57,17 +57,18 @@
|
|||
-export([
|
||||
new_tree/1,
|
||||
new_tree/2,
|
||||
add_kv/5,
|
||||
add_kv/4,
|
||||
find_dirtyleaves/2,
|
||||
find_dirtysegments/2,
|
||||
fetch_root/1,
|
||||
fetch_leaves/2,
|
||||
merge_trees/2,
|
||||
get_segment/2,
|
||||
tictac_hash/3,
|
||||
tictac_hash/2,
|
||||
export_tree/1,
|
||||
import_tree/1,
|
||||
valid_size/1
|
||||
valid_size/1,
|
||||
keyto_segment32/1
|
||||
]).
|
||||
|
||||
|
||||
|
@ -169,24 +170,16 @@ import_tree(ExportedTree) ->
|
|||
level1 = L1Bin,
|
||||
level2 = Lv2}.
|
||||
|
||||
-spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree().
|
||||
add_kv(TicTacTree, Key, Value, BinExtractFun) ->
|
||||
add_kv(TicTacTree, Key, Value, BinExtractFun, false).
|
||||
|
||||
-spec add_kv(tictactree(), tuple(), tuple(), fun(), boolean()) -> tictactree().
|
||||
-spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree().
|
||||
%% @doc
|
||||
%% Add a Key and value to a tictactree using the BinExtractFun to extract a
|
||||
%% binary from the Key and value from which to generate the hash. The
|
||||
%% BinExtractFun will also need to do any canonicalisation necessary to make
|
||||
%% the hash consistent (such as whitespace removal, or sorting)
|
||||
%%
|
||||
%% For exportable trees the hash function will be based on the DJ Bernstein
|
||||
%% magic hash. For non-exportable trees erlang:phash2 will be used, and so
|
||||
%% non-binary Keys and Values can be returned from the BinExtractFun in this
|
||||
%% case.
|
||||
add_kv(TicTacTree, Key, Value, BinExtractFun, Exportable) ->
|
||||
add_kv(TicTacTree, Key, Value, BinExtractFun) ->
|
||||
{BinK, BinV} = BinExtractFun(Key, Value),
|
||||
{SegHash, SegChangeHash} = tictac_hash(BinK, BinV, Exportable),
|
||||
{SegHash, SegChangeHash} = tictac_hash(BinK, BinV),
|
||||
Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count),
|
||||
|
||||
Level2Pos =
|
||||
|
@ -314,28 +307,39 @@ get_segment(Hash, TreeSize) ->
|
|||
get_segment(Hash, element(3, get_size(TreeSize))).
|
||||
|
||||
|
||||
-spec tictac_hash(any(), any(), boolean()) -> {integer(), integer()}.
|
||||
-spec tictac_hash(binary(), any()) -> {integer(), integer()}.
|
||||
%% @doc
|
||||
%% Hash the key and term, to either something repeatable in Erlang, or using
|
||||
%% the DJ Bernstein hash if the tree needs to be compared with one
|
||||
%% calculated with a non-Erlang store
|
||||
%%
|
||||
%% Boolean is Exportable: does the hash need to be repeatable by a non-Erlang
|
||||
%% machine
|
||||
tictac_hash(BinKey, BinVal, true)
|
||||
when is_binary(BinKey) and is_binary(BinVal) ->
|
||||
HashKey = leveled_codec:magic_hash({binary, BinKey}),
|
||||
HashVal = leveled_codec:magic_hash({binary, BinVal}),
|
||||
{HashKey, HashKey bxor HashVal};
|
||||
tictac_hash(BinKey, {is_hash, HashedVal}, false) ->
|
||||
{erlang:phash2(BinKey), erlang:phash2(BinKey) bxor HashedVal};
|
||||
tictac_hash(BinKey, BinVal, false) ->
|
||||
{erlang:phash2(BinKey), erlang:phash2(BinKey) bxor erlang:phash2(BinVal)}.
|
||||
%% Produce the pair of hashes used to place a key and value in the tree:
%% the segment hash of the key (as returned by keyto_segment32/1), and that
%% same hash XORed with a hash of the value.
%%
%% The value can be of the form {is_hash, 32-bit integer} to indicate that
%% the hash has already been taken, in which case it is used as given. Any
%% other value is hashed with erlang:phash2/1. If an exportable hash of the
%% value is required, this should be managed through the add_kv ExtractFun
%% providing a pre-prepared hash in the {is_hash, Hash} form.
%%
%% The key must already be a binary - there is no clause for other terms.
tictac_hash(BinKey, {is_hash, HashedVal}) when is_binary(BinKey) ->
    % Value hash was pre-computed by the caller, so only the key is hashed
    KeyHash = keyto_segment32(BinKey),
    {KeyHash, KeyHash bxor HashedVal};
tictac_hash(BinKey, Term) when is_binary(BinKey) ->
    KeyHash = keyto_segment32(BinKey),
    {KeyHash, KeyHash bxor erlang:phash2(Term)}.
|
||||
|
||||
-spec keyto_segment32(any()) -> integer().
%% @doc
%% Build the integer hash of a key that the tictac tree uses to choose a
%% segment, re-using the segment hash that accelerates queries.
%%
%% leveled_codec:segment_hash/1 returns {SegmentID, ExtraHash}. The result
%% is the low 16 bits of ExtraHash shifted up by 16 bits, plus SegmentID -
%% so SegmentID forms the low-order part of the result.
%% NOTE(review): this presumes SegmentID fits within 16 bits - confirm
%% against leveled_codec:segment_hash/1.
%%
%% A key which is not a binary is first converted with term_to_binary/1.
keyto_segment32(Key) when not is_binary(Key) ->
    keyto_segment32(term_to_binary(Key));
keyto_segment32(BinKey) ->
    {SegmentID, ExtraHash} = leveled_codec:segment_hash(BinKey),
    % bsl and + share precedence (left-associative); the brackets only make
    % the evaluation order explicit
    UpperBits = (ExtraHash band 65535) bsl 16,
    UpperBits + SegmentID.
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal functions
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
get_level2(TicTacTree, L1Pos) ->
|
||||
case array:get(L1Pos, TicTacTree#tictactree.level2) of
|
||||
?EMPTY ->
|
||||
|
@ -454,7 +458,7 @@ simple_test_withsize(Size) ->
|
|||
|
||||
GetSegFun =
|
||||
fun(TK) ->
|
||||
get_segment(erlang:phash2(term_to_binary(TK)), SC)
|
||||
get_segment(keyto_segment32(term_to_binary(TK)), SC)
|
||||
end,
|
||||
|
||||
DL0 = find_dirtyleaves(Tree1, Tree0),
|
||||
|
@ -513,7 +517,7 @@ merge_test_withsize(Size) ->
|
|||
?assertMatch(false, TreeM1#tictactree.level1 == TreeZ4#tictactree.level1).
|
||||
|
||||
exportable_test() ->
|
||||
{Int1, Int2} = tictac_hash(<<"key">>, <<"value">>, true),
|
||||
{Int1, Int2} = tictac_hash(<<"key">>, <<"value">>),
|
||||
?assertMatch({true, true}, {Int1 >= 0, Int2 >=0}).
|
||||
|
||||
-endif.
|
||||
|
|
|
@ -131,11 +131,18 @@ many_put_compare(_Config) ->
|
|||
{proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value),
|
||||
<<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
|
||||
VclockBin:VclockLen/binary, _Rest/binary>> = HeadBin,
|
||||
{Key, lists:sort(binary_to_term(VclockBin))}
|
||||
case is_binary(Key) of
|
||||
true ->
|
||||
{Key,
|
||||
lists:sort(binary_to_term(VclockBin))};
|
||||
false ->
|
||||
{term_to_binary(Key),
|
||||
lists:sort(binary_to_term(VclockBin))}
|
||||
end
|
||||
end,
|
||||
FoldObjectsFun =
|
||||
fun(_Bucket, Key, Value, Acc) ->
|
||||
leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun, false)
|
||||
leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun)
|
||||
end,
|
||||
|
||||
FoldQ0 = {foldheads_bybucket,
|
||||
|
@ -179,7 +186,7 @@ many_put_compare(_Config) ->
|
|||
end,
|
||||
AltFoldObjectsFun =
|
||||
fun(_Bucket, Key, Value, Acc) ->
|
||||
leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun, true)
|
||||
leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun)
|
||||
end,
|
||||
AltFoldQ0 = {foldheads_bybucket,
|
||||
o_rkv,
|
||||
|
@ -213,8 +220,7 @@ many_put_compare(_Config) ->
|
|||
FoldKeysFun =
|
||||
fun(SegListToFind) ->
|
||||
fun(_B, K, Acc) ->
|
||||
Seg =
|
||||
leveled_tictac:get_segment(erlang:phash2(K), SegmentCount),
|
||||
Seg = get_segment(K, SegmentCount),
|
||||
case lists:member(Seg, SegListToFind) of
|
||||
true ->
|
||||
[K|Acc];
|
||||
|
@ -488,8 +494,7 @@ index_compare(_Config) ->
|
|||
|
||||
FoldKeysIndexQFun =
|
||||
fun(_Bucket, {Term, Key}, Acc) ->
|
||||
Seg =
|
||||
leveled_tictac:get_segment(erlang:phash2(Key), SegmentCount),
|
||||
Seg = get_segment(Key, SegmentCount),
|
||||
case lists:member(Seg, DL3_0) of
|
||||
true ->
|
||||
[{Term, Key}|Acc];
|
||||
|
@ -1144,3 +1149,15 @@ get_tictactree_fun(Bookie, Bucket, TreeSize) ->
|
|||
[LMD, timer:now_diff(os:timestamp(), SW)]),
|
||||
leveled_tictac:merge_trees(R, Acc)
|
||||
end.
|
||||
|
||||
%% Test helper: find the tictac tree segment for key K in a tree of
%% SegmentCount segments. A key which is not a binary is converted with
%% term_to_binary/1 before hashing. The hash is assembled from the result of
%% leveled_codec:segment_hash/1 (low 16 bits of the extra hash shifted up by
%% 16 bits, plus the segment ID) and then passed to
%% leveled_tictac:get_segment/2.
get_segment(K, SegmentCount) when is_binary(K) ->
    {SegmentID, ExtraHash} = leveled_codec:segment_hash(K),
    UpperBits = (ExtraHash band 65535) bsl 16,
    leveled_tictac:get_segment(UpperBits + SegmentID, SegmentCount);
get_segment(K, SegmentCount) ->
    get_segment(term_to_binary(K), SegmentCount).
|
Loading…
Add table
Add a link
Reference in a new issue