Attempt to standardise on segment hashes
To allow the segment hash that accelerates queries to be re-used in tictac-tree-related queries.
This commit is contained in:
parent
7763df3cef
commit
6bb7ceef0c
4 changed files with 77 additions and 44 deletions
|
@ -480,9 +480,9 @@ aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) ->
|
||||||
Acc;
|
Acc;
|
||||||
{LMD1, TTL} ->
|
{LMD1, TTL} ->
|
||||||
TreeSize = AAE#recent_aae.tree_size,
|
TreeSize = AAE#recent_aae.tree_size,
|
||||||
|
SegID32 = leveled_tictac:keyto_segment32(Key),
|
||||||
SegID =
|
SegID =
|
||||||
leveled_tictac:get_segment(erlang:phash2(Key),
|
leveled_tictac:get_segment(SegID32, TreeSize),
|
||||||
TreeSize),
|
|
||||||
IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin",
|
IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin",
|
||||||
IdxTrmStr =
|
IdxTrmStr =
|
||||||
string:right(integer_to_list(SegID), 8, $0) ++
|
string:right(integer_to_list(SegID), 8, $0) ++
|
||||||
|
|
|
@ -167,6 +167,15 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) ->
|
||||||
{ok, LedgerSnap, JournalSnap} = SnapFun(),
|
{ok, LedgerSnap, JournalSnap} = SnapFun(),
|
||||||
% The start key and end key will vary depending on whether the
|
% The start key and end key will vary depending on whether the
|
||||||
% fold is to fold over an index or a key range
|
% fold is to fold over an index or a key range
|
||||||
|
EnsureKeyBinaryFun =
|
||||||
|
fun(K, T) ->
|
||||||
|
case is_binary(K) of
|
||||||
|
true ->
|
||||||
|
{K, T};
|
||||||
|
false ->
|
||||||
|
{term_to_binary(K), T}
|
||||||
|
end
|
||||||
|
end,
|
||||||
{StartKey, EndKey, ExtractFun} =
|
{StartKey, EndKey, ExtractFun} =
|
||||||
case Tag of
|
case Tag of
|
||||||
?IDX_TAG ->
|
?IDX_TAG ->
|
||||||
|
@ -174,12 +183,15 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) ->
|
||||||
KeyDefFun = fun leveled_codec:to_ledgerkey/5,
|
KeyDefFun = fun leveled_codec:to_ledgerkey/5,
|
||||||
{KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, StartIdx),
|
{KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, StartIdx),
|
||||||
KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, EndIdx),
|
KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, EndIdx),
|
||||||
fun(K, T) -> {K, T} end};
|
EnsureKeyBinaryFun};
|
||||||
_ ->
|
_ ->
|
||||||
{StartOKey, EndOKey} = Query,
|
{StartOKey, EndOKey} = Query,
|
||||||
{leveled_codec:to_ledgerkey(Bucket, StartOKey, Tag),
|
{leveled_codec:to_ledgerkey(Bucket, StartOKey, Tag),
|
||||||
leveled_codec:to_ledgerkey(Bucket, EndOKey, Tag),
|
leveled_codec:to_ledgerkey(Bucket, EndOKey, Tag),
|
||||||
fun(K, H) -> {K, {is_hash, H}} end}
|
fun(K, H) ->
|
||||||
|
V = {is_hash, H},
|
||||||
|
EnsureKeyBinaryFun(K, V)
|
||||||
|
end}
|
||||||
end,
|
end,
|
||||||
AccFun =
|
AccFun =
|
||||||
accumulate_tree(Filter, JournalCheck, JournalSnap, ExtractFun),
|
accumulate_tree(Filter, JournalCheck, JournalSnap, ExtractFun),
|
||||||
|
@ -363,7 +375,7 @@ accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) ->
|
||||||
fun(B, K, H, Tree) ->
|
fun(B, K, H, Tree) ->
|
||||||
case FilterFun(B, K) of
|
case FilterFun(B, K) of
|
||||||
accumulate ->
|
accumulate ->
|
||||||
leveled_tictac:add_kv(Tree, K, H, HashFun, false);
|
leveled_tictac:add_kv(Tree, K, H, HashFun);
|
||||||
pass ->
|
pass ->
|
||||||
Tree
|
Tree
|
||||||
end
|
end
|
||||||
|
|
|
@ -57,17 +57,18 @@
|
||||||
-export([
|
-export([
|
||||||
new_tree/1,
|
new_tree/1,
|
||||||
new_tree/2,
|
new_tree/2,
|
||||||
add_kv/5,
|
add_kv/4,
|
||||||
find_dirtyleaves/2,
|
find_dirtyleaves/2,
|
||||||
find_dirtysegments/2,
|
find_dirtysegments/2,
|
||||||
fetch_root/1,
|
fetch_root/1,
|
||||||
fetch_leaves/2,
|
fetch_leaves/2,
|
||||||
merge_trees/2,
|
merge_trees/2,
|
||||||
get_segment/2,
|
get_segment/2,
|
||||||
tictac_hash/3,
|
tictac_hash/2,
|
||||||
export_tree/1,
|
export_tree/1,
|
||||||
import_tree/1,
|
import_tree/1,
|
||||||
valid_size/1
|
valid_size/1,
|
||||||
|
keyto_segment32/1
|
||||||
]).
|
]).
|
||||||
|
|
||||||
|
|
||||||
|
@ -169,24 +170,16 @@ import_tree(ExportedTree) ->
|
||||||
level1 = L1Bin,
|
level1 = L1Bin,
|
||||||
level2 = Lv2}.
|
level2 = Lv2}.
|
||||||
|
|
||||||
-spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree().
|
|
||||||
add_kv(TicTacTree, Key, Value, BinExtractFun) ->
|
|
||||||
add_kv(TicTacTree, Key, Value, BinExtractFun, false).
|
|
||||||
|
|
||||||
-spec add_kv(tictactree(), tuple(), tuple(), fun(), boolean()) -> tictactree().
|
-spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree().
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Add a Key and value to a tictactree using the BinExtractFun to extract a
|
%% Add a Key and value to a tictactree using the BinExtractFun to extract a
|
||||||
%% binary from the Key and value from which to generate the hash. The
|
%% binary from the Key and value from which to generate the hash. The
|
||||||
%% BinExtractFun will also need to do any canonicalisation necessary to make
|
%% BinExtractFun will also need to do any canonicalisation necessary to make
|
||||||
%% the hash consistent (such as whitespace removal, or sorting)
|
%% the hash consistent (such as whitespace removal, or sorting)
|
||||||
%%
|
add_kv(TicTacTree, Key, Value, BinExtractFun) ->
|
||||||
%% For exportable trees the hash function will be based on the CJ Bernstein
|
|
||||||
%% magic hash. For non-exportable trees erlang:phash2 will be used, and so
|
|
||||||
%% non-binary Keys and Values can be returned from the BinExtractFun in this
|
|
||||||
%% case.
|
|
||||||
add_kv(TicTacTree, Key, Value, BinExtractFun, Exportable) ->
|
|
||||||
{BinK, BinV} = BinExtractFun(Key, Value),
|
{BinK, BinV} = BinExtractFun(Key, Value),
|
||||||
{SegHash, SegChangeHash} = tictac_hash(BinK, BinV, Exportable),
|
{SegHash, SegChangeHash} = tictac_hash(BinK, BinV),
|
||||||
Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count),
|
Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count),
|
||||||
|
|
||||||
Level2Pos =
|
Level2Pos =
|
||||||
|
@ -314,28 +307,39 @@ get_segment(Hash, TreeSize) ->
|
||||||
get_segment(Hash, element(3, get_size(TreeSize))).
|
get_segment(Hash, element(3, get_size(TreeSize))).
|
||||||
|
|
||||||
|
|
||||||
-spec tictac_hash(any(), any(), boolean()) -> {integer(), integer()}.
|
-spec tictac_hash(binary(), any()) -> {integer(), integer()}.
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Hash the key and term, to either something repetable in Erlang, or using
|
%% Hash the key and term.
|
||||||
%% the DJ Bernstein hash if it is the tree needs to be compared with one
|
%% The term can be of the form {is_hash, 32-bit integer} to indicate the hash
|
||||||
%% calculated with a non-Erlang store
|
%% has already been taken. If the value is not a pre-extracted hash just use
|
||||||
%%
|
%% erlang:phash2. If an exportable hash of the value is required this should
|
||||||
%% Boolean is Exportable. does the hash need to be repetable by a non-Erlang
|
%% be managed through the add_kv ExtractFun providing a pre-prepared Hash.
|
||||||
%% machine
|
tictac_hash(BinKey, Val) when is_binary(BinKey) ->
|
||||||
tictac_hash(BinKey, BinVal, true)
|
HashKey = keyto_segment32(BinKey),
|
||||||
when is_binary(BinKey) and is_binary(BinVal) ->
|
HashVal =
|
||||||
HashKey = leveled_codec:magic_hash({binary, BinKey}),
|
case Val of
|
||||||
HashVal = leveled_codec:magic_hash({binary, BinVal}),
|
{is_hash, HashedVal} ->
|
||||||
{HashKey, HashKey bxor HashVal};
|
HashedVal;
|
||||||
tictac_hash(BinKey, {is_hash, HashedVal}, false) ->
|
_ ->
|
||||||
{erlang:phash2(BinKey), erlang:phash2(BinKey) bxor HashedVal};
|
erlang:phash2(Val)
|
||||||
tictac_hash(BinKey, BinVal, false) ->
|
end,
|
||||||
{erlang:phash2(BinKey), erlang:phash2(BinKey) bxor erlang:phash2(BinVal)}.
|
{HashKey, HashKey bxor HashVal}.
|
||||||
|
|
||||||
|
-spec keyto_segment32(any()) -> integer().
|
||||||
|
%% @doc
|
||||||
|
%% Build the integer hash for a key from the two values returned by
%% leveled_codec:segment_hash/1: the low 16 bits of ExtraHash are shifted
%% left by 16 and SegmentID (the part used to accelerate queries) is added.
|
||||||
|
%% NOTE(review): presumably SegmentID fits in 16 bits, so that it forms the
%% low-order 16 bits of the result - confirm against
%% leveled_codec:segment_hash/1.
|
||||||
|
keyto_segment32(BinKey) when is_binary(BinKey) ->
|
||||||
|
{SegmentID, ExtraHash} = leveled_codec:segment_hash(BinKey),
|
||||||
|
%% bsl and + have the same precedence and associate to the left in Erlang,
%% so this evaluates as ((ExtraHash band 65535) bsl 16) + SegmentID.
(ExtraHash band 65535) bsl 16 + SegmentID;
|
||||||
|
%% Non-binary keys are serialised with term_to_binary/1 and then hashed by
%% the binary clause above.
keyto_segment32(Key) ->
|
||||||
|
keyto_segment32(term_to_binary(Key)).
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% Internal functions
|
%%% Internal functions
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
||||||
|
|
||||||
get_level2(TicTacTree, L1Pos) ->
|
get_level2(TicTacTree, L1Pos) ->
|
||||||
case array:get(L1Pos, TicTacTree#tictactree.level2) of
|
case array:get(L1Pos, TicTacTree#tictactree.level2) of
|
||||||
?EMPTY ->
|
?EMPTY ->
|
||||||
|
@ -454,7 +458,7 @@ simple_test_withsize(Size) ->
|
||||||
|
|
||||||
GetSegFun =
|
GetSegFun =
|
||||||
fun(TK) ->
|
fun(TK) ->
|
||||||
get_segment(erlang:phash2(term_to_binary(TK)), SC)
|
get_segment(keyto_segment32(term_to_binary(TK)), SC)
|
||||||
end,
|
end,
|
||||||
|
|
||||||
DL0 = find_dirtyleaves(Tree1, Tree0),
|
DL0 = find_dirtyleaves(Tree1, Tree0),
|
||||||
|
@ -513,7 +517,7 @@ merge_test_withsize(Size) ->
|
||||||
?assertMatch(false, TreeM1#tictactree.level1 == TreeZ4#tictactree.level1).
|
?assertMatch(false, TreeM1#tictactree.level1 == TreeZ4#tictactree.level1).
|
||||||
|
|
||||||
exportable_test() ->
|
exportable_test() ->
|
||||||
{Int1, Int2} = tictac_hash(<<"key">>, <<"value">>, true),
|
{Int1, Int2} = tictac_hash(<<"key">>, <<"value">>),
|
||||||
?assertMatch({true, true}, {Int1 >= 0, Int2 >=0}).
|
?assertMatch({true, true}, {Int1 >= 0, Int2 >=0}).
|
||||||
|
|
||||||
-endif.
|
-endif.
|
||||||
|
|
|
@ -131,11 +131,18 @@ many_put_compare(_Config) ->
|
||||||
{proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value),
|
{proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value),
|
||||||
<<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
|
<<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
|
||||||
VclockBin:VclockLen/binary, _Rest/binary>> = HeadBin,
|
VclockBin:VclockLen/binary, _Rest/binary>> = HeadBin,
|
||||||
{Key, lists:sort(binary_to_term(VclockBin))}
|
case is_binary(Key) of
|
||||||
|
true ->
|
||||||
|
{Key,
|
||||||
|
lists:sort(binary_to_term(VclockBin))};
|
||||||
|
false ->
|
||||||
|
{term_to_binary(Key),
|
||||||
|
lists:sort(binary_to_term(VclockBin))}
|
||||||
|
end
|
||||||
end,
|
end,
|
||||||
FoldObjectsFun =
|
FoldObjectsFun =
|
||||||
fun(_Bucket, Key, Value, Acc) ->
|
fun(_Bucket, Key, Value, Acc) ->
|
||||||
leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun, false)
|
leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun)
|
||||||
end,
|
end,
|
||||||
|
|
||||||
FoldQ0 = {foldheads_bybucket,
|
FoldQ0 = {foldheads_bybucket,
|
||||||
|
@ -179,7 +186,7 @@ many_put_compare(_Config) ->
|
||||||
end,
|
end,
|
||||||
AltFoldObjectsFun =
|
AltFoldObjectsFun =
|
||||||
fun(_Bucket, Key, Value, Acc) ->
|
fun(_Bucket, Key, Value, Acc) ->
|
||||||
leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun, true)
|
leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun)
|
||||||
end,
|
end,
|
||||||
AltFoldQ0 = {foldheads_bybucket,
|
AltFoldQ0 = {foldheads_bybucket,
|
||||||
o_rkv,
|
o_rkv,
|
||||||
|
@ -213,8 +220,7 @@ many_put_compare(_Config) ->
|
||||||
FoldKeysFun =
|
FoldKeysFun =
|
||||||
fun(SegListToFind) ->
|
fun(SegListToFind) ->
|
||||||
fun(_B, K, Acc) ->
|
fun(_B, K, Acc) ->
|
||||||
Seg =
|
Seg = get_segment(K, SegmentCount),
|
||||||
leveled_tictac:get_segment(erlang:phash2(K), SegmentCount),
|
|
||||||
case lists:member(Seg, SegListToFind) of
|
case lists:member(Seg, SegListToFind) of
|
||||||
true ->
|
true ->
|
||||||
[K|Acc];
|
[K|Acc];
|
||||||
|
@ -488,8 +494,7 @@ index_compare(_Config) ->
|
||||||
|
|
||||||
FoldKeysIndexQFun =
|
FoldKeysIndexQFun =
|
||||||
fun(_Bucket, {Term, Key}, Acc) ->
|
fun(_Bucket, {Term, Key}, Acc) ->
|
||||||
Seg =
|
Seg = get_segment(Key, SegmentCount),
|
||||||
leveled_tictac:get_segment(erlang:phash2(Key), SegmentCount),
|
|
||||||
case lists:member(Seg, DL3_0) of
|
case lists:member(Seg, DL3_0) of
|
||||||
true ->
|
true ->
|
||||||
[{Term, Key}|Acc];
|
[{Term, Key}|Acc];
|
||||||
|
@ -1144,3 +1149,15 @@ get_tictactree_fun(Bookie, Bucket, TreeSize) ->
|
||||||
[LMD, timer:now_diff(os:timestamp(), SW)]),
|
[LMD, timer:now_diff(os:timestamp(), SW)]),
|
||||||
leveled_tictac:merge_trees(R, Acc)
|
leveled_tictac:merge_trees(R, Acc)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
%% Test helper: work out the tictac tree segment for key K. The hash built
%% here and SegmentCount are passed straight to leveled_tictac:get_segment/2.
get_segment(K, SegmentCount) ->
|
||||||
|
%% leveled_codec:segment_hash/1 is given a binary, so any non-binary key is
%% serialised with term_to_binary/1 first.
BinKey =
|
||||||
|
case is_binary(K) of
|
||||||
|
true ->
|
||||||
|
K;
|
||||||
|
false ->
|
||||||
|
term_to_binary(K)
|
||||||
|
end,
|
||||||
|
{SegmentID, ExtraHash} = leveled_codec:segment_hash(BinKey),
|
||||||
|
%% Same combination as leveled_tictac:keyto_segment32/1: the low 16 bits of
%% ExtraHash shifted left by 16, plus SegmentID (bsl and + are
%% left-associative with equal precedence).
%% NOTE(review): this repeats the keyto_segment32/1 logic - confirm whether
%% the duplication is deliberate for test independence.
SegHash = (ExtraHash band 65535) bsl 16 + SegmentID,
|
||||||
|
leveled_tictac:get_segment(SegHash, SegmentCount).
|
Loading…
Add table
Add a link
Reference in a new issue