diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 53bc81a..dbbfb8b 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -480,9 +480,9 @@ aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) -> Acc; {LMD1, TTL} -> TreeSize = AAE#recent_aae.tree_size, + SegID32 = leveled_tictac:keyto_segment32(Key), SegID = - leveled_tictac:get_segment(erlang:phash2(Key), - TreeSize), + leveled_tictac:get_segment(SegID32, TreeSize), IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin", IdxTrmStr = string:right(integer_to_list(SegID), 8, $0) ++ diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index d9d3d42..4a0b06b 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -167,6 +167,15 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) -> {ok, LedgerSnap, JournalSnap} = SnapFun(), % The start key and end key will vary depending on whether the % fold is to fold over an index or a key range + EnsureKeyBinaryFun = + fun(K, T) -> + case is_binary(K) of + true -> + {K, T}; + false -> + {term_to_binary(K), T} + end + end, {StartKey, EndKey, ExtractFun} = case Tag of ?IDX_TAG -> @@ -174,12 +183,15 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) -> KeyDefFun = fun leveled_codec:to_ledgerkey/5, {KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, StartIdx), KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, EndIdx), - fun(K, T) -> {K, T} end}; + EnsureKeyBinaryFun}; _ -> {StartOKey, EndOKey} = Query, {leveled_codec:to_ledgerkey(Bucket, StartOKey, Tag), leveled_codec:to_ledgerkey(Bucket, EndOKey, Tag), - fun(K, H) -> {K, {is_hash, H}} end} + fun(K, H) -> + V = {is_hash, H}, + EnsureKeyBinaryFun(K, V) + end} end, AccFun = accumulate_tree(Filter, JournalCheck, JournalSnap, ExtractFun), @@ -363,7 +375,7 @@ accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) -> fun(B, K, H, Tree) -> case FilterFun(B, K) of accumulate -> - leveled_tictac:add_kv(Tree, K, H, HashFun, false); + leveled_tictac:add_kv(Tree, K, H, HashFun); pass -> Tree end diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 4bf83fb..7442854 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -57,17 +57,18 @@ -export([ new_tree/1, new_tree/2, - add_kv/5, + add_kv/4, find_dirtyleaves/2, find_dirtysegments/2, fetch_root/1, fetch_leaves/2, merge_trees/2, get_segment/2, - tictac_hash/3, + tictac_hash/2, export_tree/1, import_tree/1, - valid_size/1 + valid_size/1, + keyto_segment32/1 ]). @@ -169,24 +170,16 @@ import_tree(ExportedTree) -> level1 = L1Bin, level2 = Lv2}. --spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree(). -add_kv(TicTacTree, Key, Value, BinExtractFun) -> - add_kv(TicTacTree, Key, Value, BinExtractFun, false). --spec add_kv(tictactree(), tuple(), tuple(), fun(), boolean()) -> tictactree(). +-spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree(). %% @doc %% Add a Key and value to a tictactree using the BinExtractFun to extract a %% binary from the Key and value from which to generate the hash. The %% BinExtractFun will also need to do any canonicalisation necessary to make %% the hash consistent (such as whitespace removal, or sorting) -%% -%% For exportable trees the hash function will be based on the CJ Bernstein -%% magic hash. For non-exportable trees erlang:phash2 will be used, and so -%% non-binary Keys and Values can be returned from the BinExtractFun in this -%% case. -add_kv(TicTacTree, Key, Value, BinExtractFun, Exportable) -> +add_kv(TicTacTree, Key, Value, BinExtractFun) -> {BinK, BinV} = BinExtractFun(Key, Value), - {SegHash, SegChangeHash} = tictac_hash(BinK, BinV, Exportable), + {SegHash, SegChangeHash} = tictac_hash(BinK, BinV), Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count), Level2Pos = @@ -314,28 +307,39 @@ get_segment(Hash, TreeSize) -> get_segment(Hash, element(3, get_size(TreeSize))). --spec tictac_hash(any(), any(), boolean()) -> {integer(), integer()}. +-spec tictac_hash(binary(), any()) -> {integer(), integer()}. %% @doc -%% Hash the key and term, to either something repetable in Erlang, or using -%% the DJ Bernstein hash if it is the tree needs to be compared with one -%% calculated with a non-Erlang store -%% -%% Boolean is Exportable. does the hash need to be repetable by a non-Erlang -%% machine -tictac_hash(BinKey, BinVal, true) - when is_binary(BinKey) and is_binary(BinVal) -> - HashKey = leveled_codec:magic_hash({binary, BinKey}), - HashVal = leveled_codec:magic_hash({binary, BinVal}), - {HashKey, HashKey bxor HashVal}; -tictac_hash(BinKey, {is_hash, HashedVal}, false) -> - {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor HashedVal}; -tictac_hash(BinKey, BinVal, false) -> - {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor erlang:phash2(BinVal)}. +%% Hash the key and term. +%% The term can be of the form {is_hash, 32-bit integer)} to indicate the hash +%% has already been taken. If the value is not a pre-extracted hash just use +%% erlang:phash2. If an exportable hash of the value is required this should +%% be managed through the add_kv ExtractFun providing a pre-prepared Hash. +tictac_hash(BinKey, Val) when is_binary(BinKey) -> + HashKey = keyto_segment32(BinKey), + HashVal = + case Val of + {is_hash, HashedVal} -> + HashedVal; + _ -> + erlang:phash2(Val) + end, + {HashKey, HashKey bxor HashVal}. + +-spec keyto_segment32(any()) -> integer(). +%% @doc +%% The first 16 bits of the segment hash used in the tictac tree should be +%% made up of the segment ID part (which is used to accelerate queries) +keyto_segment32(BinKey) when is_binary(BinKey) -> + {SegmentID, ExtraHash} = leveled_codec:segment_hash(BinKey), + (ExtraHash band 65535) bsl 16 + SegmentID; +keyto_segment32(Key) -> + keyto_segment32(term_to_binary(Key)). %%%============================================================================ %%% Internal functions %%%============================================================================ + get_level2(TicTacTree, L1Pos) -> case array:get(L1Pos, TicTacTree#tictactree.level2) of ?EMPTY -> @@ -454,7 +458,7 @@ simple_test_withsize(Size) -> GetSegFun = fun(TK) -> - get_segment(erlang:phash2(term_to_binary(TK)), SC) + get_segment(keyto_segment32(term_to_binary(TK)), SC) end, DL0 = find_dirtyleaves(Tree1, Tree0), @@ -513,7 +517,7 @@ merge_test_withsize(Size) -> ?assertMatch(false, TreeM1#tictactree.level1 == TreeZ4#tictactree.level1). exportable_test() -> - {Int1, Int2} = tictac_hash(<<"key">>, <<"value">>, true), + {Int1, Int2} = tictac_hash(<<"key">>, <<"value">>), ?assertMatch({true, true}, {Int1 >= 0, Int2 >=0}). -endif. diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index 3ec3df5..5ef92d4 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -131,11 +131,18 @@ many_put_compare(_Config) -> {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value), <> = HeadBin, - {Key, lists:sort(binary_to_term(VclockBin))} + case is_binary(Key) of + true -> + {Key, + lists:sort(binary_to_term(VclockBin))}; + false -> + {term_to_binary(Key), + lists:sort(binary_to_term(VclockBin))} + end end, FoldObjectsFun = fun(_Bucket, Key, Value, Acc) -> - leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun, false) + leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun) end, FoldQ0 = {foldheads_bybucket, @@ -179,7 +186,7 @@ many_put_compare(_Config) -> end, AltFoldObjectsFun = fun(_Bucket, Key, Value, Acc) -> - leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun, true) + leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun) end, AltFoldQ0 = {foldheads_bybucket, o_rkv, @@ -213,8 +220,7 @@ many_put_compare(_Config) -> FoldKeysFun = fun(SegListToFind) -> fun(_B, K, Acc) -> - Seg = - leveled_tictac:get_segment(erlang:phash2(K), SegmentCount), + Seg = get_segment(K, SegmentCount), case lists:member(Seg, SegListToFind) of true -> [K|Acc]; @@ -488,8 +494,7 @@ index_compare(_Config) -> FoldKeysIndexQFun = fun(_Bucket, {Term, Key}, Acc) -> - Seg = - leveled_tictac:get_segment(erlang:phash2(Key), SegmentCount), + Seg = get_segment(Key, SegmentCount), case lists:member(Seg, DL3_0) of true -> [{Term, Key}|Acc]; @@ -1144,3 +1149,15 @@ get_tictactree_fun(Bookie, Bucket, TreeSize) -> [LMD, timer:now_diff(os:timestamp(), SW)]), leveled_tictac:merge_trees(R, Acc) end. + +get_segment(K, SegmentCount) -> + BinKey = + case is_binary(K) of + true -> + K; + false -> + term_to_binary(K) + end, + {SegmentID, ExtraHash} = leveled_codec:segment_hash(BinKey), + SegHash = (ExtraHash band 65535) bsl 16 + SegmentID, + leveled_tictac:get_segment(SegHash, SegmentCount). \ No newline at end of file