Magic Hash - and no L0 Index

Move to using the DJ Bernstein Magic Hash consistently, and try to
make sure we only hash once for each operation (as the hash is more
expensive than phash2).

The improved lookup time for missing keys should allow for the L0 index
to be removed, and hence speed up the completion time for push_mem
operations.

It is expected there will be a second stage of creating a tinybloom as
part of the SFT creation process, and then adding that tinybloom to the
manifest.  This will then reduce the message passing required for a GET
of a key that is not in the cache or higher levels.
This commit is contained in:
martinsumner 2016-12-11 01:02:56 +00:00
parent 95d5e12ce7
commit 2d3a40e6f1
11 changed files with 646 additions and 476 deletions

View file

@ -39,6 +39,7 @@
strip_to_statusonly/1,
strip_to_keyseqstatusonly/1,
strip_to_keyseqonly/1,
strip_to_seqnhashonly/1,
striphead_to_details/1,
is_active/3,
endkey_passed/2,
@ -62,11 +63,38 @@
convert_indexspecs/5,
generate_uuid/0,
integer_now/0,
riak_extract_metadata/2]).
riak_extract_metadata/2,
magic_hash/1]).
-define(V1_VERS, 1).
-define(MAGIC, 53). % riak_kv -> riak_object
%% Hash a key with the DJ Bernstein "magic" hash.  This costs more than
%% phash2, but gives a much more balanced result.
%%
%% Object keys (?RIAK_TAG and ?STD_TAG) are hashed on {Bucket, Key} alone, so
%% the sub-key does not influence the hash.  Index keys (?IDX_TAG) are not
%% hashed at all: the atom no_lookup is returned for them.  Any other term is
%% serialised with term_to_binary/1, hashed byte by byte, and the result is
%% truncated to 32 bits.
%%
%% The seed (5381) and multiplier (33) are the traditional DJB constants; for
%% some background on them see -
%% http://stackoverflow.com/questions/10696223/reason-for-5381-number-in-djb-hash-function
magic_hash({Tag, Bucket, Key, _SubKey})
                        when Tag =:= ?RIAK_TAG; Tag =:= ?STD_TAG ->
    magic_hash({Bucket, Key});
magic_hash({?IDX_TAG, _Bucket, _IdxTerm, _Key}) ->
    no_lookup;
magic_hash(Term) ->
    hash1(5381, term_to_binary(Term)) band 16#FFFFFFFF.
%% Fold the DJB step (multiply the accumulator by 33, then xor in the next
%% byte) over every byte of the binary, starting from the accumulator H.
%% An empty binary returns H untouched.
%%
%% The accumulator is truncated to 32 bits after each multiplication.  Erlang
%% integers are arbitrary precision, so without this the accumulator grows by
%% roughly five bits per input byte and every step becomes a bignum
%% multiplication.  The low 32 bits of (H * 33) bxor B depend only on the low
%% 32 bits of H, so masking early yields exactly the same value once the
%% caller applies its own 32-bit mask.
hash1(H, <<>>) ->
    H;
hash1(H, <<B:8/integer, Rest/bytes>>) ->
    H1 = (H * 33) band 16#FFFFFFFF,
    % B is a single byte, so the xor cannot push the value past 32 bits
    hash1(H1 bxor B, Rest).
%% Credit to
%% https://github.com/afiskon/erlang-uuid-v4/blob/master/src/uuid.erl
@ -87,15 +115,18 @@ inker_reload_strategy(AltList) ->
%% Return just the key from a ledger entry.  The entry is either the tagged
%% form {keyonly, Key} or a {Key, Value} pair.
strip_to_keyonly({keyonly, Key}) ->
    Key;
strip_to_keyonly({Key, _Value}) ->
    Key.
strip_to_keyseqstatusonly({K, {SeqN, St, _MD}}) -> {K, SeqN, St}.
strip_to_keyseqstatusonly({K, {SeqN, St, _, _MD}}) -> {K, SeqN, St}.
strip_to_statusonly({_, {_, St, _}}) -> St.
strip_to_statusonly({_, {_, St, _, _}}) -> St.
strip_to_seqonly({_, {SeqN, _, _}}) -> SeqN.
strip_to_seqonly({_, {SeqN, _, _, _}}) -> SeqN.
strip_to_keyseqonly({LK, {SeqN, _, _}}) -> {LK, SeqN}.
strip_to_keyseqonly({LK, {SeqN, _, _, _}}) -> {LK, SeqN}.
%% From a {Key, {SeqN, Status, Hash, Metadata}} entry return only the
%% sequence number and the hash, as {SeqN, Hash}.
strip_to_seqnhashonly({_Key, {SQN, _Status, Hash, _Metadata}}) ->
    {SQN, Hash}.
striphead_to_details({SeqN, St, MH, MD}) -> {SeqN, St, MH, MD}.
striphead_to_details({SeqN, St, MD}) -> {SeqN, St, MD}.
key_dominates(LeftKey, RightKey) ->
case {LeftKey, RightKey} of
@ -103,10 +134,10 @@ key_dominates(LeftKey, RightKey) ->
left_hand_first;
{{LK, _LVAL}, {RK, _RVAL}} when RK < LK ->
right_hand_first;
{{LK, {LSN, _LST, _LMD}}, {RK, {RSN, _RST, _RMD}}}
{{LK, {LSN, _LST, _LMH, _LMD}}, {RK, {RSN, _RST, _RMH, _RMD}}}
when LK == RK, LSN >= RSN ->
left_hand_dominant;
{{LK, {LSN, _LST, _LMD}}, {RK, {RSN, _RST, _RMD}}}
{{LK, {LSN, _LST, _LMH, _LMD}}, {RK, {RSN, _RST, _RMH, _RMD}}}
when LK == RK, LSN < RSN ->
right_hand_dominant
end.
@ -218,8 +249,6 @@ create_value_for_journal(Value) ->
Value
end.
hash(Obj) ->
erlang:phash2(term_to_binary(Obj)).
@ -273,7 +302,7 @@ convert_indexspecs(IndexSpecs, Bucket, Key, SQN, TTL) ->
end,
{to_ledgerkey(Bucket, Key, ?IDX_TAG,
IdxField, IdxValue),
{SQN, Status, null}}
{SQN, Status, no_lookup, null}}
end,
IndexSpecs).
@ -285,9 +314,11 @@ generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) ->
_ ->
{active, TS}
end,
{Bucket,
Key,
{PrimaryKey, {SQN, Status, extract_metadata(Obj, Size, Tag)}}}.
Value = {SQN,
Status,
magic_hash(PrimaryKey),
extract_metadata(Obj, Size, Tag)},
{Bucket, Key, {PrimaryKey, Value}}.
integer_now() ->
@ -304,7 +335,7 @@ extract_metadata(Obj, Size, ?STD_TAG) ->
get_size(PK, Value) ->
{Tag, _Bucket, _Key, _} = PK,
{_, _, MD} = Value,
{_, _, _, MD} = Value,
case Tag of
?RIAK_TAG ->
{_RMD, _VC, _Hash, Size} = MD,
@ -316,7 +347,7 @@ get_size(PK, Value) ->
get_keyandhash(LK, Value) ->
{Tag, Bucket, Key, _} = LK,
{_, _, MD} = Value,
{_, _, _, MD} = Value,
case Tag of
?RIAK_TAG ->
{_RMD, _VC, Hash, _Size} = MD,
@ -375,11 +406,14 @@ indexspecs_test() ->
{remove, "t1_bin", "abdc456"}],
Changes = convert_indexspecs(IndexSpecs, "Bucket", "Key2", 1, infinity),
?assertMatch({{i, "Bucket", {"t1_int", 456}, "Key2"},
{1, {active, infinity}, null}}, lists:nth(1, Changes)),
{1, {active, infinity}, no_lookup, null}},
lists:nth(1, Changes)),
?assertMatch({{i, "Bucket", {"t1_bin", "adbc123"}, "Key2"},
{1, {active, infinity}, null}}, lists:nth(2, Changes)),
{1, {active, infinity}, no_lookup, null}},
lists:nth(2, Changes)),
?assertMatch({{i, "Bucket", {"t1_bin", "abdc456"}, "Key2"},
{1, tomb, null}}, lists:nth(3, Changes)).
{1, tomb, no_lookup, null}},
lists:nth(3, Changes)).
endkey_passed_test() ->
TestKey = {i, null, null, null},