Entropy fiddle
Try to increase the effectiveness of the bloom by combining Magic Hash with phash2
This commit is contained in:
parent
fb75a26497
commit
4784f8521a
2 changed files with 37 additions and 27 deletions
|
@ -401,13 +401,14 @@ code_change(_OldVsn, StateName, State, _Extra) ->
|
|||
|
||||
fetch(LedgerKey, Hash, State) ->
|
||||
Summary = State#state.summary,
|
||||
case leveled_tinybloom:check({hash, Hash}, Summary#summary.bloom) of
|
||||
case leveled_tinybloom:check({hash, Hash},
|
||||
Summary#summary.bloom) of
|
||||
false ->
|
||||
{not_present, summary_bloom, null};
|
||||
true ->
|
||||
Slot = lookup_slot(LedgerKey, Summary#summary.index),
|
||||
SlotBloom = Slot#slot_index_value.bloom,
|
||||
case is_check_slot_required({hash, Hash}, SlotBloom) of
|
||||
case is_check_slot_required({hash, Hash}, LedgerKey, SlotBloom) of
|
||||
false ->
|
||||
{not_present, slot_bloom, null};
|
||||
true ->
|
||||
|
@ -570,7 +571,9 @@ build_table_summary(SlotIndex, AllHashes, Level, FirstKey, L, MaxSQN) ->
|
|||
false ->
|
||||
element(2, lists:keyfind(default, 1, ?LEVEL_BLOOM_SLOTS))
|
||||
end,
|
||||
Bloom = lists:foldr(fun leveled_tinybloom:enter/2,
|
||||
BloomAddFun =
|
||||
fun({H, _K}, Bloom) -> leveled_tinybloom:enter(H, Bloom) end,
|
||||
Bloom = lists:foldr(BloomAddFun,
|
||||
leveled_tinybloom:empty(BloomSlots),
|
||||
AllHashes),
|
||||
[{LastKey, _LastV}|_Rest] = SlotIndex,
|
||||
|
@ -627,7 +630,7 @@ build_all_slots(KVL, Count, Start, AllHashes, SlotID, SlotIndex, SlotsBin) ->
|
|||
no_lookup ->
|
||||
Acc;
|
||||
H ->
|
||||
[{hash, H}|Acc]
|
||||
[{{hash, H}, K}|Acc]
|
||||
end
|
||||
end,
|
||||
HashList = lists:foldr(ExtractHashFun, [], SlotList),
|
||||
|
@ -649,16 +652,18 @@ build_all_slots(KVL, Count, Start, AllHashes, SlotID, SlotIndex, SlotsBin) ->
|
|||
|
||||
build_slot(KVList, HashList) ->
|
||||
Tree = gb_trees:from_orddict(KVList),
|
||||
Bloom = lists:foldr(fun leveled_tinybloom:tiny_enter/2,
|
||||
BloomAddFun =
|
||||
fun({H, K}, Bloom) -> leveled_tinybloom:tiny_enter(H, K, Bloom) end,
|
||||
Bloom = lists:foldr(BloomAddFun,
|
||||
leveled_tinybloom:tiny_empty(),
|
||||
HashList),
|
||||
SlotBin = term_to_binary(Tree, [{compressed, ?COMPRESSION_LEVEL}]),
|
||||
{SlotBin, Bloom}.
|
||||
|
||||
is_check_slot_required(_Hash, none) ->
|
||||
is_check_slot_required(_Hash, _Key, none) ->
|
||||
true;
|
||||
is_check_slot_required(Hash, Bloom) ->
|
||||
leveled_tinybloom:tiny_check(Hash, Bloom).
|
||||
is_check_slot_required(Hash, Key, Bloom) ->
|
||||
leveled_tinybloom:tiny_check(Hash, Key, Bloom).
|
||||
|
||||
%% Returns a section from the summary index and two booleans to indicate if
|
||||
%% the first slot needs trimming, or the last slot
|
||||
|
@ -1030,7 +1035,7 @@ simple_slotbin_test() ->
|
|||
ExtractHashFun =
|
||||
fun({K, V}) ->
|
||||
{_SQN, H} = leveled_codec:strip_to_seqnhashonly({K, V}),
|
||||
{hash, H} end,
|
||||
{{hash, H}, K} end,
|
||||
HashList = lists:map(ExtractHashFun, KVList1),
|
||||
SW0 = os:timestamp(),
|
||||
{SlotBin0, Bloom0} = build_slot(KVList1, HashList),
|
||||
|
@ -1038,8 +1043,10 @@ simple_slotbin_test() ->
|
|||
[timer:now_diff(os:timestamp(), SW0), byte_size(SlotBin0)]),
|
||||
|
||||
SW1 = os:timestamp(),
|
||||
lists:foreach(fun(H) -> ?assertMatch(true,
|
||||
is_check_slot_required(H, Bloom0))
|
||||
lists:foreach(fun({H, K}) -> ?assertMatch(true,
|
||||
is_check_slot_required(H,
|
||||
K,
|
||||
Bloom0))
|
||||
end,
|
||||
HashList),
|
||||
lists:foreach(fun({K, V}) ->
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
enter/2,
|
||||
check/2,
|
||||
empty/1,
|
||||
tiny_enter/2,
|
||||
tiny_check/2,
|
||||
tiny_enter/3,
|
||||
tiny_check/3,
|
||||
tiny_empty/0
|
||||
]).
|
||||
|
||||
|
@ -75,16 +75,16 @@ check(Key, Bloom) ->
|
|||
tiny_empty() ->
|
||||
<<0:1024>>.
|
||||
|
||||
tiny_enter({hash, no_lookup}, Bloom) ->
|
||||
tiny_enter({hash, no_lookup}, _Key, Bloom) ->
|
||||
Bloom;
|
||||
tiny_enter({hash, Hash}, Bloom) ->
|
||||
{Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash),
|
||||
tiny_enter({hash, Hash}, Key, Bloom) ->
|
||||
{Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash, Key),
|
||||
AddFun = fun(Bit, Arr0) -> add_to_array(Bit, Arr0, 1024) end,
|
||||
lists:foldl(AddFun, Bloom, [Bit0, Bit1, Bit2]).
|
||||
|
||||
|
||||
tiny_check({hash, Hash}, Bloom) ->
|
||||
{Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash),
|
||||
tiny_check({hash, Hash}, Key, Bloom) ->
|
||||
{Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash, Key),
|
||||
case getbit(Bit0, Bloom, 1024) of
|
||||
<<0:1>> ->
|
||||
false;
|
||||
|
@ -113,8 +113,9 @@ split_hash(Hash) ->
|
|||
H2 = Hash bsr 20,
|
||||
{H0, H1, H2}.
|
||||
|
||||
split_hash_for_tinybloom(Hash) ->
|
||||
split_hash_for_tinybloom(MagicHash, Key) ->
|
||||
% Tiny bloom can make k=3 from one hash
|
||||
Hash = MagicHash bxor erlang:phash2(Key),
|
||||
H0 = Hash band 1023,
|
||||
H1 = (Hash bsr 11) band 1023,
|
||||
H2 = (Hash bsr 22) band 1023,
|
||||
|
@ -194,8 +195,8 @@ simple_test() ->
|
|||
?assertMatch(true, FP < (N div 4)).
|
||||
|
||||
tiny_test() ->
|
||||
N = 256,
|
||||
K = 32, % more checks out then in K * checks
|
||||
N = 128,
|
||||
K = 64, % more checks out than in K * checks
|
||||
KLin = lists:map(fun(X) -> "Key_" ++
|
||||
integer_to_list(X) ++
|
||||
integer_to_list(random:uniform(100)) ++
|
||||
|
@ -211,27 +212,29 @@ tiny_test() ->
|
|||
lists:seq(1, N * K)),
|
||||
|
||||
HashIn = lists:map(fun(X) ->
|
||||
{hash, leveled_codec:magic_hash(X)} end,
|
||||
{{hash, leveled_codec:magic_hash(X)}, X} end,
|
||||
KLin),
|
||||
HashOut = lists:map(fun(X) ->
|
||||
{hash, leveled_codec:magic_hash(X)} end,
|
||||
{{hash, leveled_codec:magic_hash(X)}, X} end,
|
||||
KLout),
|
||||
|
||||
SW1 = os:timestamp(),
|
||||
Bloom = lists:foldr(fun tiny_enter/2, tiny_empty(), HashIn),
|
||||
Bloom = lists:foldr(fun({H0, K0}, B) -> tiny_enter(H0, K0, B) end,
|
||||
tiny_empty(),
|
||||
HashIn),
|
||||
io:format(user,
|
||||
"~nAdding ~w hashes to tiny bloom took ~w microseconds~n",
|
||||
[N, timer:now_diff(os:timestamp(), SW1)]),
|
||||
|
||||
SW2 = os:timestamp(),
|
||||
lists:foreach(fun(X) ->
|
||||
?assertMatch(true, tiny_check(X, Bloom)) end, HashIn),
|
||||
lists:foreach(fun({H1, K1}) ->
|
||||
?assertMatch(true, tiny_check(H1, K1, Bloom)) end, HashIn),
|
||||
io:format(user,
|
||||
"~nChecking ~w hashes in tiny bloom took ~w microseconds~n",
|
||||
[N, timer:now_diff(os:timestamp(), SW2)]),
|
||||
|
||||
SW3 = os:timestamp(),
|
||||
FP = lists:foldr(fun(X, Acc) -> case tiny_check(X, Bloom) of
|
||||
FP = lists:foldr(fun({H3, K3}, Acc) -> case tiny_check(H3, K3, Bloom) of
|
||||
true -> Acc + 1;
|
||||
false -> Acc
|
||||
end end,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue