Revert "Half size of each slot's bloom"
This reverts commit d83eea7c60.
This commit is contained in:
parent
d83eea7c60
commit
d37c5eab3f
1 changed file with 32 additions and 79 deletions
|
@@ -16,8 +16,8 @@
|
||||||
check_hash/2
|
check_hash/2
|
||||||
]).
|
]).
|
||||||
|
|
||||||
-define(BLOOM_SIZE_BYTES, 1024).
|
-define(BLOOM_SIZE_BYTES, 2048).
|
||||||
-define(INTEGER_SIZE, 8192).
|
-define(INTEGER_SIZE, 16384).
|
||||||
-define(BAND_MASK, ?INTEGER_SIZE - 1).
|
-define(BAND_MASK, ?INTEGER_SIZE - 1).
|
||||||
|
|
||||||
|
|
||||||
|
@@ -32,12 +32,11 @@ create_bloom(HashList) ->
|
||||||
case length(HashList) of
|
case length(HashList) of
|
||||||
0 ->
|
0 ->
|
||||||
<<>>;
|
<<>>;
|
||||||
L when L > 8192 ->
|
L when L > 16384 ->
|
||||||
add_hashlist(HashList,
|
add_hashlist(HashList,
|
||||||
15,
|
7,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0);
|
0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
L when L > 4084 ->
|
L when L > 8192 ->
|
||||||
add_hashlist(HashList, 3, 0, 0, 0, 0);
|
add_hashlist(HashList, 3, 0, 0, 0, 0);
|
||||||
_ ->
|
_ ->
|
||||||
add_hashlist(HashList, 1, 0, 0)
|
add_hashlist(HashList, 1, 0, 0)
|
||||||
|
@@ -70,11 +69,19 @@ check_hash({_SegHash, Hash}, BloomBin) ->
|
||||||
split_hash(Hash, SlotSplit) ->
|
split_hash(Hash, SlotSplit) ->
|
||||||
Slot = Hash band SlotSplit,
|
Slot = Hash band SlotSplit,
|
||||||
H0 = (Hash bsr 4) band (?BAND_MASK),
|
H0 = (Hash bsr 4) band (?BAND_MASK),
|
||||||
H1 = (Hash bsr 17) band (?BAND_MASK),
|
H1 = (Hash bsr 18) band (?BAND_MASK),
|
||||||
{Slot, [H0, H1]}.
|
% H2 = (Hash bsr 34) band (?BAND_MASK),
|
||||||
|
% H3 = (Hash bsr 49) band (?BAND_MASK),
|
||||||
|
{Slot, [H0, H1
|
||||||
|
%, H2, H3
|
||||||
|
]}.
|
||||||
|
|
||||||
get_mask([H0, H1]) ->
|
get_mask([H0, H1
|
||||||
(1 bsl H0) bor (1 bsl H1).
|
%, H2, H3
|
||||||
|
]) ->
|
||||||
|
(1 bsl H0) bor (1 bsl H1)
|
||||||
|
% bor (1 bsl H2) bor (1 bsl H3)
|
||||||
|
.
|
||||||
|
|
||||||
|
|
||||||
%% This looks ugly and clunky, but in tests it was quicker than modifying an
|
%% This looks ugly and clunky, but in tests it was quicker than modifying an
|
||||||
|
@@ -111,104 +118,50 @@ add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1, S2, S3) ->
|
||||||
add_hashlist(T, SlotSplit, S0, S1, S2, S3 bor Mask)
|
add_hashlist(T, SlotSplit, S0, S1, S2, S3 bor Mask)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7,
|
add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7) ->
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15) ->
|
|
||||||
IntSize = ?INTEGER_SIZE,
|
IntSize = ?INTEGER_SIZE,
|
||||||
<<S0:IntSize/integer, S1:IntSize/integer,
|
<<S0:IntSize/integer, S1:IntSize/integer,
|
||||||
S2:IntSize/integer, S3:IntSize/integer,
|
S2:IntSize/integer, S3:IntSize/integer,
|
||||||
S4:IntSize/integer, S5:IntSize/integer,
|
S4:IntSize/integer, S5:IntSize/integer,
|
||||||
S6:IntSize/integer, S7:IntSize/integer,
|
S6:IntSize/integer, S7:IntSize/integer>>;
|
||||||
S8:IntSize/integer, S9:IntSize/integer,
|
|
||||||
S10:IntSize/integer, S11:IntSize/integer,
|
|
||||||
S12:IntSize/integer, S13:IntSize/integer,
|
|
||||||
S14:IntSize/integer, S15:IntSize/integer>>;
|
|
||||||
add_hashlist([{_SegHash, TopHash}|T],
|
add_hashlist([{_SegHash, TopHash}|T],
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7,
|
S0, S1, S2, S3, S4, S5, S6, S7) ->
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15) ->
|
|
||||||
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
|
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
|
||||||
Mask = get_mask(Hashes),
|
Mask = get_mask(Hashes),
|
||||||
case Slot of
|
case Slot of
|
||||||
0 ->
|
0 ->
|
||||||
add_hashlist(T,
|
add_hashlist(T,
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0 bor Mask, S1, S2, S3, S4, S5, S6, S7,
|
S0 bor Mask, S1, S2, S3, S4, S5, S6, S7);
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15);
|
|
||||||
1 ->
|
1 ->
|
||||||
add_hashlist(T,
|
add_hashlist(T,
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0, S1 bor Mask, S2, S3, S4, S5, S6, S7,
|
S0, S1 bor Mask, S2, S3, S4, S5, S6, S7);
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15);
|
|
||||||
2 ->
|
2 ->
|
||||||
add_hashlist(T,
|
add_hashlist(T,
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0, S1, S2 bor Mask, S3, S4, S5, S6, S7,
|
S0, S1, S2 bor Mask, S3, S4, S5, S6, S7);
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15);
|
|
||||||
3 ->
|
3 ->
|
||||||
add_hashlist(T,
|
add_hashlist(T,
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0, S1, S2, S3 bor Mask, S4, S5, S6, S7,
|
S0, S1, S2, S3 bor Mask, S4, S5, S6, S7);
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15);
|
|
||||||
4 ->
|
4 ->
|
||||||
add_hashlist(T,
|
add_hashlist(T,
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0, S1, S2, S3, S4 bor Mask, S5, S6, S7,
|
S0, S1, S2, S3, S4 bor Mask, S5, S6, S7);
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15);
|
|
||||||
5 ->
|
5 ->
|
||||||
add_hashlist(T,
|
add_hashlist(T,
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0, S1, S2, S3, S4, S5 bor Mask, S6, S7,
|
S0, S1, S2, S3, S4, S5 bor Mask, S6, S7);
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15);
|
|
||||||
6 ->
|
6 ->
|
||||||
add_hashlist(T,
|
add_hashlist(T,
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0, S1, S2, S3, S4, S5, S6 bor Mask, S7,
|
S0, S1, S2, S3, S4, S5, S6 bor Mask, S7);
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15);
|
|
||||||
7 ->
|
7 ->
|
||||||
add_hashlist(T,
|
add_hashlist(T,
|
||||||
SlotSplit,
|
SlotSplit,
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7 bor Mask,
|
S0, S1, S2, S3, S4, S5, S6, S7 bor Mask)
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15);
|
|
||||||
8 ->
|
|
||||||
add_hashlist(T,
|
|
||||||
SlotSplit,
|
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7,
|
|
||||||
S8 bor Mask, S9, S10, S11, S12, S13, S14, S15);
|
|
||||||
9 ->
|
|
||||||
add_hashlist(T,
|
|
||||||
SlotSplit,
|
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7,
|
|
||||||
S8, S9 bor Mask, S10, S11, S12, S13, S14, S15);
|
|
||||||
10 ->
|
|
||||||
add_hashlist(T,
|
|
||||||
SlotSplit,
|
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7,
|
|
||||||
S8, S9, S10 bor Mask, S11, S12, S13, S14, S15);
|
|
||||||
11 ->
|
|
||||||
add_hashlist(T,
|
|
||||||
SlotSplit,
|
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7,
|
|
||||||
S8, S9, S10, S11 bor Mask, S12, S13, S14, S15);
|
|
||||||
12 ->
|
|
||||||
add_hashlist(T,
|
|
||||||
SlotSplit,
|
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7,
|
|
||||||
S8, S9, S10, S11, S12 bor Mask, S13, S14, S15);
|
|
||||||
13 ->
|
|
||||||
add_hashlist(T,
|
|
||||||
SlotSplit,
|
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7,
|
|
||||||
S8, S9, S10, S11, S12, S13 bor Mask, S14, S15);
|
|
||||||
14 ->
|
|
||||||
add_hashlist(T,
|
|
||||||
SlotSplit,
|
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7,
|
|
||||||
S8, S9, S10, S11, S12, S13, S14 bor Mask, S15);
|
|
||||||
15 ->
|
|
||||||
add_hashlist(T,
|
|
||||||
SlotSplit,
|
|
||||||
S0, S1, S2, S3, S4, S5, S6, S7,
|
|
||||||
S8, S9, S10, S11, S12, S13, S14, S15 bor Mask)
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
@@ -282,7 +235,7 @@ bloom_test_() ->
|
||||||
|
|
||||||
bloom_test_ranges() ->
|
bloom_test_ranges() ->
|
||||||
test_bloom(40000, 2),
|
test_bloom(40000, 2),
|
||||||
test_bloom(128 * 256, 10),
|
test_bloom(?INTEGER_SIZE, 10),
|
||||||
test_bloom(20000, 2),
|
test_bloom(20000, 2),
|
||||||
test_bloom(10000, 2),
|
test_bloom(10000, 2),
|
||||||
test_bloom(5000, 2).
|
test_bloom(5000, 2).
|
||||||
|
@@ -309,7 +262,7 @@ test_bloom(N, Runs) ->
|
||||||
ListOfBlooms =
|
ListOfBlooms =
|
||||||
lists:map(fun({HL, _ML}) -> create_bloom(HL) end,
|
lists:map(fun({HL, _ML}) -> create_bloom(HL) end,
|
||||||
SplitListOfHashLists),
|
SplitListOfHashLists),
|
||||||
TSa = timer:now_diff(os:timestamp(), SWa)/Runs,
|
TSa = timer:now_diff(os:timestamp(), SWa),
|
||||||
|
|
||||||
SWb = os:timestamp(),
|
SWb = os:timestamp(),
|
||||||
lists:foreach(fun(Nth) ->
|
lists:foreach(fun(Nth) ->
|
||||||
|
@@ -318,7 +271,7 @@ test_bloom(N, Runs) ->
|
||||||
check_all_hashes(BB, HL)
|
check_all_hashes(BB, HL)
|
||||||
end,
|
end,
|
||||||
lists:seq(1, Runs)),
|
lists:seq(1, Runs)),
|
||||||
TSb = timer:now_diff(os:timestamp(), SWb)/Runs,
|
TSb = timer:now_diff(os:timestamp(), SWb),
|
||||||
|
|
||||||
SWc = os:timestamp(),
|
SWc = os:timestamp(),
|
||||||
{Pos, Neg} =
|
{Pos, Neg} =
|
||||||
|
@@ -330,7 +283,7 @@ test_bloom(N, Runs) ->
|
||||||
{0, 0},
|
{0, 0},
|
||||||
lists:seq(1, Runs)),
|
lists:seq(1, Runs)),
|
||||||
FPR = Pos / (Pos + Neg),
|
FPR = Pos / (Pos + Neg),
|
||||||
TSc = timer:now_diff(os:timestamp(), SWc)/Runs,
|
TSc = timer:now_diff(os:timestamp(), SWc),
|
||||||
|
|
||||||
io:format(user,
|
io:format(user,
|
||||||
"Test with size ~w has microsecond timings: -"
|
"Test with size ~w has microsecond timings: -"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue