From d37c5eab3f1fa11fbc64fd7acfc3e5af32900c54 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 27 Nov 2017 14:49:43 +0000 Subject: [PATCH] Revert "Half size of each slot's bloom" This reverts commit d83eea7c60075283d0ebd7fb074f3638a95aea29. --- src/leveled_ebloom.erl | 111 ++++++++++++----------------------------- 1 file changed, 32 insertions(+), 79 deletions(-) diff --git a/src/leveled_ebloom.erl b/src/leveled_ebloom.erl index 0292311..d2edc4c 100644 --- a/src/leveled_ebloom.erl +++ b/src/leveled_ebloom.erl @@ -16,8 +16,8 @@ check_hash/2 ]). --define(BLOOM_SIZE_BYTES, 1024). --define(INTEGER_SIZE, 8192). +-define(BLOOM_SIZE_BYTES, 2048). +-define(INTEGER_SIZE, 16384). -define(BAND_MASK, ?INTEGER_SIZE - 1). @@ -32,12 +32,11 @@ create_bloom(HashList) -> case length(HashList) of 0 -> <<>>; - L when L > 8192 -> + L when L > 16384 -> add_hashlist(HashList, - 15, - 0, 0, 0, 0, 0, 0, 0, 0, + 7, 0, 0, 0, 0, 0, 0, 0, 0); - L when L > 4084 -> + L when L > 8192 -> add_hashlist(HashList, 3, 0, 0, 0, 0); _ -> add_hashlist(HashList, 1, 0, 0) @@ -70,11 +69,19 @@ check_hash({_SegHash, Hash}, BloomBin) -> split_hash(Hash, SlotSplit) -> Slot = Hash band SlotSplit, H0 = (Hash bsr 4) band (?BAND_MASK), - H1 = (Hash bsr 17) band (?BAND_MASK), - {Slot, [H0, H1]}. + H1 = (Hash bsr 18) band (?BAND_MASK), + % H2 = (Hash bsr 34) band (?BAND_MASK), + % H3 = (Hash bsr 49) band (?BAND_MASK), + {Slot, [H0, H1 + %, H2, H3 + ]}. -get_mask([H0, H1]) -> - (1 bsl H0) bor (1 bsl H1). +get_mask([H0, H1 + %, H2, H3 + ]) -> + (1 bsl H0) bor (1 bsl H1) + % bor (1 bsl H2) bor (1 bsl H3) + . %% This looks ugly and clunky, but in tests it was quicker than modifying an @@ -111,104 +118,50 @@ add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1, S2, S3) -> add_hashlist(T, SlotSplit, S0, S1, S2, S3 bor Mask) end. -add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15) -> +add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7) -> IntSize = ?INTEGER_SIZE, <>; + S6:IntSize/integer, S7:IntSize/integer>>; add_hashlist([{_SegHash, TopHash}|T], SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15) -> + S0, S1, S2, S3, S4, S5, S6, S7) -> {Slot, Hashes} = split_hash(TopHash, SlotSplit), Mask = get_mask(Hashes), case Slot of 0 -> add_hashlist(T, SlotSplit, - S0 bor Mask, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15); + S0 bor Mask, S1, S2, S3, S4, S5, S6, S7); 1 -> add_hashlist(T, SlotSplit, - S0, S1 bor Mask, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15); + S0, S1 bor Mask, S2, S3, S4, S5, S6, S7); 2 -> add_hashlist(T, SlotSplit, - S0, S1, S2 bor Mask, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15); + S0, S1, S2 bor Mask, S3, S4, S5, S6, S7); 3 -> add_hashlist(T, SlotSplit, - S0, S1, S2, S3 bor Mask, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15); + S0, S1, S2, S3 bor Mask, S4, S5, S6, S7); 4 -> add_hashlist(T, SlotSplit, - S0, S1, S2, S3, S4 bor Mask, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15); + S0, S1, S2, S3, S4 bor Mask, S5, S6, S7); 5 -> add_hashlist(T, SlotSplit, - S0, S1, S2, S3, S4, S5 bor Mask, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15); + S0, S1, S2, S3, S4, S5 bor Mask, S6, S7); 6 -> add_hashlist(T, SlotSplit, - S0, S1, S2, S3, S4, S5, S6 bor Mask, S7, - S8, S9, S10, S11, S12, S13, S14, S15); + S0, S1, S2, S3, S4, S5, S6 bor Mask, S7); 7 -> add_hashlist(T, SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7 bor Mask, - S8, S9, S10, S11, S12, S13, S14, S15); - 8 -> - add_hashlist(T, - SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7, - S8 bor Mask, S9, S10, S11, S12, S13, S14, S15); - 9 -> - add_hashlist(T, - SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9 bor Mask, S10, S11, S12, S13, S14, S15); - 10 -> - add_hashlist(T, - SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10 bor Mask, S11, S12, S13, S14, S15); - 11 -> - add_hashlist(T, - SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11 bor Mask, S12, S13, S14, S15); - 12 -> - add_hashlist(T, - SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12 bor Mask, S13, S14, S15); - 13 -> - add_hashlist(T, - SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13 bor Mask, S14, S15); - 14 -> - add_hashlist(T, - SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14 bor Mask, S15); - 15 -> - add_hashlist(T, - SlotSplit, - S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15 bor Mask) + S0, S1, S2, S3, S4, S5, S6, S7 bor Mask) end. @@ -282,7 +235,7 @@ bloom_test_() -> bloom_test_ranges() -> test_bloom(40000, 2), - test_bloom(128 * 256, 10), + test_bloom(?INTEGER_SIZE, 10), test_bloom(20000, 2), test_bloom(10000, 2), test_bloom(5000, 2). @@ -309,7 +262,7 @@ test_bloom(N, Runs) -> ListOfBlooms = lists:map(fun({HL, _ML}) -> create_bloom(HL) end, SplitListOfHashLists), - TSa = timer:now_diff(os:timestamp(), SWa)/Runs, + TSa = timer:now_diff(os:timestamp(), SWa), SWb = os:timestamp(), lists:foreach(fun(Nth) -> @@ -318,7 +271,7 @@ test_bloom(N, Runs) -> check_all_hashes(BB, HL) end, lists:seq(1, Runs)), - TSb = timer:now_diff(os:timestamp(), SWb)/Runs, + TSb = timer:now_diff(os:timestamp(), SWb), SWc = os:timestamp(), {Pos, Neg} = @@ -330,7 +283,7 @@ test_bloom(N, Runs) -> {0, 0}, lists:seq(1, Runs)), FPR = Pos / (Pos + Neg), - TSc = timer:now_diff(os:timestamp(), SWc)/Runs, + TSc = timer:now_diff(os:timestamp(), SWc), io:format(user, "Test with size ~w has microsecond timings: -"