From be1d678d858da3b6097966c3f36e6002082ce32b Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Tue, 3 Jan 2017 23:43:43 +0000
Subject: [PATCH 1/3] Revert to two hash tiny bloom

---
 src/leveled_tinybloom.erl | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/src/leveled_tinybloom.erl b/src/leveled_tinybloom.erl
index 9d85b9a..03c24bf 100644
--- a/src/leveled_tinybloom.erl
+++ b/src/leveled_tinybloom.erl
@@ -29,7 +29,6 @@
 %%% Bloom API
 %%%============================================================================
 
-
 empty(Width) when Width =< 256 ->
     FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end,
     lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)).
@@ -37,7 +36,7 @@ empty(Width) when Width =< 256 ->
 enter({hash, no_lookup}, Bloom) ->
     Bloom;
 enter({hash, Hash}, Bloom) ->
-    {Slot0, Q, Bit1, Bit2, Bit3} = split_hash(Hash),
+    {Slot0, Q, Bit1, Bit2} = split_hash(Hash),
     Slot = Slot0 rem dict:size(Bloom),
     BitArray0 = dict:fetch(Slot, Bloom),
     {Pre, SplitArray0, Post} = split_array(BitArray0, Q),
@@ -45,7 +44,7 @@ enter({hash, Hash}, Bloom) ->
         fun(Bit, Arr) -> add_to_array(Bit, Arr, 1024) end,
     SplitArray1 = lists:foldl(FoldFun,
                                 SplitArray0,
-                                lists:usort([Bit1, Bit2, Bit3])),
+                                [Bit1, Bit2]),
     dict:store(Slot, <<Pre/bitstring, SplitArray1/bitstring, Post/bitstring>>, Bloom);
 enter(Key, Bloom) ->
     Hash = leveled_codec:magic_hash(Key),
@@ -53,7 +52,7 @@ enter(Key, Bloom) ->
 
 
 check({hash, Hash}, Bloom) ->
-    {Slot0, Q, Bit1, Bit2, Bit3} = split_hash(Hash),
+    {Slot0, Q, Bit1, Bit2} = split_hash(Hash),
     Slot = Slot0 rem dict:size(Bloom),
     BitArray = dict:fetch(Slot, Bloom),
     {_Pre, SplitArray, _Post} = split_array(BitArray, Q),
@@ -66,12 +65,7 @@ check({hash, Hash}, Bloom) ->
                 <<0:1>> ->
                     false;
                 <<1:1>> ->
-                    case getbit(Bit3, SplitArray, 1024) of
-                        <<0:1>> ->
-                            false;
-                        <<1:1>> ->
-                            true
-                    end
+                    true
             end
     end;
 check(Key, Bloom) ->
@@ -85,22 +79,17 @@ check(Key, Bloom) ->
 
 split_hash(Hash) ->
     Slot = split_for_slot(Hash),
-    {Q1, H1, H2, H3} = split_for_bits(Hash),
-    {Slot, Q1, H1, H2, H3}.
+    {Q1, H1, H2} = split_for_bits(Hash),
+    {Slot, Q1, H1, H2}.
 
 split_for_slot(Hash) ->
-    SlotH1 = Hash band 255,
-    SlotH2 = (Hash bsr 8) band 255,
-    SlotH3 = (Hash bsr 16) band 255,
-    SlotH4 = (Hash bsr 24) band 255,
-    (SlotH1 bxor SlotH2) bxor (SlotH3 bxor SlotH4).
+    Hash band 255.
 
 split_for_bits(Hash) ->
-    Q1 = Hash band 3,
-    H1 = (Hash bsr 2) band 1023,
-    H2 = (Hash bsr 12) band 1023,
-    H3 = (Hash bsr 22) band 1023,
-    {Q1, H1, H2, H3}.
+    H1 = (Hash bsr 8) band 1023,
+    H2 = (Hash bsr 18) band 1023,
+    Q1 = (Hash bsr 28) band 3,
+    {Q1, H1, H2}.
 
 split_array(Bin, Q) ->
     case Q of

From 85aaccfe3197056d4ff6e817b67e280950b471ea Mon Sep 17 00:00:00 2001
From: Martin Sumner 
Date: Tue, 3 Jan 2017 23:53:57 +0000
Subject: [PATCH 2/3] Revert to non-split tinybloom

---
 src/leveled_tinybloom.erl | 45 +++++++++++----------------------------
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/src/leveled_tinybloom.erl b/src/leveled_tinybloom.erl
index 03c24bf..e513ce5 100644
--- a/src/leveled_tinybloom.erl
+++ b/src/leveled_tinybloom.erl
@@ -36,32 +36,30 @@ empty(Width) when Width =< 256 ->
 enter({hash, no_lookup}, Bloom) ->
     Bloom;
 enter({hash, Hash}, Bloom) ->
-    {Slot0, Q, Bit1, Bit2} = split_hash(Hash),
+    {Slot0, Bit1, Bit2} = split_hash(Hash),
     Slot = Slot0 rem dict:size(Bloom),
     BitArray0 = dict:fetch(Slot, Bloom),
-    {Pre, SplitArray0, Post} = split_array(BitArray0, Q),
     FoldFun =
-        fun(Bit, Arr) -> add_to_array(Bit, Arr, 1024) end,
-    SplitArray1 = lists:foldl(FoldFun,
-                                SplitArray0,
+        fun(Bit, Arr) -> add_to_array(Bit, Arr, 4096) end,
+    BitArray1 = lists:foldl(FoldFun,
+                                BitArray0,
                                 [Bit1, Bit2]),
-    dict:store(Slot, <<Pre/bitstring, SplitArray1/bitstring, Post/bitstring>>, Bloom);
+    dict:store(Slot, <<BitArray1/bitstring>>, Bloom);
 enter(Key, Bloom) ->
     Hash = leveled_codec:magic_hash(Key),
     enter({hash, Hash}, Bloom).
 
 
 check({hash, Hash}, Bloom) ->
-    {Slot0, Q, Bit1, Bit2} = split_hash(Hash),
+    {Slot0, Bit1, Bit2} = split_hash(Hash),
     Slot = Slot0 rem dict:size(Bloom),
     BitArray = dict:fetch(Slot, Bloom),
-    {_Pre, SplitArray, _Post} = split_array(BitArray, Q),
     
-    case getbit(Bit1, SplitArray, 1024) of
+    case getbit(Bit1, BitArray, 4096) of
         <<0:1>> ->
             false;
         <<1:1>> ->
-            case getbit(Bit2, SplitArray, 1024) of
+            case getbit(Bit2, BitArray, 4096) of
                 <<0:1>> ->
                     false;
                 <<1:1>> ->
@@ -79,33 +77,16 @@ check(Key, Bloom) ->
 
 split_hash(Hash) ->
     Slot = split_for_slot(Hash),
-    {Q1, H1, H2} = split_for_bits(Hash),
-    {Slot, Q1, H1, H2}.
+    {H1, H2} = split_for_bits(Hash),
+    {Slot, H1, H2}.
 
 split_for_slot(Hash) ->
     Hash band 255.
 
 split_for_bits(Hash) ->
-    H1 = (Hash bsr 8) band 1023,
-    H2 = (Hash bsr 18) band 1023,
-    Q1 = (Hash bsr 28) band 3,
-    {Q1, H1, H2}.
-
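-split_array(Bin, Q) ->
-    case Q of
-        0 ->
-            <<ToUse:1024/bitstring, Post/bitstring>> = Bin,
-            {<<>>, ToUse, Post};
-        1 ->
-            <<Pre:1024/bitstring, ToUse:1024/bitstring, Post/bitstring>> = Bin,
-            {Pre, ToUse, Post};
-        2 ->
-            <<Pre:2048/bitstring, ToUse:1024/bitstring, Post/bitstring>> = Bin,
-            {Pre, ToUse, Post};
-        3 ->
-            <<Pre:3072/bitstring, ToUse:1024/bitstring>> = Bin,
-            {Pre, ToUse, <<>>}
-    end.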
+    H1 = (Hash bsr 8) band 4095,
+    H2 = (Hash bsr 20) band 4095,
+    {H1, H2}.
 
 add_to_array(Bit, BitArray, ArrayLength) ->
     RestLen = ArrayLength - Bit - 1,

From 8289c3b783379a97b3a0aabef73845e5d3f6b886 Mon Sep 17 00:00:00 2001
From: Martin Sumner 
Date: Wed, 4 Jan 2017 00:26:52 +0000
Subject: [PATCH 3/3] full reversion

---
 src/leveled_tinybloom.erl | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/src/leveled_tinybloom.erl b/src/leveled_tinybloom.erl
index e513ce5..2278c2a 100644
--- a/src/leveled_tinybloom.erl
+++ b/src/leveled_tinybloom.erl
@@ -43,7 +43,7 @@ enter({hash, Hash}, Bloom) ->
         fun(Bit, Arr) -> add_to_array(Bit, Arr, 4096) end,
     BitArray1 = lists:foldl(FoldFun,
                                 BitArray0,
-                                [Bit1, Bit2]),
+                                lists:usort([Bit1, Bit2])),
     dict:store(Slot, <<BitArray1/bitstring>>, Bloom);
 enter(Key, Bloom) ->
     Hash = leveled_codec:magic_hash(Key),
@@ -76,29 +76,17 @@ check(Key, Bloom) ->
 %%%============================================================================
 
 split_hash(Hash) ->
-    Slot = split_for_slot(Hash),
-    {H1, H2} = split_for_bits(Hash),
-    {Slot, H1, H2}.
-
-split_for_slot(Hash) ->
-    Hash band 255.
-
-split_for_bits(Hash) ->
+    H0 = Hash band 255,
     H1 = (Hash bsr 8) band 4095,
-    H2 = (Hash bsr 20) band 4095,
-    {H1, H2}.
+    H2 = Hash bsr 20,
+    {H0, H1, H2}.
 
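 add_to_array(Bit, BitArray, ArrayLength) ->
     RestLen = ArrayLength - Bit - 1,
     <<Head:Bit/bitstring, B:1/integer, Rest:RestLen/bitstring>> = BitArray,
-    case B of
-        0 ->
-            <<Head/bitstring, 1:1, Rest/bitstring>>;
-        1 ->
-            BitArray
-    end.
+    <<Head/bitstring, 1:1, Rest/bitstring>>.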
 
 getbit(Bit, BitArray, ArrayLength) ->
     RestLen = ArrayLength - Bit - 1,