From 647a7f44dc6dd036ef5417dd15f1d6789c6bd368 Mon Sep 17 00:00:00 2001
From: Martin Sumner <martin.sumner@adaptip.co.uk>
Date: Sun, 31 May 2015 23:31:31 +0100
Subject: [PATCH] Tidy-up initial files and add testing to optimise bst bloom
 filters

---
 ...eled_internal.erl => leveled_iterator.erl} |  52 ++--
 src/leveled_rice.erl                          | 283 ++++++++++++++++++
 test/lookup_test.beam                         | Bin 4096 -> 0 bytes
 test/rice_test.erl                            |  59 ++++
 4 files changed, 375 insertions(+), 19 deletions(-)
 rename src/{leveled_internal.erl => leveled_iterator.erl} (73%)
 create mode 100644 src/leveled_rice.erl
 delete mode 100644 test/lookup_test.beam
 create mode 100644 test/rice_test.erl

diff --git a/src/leveled_internal.erl b/src/leveled_iterator.erl
similarity index 73%
rename from src/leveled_internal.erl
rename to src/leveled_iterator.erl
index 874fe61..f9b97c7 100644
--- a/src/leveled_internal.erl
+++ b/src/leveled_iterator.erl
@@ -1,19 +1,25 @@
 -module(leveled_internal).
+
 -export([termiterator/6]).
+
 -include_lib("eunit/include/eunit.hrl").
 
 
 %% We will have a sorted list of terms
-%% Some terms will be dummy terms which are pointers to more terms which can be found
-%% If a pointer is hit need to replenish the term list before proceeding
+%% Some terms will be dummy terms which are pointers to more terms which can be 
+%% found.  If a pointer is hit need to replenish the term list before 
+%% proceeding.
 %%
-%% Helper Functions should have free functions - FolderFun, CompareFun, PointerCheck}
-%% FolderFun - function which takes the next item and the accumulator and returns an updated accunulator
-%% CompareFun - function which should be able to compare two keys (which are not pointers)
+%% Helper Functions should have free functions - 
+%% {FolderFun, CompareFun, PointerCheck}
+%% FolderFun - function which takes the next item and the accumulator and 
+%% returns an updated accumulator
+%% CompareFun - function which should be able to compare two keys (which are 
+%% not pointers), and return a winning item (or combination of items)
 %% PointerCheck - function for differentiating between keys and pointer
 
-termiterator(HeadItem, [], Acc, HelperFuns, _StartKey, _EndKey) ->
-	io:format("Reached empty list with head item of ~w~n", [HeadItem]),
+termiterator(HeadItem, [], Acc, HelperFuns, 
+	_StartKey, _EndKey) ->
 	case HeadItem of 
 		null ->
 			Acc;
@@ -21,7 +27,8 @@ termiterator(HeadItem, [], Acc, HelperFuns, _StartKey, _EndKey) ->
 			{FolderFun, _, _} = HelperFuns,
 			FolderFun(Acc, HeadItem)
 	end;
-termiterator(null, [NextItem|TailList], Acc, HelperFuns, StartKey, EndKey) ->
+termiterator(null, [NextItem|TailList], Acc, HelperFuns, 
+	StartKey, EndKey) ->
 	%% Check that the NextItem is not a pointer before promoting to HeadItem
 	%% Cannot now promote a HeadItem which is a pointer
 	{_, _, PointerCheck} = HelperFuns,
@@ -29,30 +36,37 @@ termiterator(null, [NextItem|TailList], Acc, HelperFuns, StartKey, EndKey) ->
 		{true, Pointer} ->
 			NewSlice = getnextslice(Pointer, EndKey),
 			ExtendedList = lists:merge(NewSlice, TailList),
-			termiterator(null, ExtendedList, Acc, HelperFuns, StartKey, EndKey);
+			termiterator(null, ExtendedList, Acc, HelperFuns, 
+				StartKey, EndKey);
 		false ->
-			termiterator(NextItem, TailList, Acc, HelperFuns, StartKey, EndKey)
+			termiterator(NextItem, TailList, Acc, HelperFuns, 
+				StartKey, EndKey)
 	end;
-termiterator(HeadItem, [NextItem|TailList], Acc, HelperFuns, StartKey, EndKey) ->
-	io:format("Checking head item of ~w~n", [HeadItem]),
+termiterator(HeadItem, [NextItem|TailList], Acc, HelperFuns, 
+	StartKey, EndKey) ->
 	{FolderFun, CompareFun, PointerCheck} = HelperFuns,
-	%% HeadItem cannot be pointer, but NextItem might be, so check before comparison
+	%% HeadItem cannot be pointer, but NextItem might be, so check before 
+	%% comparison
 	case PointerCheck(NextItem) of 
 		{true, Pointer} ->
 			NewSlice = getnextslice(Pointer, EndKey),
 			ExtendedList = lists:merge(NewSlice, [NextItem|TailList]),
-			termiterator(null, ExtendedList, Acc, HelperFuns, StartKey, EndKey);
+			termiterator(null, ExtendedList, Acc, HelperFuns, 
+				StartKey, EndKey);
 		false ->
-			%% Compare to see if Head and Next match, or if Head is a winner to be added
-			%% to accumulator
+			%% Compare to see if Head and Next match, or if Head is a winner 
+			%% to be added to accumulator
 			case CompareFun(HeadItem, NextItem) of 
 				{match, StrongItem, _WeakItem} ->
-					%% Discard WeakItem
-					termiterator(StrongItem, TailList, Acc, HelperFuns, StartKey, EndKey);
+					%% Discard WeakItem, Strong Item might be an aggregation of
+					%% the items  
+					termiterator(StrongItem, TailList, Acc, HelperFuns, 
+						StartKey, EndKey);
 				{winner, HeadItem} ->
 					%% Add next item to accumulator, and proceed with next item
 					AccPlus = FolderFun(Acc, HeadItem),
-					termiterator(NextItem, TailList, AccPlus, HelperFuns, HeadItem, EndKey)
+					termiterator(NextItem, TailList, AccPlus, HelperFuns, 
+						HeadItem, EndKey)
 			end
 	end.
 			
diff --git a/src/leveled_rice.erl b/src/leveled_rice.erl
new file mode 100644
index 0000000..f432944
--- /dev/null
+++ b/src/leveled_rice.erl
@@ -0,0 +1,283 @@
+%% Used for creating fixed-size self-regulating encoded bloom filters
+%%
+%% Normally a bloom filter, in order to achieve optimum size, increases the
+%% number of hashes as the desired false positive rate decreases.  There is 
+%% a processing overhead for checking this bloom, both because of the number
+%% of hash calculations required, and also because of the need to CRC check
+%% the bloom to ensure a false negative result is not returned due to 
+%% corruption.
+%%
+%% A more space efficient bloom can be achieved through the compression of 
+%% bloom filters with less hashes (and in an optimal case a single hash).  
+%% This can be achieved using rice encoding.
+%%
+%% Rice-encoding and single hash blooms are used here in order to provide an
+%% optimally space efficient solution, but also as the processing required to
+%% support uncompression can be concurrently performing a checksum role.
+%%
+%% For this to work, the bloom is divided into 64 parts and a 24-bit hash is
+%% required.  Each hash is placed into one of 64 blooms based on the six least
+%% significant bits of the hash, and the most significant 18 bits are used
+%% to indicate the bit to be added to the bloom.
+%%
+%% The bloom is then created by calculating the differences between the ordered
+%% elements of the hash list and representing the difference using an exponent 
+%% and a 13-bit remainder i.e.
+%% 8000  ->   0  11111 01000000
+%% 10000 ->  10  00111 00010000
+%% 20000 -> 110  01110 00100000
+%%
+%% Each bloom should have approximately 64 differences.  
+%%
+%% Fronting the bloom is a bloom index, formed first by 64 pairs of 3-byte
+%% max hash, 2-byte length (bits) - with then each of the encoded bitstrings
+%% appended.  The max hash is the total of all the differences (which should
+%% be the highest hash in the bloom).
+%%
+%% To check a key against the bloom, hash it, take the six least significant
+%% bits and read the start pointer, max hash and end pointer from the expected
+%% positions in the bloom index.  Then roll through from the start pointer to
+%% the end pointer, accumulating each difference. There is a possible match if
+%% either the accumulator hits the expected hash or the max hash doesn't match
+%% the final accumulator (to cover if the bloom has been corrupted by a bit
+%% flip somewhere). A miss is more than twice as expensive (on average) as a
+%% potential match - but still only requires around 64 integer additions
+%% and the processing of <100 bytes of data.
+%%
+%% For 2048 keys, this takes up <4KB.  The false positive rate is 0.000122
+%% This compares favourably for the equivalent size optimal bloom which 
+%% would require 11 hashes and have a false positive rate of 0.000459.
+%% Checking with a positive match should take on average about 6 microseconds, 
+%% and a negative match should take around 11 microseconds.  
+%%
+%% See ../test/rice_test.erl for proving timings and fpr.
+
+
+
+-module(leveled_rice).
+
+-export([create_bloom/1, 
+	check_key/2,
+	check_keys/2]).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(SLOT_COUNT, 64).     % number of independent slots in the bloom
+-define(MAX_HASH, 16777216). % 2^24 - range passed to erlang:phash2/2
+-define(DIVISOR_BITS, 13).   % width in bits of each encoded remainder
+-define(DIVISOR, 8192).      % 2^DIVISOR_BITS so every remainder code is used
+
+%% Build the serialised bloom (a bitstring made of a slot index followed by
+%% the encoded slots) for a list of keys, using the module defaults.
+create_bloom(KeyList) ->
+	create_bloom(KeyList, ?SLOT_COUNT, ?MAX_HASH).
+
+%% As create_bloom/1, with an explicit slot count and hash range.
+create_bloom(KeyList, SlotCount, MaxHash) ->
+	EmptySlots = array:new(SlotCount, [{default, []}]),
+	serialise_bloom(create_hashlist(KeyList, EmptySlots, SlotCount, MaxHash)).
+
+
+%% Check a list of keys against the bloom.  The result is true only when
+%% every key is a possible member; checking stops at the first key that is
+%% definitely absent (check_key/2 returned false).  An empty key list
+%% yields true.
+check_keys(Keys, BitStr) ->
+	lists:all(
+		fun(Key) ->
+			check_key(Key, BitStr)
+		end,
+		Keys
+	).
+
+check_key(Key, BitStr) ->
+	check_key(Key, BitStr, ?SLOT_COUNT, ?MAX_HASH, ?DIVISOR_BITS, ?DIVISOR).
+
+%% true = possibly present; slot data starts after the 40-bit-per-slot index
+check_key(Key, BitStr, SlotCount, MaxHash, Factor, Divisor) ->
+	{Slot, Hash} = get_slothash(Key, MaxHash, SlotCount),
+	{Offset, Bits, TopHash} = find_position(Slot, BitStr, 0, 40 * SlotCount),
+	case BitStr of
+		<<_:Offset/bitstring, SlotData:Bits/bitstring, _/bitstring>> ->
+			check_hash(Hash, SlotData, Factor, Divisor, 0, TopHash);
+		_ ->
+			io:format("Possible corruption of bloom index ~n"), true
+	end.
+
+%% Walk the index to entry Slot, summing the bit-lengths of earlier slots
+find_position(Slot, BloomIndex, Counter, StartPosition) ->
+	<<TopHash:24/integer, Length:16/integer, Rest/bitstring>> = BloomIndex,
+	if
+		Counter =:= Slot ->
+			{StartPosition, Length, TopHash};
+		true ->
+			find_position(Slot, Rest, Counter + 1, StartPosition + Length)
+	end.
+
+%% Replay the encoded gaps of one slot looking for HashToCheck (true = maybe)
+check_hash(_, <<>>, _, _, TopHash, TopHash) ->
+	false;
+check_hash(_, <<>>, _, _, _, _) ->
+	io:format("Failure of CRC check on bloom filter~n"),
+	true;
+check_hash(HashToCheck, BitStr, Factor, Divisor, Acc, TopHash) ->
+	case next_gap(BitStr, Factor) of
+		{ok, Exponent, Remainder, Tail} ->
+			case Acc + Divisor * Exponent + Remainder of
+				HashToCheck ->
+					true;
+				NextHash ->
+					check_hash(HashToCheck, Tail, Factor, Divisor, NextHash, TopHash)
+			end;
+		error ->
+			io:format("Failure of CRC check on bloom filter~n"),
+			true
+	end.
+
+%% Decode one exponent/remainder pair from the front of the slot
+next_gap(BitStr, Factor) ->
+	case findexponent(BitStr) of
+		{ok, Exponent, Tail1} ->
+			case findremainder(Tail1, Factor) of
+				{ok, Remainder, Tail2} -> {ok, Exponent, Remainder, Tail2};
+				error -> error
+			end;
+		error ->
+			error
+	end.
+
+%% Convert the key list into an array of sorted, de-duplicated hash lists
+%% (one list per slot).  Each slot list is kept as an ordset so that adding
+%% a hash is a single ordered insert rather than a re-sort of the whole list.
+create_hashlist([], HashLists, _, _) ->
+	HashLists;
+create_hashlist([HeadKey|Rest], HashLists, SlotCount, MaxHash) ->
+	{Slot, Hash} = get_slothash(HeadKey, MaxHash, SlotCount),
+	SlotHashes = ordsets:add_element(Hash, array:get(Slot, HashLists)),
+	UpdHashLists = array:set(Slot, SlotHashes, HashLists),
+	create_hashlist(Rest, UpdHashLists, SlotCount, MaxHash).
+
+%% Convert an array of hash lists into a serialised bloom.  Slots are encoded
+%% in index order and collected in reverse, then handed to finalise_bloom/1
+%% to be joined into a single bitstring.
+serialise_bloom(HashLists) ->
+	serialise_bloom(HashLists, array:size(HashLists), 0, []).
+
+%% Walk the slots from Counter up to SlotCount, encoding each in turn
+serialise_bloom(_, SlotCount, SlotCount, Blooms) ->
+	finalise_bloom(Blooms);
+serialise_bloom(HashLists, SlotCount, Counter, Blooms) ->
+	HashList = array:get(Counter, HashLists),
+	Bloom = serialise_singlebloom(HashList),
+	NextBlooms = [Bloom|Blooms],
+	serialise_bloom(HashLists, SlotCount, Counter + 1, NextBlooms).
+
+%% Encode a hash list as a bitstring of gaps (unary quotient + fixed-width
+%% remainder); returns {Bits, LastHash}, LastHash being 0 for an empty list.
+serialise_singlebloom(HashList) ->
+	serialise_singlebloom(HashList, <<>>, 0, ?DIVISOR, ?DIVISOR_BITS).
+
+serialise_singlebloom([], BloomStr, TopHash, _, _) ->
+	{BloomStr, TopHash};
+serialise_singlebloom([Hash|Rest], BloomStr, PrevHash, Divisor, Factor) ->
+	Gap = Hash - PrevHash,
+	Unary = buildexponent(Gap div Divisor),
+	Encoded = <<BloomStr/bitstring, Unary/bitstring, (Gap rem Divisor):Factor/integer>>,
+	serialise_singlebloom(Rest, Encoded, Hash, Divisor, Factor).
+
+%% Join a list of {SlotBits, TopHash} pairs into the final bitstring: an
+%% index of 24-bit top hash / 16-bit bit-length entries, followed by the
+%% encoded slots.  Each pair is prepended, so the output order is the
+%% reverse of the input list order.
+finalise_bloom(Blooms) ->
+	finalise_bloom(Blooms, {<<>>, <<>>}).
+
+finalise_bloom([], {BloomIndex, BloomStr}) ->
+	<<BloomIndex/bitstring, BloomStr/bitstring>>;
+finalise_bloom([{Slot, TopHash}|Rest], {IndexAcc, SlotsAcc}) ->
+	%% index entry and slot data are both pushed onto the front
+	SlotBits = bit_size(Slot),
+	Index = <<TopHash:24/integer, SlotBits:16/integer, IndexAcc/bitstring>>,
+	Slots = <<Slot/bitstring, SlotsAcc/bitstring>>,
+	finalise_bloom(Rest, {Index, Slots}).
+
+
+
+
+%% Unary-encode Exponent: that many 1 bits followed by a terminating 0 bit,
+%% e.g. 0 -> <<0:1>> and 2 -> <<1:1, 1:1, 0:1>>.  findexponent/1 is the
+%% matching decoder.
+buildexponent(Exponent) ->
+	OneBits = << <<1:1>> || _ <- lists:seq(1, Exponent) >>,
+	Terminator = <<0:1>>,
+	<<OneBits/bitstring, Terminator/bitstring>>.
+
+%% Count the leading 1 bits (the unary exponent) up to the terminating 0 bit.
+%% Returns {ok, Exponent, Tail}, or error if the bits run out first.
+findexponent(BitStr) ->
+	findexponent(BitStr, 0).
+
+findexponent(<<1:1, Tail/bitstring>>, Count) ->
+	findexponent(Tail, Count + 1);
+findexponent(<<0:1, Tail/bitstring>>, Count) ->
+	{ok, Count, Tail};
+findexponent(<<>>, _) ->
+	error.
+
+%% Take the fixed-width remainder (Factor bits) from the front of the bits.
+%% Returns {ok, Remainder, Tail}, or error when that cannot be matched.
+findremainder(BitStr, Factor)
+		when is_integer(Factor), Factor >= 0, bit_size(BitStr) >= Factor ->
+	<<Remainder:Factor/integer, Tail/bitstring>> = BitStr,
+	{ok, Remainder, Tail};
+findremainder(_, _) ->
+	error.
+
+%% Hash the key into 0..MaxHash-1, then split it into {Slot, HashWithinSlot}
+get_slothash(Key, MaxHash, SlotCount) ->
+	FullHash = erlang:phash2(Key, MaxHash),
+	{FullHash rem SlotCount, FullHash div SlotCount}.
+
+
+%%%%%%%%%%%%%%%%
+% T E S T 
+%%%%%%%%%%%%%%%  
+
+%% Flip one randomly chosen bit of the bloom (test helper).
+%% random:uniform/1 returns 1..Length, so one is subtracted to obtain a
+%% 0-based offset; an offset equal to Length would leave no bit to flip.
+corrupt_bloom(Bloom) ->
+	Length = bit_size(Bloom),
+	Offset = random:uniform(Length) - 1,
+	<<Part1:Offset/bitstring, Bit:1/integer, Rest1/bitstring>> = Bloom,
+	%% Bit is 0 or 1, so 1 - Bit is the inverted bit
+	Flipped = 1 - Bit,
+	<<Part1/bitstring, Flipped:1/integer, Rest1/bitstring>>.
+
+bloom_test() ->
+	Members = ["key1", "key2", "key3", "key4"],
+	Bloom = create_bloom(Members),
+	io:format("Bloom of ~w of length ~w ~n", [Bloom, bit_size(Bloom)]),
+	%% every key that was added must be reported as possibly present ...
+	lists:foreach(fun(Key) -> ?assertMatch(true, check_key(Key, Bloom)) end,
+		Members),
+	%% ... and this key, which was not added, as definitely absent
+	?assertMatch(false, check_key("key5", Bloom)).
+
+%% A single flipped bit should not turn a member into a definite miss:
+%% after corrupting one random bit, every original key is still expected to
+%% be reported as possibly present.  Six independent single-bit corruptions
+%% of the same original bloom are tried.
+bloom_corruption_test() ->
+	KeyList = ["key1", "key2", "key3", "key4"],
+	Bloom = create_bloom(KeyList),
+	Attempts = lists:seq(1, 6),
+	lists:foreach(
+		fun(_Attempt) ->
+			Corrupted = corrupt_bloom(Bloom),
+			?assertMatch(true, check_keys(KeyList, Corrupted))
+		end,
+		Attempts
+	).
+
+
diff --git a/test/lookup_test.beam b/test/lookup_test.beam
deleted file mode 100644
index 3c8d76474f8d8ff741da4eecc8fc903230a9a869..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4096
zcmZ`+du$wM5#ROR;~U49?A<zkC2`iib7K4aSjSEin})0tJC2hyPlvXo!S`C*m-ueY
z-Cmj$MT8cFDODTFA818^@COn?Dnvj*Admu5C5k8tr3lJHO$8Ak0Sf>1zw(>CyZHQI
zrMaD*Z)U!k-^}cHd*syFAxYZu{-Me7$HpByCrR>GB}p<iE!y_w($cJB6&>F~vbYei
z3X947yf2l^XZ$P}l6!rrt9Bv853`Ovo6TpetHF7DHj_;|KK@bi*3*HUm9q=iBJ;^Y
zDmib>rtQT=EA3?Me0|YLI@zL=O&0?Nc(rrEQa(Fp7jphZh>L-=olhqn-y9s**_ZuA
z$1YfMzuzhp>_PzhSWZ!ZyciWQfpW}&Y`$m}9D>Gt$|+cum`}~}z38`cOU^a_mE>Z{
z3Y3;INyqZ#?Yy-as>wnjdCi{Vu!9+EE?HW1{GLpiw;ZKtIqHg1*=@^;fURbq&N>U}
zWO~60B$t+~e8yMIK4bZFR$<<1@P22#jf<OdcBZsw0k1iG3!Kl^Ja_j0Kc|Dk{d1*!
zztJyAhwY3dNpuf&QbByVpfQjptFBrX=~QLCK~hacGh4U%6<7D^ldgW<uS&`_=>uF!
z1geH>)J4wfvbq1fHm*g7UGuu%R2Aw_+|%H6C90UiT0ozaPmk=?G&$iW>LSmo@{+sc
zxuOpgO}^pYsEfR;$~PNS?B{cD1|`GvMNOaPH(pcaTkb6^8}jY=>zce-mE7Aw$%LNy
zmI78its3ePU7PYvDW;}GR879qU>aecsf?J|U1>D6n0qIvCR9_MRO+<JDM?dUu;kwL
zsa@uX=H3hHH&j!aj8#`3@TySD%N&vjLuK@L%GiBHKcq<7Aw`YG?`!pqs%}iiqMA8^
zaFy`LvTC>@5;Sh&7YW{K3~R^)X&Ca|_<g0mS=GI5R_SY=zP3jH9yUFu$PZl6SQn{p
zQDvc)@iC`V&8%-#T(LE%->w1`ia;amiY8VzWD$vnfH%V>npf}wzX4!EYcLE`i7{Rp
zfNOL`%UZx#0!&&N6ZAw3+qVVvI}Dqz3Y*yG>1>Bir6|gj&r{tI)bF~Y{kmVe<pU&*
zQAqtpD1tz-*-pf~1L1G2G=R{9(!Ga?HDjg86&<Vu{pNV6rIJ`@O=6uJCf2gP6-0Dh
zYLFncSfv|8S5SYjsvX2GPhodWKXgN9G}Nk$h1zO5BGjhU>l?L1PtXvmD|%{c9BM{3
zt#LT0@7$;@dSQct_O7#Ob452gLu_+J9H2yj&-N;EeVCmJ_VqDLM300*^#yeid5C8_
z^mn+TpG^@#dpxwWg6BXDo&y`=xpQ*@jWXj6)`+Q1xMFY}nveiZ!)uh7D~5b($(+$#
zu?JCT<=zQHjz@3kCLVUruxB_@HQZY^^xE-}6<c(uEB5g?hIGK+Q44=ZWg;gqk$8c`
zlyGcWJ?)D9m@wXX!kf~GW!-SaDBwl!?Oz3NI{?w+a9W;J#J0-M*am(+RQ5Br&QAxz
z<T7=-;!$AT4cMdAG95sJuPf7mRb}deK4AJVn<A>NcxYFpOb2Vqba2Bm?OInRb6uGr
zL7B#@XdkMU>F^5FTqZnv!?04O3D0n{YIvlkOj9eiUYU-1Q?eUn>aHzQcV$Y}mT8)&
zWNn>}mGPZkg>M%S*&T=1^60Fs({b2@*E%{;c65B5qi$Cm<wEti;xVAz1K@i%>j(tx
zadM-1I_VXPkA+hkEuZ3Lue!tDsnqDb@P_$?*Qd)O?uygA4N~Zfhx#i;JyTQEGaDAQ
ze>4;al5s_ESi<=hZVbn86lHJ})z0JDb%llmg+5oU*Gx^l&ac$V6?nK|W)Q0qM&ch?
zS>iUp<C<xG+;jUx)$NltZa=zW>$SlJZ(0Ws{7|_K;-Mi-syR`eE+2346za1TMLQ50
zl*Is4)(xSjcCkEzvKYj1k3-899}B9YDIPMEX&iR5{O19qJ`)YkAQcv1&iS3?rRIu6
zIXK_E<c+q*uZRST4bfaS<db}6SvSi<V3Dk}jB4md^x-LGD#oVef|{~0%oy*7-OQ1h
zWfSj~RJqmCY=`r%V#n`eoP1?FRDZ}77r5{CxFUmw=!a;SbBf|+X`R$Xi!4T52`dAV
zEcTia#g$H~qE!};l%?p?eH^j3po=zH>_^DZH{fYBV1$O1b53D~|J+UImT;Gkg53sE
zKt51ig%8y5!3tlG`z8=ytl!{qWqvbw8MFn&{~wtTgCZ54Z8Z@4XMQV)eK0kFSl$L=
z|5cvjs`A?_d^_$tL3}Y^)nUE^#QvCPedfDBRiDi70<ll#cY|1m<LRy8<KQ`dmiJZU
zoD1u)E!zx&Sf|R@O<rX@r3^@b20*;NufpRd?FIFKSkAt>D|{4pCYE#DJ1RV5)KcLY
zQ>yTEq?G5qSeI=K5XUhJ@-F4z&;NNM>#)uUD1P;obRv;RB@*`*em$3X<K(Mv`@j48
zmy0)I=jxSDjGvi%GBtgvUMZeUJ<}w8@tfk$C+sxaE`u1mTL1V`@b((Uopa%R86(bx
zdB&A<VV>*2xiH@h;#`>Lp5R=VXa9^B=fQpE2QkLgUSxUQxZ@OX?|yC(zw$y|MpCX6
z^WH;Vl}caQ_l?ia{Hr<p=KO5z;`{TLE*+lE&q4EfPv6J-L3@e64vUw(hbva0h^Nbv
zfA4@XFv2pQlg(KRffS;6H|b9{C4<R5NsU(mMY~i;TbHG=!Snb8EDq+91qWY)rCi=B
z3}#XV+ja)6Me7PElSvKo$L(OvhwcEraW6^Ic&g|iCh0^rZ{hY1^Ofr*+*u}l>gzvy
z@4uu`Bel~`>Yz^QqFuC`dMQqQWY7RTN(bm5nRJ*YXp)Z7G##VkG(+d<ae9I-&{K4g
z5|pGA%~6IvP6cx4lk_Y-N1vw8&@z3Fo~O^#3-mI5fnK5)=@t44eT}|FuhO^aJ9L}A
zN8hK{=m+#e`YHW{eoQ~2U(g%$OZpYPNq6Zt^cMY=-lpHvALt$WBmIfqrN7X7^jG>D
P-KW3PKj`1|ALQ{rJu=`t

diff --git a/test/rice_test.erl b/test/rice_test.erl
new file mode 100644
index 0000000..1bbb43f
--- /dev/null
+++ b/test/rice_test.erl
@@ -0,0 +1,59 @@
+%% Test performance and accuracy of rice-encoded bloom filters
+%%
+%% Calling check_negative(2048, 1000000) should return about 122 false 
+%% positives in around 11 seconds, with a size below 4KB
+%%
+%% The equivalent positive check is check_positive(2048, 488) and this
+%% should take around 6 seconds.
+%%
+%% So a bloom with 2048 members should support o(100K) checks per second
+%% on a modern CPU, whilst requiring 2 bytes per member.
+
+-module(rice_test).
+
+-export([check_positive/2, check_negative/2, calc_hash/2]).
+
+
+%% Build a bloom from KeyCount generated keys, then check all of those keys
+%% against it LoopCount times.  Returns {ok, BloomSizeInBytes}.
+check_positive(KeyCount, LoopCount) ->
+	Keys = produce_keylist(KeyCount),
+	check_positive(Keys, leveled_rice:create_bloom(Keys), LoopCount).
+
+check_positive(_, Bloom, 0) ->
+	{ok, byte_size(Bloom)};
+check_positive(Keys, Bloom, Remaining) ->
+	true = leveled_rice:check_keys(Keys, Bloom),
+	check_positive(Keys, Bloom, Remaining - 1).
+
+
+%% Generate KeyCount string keys that share one randomly numbered
+%% "PositiveKey-" prefix and end in 1..KeyCount, in ascending order of
+%% that suffix.
+produce_keylist(KeyCount) ->
+	Prefix = lists:concat(["PositiveKey-", random:uniform(KeyCount)]),
+	[
+		lists:concat([Prefix, N])
+		|| N <- lists:seq(1, KeyCount)
+	].
+
+
+%% Build a bloom from KeyCount generated keys, then probe it with CheckCount
+%% generated "NegativeKey-" keys, counting how many are reported present.
+%% Returns {BloomSizeInBytes, FalsePositives}.
+check_negative(KeyCount, CheckCount) ->
+	Bloom = leveled_rice:create_bloom(produce_keylist(KeyCount)),
+	check_negative(Bloom, CheckCount, 0).
+
+check_negative(Bloom, 0, FalsePos) ->
+	{byte_size(Bloom), FalsePos};
+check_negative(Bloom, CheckCount, FalsePos) ->
+	Key = lists:concat(["NegativeKey-", CheckCount, random:uniform(CheckCount)]),
+	Hit = case leveled_rice:check_key(Key, Bloom) of true -> 1; false -> 0 end,
+	check_negative(Bloom, CheckCount - 1, FalsePos + Hit).
+
+%% Hash Count generated keys, discarding each result (presumably a timing aid)
+calc_hash(_, 0) -> ok;
+calc_hash(Key, Count) ->
+	erlang:phash2(lists:concat([Key, Count, "sometxt"])),
+	calc_hash(Key, Count - 1).