From 5bdb7fd7facc33b447f8914c1e65366c93d5dfcc Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Thu, 8 Dec 2016 23:38:50 +0000
Subject: [PATCH 01/34] Alter Riak HEAD

Change the extract of Riak metadata. In Riak-based volume tests the writing
of SFT files is tanking. Could this be the "extra" metadata? i.e. there are
only current plans to look at the vclock. Sibling count is free to fetch, so
what if we just get these two items: will it be less CPU to extract the
metadata, and will the reduced weight also reduce the downstream impact?
---
 src/leveled_codec.erl | 49 ++++++++-----------------------------------
 1 file changed, 9 insertions(+), 40 deletions(-)

diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index 8903198..19e9c9f 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -331,8 +331,8 @@ build_metadata_object(PrimaryKey, MD) ->
     {Tag, _Bucket, _Key, null} = PrimaryKey,
     case Tag of
         ?RIAK_TAG ->
-            {SibMetaBinList, Vclock, _Hash, _Size} = MD,
-            riak_metadata_to_binary(Vclock, SibMetaBinList);
+            {SibCount, Vclock, _Hash, _Size} = MD,
+            riak_metadata_to_binary(Vclock, SibCount);
         ?STD_TAG ->
             MD
     end.
@@ -341,55 +341,24 @@
 riak_extract_metadata(delete, Size) ->
     {delete, null, null, Size};
 riak_extract_metadata(ObjBin, Size) ->
-    {Vclock, SibMetaBinList} = riak_metadata_from_binary(ObjBin),
-    {SibMetaBinList, Vclock, erlang:phash2(ObjBin), Size}.
+    {Vclock, SibCount} = riak_metadata_from_binary(ObjBin),
+    {SibCount, Vclock, erlang:phash2(ObjBin), Size}.
 
 %% <<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
 %%     VclockBin/binary, SibCount:32/integer, SibsBin/binary>>.
 
-riak_metadata_to_binary(Vclock, SibMetaBinList) ->
+riak_metadata_to_binary(Vclock, SibCount) ->
     VclockBin = term_to_binary(Vclock),
     VclockLen = byte_size(VclockBin),
-    SibCount = length(SibMetaBinList),
-    SibsBin = slimbin_contents(SibMetaBinList),
     <<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
-        VclockBin:VclockLen/binary, SibCount:32/integer, SibsBin/binary>>.
+        VclockBin:VclockLen/binary, SibCount:32/integer>>.
 
-% Fixes the value length for each sibling to be zero, and so includes no value
-slimbin_content(MetaBin) ->
-    MetaLen = byte_size(MetaBin),
-    <<0:32/integer, MetaLen:32/integer, MetaBin:MetaLen/binary>>.
-
-slimbin_contents(SibMetaBinList) ->
-    F = fun(MetaBin, Acc) ->
-            <<Acc/binary, (slimbin_content(MetaBin))/binary>>
-        end,
-    lists:foldl(F, <<>>, SibMetaBinList).
-
 riak_metadata_from_binary(V1Binary) ->
     <<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
         Rest/binary>> = V1Binary,
-    <<VclockBin:VclockLen/binary, SibCount:32/integer, SibsBin/binary>> = Rest,
-    SibMetaBinList =
-        case SibCount of
-            0 ->
-                [];
-            SC when is_integer(SC) ->
-                get_metadata_from_siblings(SibsBin, SibCount, [])
-        end,
-    {binary_to_term(VclockBin), SibMetaBinList}.
-
-get_metadata_from_siblings(<<>>, 0, SibMetaBinList) ->
-    SibMetaBinList;
-get_metadata_from_siblings(<<ValLen:32/integer, Rest0/binary>>,
-                            SibCount,
-                            SibMetaBinList) ->
-    <<_ValBin:ValLen/binary, MetaLen:32/integer, Rest1/binary>> = Rest0,
-    <<MetaBin:MetaLen/binary, Rest2/binary>> = Rest1,
-    get_metadata_from_siblings(Rest2,
-                                SibCount - 1,
-                                [MetaBin|SibMetaBinList]).
-
+    <<VclockBin:VclockLen/binary, SibCount:32/integer, _Rest/binary>> = Rest,
+    {binary_to_term(VclockBin), SibCount}.
+

From 349d194a7cf465217345b4ba6c87f4ba99ddfde6 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Fri, 9 Dec 2016 09:52:31 +0000
Subject: [PATCH 02/34] Increase jitter slightly

---
 src/leveled_bookie.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl
index 30e56e4..891d6a4 100644
--- a/src/leveled_bookie.erl
+++ b/src/leveled_bookie.erl
@@ -145,7 +145,7 @@
 -define(LEDGER_FP, "ledger").
 -define(SNAPSHOT_TIMEOUT, 300000).
 -define(CHECKJOURNAL_PROB, 0.2).
--define(CACHE_SIZE_JITTER, 20).
+-define(CACHE_SIZE_JITTER, 25).
 -define(JOURNAL_SIZE_JITTER, 10).
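
%% A minimal sketch (not part of the patch) of the slimmed metadata round
%% trip from PATCH 01: only the vclock and the 32-bit sibling count survive
%% the encode. The literals 53 and 1 mirror ?MAGIC and ?V1_VERS from
%% leveled_codec.

slim_roundtrip(Vclock, SibCount) ->
    VclockBin = term_to_binary(Vclock),
    VclockLen = byte_size(VclockBin),
    Bin = <<53:8/integer, 1:8/integer, VclockLen:32/integer,
            VclockBin:VclockLen/binary, SibCount:32/integer>>,
    <<53:8/integer, 1:8/integer, VL:32/integer,
        VB:VL/binary, SC:32/integer>> = Bin,
    {binary_to_term(VB), SC} =:= {Vclock, SibCount}.
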
 -record(state, {inker :: pid(),

From 82cb49638a1919ae1c3e6a0cd8f4cf9c1144f79e Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Fri, 9 Dec 2016 14:36:03 +0000
Subject: [PATCH 03/34] Attempt at performance improvement

Try to add some extra jitter into the process of L0 writes, and also make
L0 writes delayed to help with buffering
---
 include/leveled.hrl | 3 ++-
 src/leveled_bookie.erl | 5 +++--
 src/leveled_penciller.erl | 28 ++++++++++++++++++++++++----
 src/leveled_sft.erl | 4 +++-
 4 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/include/leveled.hrl b/include/leveled.hrl
index 0e62cf3..25216f6 100644
--- a/include/leveled.hrl
+++ b/include/leveled.hrl
@@ -64,7 +64,8 @@
 {root_path :: string(),
 max_inmemory_tablesize :: integer(),
 start_snapshot = false :: boolean(),
- source_penciller :: pid()}).
+ source_penciller :: pid(),
+ levelzero_cointoss = false :: boolean}).
 
 -record(iclerk_options,
 {inker :: pid(),
diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl
index 891d6a4..ce444fb 100644
--- a/src/leveled_bookie.erl
+++ b/src/leveled_bookie.erl
@@ -146,7 +146,7 @@
 -define(SNAPSHOT_TIMEOUT, 300000).
 -define(CHECKJOURNAL_PROB, 0.2).
 -define(CACHE_SIZE_JITTER, 25).
--define(JOURNAL_SIZE_JITTER, 10).
+-define(JOURNAL_SIZE_JITTER, 20).
 
 -record(state, {inker :: pid(),
 penciller :: pid(),
@@ -692,7 +692,8 @@ set_options(Opts) ->
 binary_mode=true,
 sync_strategy=SyncStrat}},
 #penciller_options{root_path = LedgerFP,
- max_inmemory_tablesize = PCLL0CacheSize}}.
+ max_inmemory_tablesize = PCLL0CacheSize,
+ levelzero_cointoss = true}}.
 
 startup(InkerOpts, PencillerOpts) ->
 {ok, Inker} = leveled_inker:ink_start(InkerOpts),
diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl
index 94bac54..be62cf9 100644
--- a/src/leveled_penciller.erl
+++ b/src/leveled_penciller.erl
@@ -198,7 +198,7 @@
 -define(MAX_TABLESIZE, 32000).
 -define(PROMPT_WAIT_ONL0, 5).
 -define(WORKQUEUE_BACKLOG_TOLERANCE, 4).
-
+-define(COIN_SIDECOUNT, 4).
 
 -record(state, {manifest = [] :: list(),
 manifest_sqn = 0 :: integer(),
@@ -217,6 +217,7 @@
 % is an array - but cannot specify due to OTP compatibility
 levelzero_size = 0 :: integer(),
 levelzero_maxcachesize :: integer(),
+ levelzero_cointoss = false :: boolean(),
 
 is_snapshot = false :: boolean(),
 snapshot_fully_loaded = false :: boolean(),
@@ -537,10 +538,17 @@ start_from_file(PCLopts) ->
 end,
 
 {ok, MergeClerk} = leveled_pclerk:clerk_new(self()),
+
+ CoinToss = PCLopts#penciller_options.levelzero_cointoss,
+ % Used to randomly defer the writing of the L0 file. Intended to help with
+ % vnode synchronisation issues (e.g. 
stop them all by default merging to + % level zero concurrently) + InitState = #state{clerk=MergeClerk, root_path=RootPath, levelzero_index = leveled_pmem:new_index(), - levelzero_maxcachesize=MaxTableSize}, + levelzero_maxcachesize=MaxTableSize, + levelzero_cointoss=CoinToss}, %% Open manifest ManifestPath = InitState#state.root_path ++ "/" ++ ?MANIFEST_FP ++ "/", @@ -629,8 +637,20 @@ update_levelzero(L0Index, L0Size, PushedTree, LedgerSQN, L0Cache, State) -> ledger_sqn=MaxSQN}, CacheTooBig = NewL0Size > State#state.levelzero_maxcachesize, Level0Free = length(get_item(0, State#state.manifest, [])) == 0, - case {CacheTooBig, Level0Free} of - {true, true} -> + RandomFactor = + case State#state.levelzero_cointoss of + true -> + case random:uniform(?COIN_SIDECOUNT) of + 1 -> + true; + _ -> + false + end; + false -> + true + end, + case {CacheTooBig, Level0Free, RandomFactor} of + {true, true, true} -> L0Constructor = roll_memory(UpdState, false), UpdState#state{levelzero_pending=true, levelzero_constructor=L0Constructor}; diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl index 70b0b0f..9c67721 100644 --- a/src/leveled_sft.erl +++ b/src/leveled_sft.erl @@ -192,6 +192,8 @@ -define(DELETE_TIMEOUT, 10000). -define(MAX_KEYS, ?SLOT_COUNT * ?BLOCK_COUNT * ?BLOCK_SIZE). -define(DISCARD_EXT, ".discarded"). +-define(WRITE_OPS, [binary, raw, read, write, delayed_write]). +-define(READ_OPS, [binary, raw, read]). -record(state, {version = ?CURRENT_VERSION :: tuple(), slot_index :: list(), @@ -469,7 +471,7 @@ generate_filenames(RootFilename) -> create_file(FileName) when is_list(FileName) -> leveled_log:log("SFT01", [FileName]), ok = filelib:ensure_dir(FileName), - {ok, Handle} = file:open(FileName, [binary, raw, read, write]), + {ok, Handle} = file:open(FileName, ?WRITE_OPS), Header = create_header(initial), {ok, _} = file:position(Handle, bof), ok = file:write(Handle, Header), From f0db730f07dc83f1eebe9324223a3a7f810c1431 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 9 Dec 2016 16:34:15 +0000 Subject: [PATCH 04/34] Adjust jitter settings --- src/leveled_bookie.erl | 4 ++-- src/leveled_penciller.erl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index ce444fb..6781e08 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -233,7 +233,7 @@ init([Opts]) -> {Inker, Penciller} = startup(InkerOpts, PencillerOpts), CacheJitter = ?CACHE_SIZE div (100 div ?CACHE_SIZE_JITTER), CacheSize = get_opt(cache_size, Opts, ?CACHE_SIZE) - + erlang:phash2(self()) band CacheJitter, + + erlang:phash2(self()) rem CacheJitter, leveled_log:log("B0001", [Inker, Penciller]), {ok, #state{inker=Inker, penciller=Penciller, @@ -668,7 +668,7 @@ set_options(Opts) -> MaxJournalSize0 = get_opt(max_journalsize, Opts, 10000000000), JournalSizeJitter = MaxJournalSize0 div (100 div ?JOURNAL_SIZE_JITTER), MaxJournalSize = MaxJournalSize0 - - erlang:phash2(self()) band JournalSizeJitter, + erlang:phash2(self()) rem JournalSizeJitter, SyncStrat = get_opt(sync_strategy, Opts, sync), WRP = get_opt(waste_retention_period, Opts), diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index be62cf9..ab770a5 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -198,7 +198,7 @@ -define(MAX_TABLESIZE, 32000). -define(PROMPT_WAIT_ONL0, 5). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). --define(COIN_SIDECOUNT, 4). +-define(COIN_SIDECOUNT, 2). 
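
%% A minimal sketch (hypothetical helper, not in the patch) of the gate the
%% penciller now applies: with ?COIN_SIDECOUNT at 2, roughly half of the
%% otherwise-eligible pushes defer the L0 write to a later push, spreading
%% concurrent vnode L0 writes out over time.

should_roll_l0(CacheTooBig, Level0Free, CoinToss) ->
    RandomFactor =
        case CoinToss of
            true -> random:uniform(2) == 1;
            false -> true
        end,
    CacheTooBig andalso Level0Free andalso RandomFactor.
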
 -record(state, {manifest = [] :: list(),
 manifest_sqn = 0 :: integer(),

From d2bd01eaf12dc73243ee9a408833debaf95a18a4 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Fri, 9 Dec 2016 18:30:40 +0000
Subject: [PATCH 05/34] Add fast fail to skiplist

Add a bloom filter to the skiplist, to make it faster at returning not
found. The SkipList is now encapsulated within a dict().
---
 src/leveled_bookie.erl | 2 +-
 src/leveled_penciller.erl | 2 +-
 src/leveled_skiplist.erl | 104 +++++++++++++++++++++++++++++---------
 3 files changed, 81 insertions(+), 27 deletions(-)

diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl
index 6781e08..78dbed8 100644
--- a/src/leveled_bookie.erl
+++ b/src/leveled_bookie.erl
@@ -151,7 +151,7 @@
 -record(state, {inker :: pid(),
 penciller :: pid(),
 cache_size :: integer(),
- ledger_cache :: list(), % a skiplist
+ ledger_cache :: dict:dict(), % a skiplist
 is_snapshot :: boolean(),
 slow_offer = false :: boolean()}).
 
diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl
index ab770a5..fb8ef02 100644
--- a/src/leveled_penciller.erl
+++ b/src/leveled_penciller.erl
@@ -222,7 +222,7 @@
 is_snapshot = false :: boolean(),
 snapshot_fully_loaded = false :: boolean(),
 source_penciller :: pid(),
- levelzero_astree :: list(), % skiplist
+ levelzero_astree :: list(),
 ongoing_work = [] :: list(),
 work_backlog = false :: boolean()}).
 
diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl
index b9d9af4..5cf8961 100644
--- a/src/leveled_skiplist.erl
+++ b/src/leveled_skiplist.erl
@@ -23,6 +23,7 @@
 to_range/2,
 to_range/3,
 lookup/2,
+ lookup/3,
 empty/0,
 size/1
 ]).
@@ -32,50 +33,98 @@
 -define(SKIP_WIDTH, 16).
 -define(LIST_HEIGHT, 2).
 -define(INFINITY_KEY, {null, null, null, null, null}).
-
+-define(BITARRAY_SIZE, 2048).
 
 %%%============================================================================
 %%% SkipList API
 %%%============================================================================
 
 enter(Key, Value, SkipList) ->
-    enter(Key, Value, SkipList, ?SKIP_WIDTH, ?LIST_HEIGHT).
+    Hash = erlang:phash2(Key),
+    SkipList0 = add_to_array(Hash, SkipList),
+    NewListPart = enter(Key, Value, Hash,
+                        dict:fetch(?SKIP_WIDTH, SkipList0),
+                        ?SKIP_WIDTH, ?LIST_HEIGHT),
+    dict:store(?SKIP_WIDTH, NewListPart, SkipList0).
 
 from_list(UnsortedKVL) ->
     KVL = lists:ukeysort(1, UnsortedKVL),
-    from_list(KVL, ?SKIP_WIDTH, ?LIST_HEIGHT).
+    from_sortedlist(KVL).
 
 from_sortedlist(SortedKVL) ->
-    from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT).
+    SL0 = lists:foldr(fun({K, _V}, SkipL) ->
+                          H = erlang:phash2(K),
+                          add_to_array(H, SkipL) end,
+                      empty(),
+                      SortedKVL),
+    dict:store(?SKIP_WIDTH,
+               from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT),
+               SL0).
 
 lookup(Key, SkipList) ->
-    lookup(Key, SkipList, ?LIST_HEIGHT).
+    lookup(Key, erlang:phash2(Key), SkipList).
+
+lookup(Key, Hash, SkipList) ->
+    {Slot, Bit} = hash_toslotbit(Hash),
+    RestLen = ?BITARRAY_SIZE - Bit - 1,
+    <<_Head:Bit/bitstring,
+        B:1/bitstring,
+        _Rest:RestLen/bitstring>> = dict:fetch(Slot, SkipList),
+    case B of
+        <<0:1>> ->
+            none;
+        <<1:1>> ->
+            list_lookup(Key, dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT)
+    end.
 
 %% Rather than support iterator_from like gb_trees, will just output a
 %% key-sorted list for the desired range, which can then be iterated over
 %% as normal
 to_range(SkipList, Start) ->
-    to_range(SkipList, Start, ?INFINITY_KEY, ?LIST_HEIGHT).
+    to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT).
 
 to_range(SkipList, Start, End) ->
-    to_range(SkipList, Start, End, ?LIST_HEIGHT).
+    to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, End, ?LIST_HEIGHT).
 
 to_list(SkipList) ->
-    to_list(SkipList, ?LIST_HEIGHT).
+    to_list(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT).
 
 empty() ->
-    empty([], ?LIST_HEIGHT).
+    FoldFun =
+        fun(X, Acc) -> dict:store(X, <<0:?BITARRAY_SIZE>>, Acc) end,
+    lists:foldl(FoldFun,
+                dict:store(?SKIP_WIDTH,
+                           empty([], ?LIST_HEIGHT),
+                           dict:new()),
+                lists:seq(0, ?SKIP_WIDTH - 1)).
+
+
 size(SkipList) ->
-    size(SkipList, ?LIST_HEIGHT).
+    size(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT).
+
 
 %%%============================================================================
 %%% SkipList Base Functions
 %%%============================================================================
 
-enter(Key, Value, SkipList, Width, 1) ->
-    Hash = erlang:phash2(Key),
+hash_toslotbit(Hash) ->
+    Slot = Hash band (?SKIP_WIDTH - 1),
+    Bit = (Hash bsr ?SKIP_WIDTH) band (?BITARRAY_SIZE - 1),
+    {Slot, Bit}.
+
+
+add_to_array(Hash, SkipList) ->
+    {Slot, Bit} = hash_toslotbit(Hash),
+    RestLen = ?BITARRAY_SIZE - Bit - 1,
+    <<Head:Bit/bitstring,
+        _B:1/bitstring,
+        Rest:RestLen/bitstring>> = dict:fetch(Slot, SkipList),
+    BitArray = <<Head/bitstring, 1:1, Rest/bitstring>>,
+    dict:store(Slot, BitArray, SkipList).
+
+enter(Key, Value, Hash, SkipList, Width, 1) ->
     {MarkerKey, SubList} = find_mark(Key, SkipList),
     case Hash rem Width of
         0 ->
@@ -101,11 +150,10 @@ enter(Key, Value, SkipList, Width, 1) ->
         end,
         lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
     end;
-enter(Key, Value, SkipList, Width, Level) ->
-    Hash = erlang:phash2(Key),
+enter(Key, Value, Hash, SkipList, Width, Level) ->
     HashMatch = width(Level, Width),
     {MarkerKey, SubSkipList} = find_mark(Key, SkipList),
-    UpdSubSkipList = enter(Key, Value, SubSkipList, Width, Level - 1),
+    UpdSubSkipList = enter(Key, Value, Hash, SubSkipList, Width, Level - 1),
     case Hash rem HashMatch of
         0 ->
             %
@@ -171,7 +219,7 @@ from_list(KVL, Width, Level) ->
     end.
 
 
-lookup(Key, SkipList, 1) ->
+list_lookup(Key, SkipList, 1) ->
     SubList = get_sublist(Key, SkipList),
     case lists:keyfind(Key, 1, SubList) of
         false ->
@@ -179,13 +227,13 @@
         {Key, V} ->
             {value, V}
     end;
-lookup(Key, SkipList, Level) ->
+list_lookup(Key, SkipList, Level) ->
     SubList = get_sublist(Key, SkipList),
     case SubList of
         null ->
             none;
         _ ->
-            lookup(Key, SubList, Level - 1)
+            list_lookup(Key, SubList, Level - 1)
     end.
 
@@ -385,16 +433,19 @@ dotest_skiplist_small(N) ->
     lists:ukeysort(1, lists:reverse(KL))).
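
%% For clarity, a self-contained sketch (hypothetical names) of the bloom
%% mechanics added above: each of the 16 slots holds a 2048-bit bitstring,
%% and a key's phash2 value selects one slot and one bit within it.

bloom_member(Hash, BitArray) ->
    Bit = (Hash bsr 16) band (2048 - 1),
    RestLen = 2048 - Bit - 1,
    <<_Head:Bit/bitstring, B:1/bitstring, _Rest:RestLen/bitstring>> = BitArray,
    B == <<1:1>>.
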
skiplist_test() -> - N = 8000, + N = 4000, KL = generate_randomkeys(1, N, 1, N div 5), SWaGSL = os:timestamp(), SkipList = from_list(lists:reverse(KL)), io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++ "Top level key count of ~w~n", - [N, timer:now_diff(os:timestamp(), SWaGSL), length(SkipList)]), + [N, + timer:now_diff(os:timestamp(), SWaGSL), + length(dict:fetch(?SKIP_WIDTH, SkipList))]), io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, SkipList)]), + [lists:map(fun({_L, SL}) -> length(SL) end, + dict:fetch(?SKIP_WIDTH, SkipList))]), KLSorted = lists:ukeysort(1, lists:reverse(KL)), SWaGSL2 = os:timestamp(), @@ -413,9 +464,12 @@ skiplist_test() -> io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++ "microseconds~n" ++ "Top level key count of ~w~n", - [N, timer:now_diff(os:timestamp(), SWaDSL), length(SkipList1)]), + [N, + timer:now_diff(os:timestamp(), SWaDSL), + length(dict:fetch(?SKIP_WIDTH, SkipList1))]), io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, SkipList1)]), + [lists:map(fun({_L, SL}) -> length(SL) end, + dict:fetch(?SKIP_WIDTH, SkipList1))]), io:format(user, "~nRunning timing tests for generated skiplist:~n", []), skiplist_timingtest(KLSorted, SkipList, N), @@ -482,13 +536,13 @@ skiplist_timingtest(KL, SkipList, N) -> io:format(user, "Finding 10 ranges took ~w microseconds~n", [timer:now_diff(os:timestamp(), SWc)]), - AltKL1 = generate_randomkeys(1, 1000, 1, 200), + AltKL1 = generate_randomkeys(1, 2000, 1, 200), SWd = os:timestamp(), lists:foreach(fun({K, _V}) -> lookup(K, SkipList) end, AltKL1), - io:format(user, "Getting 1000 mainly missing keys took ~w microseconds~n", + io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n", [timer:now_diff(os:timestamp(), SWd)]), AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300), SWe = os:timestamp(), From a3f60e36099c58958d88b7cbd798d2763ea39bca Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 9 Dec 2016 18:55:13 +0000 Subject: [PATCH 06/34] OTP version shenanigans --- src/leveled_bookie.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 78dbed8..3e335a2 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -151,7 +151,7 @@ -record(state, {inker :: pid(), penciller :: pid(), cache_size :: integer(), - ledger_cache :: dict:dict(), % a skiplist + ledger_cache, % a skiplist is_snapshot :: boolean(), slow_offer = false :: boolean()}). From 626a8e63f914b416e1db7dba603bf120b485c32f Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 10 Dec 2016 10:55:35 +0000 Subject: [PATCH 07/34] Experiment converting CDB to use skiplist not gb_tree Might insertion time be faster? 
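
One way to answer that question directly is to time dynamic inserts into both
structures; a rough sketch (assuming both modules are on the code path, and
that sequential keys are representative enough for a first look):

    compare_insert_times(N) ->
        KVs = [{{o, "B", integer_to_list(X)}, X} || X <- lists:seq(1, N)],
        {GBTime, _} =
            timer:tc(fun() ->
                        lists:foldl(fun({K, V}, T) -> gb_trees:enter(K, V, T) end,
                                    gb_trees:empty(),
                                    KVs)
                     end),
        {SLTime, _} =
            timer:tc(fun() ->
                        lists:foldl(fun({K, V}, SL) ->
                                        leveled_skiplist:enter(K, V, SL)
                                    end,
                                    leveled_skiplist:empty(),
                                    KVs)
                     end),
        {gb_trees_microsec, GBTime, skiplist_microsec, SLTime}.
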
--- src/leveled_bookie.erl | 6 +- src/leveled_cdb.erl | 52 ++++++------ src/leveled_penciller.erl | 2 +- src/leveled_skiplist.erl | 169 ++++++++++++++++++++++++++++++-------- 4 files changed, 166 insertions(+), 63 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 3e335a2..a50e9fa 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -238,14 +238,14 @@ init([Opts]) -> {ok, #state{inker=Inker, penciller=Penciller, cache_size=CacheSize, - ledger_cache=leveled_skiplist:empty(), + ledger_cache=leveled_skiplist:empty(true), is_snapshot=false}}; Bookie -> {ok, {Penciller, LedgerCache}, Inker} = book_snapshotstore(Bookie, self(), ?SNAPSHOT_TIMEOUT), ok = leveled_penciller:pcl_loadsnapshot(Penciller, - leveled_skiplist:empty()), + leveled_skiplist:empty(true)), leveled_log:log("B0002", [Inker, Penciller]), {ok, #state{penciller=Penciller, inker=Inker, @@ -885,7 +885,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) -> TimeToPush -> case leveled_penciller:pcl_pushmem(Penciller, Cache) of ok -> - {ok, leveled_skiplist:empty()}; + {ok, leveled_skiplist:empty(true)}; returned -> {returned, Cache} end; diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 1354571..8e4451c 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -861,24 +861,28 @@ get_hashtree(Key, HashTree) -> Hash = hash(Key), Index = hash_to_index(Hash), Tree = array:get(Index, HashTree), - case gb_trees:lookup(Hash, Tree) of + case leveled_skiplist:lookup(Hash, Tree) of {value, List} -> List; _ -> [] end. -%% Add to hash tree - this is an array of 256 gb_trees that contains the Hash +%% Add to hash tree - this is an array of 256 skiplists that contains the Hash %% and position of objects which have been added to an open CDB file put_hashtree(Key, Position, HashTree) -> Hash = hash(Key), Index = hash_to_index(Hash), Tree = array:get(Index, HashTree), - case gb_trees:lookup(Hash, Tree) of + case leveled_skiplist:lookup(Hash, Tree) of none -> - array:set(Index, gb_trees:insert(Hash, [Position], Tree), HashTree); + array:set(Index, + leveled_skiplist:enter(Hash, [Position], Tree), + HashTree); {value, L} -> - array:set(Index, gb_trees:update(Hash, [Position|L], Tree), HashTree) + array:set(Index, + leveled_skiplist:enter(Hash, [Position|L], Tree), + HashTree) end. %% Function to extract a Key-Value pair given a file handle and a position @@ -920,7 +924,7 @@ extract_key_value_check(Handle, Position) -> %% Scan through the file until there is a failure to crc check an input, and %% at that point return the position and the key dictionary scanned so far startup_scan_over_file(Handle, Position) -> - HashTree = array:new(256, {default, gb_trees:empty()}), + HashTree = array:new(256, {default, leveled_skiplist:empty()}), scan_over_file(Handle, Position, fun startup_filter/5, @@ -1148,7 +1152,7 @@ search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key, QuickCheck) -> % key/value binary in the file. write_key_value_pairs(Handle, KeyValueList) -> {ok, Position} = file:position(Handle, cur), - HashTree = array:new(256, {default, gb_trees:empty()}), + HashTree = array:new(256, {default, leveled_skiplist:empty()}), write_key_value_pairs(Handle, KeyValueList, {Position, HashTree}). 
write_key_value_pairs(_, [], Acc) -> @@ -1181,11 +1185,11 @@ write_hash_tables([], _HashTree, _CurrPos, IndexList, HashTreeBin) -> {IndexList, HashTreeBin}; write_hash_tables([Index|Rest], HashTree, CurrPos, IndexList, HashTreeBin) -> Tree = array:get(Index, HashTree), - case gb_trees:keys(Tree) of - [] -> + case leveled_skiplist:size(Tree) of + 0 -> write_hash_tables(Rest, HashTree, CurrPos, IndexList, HashTreeBin); _ -> - HashList = gb_trees:to_list(Tree), + HashList = leveled_skiplist:to_list(Tree), BinList = build_binaryhashlist(HashList, []), IndexLength = length(BinList) * 2, SlotList = lists:duplicate(IndexLength, <<0:32, 0:32>>), @@ -1402,16 +1406,16 @@ write_key_value_pairs_1_test() -> Index1 = hash_to_index(Hash1), Hash2 = hash("key2"), Index2 = hash_to_index(Hash2), - R0 = array:new(256, {default, gb_trees:empty()}), + R0 = array:new(256, {default, leveled_skiplist:empty()}), R1 = array:set(Index1, - gb_trees:insert(Hash1, - [0], - array:get(Index1, R0)), + leveled_skiplist:enter(Hash1, + [0], + array:get(Index1, R0)), R0), R2 = array:set(Index2, - gb_trees:insert(Hash2, - [30], - array:get(Index2, R1)), + leveled_skiplist:enter(Hash2, + [30], + array:get(Index2, R1)), R1), io:format("HashTree is ~w~n", [HashTree]), io:format("Expected HashTree is ~w~n", [R2]), @@ -1421,16 +1425,16 @@ write_key_value_pairs_1_test() -> write_hash_tables_1_test() -> {ok, Handle} = file:open("../test/testx.cdb", [write]), - R0 = array:new(256, {default, gb_trees:empty()}), + R0 = array:new(256, {default, leveled_skiplist:empty()}), R1 = array:set(64, - gb_trees:insert(6383014720, - [18], - array:get(64, R0)), + leveled_skiplist:enter(6383014720, + [18], + array:get(64, R0)), R0), R2 = array:set(67, - gb_trees:insert(6383014723, - [0], - array:get(67, R1)), + leveled_skiplist:enter(6383014723, + [0], + array:get(67, R1)), R1), Result = write_hash_tables(Handle, R2), io:format("write hash tables result of ~w ~n", [Result]), diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index fb8ef02..dc83474 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -1283,7 +1283,7 @@ confirm_delete_test() -> maybe_pause_push(PCL, KL) -> - T0 = leveled_skiplist:empty(), + T0 = leveled_skiplist:empty(true), T1 = lists:foldl(fun({K, V}, Acc) -> leveled_skiplist:enter(K, V, Acc) end, T0, KL), diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl index 5cf8961..63a3842 100644 --- a/src/leveled_skiplist.erl +++ b/src/leveled_skiplist.erl @@ -17,7 +17,9 @@ -export([ from_list/1, + from_list/2, from_sortedlist/1, + from_sortedlist/2, to_list/1, enter/3, to_range/2, @@ -25,6 +27,7 @@ lookup/2, lookup/3, empty/0, + empty/1, size/1 ]). @@ -41,28 +44,49 @@ enter(Key, Value, SkipList) -> Hash = erlang:phash2(Key), - SkipList0 = add_to_array(Hash, SkipList), - NewListPart = enter(Key, Value, Hash, - dict:fetch(?SKIP_WIDTH, SkipList0), - ?SKIP_WIDTH, ?LIST_HEIGHT), - dict:store(?SKIP_WIDTH, NewListPart, SkipList0). + case is_list(SkipList) of + true -> + enter(Key, Value, Hash, SkipList, ?SKIP_WIDTH, ?LIST_HEIGHT); + false -> + SkipList0 = add_to_array(Hash, SkipList), + NewListPart = enter(Key, Value, Hash, + dict:fetch(?SKIP_WIDTH, SkipList0), + ?SKIP_WIDTH, ?LIST_HEIGHT), + dict:store(?SKIP_WIDTH, NewListPart, SkipList0) + end. from_list(UnsortedKVL) -> + from_list(UnsortedKVL, false). + +from_list(UnsortedKVL, BloomProtect) -> KVL = lists:ukeysort(1, UnsortedKVL), - from_sortedlist(KVL). + from_sortedlist(KVL, BloomProtect). 
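
%% Example usage of the new arity-2 constructors (a sketch): the boolean
%% opts in to the bloom-protected, dict-encapsulated representation, while
%% the arity-1 forms keep the original plain-list skiplist.

build_both(KVL) ->
    Plain = leveled_skiplist:from_list(KVL),
    Bloomed = leveled_skiplist:from_list(KVL, true),
    {Plain, Bloomed}.
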
from_sortedlist(SortedKVL) -> - SL0 = lists:foldr(fun({K, _V}, SkipL) -> - H = erlang:phash2(K), - add_to_array(H, SkipL) end, - empty(), - SortedKVL), - dict:store(?SKIP_WIDTH, - from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT), - SL0). + from_sortedlist(SortedKVL, false). + +from_sortedlist(SortedKVL, BloomProtect) -> + case BloomProtect of + true -> + SL0 = lists:foldr(fun({K, _V}, SkipL) -> + H = erlang:phash2(K), + add_to_array(H, SkipL) end, + empty(true), + SortedKVL), + dict:store(?SKIP_WIDTH, + from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT), + SL0); + false -> + from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT) + end. lookup(Key, SkipList) -> - lookup(Key, erlang:phash2(Key), SkipList). + case is_list(SkipList) of + true -> + list_lookup(Key, SkipList, ?LIST_HEIGHT); + false -> + lookup(Key, erlang:phash2(Key), SkipList) + end. lookup(Key, Hash, SkipList) -> {Slot, Bit} = hash_toslotbit(Hash), @@ -81,27 +105,57 @@ lookup(Key, Hash, SkipList) -> %% Rather than support iterator_from like gb_trees, will just an output a key %% sorted list for the desired range, which can the be iterated over as normal to_range(SkipList, Start) -> - to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT). + case is_list(SkipList) of + true -> + to_range(SkipList, Start, ?INFINITY_KEY, ?LIST_HEIGHT); + false -> + to_range(dict:fetch(?SKIP_WIDTH, SkipList), + Start, ?INFINITY_KEY, + ?LIST_HEIGHT) + end. to_range(SkipList, Start, End) -> - to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, End, ?LIST_HEIGHT). + case is_list(SkipList) of + true -> + to_range(SkipList, Start, End, ?LIST_HEIGHT); + false -> + to_range(dict:fetch(?SKIP_WIDTH, SkipList), + Start, End, + ?LIST_HEIGHT) + end. to_list(SkipList) -> - to_list(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT). + case is_list(SkipList) of + true -> + to_list(SkipList, ?LIST_HEIGHT); + false -> + to_list(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT) + end. empty() -> - FoldFun = - fun(X, Acc) -> dict:store(X, <<0:?BITARRAY_SIZE>>, Acc) end, - lists:foldl(FoldFun, - dict:store(?SKIP_WIDTH, - empty([], ?LIST_HEIGHT), - dict:new()), - lists:seq(0, ?SKIP_WIDTH - 1)). - + empty(false). +empty(BloomProtect) -> + case BloomProtect of + true -> + FoldFun = + fun(X, Acc) -> dict:store(X, <<0:?BITARRAY_SIZE>>, Acc) end, + lists:foldl(FoldFun, + dict:store(?SKIP_WIDTH, + empty([], ?LIST_HEIGHT), + dict:new()), + lists:seq(0, ?SKIP_WIDTH - 1)); + false -> + empty([], ?LIST_HEIGHT) + end. size(SkipList) -> - size(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT). + case is_list(SkipList) of + true -> + size(SkipList, ?LIST_HEIGHT); + false -> + size(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT) + end. @@ -432,7 +486,54 @@ dotest_skiplist_small(N) -> end, lists:ukeysort(1, lists:reverse(KL))). 
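
%% The two shapes are told apart at runtime with is_list/1, as in the
%% dispatch clauses above; a sketch of the idea (hypothetical helper):

representation(SkipList) ->
    case is_list(SkipList) of
        true -> plain_skiplist;    % list of {MarkerKey, SubList} levels
        false -> bloom_protected   % dict of bit arrays plus the list part
    end.
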
-skiplist_test() -> +skiplist_withbloom_test() -> + io:format(user, "~n~nBloom protected skiplist test:~n~n", []), + N = 4000, + KL = generate_randomkeys(1, N, 1, N div 5), + + SWaGSL = os:timestamp(), + SkipList = from_list(lists:reverse(KL), true), + io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++ + "Top level key count of ~w~n", + [N, + timer:now_diff(os:timestamp(), SWaGSL), + length(dict:fetch(?SKIP_WIDTH, SkipList))]), + io:format(user, "Second tier key counts of ~w~n", + [lists:map(fun({_L, SL}) -> length(SL) end, + dict:fetch(?SKIP_WIDTH, SkipList))]), + KLSorted = lists:ukeysort(1, lists:reverse(KL)), + + SWaGSL2 = os:timestamp(), + SkipList = from_sortedlist(KLSorted, true), + io:format(user, "Generating skip list with ~w sorted keys in ~w " ++ + "microseconds~n", + [N, timer:now_diff(os:timestamp(), SWaGSL2)]), + + SWaDSL = os:timestamp(), + SkipList1 = + lists:foldl(fun({K, V}, SL) -> + enter(K, V, SL) + end, + empty(true), + KL), + io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++ + "microseconds~n" ++ + "Top level key count of ~w~n", + [N, + timer:now_diff(os:timestamp(), SWaDSL), + length(dict:fetch(?SKIP_WIDTH, SkipList1))]), + io:format(user, "Second tier key counts of ~w~n", + [lists:map(fun({_L, SL}) -> length(SL) end, + dict:fetch(?SKIP_WIDTH, SkipList1))]), + + io:format(user, "~nRunning timing tests for generated skiplist:~n", []), + skiplist_timingtest(KLSorted, SkipList, N), + + io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), + skiplist_timingtest(KLSorted, SkipList1, N). + +skiplist_nobloom_test() -> + io:format(user, "~n~nBloom free skiplist test:~n~n", []), N = 4000, KL = generate_randomkeys(1, N, 1, N div 5), @@ -442,10 +543,9 @@ skiplist_test() -> "Top level key count of ~w~n", [N, timer:now_diff(os:timestamp(), SWaGSL), - length(dict:fetch(?SKIP_WIDTH, SkipList))]), + length(SkipList)]), io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - dict:fetch(?SKIP_WIDTH, SkipList))]), + [lists:map(fun({_L, SL}) -> length(SL) end, SkipList)]), KLSorted = lists:ukeysort(1, lists:reverse(KL)), SWaGSL2 = os:timestamp(), @@ -466,17 +566,16 @@ skiplist_test() -> "Top level key count of ~w~n", [N, timer:now_diff(os:timestamp(), SWaDSL), - length(dict:fetch(?SKIP_WIDTH, SkipList1))]), + length(SkipList1)]), io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - dict:fetch(?SKIP_WIDTH, SkipList1))]), + [lists:map(fun({_L, SL}) -> length(SL) end, SkipList1)]), io:format(user, "~nRunning timing tests for generated skiplist:~n", []), skiplist_timingtest(KLSorted, SkipList, N), io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), skiplist_timingtest(KLSorted, SkipList1, N). 
- + skiplist_timingtest(KL, SkipList, N) -> io:format(user, "Timing tests on skiplist of size ~w~n", From c4e4cf67fea34d5731439a2787d58708eea0adb1 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 10 Dec 2016 11:39:00 +0000 Subject: [PATCH 08/34] Add bloom to loaded skiplist --- src/leveled_inker.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index cb00883..2bfcd9c 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -633,7 +633,7 @@ load_from_sequence(MinSQN, FilterFun, Penciller, [{_LowSQN, FN, Pid}|Rest]) -> load_between_sequence(MinSQN, MaxSQN, FilterFun, Penciller, CDBpid, StartPos, FN, Rest) -> leveled_log:log("I0014", [FN, MinSQN]), - InitAcc = {MinSQN, MaxSQN, leveled_skiplist:empty()}, + InitAcc = {MinSQN, MaxSQN, leveled_skiplist:empty(true)}, Res = case leveled_cdb:cdb_scan(CDBpid, FilterFun, InitAcc, StartPos) of {eof, {AccMinSQN, _AccMaxSQN, AccKL}} -> ok = push_to_penciller(Penciller, AccKL), From 06c58bf84becfbdf1c6781f4224ae9bbabe3dcae Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 10 Dec 2016 13:03:38 +0000 Subject: [PATCH 09/34] Split out hashtree implementation Split out hashtree implementation functions in leveled_cdb to make it easier to swap this out. Currently using an array of skiplists - may be better with an ets ordered_set --- src/leveled_cdb.erl | 74 ++++++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 8e4451c..f8216d6 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -860,30 +860,14 @@ close_file(Handle, HashTree, BasePos) -> get_hashtree(Key, HashTree) -> Hash = hash(Key), Index = hash_to_index(Hash), - Tree = array:get(Index, HashTree), - case leveled_skiplist:lookup(Hash, Tree) of - {value, List} -> - List; - _ -> - [] - end. + lookup_positions(HashTree, Index, Hash). %% Add to hash tree - this is an array of 256 skiplists that contains the Hash %% and position of objects which have been added to an open CDB file put_hashtree(Key, Position, HashTree) -> Hash = hash(Key), Index = hash_to_index(Hash), - Tree = array:get(Index, HashTree), - case leveled_skiplist:lookup(Hash, Tree) of - none -> - array:set(Index, - leveled_skiplist:enter(Hash, [Position], Tree), - HashTree); - {value, L} -> - array:set(Index, - leveled_skiplist:enter(Hash, [Position|L], Tree), - HashTree) - end. + add_position_tohashtree(HashTree, Index, Hash, Position). %% Function to extract a Key-Value pair given a file handle and a position %% Will confirm that the key matches and do a CRC check @@ -924,7 +908,7 @@ extract_key_value_check(Handle, Position) -> %% Scan through the file until there is a failure to crc check an input, and %% at that point return the position and the key dictionary scanned so far startup_scan_over_file(Handle, Position) -> - HashTree = array:new(256, {default, leveled_skiplist:empty()}), + HashTree = new_hashtree(), scan_over_file(Handle, Position, fun startup_filter/5, @@ -1152,7 +1136,7 @@ search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key, QuickCheck) -> % key/value binary in the file. write_key_value_pairs(Handle, KeyValueList) -> {ok, Position} = file:position(Handle, cur), - HashTree = array:new(256, {default, leveled_skiplist:empty()}), + HashTree = new_hashtree(), write_key_value_pairs(Handle, KeyValueList, {Position, HashTree}). 
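
%% The commit message muses that an ets ordered_set may suit better than an
%% array of skiplists; a hypothetical drop-in for the helpers defined below
%% (not part of the patch) might look like this:

new_hashtree_ets() ->
    ets:new(hashtree, [ordered_set, private]).

lookup_positions_ets(HashTree, Index, Hash) ->
    case ets:lookup(HashTree, {Index, Hash}) of
        [{{Index, Hash}, PosList}] -> PosList;
        [] -> []
    end.

add_position_tohashtree_ets(HashTree, Index, Hash, Position) ->
    PosList = lookup_positions_ets(HashTree, Index, Hash),
    true = ets:insert(HashTree, {{Index, Hash}, [Position|PosList]}),
    HashTree.
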
write_key_value_pairs(_, [], Acc) -> @@ -1184,12 +1168,11 @@ perform_write_hash_tables(Handle, HashTreeBin, StartPos) -> write_hash_tables([], _HashTree, _CurrPos, IndexList, HashTreeBin) -> {IndexList, HashTreeBin}; write_hash_tables([Index|Rest], HashTree, CurrPos, IndexList, HashTreeBin) -> - Tree = array:get(Index, HashTree), - case leveled_skiplist:size(Tree) of - 0 -> + case is_empty(HashTree, Index) of + true -> write_hash_tables(Rest, HashTree, CurrPos, IndexList, HashTreeBin); - _ -> - HashList = leveled_skiplist:to_list(Tree), + false -> + HashList = to_list(HashTree, Index), BinList = build_binaryhashlist(HashList, []), IndexLength = length(BinList) * 2, SlotList = lists:duplicate(IndexLength, <<0:32, 0:32>>), @@ -1345,6 +1328,47 @@ multi_key_value_to_record(KVList, BinaryMode, LastPosition) -> {[], <<>>, empty}, KVList). +%%%============================================================================ +%%% HashTree Implementation +%%%============================================================================ + +lookup_positions(HashTree, Index, Hash) -> + Tree = array:get(Index, HashTree), + case leveled_skiplist:lookup(Hash, Tree) of + {value, List} -> + List; + _ -> + [] + end. + +add_position_tohashtree(HashTree, Index, Hash, Position) -> + Tree = array:get(Index, HashTree), + case leveled_skiplist:lookup(Hash, Tree) of + none -> + array:set(Index, + leveled_skiplist:enter(Hash, [Position], Tree), + HashTree); + {value, L} -> + array:set(Index, + leveled_skiplist:enter(Hash, [Position|L], Tree), + HashTree) + end. + +new_hashtree() -> + array:new(256, {default, leveled_skiplist:empty()}). + +is_empty(HashTree, Index) -> + Tree = array:get(Index, HashTree), + case leveled_skiplist:size(Tree) of + 0 -> + true; + _ -> + false + end. + +to_list(HashTree, Index) -> + Tree = array:get(Index, HashTree), + leveled_skiplist:to_list(Tree). %%%%%%%%%%%%%%%% % T E S T From 95d5e12ce73e1fd2ed46a16a845e3bd5eb0830d0 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 10 Dec 2016 14:15:35 +0000 Subject: [PATCH 10/34] Switch to using ets set as index of L0 cache Hope is that this will cause less garbage collection, and also will be slightly faster. Note that snapshots don't now get an index - they get the special index 'snap'. However, the SkipLists have bloom protection, and most snapshots are iterators not fetchers. --- src/leveled_penciller.erl | 2 +- src/leveled_pmem.erl | 120 +++++++++++++++++++++----------------- 2 files changed, 69 insertions(+), 53 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index dc83474..a111054 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -395,7 +395,7 @@ handle_call({register_snapshot, Snapshot}, _From, State) -> Rs = [{Snapshot, State#state.manifest_sqn}|State#state.registered_snapshots], {reply, {ok, State}, State#state{registered_snapshots = Rs}}; handle_call({load_snapshot, BookieIncrTree}, _From, State) -> - L0D = leveled_pmem:add_to_index(State#state.levelzero_index, + L0D = leveled_pmem:add_to_index(snap, State#state.levelzero_size, BookieIncrTree, State#state.ledger_sqn, diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index 39dd0c6..61ecd4e 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -51,42 +51,55 @@ -include_lib("eunit/include/eunit.hrl"). --define(SLOT_WIDTH, {4096, 12}). 
- %%%============================================================================ %%% API %%%============================================================================ +add_to_index(snap, L0Size, LevelMinus1, LedgerSQN, TreeList) -> + FoldFun = fun({K, V}, {AccMinSQN, AccMaxSQN, AccCount}) -> + SQN = leveled_codec:strip_to_seqonly({K, V}), + {min(SQN, AccMinSQN), + max(SQN, AccMaxSQN), + AccCount + 1} + end, + LM1List = leveled_skiplist:to_list(LevelMinus1), + StartingT = {infinity, 0, L0Size}, + {MinSQN, MaxSQN, NewL0Size} = lists:foldl(FoldFun, StartingT, LM1List), + if + MinSQN > LedgerSQN -> + {MaxSQN, + NewL0Size, + snap, + lists:append(TreeList, [LevelMinus1])} + end; add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) -> SW = os:timestamp(), SlotInTreeList = length(TreeList) + 1, - FoldFun = fun({K, V}, {AccMinSQN, AccMaxSQN, AccCount, HashIndex}) -> + FoldFun = fun({K, V}, {AccMinSQN, AccMaxSQN, AccCount}) -> SQN = leveled_codec:strip_to_seqonly({K, V}), - {Hash, Slot} = hash_to_slot(K), - L = array:get(Slot, HashIndex), - Count0 = case lists:keymember(Hash, 1, L) of - true -> - AccCount; - false -> - AccCount + 1 + Hash = erlang:phash2(K), + Count0 = case ets:lookup(L0Index, Hash) of + [] -> + ets:insert(L0Index, {Hash, [SlotInTreeList]}), + AccCount + 1; + [{Hash, L}] -> + ets:insert(L0Index, {Hash, [SlotInTreeList|L]}), + AccCount end, {min(SQN, AccMinSQN), max(SQN, AccMaxSQN), - Count0, - array:set(Slot, [{Hash, SlotInTreeList}|L], HashIndex)} + Count0} end, LM1List = leveled_skiplist:to_list(LevelMinus1), - StartingT = {infinity, 0, L0Size, L0Index}, - {MinSQN, MaxSQN, NewL0Size, UpdL0Index} = lists:foldl(FoldFun, - StartingT, - LM1List), + StartingT = {infinity, 0, L0Size}, + {MinSQN, MaxSQN, NewL0Size} = lists:foldl(FoldFun, StartingT, LM1List), leveled_log:log_timer("PM001", [NewL0Size], SW), if MinSQN > LedgerSQN -> {MaxSQN, NewL0Size, - UpdL0Index, + L0Index, lists:append(TreeList, [LevelMinus1])} end. @@ -106,38 +119,20 @@ to_list(Slots, FetchFun) -> new_index() -> - array:new(element(1, ?SLOT_WIDTH), [{default, []}, fixed]). - + ets:new(index, [set, private]). +check_levelzero(_Key, _L0Index, []) -> + {false, not_found}; +check_levelzero(Key, snap, TreeList) -> + check_slotlist(Key, lists:seq(1, length(TreeList)), TreeList); check_levelzero(Key, L0Index, TreeList) -> - {Hash, Slot} = hash_to_slot(Key), - CheckList = array:get(Slot, L0Index), - SlotList = lists:foldl(fun({H0, S0}, SL) -> - case H0 of - Hash -> - [S0|SL]; - _ -> - SL - end - end, - [], - CheckList), - lists:foldl(fun(SlotToCheck, {Found, KV}) -> - case Found of - true -> - {Found, KV}; - false -> - CheckTree = lists:nth(SlotToCheck, TreeList), - case leveled_skiplist:lookup(Key, CheckTree) of - none -> - {Found, KV}; - {value, Value} -> - {true, {Key, Value}} - end - end - end, - {false, not_found}, - lists:reverse(lists:usort(SlotList))). + Hash = erlang:phash2(Key), + case ets:lookup(L0Index, Hash) of + [] -> + {false, not_found}; + [{Hash, SlotList}] -> + check_slotlist(Key, SlotList, TreeList) + end. merge_trees(StartKey, EndKey, SkipListList, LevelMinus1) -> @@ -153,11 +148,25 @@ merge_trees(StartKey, EndKey, SkipListList, LevelMinus1) -> %%% Internal Functions %%%============================================================================ - -hash_to_slot(Key) -> - H = erlang:phash2(Key), - {H bsr element(2, ?SLOT_WIDTH), H band (element(1, ?SLOT_WIDTH) - 1)}. 
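
%% With the L0 index now an ets set, slot positions simply accumulate
%% against each hash; a minimal sketch of that pattern (hypothetical
%% helper, mirroring the FoldFun above):

add_slot_to_index(L0Index, Hash, SlotInTreeList) ->
    case ets:lookup(L0Index, Hash) of
        [] ->
            ets:insert(L0Index, {Hash, [SlotInTreeList]});
        [{Hash, SlotList}] ->
            ets:insert(L0Index, {Hash, [SlotInTreeList|SlotList]})
    end.
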
- +check_slotlist(Key, CheckList, TreeList) -> + SlotCheckFun = + fun(SlotToCheck, {Found, KV}) -> + case Found of + true -> + {Found, KV}; + false -> + CheckTree = lists:nth(SlotToCheck, TreeList), + case leveled_skiplist:lookup(Key, CheckTree) of + none -> + {Found, KV}; + {value, Value} -> + {true, {Key, Value}} + end + end + end, + lists:foldl(SlotCheckFun, + {false, not_found}, + lists:reverse(lists:usort(CheckList))). %%%============================================================================ %%% Test @@ -231,8 +240,15 @@ compare_method_test() -> end, [], TestList), + S2 = lists:foldl(fun({Key, _V}, Acc) -> + R0 = check_levelzero(Key, snap, TreeList), + [R0|Acc] + end, + [], + TestList), ?assertMatch(S0, S1), + ?assertMatch(S0, S2), StartKey = {o, "Bucket0100", null, null}, EndKey = {o, "Bucket0200", null, null}, From 2d3a40e6f11d78ea9321bd7b4227126f81f5c337 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 01:02:56 +0000 Subject: [PATCH 11/34] Magic Hash - and no L0 Index Move to using the DJ Bernstein Magic Hash consistently, and trying to make sure we only hash once for each operation (as the hash is more expensive than phash2). The improved lookup time for missing keys should allow for the L0 index to be removed, and hence speed up the completion time for push_mem operations. It is expected there will be a second stage of creating a tinybloom as part of the SFT creation process, and then adding that tinybloom to the manifest. This will then reduce the message passing required for a GET not in the cache or higher levels --- src/leveled_bookie.erl | 114 ++++++++++++------ src/leveled_cdb.erl | 16 +-- src/leveled_codec.erl | 72 ++++++++--- src/leveled_inker.erl | 19 ++- src/leveled_log.erl | 2 +- src/leveled_pclerk.erl | 10 +- src/leveled_penciller.erl | 169 +++++++++++++++----------- src/leveled_pmem.erl | 153 +++++++++-------------- src/leveled_sft.erl | 168 ++++++++++++++------------ src/leveled_skiplist.erl | 248 ++++++++++++++++---------------------- src/leveled_tinybloom.erl | 151 +++++++++++++++++++++++ 11 files changed, 646 insertions(+), 476 deletions(-) create mode 100644 src/leveled_tinybloom.erl diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index a50e9fa..62892ec 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -136,7 +136,10 @@ book_destroy/1]). -export([get_opt/2, - get_opt/3]). + get_opt/3, + load_snapshot/2, + empty_ledgercache/0, + push_ledgercache/2]). -include_lib("eunit/include/eunit.hrl"). @@ -148,15 +151,18 @@ -define(CACHE_SIZE_JITTER, 25). -define(JOURNAL_SIZE_JITTER, 20). +-record(ledger_cache, {skiplist = leveled_skiplist:empty(true) :: tuple(), + min_sqn = infinity :: integer()|infinity, + max_sqn = 0 :: integer()}). + -record(state, {inker :: pid(), penciller :: pid(), cache_size :: integer(), - ledger_cache, % a skiplist + ledger_cache = #ledger_cache{}, is_snapshot :: boolean(), slow_offer = false :: boolean()}). 
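
%% Per the commit message, object keys get the full DJ Bernstein hash while
%% index keys short-circuit to no_lookup; a sketch of probing that split
%% (assumes the magic_hash/1 export added later in this patch):

hash_class(LedgerKey) ->
    case leveled_codec:magic_hash(LedgerKey) of
        no_lookup -> not_hashed;    % e.g. ?IDX_TAG keys, never point-fetched
        Hash when is_integer(Hash) -> {hashed, Hash}
    end.
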
- %%%============================================================================ %%% API %%%============================================================================ @@ -238,14 +244,14 @@ init([Opts]) -> {ok, #state{inker=Inker, penciller=Penciller, cache_size=CacheSize, - ledger_cache=leveled_skiplist:empty(true), + ledger_cache=#ledger_cache{}, is_snapshot=false}}; Bookie -> {ok, {Penciller, LedgerCache}, Inker} = book_snapshotstore(Bookie, self(), ?SNAPSHOT_TIMEOUT), - ok = leveled_penciller:pcl_loadsnapshot(Penciller, - leveled_skiplist:empty(true)), + CacheToLoad = {leveled_skiplist:empty(true), 0, 0}, + ok = leveled_penciller:pcl_loadsnapshot(Penciller, CacheToLoad), leveled_log:log("B0002", [Inker, Penciller]), {ok, #state{penciller=Penciller, inker=Inker, @@ -276,9 +282,9 @@ handle_call({put, Bucket, Key, Object, IndexSpecs, Tag, TTL}, From, State) -> false -> gen_server:reply(From, ok) end, - case maybepush_ledgercache(State#state.cache_size, - Cache0, - State#state.penciller) of + case maybepush_ledgercache(State#state.cache_size, + Cache0, + State#state.penciller) of {ok, NewCache} -> {noreply, State#state{ledger_cache=NewCache, slow_offer=false}}; {returned, NewCache} -> @@ -292,7 +298,7 @@ handle_call({get, Bucket, Key, Tag}, _From, State) -> not_present -> {reply, not_found, State}; Head -> - {Seqn, Status, _MD} = leveled_codec:striphead_to_details(Head), + {Seqn, Status, _MH, _MD} = leveled_codec:striphead_to_details(Head), case Status of tomb -> {reply, not_found, State}; @@ -317,11 +323,10 @@ handle_call({head, Bucket, Key, Tag}, _From, State) -> not_present -> {reply, not_found, State}; Head -> - {_Seqn, Status, MD} = leveled_codec:striphead_to_details(Head), - case Status of - tomb -> + case leveled_codec:striphead_to_details(Head) of + {_SeqN, tomb, _MH, _MD} -> {reply, not_found, State}; - {active, TS} -> + {_SeqN, {active, TS}, _MH, MD} -> case TS >= leveled_codec:integer_now() of true -> OMD = leveled_codec:build_metadata_object(LedgerKey, MD), @@ -426,19 +431,39 @@ terminate(Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. +%%%============================================================================ +%%% External functions +%%%============================================================================ + +load_snapshot(LedgerSnapshot, LedgerCache) -> + CacheToLoad = {LedgerCache#ledger_cache.skiplist, + LedgerCache#ledger_cache.min_sqn, + LedgerCache#ledger_cache.max_sqn}, + ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot, CacheToLoad). + +empty_ledgercache() -> + #ledger_cache{}. + +push_ledgercache(Penciller, Cache) -> + CacheToLoad = {Cache#ledger_cache.skiplist, + Cache#ledger_cache.min_sqn, + Cache#ledger_cache.max_sqn}, + leveled_penciller:pcl_pushmem(Penciller, CacheToLoad). %%%============================================================================ %%% Internal functions %%%============================================================================ +cache_size(LedgerCache) -> + leveled_skiplist:size(LedgerCache#ledger_cache.skiplist). 
+ bucket_stats(State, Bucket, Tag) -> {ok, {LedgerSnapshot, LedgerCache}, _JournalSnapshot} = snapshot_store(State, ledger), Folder = fun() -> - leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]), - ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot, - LedgerCache), + leveled_log:log("B0004", [cache_size(LedgerCache)]), + load_snapshot(LedgerSnapshot, LedgerCache), StartKey = leveled_codec:to_ledgerkey(Bucket, null, Tag), EndKey = leveled_codec:to_ledgerkey(Bucket, null, Tag), AccFun = accumulate_size(), @@ -459,9 +484,8 @@ binary_bucketlist(State, Tag, {FoldBucketsFun, InitAcc}) -> {LedgerSnapshot, LedgerCache}, _JournalSnapshot} = snapshot_store(State, ledger), Folder = fun() -> - leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]), - ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot, - LedgerCache), + leveled_log:log("B0004", [cache_size(LedgerCache)]), + load_snapshot(LedgerSnapshot, LedgerCache), BucketAcc = get_nextbucket(null, Tag, LedgerSnapshot, @@ -514,9 +538,8 @@ index_query(State, {B, null} end, Folder = fun() -> - leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]), - ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot, - LedgerCache), + leveled_log:log("B0004", [cache_size(LedgerCache)]), + load_snapshot(LedgerSnapshot, LedgerCache), StartKey = leveled_codec:to_ledgerkey(Bucket, StartObjKey, ?IDX_TAG, @@ -556,9 +579,8 @@ hashtree_query(State, Tag, JournalCheck) -> {LedgerSnapshot, LedgerCache}, JournalSnapshot} = snapshot_store(State, SnapType), Folder = fun() -> - leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]), - ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot, - LedgerCache), + leveled_log:log("B0004", [cache_size(LedgerCache)]), + load_snapshot(LedgerSnapshot, LedgerCache), StartKey = leveled_codec:to_ledgerkey(null, null, Tag), EndKey = leveled_codec:to_ledgerkey(null, null, Tag), AccFun = accumulate_hashes(JournalCheck, JournalSnapshot), @@ -607,9 +629,8 @@ foldobjects(State, Tag, StartKey, EndKey, FoldObjectsFun) -> {FoldObjectsFun, []} end, Folder = fun() -> - leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]), - ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot, - LedgerCache), + leveled_log:log("B0004", [cache_size(LedgerCache)]), + load_snapshot(LedgerSnapshot, LedgerCache), AccFun = accumulate_objects(FoldFun, JournalSnapshot, Tag), Acc = leveled_penciller:pcl_fetchkeys(LedgerSnapshot, StartKey, @@ -628,9 +649,8 @@ bucketkey_query(State, Tag, Bucket, {FoldKeysFun, InitAcc}) -> {LedgerSnapshot, LedgerCache}, _JournalSnapshot} = snapshot_store(State, ledger), Folder = fun() -> - leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]), - ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot, - LedgerCache), + leveled_log:log("B0004", [cache_size(LedgerCache)]), + load_snapshot(LedgerSnapshot, LedgerCache), SK = leveled_codec:to_ledgerkey(Bucket, null, Tag), EK = leveled_codec:to_ledgerkey(Bucket, null, Tag), AccFun = accumulate_keys(FoldKeysFun), @@ -708,7 +728,7 @@ startup(InkerOpts, PencillerOpts) -> fetch_head(Key, Penciller, LedgerCache) -> - case leveled_skiplist:lookup(Key, LedgerCache) of + case leveled_skiplist:lookup(Key, LedgerCache#ledger_cache.skiplist) of {value, Head} -> Head; none -> @@ -874,18 +894,34 @@ preparefor_ledgercache(_Type, LedgerKey, SQN, Obj, Size, {IndexSpecs, TTL}) -> addto_ledgercache(Changes, Cache) -> - lists:foldl(fun({K, V}, Acc) -> leveled_skiplist:enter(K, V, Acc) end, - Cache, - Changes). 
+    FoldChangesFun =
+        fun({K, V}, Cache0) ->
+            {SQN, Hash} = leveled_codec:strip_to_seqnhashonly({K, V}),
+            SL0 = Cache0#ledger_cache.skiplist,
+            SL1 =
+                case Hash of
+                    no_lookup ->
+                        leveled_skiplist:enter_nolookup(K, V, SL0);
+                    _ ->
+                        leveled_skiplist:enter(K, Hash, V, SL0)
+                end,
+            Cache0#ledger_cache{skiplist=SL1,
+                                min_sqn=min(SQN, Cache0#ledger_cache.min_sqn),
+                                max_sqn=max(SQN, Cache0#ledger_cache.max_sqn)}
+        end,
+    lists:foldl(FoldChangesFun, Cache, Changes).
 
 maybepush_ledgercache(MaxCacheSize, Cache, Penciller) ->
-    CacheSize = leveled_skiplist:size(Cache),
+    CacheSize = leveled_skiplist:size(Cache#ledger_cache.skiplist),
     TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize),
     if
         TimeToPush ->
-            case leveled_penciller:pcl_pushmem(Penciller, Cache) of
+            CacheToLoad = {Cache#ledger_cache.skiplist,
+                            Cache#ledger_cache.min_sqn,
+                            Cache#ledger_cache.max_sqn},
+            case leveled_penciller:pcl_pushmem(Penciller, CacheToLoad) of
                 ok ->
-                    {ok, leveled_skiplist:empty(true)};
+                    {ok, #ledger_cache{}};
                 returned ->
                     {returned, Cache}
             end;
diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl
index f8216d6..63777b2 100644
--- a/src/leveled_cdb.erl
+++ b/src/leveled_cdb.erl
@@ -1272,27 +1272,13 @@ write_top_index_table(Handle, BasePos, List) ->
 
 %% To make this compatible with original Bernstein format this endian flip
 %% and also the use of the standard hash function required.
-%%
-%% Hash function contains mysterious constants, some explanation here as to
-%% what they are -
-%% http://stackoverflow.com/ ++
-%% questions/10696223/reason-for-5381-number-in-djb-hash-function
 
 endian_flip(Int) ->
     <<X:32/unsigned-little-integer>> = <<Int:32>>,
     X.
 
 hash(Key) ->
-    BK = term_to_binary(Key),
-    H = 5381,
-    hash1(H, BK) band 16#FFFFFFFF.
-
-hash1(H, <<>>) ->
-    H;
-hash1(H, <<B:8/integer, Rest/bytes>>) ->
-    H1 = H * 33,
-    H2 = H1 bxor B,
-    hash1(H2, Rest).
+    leveled_codec:magic_hash(Key).
 
 % Get the least significant 8 bits from the hash.
 hash_to_index(Hash) ->
diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index 19e9c9f..72b90b0 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -39,6 +39,7 @@
 strip_to_statusonly/1,
 strip_to_keyseqstatusonly/1,
 strip_to_keyseqonly/1,
+ strip_to_seqnhashonly/1,
 striphead_to_details/1,
 is_active/3,
 endkey_passed/2,
@@ -62,11 +63,38 @@
 convert_indexspecs/5,
 generate_uuid/0,
 integer_now/0,
- riak_extract_metadata/2]).
+ riak_extract_metadata/2,
+ magic_hash/1]).
 
 -define(V1_VERS, 1).
 -define(MAGIC, 53). % riak_kv -> riak_object
 
+%% Use DJ Bernstein magic hash function. Note, this is more expensive than
+%% phash2 but provides a much more balanced result.
+%%
+%% Hash function contains mysterious constants, some explanation here as to
+%% what they are -
+%% http://stackoverflow.com/ ++
+%% questions/10696223/reason-for-5381-number-in-djb-hash-function
+
+magic_hash({?RIAK_TAG, Bucket, Key, _SubKey}) ->
+    magic_hash({Bucket, Key});
+magic_hash({?STD_TAG, Bucket, Key, _SubKey}) ->
+    magic_hash({Bucket, Key});
+magic_hash({?IDX_TAG, _B, _Idx, _Key}) ->
+    no_lookup;
+magic_hash(AnyKey) ->
+    BK = term_to_binary(AnyKey),
+    H = 5381,
+    hash1(H, BK) band 16#FFFFFFFF.
+
+hash1(H, <<>>) ->
+    H;
+hash1(H, <<B:8/integer, Rest/bytes>>) ->
+    H1 = H * 33,
+    H2 = H1 bxor B,
+    hash1(H2, Rest).
+
 %% Credit to
 %% https://github.com/afiskon/erlang-uuid-v4/blob/master/src/uuid.erl
 
@@ -87,15 +115,18 @@ inker_reload_strategy(AltList) ->
 
 strip_to_keyonly({keyonly, K}) -> K;
 strip_to_keyonly({K, _V}) -> K.
 
-strip_to_keyseqstatusonly({K, {SeqN, St, _MD}}) -> {K, SeqN, St}.
+strip_to_keyseqstatusonly({K, {SeqN, St, _, _MD}}) -> {K, SeqN, St}.
-strip_to_statusonly({_, {_, St, _}}) -> St. +strip_to_statusonly({_, {_, St, _, _}}) -> St. -strip_to_seqonly({_, {SeqN, _, _}}) -> SeqN. +strip_to_seqonly({_, {SeqN, _, _, _}}) -> SeqN. -strip_to_keyseqonly({LK, {SeqN, _, _}}) -> {LK, SeqN}. +strip_to_keyseqonly({LK, {SeqN, _, _, _}}) -> {LK, SeqN}. + +strip_to_seqnhashonly({_, {SeqN, _, MH, _}}) -> {SeqN, MH}. + +striphead_to_details({SeqN, St, MH, MD}) -> {SeqN, St, MH, MD}. -striphead_to_details({SeqN, St, MD}) -> {SeqN, St, MD}. key_dominates(LeftKey, RightKey) -> case {LeftKey, RightKey} of @@ -103,10 +134,10 @@ key_dominates(LeftKey, RightKey) -> left_hand_first; {{LK, _LVAL}, {RK, _RVAL}} when RK < LK -> right_hand_first; - {{LK, {LSN, _LST, _LMD}}, {RK, {RSN, _RST, _RMD}}} + {{LK, {LSN, _LST, _LMH, _LMD}}, {RK, {RSN, _RST, _RMH, _RMD}}} when LK == RK, LSN >= RSN -> left_hand_dominant; - {{LK, {LSN, _LST, _LMD}}, {RK, {RSN, _RST, _RMD}}} + {{LK, {LSN, _LST, _LMH, _LMD}}, {RK, {RSN, _RST, _RMH, _RMD}}} when LK == RK, LSN < RSN -> right_hand_dominant end. @@ -218,8 +249,6 @@ create_value_for_journal(Value) -> Value end. - - hash(Obj) -> erlang:phash2(term_to_binary(Obj)). @@ -273,7 +302,7 @@ convert_indexspecs(IndexSpecs, Bucket, Key, SQN, TTL) -> end, {to_ledgerkey(Bucket, Key, ?IDX_TAG, IdxField, IdxValue), - {SQN, Status, null}} + {SQN, Status, no_lookup, null}} end, IndexSpecs). @@ -285,9 +314,11 @@ generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) -> _ -> {active, TS} end, - {Bucket, - Key, - {PrimaryKey, {SQN, Status, extract_metadata(Obj, Size, Tag)}}}. + Value = {SQN, + Status, + magic_hash(PrimaryKey), + extract_metadata(Obj, Size, Tag)}, + {Bucket, Key, {PrimaryKey, Value}}. integer_now() -> @@ -304,7 +335,7 @@ extract_metadata(Obj, Size, ?STD_TAG) -> get_size(PK, Value) -> {Tag, _Bucket, _Key, _} = PK, - {_, _, MD} = Value, + {_, _, _, MD} = Value, case Tag of ?RIAK_TAG -> {_RMD, _VC, _Hash, Size} = MD, @@ -316,7 +347,7 @@ get_size(PK, Value) -> get_keyandhash(LK, Value) -> {Tag, Bucket, Key, _} = LK, - {_, _, MD} = Value, + {_, _, _, MD} = Value, case Tag of ?RIAK_TAG -> {_RMD, _VC, Hash, _Size} = MD, @@ -375,11 +406,14 @@ indexspecs_test() -> {remove, "t1_bin", "abdc456"}], Changes = convert_indexspecs(IndexSpecs, "Bucket", "Key2", 1, infinity), ?assertMatch({{i, "Bucket", {"t1_int", 456}, "Key2"}, - {1, {active, infinity}, null}}, lists:nth(1, Changes)), + {1, {active, infinity}, no_lookup, null}}, + lists:nth(1, Changes)), ?assertMatch({{i, "Bucket", {"t1_bin", "adbc123"}, "Key2"}, - {1, {active, infinity}, null}}, lists:nth(2, Changes)), + {1, {active, infinity}, no_lookup, null}}, + lists:nth(2, Changes)), ?assertMatch({{i, "Bucket", {"t1_bin", "abdc456"}, "Key2"}, - {1, tomb, null}}, lists:nth(3, Changes)). + {1, tomb, no_lookup, null}}, + lists:nth(3, Changes)). 
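
%% The ledger value is now a 4-tuple of {SQN, Status, Hash, Metadata}; a
%% sketch of using the new accessor to decide whether a KV is reachable by
%% point lookup (hypothetical helper):

needs_lookup(KV) ->
    {_SQN, Hash} = leveled_codec:strip_to_seqnhashonly(KV),
    Hash /= no_lookup.
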
endkey_passed_test() -> TestKey = {i, null, null, null}, diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index 2bfcd9c..9a37cae 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -633,13 +633,13 @@ load_from_sequence(MinSQN, FilterFun, Penciller, [{_LowSQN, FN, Pid}|Rest]) -> load_between_sequence(MinSQN, MaxSQN, FilterFun, Penciller, CDBpid, StartPos, FN, Rest) -> leveled_log:log("I0014", [FN, MinSQN]), - InitAcc = {MinSQN, MaxSQN, leveled_skiplist:empty(true)}, + InitAcc = {MinSQN, MaxSQN, leveled_bookie:empty_ledgercache()}, Res = case leveled_cdb:cdb_scan(CDBpid, FilterFun, InitAcc, StartPos) of - {eof, {AccMinSQN, _AccMaxSQN, AccKL}} -> - ok = push_to_penciller(Penciller, AccKL), + {eof, {AccMinSQN, _AccMaxSQN, AccLC}} -> + ok = push_to_penciller(Penciller, AccLC), {ok, AccMinSQN}; - {LastPosition, {_AccMinSQN, _AccMaxSQN, AccKL}} -> - ok = push_to_penciller(Penciller, AccKL), + {LastPosition, {_AccMinSQN, _AccMaxSQN, AccLC}} -> + ok = push_to_penciller(Penciller, AccLC), NextSQN = MaxSQN + 1, load_between_sequence(NextSQN, NextSQN + ?LOADING_BATCH, @@ -657,14 +657,13 @@ load_between_sequence(MinSQN, MaxSQN, FilterFun, Penciller, ok end. -push_to_penciller(Penciller, KeyTree) -> +push_to_penciller(Penciller, LedgerCache) -> % The push to penciller must start as a tree to correctly de-duplicate % the list by order before becoming a de-duplicated list for loading - R = leveled_penciller:pcl_pushmem(Penciller, KeyTree), - case R of + case leveled_bookie:push_ledgercache(Penciller, LedgerCache) of returned -> timer:sleep(?LOADING_PAUSE), - push_to_penciller(Penciller, KeyTree); + push_to_penciller(Penciller, LedgerCache); ok -> ok end. @@ -739,7 +738,7 @@ initiate_penciller_snapshot(Bookie) -> {ok, {LedgerSnap, LedgerCache}, _} = leveled_bookie:book_snapshotledger(Bookie, self(), undefined), - ok = leveled_penciller:pcl_loadsnapshot(LedgerSnap, LedgerCache), + leveled_bookie:load_snapshot(LedgerSnap, LedgerCache), MaxSQN = leveled_penciller:pcl_getstartupsequencenumber(LedgerSnap), {LedgerSnap, MaxSQN}. diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 6c7e4cb..fa26555 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -165,7 +165,7 @@ {"I0013", {info, "File ~s to be removed from manifest"}}, {"I0014", - {info, "On startup oading from filename ~s from SQN ~w"}}, + {info, "On startup loading from filename ~s from SQN ~w"}}, {"I0015", {info, "Opening manifest file at ~s with SQN ~w"}}, {"I0016", diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index bbd2dae..272071d 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -363,11 +363,11 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) -> BNumber = string:right(integer_to_list(BucketLow + random:uniform(BRange)), 4, $0), KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0), - RandKey = {{o, - "Bucket" ++ BNumber, - "Key" ++ KNumber}, - {Count + 1, - {active, infinity}, null}}, + K = {o, "Bucket" ++ BNumber, "Key" ++ KNumber}, + RandKey = {K, {Count + 1, + {active, infinity}, + leveled_codec:magic_hash(K), + null}}, generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange). 
choose_pid_toquery([ManEntry|_T], Key) when diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index a111054..a1ab9b1 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -168,9 +168,11 @@ pcl_pushmem/2, pcl_fetchlevelzero/2, pcl_fetch/2, + pcl_fetch/3, pcl_fetchkeys/5, pcl_fetchnextkey/5, pcl_checksequencenumber/3, + pcl_checksequencenumber/4, pcl_workforclerk/1, pcl_promptmanifestchange/2, pcl_confirml0complete/4, @@ -213,8 +215,6 @@ levelzero_pending = false :: boolean(), levelzero_constructor :: pid(), levelzero_cache = [] :: list(), % a list of skiplists - levelzero_index, - % is an array - but cannot specif due to OTP compatability levelzero_size = 0 :: integer(), levelzero_maxcachesize :: integer(), levelzero_cointoss = false :: boolean(), @@ -236,9 +236,9 @@ pcl_start(PCLopts) -> gen_server:start(?MODULE, [PCLopts], []). -pcl_pushmem(Pid, DumpList) -> +pcl_pushmem(Pid, LedgerCache) -> %% Bookie to dump memory onto penciller - gen_server:call(Pid, {push_mem, DumpList}, infinity). + gen_server:call(Pid, {push_mem, LedgerCache}, infinity). pcl_fetchlevelzero(Pid, Slot) -> %% Timeout to cause crash of L0 file when it can't get the close signal @@ -249,7 +249,14 @@ pcl_fetchlevelzero(Pid, Slot) -> gen_server:call(Pid, {fetch_levelzero, Slot}, 60000). pcl_fetch(Pid, Key) -> - gen_server:call(Pid, {fetch, Key}, infinity). + Hash = leveled_codec:magic_hash(Key), + if + Hash /= no_lookup -> + gen_server:call(Pid, {fetch, Key, Hash}, infinity) + end. + +pcl_fetch(Pid, Key, Hash) -> + gen_server:call(Pid, {fetch, Key, Hash}, infinity). pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc) -> gen_server:call(Pid, @@ -262,7 +269,14 @@ pcl_fetchnextkey(Pid, StartKey, EndKey, AccFun, InitAcc) -> infinity). pcl_checksequencenumber(Pid, Key, SQN) -> - gen_server:call(Pid, {check_sqn, Key, SQN}, infinity). + Hash = leveled_codec:magic_hash(Key), + if + Hash /= no_lookup -> + gen_server:call(Pid, {check_sqn, Key, Hash, SQN}, infinity) + end. + +pcl_checksequencenumber(Pid, Key, Hash, SQN) -> + gen_server:call(Pid, {check_sqn, Key, Hash, SQN}, infinity). pcl_workforclerk(Pid) -> gen_server:call(Pid, work_for_clerk, infinity). @@ -313,8 +327,9 @@ init([PCLopts]) -> end. 
-handle_call({push_mem, PushedTree}, From, State=#state{is_snapshot=Snap}) - when Snap == false -> +handle_call({push_mem, {PushedTree, MinSQN, MaxSQN}}, + From, + State=#state{is_snapshot=Snap}) when Snap == false -> % The push_mem process is as follows: % % 1 - Receive a gb_tree containing the latest Key/Value pairs (note that @@ -342,25 +357,24 @@ handle_call({push_mem, PushedTree}, From, State=#state{is_snapshot=Snap}) false -> leveled_log:log("P0018", [ok, false, false]), gen_server:reply(From, ok), - {noreply, update_levelzero(State#state.levelzero_index, - State#state.levelzero_size, - PushedTree, + {noreply, update_levelzero(State#state.levelzero_size, + {PushedTree, MinSQN, MaxSQN}, State#state.ledger_sqn, State#state.levelzero_cache, State)} end; -handle_call({fetch, Key}, _From, State) -> +handle_call({fetch, Key, Hash}, _From, State) -> {reply, fetch_mem(Key, + Hash, State#state.manifest, - State#state.levelzero_index, State#state.levelzero_cache), State}; -handle_call({check_sqn, Key, SQN}, _From, State) -> +handle_call({check_sqn, Key, Hash, SQN}, _From, State) -> {reply, compare_to_sqn(fetch_mem(Key, + Hash, State#state.manifest, - State#state.levelzero_index, State#state.levelzero_cache), SQN), State}; @@ -394,15 +408,13 @@ handle_call(get_startup_sqn, _From, State) -> handle_call({register_snapshot, Snapshot}, _From, State) -> Rs = [{Snapshot, State#state.manifest_sqn}|State#state.registered_snapshots], {reply, {ok, State}, State#state{registered_snapshots = Rs}}; -handle_call({load_snapshot, BookieIncrTree}, _From, State) -> - L0D = leveled_pmem:add_to_index(snap, - State#state.levelzero_size, - BookieIncrTree, +handle_call({load_snapshot, {BookieIncrTree, MinSQN, MaxSQN}}, _From, State) -> + L0D = leveled_pmem:add_to_cache(State#state.levelzero_size, + {BookieIncrTree, MinSQN, MaxSQN}, State#state.ledger_sqn, State#state.levelzero_cache), - {LedgerSQN, L0Size, L0Index, L0Cache} = L0D, + {LedgerSQN, L0Size, L0Cache} = L0D, {reply, ok, State#state{levelzero_cache=L0Cache, - levelzero_index=L0Index, levelzero_size=L0Size, ledger_sqn=LedgerSQN, snapshot_fully_loaded=true}}; @@ -453,7 +465,6 @@ handle_cast({levelzero_complete, FN, StartKey, EndKey}, State) -> {noreply, State#state{levelzero_cache=[], levelzero_pending=false, levelzero_constructor=undefined, - levelzero_index=leveled_pmem:new_index(), levelzero_size=0, manifest=UpdMan, persisted_sqn=State#state.ledger_sqn}}. 
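A side note on the single-clause if guarding pcl_fetch/2 and pcl_checksequencenumber/3 above: keys whose magic hash comes back as no_lookup (index entries take this form, as convert_indexspecs earlier in the series suggests) are never expected on this path, so the unmatched if is left to fail fast with an if_clause error rather than quietly query a key that can never be found. A hypothetical equivalent with an explicit error would be:

pcl_fetch_checked(Pid, Key) ->
    case leveled_codec:magic_hash(Key) of
        no_lookup ->
            erlang:error({no_lookup_key, Key});
        Hash ->
            gen_server:call(Pid, {fetch, Key, Hash}, infinity)
    end.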
@@ -546,7 +557,6 @@ start_from_file(PCLopts) -> InitState = #state{clerk=MergeClerk, root_path=RootPath, - levelzero_index = leveled_pmem:new_index(), levelzero_maxcachesize=MaxTableSize, levelzero_cointoss=CoinToss}, @@ -622,19 +632,18 @@ start_from_file(PCLopts) -> -update_levelzero(L0Index, L0Size, PushedTree, LedgerSQN, L0Cache, State) -> - Update = leveled_pmem:add_to_index(L0Index, - L0Size, - PushedTree, +update_levelzero(L0Size, {PushedTree, MinSQN, MaxSQN}, + LedgerSQN, L0Cache, State) -> + Update = leveled_pmem:add_to_cache(L0Size, + {PushedTree, MinSQN, MaxSQN}, LedgerSQN, L0Cache), - {MaxSQN, NewL0Size, UpdL0Index, UpdL0Cache} = Update, + {UpdMaxSQN, NewL0Size, UpdL0Cache} = Update, if - MaxSQN >= LedgerSQN -> + UpdMaxSQN >= LedgerSQN -> UpdState = State#state{levelzero_cache=UpdL0Cache, - levelzero_index=UpdL0Index, levelzero_size=NewL0Size, - ledger_sqn=MaxSQN}, + ledger_sqn=UpdMaxSQN}, CacheTooBig = NewL0Size > State#state.levelzero_maxcachesize, Level0Free = length(get_item(0, State#state.manifest, [])) == 0, RandomFactor = @@ -659,7 +668,6 @@ update_levelzero(L0Index, L0Size, PushedTree, LedgerSQN, L0Cache, State) -> end; NewL0Size == L0Size -> State#state{levelzero_cache=L0Cache, - levelzero_index=L0Index, levelzero_size=L0Size, ledger_sqn=LedgerSQN} end. @@ -707,8 +715,8 @@ levelzero_filename(State) -> FileName. -fetch_mem(Key, Manifest, L0Index, L0Cache) -> - L0Check = leveled_pmem:check_levelzero(Key, L0Index, L0Cache), +fetch_mem(Key, Hash, Manifest, L0Cache) -> + L0Check = leveled_pmem:check_levelzero(Key, Hash, L0Cache), case L0Check of {false, not_found} -> fetch(Key, Manifest, 0, fun leveled_sft:sft_get/2); @@ -1284,8 +1292,12 @@ confirm_delete_test() -> maybe_pause_push(PCL, KL) -> T0 = leveled_skiplist:empty(true), - T1 = lists:foldl(fun({K, V}, Acc) -> leveled_skiplist:enter(K, V, Acc) end, - T0, + T1 = lists:foldl(fun({K, V}, {AccSL, MinSQN, MaxSQN}) -> + SL = leveled_skiplist:enter(K, V, AccSL), + SQN = leveled_codec:strip_to_seqonly({K, V}), + {SL, min(SQN, MinSQN), max(SQN, MaxSQN)} + end, + {T0, infinity, 0}, KL), case pcl_pushmem(PCL, T1) of returned -> @@ -1295,23 +1307,32 @@ maybe_pause_push(PCL, KL) -> ok end. +%% old test data doesn't have the magic hash +add_missing_hash({K, {SQN, ST, MD}}) -> + {K, {SQN, ST, leveled_codec:magic_hash(K), MD}}. 
+ + simple_server_test() -> RootPath = "../test/ledger", clean_testdir(RootPath), {ok, PCL} = pcl_start(#penciller_options{root_path=RootPath, max_inmemory_tablesize=1000}), - Key1 = {{o,"Bucket0001", "Key0001", null}, - {1, {active, infinity}, null}}, + Key1_Pre = {{o,"Bucket0001", "Key0001", null}, + {1, {active, infinity}, null}}, + Key1 = add_missing_hash(Key1_Pre), KL1 = leveled_sft:generate_randomkeys({1000, 2}), - Key2 = {{o,"Bucket0002", "Key0002", null}, + Key2_Pre = {{o,"Bucket0002", "Key0002", null}, {1002, {active, infinity}, null}}, + Key2 = add_missing_hash(Key2_Pre), KL2 = leveled_sft:generate_randomkeys({900, 1003}), % Keep below the max table size by having 900 not 1000 - Key3 = {{o,"Bucket0003", "Key0003", null}, + Key3_Pre = {{o,"Bucket0003", "Key0003", null}, {2003, {active, infinity}, null}}, + Key3 = add_missing_hash(Key3_Pre), KL3 = leveled_sft:generate_randomkeys({1000, 2004}), - Key4 = {{o,"Bucket0004", "Key0004", null}, + Key4_Pre = {{o,"Bucket0004", "Key0004", null}, {3004, {active, infinity}, null}}, + Key4 = add_missing_hash(Key4_Pre), KL4 = leveled_sft:generate_randomkeys({1000, 3005}), ok = maybe_pause_push(PCL, [Key1]), ?assertMatch(Key1, pcl_fetch(PCL, {o,"Bucket0001", "Key0001", null})), @@ -1351,7 +1372,8 @@ simple_server_test() -> SnapOpts = #penciller_options{start_snapshot = true, source_penciller = PCLr}, {ok, PclSnap} = pcl_start(SnapOpts), - ok = pcl_loadsnapshot(PclSnap, leveled_skiplist:empty()), + leveled_bookie:load_snapshot(PclSnap, + leveled_bookie:empty_ledgercache()), ?assertMatch(Key1, pcl_fetch(PclSnap, {o,"Bucket0001", "Key0001", null})), ?assertMatch(Key2, pcl_fetch(PclSnap, {o,"Bucket0002", "Key0002", null})), ?assertMatch(Key3, pcl_fetch(PclSnap, {o,"Bucket0003", "Key0003", null})), @@ -1383,7 +1405,9 @@ simple_server_test() -> % Add some more keys and confirm that check sequence number still % sees the old version in the previous snapshot, but will see the new version % in a new snapshot - Key1A = {{o,"Bucket0001", "Key0001", null}, {4005, {active, infinity}, null}}, + Key1A_Pre = {{o,"Bucket0001", "Key0001", null}, + {4005, {active, infinity}, null}}, + Key1A = add_missing_hash(Key1A_Pre), KL1A = leveled_sft:generate_randomkeys({2000, 4006}), ok = maybe_pause_push(PCLr, [Key1A]), ok = maybe_pause_push(PCLr, KL1A), @@ -1400,7 +1424,7 @@ simple_server_test() -> term_to_binary("Hello")), {ok, PclSnap2} = pcl_start(SnapOpts), - ok = pcl_loadsnapshot(PclSnap2, leveled_skiplist:empty()), + leveled_bookie:load_snapshot(PclSnap2, leveled_bookie:empty_ledgercache()), ?assertMatch(false, pcl_checksequencenumber(PclSnap2, {o, "Bucket0001", @@ -1506,23 +1530,26 @@ simple_findnextkey_test() -> sqnoverlap_findnextkey_test() -> QueryArray = [ - {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, null}}, - {{o, "Bucket1", "Key5"}, {4, {active, infinity}, null}}]}, - {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}]}, - {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, null}}]} + {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key5"}, {4, {active, infinity}, 0, null}}]}, + {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]}, + {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]} ], {Array2, KV1} = find_nextkey(QueryArray, {o, "Bucket1", "Key0"}, {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, null}}, KV1), + ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, + KV1), {Array3, KV2} = find_nextkey(Array2, {o, 
"Bucket1", "Key0"}, {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}, KV2), + ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}, + KV2), {Array4, KV3} = find_nextkey(Array3, {o, "Bucket1", "Key0"}, {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key5"}, {4, {active, infinity}, null}}, KV3), + ?assertMatch({{o, "Bucket1", "Key5"}, {4, {active, infinity}, 0, null}}, + KV3), ER = find_nextkey(Array4, {o, "Bucket1", "Key0"}, {o, "Bucket1", "Key5"}), @@ -1530,23 +1557,26 @@ sqnoverlap_findnextkey_test() -> sqnoverlap_otherway_findnextkey_test() -> QueryArray = [ - {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, null}}, - {{o, "Bucket1", "Key5"}, {1, {active, infinity}, null}}]}, - {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}]}, - {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, null}}]} + {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key5"}, {1, {active, infinity}, 0, null}}]}, + {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]}, + {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]} ], {Array2, KV1} = find_nextkey(QueryArray, {o, "Bucket1", "Key0"}, {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, null}}, KV1), + ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, + KV1), {Array3, KV2} = find_nextkey(Array2, {o, "Bucket1", "Key0"}, {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}, KV2), + ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}, + KV2), {Array4, KV3} = find_nextkey(Array3, {o, "Bucket1", "Key0"}, {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key5"}, {2, {active, infinity}, null}}, KV3), + ?assertMatch({{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}, + KV3), ER = find_nextkey(Array4, {o, "Bucket1", "Key0"}, {o, "Bucket1", "Key5"}), @@ -1554,19 +1584,19 @@ sqnoverlap_otherway_findnextkey_test() -> foldwithimm_simple_test() -> QueryArray = [ - {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, null}}, - {{o, "Bucket1", "Key5"}, {1, {active, infinity}, null}}]}, - {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}]}, - {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, null}}]} + {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key5"}, {1, {active, infinity}, 0, null}}]}, + {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]}, + {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]} ], IMM0 = leveled_skiplist:enter({o, "Bucket1", "Key6"}, - {7, {active, infinity}, null}, + {7, {active, infinity}, 0, null}, leveled_skiplist:empty()), IMM1 = leveled_skiplist:enter({o, "Bucket1", "Key1"}, - {8, {active, infinity}, null}, + {8, {active, infinity}, 0, null}, IMM0), IMM2 = leveled_skiplist:enter({o, "Bucket1", "Key8"}, - {9, {active, infinity}, null}, + {9, {active, infinity}, 0, null}, IMM1), IMMiter = leveled_skiplist:to_range(IMM2, {o, "Bucket1", "Key1"}), AccFun = fun(K, V, Acc) -> SQN = leveled_codec:strip_to_seqonly({K, V}), @@ -1581,7 +1611,7 @@ foldwithimm_simple_test() -> {{o, "Bucket1", "Key6"}, 7}], Acc), IMM1A = leveled_skiplist:enter({o, "Bucket1", "Key1"}, - {8, {active, infinity}, null}, + {8, {active, infinity}, 0, null}, leveled_skiplist:empty()), IMMiterA = leveled_skiplist:to_range(IMM1A, {o, "Bucket1", "Key1"}), AccA = keyfolder(IMMiterA, @@ -1593,7 +1623,7 @@ 
foldwithimm_simple_test() -> {{o, "Bucket1", "Key5"}, 2}], AccA), IMM3 = leveled_skiplist:enter({o, "Bucket1", "Key4"}, - {10, {active, infinity}, null}, + {10, {active, infinity}, 0, null}, IMM2), IMMiterB = leveled_skiplist:to_range(IMM3, {o, "Bucket1", "Key1"}), AccB = keyfolder(IMMiterB, @@ -1688,14 +1718,15 @@ badmanifest_test() -> clean_testdir(RootPath), {ok, PCL} = pcl_start(#penciller_options{root_path=RootPath, max_inmemory_tablesize=1000}), - Key1 = {{o,"Bucket0001", "Key0001", null}, + Key1_pre = {{o,"Bucket0001", "Key0001", null}, {1001, {active, infinity}, null}}, + Key1 = add_missing_hash(Key1_pre), KL1 = leveled_sft:generate_randomkeys({1000, 1}), ok = maybe_pause_push(PCL, KL1 ++ [Key1]), %% Added together, as split apart there will be a race between the close %% call to the penciller and the second fetch of the cache entry - ?assertMatch(Key1, pcl_fetch(PCL, {o,"Bucket0001", "Key0001", null})), + ?assertMatch(Key1, pcl_fetch(PCL, {o, "Bucket0001", "Key0001", null})), timer:sleep(100), % Avoids confusion if L0 file not written before close ok = pcl_close(PCL), diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index 61ecd4e..5ba62aa 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -42,9 +42,8 @@ -include("include/leveled.hrl"). -export([ - add_to_index/5, + add_to_cache/4, to_list/2, - new_index/0, check_levelzero/3, merge_trees/4 ]). @@ -56,53 +55,20 @@ %%% API %%%============================================================================ -add_to_index(snap, L0Size, LevelMinus1, LedgerSQN, TreeList) -> - FoldFun = fun({K, V}, {AccMinSQN, AccMaxSQN, AccCount}) -> - SQN = leveled_codec:strip_to_seqonly({K, V}), - {min(SQN, AccMinSQN), - max(SQN, AccMaxSQN), - AccCount + 1} - end, - LM1List = leveled_skiplist:to_list(LevelMinus1), - StartingT = {infinity, 0, L0Size}, - {MinSQN, MaxSQN, NewL0Size} = lists:foldl(FoldFun, StartingT, LM1List), - if - MinSQN > LedgerSQN -> - {MaxSQN, - NewL0Size, - snap, - lists:append(TreeList, [LevelMinus1])} - end; -add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) -> - SW = os:timestamp(), - SlotInTreeList = length(TreeList) + 1, - FoldFun = fun({K, V}, {AccMinSQN, AccMaxSQN, AccCount}) -> - SQN = leveled_codec:strip_to_seqonly({K, V}), - Hash = erlang:phash2(K), - Count0 = case ets:lookup(L0Index, Hash) of - [] -> - ets:insert(L0Index, {Hash, [SlotInTreeList]}), - AccCount + 1; - [{Hash, L}] -> - ets:insert(L0Index, {Hash, [SlotInTreeList|L]}), - AccCount - end, - {min(SQN, AccMinSQN), - max(SQN, AccMaxSQN), - Count0} - end, - LM1List = leveled_skiplist:to_list(LevelMinus1), - StartingT = {infinity, 0, L0Size}, - {MinSQN, MaxSQN, NewL0Size} = lists:foldl(FoldFun, StartingT, LM1List), - leveled_log:log_timer("PM001", [NewL0Size], SW), - if - MinSQN > LedgerSQN -> - {MaxSQN, - NewL0Size, - L0Index, - lists:append(TreeList, [LevelMinus1])} +add_to_cache(L0Size, {LevelMinus1, MinSQN, MaxSQN}, LedgerSQN, TreeList) -> + LM1Size = leveled_skiplist:size(LevelMinus1), + case LM1Size of + 0 -> + {LedgerSQN, L0Size, TreeList}; + _ -> + if + MinSQN >= LedgerSQN -> + {MaxSQN, + L0Size + LM1Size, + lists:append(TreeList, [LevelMinus1])} + end end. - + to_list(Slots, FetchFun) -> SW = os:timestamp(), @@ -118,21 +84,13 @@ to_list(Slots, FetchFun) -> FullList. -new_index() -> - ets:new(index, [set, private]). +check_levelzero(Key, TreeList) -> + check_levelzero(Key, leveled_codec:magic_hash(Key), TreeList). 
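%% A note on add_to_cache/4 above: the single-clause if acts as an
%% assertion. A pushed batch whose MinSQN is below the current ledger
%% SQN implies out-of-order delivery, so the penciller is left to crash
%% with if_clause rather than merge stale data. With hypothetical
%% values, given a non-empty skiplist SL and a ledger SQN of 10:
%%
%% {20, NewL0Size, [SL]} = add_to_cache(0, {SL, 11, 20}, 10, []),
%% %% ...whereas add_to_cache(0, {SL, 9, 20}, 10, []) raises if_clause.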
-check_levelzero(_Key, _L0Index, []) -> +check_levelzero(_Key, _Hash, []) -> {false, not_found}; -check_levelzero(Key, snap, TreeList) -> - check_slotlist(Key, lists:seq(1, length(TreeList)), TreeList); -check_levelzero(Key, L0Index, TreeList) -> - Hash = erlang:phash2(Key), - case ets:lookup(L0Index, Hash) of - [] -> - {false, not_found}; - [{Hash, SlotList}] -> - check_slotlist(Key, SlotList, TreeList) - end. +check_levelzero(Key, Hash, TreeList) -> + check_slotlist(Key, Hash, lists:seq(1, length(TreeList)), TreeList). merge_trees(StartKey, EndKey, SkipListList, LevelMinus1) -> @@ -148,7 +106,7 @@ merge_trees(StartKey, EndKey, SkipListList, LevelMinus1) -> %%% Internal Functions %%%============================================================================ -check_slotlist(Key, CheckList, TreeList) -> +check_slotlist(Key, Hash, CheckList, TreeList) -> SlotCheckFun = fun(SlotToCheck, {Found, KV}) -> case Found of @@ -156,7 +114,7 @@ check_slotlist(Key, CheckList, TreeList) -> {Found, KV}; false -> CheckTree = lists:nth(SlotToCheck, TreeList), - case leveled_skiplist:lookup(Key, CheckTree) of + case leveled_skiplist:lookup(Key, Hash, CheckTree) of none -> {Found, KV}; {value, Value} -> @@ -166,7 +124,7 @@ check_slotlist(Key, CheckList, TreeList) -> end, lists:foldl(SlotCheckFun, {false, not_found}, - lists:reverse(lists:usort(CheckList))). + lists:reverse(CheckList)). %%%============================================================================ %%% Test @@ -177,7 +135,7 @@ check_slotlist(Key, CheckList, TreeList) -> generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> generate_randomkeys(Seqn, Count, - leveled_skiplist:empty(), + leveled_skiplist:empty(true), BucketRangeLow, BucketRangeHigh). @@ -197,58 +155,59 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> compare_method_test() -> - R = lists:foldl(fun(_X, {LedgerSQN, L0Size, L0Index, L0TreeList}) -> + R = lists:foldl(fun(_X, {LedgerSQN, L0Size, L0TreeList}) -> LM1 = generate_randomkeys(LedgerSQN + 1, 2000, 1, 500), - add_to_index(L0Index, L0Size, LM1, LedgerSQN, - L0TreeList) + add_to_cache(L0Size, + {LM1, + LedgerSQN + 1, + LedgerSQN + 2000}, + LedgerSQN, + L0TreeList) end, - {0, 0, new_index(), []}, + {0, 0, []}, lists:seq(1, 16)), - {SQN, Size, Index, TreeList} = R, + {SQN, Size, TreeList} = R, ?assertMatch(32000, SQN), ?assertMatch(true, Size =< 32000), TestList = leveled_skiplist:to_list(generate_randomkeys(1, 2000, 1, 800)), - S0 = lists:foldl(fun({Key, _V}, Acc) -> - R0 = lists:foldr(fun(Tree, {Found, KV}) -> - case Found of - true -> - {true, KV}; - false -> - L0 = leveled_skiplist:lookup(Key, Tree), - case L0 of - none -> - {false, not_found}; - {value, Value} -> - {true, {Key, Value}} - end + FindKeyFun = + fun(Key) -> + fun(Tree, {Found, KV}) -> + case Found of + true -> + {true, KV}; + false -> + L0 = leveled_skiplist:lookup(Key, Tree), + case L0 of + none -> + {false, not_found}; + {value, Value} -> + {true, {Key, Value}} end - end, - {false, not_found}, - TreeList), - [R0|Acc] - end, - [], - TestList), + end + end + end, - S1 = lists:foldl(fun({Key, _V}, Acc) -> - R0 = check_levelzero(Key, Index, TreeList), - [R0|Acc] - end, + S0 = lists:foldl(fun({Key, _V}, Acc) -> + R0 = lists:foldr(FindKeyFun(Key), + {false, not_found}, + TreeList), + [R0|Acc] end, [], TestList), - S2 = lists:foldl(fun({Key, _V}, Acc) -> - R0 = check_levelzero(Key, snap, TreeList), + + S1 = lists:foldl(fun({Key, _V}, Acc) -> + R0 = check_levelzero(Key, TreeList), [R0|Acc] end, [], TestList), ?assertMatch(S0, S1), - 
?assertMatch(S0, S2), StartKey = {o, "Bucket0100", null, null}, EndKey = {o, "Bucket0200", null, null}, diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl index 9c67721..4c86dff 100644 --- a/src/leveled_sft.erl +++ b/src/leveled_sft.erl @@ -1400,12 +1400,15 @@ generate_randomkeys(Count) -> generate_randomkeys(0, _SQN, Acc) -> lists:reverse(Acc); generate_randomkeys(Count, SQN, Acc) -> - RandKey = {{o, - lists:concat(["Bucket", random:uniform(1024)]), - lists:concat(["Key", random:uniform(1024)]), - null}, + K = {o, + lists:concat(["Bucket", random:uniform(1024)]), + lists:concat(["Key", random:uniform(1024)]), + null}, + RandKey = {K, {SQN, - {active, infinity}, null}}, + {active, infinity}, + leveled_codec:magic_hash(K), + null}}, generate_randomkeys(Count - 1, SQN + 1, [RandKey|Acc]). generate_sequentialkeys(Count, Start) -> @@ -1415,75 +1418,86 @@ generate_sequentialkeys(Target, Incr, Acc) when Incr =:= Target -> Acc; generate_sequentialkeys(Target, Incr, Acc) -> KeyStr = string:right(integer_to_list(Incr), 8, $0), - NextKey = {{o, - "BucketSeq", - lists:concat(["Key", KeyStr]), - null}, + K = {o, "BucketSeq", lists:concat(["Key", KeyStr]), null}, + NextKey = {K, {5, - {active, infinity}, null}}, + {active, infinity}, + leveled_codec:magic_hash(K), + null}}, generate_sequentialkeys(Target, Incr + 1, [NextKey|Acc]). simple_create_block_test() -> - KeyList1 = [{{o, "Bucket1", "Key1", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key3", null}, {2, {active, infinity}, null}}], - KeyList2 = [{{o, "Bucket1", "Key2", null}, {3, {active, infinity}, null}}], + KeyList1 = [{{o, "Bucket1", "Key1", null}, + {1, {active, infinity}, no_lookup, null}}, + {{o, "Bucket1", "Key3", null}, + {2, {active, infinity}, no_lookup, null}}], + KeyList2 = [{{o, "Bucket1", "Key2", null}, + {3, {active, infinity}, no_lookup, null}}], {MergedKeyList, ListStatus, SN, _, _, _} = create_block(KeyList1, KeyList2, #level{level=1}), ?assertMatch(partial, ListStatus), [H1|T1] = MergedKeyList, - ?assertMatch(H1, {{o, "Bucket1", "Key1", null}, {1, {active, infinity}, null}}), + ?assertMatch({{o, "Bucket1", "Key1", null}, + {1, {active, infinity}, no_lookup, null}}, H1), [H2|T2] = T1, - ?assertMatch(H2, {{o, "Bucket1", "Key2", null}, {3, {active, infinity}, null}}), - ?assertMatch(T2, [{{o, "Bucket1", "Key3", null}, {2, {active, infinity}, null}}]), + ?assertMatch({{o, "Bucket1", "Key2", null}, + {3, {active, infinity}, no_lookup, null}}, H2), + ?assertMatch([{{o, "Bucket1", "Key3", null}, + {2, {active, infinity}, no_lookup, null}}], T2), ?assertMatch(SN, {1,3}). dominate_create_block_test() -> - KeyList1 = [{{o, "Bucket1", "Key1", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key2", null}, {2, {active, infinity}, null}}], - KeyList2 = [{{o, "Bucket1", "Key2", null}, {3, {tomb, infinity}, null}}], + KeyList1 = [{{o, "Bucket1", "Key1", null}, + {1, {active, infinity}, no_lookup, null}}, + {{o, "Bucket1", "Key2", null}, + {2, {active, infinity}, no_lookup, null}}], + KeyList2 = [{{o, "Bucket1", "Key2", null}, + {3, {tomb, infinity}, no_lookup, null}}], {MergedKeyList, ListStatus, SN, _, _, _} = create_block(KeyList1, KeyList2, #level{level=1}), ?assertMatch(partial, ListStatus), [K1, K2] = MergedKeyList, - ?assertMatch(K1, {{o, "Bucket1", "Key1", null}, {1, {active, infinity}, null}}), - ?assertMatch(K2, {{o, "Bucket1", "Key2", null}, {3, {tomb, infinity}, null}}), + ?assertMatch(K1, lists:nth(1, KeyList1)), + ?assertMatch(K2, lists:nth(1, KeyList2)), ?assertMatch(SN, {1,3}). 
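%% The test above exercises the dominance rule in
%% leveled_codec:key_dominates/2: for equal keys the higher SQN wins, so
%% the SQN-3 tombstone for "Key2" displaces the SQN-2 active value in
%% the merged output. A minimal check of the rule itself, with
%% hypothetical keys:
%%
%% left_hand_dominant =
%%     leveled_codec:key_dominates(
%%         {{o, "B", "K", null}, {3, {tomb, infinity}, no_lookup, null}},
%%         {{o, "B", "K", null}, {2, {active, infinity}, no_lookup, null}}).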
sample_keylist() -> - KeyList1 = [{{o, "Bucket1", "Key1", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key3", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key5", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key7", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key9", null}, {1, {active, infinity}, null}}, - {{o, "Bucket2", "Key1", null}, {1, {active, infinity}, null}}, - {{o, "Bucket2", "Key3", null}, {1, {active, infinity}, null}}, - {{o, "Bucket2", "Key5", null}, {1, {active, infinity}, null}}, - {{o, "Bucket2", "Key7", null}, {1, {active, infinity}, null}}, - {{o, "Bucket2", "Key9", null}, {1, {active, infinity}, null}}, - {{o, "Bucket3", "Key1", null}, {1, {active, infinity}, null}}, - {{o, "Bucket3", "Key3", null}, {1, {active, infinity}, null}}, - {{o, "Bucket3", "Key5", null}, {1, {active, infinity}, null}}, - {{o, "Bucket3", "Key7", null}, {1, {active, infinity}, null}}, - {{o, "Bucket3", "Key9", null}, {1, {active, infinity}, null}}, - {{o, "Bucket4", "Key1", null}, {1, {active, infinity}, null}}], - KeyList2 = [{{o, "Bucket1", "Key2", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key4", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key6", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key8", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key9a", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key9b", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key9c", null}, {1, {active, infinity}, null}}, - {{o, "Bucket1", "Key9d", null}, {1, {active, infinity}, null}}, - {{o, "Bucket2", "Key2", null}, {1, {active, infinity}, null}}, - {{o, "Bucket2", "Key4", null}, {1, {active, infinity}, null}}, - {{o, "Bucket2", "Key6", null}, {1, {active, infinity}, null}}, - {{o, "Bucket2", "Key8", null}, {1, {active, infinity}, null}}, - {{o, "Bucket3", "Key2", null}, {1, {active, infinity}, null}}, - {{o, "Bucket3", "Key4", null}, {3, {active, infinity}, null}}, - {{o, "Bucket3", "Key6", null}, {2, {active, infinity}, null}}, - {{o, "Bucket3", "Key8", null}, {1, {active, infinity}, null}}], + KeyList1 = + [{{o, "Bucket1", "Key1", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key3", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key5", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key7", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key9", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket2", "Key1", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket2", "Key3", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket2", "Key5", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket2", "Key7", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket2", "Key9", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket3", "Key1", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket3", "Key3", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket3", "Key5", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket3", "Key7", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket3", "Key9", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket4", "Key1", null}, {1, {active, infinity}, 0, null}}], + KeyList2 = + [{{o, "Bucket1", "Key2", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key4", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key6", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key8", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key9a", null}, 
{1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key9b", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key9c", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key9d", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket2", "Key2", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket2", "Key4", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket2", "Key6", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket2", "Key8", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket3", "Key2", null}, {1, {active, infinity}, 0, null}}, + {{o, "Bucket3", "Key4", null}, {3, {active, infinity}, 0, null}}, + {{o, "Bucket3", "Key6", null}, {2, {active, infinity}, 0, null}}, + {{o, "Bucket3", "Key8", null}, {1, {active, infinity}, 0, null}}], {KeyList1, KeyList2}. alternating_create_block_test() -> @@ -1495,12 +1509,12 @@ alternating_create_block_test() -> ?assertMatch(BlockSize, 32), ?assertMatch(ListStatus, complete), K1 = lists:nth(1, MergedKeyList), - ?assertMatch(K1, {{o, "Bucket1", "Key1", null}, {1, {active, infinity}, null}}), + ?assertMatch(K1, {{o, "Bucket1", "Key1", null}, {1, {active, infinity}, 0, null}}), K11 = lists:nth(11, MergedKeyList), - ?assertMatch(K11, {{o, "Bucket1", "Key9b", null}, {1, {active, infinity}, null}}), + ?assertMatch(K11, {{o, "Bucket1", "Key9b", null}, {1, {active, infinity}, 0, null}}), K32 = lists:nth(32, MergedKeyList), - ?assertMatch(K32, {{o, "Bucket4", "Key1", null}, {1, {active, infinity}, null}}), - HKey = {{o, "Bucket1", "Key0", null}, {1, {active, infinity}, null}}, + ?assertMatch(K32, {{o, "Bucket4", "Key1", null}, {1, {active, infinity}, 0, null}}), + HKey = {{o, "Bucket1", "Key0", null}, {1, {active, infinity}, 0, null}}, {_, ListStatus2, _, _, _, _} = create_block([HKey|KeyList1], KeyList2, #level{level=1}), @@ -1752,7 +1766,7 @@ initial_create_file_test() -> Result1 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key8", null}), io:format("Result is ~w~n", [Result1]), ?assertMatch(Result1, {{o, "Bucket1", "Key8", null}, - {1, {active, infinity}, null}}), + {1, {active, infinity}, 0, null}}), Result2 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key88", null}), io:format("Result is ~w~n", [Result2]), ?assertMatch(Result2, not_present), @@ -1768,17 +1782,17 @@ big_create_file_test() -> InitFileMD, KL1, KL2, #level{level=1}), - [{K1, {Sq1, St1, V1}}|_] = KL1, - [{K2, {Sq2, St2, V2}}|_] = KL2, + [{K1, {Sq1, St1, MH1, V1}}|_] = KL1, + [{K2, {Sq2, St2, MH2, V2}}|_] = KL2, Result1 = fetch_keyvalue(Handle, FileMD, K1), Result2 = fetch_keyvalue(Handle, FileMD, K2), - ?assertMatch(Result1, {K1, {Sq1, St1, V1}}), - ?assertMatch(Result2, {K2, {Sq2, St2, V2}}), + ?assertMatch(Result1, {K1, {Sq1, St1, MH1, V1}}), + ?assertMatch(Result2, {K2, {Sq2, St2, MH2, V2}}), SubList = lists:sublist(KL2, 1000), - lists:foreach(fun(K) -> - {Kn, {_, _, _}} = K, + lists:foreach(fun(KV) -> + {Kn, _} = KV, Rn = fetch_keyvalue(Handle, FileMD, Kn), - ?assertMatch({Kn, {_, _, _}}, Rn) + ?assertMatch({Kn, _}, Rn) end, SubList), Result3 = fetch_keyvalue(Handle, @@ -1834,13 +1848,13 @@ initial_iterator_test() -> ok = file:delete(Filename). 
key_dominates_test() ->
-    KV1 = {{o, "Bucket", "Key1", null}, {5, {active, infinity}, []}},
-    KV2 = {{o, "Bucket", "Key3", null}, {6, {active, infinity}, []}},
-    KV3 = {{o, "Bucket", "Key2", null}, {3, {active, infinity}, []}},
-    KV4 = {{o, "Bucket", "Key4", null}, {7, {active, infinity}, []}},
-    KV5 = {{o, "Bucket", "Key1", null}, {4, {active, infinity}, []}},
-    KV6 = {{o, "Bucket", "Key1", null}, {99, {tomb, 999}, []}},
-    KV7 = {{o, "Bucket", "Key1", null}, {99, tomb, []}},
+    KV1 = {{o, "Bucket", "Key1", null}, {5, {active, infinity}, 0, []}},
+    KV2 = {{o, "Bucket", "Key3", null}, {6, {active, infinity}, 0, []}},
+    KV3 = {{o, "Bucket", "Key2", null}, {3, {active, infinity}, 0, []}},
+    KV4 = {{o, "Bucket", "Key4", null}, {7, {active, infinity}, 0, []}},
+    KV5 = {{o, "Bucket", "Key1", null}, {4, {active, infinity}, 0, []}},
+    KV6 = {{o, "Bucket", "Key1", null}, {99, {tomb, 999}, 0, []}},
+    KV7 = {{o, "Bucket", "Key1", null}, {99, tomb, 0, []}},
     KL1 = [KV1, KV2],
     KL2 = [KV3, KV4],
     ?assertMatch({{next_key, KV1}, [KV2], KL2},
@@ -1970,21 +1984,21 @@ hashclash_test() ->
                                 "Bucket",
                                 "Key8400" ++ integer_to_list(X),
                                 null},
-                        Value = {X, {active, infinity}, null},
+                        Value = {X, {active, infinity}, 0, null},
                         Acc ++ [{Key, Value}]
                         end,
                     [],
                     lists:seq(10,98)),
-    KeyListToUse = [{Key1, {1, {active, infinity}, null}}|KeyList]
-                    ++ [{Key99, {99, {active, infinity}, null}}],
+    KeyListToUse = [{Key1, {1, {active, infinity}, 0, null}}|KeyList]
+                    ++ [{Key99, {99, {active, infinity}, 0, null}}],
     {InitHandle, InitFileMD} = create_file(Filename),
     {Handle, _FileMD, _Rem} = complete_file(InitHandle, InitFileMD,
                                             KeyListToUse, [],
                                             #level{level=1}),
     ok = file:close(Handle),
     {ok, SFTr, _KeyExtremes} = sft_open(Filename),
-    ?assertMatch({Key1, {1, {active, infinity}, null}},
+    ?assertMatch({Key1, {1, {active, infinity}, 0, null}},
                 sft_get(SFTr, Key1)),
-    ?assertMatch({Key99, {99, {active, infinity}, null}},
+    ?assertMatch({Key99, {99, {active, infinity}, 0, null}},
                 sft_get(SFTr, Key99)),
     ?assertMatch(not_present, sft_get(SFTr, KeyNF)),
 
diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl
index 63a3842..17da98c 100644
--- a/src/leveled_skiplist.erl
+++ b/src/leveled_skiplist.erl
@@ -22,6 +22,8 @@
     from_sortedlist/2,
     to_list/1,
     enter/3,
+    enter/4,
+    enter_nolookup/3,
     to_range/2,
     to_range/3,
     lookup/2,
@@ -43,17 +45,31 @@
 %%%============================================================================
 
 enter(Key, Value, SkipList) ->
-    Hash = erlang:phash2(Key),
-    case is_list(SkipList) of
-        true ->
-            enter(Key, Value, Hash, SkipList, ?SKIP_WIDTH, ?LIST_HEIGHT);
-        false ->
-            SkipList0 = add_to_array(Hash, SkipList),
-            NewListPart = enter(Key, Value, Hash,
-                                dict:fetch(?SKIP_WIDTH, SkipList0),
-                                ?SKIP_WIDTH, ?LIST_HEIGHT),
-            dict:store(?SKIP_WIDTH, NewListPart, SkipList0)
-    end.
+    Hash = leveled_codec:magic_hash(Key),
+    enter(Key, Hash, Value, SkipList).
+
+enter(Key, Hash, Value, SkipList) ->
+    Bloom0 =
+        case element(1, SkipList) of
+            list_only ->
+                list_only;
+            Bloom ->
+                leveled_tinybloom:enter({hash, Hash}, Bloom)
+        end,
+    {Bloom0,
+        enter(Key, Value, Hash,
+                element(2, SkipList),
+                ?SKIP_WIDTH, ?LIST_HEIGHT)}.
+
+%% Can iterate over a key entered this way, but never look up the key -
+%% used for index terms
+%% The key may still be a marker key - and the much cheaper native hash
+%% is used to determine this, avoiding the more expensive magic hash
+enter_nolookup(Key, Value, SkipList) ->
+    {element(1, SkipList),
+        enter(Key, Value, erlang:phash2(Key),
+                element(2, SkipList),
+                ?SKIP_WIDTH, ?LIST_HEIGHT)}.
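%% A minimal sketch (hypothetical key and value) of the intended split
%% between the two entry points above: enter/3 feeds the tiny bloom so
%% the key can later be found via lookup/2, whereas enter_nolookup/3
%% leaves the bloom untouched, so the entry can be iterated over but
%% never point-queried:
%%
%% SL0 = leveled_skiplist:empty(true),
%% SL1 = leveled_skiplist:enter_nolookup({o, "B", "K1", null}, v1, SL0),
%% none = leveled_skiplist:lookup({o, "B", "K1", null}, SL1),
%% [{_K, v1}] = leveled_skiplist:to_list(SL1).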
from_list(UnsortedKVL) ->
     from_list(UnsortedKVL, false).
@@ -66,71 +82,45 @@ from_sortedlist(SortedKVL) ->
     from_sortedlist(SortedKVL, false).
 
 from_sortedlist(SortedKVL, BloomProtect) ->
-    case BloomProtect of
-        true ->
-            SL0 = lists:foldr(fun({K, _V}, SkipL) ->
-                                    H = erlang:phash2(K),
-                                    add_to_array(H, SkipL) end,
-                                empty(true),
-                                SortedKVL),
-            dict:store(?SKIP_WIDTH,
-                        from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT),
-                        SL0);
-        false ->
-            from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)
-    end.
+    Bloom0 =
+        case BloomProtect of
+            true ->
+                lists:foldr(fun({K, _V}, Bloom) ->
+                                leveled_tinybloom:enter(K, Bloom) end,
+                            leveled_tinybloom:empty(?SKIP_WIDTH),
+                            SortedKVL);
+            false ->
+                list_only
+        end,
+    {Bloom0, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}.
 
 lookup(Key, SkipList) ->
-    case is_list(SkipList) of
-        true ->
-            list_lookup(Key, SkipList, ?LIST_HEIGHT);
-        false ->
-            lookup(Key, erlang:phash2(Key), SkipList)
+    case element(1, SkipList) of
+        list_only ->
+            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
+        _ ->
+            lookup(Key, leveled_codec:magic_hash(Key), SkipList)
     end.
 
 
 lookup(Key, Hash, SkipList) ->
-    {Slot, Bit} = hash_toslotbit(Hash),
-    RestLen = ?BITARRAY_SIZE - Bit - 1,
-    <<_Head:Bit/bitstring,
-        B:1/bitstring,
-        _Rest:RestLen/bitstring>> = dict:fetch(Slot, SkipList),
-    case B of
-        <<0:1>> ->
+    case leveled_tinybloom:check({hash, Hash}, element(1, SkipList)) of
+        false ->
             none;
-        <<1:1>> ->
-            list_lookup(Key, dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT)
+        true ->
+            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT)
     end.
 
 
 %% Rather than support iterator_from like gb_trees, will just output a key
 %% sorted list for the desired range, which can then be iterated over as normal
 to_range(SkipList, Start) ->
-    case is_list(SkipList) of
-        true ->
-            to_range(SkipList, Start, ?INFINITY_KEY, ?LIST_HEIGHT);
-        false ->
-            to_range(dict:fetch(?SKIP_WIDTH, SkipList),
-                        Start, ?INFINITY_KEY,
-                        ?LIST_HEIGHT)
-    end.
+    to_range(element(2, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT).
 
 to_range(SkipList, Start, End) ->
-    case is_list(SkipList) of
-        true ->
-            to_range(SkipList, Start, End, ?LIST_HEIGHT);
-        false ->
-            to_range(dict:fetch(?SKIP_WIDTH, SkipList),
-                        Start, End,
-                        ?LIST_HEIGHT)
-    end.
+    to_range(element(2, SkipList), Start, End, ?LIST_HEIGHT).
 
 to_list(SkipList) ->
-    case is_list(SkipList) of
-        true ->
-            to_list(SkipList, ?LIST_HEIGHT);
-        false ->
-            to_list(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT)
-    end.
+    to_list(element(2, SkipList), ?LIST_HEIGHT).
 
 empty() ->
     empty(false).
@@ -138,46 +128,20 @@ empty() ->
 empty(BloomProtect) ->
     case BloomProtect of
         true ->
-            FoldFun =
-                fun(X, Acc) -> dict:store(X, <<0:?BITARRAY_SIZE>>, Acc) end,
-            lists:foldl(FoldFun,
-                        dict:store(?SKIP_WIDTH,
-                                    empty([], ?LIST_HEIGHT),
-                                    dict:new()),
-                        lists:seq(0, ?SKIP_WIDTH - 1));
+            {leveled_tinybloom:empty(?SKIP_WIDTH),
+                empty([], ?LIST_HEIGHT)};
         false ->
-            empty([], ?LIST_HEIGHT)
+            {list_only, empty([], ?LIST_HEIGHT)}
     end.
 
 size(SkipList) ->
-    case is_list(SkipList) of
-        true ->
-            size(SkipList, ?LIST_HEIGHT);
-        false ->
-            size(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT)
-    end.
-
+    size(element(2, SkipList), ?LIST_HEIGHT).
 
 %%%============================================================================
 %%% SkipList Base Functions
 %%%============================================================================
 
-hash_toslotbit(Hash) ->
-    Slot = Hash band (?SKIP_WIDTH - 1),
-    Bit = (Hash bsr ?SKIP_WIDTH) band (?BITARRAY_SIZE - 1),
-    {Slot, Bit}.
-
-
-add_to_array(Hash, SkipList) ->
-    {Slot, Bit} = hash_toslotbit(Hash),
-    RestLen = ?BITARRAY_SIZE - Bit - 1,
-    <<Head:Bit/bitstring,
-        _B:1/bitstring,
-        Rest:RestLen/bitstring>> = dict:fetch(Slot, SkipList),
-    BitArray = <<Head/bitstring, 1:1, Rest/bitstring>>,
-    dict:store(Slot, BitArray, SkipList).
-
 enter(Key, Value, Hash, SkipList, Width, 1) ->
     {MarkerKey, SubList} = find_mark(Key, SkipList),
     case Hash rem Width of
@@ -488,68 +452,30 @@ dotest_skiplist_small(N) ->
 
 skiplist_withbloom_test() ->
     io:format(user, "~n~nBloom protected skiplist test:~n~n", []),
-    N = 4000,
-    KL = generate_randomkeys(1, N, 1, N div 5),
-
-    SWaGSL = os:timestamp(),
-    SkipList = from_list(lists:reverse(KL), true),
-    io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++
-                        "Top level key count of ~w~n",
-                    [N,
-                        timer:now_diff(os:timestamp(), SWaGSL),
-                        length(dict:fetch(?SKIP_WIDTH, SkipList))]),
-    io:format(user, "Second tier key counts of ~w~n",
-                [lists:map(fun({_L, SL}) -> length(SL) end,
-                    dict:fetch(?SKIP_WIDTH, SkipList))]),
-    KLSorted = lists:ukeysort(1, lists:reverse(KL)),
+    skiplist_tester(true).
 
-    SWaGSL2 = os:timestamp(),
-    SkipList = from_sortedlist(KLSorted, true),
-    io:format(user, "Generating skip list with ~w sorted keys in ~w " ++
-                        "microseconds~n",
-                    [N, timer:now_diff(os:timestamp(), SWaGSL2)]),
-
-    SWaDSL = os:timestamp(),
-    SkipList1 =
-        lists:foldl(fun({K, V}, SL) ->
-                        enter(K, V, SL)
-                    end,
-                    empty(true),
-                    KL),
-    io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++
-                        "microseconds~n" ++
-                        "Top level key count of ~w~n",
-                    [N,
-                        timer:now_diff(os:timestamp(), SWaDSL),
-                        length(dict:fetch(?SKIP_WIDTH, SkipList1))]),
-    io:format(user, "Second tier key counts of ~w~n",
-                [lists:map(fun({_L, SL}) -> length(SL) end,
-                    dict:fetch(?SKIP_WIDTH, SkipList1))]),
-
-    io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
-    skiplist_timingtest(KLSorted, SkipList, N),
-
-    io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
-    skiplist_timingtest(KLSorted, SkipList1, N).
-
 skiplist_nobloom_test() ->
     io:format(user, "~n~nBloom free skiplist test:~n~n", []),
+    skiplist_tester(false).
+ +skiplist_tester(Bloom) -> N = 4000, KL = generate_randomkeys(1, N, 1, N div 5), SWaGSL = os:timestamp(), - SkipList = from_list(lists:reverse(KL)), + SkipList = from_list(lists:reverse(KL), Bloom), io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++ "Top level key count of ~w~n", [N, timer:now_diff(os:timestamp(), SWaGSL), - length(SkipList)]), + length(element(2, SkipList))]), io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, SkipList)]), + [lists:map(fun({_L, SL}) -> length(SL) end, + element(2, SkipList))]), KLSorted = lists:ukeysort(1, lists:reverse(KL)), SWaGSL2 = os:timestamp(), - SkipList = from_sortedlist(KLSorted), + SkipList = from_sortedlist(KLSorted, Bloom), io:format(user, "Generating skip list with ~w sorted keys in ~w " ++ "microseconds~n", [N, timer:now_diff(os:timestamp(), SWaGSL2)]), @@ -559,25 +485,26 @@ skiplist_nobloom_test() -> lists:foldl(fun({K, V}, SL) -> enter(K, V, SL) end, - empty(), + empty(Bloom), KL), io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++ "microseconds~n" ++ "Top level key count of ~w~n", [N, timer:now_diff(os:timestamp(), SWaDSL), - length(SkipList1)]), + length(element(2, SkipList1))]), io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, SkipList1)]), + [lists:map(fun({_L, SL}) -> length(SL) end, + element(2, SkipList1))]), io:format(user, "~nRunning timing tests for generated skiplist:~n", []), - skiplist_timingtest(KLSorted, SkipList, N), + skiplist_timingtest(KLSorted, SkipList, N, Bloom), io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), - skiplist_timingtest(KLSorted, SkipList1, N). + skiplist_timingtest(KLSorted, SkipList1, N, Bloom). -skiplist_timingtest(KL, SkipList, N) -> +skiplist_timingtest(KL, SkipList, N, Bloom) -> io:format(user, "Timing tests on skiplist of size ~w~n", [leveled_skiplist:size(SkipList)]), CheckList1 = lists:sublist(KL, N div 4, 200), @@ -666,7 +593,24 @@ skiplist_timingtest(KL, SkipList, N) -> FlatList = to_list(SkipList), io:format(user, "Flattening skiplist took ~w microseconds~n", [timer:now_diff(os:timestamp(), SWg)]), - ?assertMatch(KL, FlatList). + ?assertMatch(KL, FlatList), + + case Bloom of + true -> + HashList = lists:map(fun(_X) -> + random:uniform(4296967295) end, + lists:seq(1, 2000)), + SWh = os:timestamp(), + lists:foreach(fun(X) -> + lookup(X, X, SkipList) end, + HashList), + io:format(user, + "Getting 2000 missing keys when hash was known " ++ + "took ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWh)]); + false -> + ok + end. define_kv(X) -> {{o, "Bucket", "Key" ++ string:right(integer_to_list(X), 6), null}, @@ -688,5 +632,21 @@ skiplist_roundsize_test() -> ?assertMatch(L, R) end, lists:seq(0, 24)). +skiplist_nolookup_test() -> + N = 4000, + KL = generate_randomkeys(1, N, 1, N div 5), + SkipList = lists:foldl(fun({K, V}, Acc) -> + enter_nolookup(K, V, Acc) end, + empty(true), + KL), + KLSorted = lists:ukeysort(1, lists:reverse(KL)), + lists:foreach(fun({K, _V}) -> + ?assertMatch(none, lookup(K, SkipList)) end, + KL), + ?assertMatch(KLSorted, to_list(SkipList)). + +empty_skiplist_size_test() -> + ?assertMatch(0, leveled_skiplist:size(empty(false))), + ?assertMatch(0, leveled_skiplist:size(empty(true))). -endif. 
\ No newline at end of file
diff --git a/src/leveled_tinybloom.erl b/src/leveled_tinybloom.erl
new file mode 100644
index 0000000..166d616
--- /dev/null
+++ b/src/leveled_tinybloom.erl
@@ -0,0 +1,151 @@
+%% -------- TINY BLOOM ---------
+%%
+%% For sheltering relatively expensive lookups with a probabilistic check
+%%
+%% Uses multiple 256 byte blooms.  Can sensibly hold up to 1000 keys per array.
+%% Even at 1000 keys should still offer only a 20% false positive rate
+%%
+%% Restricted to no more than 256 arrays - so can't handle more than 250K keys
+%% in total
+%%
+%% Implemented this way to make it easy to control the false positive rate
+%% (just by setting the width).  Also only requires binary manipulations of
+%% a single hash
+
+-module(leveled_tinybloom).
+
+-include("include/leveled.hrl").
+
+-export([
+        enter/2,
+        check/2,
+        empty/1
+        ]).
+
+-include_lib("eunit/include/eunit.hrl").
+
+%%%============================================================================
+%%% Bloom API
+%%%============================================================================
+
+
+empty(Width) when Width =< 256 ->
+    FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end,
+    lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)).
+
+enter({hash, Hash}, Bloom) ->
+    {H0, Bit1, Bit2} = split_hash(Hash),
+    Slot = H0 rem dict:size(Bloom),
+    BitArray0 = dict:fetch(Slot, Bloom),
+    BitArray1 = lists:foldl(fun add_to_array/2,
+                            BitArray0,
+                            lists:usort([Bit1, Bit2])),
+    dict:store(Slot, BitArray1, Bloom);
+enter(Key, Bloom) ->
+    Hash = leveled_codec:magic_hash(Key),
+    enter({hash, Hash}, Bloom).
+
+check({hash, Hash}, Bloom) ->
+    {H0, Bit1, Bit2} = split_hash(Hash),
+    Slot = H0 rem dict:size(Bloom),
+    BitArray = dict:fetch(Slot, Bloom),
+    case getbit(Bit1, BitArray) of
+        <<0:1>> ->
+            false;
+        <<1:1>> ->
+            case getbit(Bit2, BitArray) of
+                <<0:1>> ->
+                    false;
+                <<1:1>> ->
+                    true
+            end
+    end;
+check(Key, Bloom) ->
+    Hash = leveled_codec:magic_hash(Key),
+    check({hash, Hash}, Bloom).
+
+%%%============================================================================
+%%% Internal Functions
+%%%============================================================================
+
+split_hash(Hash) ->
+    H0 = Hash band 255,
+    H1 = (Hash bsr 8) band 4095,
+    H2 = Hash bsr 20,
+    {H0, H1, H2}.
+
+add_to_array(Bit, BitArray) ->
+    RestLen = 4096 - Bit - 1,
+    <<Head:Bit/bitstring,
+        _B:1/bitstring,
+        Rest:RestLen/bitstring>> = BitArray,
+    <<Head/bitstring, 1:1, Rest/bitstring>>.
+
+getbit(Bit, BitArray) ->
+    RestLen = 4096 - Bit - 1,
+    <<_Head:Bit/bitstring,
+        B:1/bitstring,
+        _Rest:RestLen/bitstring>> = BitArray,
+    B.
+
+
+%%%============================================================================
+%%% Test
+%%%============================================================================
+
+-ifdef(TEST).
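%% A worked example (hypothetical hash value) of how split_hash/1
%% divides a single 32-bit magic hash: the low 8 bits select the bloom
%% slot, while the next 12 bits and the top 12 bits give two bit
%% positions within that slot's 4096-bit array:
%%
%% {16#12, 16#DEF, 16#ABC} = split_hash(16#ABCDEF12),
%% %% i.e. slot selector 18 with bit positions 3567 and 2748; in a
%% %% width-4 bloom the key therefore lands in slot 18 rem 4 = 2.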
+ +simple_test() -> + N = 4000, + W = 4, + KLin = lists:map(fun(X) -> "Key_" ++ + integer_to_list(X) ++ + integer_to_list(random:uniform(100)) ++ + binary_to_list(crypto:rand_bytes(2)) + end, + lists:seq(1, N)), + KLout = lists:map(fun(X) -> + "NotKey_" ++ + integer_to_list(X) ++ + integer_to_list(random:uniform(100)) ++ + binary_to_list(crypto:rand_bytes(2)) + end, + lists:seq(1, N)), + SW0_PH = os:timestamp(), + lists:foreach(fun(X) -> erlang:phash2(X) end, KLin), + io:format(user, + "~nNative hash function hashes ~w keys in ~w microseconds~n", + [N, timer:now_diff(os:timestamp(), SW0_PH)]), + SW0_MH = os:timestamp(), + lists:foreach(fun(X) -> leveled_codec:magic_hash(X) end, KLin), + io:format(user, + "~nMagic hash function hashes ~w keys in ~w microseconds~n", + [N, timer:now_diff(os:timestamp(), SW0_MH)]), + + SW1 = os:timestamp(), + Bloom = lists:foldr(fun enter/2, empty(W), KLin), + io:format(user, + "~nAdding ~w keys to bloom took ~w microseconds~n", + [N, timer:now_diff(os:timestamp(), SW1)]), + + SW2 = os:timestamp(), + lists:foreach(fun(X) -> ?assertMatch(true, check(X, Bloom)) end, KLin), + io:format(user, + "~nChecking ~w keys in bloom took ~w microseconds~n", + [N, timer:now_diff(os:timestamp(), SW2)]), + + SW3 = os:timestamp(), + FP = lists:foldr(fun(X, Acc) -> case check(X, Bloom) of + true -> Acc + 1; + false -> Acc + end end, + 0, + KLout), + io:format(user, + "~nChecking ~w keys out of bloom took ~w microseconds " ++ + "with ~w false positive rate~n", + [N, timer:now_diff(os:timestamp(), SW3), FP / N]), + ?assertMatch(true, FP < (N div 4)). + + +-endif. \ No newline at end of file From ccc993383d93078cdf8a39a8042ba25d33b89c39 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 01:21:53 +0000 Subject: [PATCH 12/34] Stop second hash on fetch_head The bookie should magic_hash for fetch_head, and now passes the hash to the Penciller so second hash not required. --- src/leveled_bookie.erl | 23 +++++++++++++++-------- src/leveled_log.erl | 2 -- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 62892ec..412eec0 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -728,15 +728,22 @@ startup(InkerOpts, PencillerOpts) -> fetch_head(Key, Penciller, LedgerCache) -> - case leveled_skiplist:lookup(Key, LedgerCache#ledger_cache.skiplist) of - {value, Head} -> - Head; - none -> - case leveled_penciller:pcl_fetch(Penciller, Key) of - {Key, Head} -> + Hash = leveled_codec:magic_hash(Key), + if + Hash /= no_lookup -> + L0R = leveled_skiplist:lookup(Key, + Hash, + LedgerCache#ledger_cache.skiplist), + case L0R of + {value, Head} -> Head; - not_present -> - not_present + none -> + case leveled_penciller:pcl_fetch(Penciller, Key, Hash) of + {Key, Head} -> + Head; + not_present -> + not_present + end end end. 
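The effect is that a HEAD request now hashes its key exactly once: fetch_head computes the magic hash, probes the ledger cache's skiplist (and so its bloom) with it, and passes the same hash on to the penciller, which reuses it against the L0 cache. A condensed sketch of the flow, with illustrative names and the penciller's raw reply returned as-is:

head_path(Key, Penciller, SkipList) ->
    Hash = leveled_codec:magic_hash(Key),
    case leveled_skiplist:lookup(Key, Hash, SkipList) of
        {value, Head} ->
            Head;
        none ->
            leveled_penciller:pcl_fetch(Penciller, Key, Hash)
    end.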
diff --git a/src/leveled_log.erl b/src/leveled_log.erl index fa26555..a10e641 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -198,8 +198,6 @@ {"IC011", {info, "Not clearing filename ~s as modified delta is only ~w seconds"}}, - {"PM001", - {info, "Indexed new cache entry with total L0 cache size now ~w"}}, {"PM002", {info, "Completed dump of L0 cache to list of size ~w"}}, From 1f38bcb3285288a23107949520c98d13e165dcc6 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 01:32:32 +0000 Subject: [PATCH 13/34] Magic Hash vs phash2 Magic Hash broke Skip List organisation --- src/leveled_skiplist.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl index 17da98c..142afc5 100644 --- a/src/leveled_skiplist.erl +++ b/src/leveled_skiplist.erl @@ -57,7 +57,7 @@ enter(Key, Hash, Value, SkipList) -> leveled_tinybloom:enter({hash, Hash}, Bloom) end, {Bloom0, - enter(Key, Value, Hash, + enter(Key, Value, erlang:phash2(Key), element(2, SkipList), ?SKIP_WIDTH, ?LIST_HEIGHT)}. From 5d11bc051f9d1a630d82ba859dc68097331bc5f9 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 01:49:03 +0000 Subject: [PATCH 14/34] Allow for more fluctuation in L0 write time Try to alleviate existing co-ordination issue when all vnodes tend to try and write L0 files concurrently --- src/leveled_penciller.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index a1ab9b1..93a9094 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -197,10 +197,10 @@ -define(CURRENT_FILEX, "crr"). -define(PENDING_FILEX, "pnd"). -define(MEMTABLE, mem). --define(MAX_TABLESIZE, 32000). +-define(MAX_TABLESIZE, 28000). % This is less than max - but COIN_SIDECOUNT -define(PROMPT_WAIT_ONL0, 5). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). --define(COIN_SIDECOUNT, 2). +-define(COIN_SIDECOUNT, 4). -record(state, {manifest = [] :: list(), manifest_sqn = 0 :: integer(), From 2c7fdc74d484385aa973e6d658038029048f6565 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 01:58:25 +0000 Subject: [PATCH 15/34] Setting fiddling Try to find a happy medium --- src/leveled_penciller.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 93a9094..6cdf265 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -197,7 +197,8 @@ -define(CURRENT_FILEX, "crr"). -define(PENDING_FILEX, "pnd"). -define(MEMTABLE, mem). --define(MAX_TABLESIZE, 28000). % This is less than max - but COIN_SIDECOUNT +-define(MAX_TABLESIZE, 25000). % This is less than max - but COIN_SIDECOUNT +-define(SUPER_MAX_TABLE_SIZE, 45000) -define(PROMPT_WAIT_ONL0, 5). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). -define(COIN_SIDECOUNT, 4). 
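Combined, these defines mean an L0 write is normally attempted only when the cache is past ?MAX_TABLESIZE, level zero is free, and a roughly 1-in-?COIN_SIDECOUNT coin toss succeeds, with ?SUPER_MAX_TABLE_SIZE acting as an override so a persistently unlucky cache cannot grow without bound; the hunk that follows wires this in. As a sketch, using the variable names from update_levelzero:

ShouldRoll =
    CacheTooBig andalso Level0Free
        andalso (RandomFactor orelse CacheMuchTooBig)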
@@ -645,6 +646,7 @@ update_levelzero(L0Size, {PushedTree, MinSQN, MaxSQN}, levelzero_size=NewL0Size, ledger_sqn=UpdMaxSQN}, CacheTooBig = NewL0Size > State#state.levelzero_maxcachesize, + CacheMuchTooBig = NewL0Size > ?SUPER_MAX_TABLE_SIZE, Level0Free = length(get_item(0, State#state.manifest, [])) == 0, RandomFactor = case State#state.levelzero_cointoss of @@ -658,7 +660,7 @@ update_levelzero(L0Size, {PushedTree, MinSQN, MaxSQN}, false -> true end, - case {CacheTooBig, Level0Free, RandomFactor} of + case {CacheTooBig, Level0Free, RandomFactor or CacheMuchTooBig} of {true, true, true} -> L0Constructor = roll_memory(UpdState, false), UpdState#state{levelzero_pending=true, From ea8f3c07a7d52b16858f7055d6a7c90e4f56e57e Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 02:00:19 +0000 Subject: [PATCH 16/34] oops --- src/leveled_penciller.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 6cdf265..b937d30 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -198,7 +198,7 @@ -define(PENDING_FILEX, "pnd"). -define(MEMTABLE, mem). -define(MAX_TABLESIZE, 25000). % This is less than max - but COIN_SIDECOUNT --define(SUPER_MAX_TABLE_SIZE, 45000) +-define(SUPER_MAX_TABLE_SIZE, 45000). -define(PROMPT_WAIT_ONL0, 5). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). -define(COIN_SIDECOUNT, 4). From 523716e8f2bcbc4cf0df16268b84d5367d45dd70 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 04:48:50 +0000 Subject: [PATCH 17/34] Add tiny bloom to Penciller Manifest This is an attempt to save on unnecessary message transfers, and slightly more expensive GCS checks in the SFT file itself. --- include/leveled.hrl | 1 + src/leveled_pclerk.erl | 2 + src/leveled_penciller.erl | 41 ++++---- src/leveled_sft.erl | 195 ++++++++++++++++++++++++-------------- src/leveled_tinybloom.erl | 4 + 5 files changed, 153 insertions(+), 90 deletions(-) diff --git a/include/leveled.hrl b/include/leveled.hrl index 25216f6..f57ffd4 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -41,6 +41,7 @@ {start_key :: tuple(), end_key :: tuple(), owner :: pid(), + bloom, filename :: string()}). -record(cdb_options, diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 272071d..649973b 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -320,6 +320,7 @@ do_merge(KL1, KL2, {SrcLevel, IsB}, {Filepath, MSN}, FileCounter, OutList) -> KL1, KL2, LevelR), + {ok, Bloom} = leveled_sft:sft_getbloom(Pid), case Reply of {{[], []}, null, _} -> leveled_log:log("PC013", [FileName]), @@ -331,6 +332,7 @@ do_merge(KL1, KL2, {SrcLevel, IsB}, {Filepath, MSN}, FileCounter, OutList) -> [#manifest_entry{start_key=SmallestKey, end_key=HighestKey, owner=Pid, + bloom=Bloom, filename=FileName}]), leveled_log:log_timer("PC015", [], TS1), do_merge(KL1Rem, KL2Rem, diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index b937d30..ee0b921 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -175,7 +175,7 @@ pcl_checksequencenumber/4, pcl_workforclerk/1, pcl_promptmanifestchange/2, - pcl_confirml0complete/4, + pcl_confirml0complete/5, pcl_confirmdelete/2, pcl_close/1, pcl_doom/1, @@ -285,8 +285,8 @@ pcl_workforclerk(Pid) -> pcl_promptmanifestchange(Pid, WI) -> gen_server:cast(Pid, {manifest_change, WI}). -pcl_confirml0complete(Pid, FN, StartKey, EndKey) -> - gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey}). 
+pcl_confirml0complete(Pid, FN, StartKey, EndKey, Bloom) -> + gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey, Bloom}). pcl_confirmdelete(Pid, FileName) -> gen_server:cast(Pid, {confirm_delete, FileName}). @@ -454,10 +454,11 @@ handle_cast({confirm_delete, FileName}, State=#state{is_snapshot=Snap}) _ -> {noreply, State} end; -handle_cast({levelzero_complete, FN, StartKey, EndKey}, State) -> +handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) -> leveled_log:log("P0029", []), ManEntry = #manifest_entry{start_key=StartKey, end_key=EndKey, + bloom=Bloom, owner=State#state.levelzero_constructor, filename=FN}, UpdMan = lists:keystore(0, 1, State#state.manifest, {0, [ManEntry]}), @@ -721,34 +722,40 @@ fetch_mem(Key, Hash, Manifest, L0Cache) -> L0Check = leveled_pmem:check_levelzero(Key, Hash, L0Cache), case L0Check of {false, not_found} -> - fetch(Key, Manifest, 0, fun leveled_sft:sft_get/2); + fetch(Key, Hash, Manifest, 0, fun leveled_sft:sft_get/2); {true, KV} -> KV end. -fetch(_Key, _Manifest, ?MAX_LEVELS + 1, _FetchFun) -> +fetch(_Key, _Hash, _Manifest, ?MAX_LEVELS + 1, _FetchFun) -> not_present; -fetch(Key, Manifest, Level, FetchFun) -> +fetch(Key, Hash, Manifest, Level, FetchFun) -> LevelManifest = get_item(Level, Manifest, []), case lists:foldl(fun(File, Acc) -> case Acc of not_present when Key >= File#manifest_entry.start_key, File#manifest_entry.end_key >= Key -> - File#manifest_entry.owner; - PidFound -> - PidFound + {File#manifest_entry.owner, + File#manifest_entry.bloom}; + FoundDetails -> + FoundDetails end end, not_present, LevelManifest) of not_present -> - fetch(Key, Manifest, Level + 1, FetchFun); - FileToCheck -> - case FetchFun(FileToCheck, Key) of - not_present -> - fetch(Key, Manifest, Level + 1, FetchFun); - ObjectFound -> - ObjectFound + fetch(Key, Hash, Manifest, Level + 1, FetchFun); + {FileToCheck, Bloom} -> + case leveled_tinybloom:check({hash, Hash}, Bloom) of + true -> + case FetchFun(FileToCheck, Key) of + not_present -> + fetch(Key, Hash, Manifest, Level + 1, FetchFun); + ObjectFound -> + ObjectFound + end; + false -> + fetch(Key, Hash, Manifest, Level + 1, FetchFun) end end. diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl index 4c86dff..4f67adf 100644 --- a/src/leveled_sft.erl +++ b/src/leveled_sft.erl @@ -161,6 +161,7 @@ sft_newfroml0cache/4, sft_open/1, sft_get/2, + sft_getbloom/1, sft_getkvrange/4, sft_close/1, sft_clear/1, @@ -189,6 +190,7 @@ -define(HEADER_LEN, 56). -define(ITERATOR_SCANWIDTH, 1). -define(MERGE_SCANWIDTH, 32). +-define(BLOOM_WIDTH, 48). -define(DELETE_TIMEOUT, 10000). -define(MAX_KEYS, ?SLOT_COUNT * ?BLOCK_COUNT * ?BLOCK_SIZE). -define(DISCARD_EXT, ".discarded"). @@ -211,7 +213,8 @@ handle :: file:fd(), background_complete = false :: boolean(), oversized_file = false :: boolean(), - penciller :: pid()}). + penciller :: pid(), + bloom}). %%%============================================================================ @@ -268,6 +271,9 @@ sft_open(Filename) -> sft_setfordelete(Pid, Penciller) -> gen_fsm:sync_send_event(Pid, {set_for_delete, Penciller}, infinity). +sft_getbloom(Pid) -> + gen_fsm:sync_send_event(Pid, get_bloom, infinity). + sft_get(Pid, Key) -> gen_fsm:sync_send_event(Pid, {get_kv, Key}, infinity). 
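Since a tiny bloom can return false positives but never false negatives, the gate added to fetch/5 above is safe: a true from leveled_tinybloom:check/2 may still end in not_present after the sft_get, but a false can never hide a key the file actually holds, so each skipped file costs only the in-memory check. The per-file probe reduces to this shape (illustrative atom for the miss case):

case leveled_tinybloom:check({hash, Hash}, Bloom) of
    false ->
        try_next_level;                     % guaranteed absent here
    true ->
        leveled_sft:sft_get(Owner, Key)     % may still be not_present
end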
@@ -342,8 +348,9 @@ starting({sft_newfroml0cache, Filename, Slots, FetchFun, PCL}, _State) -> leveled_penciller:pcl_confirml0complete(PCL, State#state.filename, State#state.smallest_key, - State#state.highest_key), - {next_state, reader, State} + State#state.highest_key, + State#state.bloom), + {next_state, reader, State#state{bloom=none}} end. @@ -378,6 +385,12 @@ reader(background_complete, _From, State) -> reader, State} end; +reader(get_bloom, _From, State) -> + Bloom = State#state.bloom, + if + Bloom /= none -> + {reply, {ok, Bloom}, reader, State#state{bloom=none}} + end; reader(close, _From, State) -> ok = file:close(State#state.handle), {stop, normal, ok, State}. @@ -510,7 +523,7 @@ open_file(FileMD) -> Slen:32/integer>> = HeaderLengths, {ok, SummaryBin} = file:pread(Handle, ?HEADER_LEN + Blen + Ilen + Flen, Slen), - {{LowSQN, HighSQN}, {LowKey, HighKey}} = binary_to_term(SummaryBin), + {{LowSQN, HighSQN}, {LowKey, HighKey}, Bloom} = binary_to_term(SummaryBin), {ok, SlotIndexBin} = file:pread(Handle, ?HEADER_LEN + Blen, Ilen), SlotIndex = binary_to_term(SlotIndexBin), {Handle, FileMD#state{slot_index=SlotIndex, @@ -523,7 +536,8 @@ open_file(FileMD) -> filter_pointer=?HEADER_LEN + Blen + Ilen, summ_pointer=?HEADER_LEN + Blen + Ilen + Flen, summ_length=Slen, - handle=Handle}}. + handle=Handle, + bloom=Bloom}}. %% Take a file handle with a previously created header and complete it based on %% the two key lists KL1 and KL2 @@ -531,10 +545,11 @@ complete_file(Handle, FileMD, KL1, KL2, LevelR) -> complete_file(Handle, FileMD, KL1, KL2, LevelR, false). complete_file(Handle, FileMD, KL1, KL2, LevelR, Rename) -> + EmptyBloom = leveled_tinybloom:empty(?BLOOM_WIDTH), {ok, KeyRemainders} = write_keys(Handle, maybe_expand_pointer(KL1), maybe_expand_pointer(KL2), - [], <<>>, + [], <<>>, EmptyBloom, LevelR, fun sftwrite_function/2), {ReadHandle, UpdFileMD} = case Rename of @@ -769,12 +784,12 @@ get_nextkeyaftermatch([_KTuple|T], KeyToFind, PrevV) -> write_keys(Handle, KL1, KL2, - SlotIndex, SerialisedSlots, + SlotIndex, SerialisedSlots, InitialBloom, LevelR, WriteFun) -> write_keys(Handle, KL1, KL2, {0, 0}, - SlotIndex, SerialisedSlots, + SlotIndex, SerialisedSlots, InitialBloom, {infinity, 0}, null, {last, null}, LevelR, WriteFun). 
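
The bloom travels inside the summary term, so the file layout is otherwise untouched. A minimal sketch of the round-trip, assuming the pointer and length values derived from the header as in open_file/1 above and sftwrite_function below (the *_sketch names are illustrative, not module functions):

    summary_to_disk_sketch(SNExtremes, KeyExtremes, Bloom) ->
        %% The summary is a plain term; the length is recorded in the header
        Summary = term_to_binary({SNExtremes, KeyExtremes, Bloom}),
        {Summary, byte_size(Summary)}.

    summary_from_disk_sketch(Handle, SummPointer, SummLength) ->
        {ok, SummaryBin} = file:pread(Handle, SummPointer, SummLength),
        {_SNExtremes, _KeyExtremes, Bloom} = binary_to_term(SummaryBin),
        Bloom.
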
@@ -782,7 +797,7 @@ write_keys(Handle, write_keys(Handle, KL1, KL2, {SlotCount, SlotTotal}, - SlotIndex, SerialisedSlots, + SlotIndex, SerialisedSlots, Bloom, {LSN, HSN}, LowKey, LastKey, LevelR, WriteFun) when SlotCount =:= ?SLOT_GROUPWRITE_COUNT -> @@ -791,26 +806,27 @@ write_keys(Handle, reached -> {complete_keywrite(UpdHandle, SlotIndex, - {LSN, HSN}, {LowKey, LastKey}, + {{LSN, HSN}, {LowKey, LastKey}, Bloom}, WriteFun), {KL1, KL2}}; continue -> write_keys(UpdHandle, KL1, KL2, {0, SlotTotal}, - SlotIndex, <<>>, + SlotIndex, <<>>, Bloom, {LSN, HSN}, LowKey, LastKey, LevelR, WriteFun) end; write_keys(Handle, KL1, KL2, {SlotCount, SlotTotal}, - SlotIndex, SerialisedSlots, + SlotIndex, SerialisedSlots, Bloom, {LSN, HSN}, LowKey, LastKey, LevelR, WriteFun) -> - SlotOutput = create_slot(KL1, KL2, LevelR), + SlotOutput = create_slot(KL1, KL2, LevelR, Bloom), {{LowKey_Slot, SegFilter, SerialisedSlot, LengthList}, {{LSN_Slot, HSN_Slot}, LastKey_Slot, Status}, + UpdBloom, KL1rem, KL2rem} = SlotOutput, UpdSlotIndex = lists:append(SlotIndex, [{LowKey_Slot, SegFilter, LengthList}]), @@ -829,34 +845,34 @@ write_keys(Handle, UpdHandle = WriteFun(slots , {Handle, UpdSlots}), {complete_keywrite(UpdHandle, UpdSlotIndex, - SNExtremes, {FirstKey, FinalKey}, + {SNExtremes, {FirstKey, FinalKey}, UpdBloom}, WriteFun), {KL1rem, KL2rem}}; full -> write_keys(Handle, KL1rem, KL2rem, {SlotCount + 1, SlotTotal + 1}, - UpdSlotIndex, UpdSlots, + UpdSlotIndex, UpdSlots, UpdBloom, SNExtremes, FirstKey, FinalKey, LevelR, WriteFun); complete -> UpdHandle = WriteFun(slots , {Handle, UpdSlots}), {complete_keywrite(UpdHandle, UpdSlotIndex, - SNExtremes, {FirstKey, FinalKey}, + {SNExtremes, {FirstKey, FinalKey}, UpdBloom}, WriteFun), {KL1rem, KL2rem}} end. -complete_keywrite(Handle, SlotIndex, - SNExtremes, {FirstKey, FinalKey}, +complete_keywrite(Handle, + SlotIndex, + {SNExtremes, {FirstKey, FinalKey}, Bloom}, WriteFun) -> ConvSlotIndex = convert_slotindex(SlotIndex), WriteFun(finalise, {Handle, ConvSlotIndex, - SNExtremes, - {FirstKey, FinalKey}}). + {SNExtremes, {FirstKey, FinalKey}, Bloom}}). %% Take a slot index, and remove the SegFilters replacing with pointers @@ -884,16 +900,15 @@ sftwrite_function(slots, {Handle, SerialisedSlots}) -> Handle; sftwrite_function(finalise, {Handle, - {SlotFilters, PointerIndex}, - SNExtremes, - KeyExtremes}) -> + {SlotFilters, PointerIndex}, + {SNExtremes, KeyExtremes, Bloom}}) -> {ok, Position} = file:position(Handle, cur), BlocksLength = Position - ?HEADER_LEN, Index = term_to_binary(PointerIndex), IndexLength = byte_size(Index), FilterLength = byte_size(SlotFilters), - Summary = term_to_binary({SNExtremes, KeyExtremes}), + Summary = term_to_binary({SNExtremes, KeyExtremes, Bloom}), SummaryLength = byte_size(Summary), %% Write Index, Filter and Summary ok = file:write(Handle, < %% Also this should return a partial block if the KeyLists have been exhausted %% but the block is full -create_block(KeyList1, KeyList2, LevelR) -> - create_block(KeyList1, KeyList2, [], {infinity, 0}, [], LevelR). +create_block(KeyList1, KeyList2, LevelR, Bloom) -> + create_block(KeyList1, KeyList2, [], {infinity, 0}, [], LevelR, Bloom). 
create_block(KeyList1, KeyList2, - BlockKeyList, {LSN, HSN}, SegmentList, _LevelR) + BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom) when length(BlockKeyList)==?BLOCK_SIZE -> case {KeyList1, KeyList2} of {[], []} -> - {BlockKeyList, complete, {LSN, HSN}, SegmentList, [], []}; + {BlockKeyList, complete, {LSN, HSN}, SegmentList, + Bloom, + [], []}; _ -> - {BlockKeyList, full, {LSN, HSN}, SegmentList, KeyList1, KeyList2} + {BlockKeyList, full, {LSN, HSN}, SegmentList, + Bloom, + KeyList1, KeyList2} end; -create_block([], [], - BlockKeyList, {LSN, HSN}, SegmentList, _LevelR) -> - {BlockKeyList, partial, {LSN, HSN}, SegmentList, [], []}; +create_block([], [], BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom) -> + {BlockKeyList, partial, {LSN, HSN}, SegmentList, + Bloom, + [], []}; create_block(KeyList1, KeyList2, - BlockKeyList, {LSN, HSN}, SegmentList, LevelR) -> + BlockKeyList, {LSN, HSN}, SegmentList, LevelR, Bloom) -> case key_dominates(KeyList1, KeyList2, {LevelR#level.is_basement, LevelR#level.timestamp}) of {{next_key, TopKey}, Rem1, Rem2} -> - {UpdLSN, UpdHSN} = update_sequencenumbers(TopKey, LSN, HSN), + {_K, V} = TopKey, + {SQN, _St, MH, _MD} = leveled_codec:striphead_to_details(V), + {UpdLSN, UpdHSN} = update_sequencenumbers(SQN, LSN, HSN), + UpdBloom = leveled_tinybloom:enter({hash, MH}, Bloom), NewBlockKeyList = lists:append(BlockKeyList, [TopKey]), NewSegmentList = lists:append(SegmentList, - [hash_for_segmentid(TopKey)]), + [hash_for_segmentid(TopKey)]), create_block(Rem1, Rem2, NewBlockKeyList, {UpdLSN, UpdHSN}, - NewSegmentList, LevelR); + NewSegmentList, LevelR, UpdBloom); {skipped_key, Rem1, Rem2} -> create_block(Rem1, Rem2, BlockKeyList, {LSN, HSN}, - SegmentList, LevelR) + SegmentList, LevelR, Bloom) end. @@ -996,33 +1019,43 @@ create_block(KeyList1, KeyList2, %% - Remainder of any KeyLists used to make the slot -create_slot(KeyList1, KeyList2, Level) -> - create_slot(KeyList1, KeyList2, Level, ?BLOCK_COUNT, [], <<>>, [], - {null, infinity, 0, null, full}). +create_slot(KeyList1, KeyList2, Level, Bloom) -> + create_slot(KeyList1, KeyList2, Level, ?BLOCK_COUNT, Bloom, + [], <<>>, [], + {null, infinity, 0, null, full}). 
%% Keep adding blocks to the slot until either the block count is reached or %% there is a partial block -create_slot(KL1, KL2, _, 0, SegLists, SerialisedSlot, LengthList, - {LowKey, LSN, HSN, LastKey, Status}) -> +create_slot(KL1, KL2, _, 0, Bloom, + SegLists, SerialisedSlot, LengthList, + {LowKey, LSN, HSN, LastKey, Status}) -> {{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList}, {{LSN, HSN}, LastKey, Status}, + Bloom, KL1, KL2}; -create_slot(KL1, KL2, _, _, SegLists, SerialisedSlot, LengthList, - {LowKey, LSN, HSN, LastKey, partial}) -> +create_slot(KL1, KL2, _, _, Bloom, + SegLists, SerialisedSlot, LengthList, + {LowKey, LSN, HSN, LastKey, partial}) -> {{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList}, {{LSN, HSN}, LastKey, partial}, + Bloom, KL1, KL2}; -create_slot(KL1, KL2, _, _, SegLists, SerialisedSlot, LengthList, - {LowKey, LSN, HSN, LastKey, complete}) -> +create_slot(KL1, KL2, _, _, Bloom, + SegLists, SerialisedSlot, LengthList, + {LowKey, LSN, HSN, LastKey, complete}) -> {{LowKey, generate_segment_filter(SegLists), SerialisedSlot, LengthList}, {{LSN, HSN}, LastKey, partial}, + Bloom, KL1, KL2}; -create_slot(KL1, KL2, LevelR, BlockCount, SegLists, SerialisedSlot, LengthList, - {LowKey, LSN, HSN, LastKey, _Status}) -> +create_slot(KL1, KL2, LevelR, BlockCount, Bloom, + SegLists, SerialisedSlot, LengthList, + {LowKey, LSN, HSN, LastKey, _Status}) -> {BlockKeyList, Status, {LSNb, HSNb}, - SegmentList, KL1b, KL2b} = create_block(KL1, KL2, LevelR), + SegmentList, + UpdBloom, + KL1b, KL2b} = create_block(KL1, KL2, LevelR, Bloom), TrackingMetadata = case {LowKey, BlockKeyList} of {null, []} -> {null, LSN, HSN, LastKey, Status}; @@ -1043,9 +1076,10 @@ create_slot(KL1, KL2, LevelR, BlockCount, SegLists, SerialisedSlot, LengthList, SerialisedBlock = serialise_block(BlockKeyList), BlockLength = byte_size(SerialisedBlock), SerialisedSlot2 = <>, - create_slot(KL1b, KL2b, LevelR, BlockCount - 1, SegLists ++ [SegmentList], - SerialisedSlot2, LengthList ++ [BlockLength], - TrackingMetadata). + SegList2 = SegLists ++ [SegmentList], + create_slot(KL1b, KL2b, LevelR, BlockCount - 1, UpdBloom, + SegList2, SerialisedSlot2, LengthList ++ [BlockLength], + TrackingMetadata). serialise_block(BlockKeyList) -> term_to_binary(BlockKeyList, [{compressed, ?COMPRESSION_LEVEL}]). 
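
As the reworked create_block and create_slot clauses show, the bloom is just another accumulator threaded through the fold over keys. Stripped of the block bookkeeping, the pattern reduces to this illustrative sketch (build_bloom_sketch/1 is hypothetical, reusing the ?BLOOM_WIDTH sizing above):

    build_bloom_sketch(Keys) ->
        %% enter/2 hashes each key and sets its bits in the accumulator
        lists:foldl(fun leveled_tinybloom:enter/2,
                    leveled_tinybloom:empty(?BLOOM_WIDTH),
                    Keys).

In the real create_block/7 it is the metadata hash that is entered, and the new enter({hash, no_lookup}, Bloom) clause means keys that cannot be looked up leave the bloom untouched.
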
@@ -1133,8 +1167,6 @@ pointer_append_queryresults(Results, QueryPid) -> %% Update the sequence numbers -update_sequencenumbers(Item, LSN, HSN) when is_tuple(Item) -> - update_sequencenumbers(leveled_codec:strip_to_seqonly(Item), LSN, HSN); update_sequencenumbers(SN, infinity, 0) -> {SN, SN}; update_sequencenumbers(SN, LSN, HSN) when SN < LSN -> @@ -1433,9 +1465,11 @@ simple_create_block_test() -> {2, {active, infinity}, no_lookup, null}}], KeyList2 = [{{o, "Bucket1", "Key2", null}, {3, {active, infinity}, no_lookup, null}}], - {MergedKeyList, ListStatus, SN, _, _, _} = create_block(KeyList1, - KeyList2, - #level{level=1}), + BlockOutput = create_block(KeyList1, + KeyList2, + #level{level=1}, + leveled_tinybloom:empty(4)), + {MergedKeyList, ListStatus, SN, _, _, _, _} = BlockOutput, ?assertMatch(partial, ListStatus), [H1|T1] = MergedKeyList, ?assertMatch({{o, "Bucket1", "Key1", null}, @@ -1454,9 +1488,11 @@ dominate_create_block_test() -> {2, {active, infinity}, no_lookup, null}}], KeyList2 = [{{o, "Bucket1", "Key2", null}, {3, {tomb, infinity}, no_lookup, null}}], - {MergedKeyList, ListStatus, SN, _, _, _} = create_block(KeyList1, - KeyList2, - #level{level=1}), + BlockOutput = create_block(KeyList1, + KeyList2, + #level{level=1}, + leveled_tinybloom:empty(4)), + {MergedKeyList, ListStatus, SN, _, _, _, _} = BlockOutput, ?assertMatch(partial, ListStatus), [K1, K2] = MergedKeyList, ?assertMatch(K1, lists:nth(1, KeyList1)), @@ -1502,9 +1538,11 @@ sample_keylist() -> alternating_create_block_test() -> {KeyList1, KeyList2} = sample_keylist(), - {MergedKeyList, ListStatus, _, _, _, _} = create_block(KeyList1, - KeyList2, - #level{level=1}), + BlockOutput = create_block(KeyList1, + KeyList2, + #level{level=1}, + leveled_tinybloom:empty(4)), + {MergedKeyList, ListStatus, _SN, _, _, _, _} = BlockOutput, BlockSize = length(MergedKeyList), ?assertMatch(BlockSize, 32), ?assertMatch(ListStatus, complete), @@ -1515,10 +1553,11 @@ alternating_create_block_test() -> K32 = lists:nth(32, MergedKeyList), ?assertMatch(K32, {{o, "Bucket4", "Key1", null}, {1, {active, infinity}, 0, null}}), HKey = {{o, "Bucket1", "Key0", null}, {1, {active, infinity}, 0, null}}, - {_, ListStatus2, _, _, _, _} = create_block([HKey|KeyList1], - KeyList2, - #level{level=1}), - ?assertMatch(ListStatus2, full). + {_, LStatus2, _, _, _, _, _} = create_block([HKey|KeyList1], + KeyList2, + #level{level=1}, + leveled_tinybloom:empty(4)), + ?assertMatch(full, LStatus2). 
merge_seglists_test() -> @@ -1655,9 +1694,13 @@ merge_seglists_test() -> createslot_stage1_test() -> {KeyList1, KeyList2} = sample_keylist(), - Out = create_slot(KeyList1, KeyList2, #level{level=1}), + Out = create_slot(KeyList1, + KeyList2, + #level{level=1}, + leveled_tinybloom:empty(4)), {{LowKey, SegFilter, _SerialisedSlot, _LengthList}, {{LSN, HSN}, LastKey, Status}, + _UpdBloom, KL1, KL2} = Out, ?assertMatch(LowKey, {o, "Bucket1", "Key1", null}), ?assertMatch(LastKey, {o, "Bucket4", "Key1", null}), @@ -1678,9 +1721,11 @@ createslot_stage1_test() -> createslot_stage2_test() -> Out = create_slot(lists:sort(generate_randomkeys(100)), lists:sort(generate_randomkeys(100)), - #level{level=1}), + #level{level=1}, + leveled_tinybloom:empty(4)), {{_LowKey, _SegFilter, SerialisedSlot, LengthList}, {{_LSN, _HSN}, _LastKey, Status}, + _UpdBloom, _KL1, _KL2} = Out, ?assertMatch(Status, full), Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList), @@ -1691,9 +1736,11 @@ createslot_stage2_test() -> createslot_stage3_test() -> Out = create_slot(lists:sort(generate_sequentialkeys(100, 1)), lists:sort(generate_sequentialkeys(100, 101)), - #level{level=1}), + #level{level=1}, + leveled_tinybloom:empty(4)), {{LowKey, SegFilter, SerialisedSlot, LengthList}, {{_LSN, _HSN}, LastKey, Status}, + _UpdBloom, KL1, KL2} = Out, ?assertMatch(Status, full), Sum1 = lists:foldl(fun(X, Sum) -> Sum + X end, 0, LengthList), @@ -1729,17 +1776,19 @@ createslot_stage3_test() -> testwrite_function(slots, {Handle, SerialisedSlots}) -> lists:append(Handle, [SerialisedSlots]); -testwrite_function(finalise, {Handle, C_SlotIndex, SNExtremes, KeyExtremes}) -> - {Handle, C_SlotIndex, SNExtremes, KeyExtremes}. +testwrite_function(finalise, + {Handle, C_SlotIndex, {SNExtremes, KeyExtremes, Bloom}}) -> + {Handle, C_SlotIndex, SNExtremes, KeyExtremes, Bloom}. writekeys_stage1_test() -> {KL1, KL2} = sample_keylist(), {FunOut, {_KL1Rem, _KL2Rem}} = write_keys([], KL1, KL2, [], <<>>, + leveled_tinybloom:empty(4), #level{level=1}, fun testwrite_function/2), - {Handle, {_, PointerIndex}, SNExtremes, KeyExtremes} = FunOut, + {Handle, {_, PointerIndex}, SNExtremes, KeyExtremes, _Bloom} = FunOut, ?assertMatch(SNExtremes, {1,3}), ?assertMatch(KeyExtremes, {{o, "Bucket1", "Key1", null}, {o, "Bucket4", "Key1", null}}), diff --git a/src/leveled_tinybloom.erl b/src/leveled_tinybloom.erl index 166d616..dd72b6e 100644 --- a/src/leveled_tinybloom.erl +++ b/src/leveled_tinybloom.erl @@ -33,6 +33,8 @@ empty(Width) when Width =< 256 -> FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end, lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)). +enter({hash, no_lookup}, Bloom) -> + Bloom; enter({hash, Hash}, Bloom) -> {H0, Bit1, Bit2} = split_hash(Hash), Slot = H0 rem dict:size(Bloom), @@ -45,6 +47,8 @@ enter(Key, Bloom) -> Hash = leveled_codec:magic_hash(Key), enter({hash, Hash}, Bloom). +check({hash, _Hash}, undefined) -> + true; check({hash, Hash}, Bloom) -> {H0, Bit1, Bit2} = split_hash(Hash), Slot = H0 rem dict:size(Bloom), From f848500eff6289f38a703072f07ac4c03ef21f2f Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 04:53:36 +0000 Subject: [PATCH 18/34] Tinker, tinker, tinker, tinker --- src/leveled_penciller.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index ee0b921..431c501 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -197,11 +197,11 @@ -define(CURRENT_FILEX, "crr"). -define(PENDING_FILEX, "pnd"). 
-define(MEMTABLE, mem). --define(MAX_TABLESIZE, 25000). % This is less than max - but COIN_SIDECOUNT --define(SUPER_MAX_TABLE_SIZE, 45000). +-define(MAX_TABLESIZE, 28000). % This is less than max - but COIN_SIDECOUNT +-define(SUPER_MAX_TABLE_SIZE, 40000). -define(PROMPT_WAIT_ONL0, 5). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). --define(COIN_SIDECOUNT, 4). +-define(COIN_SIDECOUNT, 3). -record(state, {manifest = [] :: list(), manifest_sqn = 0 :: integer(), From 8bcb49479df71300a75e5709645b600465424557 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 05:23:24 +0000 Subject: [PATCH 19/34] Re-introduce ETS Index Add ETS Index back in to avoid having to check each skip list in turn. Also this helps keep a lower skip list size. --- src/leveled_log.erl | 2 ++ src/leveled_penciller.erl | 34 ++++++++++++++++++++++++++++------ src/leveled_pmem.erl | 28 +++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index a10e641..f2306ce 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -104,6 +104,8 @@ {info, "L0 completion confirmed and will transition to not pending"}}, {"P0030", {warn, "We're doomed - intention recorded to destroy all files"}}, + {"P0031", + {info, "Completion of update to levelzero"}}, {"PC001", {info, "Penciller's clerk ~w started with owner ~w"}}, diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 431c501..3547342 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -219,6 +219,7 @@ levelzero_size = 0 :: integer(), levelzero_maxcachesize :: integer(), levelzero_cointoss = false :: boolean(), + levelzero_index, % may be none or an ETS table reference is_snapshot = false :: boolean(), snapshot_fully_loaded = false :: boolean(), @@ -369,14 +370,16 @@ handle_call({fetch, Key, Hash}, _From, State) -> fetch_mem(Key, Hash, State#state.manifest, - State#state.levelzero_cache), + State#state.levelzero_cache, + State#state.levelzero_index), State}; handle_call({check_sqn, Key, Hash, SQN}, _From, State) -> {reply, compare_to_sqn(fetch_mem(Key, Hash, State#state.manifest, - State#state.levelzero_cache), + State#state.levelzero_cache, + State#state.levelzero_index), SQN), State}; handle_call({fetch_keys, StartKey, EndKey, AccFun, InitAcc, MaxKeys}, @@ -417,6 +420,7 @@ handle_call({load_snapshot, {BookieIncrTree, MinSQN, MaxSQN}}, _From, State) -> {LedgerSQN, L0Size, L0Cache} = L0D, {reply, ok, State#state{levelzero_cache=L0Cache, levelzero_size=L0Size, + levelzero_index=none, ledger_sqn=LedgerSQN, snapshot_fully_loaded=true}}; handle_call({fetch_levelzero, Slot}, _From, State) -> @@ -468,6 +472,7 @@ handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) -> levelzero_pending=false, levelzero_constructor=undefined, levelzero_size=0, + levelzero_index=leveled_pmem:new_index(), manifest=UpdMan, persisted_sqn=State#state.ledger_sqn}}. 
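
The levelzero_index added to the loop state is nothing more than an ETS set of bare hash tuples, giving a fast negative test in front of the skip lists (the implementation is in leveled_pmem below). A self-contained sketch of the mechanism:

    l0index_sketch() ->
        Index = ets:new(l0index, [private, set]),
        true = ets:insert(Index, {12345}),
        [{12345}] = ets:lookup(Index, 12345),   % hit - check the L0 cache
        [] = ets:lookup(Index, 67890),          % miss - go straight to the files
        ets:delete(Index).

A lookup against the set is a single hash probe, whereas checking each skip list in the L0 cache in turn is not.
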
@@ -560,7 +565,8 @@ start_from_file(PCLopts) -> InitState = #state{clerk=MergeClerk, root_path=RootPath, levelzero_maxcachesize=MaxTableSize, - levelzero_cointoss=CoinToss}, + levelzero_cointoss=CoinToss, + levelzero_index=leveled_pmem:new_index()}, %% Open manifest ManifestPath = InitState#state.root_path ++ "/" ++ ?MANIFEST_FP ++ "/", @@ -636,10 +642,13 @@ start_from_file(PCLopts) -> update_levelzero(L0Size, {PushedTree, MinSQN, MaxSQN}, LedgerSQN, L0Cache, State) -> + SW = os:timestamp(), Update = leveled_pmem:add_to_cache(L0Size, {PushedTree, MinSQN, MaxSQN}, LedgerSQN, L0Cache), + leveled_pmem:add_to_index(PushedTree, State#state.levelzero_index), + {UpdMaxSQN, NewL0Size, UpdL0Cache} = Update, if UpdMaxSQN >= LedgerSQN -> @@ -661,15 +670,20 @@ update_levelzero(L0Size, {PushedTree, MinSQN, MaxSQN}, false -> true end, - case {CacheTooBig, Level0Free, RandomFactor or CacheMuchTooBig} of + JitterCheck = RandomFactor or CacheMuchTooBig, + case {CacheTooBig, Level0Free, JitterCheck} of {true, true, true} -> - L0Constructor = roll_memory(UpdState, false), + L0Constructor = roll_memory(UpdState, false), + leveled_log:log_timer("P0031", [], SW), UpdState#state{levelzero_pending=true, levelzero_constructor=L0Constructor}; _ -> + leveled_log:log_timer("P0031", [], SW), UpdState end; + NewL0Size == L0Size -> + leveled_log:log_timer("P0031", [], SW), State#state{levelzero_cache=L0Cache, levelzero_size=L0Size, ledger_sqn=LedgerSQN} @@ -718,13 +732,21 @@ levelzero_filename(State) -> FileName. -fetch_mem(Key, Hash, Manifest, L0Cache) -> + +fetch_mem(Key, Hash, Manifest, L0Cache, none) -> L0Check = leveled_pmem:check_levelzero(Key, Hash, L0Cache), case L0Check of {false, not_found} -> fetch(Key, Hash, Manifest, 0, fun leveled_sft:sft_get/2); {true, KV} -> KV + end; +fetch_mem(Key, Hash, Manifest, L0Cache, L0Index) -> + case leveled_pmem:check_index(Hash, L0Index) of + true -> + fetch_mem(Key, Hash, Manifest, L0Cache, none); + false -> + fetch(Key, Hash, Manifest, 0, fun leveled_sft:sft_get/2) end. fetch(_Key, _Hash, _Manifest, ?MAX_LEVELS + 1, _FetchFun) -> diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index 5ba62aa..9f81c01 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -45,7 +45,10 @@ add_to_cache/4, to_list/2, check_levelzero/3, - merge_trees/4 + merge_trees/4, + add_to_index/2, + new_index/0, + check_index/2 ]). -include_lib("eunit/include/eunit.hrl"). @@ -69,6 +72,29 @@ add_to_cache(L0Size, {LevelMinus1, MinSQN, MaxSQN}, LedgerSQN, TreeList) -> end end. +add_to_index(LevelMinus1, L0Index) -> + IndexAddFun = + fun({_K, V}) -> + {_, _, Hash, _} = leveled_codec:striphead_to_details(V), + case Hash of + no_lookup -> + ok; + _ -> + ets:insert(L0Index, {Hash}) + end + end, + lists:foreach(IndexAddFun, leveled_skiplist:to_list(LevelMinus1)). + +new_index() -> + ets:new(l0index, [private, set]). + +check_index(Hash, L0Index) -> + case ets:lookup(L0Index, Hash) of + [{Hash}] -> + true; + [] -> + false + end. 
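
The new P0031 log brackets every exit path of update_levelzero/5 with the timing idiom used elsewhere in the store. Distilled, and purely as an illustration (timed_sketch/1 is hypothetical):

    timed_sketch(Fun) ->
        SW = os:timestamp(),
        Result = Fun(),
        %% log_timer reports the time elapsed since SW against the log ref
        leveled_log:log_timer("P0031", [], SW),
        Result.
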
 to_list(Slots, FetchFun) ->
     SW = os:timestamp(),
 
From 32ac305c674329d5488cc459badf3e3ea5f430ea Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 06:53:25 +0000
Subject: [PATCH 20/34] Compaction test error

Compaction tests are now throwing up different corruption points.

---
 src/leveled_codec.erl  |  4 +++-
 src/leveled_iclerk.erl | 33 ++++++++++++++++++++------------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index 72b90b0..5303180 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -226,7 +226,9 @@ compact_inkerkvc({{SQN, ?INKT_STND, LK}, V, CrcCheck}, Strategy) ->
             {TagStrat, {{SQN, ?INKT_KEYD, LK}, {null, KeyDeltas}, CrcCheck}};
         TagStrat ->
             {TagStrat, null}
-    end.
+    end;
+compact_inkerkvc(_KVC, _Strategy) ->
+    skip.
 
 split_inkvalue(VBin) ->
     case is_binary(VBin) of
diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl
index a060774..c612367 100644
--- a/src/leveled_iclerk.erl
+++ b/src/leveled_iclerk.erl
@@ -238,19 +238,26 @@ check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) -
     FN = leveled_cdb:cdb_filename(CDB),
     PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize),
     KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []),
-    R0 = lists:foldl(fun(KS, {ActSize, RplSize}) ->
-                            {{SQN, _Type, PK}, Size} = KS,
-                            Check = FilterFun(FilterServer, PK, SQN),
-                            case {Check, SQN > MaxSQN} of
-                                {true, _} ->
-                                    {ActSize + Size - ?CRC_SIZE, RplSize};
-                                {false, true} ->
-                                    {ActSize + Size - ?CRC_SIZE, RplSize};
-                                _ ->
-                                    {ActSize, RplSize + Size - ?CRC_SIZE}
-                            end end,
-                        {0, 0},
-                        KeySizeList),
+
+    FoldFunForSizeCompare =
+        fun(KS, {ActSize, RplSize}) ->
+            case KS of
+                {{SQN, _Type, PK}, Size} ->
+                    Check = FilterFun(FilterServer, PK, SQN),
+                    case {Check, SQN > MaxSQN} of
+                        {true, _} ->
+                            {ActSize + Size - ?CRC_SIZE, RplSize};
+                        {false, true} ->
+                            {ActSize + Size - ?CRC_SIZE, RplSize};
+                        _ ->
+                            {ActSize, RplSize + Size - ?CRC_SIZE}
+                    end;
+                _ ->
+                    {ActSize, RplSize}
+            end
+        end,
+
+    R0 = lists:foldl(FoldFunForSizeCompare, {0, 0}, KeySizeList),
     {ActiveSize, ReplacedSize} = R0,
     Score = case ActiveSize + ReplacedSize of
                 0 ->

From 2758498fad5437fef6c48ec53d10a90718267fc7 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 06:54:41 +0000
Subject: [PATCH 21/34] More Jitter!

Having reduced the size of the ledger cache (again), we can now
tolerate more jitter here.

---
 src/leveled_penciller.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl
index 3547342..58f575e 100644
--- a/src/leveled_penciller.erl
+++ b/src/leveled_penciller.erl
@@ -201,7 +201,7 @@
 -define(SUPER_MAX_TABLE_SIZE, 40000).
 -define(PROMPT_WAIT_ONL0, 5).
 -define(WORKQUEUE_BACKLOG_TOLERANCE, 4).
--define(COIN_SIDECOUNT, 3).
+-define(COIN_SIDECOUNT, 5).
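
For clarity, the decision this constant feeds can be sketched as below. This is illustrative only: should_roll_l0/3 is hypothetical, it takes the ?MAX_TABLESIZE default as the cache limit, and it assumes the elided coin toss passes with probability 1 in ?COIN_SIDECOUNT.

    should_roll_l0(L0Size, Level0Free, CoinToss) ->
        CacheTooBig = L0Size > ?MAX_TABLESIZE,
        CacheMuchTooBig = L0Size > ?SUPER_MAX_TABLE_SIZE,
        RandomFactor =
            case CoinToss of
                true ->
                    %% Assumed shape of the toss - roll on one side only
                    random:uniform(?COIN_SIDECOUNT) == 1;
                false ->
                    true
            end,
        CacheTooBig and Level0Free and (RandomFactor or CacheMuchTooBig).

Raising ?COIN_SIDECOUNT spreads L0 writes over a wider window, while ?SUPER_MAX_TABLE_SIZE caps how long an unlucky run of tosses can defer the roll.
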
 -record(state, {manifest = [] :: list(),
                 manifest_sqn = 0 :: integer(),

From 6f06c6fdeb3d71c5c7dde4bc24cc838f76683631 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 07:07:30 +0000
Subject: [PATCH 22/34] ETS delete

Delete the objects rather than starting a new table each time.

---
 src/leveled_penciller.erl | 2 +-
 src/leveled_pmem.erl      | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl
index 58f575e..ce83252 100644
--- a/src/leveled_penciller.erl
+++ b/src/leveled_penciller.erl
@@ -467,12 +467,12 @@ handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) ->
                                filename=FN},
     UpdMan = lists:keystore(0, 1, State#state.manifest, {0, [ManEntry]}),
     % Prompt clerk to ask about work - do this for every L0 roll
+    leveled_pmem:clear_index(State#state.levelzero_index),
     ok = leveled_pclerk:clerk_prompt(State#state.clerk),
     {noreply, State#state{levelzero_cache=[],
                             levelzero_pending=false,
                             levelzero_constructor=undefined,
                             levelzero_size=0,
-                            levelzero_index=leveled_pmem:new_index(),
                             manifest=UpdMan,
                             persisted_sqn=State#state.ledger_sqn}}.
 
diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl
index 9f81c01..8629fb3 100644
--- a/src/leveled_pmem.erl
+++ b/src/leveled_pmem.erl
@@ -88,6 +88,9 @@ add_to_index(LevelMinus1, L0Index) ->
 new_index() ->
     ets:new(l0index, [private, set]).
 
+clear_index(L0Index) ->
+    ets:delete_all_objects(L0Index).
+
 check_index(Hash, L0Index) ->
     case ets:lookup(L0Index, Hash) of
         [{Hash}] ->

From 16c704551b6be82b9c987b12d6c2f912735edc98 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 07:35:23 +0000
Subject: [PATCH 23/34] Revert to original SFT build settings

Leveled is always CPU bound during tests, and it is the merge in the
ledger that drains the CPU hardest.

---
 src/leveled_sft.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl
index 4f67adf..caf4558 100644
--- a/src/leveled_sft.erl
+++ b/src/leveled_sft.erl
@@ -179,7 +179,7 @@
 -define(DWORD_SIZE, 8).
 -define(CURRENT_VERSION, {0,1}).
 -define(SLOT_COUNT, 256).
--define(SLOT_GROUPWRITE_COUNT, 64).
+-define(SLOT_GROUPWRITE_COUNT, 32).
 -define(BLOCK_SIZE, 32).
 -define(BLOCK_COUNT, 4).
 -define(FOOTERPOS_HEADERPOS, 2).
@@ -189,7 +189,7 @@
 -define(COMPRESSION_LEVEL, 1).
 -define(HEADER_LEN, 56).
 -define(ITERATOR_SCANWIDTH, 1).
--define(MERGE_SCANWIDTH, 32).
+-define(MERGE_SCANWIDTH, 16).
 -define(BLOOM_WIDTH, 48).
 -define(DELETE_TIMEOUT, 10000).
 -define(MAX_KEYS, ?SLOT_COUNT * ?BLOCK_COUNT * ?BLOCK_SIZE).

From fb069666dc6fd6464a477225e08b0a2b8ab0f02a Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 08:16:00 +0000
Subject: [PATCH 24/34] Export clear_index/1

---
 src/leveled_pmem.erl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl
index 8629fb3..0c61acf 100644
--- a/src/leveled_pmem.erl
+++ b/src/leveled_pmem.erl
@@ -48,6 +48,7 @@
         merge_trees/4,
         add_to_index/2,
         new_index/0,
+        clear_index/1,
         check_index/2
         ]).
 

From 71cf7a3a5161db65753593342912a5799813788d Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 08:37:03 +0000
Subject: [PATCH 25/34] Setting change led to idle CPU

---
 src/leveled_sft.erl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl
index caf4558..da3a9fd 100644
--- a/src/leveled_sft.erl
+++ b/src/leveled_sft.erl
@@ -179,7 +179,7 @@
 -define(DWORD_SIZE, 8).
 -define(CURRENT_VERSION, {0,1}).
 -define(SLOT_COUNT, 256).
--define(SLOT_GROUPWRITE_COUNT, 32).
+-define(SLOT_GROUPWRITE_COUNT, 128).
 -define(BLOCK_SIZE, 32).
 -define(BLOCK_COUNT, 4).
 -define(FOOTERPOS_HEADERPOS, 2).
@@ -189,7 +189,7 @@
 -define(COMPRESSION_LEVEL, 1).
 -define(HEADER_LEN, 56).
 -define(ITERATOR_SCANWIDTH, 1).
--define(MERGE_SCANWIDTH, 16).
+-define(MERGE_SCANWIDTH, 32).
 -define(BLOOM_WIDTH, 48).
 -define(DELETE_TIMEOUT, 10000).
 -define(MAX_KEYS, ?SLOT_COUNT * ?BLOCK_COUNT * ?BLOCK_SIZE).

From 44cee5a6e8d84391c815664e7f2b25e6c459b2d6 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 12:33:09 +0000
Subject: [PATCH 26/34] Experiment with no compression

Does compression hurt the CPU more than the benefit gained in some
cases?

---
 src/leveled_codec.erl | 2 +-
 src/leveled_sft.erl   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index 5303180..35afbdb 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -246,7 +246,7 @@ check_forinkertype(_LedgerKey, _Object) ->
 create_value_for_journal(Value) ->
     case Value of
         {Object, KeyChanges} ->
-            term_to_binary({Object, KeyChanges}, [compressed]);
+            term_to_binary({Object, KeyChanges});
         Value when is_binary(Value) ->
             Value
     end.
diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl
index da3a9fd..5bae7da 100644
--- a/src/leveled_sft.erl
+++ b/src/leveled_sft.erl
@@ -1082,7 +1082,7 @@ create_slot(KL1, KL2, LevelR, BlockCount, Bloom,
             TrackingMetadata).
 
 serialise_block(BlockKeyList) ->
-    term_to_binary(BlockKeyList, [{compressed, ?COMPRESSION_LEVEL}]).
+    term_to_binary(BlockKeyList).
 

From 1b638450505ffab94873f5bc6d9ab190ea83273f Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 15:02:33 +0000
Subject: [PATCH 27/34] Bring compression back to SFT

It is expensive on the CPU - but it leads to a 4x increase in the cache
coverage. Also try to make some small micro-gains in list handling in
create_block.

---
 src/leveled_codec.erl |  2 +-
 src/leveled_sft.erl   | 34 +++++++++++++++++++++++-----------
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index 35afbdb..5303180 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -246,7 +246,7 @@ check_forinkertype(_LedgerKey, _Object) ->
 create_value_for_journal(Value) ->
     case Value of
         {Object, KeyChanges} ->
-            term_to_binary({Object, KeyChanges});
+            term_to_binary({Object, KeyChanges}, [compressed]);
         Value when is_binary(Value) ->
             Value
     end.
diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl
index 5bae7da..428523a 100644
--- a/src/leveled_sft.erl
+++ b/src/leveled_sft.erl
@@ -179,7 +179,7 @@
 -define(DWORD_SIZE, 8).
 -define(CURRENT_VERSION, {0,1}).
 -define(SLOT_COUNT, 256).
--define(SLOT_GROUPWRITE_COUNT, 128).
+-define(SLOT_GROUPWRITE_COUNT, 64).
 -define(BLOCK_SIZE, 32).
 -define(BLOCK_COUNT, 4).
 -define(FOOTERPOS_HEADERPOS, 2).
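
The last two commits are probing a measurable trade-off, and it can be weighed directly; a self-contained sketch (illustrative only - real numbers depend on the shape of the keys):

    compression_tradeoff_sketch(BlockKeyList) ->
        %% timer:tc/3 returns {MicroSeconds, Result} for the call
        {T0, Plain} = timer:tc(erlang, term_to_binary, [BlockKeyList]),
        {T1, Compressed} =
            timer:tc(erlang, term_to_binary,
                        [BlockKeyList, [{compressed, ?COMPRESSION_LEVEL}]]),
        {{plain, byte_size(Plain), T0},
            {compressed, byte_size(Compressed), T1}}.

Compression costs CPU at write time but, per the message above, roughly quadruples how much of the store the page cache can cover; ?COMPRESSION_LEVEL stays at the cheapest setting of 1, where the bare compressed option would imply the default level of 6.
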
@@ -970,16 +970,25 @@ create_block(KeyList1, KeyList2, when length(BlockKeyList)==?BLOCK_SIZE -> case {KeyList1, KeyList2} of {[], []} -> - {BlockKeyList, complete, {LSN, HSN}, SegmentList, + {lists:reverse(BlockKeyList), + complete, + {LSN, HSN}, + lists:reverse(SegmentList), Bloom, [], []}; _ -> - {BlockKeyList, full, {LSN, HSN}, SegmentList, + {lists:reverse(BlockKeyList), + full, + {LSN, HSN}, + lists:reverse(SegmentList), Bloom, KeyList1, KeyList2} end; create_block([], [], BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom) -> - {BlockKeyList, partial, {LSN, HSN}, SegmentList, + {lists:reverse(BlockKeyList), + partial, + {LSN, HSN}, + lists:reverse(SegmentList), Bloom, [], []}; create_block(KeyList1, KeyList2, @@ -992,10 +1001,8 @@ create_block(KeyList1, KeyList2, {SQN, _St, MH, _MD} = leveled_codec:striphead_to_details(V), {UpdLSN, UpdHSN} = update_sequencenumbers(SQN, LSN, HSN), UpdBloom = leveled_tinybloom:enter({hash, MH}, Bloom), - NewBlockKeyList = lists:append(BlockKeyList, - [TopKey]), - NewSegmentList = lists:append(SegmentList, - [hash_for_segmentid(TopKey)]), + NewBlockKeyList = [TopKey|BlockKeyList], + NewSegmentList = [hash_for_segmentid(TopKey)|SegmentList], create_block(Rem1, Rem2, NewBlockKeyList, {UpdLSN, UpdHSN}, NewSegmentList, LevelR, UpdBloom); @@ -1061,13 +1068,13 @@ create_slot(KL1, KL2, LevelR, BlockCount, Bloom, {null, LSN, HSN, LastKey, Status}; {null, _} -> [NewLowKeyV|_] = BlockKeyList, - NewLastKey = lists:last([{keyonly, LastKey}|BlockKeyList]), + NewLastKey = last_key(BlockKeyList, {keyonly, LastKey}), {leveled_codec:strip_to_keyonly(NewLowKeyV), min(LSN, LSNb), max(HSN, HSNb), leveled_codec:strip_to_keyonly(NewLastKey), Status}; {_, _} -> - NewLastKey = lists:last([{keyonly, LastKey}|BlockKeyList]), + NewLastKey = last_key(BlockKeyList, {keyonly, LastKey}), {LowKey, min(LSN, LSNb), max(HSN, HSNb), leveled_codec:strip_to_keyonly(NewLastKey), @@ -1081,8 +1088,13 @@ create_slot(KL1, KL2, LevelR, BlockCount, Bloom, SegList2, SerialisedSlot2, LengthList ++ [BlockLength], TrackingMetadata). +last_key([], LastKey) -> + LastKey; +last_key(BlockKeyList, _LastKey) -> + lists:last(BlockKeyList). + serialise_block(BlockKeyList) -> - term_to_binary(BlockKeyList). + term_to_binary(BlockKeyList, [compressed]). 
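
The micro-gain in create_block is the standard accumulate-and-reverse idiom, sketched here for contrast (both functions are illustrative and equivalent in output):

    %% Appending to the tail walks the accumulator on every element.
    slow_collect(Items) ->
        lists:foldl(fun(X, Acc) -> lists:append(Acc, [X]) end, [], Items).

    %% Prepending is constant time, with a single reverse at the end.
    fast_collect(Items) ->
        lists:reverse(lists:foldl(fun(X, Acc) -> [X|Acc] end, [], Items)).

last_key/2 serves the same end: the old code built [{keyonly, LastKey}|BlockKeyList] only to take lists:last of it, where peeking at the existing list avoids the construction altogether.
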
%% Compare the keys at the head of the list, and either skip that "best" key or

From a86686d621fcaa3001a287bdaa549ee8b82d6332 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 15:17:58 +0000
Subject: [PATCH 28/34] Remove unnecessary reverse

---
 src/leveled_sft.erl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl
index 428523a..686511c 100644
--- a/src/leveled_sft.erl
+++ b/src/leveled_sft.erl
@@ -973,14 +973,14 @@ create_block(KeyList1, KeyList2,
             {lists:reverse(BlockKeyList),
                 complete,
                 {LSN, HSN},
-                lists:reverse(SegmentList),
+                SegmentList,
                 Bloom,
                 [], []};
         _ ->
             {lists:reverse(BlockKeyList),
                 full,
                 {LSN, HSN},
-                lists:reverse(SegmentList),
+                SegmentList,
                 Bloom,
                 KeyList1, KeyList2}
     end;
@@ -988,7 +988,7 @@ create_block([], [], BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom) ->
     {lists:reverse(BlockKeyList),
         partial,
         {LSN, HSN},
-        lists:reverse(SegmentList),
+        SegmentList,
         Bloom,
         [], []};
 create_block(KeyList1, KeyList2,

From 24a5347bec97c3b207d830296d70d30b6bc84dc5 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 15:19:34 +0000
Subject: [PATCH 29/34] Revert

---
 src/leveled_sft.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl
index 686511c..206f0bd 100644
--- a/src/leveled_sft.erl
+++ b/src/leveled_sft.erl
@@ -1094,7 +1094,7 @@ last_key(BlockKeyList, _LastKey) ->
     lists:last(BlockKeyList).
 
 serialise_block(BlockKeyList) ->
-    term_to_binary(BlockKeyList, [compressed]).
+    term_to_binary(BlockKeyList, [{compressed, ?COMPRESSION_LEVEL}]).
 

From 5cfe9a71e1f5d8a4c872eb893fa2b99396a4e49a Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 15:25:14 +0000
Subject: [PATCH 30/34] Wrap test with non-default timeout

---
 src/leveled_pclerk.erl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl
index 649973b..1e46d80 100644
--- a/src/leveled_pclerk.erl
+++ b/src/leveled_pclerk.erl
@@ -394,6 +394,9 @@ find_randomkeys(FList, Count, Source) ->
 
 
 merge_file_test() ->
+    {timeout, 10, merge_file_test_towrap()}.
+
+merge_file_test_towrap() ->
     KL1_L1 = lists:sort(generate_randomkeys(16000, 0, 1000)),
     {ok, PidL1_1, _} = leveled_sft:sft_new("../test/KL1_L1.sft",
                                             KL1_L1, [], 1),

From f96d1480731dca4040e1fcf3e6cd7bf07f1bfb3c Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 20:17:05 +0000
Subject: [PATCH 31/34] Make the merge_file_test a more sensible size

The test was on the verge of a timeout. Rather than keep battling with
the timeout, make it do less work.

---
 src/leveled_pclerk.erl | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl
index 1e46d80..63884bd 100644
--- a/src/leveled_pclerk.erl
+++ b/src/leveled_pclerk.erl
@@ -394,22 +394,19 @@ find_randomkeys(FList, Count, Source) ->
 
 
 merge_file_test() ->
-    {timeout, 10, merge_file_test_towrap()}.
-
-merge_file_test_towrap() ->
-    KL1_L1 = lists:sort(generate_randomkeys(16000, 0, 1000)),
+    KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)),
     {ok, PidL1_1, _} = leveled_sft:sft_new("../test/KL1_L1.sft",
                                             KL1_L1, [], 1),
-    KL1_L2 = lists:sort(generate_randomkeys(16000, 0, 250)),
+    KL1_L2 = lists:sort(generate_randomkeys(8000, 0, 250)),
     {ok, PidL2_1, _} = leveled_sft:sft_new("../test/KL1_L2.sft",
                                             KL1_L2, [], 2),
-    KL2_L2 = lists:sort(generate_randomkeys(16000, 250, 250)),
+    KL2_L2 = lists:sort(generate_randomkeys(8000, 250, 250)),
     {ok, PidL2_2, _} = leveled_sft:sft_new("../test/KL2_L2.sft",
                                             KL2_L2, [], 2),
-    KL3_L2 = lists:sort(generate_randomkeys(16000, 500, 250)),
+    KL3_L2 = lists:sort(generate_randomkeys(8000, 500, 250)),
     {ok, PidL2_3, _} = leveled_sft:sft_new("../test/KL3_L2.sft",
                                             KL3_L2, [], 2),
-    KL4_L2 = lists:sort(generate_randomkeys(16000, 750, 250)),
+    KL4_L2 = lists:sort(generate_randomkeys(8000, 750, 250)),
     {ok, PidL2_4, _} = leveled_sft:sft_new("../test/KL4_L2.sft",
                                             KL4_L2, [], 2),
     Result = perform_merge({PidL1_1, "../test/KL1_L1.sft"},

From 4b48ed14c6b429cb4c033e40269f8cac514ff8bb Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 20:38:20 +0000
Subject: [PATCH 32/34] Correct mistyped 2^32 - 1

---
 src/leveled_codec.erl    | 3 +--
 src/leveled_skiplist.erl | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index 5303180..f08e2e9 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -74,8 +74,7 @@
 %%
 %% Hash function contains mysterious constants, some explanation here as to
 %% what they are -
-%% http://stackoverflow.com/ ++
-%% questions/10696223/reason-for-5381-number-in-djb-hash-function
+%% http://stackoverflow.com/questions/10696223/reason-for-5381-number-in-djb-hash-function
 
 magic_hash({?RIAK_TAG, Bucket, Key, _SubKey}) ->
     magic_hash({Bucket, Key});
diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl
index 142afc5..7fcc81a 100644
--- a/src/leveled_skiplist.erl
+++ b/src/leveled_skiplist.erl
@@ -598,7 +598,7 @@ skiplist_timingtest(KL, SkipList, N, Bloom) ->
     case Bloom of
         true ->
             HashList = lists:map(fun(_X) ->
-                                        random:uniform(4296967295) end,
+                                        random:uniform(4294967295) end,
                                     lists:seq(1, 2000)),
             SWh = os:timestamp(),
             lists:foreach(fun(X) ->

From 86bdfdeaf034ca51ccbc2920e5793e55203c8b10 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Sun, 11 Dec 2016 21:01:10 +0000
Subject: [PATCH 33/34] Revert the additional bloom check

This is desirable to add back in going forward, but wasn't implemented
in a safe or clear way.

The way the bloom was or was not on the LoopState was clumsy, and it
got persisted in multiple places without a CRC check.

The intention is to implement it back in whereby it is requested
on-demand by the Penciller, and then the SFT worker lifts it off disk
and CRC checks it. So it is never on the SFT LoopState.

Also it will be easier to control the logic over which levels have the
bloom in the Penciller.

---
 include/leveled.hrl       |  1 -
 src/leveled_pclerk.erl    |  2 --
 src/leveled_penciller.erl | 39 ++++++++++++++++-----------------------
 src/leveled_sft.erl       | 23 +++++------------------
 4 files changed, 21 insertions(+), 44 deletions(-)

diff --git a/include/leveled.hrl b/include/leveled.hrl
index f57ffd4..25216f6 100644
--- a/include/leveled.hrl
+++ b/include/leveled.hrl
@@ -41,7 +41,6 @@
         {start_key :: tuple(),
             end_key :: tuple(),
             owner :: pid(),
-            bloom,
             filename :: string()}).
-record(cdb_options, diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 63884bd..b5f8e3f 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -320,7 +320,6 @@ do_merge(KL1, KL2, {SrcLevel, IsB}, {Filepath, MSN}, FileCounter, OutList) -> KL1, KL2, LevelR), - {ok, Bloom} = leveled_sft:sft_getbloom(Pid), case Reply of {{[], []}, null, _} -> leveled_log:log("PC013", [FileName]), @@ -332,7 +331,6 @@ do_merge(KL1, KL2, {SrcLevel, IsB}, {Filepath, MSN}, FileCounter, OutList) -> [#manifest_entry{start_key=SmallestKey, end_key=HighestKey, owner=Pid, - bloom=Bloom, filename=FileName}]), leveled_log:log_timer("PC015", [], TS1), do_merge(KL1Rem, KL2Rem, diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index ce83252..d5b70d1 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -175,7 +175,7 @@ pcl_checksequencenumber/4, pcl_workforclerk/1, pcl_promptmanifestchange/2, - pcl_confirml0complete/5, + pcl_confirml0complete/4, pcl_confirmdelete/2, pcl_close/1, pcl_doom/1, @@ -286,8 +286,8 @@ pcl_workforclerk(Pid) -> pcl_promptmanifestchange(Pid, WI) -> gen_server:cast(Pid, {manifest_change, WI}). -pcl_confirml0complete(Pid, FN, StartKey, EndKey, Bloom) -> - gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey, Bloom}). +pcl_confirml0complete(Pid, FN, StartKey, EndKey) -> + gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey}). pcl_confirmdelete(Pid, FileName) -> gen_server:cast(Pid, {confirm_delete, FileName}). @@ -458,11 +458,10 @@ handle_cast({confirm_delete, FileName}, State=#state{is_snapshot=Snap}) _ -> {noreply, State} end; -handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) -> +handle_cast({levelzero_complete, FN, StartKey, EndKey}, State) -> leveled_log:log("P0029", []), ManEntry = #manifest_entry{start_key=StartKey, end_key=EndKey, - bloom=Bloom, owner=State#state.levelzero_constructor, filename=FN}, UpdMan = lists:keystore(0, 1, State#state.manifest, {0, [ManEntry]}), @@ -737,7 +736,7 @@ fetch_mem(Key, Hash, Manifest, L0Cache, none) -> L0Check = leveled_pmem:check_levelzero(Key, Hash, L0Cache), case L0Check of {false, not_found} -> - fetch(Key, Hash, Manifest, 0, fun leveled_sft:sft_get/2); + fetch(Key, Manifest, 0, fun leveled_sft:sft_get/2); {true, KV} -> KV end; @@ -746,38 +745,32 @@ fetch_mem(Key, Hash, Manifest, L0Cache, L0Index) -> true -> fetch_mem(Key, Hash, Manifest, L0Cache, none); false -> - fetch(Key, Hash, Manifest, 0, fun leveled_sft:sft_get/2) + fetch(Key, Manifest, 0, fun leveled_sft:sft_get/2) end. 
-fetch(_Key, _Hash, _Manifest, ?MAX_LEVELS + 1, _FetchFun) -> +fetch(_Key, _Manifest, ?MAX_LEVELS + 1, _FetchFun) -> not_present; -fetch(Key, Hash, Manifest, Level, FetchFun) -> +fetch(Key, Manifest, Level, FetchFun) -> LevelManifest = get_item(Level, Manifest, []), case lists:foldl(fun(File, Acc) -> case Acc of not_present when Key >= File#manifest_entry.start_key, File#manifest_entry.end_key >= Key -> - {File#manifest_entry.owner, - File#manifest_entry.bloom}; + File#manifest_entry.owner; FoundDetails -> FoundDetails end end, not_present, LevelManifest) of not_present -> - fetch(Key, Hash, Manifest, Level + 1, FetchFun); - {FileToCheck, Bloom} -> - case leveled_tinybloom:check({hash, Hash}, Bloom) of - true -> - case FetchFun(FileToCheck, Key) of - not_present -> - fetch(Key, Hash, Manifest, Level + 1, FetchFun); - ObjectFound -> - ObjectFound - end; - false -> - fetch(Key, Hash, Manifest, Level + 1, FetchFun) + fetch(Key, Manifest, Level + 1, FetchFun); + FileToCheck -> + case FetchFun(FileToCheck, Key) of + not_present -> + fetch(Key, Manifest, Level + 1, FetchFun); + ObjectFound -> + ObjectFound end end. diff --git a/src/leveled_sft.erl b/src/leveled_sft.erl index 206f0bd..5b4f24e 100644 --- a/src/leveled_sft.erl +++ b/src/leveled_sft.erl @@ -161,7 +161,6 @@ sft_newfroml0cache/4, sft_open/1, sft_get/2, - sft_getbloom/1, sft_getkvrange/4, sft_close/1, sft_clear/1, @@ -213,8 +212,7 @@ handle :: file:fd(), background_complete = false :: boolean(), oversized_file = false :: boolean(), - penciller :: pid(), - bloom}). + penciller :: pid()}). %%%============================================================================ @@ -271,9 +269,6 @@ sft_open(Filename) -> sft_setfordelete(Pid, Penciller) -> gen_fsm:sync_send_event(Pid, {set_for_delete, Penciller}, infinity). -sft_getbloom(Pid) -> - gen_fsm:sync_send_event(Pid, get_bloom, infinity). - sft_get(Pid, Key) -> gen_fsm:sync_send_event(Pid, {get_kv, Key}, infinity). @@ -348,9 +343,8 @@ starting({sft_newfroml0cache, Filename, Slots, FetchFun, PCL}, _State) -> leveled_penciller:pcl_confirml0complete(PCL, State#state.filename, State#state.smallest_key, - State#state.highest_key, - State#state.bloom), - {next_state, reader, State#state{bloom=none}} + State#state.highest_key), + {next_state, reader, State} end. @@ -385,12 +379,6 @@ reader(background_complete, _From, State) -> reader, State} end; -reader(get_bloom, _From, State) -> - Bloom = State#state.bloom, - if - Bloom /= none -> - {reply, {ok, Bloom}, reader, State#state{bloom=none}} - end; reader(close, _From, State) -> ok = file:close(State#state.handle), {stop, normal, ok, State}. @@ -523,7 +511,7 @@ open_file(FileMD) -> Slen:32/integer>> = HeaderLengths, {ok, SummaryBin} = file:pread(Handle, ?HEADER_LEN + Blen + Ilen + Flen, Slen), - {{LowSQN, HighSQN}, {LowKey, HighKey}, Bloom} = binary_to_term(SummaryBin), + {{LowSQN, HighSQN}, {LowKey, HighKey}, _Bloom} = binary_to_term(SummaryBin), {ok, SlotIndexBin} = file:pread(Handle, ?HEADER_LEN + Blen, Ilen), SlotIndex = binary_to_term(SlotIndexBin), {Handle, FileMD#state{slot_index=SlotIndex, @@ -536,8 +524,7 @@ open_file(FileMD) -> filter_pointer=?HEADER_LEN + Blen + Ilen, summ_pointer=?HEADER_LEN + Blen + Ilen + Flen, summ_length=Slen, - handle=Handle, - bloom=Bloom}}. + handle=Handle}}. 
%% Take a file handle with a previously created header and complete it based on %% the two key lists KL1 and KL2 From f28c7e02bf93224f3f4fd026358ec840dd251b8d Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 11 Dec 2016 21:24:04 +0000 Subject: [PATCH 34/34] Remove unnecessary clause As the intention is to change the way the tiny bloom is called, the unnecessary clause of handling an undefined bloom can be removed. --- src/leveled_tinybloom.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/leveled_tinybloom.erl b/src/leveled_tinybloom.erl index dd72b6e..f9212ad 100644 --- a/src/leveled_tinybloom.erl +++ b/src/leveled_tinybloom.erl @@ -47,8 +47,6 @@ enter(Key, Bloom) -> Hash = leveled_codec:magic_hash(Key), enter({hash, Hash}, Bloom). -check({hash, _Hash}, undefined) -> - true; check({hash, Hash}, Bloom) -> {H0, Bit1, Bit2} = split_hash(Hash), Slot = H0 rem dict:size(Bloom),
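
The message on the final revert sets out the intended replacement: the Penciller requests a bloom on demand, the SFT worker lifts it off disk, and nothing is trusted without a CRC check. As a hedged sketch of that shape only - none of these functions exist in the codebase yet:

    %% Hypothetical on-demand bloom retrieval, with the bloom stored
    %% behind a leading CRC so a corrupt read is detected before use.
    bloom_to_disk_sketch(Bloom) ->
        BloomBin = term_to_binary(Bloom),
        <<(erlang:crc32(BloomBin)):32/integer, BloomBin/binary>>.

    bloom_from_disk_sketch(<<CRC:32/integer, BloomBin/binary>>) ->
        case erlang:crc32(BloomBin) of
            CRC ->
                {ok, binary_to_term(BloomBin)};
            _ ->
                {error, crc_wonky}
        end.

Kept off the SFT loop state, and fetched this way only for the levels where the Penciller wants one, this would address both of the concerns raised above.
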