Merge pull request #98 from martinsumner/mas-segid-cryptohash

Mas segid cryptohash
This commit is contained in:
Martin Sumner 2017-10-25 10:02:04 +01:00 committed by GitHub
commit 7763df3cef
12 changed files with 231 additions and 201 deletions

View file

@ -947,7 +947,7 @@ fetch_head(Key, Penciller, LedgerCache) ->
[{Key, Head}] ->
Head;
[] ->
Hash = leveled_codec:magic_hash(Key),
Hash = leveled_codec:segment_hash(Key),
case leveled_penciller:pcl_fetch(Penciller, Key, Hash) of
{Key, Head} ->
maybe_longrunning(SW, pcl_head),

View file

@ -65,6 +65,7 @@
integer_now/0,
riak_extract_metadata/2,
magic_hash/1,
segment_hash/1,
to_lookup/1]).
-define(V1_VERS, 1).
@ -79,6 +80,20 @@
integer()|null, % Hash of vclock - non-exportable
integer()}. % Size in bytes of real object
-spec segment_hash(any()) -> {integer(), integer()}.
%% @doc
%% Hash the key with md5 and split the digest into a 16-bit segment ID and a
%% 32-bit extra hash for spare entropy.  The pair should be used in blooms or
%% indexes such that some speed can be gained if just the segment ID is
%% known - but more can be gained when the extended hash (the second element)
%% is also known.  Non-binary keys are first converted via term_to_binary/1.
segment_hash(Key) ->
    BinKey =
        case is_binary(Key) of
            true ->
                Key;
            false ->
                term_to_binary(Key)
        end,
    <<SegmentID:16/integer, ExtraHash:32/integer, _Tail/binary>> =
        crypto:hash(md5, BinKey),
    {SegmentID, ExtraHash}.
-spec magic_hash(any()) -> integer().
%% @doc
%% Use DJ Bernstein magic hash function. Note, this is more expensive than
@ -87,10 +102,6 @@
%% Hash function contains mysterious constants, some explanation here as to
%% what they are -
%% http://stackoverflow.com/questions/10696223/reason-for-5381-number-in-djb-hash-function
magic_hash({?RIAK_TAG, Bucket, Key, _SubKey}) ->
magic_hash({Bucket, Key});
magic_hash({?STD_TAG, Bucket, Key, _SubKey}) ->
magic_hash({Bucket, Key});
magic_hash({binary, BinaryKey}) ->
H = 5381,
hash1(H, BinaryKey) band 16#FFFFFFFF;
@ -516,7 +527,9 @@ parse_date(LMD, UnitMins, LimitMins, Now) ->
-spec generate_ledgerkv(
tuple(), integer(), any(), integer(), tuple()|infinity) ->
{any(), any(), any(), {integer()|no_lookup, integer()}, list()}.
{any(), any(), any(),
{{integer(), integer()}|no_lookup, integer()},
list()}.
%% @doc
%% Function to extract from an object the information necessary to populate
%% the Penciller's ledger.
@ -537,7 +550,7 @@ generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) ->
_ ->
{active, TS}
end,
Hash = magic_hash(PrimaryKey),
Hash = segment_hash(PrimaryKey),
{MD, LastMods} = extract_metadata(Obj, Size, Tag),
ObjHash = get_objhash(Tag, MD),
Value = {SQN,

View file

@ -648,8 +648,8 @@ schedule_test_bycount(N) ->
?assertMatch(true, SecondsToCompaction0 < 5700),
SecondsToCompaction1 = schedule_compaction([14], N, CurrentTS), % tomorrow!
io:format("Seconds to compaction ~w~n", [SecondsToCompaction1]),
?assertMatch(true, SecondsToCompaction1 > 81000),
?assertMatch(true, SecondsToCompaction1 < 84300).
?assertMatch(true, SecondsToCompaction1 >= 81180),
?assertMatch(true, SecondsToCompaction1 =< 84780).
simple_score_test() ->

View file

@ -144,6 +144,8 @@
++ "leaving SnapshotCount=~w and MinSQN=~w"}},
{"P0040",
{info, "Archiving filename ~s as unused at startup"}},
{"P0041",
{info, "Penciller manifest switched from SQN ~w to ~w"}},
{"PC001",
{info, "Penciller's clerk ~w started with owner ~w"}},

View file

@ -254,7 +254,7 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) ->
K = {o, "Bucket" ++ BNumber, "Key" ++ KNumber},
RandKey = {K, {Count + 1,
{active, infinity},
leveled_codec:magic_hash(K),
leveled_codec:segment_hash(K),
null}},
generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange).

View file

@ -315,21 +315,22 @@ pcl_fetchlevelzero(Pid, Slot) ->
%% The Key needs to be hashable (i.e. have a tag which indicates that the key
%% can be looked up) - index entries are not hashable for example.
%%
%% If the hash is already knonw, call pcl_fetch/3 as magic_hash is a
%% If the hash is already known, call pcl_fetch/3 as segment_hash is a
%% relatively expensive hash function
pcl_fetch(Pid, Key) ->
Hash = leveled_codec:magic_hash(Key),
Hash = leveled_codec:segment_hash(Key),
if
Hash /= no_lookup ->
gen_server:call(Pid, {fetch, Key, Hash}, infinity)
end.
-spec pcl_fetch(pid(), tuple(), integer()) -> {tuple(), tuple()}|not_present.
-spec pcl_fetch(pid(), tuple(), {integer(), integer()}) ->
{tuple(), tuple()}|not_present.
%% @doc
%% Fetch a key, return the first (highest SQN) occurrence of that Key along
%% with the value.
%%
%% Hash should be result of leveled_codec:magic_hash(Key)
%% Hash should be result of leveled_codec:segment_hash(Key)
pcl_fetch(Pid, Key, Hash) ->
gen_server:call(Pid, {fetch, Key, Hash}, infinity).
@ -367,7 +368,7 @@ pcl_fetchnextkey(Pid, StartKey, EndKey, AccFun, InitAcc) ->
%% If the key is not present, it will be assumed that a higher sequence number
%% tombstone once existed, and false will be returned.
pcl_checksequencenumber(Pid, Key, SQN) ->
Hash = leveled_codec:magic_hash(Key),
Hash = leveled_codec:segment_hash(Key),
if
Hash /= no_lookup ->
gen_server:call(Pid, {check_sqn, Key, Hash, SQN}, infinity)
@ -672,6 +673,8 @@ handle_call(doom, _From, State) ->
handle_cast({manifest_change, NewManifest}, State) ->
NewManSQN = leveled_pmanifest:get_manifest_sqn(NewManifest),
OldManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest),
leveled_log:log("P0041", [OldManSQN, NewManSQN]),
ok = leveled_pclerk:clerk_promptdeletions(State#state.clerk, NewManSQN),
UpdManifest = leveled_pmanifest:merge_snapshot(State#state.manifest,
NewManifest),
@ -1317,7 +1320,7 @@ generate_randomkeys(Count, SQN, Acc) ->
RandKey = {K,
{SQN,
{active, infinity},
leveled_codec:magic_hash(K),
leveled_codec:segment_hash(K),
null}},
generate_randomkeys(Count - 1, SQN + 1, [RandKey|Acc]).
@ -1347,7 +1350,7 @@ maybe_pause_push(PCL, KL) ->
T1 = lists:foldl(fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) ->
UpdSL = [{K, V}|AccSL],
SQN = leveled_codec:strip_to_seqonly({K, V}),
H = leveled_codec:magic_hash(K),
H = leveled_codec:segment_hash(K),
UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H),
{UpdSL, UpdIdx, min(SQN, MinSQN), max(SQN, MaxSQN)}
end,
@ -1366,7 +1369,7 @@ maybe_pause_push(PCL, KL) ->
%% old test data doesn't have the magic hash
add_missing_hash({K, {SQN, ST, MD}}) ->
{K, {SQN, ST, leveled_codec:magic_hash(K), MD}}.
{K, {SQN, ST, leveled_codec:segment_hash(K), MD}}.
clean_dir_test() ->

View file

@ -1128,6 +1128,49 @@ snapshot_timeout_test() ->
Man10 = release_snapshot(Man9, ?PHANTOM_PID),
?assertMatch(0, length(Man10#manifest.snapshots)).
%% Regression test built around a manifest term captured while investigating
%% a potential range-query issue.  In this manifest, Level 1 holds three
%% manifest entries for "Bucket" in the level's plain-list part, while the
%% next level's idxt tree holds a single entry indexed under a "Bucket1" key
%% although its start_key is still within "Bucket" - i.e. the entry's key
%% range straddles the bucket boundary.
%% NOTE(review): the dict at the tail of the term is assumed to be the
%% manifest's pid-map in its literal dict representation - confirm against
%% the #manifest{} record definition.
potential_issue_test() ->
Manifest =
{manifest,{array,9,0,[],
{[],
[{manifest_entry,{o_rkv,"Bucket","Key10",null},
{o_rkv,"Bucket","Key12949",null},
"<0.313.0>","./16_1_0.sst"},
{manifest_entry,{o_rkv,"Bucket","Key129490",null},
{o_rkv,"Bucket","Key158981",null},
"<0.315.0>","./16_1_1.sst"},
{manifest_entry,{o_rkv,"Bucket","Key158982",null},
{o_rkv,"Bucket","Key188472",null},
"<0.316.0>","./16_1_2.sst"}],
{idxt,1,
{{[{{o_rkv,"Bucket1","Key1",null},
{manifest_entry,{o_rkv,"Bucket","Key9083",null},
{o_rkv,"Bucket1","Key1",null},
"<0.320.0>","./16_1_6.sst"}}]},
{1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}},
{idxt,0,{{},{0,nil}}},
{idxt,0,{{},{0,nil}}},
{idxt,0,{{},{0,nil}}},
{idxt,0,{{},{0,nil}}},
{idxt,0,{{},{0,nil}}},
{idxt,0,{{},{0,nil}}},
[]}},
19,[],0,
{dict,0,16,16,8,80,48,
{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
{{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}},
2},
%% A lookup across the whole of "Bucket" at Level 1 should find the three
%% list-held entries.
Range1 = range_lookup(Manifest,
1,
{o_rkv, "Bucket", null, null},
{o_rkv, "Bucket", null, null}),
%% The same range at Level 2 must not miss the idxt-held entry whose
%% index key is in "Bucket1" but whose start_key is inside "Bucket".
Range2 = range_lookup(Manifest,
2,
{o_rkv, "Bucket", null, null},
{o_rkv, "Bucket", null, null}),
io:format("Range in Level 1 ~w~n", [Range1]),
io:format("Range in Level 2 ~w~n", [Range2]),
?assertMatch(3, length(Range1)),
?assertMatch(1, length(Range2)).
-endif.

View file

@ -50,7 +50,8 @@
%%% API
%%%============================================================================
-spec prepare_for_index(index_array(), integer()|no_lookup) -> index_array().
-spec prepare_for_index(index_array(), {integer(), integer()}|no_lookup)
-> index_array().
%% @doc
%% Add the hash of a key to the index. This is 'prepared' in the sense that
%% this index is not used until it is loaded into the main index.
@ -95,7 +96,7 @@ new_index() ->
clear_index(_L0Index) ->
new_index().
-spec check_index(integer(), index_array()) -> list(integer()).
-spec check_index({integer(), integer()}, index_array()) -> list(integer()).
%% @doc
%% return a list of positions in the list of cache arrays that may contain the
%% key associated with the hash being checked
@ -158,9 +159,9 @@ to_list(Slots, FetchFun) ->
%% checked (with the most recently received cache being checked first) until a
%% match is found.
check_levelzero(Key, PosList, TreeList) ->
check_levelzero(Key, leveled_codec:magic_hash(Key), PosList, TreeList).
check_levelzero(Key, leveled_codec:segment_hash(Key), PosList, TreeList).
-spec check_levelzero(tuple(), integer(), list(integer()), list())
-spec check_levelzero(tuple(), {integer(), integer()}, list(integer()), list())
-> {boolean(), tuple|not_found}.
%% @doc
%% Check for the presence of a given Key in the Level Zero cache, with the
@ -204,10 +205,10 @@ find_pos(<<0:1/integer, NxtSlot:7/integer, T/binary>>, Hash, PosList, _SlotID) -
find_pos(T, Hash, PosList, NxtSlot).
split_hash(Hash) ->
Slot = Hash band 255,
H0 = (Hash bsr 8) band 8388607,
{Slot, H0}.
%% Split a {SegmentID, ExtraHash} pair into a slot number (low 8 bits of the
%% segment ID) and a 23-bit hash built from the remaining segment ID bits
%% combined with the extra hash.
split_hash({SegmentID, ExtraHash}) ->
    SlotNumber = SegmentID band 255,
    CombinedHash = (SegmentID bsr 8) bor (ExtraHash bsl 8),
    {SlotNumber, CombinedHash band 8388607}.
check_slotlist(Key, _Hash, CheckList, TreeList) ->
SlotCheckFun =
@ -358,7 +359,7 @@ with_index_test_() ->
with_index_test2() ->
IndexPrepareFun =
fun({K, _V}, Acc) ->
H = leveled_codec:magic_hash(K),
H = leveled_codec:segment_hash(K),
prepare_for_index(Acc, H)
end,
LoadFun =
@ -382,7 +383,7 @@ with_index_test2() ->
CheckFun =
fun({K, V}, {L0Idx, L0Cache}) ->
H = leveled_codec:magic_hash(K),
H = leveled_codec:segment_hash(K),
PosList = check_index(H, L0Idx),
?assertMatch({true, {K, V}},
check_slotlist(K, H, PosList, L0Cache)),

View file

@ -65,13 +65,12 @@
-include("include/leveled.hrl").
-define(MAX_SLOTS, 256).
-define(LOOK_SLOTSIZE, 128). % This is not configurable
-define(LOOK_BLOCKSIZE, {24, 32}).
-define(LOOK_SLOTSIZE, 128). % Maximum of 128
-define(LOOK_BLOCKSIZE, {24, 32}). % 4x + y = ?LOOK_SLOTSIZE
-define(NOLOOK_SLOTSIZE, 256).
-define(NOLOOK_BLOCKSIZE, {56, 32}).
-define(NOLOOK_BLOCKSIZE, {56, 32}). % 4x + y = ?NOLOOK_SLOTSIZE
-define(COMPRESSION_LEVEL, 1).
-define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_LEVEL}]).
% -define(LEVEL_BLOOM_BITS, [{0, 8}, {1, 10}, {2, 8}, {default, 6}]).
-define(MERGE_SCANWIDTH, 16).
-define(DISCARD_EXT, ".discarded").
-define(DELETE_TIMEOUT, 10000).
@ -237,12 +236,12 @@ sst_newlevelzero(RootPath, Filename, Slots, FetchFun, Penciller, MaxSQN) ->
-spec sst_get(pid(), tuple()) -> tuple()|not_present.
%% @doc
%% Return a Key, Value pair matching a Key or not_present if the Key is not in
%% the store. The magic_hash function is used to accelerate the seeking of
%% the store. The segment_hash function is used to accelerate the seeking of
%% keys, sst_get/3 should be used directly if this has already been calculated
sst_get(Pid, LedgerKey) ->
sst_get(Pid, LedgerKey, leveled_codec:magic_hash(LedgerKey)).
sst_get(Pid, LedgerKey, leveled_codec:segment_hash(LedgerKey)).
-spec sst_get(pid(), tuple(), integer()) -> tuple()|not_present.
-spec sst_get(pid(), tuple(), {integer(), integer()}) -> tuple()|not_present.
%% @doc
%% Return a Key, Value pair matching a Key or not_present if the Key is not in
%% the store (with the magic hash precalculated).
@ -554,7 +553,7 @@ fetch(LedgerKey, Hash, State) ->
State#state{blockindex_cache = BlockIndexCache}};
<<BlockLengths:24/binary, BlockIdx/binary>> ->
PosList = find_pos(BlockIdx,
double_hash(Hash, LedgerKey),
extra_hash(Hash),
[],
0),
case PosList of
@ -808,9 +807,9 @@ generate_binary_slot(Lookup, KVL) ->
fun({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->
{_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}),
case is_integer(H1) of
PosH1 = extra_hash(H1),
case is_integer(PosH1) of
true ->
PosH1 = double_hash(H1, K),
case NoHashCount of
0 ->
{<<1:1/integer,
@ -1003,7 +1002,7 @@ binaryslot_get(FullBin, Key, Hash) ->
<<B1P:32/integer, _R/binary>> = BlockLengths,
<<PosBinIndex:B1P/binary, Blocks/binary>> = Rest,
PosList = find_pos(PosBinIndex,
double_hash(Hash, Key),
extra_hash(Hash),
[],
0),
{fetch_value(PosList, BlockLengths, Blocks, Key),
@ -1186,9 +1185,10 @@ block_offsetandlength(BlockLengths, BlockID) ->
{BlocksPos, B1L + B2L + B3L + B4L, B5L}
end.
double_hash(Hash, Key) ->
H2 = erlang:phash2(Key),
(Hash bxor H2) band 32767.
%% Reduce a {SegmentHash, ExtraHash} pair to a 15-bit hash taken from the
%% segment hash; any non-pair value (e.g. no_lookup) is passed through
%% unchanged.
extra_hash({SegHash, _ExtraHash}) when is_integer(SegHash) ->
    SegHash band 16#7FFF;
extra_hash(NotHash) ->
    NotHash.
fetch_value([], _BlockLengths, _Blocks, _Key) ->
not_present;
@ -1538,7 +1538,7 @@ indexed_list_test() ->
io:format(user, "~nIndexed list timing test:~n", []),
N = 150,
KVL0 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 4)),
KVL1 = lists:sublist(KVL0, 128),
KVL1 = lists:sublist(KVL0, ?LOOK_SLOTSIZE),
SW0 = os:timestamp(),
@ -1548,15 +1548,15 @@ indexed_list_test() ->
[timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]),
{TestK1, TestV1} = lists:nth(20, KVL1),
MH1 = leveled_codec:magic_hash(TestK1),
MH1 = leveled_codec:segment_hash(TestK1),
{TestK2, TestV2} = lists:nth(40, KVL1),
MH2 = leveled_codec:magic_hash(TestK2),
MH2 = leveled_codec:segment_hash(TestK2),
{TestK3, TestV3} = lists:nth(60, KVL1),
MH3 = leveled_codec:magic_hash(TestK3),
MH3 = leveled_codec:segment_hash(TestK3),
{TestK4, TestV4} = lists:nth(80, KVL1),
MH4 = leveled_codec:magic_hash(TestK4),
MH4 = leveled_codec:segment_hash(TestK4),
{TestK5, TestV5} = lists:nth(100, KVL1),
MH5 = leveled_codec:magic_hash(TestK5),
MH5 = leveled_codec:segment_hash(TestK5),
test_binary_slot(FullBin, TestK1, MH1, {TestK1, TestV1}),
test_binary_slot(FullBin, TestK2, MH2, {TestK2, TestV2}),
@ -1573,15 +1573,15 @@ indexed_list_mixedkeys_test() ->
{_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
{TestK1, TestV1} = lists:nth(4, KVL1),
MH1 = leveled_codec:magic_hash(TestK1),
MH1 = leveled_codec:segment_hash(TestK1),
{TestK2, TestV2} = lists:nth(8, KVL1),
MH2 = leveled_codec:magic_hash(TestK2),
MH2 = leveled_codec:segment_hash(TestK2),
{TestK3, TestV3} = lists:nth(12, KVL1),
MH3 = leveled_codec:magic_hash(TestK3),
MH3 = leveled_codec:segment_hash(TestK3),
{TestK4, TestV4} = lists:nth(16, KVL1),
MH4 = leveled_codec:magic_hash(TestK4),
MH4 = leveled_codec:segment_hash(TestK4),
{TestK5, TestV5} = lists:nth(20, KVL1),
MH5 = leveled_codec:magic_hash(TestK5),
MH5 = leveled_codec:segment_hash(TestK5),
test_binary_slot(FullBin, TestK1, MH1, {TestK1, TestV1}),
test_binary_slot(FullBin, TestK2, MH2, {TestK2, TestV2}),
@ -1598,15 +1598,17 @@ indexed_list_mixedkeys2_test() ->
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
{_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
lists:foreach(fun({K, V}) ->
MH = leveled_codec:magic_hash(K),
MH = leveled_codec:segment_hash(K),
test_binary_slot(FullBin, K, MH, {K, V})
end,
KVL1).
indexed_list_allindexkeys_test() ->
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
?LOOK_SLOTSIZE),
{PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
?assertMatch(<<_BL:24/binary, 127:8/integer>>, PosBinIndex1),
EmptySlotSize = ?LOOK_SLOTSIZE - 1,
?assertMatch(<<_BL:24/binary, EmptySlotSize:8/integer>>, PosBinIndex1),
% SW = os:timestamp(),
BinToList = binaryslot_tolist(FullBin),
% io:format(user,
@ -1629,9 +1631,11 @@ indexed_list_allindexkeys_nolookup_test() ->
?assertMatch(Keys, binaryslot_trimmedlist(FullBin, all, all)).
indexed_list_allindexkeys_trimmed_test() ->
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
?LOOK_SLOTSIZE),
{PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
?assertMatch(<<_BL:24/binary, 127:8/integer>>, PosBinIndex1),
EmptySlotSize = ?LOOK_SLOTSIZE - 1,
?assertMatch(<<_BL:24/binary, EmptySlotSize:8/integer>>, PosBinIndex1),
?assertMatch(Keys, binaryslot_trimmedlist(FullBin,
{i,
"Bucket",
@ -1656,9 +1660,9 @@ indexed_list_allindexkeys_trimmed_test() ->
?assertMatch(11, length(O2)),
?assertMatch(R2, O2),
{SK3, _} = lists:nth(127, Keys),
{EK3, _} = lists:nth(128, Keys),
R3 = lists:sublist(Keys, 127, 2),
{SK3, _} = lists:nth(?LOOK_SLOTSIZE - 1, Keys),
{EK3, _} = lists:nth(?LOOK_SLOTSIZE, Keys),
R3 = lists:sublist(Keys, ?LOOK_SLOTSIZE - 1, 2),
O3 = binaryslot_trimmedlist(FullBin, SK3, EK3),
?assertMatch(2, length(O3)),
?assertMatch(R3, O3).
@ -1682,7 +1686,7 @@ indexed_list_mixedkeys_bitflip_test() ->
end,
{TestK1, _TestV1} = lists:nth(20, KVL1),
MH1 = leveled_codec:magic_hash(TestK1),
MH1 = leveled_codec:segment_hash(TestK1),
test_binary_slot(FullBin0, TestK1, MH1, not_present),
ToList = binaryslot_tolist(FullBin0),
@ -1920,7 +1924,7 @@ simple_persisted_test() ->
In = lists:keymember(K, 1, KVList1),
case {K > FirstKey, LastKey > K, In} of
{true, true, false} ->
[{K, leveled_codec:magic_hash(K), V}|Acc];
[{K, leveled_codec:segment_hash(K), V}|Acc];
_ ->
Acc
end

View file

@ -16,8 +16,8 @@
check_hash/2
]).
-define(BITS_PER_KEY, 8). % Must be 8 or 4
-define(INTEGER_SIZE, ?BITS_PER_KEY * 8).
-define(BLOOM_SIZE_BYTES, 16).
-define(INTEGER_SIZE, 128).
-define(BAND_MASK, ?INTEGER_SIZE - 1).
@ -34,9 +34,8 @@ create_bloom(HashList) ->
<<>>;
L when L > 32 ->
add_hashlist(HashList,
15,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0);
7,
0, 0, 0, 0, 0, 0, 0, 0);
L when L > 16 ->
add_hashlist(HashList, 3, 0, 0, 0, 0);
_ ->
@ -48,11 +47,11 @@ create_bloom(HashList) ->
%% Check for the presence of a given hash within a bloom
check_hash(_Hash, <<>>) ->
false;
check_hash(Hash, BloomBin) ->
SlotSplit = (byte_size(BloomBin) div ?BITS_PER_KEY) - 1,
{Slot, H0, H1} = split_hash(Hash, SlotSplit),
Mask = get_mask(H0, H1),
Pos = Slot * ?BITS_PER_KEY,
check_hash({_SegHash, Hash}, BloomBin) ->
SlotSplit = (byte_size(BloomBin) div ?BLOOM_SIZE_BYTES) - 1,
{Slot, Hashes} = split_hash(Hash, SlotSplit),
Mask = get_mask(Hashes),
Pos = Slot * ?BLOOM_SIZE_BYTES,
IntSize = ?INTEGER_SIZE,
<<_H:Pos/binary, CheckInt:IntSize/integer, _T/binary>> = BloomBin,
case CheckInt band Mask of
@ -69,19 +68,13 @@ check_hash(Hash, BloomBin) ->
split_hash(Hash, SlotSplit) ->
Slot = Hash band SlotSplit,
H0 = (Hash bsr 4) band (?BAND_MASK),
H1 = (Hash bsr 10) band (?BAND_MASK),
H3 = (Hash bsr 16) band (?BAND_MASK),
H4 = (Hash bsr 22) band (?BAND_MASK),
Slot0 = (Hash bsr 28) band SlotSplit,
{Slot bxor Slot0, H0 bxor H3, H1 bxor H4}.
H1 = (Hash bsr 11) band (?BAND_MASK),
H2 = (Hash bsr 18) band (?BAND_MASK),
H3 = (Hash bsr 25) band (?BAND_MASK),
{Slot, [H0, H1, H2, H3]}.
get_mask(H0, H1) ->
case H0 == H1 of
true ->
1 bsl H0;
false ->
(1 bsl H0) + (1 bsl H1)
end.
%% Combine four bit positions into a single bloom mask integer, with one bit
%% set per (not necessarily distinct) position.
get_mask([H0, H1, H2, H3]) ->
    lists:foldl(fun(H, Mask) -> Mask bor (1 bsl H) end, 0, [H0, H1, H2, H3]).
%% This looks ugly and clunky, but in tests it was quicker than modifying an
@ -90,9 +83,9 @@ get_mask(H0, H1) ->
add_hashlist([], _S, S0, S1) ->
IntSize = ?INTEGER_SIZE,
<<S0:IntSize/integer, S1:IntSize/integer>>;
add_hashlist([TopHash|T], SlotSplit, S0, S1) ->
{Slot, H0, H1} = split_hash(TopHash, SlotSplit),
Mask = get_mask(H0, H1),
add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1) ->
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
Mask = get_mask(Hashes),
case Slot of
0 ->
add_hashlist(T, SlotSplit, S0 bor Mask, S1);
@ -104,9 +97,9 @@ add_hashlist([], _S, S0, S1, S2, S3) ->
IntSize = ?INTEGER_SIZE,
<<S0:IntSize/integer, S1:IntSize/integer,
S2:IntSize/integer, S3:IntSize/integer>>;
add_hashlist([TopHash|T], SlotSplit, S0, S1, S2, S3) ->
{Slot, H0, H1} = split_hash(TopHash, SlotSplit),
Mask = get_mask(H0, H1),
add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1, S2, S3) ->
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
Mask = get_mask(Hashes),
case Slot of
0 ->
add_hashlist(T, SlotSplit, S0 bor Mask, S1, S2, S3);
@ -118,104 +111,50 @@ add_hashlist([TopHash|T], SlotSplit, S0, S1, S2, S3) ->
add_hashlist(T, SlotSplit, S0, S1, S2, S3 bor Mask)
end.
add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
SA, SB, SC, SD, SE, SF) ->
add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7) ->
IntSize = ?INTEGER_SIZE,
<<S0:IntSize/integer, S1:IntSize/integer,
S2:IntSize/integer, S3:IntSize/integer,
S4:IntSize/integer, S5:IntSize/integer,
S6:IntSize/integer, S7:IntSize/integer,
S8:IntSize/integer, S9:IntSize/integer,
SA:IntSize/integer, SB:IntSize/integer,
SC:IntSize/integer, SD:IntSize/integer,
SE:IntSize/integer, SF:IntSize/integer>>;
add_hashlist([TopHash|T],
S6:IntSize/integer, S7:IntSize/integer>>;
add_hashlist([{_SegHash, TopHash}|T],
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
SA, SB, SC, SD, SE, SF) ->
{Slot, H0, H1} = split_hash(TopHash, SlotSplit),
Mask = get_mask(H0, H1),
S0, S1, S2, S3, S4, S5, S6, S7) ->
{Slot, Hashes} = split_hash(TopHash, SlotSplit),
Mask = get_mask(Hashes),
case Slot of
0 ->
add_hashlist(T,
SlotSplit,
S0 bor Mask, S1, S2, S3, S4, S5, S6, S7, S8, S9,
SA, SB, SC, SD, SE, SF);
S0 bor Mask, S1, S2, S3, S4, S5, S6, S7);
1 ->
add_hashlist(T,
SlotSplit,
S0, S1 bor Mask, S2, S3, S4, S5, S6, S7, S8, S9,
SA, SB, SC, SD, SE, SF);
S0, S1 bor Mask, S2, S3, S4, S5, S6, S7);
2 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2 bor Mask, S3, S4, S5, S6, S7, S8, S9,
SA, SB, SC, SD, SE, SF);
S0, S1, S2 bor Mask, S3, S4, S5, S6, S7);
3 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3 bor Mask, S4, S5, S6, S7, S8, S9,
SA, SB, SC, SD, SE, SF);
S0, S1, S2, S3 bor Mask, S4, S5, S6, S7);
4 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4 bor Mask, S5, S6, S7, S8, S9,
SA, SB, SC, SD, SE, SF);
S0, S1, S2, S3, S4 bor Mask, S5, S6, S7);
5 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5 bor Mask, S6, S7, S8, S9,
SA, SB, SC, SD, SE, SF);
S0, S1, S2, S3, S4, S5 bor Mask, S6, S7);
6 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6 bor Mask, S7, S8, S9,
SA, SB, SC, SD, SE, SF);
S0, S1, S2, S3, S4, S5, S6 bor Mask, S7);
7 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7 bor Mask, S8, S9,
SA, SB, SC, SD, SE, SF);
8 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7, S8 bor Mask, S9,
SA, SB, SC, SD, SE, SF);
9 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9 bor Mask,
SA, SB, SC, SD, SE, SF);
10 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
SA bor Mask, SB, SC, SD, SE, SF);
11 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
SA, SB bor Mask, SC, SD, SE, SF);
12 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
SA, SB, SC bor Mask, SD, SE, SF);
13 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
SA, SB, SC, SD bor Mask, SE, SF);
14 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
SA, SB, SC, SD, SE bor Mask, SF);
15 ->
add_hashlist(T,
SlotSplit,
S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
SA, SB, SC, SD, SE, SF bor Mask)
S0, S1, S2, S3, S4, S5, S6, S7 bor Mask)
end.
@ -239,7 +178,7 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BNumber = string:right(integer_to_list(BucketLow + BRand), 4, $0),
KNumber = string:right(integer_to_list(leveled_rand:uniform(10000)), 6, $0),
LK = leveled_codec:to_ledgerkey("Bucket" ++ BNumber, "Key" ++ KNumber, o),
Chunk = leveled_rand:rand_bytes(64),
Chunk = leveled_rand:rand_bytes(16),
{_B, _K, MV, _H, _LMs} =
leveled_codec:generate_ledgerkv(LK, Seqn, Chunk, 64, infinity),
generate_randomkeys(Seqn + 1,
@ -254,7 +193,7 @@ get_hashlist(N) ->
KVL = lists:sublist(KVL0, N),
HashFun =
fun({K, _V}) ->
leveled_codec:magic_hash(K)
leveled_codec:segment_hash(K)
end,
lists:map(HashFun, KVL).
@ -283,46 +222,50 @@ empty_bloom_test() ->
?assertMatch({0, 4},
check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0})).
bloom_test() ->
test_bloom(128),
test_bloom(64),
test_bloom(32),
test_bloom(16),
test_bloom(8).
bloom_test_() ->
{timeout, 20, fun bloom_test_ranges/0}.
test_bloom(N) ->
HashList1 = get_hashlist(N),
HashList2 = get_hashlist(N),
HashList3 = get_hashlist(N),
HashList4 = get_hashlist(N),
bloom_test_ranges() ->
test_bloom(128, 2000),
test_bloom(64, 100),
test_bloom(32, 100),
test_bloom(16, 100),
test_bloom(8, 100).
test_bloom(N, Runs) ->
ListOfHashLists =
lists:map(fun(_X) -> get_hashlist(N) end, lists:seq(1, Runs)),
SWa = os:timestamp(),
BloomBin1 = create_bloom(HashList1),
BloomBin2 = create_bloom(HashList2),
BloomBin3 = create_bloom(HashList3),
BloomBin4 = create_bloom(HashList4),
ListOfBlooms =
lists:map(fun(HL) -> create_bloom(HL) end, ListOfHashLists),
TSa = timer:now_diff(os:timestamp(), SWa),
SWb = os:timestamp(),
check_all_hashes(BloomBin1, HashList1),
check_all_hashes(BloomBin2, HashList2),
check_all_hashes(BloomBin3, HashList3),
check_all_hashes(BloomBin4, HashList4),
lists:foreach(fun(Nth) ->
HL = lists:nth(Nth, ListOfHashLists),
BB = lists:nth(Nth, ListOfBlooms),
check_all_hashes(BB, HL)
end,
lists:seq(1, Runs)),
TSb = timer:now_diff(os:timestamp(), SWb),
HashPool = get_hashlist(N * 2),
HashListOut1 = lists:sublist(lists:subtract(HashPool, HashList1), N),
HashListOut2 = lists:sublist(lists:subtract(HashPool, HashList2), N),
HashListOut3 = lists:sublist(lists:subtract(HashPool, HashList3), N),
HashListOut4 = lists:sublist(lists:subtract(HashPool, HashList4), N),
ListOfMisses =
lists:map(fun(HL) ->
lists:sublist(lists:subtract(HashPool, HL), N)
end,
ListOfHashLists),
SWc = os:timestamp(),
C0 = {0, 0},
C1 = check_neg_hashes(BloomBin1, HashListOut1, C0),
C2 = check_neg_hashes(BloomBin2, HashListOut2, C1),
C3 = check_neg_hashes(BloomBin3, HashListOut3, C2),
C4 = check_neg_hashes(BloomBin4, HashListOut4, C3),
{Pos, Neg} = C4,
{Pos, Neg} =
lists:foldl(fun(Nth, Acc) ->
HL = lists:nth(Nth, ListOfMisses),
BB = lists:nth(Nth, ListOfBlooms),
check_neg_hashes(BB, HL, Acc)
end,
{0, 0},
lists:seq(1, Runs)),
FPR = Pos / (Pos + Neg),
TSc = timer:now_diff(os:timestamp(), SWc),
@ -332,5 +275,4 @@ test_bloom(N) ->
[N, TSa, TSb, TSc, FPR]).
-endif.

View file

@ -214,7 +214,7 @@ search_range(StartRange, EndRange, Tree, StartKeyFun) ->
EndRangeFun =
fun(ER, _FirstRHSKey, FirstRHSValue) ->
StartRHSKey = StartKeyFun(FirstRHSValue),
ER >= StartRHSKey
not leveled_codec:endkey_passed(ER, StartRHSKey)
end,
case Tree of
{tree, _L, T} ->
@ -405,8 +405,12 @@ idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) ->
[{FirstRHSKey, FirstRHSValue}|_Rest] ->
case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of
true ->
% The start key is not after the end of the range
% and so this should be included in the range
Output ++ LHS ++ [{FirstRHSKey, FirstRHSValue}];
false ->
% the start key of the next entry is after the end
% of the range and so it should not be included
Output ++ LHS
end
end;
@ -804,4 +808,22 @@ empty_test() ->
T2 = empty(idxt),
?assertMatch(0, tsize(T2)).
%% Test that search_range/4 returns an entry held in an idxt tree when the
%% entry is indexed under a key ("Bucket1") beyond the end of the query
%% range ("Bucket"), but its start_key (as extracted by StartKeyFun) still
%% falls inside the range.  The EndRangeFun within search_range uses
%% leveled_codec:endkey_passed/2 rather than a plain >= comparison, so the
%% straddling entry must still be found.
search_range_idx_test() ->
Tree =
{idxt,1,
{{[{{o_rkv,"Bucket1","Key1",null},
{manifest_entry,{o_rkv,"Bucket","Key9083",null},
{o_rkv,"Bucket1","Key1",null},
"<0.320.0>","./16_1_6.sst"}}]},
{1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}},
%% Extract the manifest entry's start key for range comparison.
StartKeyFun =
fun(ME) ->
ME#manifest_entry.start_key
end,
R = search_range({o_rkv, "Bucket", null, null},
{o_rkv, "Bucket", null, null},
Tree,
StartKeyFun),
?assertMatch(1, length(R)).
-endif.

View file

@ -333,8 +333,8 @@ load_and_count(_Config) ->
Bookie1,
TestObject,
G1),
{_S, Count} = testutil:check_bucket_stats(Bookie1,
"Bucket"),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, "Bucket"),
if
Acc + 5000 == Count ->
ok
@ -351,8 +351,8 @@ load_and_count(_Config) ->
Bookie1,
TestObject,
G2),
{_S, Count} = testutil:check_bucket_stats(Bookie1,
"Bucket"),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, "Bucket"),
if
Acc + 5000 == Count ->
ok
@ -368,8 +368,8 @@ load_and_count(_Config) ->
Bookie1,
TestObject,
G1),
{_S, Count} = testutil:check_bucket_stats(Bookie1,
"Bucket"),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, "Bucket"),
if
Count == 200000 ->
ok
@ -385,8 +385,8 @@ load_and_count(_Config) ->
Bookie1,
TestObject,
G2),
{_S, Count} = testutil:check_bucket_stats(Bookie1,
"Bucket"),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, "Bucket"),
if
Acc + 5000 == Count ->
ok