Merge pull request #93 from martinsumner/mas-tictac-hashfun

Mas tictac hashfun
2017-09-28 11:05:14 +01:00 · 2017-09-28 11:05:14 +01:00 · 15a0a6f0f1
commit 15a0a6f0f1
parent 69ed945e58 0f5911ab70
8 changed files with 289 additions and 133 deletions
--- a/src/leveled_bookie.erl
+++ b/src/leveled_bookie.erl
@ -934,14 +934,10 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) ->
        fun() ->
            % The start key and end key will vary depending on whether the
            % fold is to fold over an index or a key range
-            {StartKey, EndKey, HashFun} =
+            {StartKey, EndKey, ExtractFun} =
                case Tag of
                    ?IDX_TAG ->
                        {IdxField, StartIdx, EndIdx} = Query,
-                        HashIdxValFun =
-                            fun(_Key, IdxValue) ->
-                                erlang:phash2(IdxValue)
-                            end,
                        {leveled_codec:to_ledgerkey(Bucket,
                                                    null,
                                                    ?IDX_TAG,
@ -952,23 +948,21 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) ->
                                                        ?IDX_TAG,
                                                        IdxField,
                                                        EndIdx),
-                            HashIdxValFun};
+                            fun(K, T) -> {K, T} end};
                    _ ->
                        {StartObjKey, EndObjKey} = Query,
-                        PassHashFun = fun(_Key, Hash) -> Hash end,
                        {leveled_codec:to_ledgerkey(Bucket,
                                                    StartObjKey,
                                                    Tag),
                            leveled_codec:to_ledgerkey(Bucket,
                                                        EndObjKey,
                                                        Tag),
-                            PassHashFun}
+                            fun(K, H) -> {K, {is_hash, H}} end}
                end,
-
            AccFun = accumulate_tree(Filter,
                                        JournalCheck,
                                        JournalSnapshot,
-                                        HashFun),
+                                        ExtractFun),
            Acc = leveled_penciller:pcl_fetchkeys(LedgerSnapshot,
                                                    StartKey,
                                                    EndKey,
@ -1263,7 +1257,7 @@ accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) ->
        fun(B, K, H, Tree) ->
            case FilterFun(B, K) of
                accumulate ->
-                    leveled_tictac:add_kv(Tree, K, H, HashFun);
+                    leveled_tictac:add_kv(Tree, K, H, HashFun, false);
                pass ->
                    Tree
            end
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@ -74,6 +74,10 @@
 -define(ALL_BUCKETS, <<"$all">>).

 -type recent_aae() :: #recent_aae{}.
+-type riak_metadata() :: {binary()|delete, % Sibling Metadata
+                            binary()|null, % Vclock Metadata
+                            integer()|null, % Hash of vclock - non-exportable 
+                            integer()}. % Size in bytes of real object

 -spec magic_hash(any()) -> integer().
 %% @doc 
@ -464,7 +468,8 @@ aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) ->
                        {LMD1, TTL} ->
                            TreeSize = AAE#recent_aae.tree_size,
                            SegID =
-                                leveled_tictac:get_segment(Key, TreeSize),
+                                leveled_tictac:get_segment(erlang:phash2(Key), 
+                                                            TreeSize),
                            IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin",
                            IdxTrmStr =
                                string:right(integer_to_list(SegID), 8, $0) ++
@ -601,6 +606,19 @@ build_metadata_object(PrimaryKey, MD) ->
    end.


+-spec riak_extract_metadata(binary()|delete, non_neg_integer()) -> 
+                            {riak_metadata(), list()}.
+%% @doc
+%% Riak extract metadata should extract a metadata object which is a 
+%% five-tuple of:
+%% - Binary of sibling Metadata
+%% - Binary of vector clock metadata
+%% - Non-exportable hash of the vector clock metadata
+%% - The largest last modified date of the object
+%% - Size of the object
+%%
+%% The metadata object should be returned with the full list of last 
+%% modified dates (which will be used for recent anti-entropy index creation)
 riak_extract_metadata(delete, Size) ->
    {{delete, null, null, Size}, []};
 riak_extract_metadata(ObjBin, Size) ->
--- a/src/leveled_log.erl
+++ b/src/leveled_log.erl
@ -120,7 +120,8 @@
    {"P0030",
        {warn, "We're doomed - intention recorded to destroy all files"}},
    {"P0031",
-        {info, "Completion of update to levelzero"}},
+        {info, "Completion of update to levelzero"
+                    ++ " with cache size status ~w ~w"}},
    {"P0032",
        {info, "Head timing for result ~w is sample ~w total ~w and max ~w"}},
    {"P0033",
@ -141,6 +142,8 @@
    {"P0039",
        {info, "Failed to release pid=~w "
                    ++ "leaving SnapshotCount=~w and MinSQN=~w"}},
+    {"P0040",
+        {info, "Archiving filename ~s as unused at startup"}},
        
    {"PC001",
        {info, "Penciller's clerk ~w started with owner ~w"}},
--- a/src/leveled_penciller.erl
+++ b/src/leveled_penciller.erl
@ -206,6 +206,8 @@
 -define(FILES_FP, "ledger_files").
 -define(CURRENT_FILEX, "crr").
 -define(PENDING_FILEX, "pnd").
+-define(SST_FILEX, ".sst").
+-define(ARCHIVE_FILEX, ".bak").
 -define(MEMTABLE, mem).
 -define(MAX_TABLESIZE, 28000). % This is less than max - but COIN_SIDECOUNT
 -define(SUPER_MAX_TABLE_SIZE, 40000).
@ -819,7 +821,8 @@ sst_rootpath(RootPath) ->
    FP.

 sst_filename(ManSQN, Level, Count) ->
-    lists:flatten(io_lib:format("./~w_~w_~w.sst", [ManSQN, Level, Count])).
+    lists:flatten(io_lib:format("./~w_~w_~w" ++ ?SST_FILEX, 
+                                    [ManSQN, Level, Count])).
    

 %%%============================================================================
@ -859,14 +862,14 @@ start_from_file(PCLopts) ->
            Pid
        end,
    SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1,
-    {MaxSQN, Manifest1} = leveled_pmanifest:load_manifest(Manifest0,
-                                                            OpenFun,
-                                                            SQNFun),
+    {MaxSQN, Manifest1, FileList} = 
+        leveled_pmanifest:load_manifest(Manifest0, OpenFun, SQNFun),
    leveled_log:log("P0014", [MaxSQN]),
    ManSQN = leveled_pmanifest:get_manifest_sqn(Manifest1),
    leveled_log:log("P0035", [ManSQN]),
    %% Find any L0 files
    L0FN = sst_filename(ManSQN + 1, 0, 0),
+    {State0, FileList0} = 
        case filelib:is_file(filename:join(sst_rootpath(RootPath), L0FN)) of
            true ->
                leveled_log:log("P0015", [L0FN]),
@ -883,17 +886,49 @@ start_from_file(PCLopts) ->
                                                                    L0Entry),
                leveled_log:log("P0016", [L0SQN]),
                LedgerSQN = max(MaxSQN, L0SQN),
-            {ok,
-                InitState#state{manifest = Manifest2,
+                {InitState#state{manifest = Manifest2,
                                    ledger_sqn = LedgerSQN,
-                                    persisted_sqn = LedgerSQN}};
+                                    persisted_sqn = LedgerSQN},
+                    [L0FN|FileList]};
            false ->
                leveled_log:log("P0017", []),
-            {ok,
-                InitState#state{manifest = Manifest1,
+                {InitState#state{manifest = Manifest1,
                                    ledger_sqn = MaxSQN,
-                                    persisted_sqn = MaxSQN}}
-    end.
+                                    persisted_sqn = MaxSQN},
+                    FileList}
+        end,
+    ok = archive_files(RootPath, FileList0),
+    {ok, State0}.
+
+archive_files(RootPath, UsedFileList) ->
+    {ok, AllFiles} = file:list_dir(sst_rootpath(RootPath)),
+    FileCheckFun =
+        fun(FN, UnusedFiles) ->
+            FN0 = "./" ++ FN,
+            case filename:extension(FN0) of 
+                ?SST_FILEX ->
+                    case lists:member(FN0, UsedFileList) of 
+                        true ->
+                            UnusedFiles;
+                        false ->
+                            leveled_log:log("P0040", [FN0]),
+                            [FN0|UnusedFiles]
+                    end;
+                _ ->
+                    UnusedFiles
+            end
+        end,
+    RenameFun =
+        fun(FN) ->
+            AltName = filename:join(sst_rootpath(RootPath), 
+                                        filename:basename(FN, ?SST_FILEX))
+                        ++ ?ARCHIVE_FILEX,
+            file:rename(filename:join(sst_rootpath(RootPath), FN), 
+                            AltName)
+        end,
+    FilesToArchive = lists:foldl(FileCheckFun, [], AllFiles),
+    lists:foreach(RenameFun, FilesToArchive),
+    ok.


 update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN},
@ -934,11 +969,13 @@ update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN},
            case {CacheTooBig, L0Free, JitterCheck, NoPendingManifestChange} of
                {true, true, true, true}  ->
                    L0Constructor = roll_memory(UpdState, false),
-                    leveled_log:log_timer("P0031", [], SW),
+                    leveled_log:log_timer("P0031", [true, true], SW),
                    UpdState#state{levelzero_pending=true,
                                    levelzero_constructor=L0Constructor};
                _ ->
-                    leveled_log:log_timer("P0031", [], SW),
+                    leveled_log:log_timer("P0031", 
+                                            [CacheTooBig, JitterCheck], 
+                                            SW),
                    UpdState
            end
    end.
@ -1340,6 +1377,23 @@ clean_dir_test() ->
    ok = clean_subdir(RootPath ++ "/test.bob"),
    ok = file:delete(RootPath ++ "/test.bob").

+
+archive_files_test() ->
+    RootPath = "../test/ledger",
+    SSTPath = sst_rootpath(RootPath),
+    ok = filelib:ensure_dir(SSTPath),
+    ok = file:write_file(SSTPath ++ "/test1.sst", "hello_world"),
+    ok = file:write_file(SSTPath ++ "/test2.sst", "hello_world"),
+    ok = file:write_file(SSTPath ++ "/test3.bob", "hello_world"),
+    UsedFiles = ["./test1.sst"],
+    ok = archive_files(RootPath, UsedFiles),
+    {ok, AllFiles} = file:list_dir(SSTPath),
+    ?assertMatch(true, lists:member("test1.sst", AllFiles)),
+    ?assertMatch(false, lists:member("test2.sst", AllFiles)),
+    ?assertMatch(true, lists:member("test3.bob", AllFiles)),
+    ?assertMatch(true, lists:member("test2.bak", AllFiles)),
+    ok = clean_subdir(SSTPath).
+
 simple_server_test() ->
    RootPath = "../test/ledger",
    clean_testdir(RootPath),
--- a/src/leveled_pmanifest.erl
+++ b/src/leveled_pmanifest.erl
@ -136,7 +136,8 @@ copy_manifest(Manifest) ->
    % about is switched to undefined
    Manifest#manifest{snapshots = undefined, pending_deletes = undefined}.

-spec load_manifest(manifest(), fun(), fun()) -> {integer(), manifest()}.
+-spec load_manifest(manifest(), fun(), fun()) -> 
+                                        {integer(), manifest(), list()}.
 %% @doc
 %% Roll over the manifest starting a process to manage each file in the
 %% manifest.  The PidFun should be able to return the Pid of a file process
@ -144,13 +145,15 @@ copy_manifest(Manifest) ->
 %% of that file, if passed the Pid that owns it.
 load_manifest(Manifest, PidFun, SQNFun) ->
    UpdateLevelFun =
-        fun(LevelIdx, {AccMaxSQN, AccMan}) ->
+        fun(LevelIdx, {AccMaxSQN, AccMan, AccFL}) ->
            L0 = array:get(LevelIdx, AccMan#manifest.levels),
-            {L1, SQN1} = load_level(LevelIdx, L0, PidFun, SQNFun),
+            {L1, SQN1, FileList} = load_level(LevelIdx, L0, PidFun, SQNFun),
            UpdLevels = array:set(LevelIdx, L1, AccMan#manifest.levels),
-            {max(AccMaxSQN, SQN1), AccMan#manifest{levels = UpdLevels}}
+            {max(AccMaxSQN, SQN1), 
+                AccMan#manifest{levels = UpdLevels},
+                AccFL ++ FileList}
        end,
-    lists:foldl(UpdateLevelFun, {0, Manifest},
+    lists:foldl(UpdateLevelFun, {0, Manifest, []},
                    lists:seq(0, Manifest#manifest.basement)).

 -spec close_manifest(manifest(), fun()) -> ok.
@ -488,27 +491,33 @@ levelzero_present(Manifest) ->

 load_level(LevelIdx, Level, PidFun, SQNFun) ->
    HigherLevelLoadFun =
-        fun(ME, {L_Out, L_MaxSQN}) ->
+        fun(ME, {L_Out, L_MaxSQN, FileList}) ->
            FN = ME#manifest_entry.filename,
            P = PidFun(FN),
            SQN = SQNFun(P),
-            {[ME#manifest_entry{owner=P}|L_Out], max(SQN, L_MaxSQN)}
+            {[ME#manifest_entry{owner=P}|L_Out], 
+                max(SQN, L_MaxSQN),
+                [FN|FileList]}
        end,
    LowerLevelLoadFun =
-        fun({EK, ME}, {L_Out, L_MaxSQN}) ->
+        fun({EK, ME}, {L_Out, L_MaxSQN, FileList}) ->
            FN = ME#manifest_entry.filename,
            P = PidFun(FN),
            SQN = SQNFun(P),
-            {[{EK, ME#manifest_entry{owner=P}}|L_Out], max(SQN, L_MaxSQN)}
+            {[{EK, ME#manifest_entry{owner=P}}|L_Out], 
+                max(SQN, L_MaxSQN),
+                [FN|FileList]}
        end,
    case LevelIdx =< 1 of
        true ->
-            lists:foldr(HigherLevelLoadFun, {[], 0}, Level);
+            lists:foldr(HigherLevelLoadFun, {[], 0, []}, Level);
        false ->
-            {L0, MaxSQN} = lists:foldr(LowerLevelLoadFun,
-                                        {[], 0},
+            {L0, MaxSQN, Flist} = lists:foldr(LowerLevelLoadFun,
+                                        {[], 0, []},
                                        leveled_tree:to_list(Level)),
-            {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), MaxSQN}
+            {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), 
+                MaxSQN, 
+                Flist}
    end.

 close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 ->
--- a/src/leveled_tictac.erl
+++ b/src/leveled_tictac.erl
@ -57,14 +57,14 @@
 -export([
            new_tree/1,
            new_tree/2,
-            add_kv/4,
+            add_kv/5,
            find_dirtyleaves/2,
            find_dirtysegments/2,
            fetch_root/1,
            fetch_leaves/2,
            merge_trees/2,
            get_segment/2,
-            tictac_hash/2,
+            tictac_hash/3,
            export_tree/1,
            import_tree/1
        ]).
@ -161,13 +161,24 @@ import_tree(ExportedTree) ->
                    level2 = Lv2}.

 -spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree().
+add_kv(TicTacTree, Key, Value, BinExtractFun) ->
+    add_kv(TicTacTree, Key, Value, BinExtractFun, false).
+
+-spec add_kv(tictactree(), tuple(), tuple(), fun(), boolean()) -> tictactree().
 %% @doc
-%% Add a Key and value to a tictactree using the HashFun to calculate the Hash
-%% based on that key and value
-add_kv(TicTacTree, Key, Value, HashFun) ->
-    HashV = HashFun(Key, Value),
-    SegChangeHash = tictac_hash(Key, HashV),
-    Segment = get_segment(Key, TicTacTree#tictactree.segment_count),
+%% Add a Key and value to a tictactree using the BinExtractFun to extract a 
+%% binary from the Key and value from which to generate the hash.  The 
+%% BinExtractFun will also need to do any canonicalisation necessary to make
+%% the hash consistent (such as whitespace removal, or sorting)
+%%
+%% For exportable trees the hash function will be based on the CJ Bernstein
+%% magic hash.  For non-exportable trees erlang:phash2 will be used, and so 
+%% non-binary Keys and Values can be returned from the BinExtractFun in this
+%% case.
+add_kv(TicTacTree, Key, Value, BinExtractFun, Exportable) ->
+    {BinK, BinV} = BinExtractFun(Key, Value),
+    {SegHash, SegChangeHash} = tictac_hash(BinK, BinV, Exportable),
+    Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count),
    
    Level2Pos =
        Segment band (TicTacTree#tictactree.width - 1),
@ -275,21 +286,33 @@ merge_trees(TreeA, TreeB) ->
    
    MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}.

-spec get_segment(any(), integer()|small|medium|large|xlarge) -> integer().
+-spec get_segment(integer(), integer()|small|medium|large|xlarge) -> integer().
 %% @doc
 %% Return the segment ID for a Key.  Can pass the tree size or the actual
 %% segment count derived from the size
-get_segment(Key, SegmentCount) when is_integer(SegmentCount) ->
-    erlang:phash2(Key) band (SegmentCount - 1);
-get_segment(Key, TreeSize) ->
-    get_segment(Key, element(3, get_size(TreeSize))).
+get_segment(Hash, SegmentCount) when is_integer(SegmentCount) ->
+    Hash band (SegmentCount - 1);
+get_segment(Hash, TreeSize) ->
+    get_segment(Hash, element(3, get_size(TreeSize))).


-spec tictac_hash(tuple(), any()) -> integer().
+-spec tictac_hash(any(), any(), boolean()) -> {integer(), integer()}.
 %% @doc
-%% Hash the key and term
-tictac_hash(Key, Term) ->
-    erlang:phash2({Key, Term}).
+%% Hash the key and term, to either something repetable in Erlang, or using 
+%% the DJ Bernstein hash if it is the tree needs to be compared with one 
+%% calculated with a non-Erlang store
+%%
+%% Boolean is Exportable.  does the hash need to be repetable by a non-Erlang
+%% machine  
+tictac_hash(BinKey, BinVal, true) 
+                            when is_binary(BinKey) and is_binary(BinVal) ->
+    HashKey = leveled_codec:magic_hash({binary, BinKey}),
+    HashVal = leveled_codec:magic_hash({binary, BinVal}),
+    {HashKey, HashKey bxor HashVal};
+tictac_hash(BinKey, {is_hash, HashedVal}, false) ->
+    {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor HashedVal};
+tictac_hash(BinKey, BinVal, false) ->
+    {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor erlang:phash2(BinVal)}.

 %%%============================================================================
 %%% Internal functions
@ -363,13 +386,17 @@ simple_bysize_test() ->
    simple_test_withsize(xlarge).

 simple_test_withsize(Size) ->
-    HashFun = fun(_K, V) -> erlang:phash2(V) end,
+    BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end,
+    
+    K1 = {o, "B1", "K1", null},
+    K2 = {o, "B1", "K2", null},
+    K3 = {o, "B1", "K3", null},

    Tree0 = new_tree(0, Size),
-    Tree1 = add_kv(Tree0, {o, "B1", "K1", null}, {caine, 1}, HashFun),
-    Tree2 = add_kv(Tree1, {o, "B1", "K2", null}, {caine, 2}, HashFun),
-    Tree3 = add_kv(Tree2, {o, "B1", "K3", null}, {caine, 3}, HashFun),
-    Tree3A = add_kv(Tree3, {o, "B1", "K3", null}, {caine, 4}, HashFun),
+    Tree1 = add_kv(Tree0, K1, {caine, 1}, BinFun),
+    Tree2 = add_kv(Tree1, K2, {caine, 2}, BinFun),
+    Tree3 = add_kv(Tree2, K3, {caine, 3}, BinFun),
+    Tree3A = add_kv(Tree3, K3, {caine, 4}, BinFun),
    ?assertMatch(true, Tree0#tictactree.level1 == Tree0#tictactree.level1),
    ?assertMatch(false, Tree0#tictactree.level1 == Tree1#tictactree.level1),
    ?assertMatch(false, Tree1#tictactree.level1 == Tree2#tictactree.level1),
@ -377,10 +404,10 @@ simple_test_withsize(Size) ->
    ?assertMatch(false, Tree3#tictactree.level1 == Tree3A#tictactree.level1),
    
    Tree0X = new_tree(0, Size),
-    Tree1X = add_kv(Tree0X, {o, "B1", "K3", null}, {caine, 3}, HashFun),
-    Tree2X = add_kv(Tree1X, {o, "B1", "K1", null}, {caine, 1}, HashFun),
-    Tree3X = add_kv(Tree2X, {o, "B1", "K2", null}, {caine, 2}, HashFun),
-    Tree3XA = add_kv(Tree3X, {o, "B1", "K3", null}, {caine, 4}, HashFun),
+    Tree1X = add_kv(Tree0X, K3, {caine, 3}, BinFun),
+    Tree2X = add_kv(Tree1X, K1, {caine, 1}, BinFun),
+    Tree3X = add_kv(Tree2X, K2, {caine, 2}, BinFun),
+    Tree3XA = add_kv(Tree3X, K3, {caine, 4}, BinFun),
    ?assertMatch(false, Tree1#tictactree.level1 == Tree1X#tictactree.level1),
    ?assertMatch(false, Tree2#tictactree.level1 == Tree2X#tictactree.level1),
    ?assertMatch(true, Tree3#tictactree.level1 == Tree3X#tictactree.level1),
@ -388,12 +415,17 @@ simple_test_withsize(Size) ->
    
    SC = Tree0#tictactree.segment_count,

+    GetSegFun = 
+        fun(TK) ->
+            get_segment(erlang:phash2(term_to_binary(TK)), SC)
+        end,
+    
    DL0 = find_dirtyleaves(Tree1, Tree0),
-    ?assertMatch(true, lists:member(get_segment({o, "B1", "K1", null}, SC), DL0)),
+    ?assertMatch(true, lists:member(GetSegFun(K1), DL0)),
    DL1 = find_dirtyleaves(Tree3, Tree1),
-    ?assertMatch(true, lists:member(get_segment({o, "B1", "K2", null}, SC), DL1)),
-    ?assertMatch(true, lists:member(get_segment({o, "B1", "K3", null}, SC), DL1)),
-    ?assertMatch(false, lists:member(get_segment({o, "B1", "K1", null}, SC), DL1)),
+    ?assertMatch(true, lists:member(GetSegFun(K2), DL1)),
+    ?assertMatch(true, lists:member(GetSegFun(K3), DL1)),
+    ?assertMatch(false, lists:member(GetSegFun(K1), DL1)),
    
    % Export and import tree to confirm no difference
    ExpTree3 = export_tree(Tree3),
@ -416,24 +448,24 @@ merge_bysize_xlarge_test2() ->
    merge_test_withsize(xlarge).

 merge_test_withsize(Size) ->
-    HashFun = fun(_K, V) -> erlang:phash2(V) end,
+    BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end,
    
    TreeX0 = new_tree(0, Size),
-    TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, HashFun),
-    TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, HashFun),
-    TreeX3 = add_kv(TreeX2, {o, "B1", "X3", null}, {caine, 3}, HashFun),
-    TreeX4 = add_kv(TreeX3, {o, "B1", "X3", null}, {caine, 4}, HashFun),
+    TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, BinFun),
+    TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, BinFun),
+    TreeX3 = add_kv(TreeX2, {o, "B1", "X3", null}, {caine, 3}, BinFun),
+    TreeX4 = add_kv(TreeX3, {o, "B1", "X3", null}, {caine, 4}, BinFun),
    
    TreeY0 = new_tree(0, Size),
-    TreeY1 = add_kv(TreeY0, {o, "B1", "Y1", null}, {caine, 101}, HashFun),
-    TreeY2 = add_kv(TreeY1, {o, "B1", "Y2", null}, {caine, 102}, HashFun),
-    TreeY3 = add_kv(TreeY2, {o, "B1", "Y3", null}, {caine, 103}, HashFun),
-    TreeY4 = add_kv(TreeY3, {o, "B1", "Y3", null}, {caine, 104}, HashFun),
+    TreeY1 = add_kv(TreeY0, {o, "B1", "Y1", null}, {caine, 101}, BinFun),
+    TreeY2 = add_kv(TreeY1, {o, "B1", "Y2", null}, {caine, 102}, BinFun),
+    TreeY3 = add_kv(TreeY2, {o, "B1", "Y3", null}, {caine, 103}, BinFun),
+    TreeY4 = add_kv(TreeY3, {o, "B1", "Y3", null}, {caine, 104}, BinFun),
    
-    TreeZ1 = add_kv(TreeX4, {o, "B1", "Y1", null}, {caine, 101}, HashFun),
-    TreeZ2 = add_kv(TreeZ1, {o, "B1", "Y2", null}, {caine, 102}, HashFun),
-    TreeZ3 = add_kv(TreeZ2, {o, "B1", "Y3", null}, {caine, 103}, HashFun),
-    TreeZ4 = add_kv(TreeZ3, {o, "B1", "Y3", null}, {caine, 104}, HashFun),
+    TreeZ1 = add_kv(TreeX4, {o, "B1", "Y1", null}, {caine, 101}, BinFun),
+    TreeZ2 = add_kv(TreeZ1, {o, "B1", "Y2", null}, {caine, 102}, BinFun),
+    TreeZ3 = add_kv(TreeZ2, {o, "B1", "Y3", null}, {caine, 103}, BinFun),
+    TreeZ4 = add_kv(TreeZ3, {o, "B1", "Y3", null}, {caine, 104}, BinFun),
    
    TreeM0 = merge_trees(TreeX4, TreeY4),
    checktree(TreeM0),
@ -443,6 +475,10 @@ merge_test_withsize(Size) ->
    checktree(TreeM1),
    ?assertMatch(false, TreeM1#tictactree.level1 == TreeZ4#tictactree.level1).

+exportable_test() ->
+    {Int1, Int2} = tictac_hash(<<"key">>, <<"value">>, true),
+    ?assertMatch({true, true}, {Int1 >= 0, Int2 >=0}).
+
 -endif.

    
--- a/test/end_to_end/basic_SUITE.erl
+++ b/test/end_to_end/basic_SUITE.erl
@ -580,22 +580,59 @@ space_clear_ondelete(_Config) ->
    io:format("This should cause a final ledger merge event~n"),
    io:format("Will require the penciller to resolve the issue of creating" ++
                " an empty file as all keys compact on merge~n"),
-    timer:sleep(12000),
+    
+    CheckFun = 
+        fun(X, FileCount) ->
+            case FileCount of
+                0 ->
+                    0;
+                _ ->
+                    timer:sleep(X),
+                    {ok, NewFC} = 
+                        file:list_dir(RootPath ++ "/ledger/ledger_files"),
+                    io:format("Looping with ledger file count ~w~n", 
+                                [length(NewFC)]),
+                    length(strip_nonsst(NewFC))
+            end
+        end,
+    
+    FC = lists:foldl(CheckFun, infinity, [2000, 3000, 5000, 8000]),
    ok = leveled_bookie:book_close(Book3),
+    case FC of 
+        0 ->
+            ok;
+        _ ->
+            {ok, Book4} = leveled_bookie:book_start(StartOpts1),
+            lists:foldl(CheckFun, infinity, [2000, 3000, 5000, 8000]),
+            leveled_bookie:book_close(Book4)
+    end,
+
    {ok, FNsD_L} = file:list_dir(RootPath ++ "/ledger/ledger_files"),
    io:format("FNsD - Bookie has ~w ledger files " ++
-                    "after second close~n", [length(FNsD_L)]),
+                    "after second close~n", [length(strip_nonsst(FNsD_L))]),
    lists:foreach(fun(FN) -> 
                        io:format("FNsD - Ledger file is ~s~n", [FN]) 
                    end, 
                    FNsD_L),
    true = PointB_Journals < length(FNsA_J),
-    true = length(FNsD_L) < length(FNsA_L),
-    true = length(FNsD_L) < length(FNsB_L),
-    true = length(FNsD_L) < length(FNsC_L),
-    true = length(FNsD_L) == 0.
+    true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsA_L)),
+    true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsB_L)),
+    true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsC_L)),
+    true = length(strip_nonsst(FNsD_L)) == 0.


+strip_nonsst(FileList) ->
+    SSTOnlyFun = 
+        fun(FN, Acc) ->
+            case filename:extension(FN) of 
+                ".sst" -> 
+                    [FN|Acc];
+                _ ->
+                    Acc 
+            end
+        end,
+    lists:foldl(SSTOnlyFun, [], FileList).
+

 is_empty_test(_Config) ->
    RootPath = testutil:reset_filestructure(),
--- a/test/end_to_end/tictac_SUITE.erl
+++ b/test/end_to_end/tictac_SUITE.erl
@ -114,19 +114,16 @@ many_put_compare(_Config) ->
    % Now run the same query by putting the tree-building responsibility onto
    % the fold_objects_fun

-    ApplyHash =
-      fun(HashFun) ->
-          fun(_Key, Value) ->
+    ExtractClockFun =
+        fun(Key, Value) ->
            {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value),
            <<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
-                    Rest/binary>> = HeadBin,
-              <<VclockBin:VclockLen/binary, _NotNeeded/binary>> = Rest,
-              HashFun(lists:sort(binary_to_term(VclockBin)))
-          end
+                VclockBin:VclockLen/binary, _Rest/binary>> = HeadBin,
+            {Key, lists:sort(binary_to_term(VclockBin))}
        end,
    FoldObjectsFun =
      fun(_Bucket, Key, Value, Acc) ->
-          leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(fun erlang:phash2/1))
+          leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun, false)
      end,

    FoldQ0 = {foldheads_bybucket,
@ -157,22 +154,25 @@ many_put_compare(_Config) ->
                [timer:now_diff(os:timestamp(), SWB1Obj)]),
    true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj1)) == 0,

-    % AAE trees within riak are based on a sha of the vector clock.  So to
-    % compare with an AAE tree we need to compare outputs when we're hashing
-    % a hash
-    AltHashFun =
-        fun(Term) ->
-            erlang:phash2(crypto:hash(sha, term_to_binary(Term)))
+    % For an exportable comparison, want hash to be based on something not 
+    % coupled to erlang language - so use exportable query
+    AltExtractFun =
+        fun(K, V) ->
+            {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(V),
+            <<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
+                VclockBin:VclockLen/binary, _Rest/binary>> = HeadBin,
+            {term_to_binary(K), VclockBin}
        end,
    AltFoldObjectsFun =
        fun(_Bucket, Key, Value, Acc) ->
-            leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(AltHashFun))
+            leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun, true)
        end,
    AltFoldQ0 = {foldheads_bybucket,
                    o_rkv,
                    "Bucket",
                    {AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)},
-              false, true},
+                    false, 
+                    true},
    {async, TreeAAltObjFolder0} =
        leveled_bookie:book_returnfolder(Bookie2, AltFoldQ0),
    SWB2Obj = os:timestamp(),
@ -187,15 +187,19 @@ many_put_compare(_Config) ->
    io:format("Build tictac tree via object fold with no "++
                    "presence check and 200K objects  and alt hash in ~w~n",
                [timer:now_diff(os:timestamp(), SWB3Obj)]),
-    true =
-        length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)) == 1,
+    DL_ExportFold = 
+        length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)),
+    io:format("Found dirty leaves with exportable comparison of ~w~n",
+                [DL_ExportFold]),
+    true = DL_ExportFold == 1,


    %% Finding differing keys
    FoldKeysFun =
        fun(SegListToFind) ->
            fun(_B, K, Acc) ->
-                Seg = leveled_tictac:get_segment(K, SegmentCount),
+                Seg = 
+                    leveled_tictac:get_segment(erlang:phash2(K), SegmentCount),
                case lists:member(Seg, SegListToFind) of
                    true ->
                        [K|Acc];
@ -469,7 +473,8 @@ index_compare(_Config) ->

    FoldKeysIndexQFun =
        fun(_Bucket, {Term, Key}, Acc) ->
-            Seg = leveled_tictac:get_segment(Key, SegmentCount),
+            Seg = 
+                leveled_tictac:get_segment(erlang:phash2(Key), SegmentCount),
            case lists:member(Seg, DL3_0) of
                true ->
                    [{Term, Key}|Acc];