diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index d7b73d5..eb422e1 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -934,14 +934,10 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) -> fun() -> % The start key and end key will vary depending on whether the % fold is to fold over an index or a key range - {StartKey, EndKey, HashFun} = + {StartKey, EndKey, ExtractFun} = case Tag of ?IDX_TAG -> {IdxField, StartIdx, EndIdx} = Query, - HashIdxValFun = - fun(_Key, IdxValue) -> - erlang:phash2(IdxValue) - end, {leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, @@ -952,23 +948,21 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) -> ?IDX_TAG, IdxField, EndIdx), - HashIdxValFun}; + fun(K, T) -> {K, T} end}; _ -> {StartObjKey, EndObjKey} = Query, - PassHashFun = fun(_Key, Hash) -> Hash end, {leveled_codec:to_ledgerkey(Bucket, StartObjKey, Tag), leveled_codec:to_ledgerkey(Bucket, EndObjKey, Tag), - PassHashFun} + fun(K, H) -> {K, {is_hash, H}} end} end, - AccFun = accumulate_tree(Filter, JournalCheck, JournalSnapshot, - HashFun), + ExtractFun), Acc = leveled_penciller:pcl_fetchkeys(LedgerSnapshot, StartKey, EndKey, @@ -1263,7 +1257,7 @@ accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) -> fun(B, K, H, Tree) -> case FilterFun(B, K) of accumulate -> - leveled_tictac:add_kv(Tree, K, H, HashFun); + leveled_tictac:add_kv(Tree, K, H, HashFun, false); pass -> Tree end diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 1f6d6cc..dc981e8 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -74,6 +74,10 @@ -define(ALL_BUCKETS, <<"$all">>). -type recent_aae() :: #recent_aae{}. +-type riak_metadata() :: {binary()|delete, % Sibling Metadata + binary()|null, % Vclock Metadata + integer()|null, % Hash of vclock - non-exportable + integer()}. % Size in bytes of real object -spec magic_hash(any()) -> integer(). %% @doc @@ -466,7 +470,8 @@ aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) -> {LMD1, TTL} -> TreeSize = AAE#recent_aae.tree_size, SegID = - leveled_tictac:get_segment(Key, TreeSize), + leveled_tictac:get_segment(erlang:phash2(Key), + TreeSize), IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin", IdxTrmStr = string:right(integer_to_list(SegID), 8, $0) ++ @@ -591,7 +596,7 @@ get_objhash(Tag, ObjMetaData) -> Hash end. - + build_metadata_object(PrimaryKey, MD) -> {Tag, _Bucket, _Key, null} = PrimaryKey, case Tag of @@ -603,6 +608,19 @@ build_metadata_object(PrimaryKey, MD) -> end. +-spec riak_extract_metadata(binary()|delete, non_neg_integer()) -> + {riak_metadata(), list()}. +%% @doc +%% Riak extract metadata should extract a metadata object which is a +%% five-tuple of: +%% - Binary of sibling Metadata +%% - Binary of vector clock metadata +%% - Non-exportable hash of the vector clock metadata +%% - The largest last modified date of the object +%% - Size of the object +%% +%% The metadata object should be returned with the full list of last +%% modified dates (which will be used for recent anti-entropy index creation) riak_extract_metadata(delete, Size) -> {{delete, null, null, Size}, []}; riak_extract_metadata(ObjBin, Size) -> diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 5432874..59892dd 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -120,7 +120,8 @@ {"P0030", {warn, "We're doomed - intention recorded to destroy all files"}}, {"P0031", - {info, "Completion of update to levelzero"}}, + {info, "Completion of update to levelzero" + ++ " with cache size status ~w ~w"}}, {"P0032", {info, "Head timing for result ~w is sample ~w total ~w and max ~w"}}, {"P0033", @@ -141,6 +142,8 @@ {"P0039", {info, "Failed to release pid=~w " ++ "leaving SnapshotCount=~w and MinSQN=~w"}}, + {"P0040", + {info, "Archiving filename ~s as unused at startup"}}, {"PC001", {info, "Penciller's clerk ~w started with owner ~w"}}, diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 8330235..a45184e 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -206,6 +206,8 @@ -define(FILES_FP, "ledger_files"). -define(CURRENT_FILEX, "crr"). -define(PENDING_FILEX, "pnd"). +-define(SST_FILEX, ".sst"). +-define(ARCHIVE_FILEX, ".bak"). -define(MEMTABLE, mem). -define(MAX_TABLESIZE, 28000). % This is less than max - but COIN_SIDECOUNT -define(SUPER_MAX_TABLE_SIZE, 40000). @@ -819,7 +821,8 @@ sst_rootpath(RootPath) -> FP. sst_filename(ManSQN, Level, Count) -> - lists:flatten(io_lib:format("./~w_~w_~w.sst", [ManSQN, Level, Count])). + lists:flatten(io_lib:format("./~w_~w_~w" ++ ?SST_FILEX, + [ManSQN, Level, Count])). %%%============================================================================ @@ -859,41 +862,73 @@ start_from_file(PCLopts) -> Pid end, SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1, - {MaxSQN, Manifest1} = leveled_pmanifest:load_manifest(Manifest0, - OpenFun, - SQNFun), + {MaxSQN, Manifest1, FileList} = + leveled_pmanifest:load_manifest(Manifest0, OpenFun, SQNFun), leveled_log:log("P0014", [MaxSQN]), ManSQN = leveled_pmanifest:get_manifest_sqn(Manifest1), leveled_log:log("P0035", [ManSQN]), %% Find any L0 files L0FN = sst_filename(ManSQN + 1, 0, 0), - case filelib:is_file(filename:join(sst_rootpath(RootPath), L0FN)) of - true -> - leveled_log:log("P0015", [L0FN]), - L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN), - {ok, L0Pid, {L0StartKey, L0EndKey}} = L0Open, - L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid), - L0Entry = #manifest_entry{start_key = L0StartKey, - end_key = L0EndKey, - filename = L0FN, - owner = L0Pid}, - Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1, - ManSQN + 1, - 0, - L0Entry), - leveled_log:log("P0016", [L0SQN]), - LedgerSQN = max(MaxSQN, L0SQN), - {ok, - InitState#state{manifest = Manifest2, + {State0, FileList0} = + case filelib:is_file(filename:join(sst_rootpath(RootPath), L0FN)) of + true -> + leveled_log:log("P0015", [L0FN]), + L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN), + {ok, L0Pid, {L0StartKey, L0EndKey}} = L0Open, + L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid), + L0Entry = #manifest_entry{start_key = L0StartKey, + end_key = L0EndKey, + filename = L0FN, + owner = L0Pid}, + Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1, + ManSQN + 1, + 0, + L0Entry), + leveled_log:log("P0016", [L0SQN]), + LedgerSQN = max(MaxSQN, L0SQN), + {InitState#state{manifest = Manifest2, ledger_sqn = LedgerSQN, - persisted_sqn = LedgerSQN}}; - false -> - leveled_log:log("P0017", []), - {ok, - InitState#state{manifest = Manifest1, + persisted_sqn = LedgerSQN}, + [L0FN|FileList]}; + false -> + leveled_log:log("P0017", []), + {InitState#state{manifest = Manifest1, ledger_sqn = MaxSQN, - persisted_sqn = MaxSQN}} - end. + persisted_sqn = MaxSQN}, + FileList} + end, + ok = archive_files(RootPath, FileList0), + {ok, State0}. + +archive_files(RootPath, UsedFileList) -> + {ok, AllFiles} = file:list_dir(sst_rootpath(RootPath)), + FileCheckFun = + fun(FN, UnusedFiles) -> + FN0 = "./" ++ FN, + case filename:extension(FN0) of + ?SST_FILEX -> + case lists:member(FN0, UsedFileList) of + true -> + UnusedFiles; + false -> + leveled_log:log("P0040", [FN0]), + [FN0|UnusedFiles] + end; + _ -> + UnusedFiles + end + end, + RenameFun = + fun(FN) -> + AltName = filename:join(sst_rootpath(RootPath), + filename:basename(FN, ?SST_FILEX)) + ++ ?ARCHIVE_FILEX, + file:rename(filename:join(sst_rootpath(RootPath), FN), + AltName) + end, + FilesToArchive = lists:foldl(FileCheckFun, [], AllFiles), + lists:foreach(RenameFun, FilesToArchive), + ok. update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN}, @@ -934,11 +969,13 @@ update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN}, case {CacheTooBig, L0Free, JitterCheck, NoPendingManifestChange} of {true, true, true, true} -> L0Constructor = roll_memory(UpdState, false), - leveled_log:log_timer("P0031", [], SW), + leveled_log:log_timer("P0031", [true, true], SW), UpdState#state{levelzero_pending=true, levelzero_constructor=L0Constructor}; _ -> - leveled_log:log_timer("P0031", [], SW), + leveled_log:log_timer("P0031", + [CacheTooBig, JitterCheck], + SW), UpdState end end. @@ -1340,6 +1377,23 @@ clean_dir_test() -> ok = clean_subdir(RootPath ++ "/test.bob"), ok = file:delete(RootPath ++ "/test.bob"). + +archive_files_test() -> + RootPath = "../test/ledger", + SSTPath = sst_rootpath(RootPath), + ok = filelib:ensure_dir(SSTPath), + ok = file:write_file(SSTPath ++ "/test1.sst", "hello_world"), + ok = file:write_file(SSTPath ++ "/test2.sst", "hello_world"), + ok = file:write_file(SSTPath ++ "/test3.bob", "hello_world"), + UsedFiles = ["./test1.sst"], + ok = archive_files(RootPath, UsedFiles), + {ok, AllFiles} = file:list_dir(SSTPath), + ?assertMatch(true, lists:member("test1.sst", AllFiles)), + ?assertMatch(false, lists:member("test2.sst", AllFiles)), + ?assertMatch(true, lists:member("test3.bob", AllFiles)), + ?assertMatch(true, lists:member("test2.bak", AllFiles)), + ok = clean_subdir(SSTPath). + simple_server_test() -> RootPath = "../test/ledger", clean_testdir(RootPath), diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index f6e919e..3970c7e 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -136,7 +136,8 @@ copy_manifest(Manifest) -> % about is switched to undefined Manifest#manifest{snapshots = undefined, pending_deletes = undefined}. --spec load_manifest(manifest(), fun(), fun()) -> {integer(), manifest()}. +-spec load_manifest(manifest(), fun(), fun()) -> + {integer(), manifest(), list()}. %% @doc %% Roll over the manifest starting a process to manage each file in the %% manifest. The PidFun should be able to return the Pid of a file process @@ -144,13 +145,15 @@ copy_manifest(Manifest) -> %% of that file, if passed the Pid that owns it. load_manifest(Manifest, PidFun, SQNFun) -> UpdateLevelFun = - fun(LevelIdx, {AccMaxSQN, AccMan}) -> + fun(LevelIdx, {AccMaxSQN, AccMan, AccFL}) -> L0 = array:get(LevelIdx, AccMan#manifest.levels), - {L1, SQN1} = load_level(LevelIdx, L0, PidFun, SQNFun), + {L1, SQN1, FileList} = load_level(LevelIdx, L0, PidFun, SQNFun), UpdLevels = array:set(LevelIdx, L1, AccMan#manifest.levels), - {max(AccMaxSQN, SQN1), AccMan#manifest{levels = UpdLevels}} + {max(AccMaxSQN, SQN1), + AccMan#manifest{levels = UpdLevels}, + AccFL ++ FileList} end, - lists:foldl(UpdateLevelFun, {0, Manifest}, + lists:foldl(UpdateLevelFun, {0, Manifest, []}, lists:seq(0, Manifest#manifest.basement)). -spec close_manifest(manifest(), fun()) -> ok. @@ -488,27 +491,33 @@ levelzero_present(Manifest) -> load_level(LevelIdx, Level, PidFun, SQNFun) -> HigherLevelLoadFun = - fun(ME, {L_Out, L_MaxSQN}) -> + fun(ME, {L_Out, L_MaxSQN, FileList}) -> FN = ME#manifest_entry.filename, P = PidFun(FN), SQN = SQNFun(P), - {[ME#manifest_entry{owner=P}|L_Out], max(SQN, L_MaxSQN)} + {[ME#manifest_entry{owner=P}|L_Out], + max(SQN, L_MaxSQN), + [FN|FileList]} end, LowerLevelLoadFun = - fun({EK, ME}, {L_Out, L_MaxSQN}) -> + fun({EK, ME}, {L_Out, L_MaxSQN, FileList}) -> FN = ME#manifest_entry.filename, P = PidFun(FN), SQN = SQNFun(P), - {[{EK, ME#manifest_entry{owner=P}}|L_Out], max(SQN, L_MaxSQN)} + {[{EK, ME#manifest_entry{owner=P}}|L_Out], + max(SQN, L_MaxSQN), + [FN|FileList]} end, case LevelIdx =< 1 of true -> - lists:foldr(HigherLevelLoadFun, {[], 0}, Level); + lists:foldr(HigherLevelLoadFun, {[], 0, []}, Level); false -> - {L0, MaxSQN} = lists:foldr(LowerLevelLoadFun, - {[], 0}, + {L0, MaxSQN, Flist} = lists:foldr(LowerLevelLoadFun, + {[], 0, []}, leveled_tree:to_list(Level)), - {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), MaxSQN} + {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), + MaxSQN, + Flist} end. close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 -> diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index d61c414..6b2aeb1 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -57,14 +57,14 @@ -export([ new_tree/1, new_tree/2, - add_kv/4, + add_kv/5, find_dirtyleaves/2, find_dirtysegments/2, fetch_root/1, fetch_leaves/2, merge_trees/2, get_segment/2, - tictac_hash/2, + tictac_hash/3, export_tree/1, import_tree/1 ]). @@ -162,13 +162,24 @@ import_tree(ExportedTree) -> level2 = Lv2}. -spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree(). +add_kv(TicTacTree, Key, Value, BinExtractFun) -> + add_kv(TicTacTree, Key, Value, BinExtractFun, false). + +-spec add_kv(tictactree(), tuple(), tuple(), fun(), boolean()) -> tictactree(). %% @doc -%% Add a Key and value to a tictactree using the HashFun to calculate the Hash -%% based on that key and value -add_kv(TicTacTree, Key, Value, HashFun) -> - HashV = HashFun(Key, Value), - SegChangeHash = tictac_hash(Key, HashV), - Segment = get_segment(Key, TicTacTree#tictactree.segment_count), +%% Add a Key and value to a tictactree using the BinExtractFun to extract a +%% binary from the Key and value from which to generate the hash. The +%% BinExtractFun will also need to do any canonicalisation necessary to make +%% the hash consistent (such as whitespace removal, or sorting) +%% +%% For exportable trees the hash function will be based on the CJ Bernstein +%% magic hash. For non-exportable trees erlang:phash2 will be used, and so +%% non-binary Keys and Values can be returned from the BinExtractFun in this +%% case. +add_kv(TicTacTree, Key, Value, BinExtractFun, Exportable) -> + {BinK, BinV} = BinExtractFun(Key, Value), + {SegHash, SegChangeHash} = tictac_hash(BinK, BinV, Exportable), + Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count), Level2Pos = Segment band (TicTacTree#tictactree.width - 1), @@ -276,23 +287,35 @@ merge_trees(TreeA, TreeB) -> MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}. --spec get_segment(any(), - integer()|xsmall|xxsmall|small|medium|large|xlarge) -> +-spec get_segment(integer(), + integer()|xxsmall|xsmall|small|medium|large|xlarge) -> integer(). %% @doc %% Return the segment ID for a Key. Can pass the tree size or the actual %% segment count derived from the size -get_segment(Key, SegmentCount) when is_integer(SegmentCount) -> - erlang:phash2(Key) band (SegmentCount - 1); -get_segment(Key, TreeSize) -> - get_segment(Key, element(3, get_size(TreeSize))). +get_segment(Hash, SegmentCount) when is_integer(SegmentCount) -> + Hash band (SegmentCount - 1); +get_segment(Hash, TreeSize) -> + get_segment(Hash, element(3, get_size(TreeSize))). --spec tictac_hash(tuple(), any()) -> integer(). +-spec tictac_hash(any(), any(), boolean()) -> {integer(), integer()}. %% @doc -%% Hash the key and term -tictac_hash(Key, Term) -> - erlang:phash2({Key, Term}). +%% Hash the key and term, to either something repetable in Erlang, or using +%% the DJ Bernstein hash if it is the tree needs to be compared with one +%% calculated with a non-Erlang store +%% +%% Boolean is Exportable. does the hash need to be repetable by a non-Erlang +%% machine +tictac_hash(BinKey, BinVal, true) + when is_binary(BinKey) and is_binary(BinVal) -> + HashKey = leveled_codec:magic_hash({binary, BinKey}), + HashVal = leveled_codec:magic_hash({binary, BinVal}), + {HashKey, HashKey bxor HashVal}; +tictac_hash(BinKey, {is_hash, HashedVal}, false) -> + {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor HashedVal}; +tictac_hash(BinKey, BinVal, false) -> + {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor erlang:phash2(BinVal)}. %%%============================================================================ %%% Internal functions @@ -372,13 +395,17 @@ simple_bysize_test() -> simple_test_withsize(xlarge). simple_test_withsize(Size) -> - HashFun = fun(_K, V) -> erlang:phash2(V) end, + BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end, + K1 = {o, "B1", "K1", null}, + K2 = {o, "B1", "K2", null}, + K3 = {o, "B1", "K3", null}, + Tree0 = new_tree(0, Size), - Tree1 = add_kv(Tree0, {o, "B1", "K1", null}, {caine, 1}, HashFun), - Tree2 = add_kv(Tree1, {o, "B1", "K2", null}, {caine, 2}, HashFun), - Tree3 = add_kv(Tree2, {o, "B1", "K3", null}, {caine, 3}, HashFun), - Tree3A = add_kv(Tree3, {o, "B1", "K3", null}, {caine, 4}, HashFun), + Tree1 = add_kv(Tree0, K1, {caine, 1}, BinFun), + Tree2 = add_kv(Tree1, K2, {caine, 2}, BinFun), + Tree3 = add_kv(Tree2, K3, {caine, 3}, BinFun), + Tree3A = add_kv(Tree3, K3, {caine, 4}, BinFun), ?assertMatch(true, Tree0#tictactree.level1 == Tree0#tictactree.level1), ?assertMatch(false, Tree0#tictactree.level1 == Tree1#tictactree.level1), ?assertMatch(false, Tree1#tictactree.level1 == Tree2#tictactree.level1), @@ -386,23 +413,28 @@ simple_test_withsize(Size) -> ?assertMatch(false, Tree3#tictactree.level1 == Tree3A#tictactree.level1), Tree0X = new_tree(0, Size), - Tree1X = add_kv(Tree0X, {o, "B1", "K3", null}, {caine, 3}, HashFun), - Tree2X = add_kv(Tree1X, {o, "B1", "K1", null}, {caine, 1}, HashFun), - Tree3X = add_kv(Tree2X, {o, "B1", "K2", null}, {caine, 2}, HashFun), - Tree3XA = add_kv(Tree3X, {o, "B1", "K3", null}, {caine, 4}, HashFun), + Tree1X = add_kv(Tree0X, K3, {caine, 3}, BinFun), + Tree2X = add_kv(Tree1X, K1, {caine, 1}, BinFun), + Tree3X = add_kv(Tree2X, K2, {caine, 2}, BinFun), + Tree3XA = add_kv(Tree3X, K3, {caine, 4}, BinFun), ?assertMatch(false, Tree1#tictactree.level1 == Tree1X#tictactree.level1), ?assertMatch(false, Tree2#tictactree.level1 == Tree2X#tictactree.level1), ?assertMatch(true, Tree3#tictactree.level1 == Tree3X#tictactree.level1), ?assertMatch(true, Tree3XA#tictactree.level1 == Tree3XA#tictactree.level1), SC = Tree0#tictactree.segment_count, + + GetSegFun = + fun(TK) -> + get_segment(erlang:phash2(term_to_binary(TK)), SC) + end, DL0 = find_dirtyleaves(Tree1, Tree0), - ?assertMatch(true, lists:member(get_segment({o, "B1", "K1", null}, SC), DL0)), + ?assertMatch(true, lists:member(GetSegFun(K1), DL0)), DL1 = find_dirtyleaves(Tree3, Tree1), - ?assertMatch(true, lists:member(get_segment({o, "B1", "K2", null}, SC), DL1)), - ?assertMatch(true, lists:member(get_segment({o, "B1", "K3", null}, SC), DL1)), - ?assertMatch(false, lists:member(get_segment({o, "B1", "K1", null}, SC), DL1)), + ?assertMatch(true, lists:member(GetSegFun(K2), DL1)), + ?assertMatch(true, lists:member(GetSegFun(K3), DL1)), + ?assertMatch(false, lists:member(GetSegFun(K1), DL1)), % Export and import tree to confirm no difference ExpTree3 = export_tree(Tree3), @@ -425,24 +457,24 @@ merge_bysize_xlarge_test2() -> merge_test_withsize(xlarge). merge_test_withsize(Size) -> - HashFun = fun(_K, V) -> erlang:phash2(V) end, + BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end, TreeX0 = new_tree(0, Size), - TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, HashFun), - TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, HashFun), - TreeX3 = add_kv(TreeX2, {o, "B1", "X3", null}, {caine, 3}, HashFun), - TreeX4 = add_kv(TreeX3, {o, "B1", "X3", null}, {caine, 4}, HashFun), + TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, BinFun), + TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, BinFun), + TreeX3 = add_kv(TreeX2, {o, "B1", "X3", null}, {caine, 3}, BinFun), + TreeX4 = add_kv(TreeX3, {o, "B1", "X3", null}, {caine, 4}, BinFun), TreeY0 = new_tree(0, Size), - TreeY1 = add_kv(TreeY0, {o, "B1", "Y1", null}, {caine, 101}, HashFun), - TreeY2 = add_kv(TreeY1, {o, "B1", "Y2", null}, {caine, 102}, HashFun), - TreeY3 = add_kv(TreeY2, {o, "B1", "Y3", null}, {caine, 103}, HashFun), - TreeY4 = add_kv(TreeY3, {o, "B1", "Y3", null}, {caine, 104}, HashFun), + TreeY1 = add_kv(TreeY0, {o, "B1", "Y1", null}, {caine, 101}, BinFun), + TreeY2 = add_kv(TreeY1, {o, "B1", "Y2", null}, {caine, 102}, BinFun), + TreeY3 = add_kv(TreeY2, {o, "B1", "Y3", null}, {caine, 103}, BinFun), + TreeY4 = add_kv(TreeY3, {o, "B1", "Y3", null}, {caine, 104}, BinFun), - TreeZ1 = add_kv(TreeX4, {o, "B1", "Y1", null}, {caine, 101}, HashFun), - TreeZ2 = add_kv(TreeZ1, {o, "B1", "Y2", null}, {caine, 102}, HashFun), - TreeZ3 = add_kv(TreeZ2, {o, "B1", "Y3", null}, {caine, 103}, HashFun), - TreeZ4 = add_kv(TreeZ3, {o, "B1", "Y3", null}, {caine, 104}, HashFun), + TreeZ1 = add_kv(TreeX4, {o, "B1", "Y1", null}, {caine, 101}, BinFun), + TreeZ2 = add_kv(TreeZ1, {o, "B1", "Y2", null}, {caine, 102}, BinFun), + TreeZ3 = add_kv(TreeZ2, {o, "B1", "Y3", null}, {caine, 103}, BinFun), + TreeZ4 = add_kv(TreeZ3, {o, "B1", "Y3", null}, {caine, 104}, BinFun), TreeM0 = merge_trees(TreeX4, TreeY4), checktree(TreeM0), @@ -452,6 +484,10 @@ merge_test_withsize(Size) -> checktree(TreeM1), ?assertMatch(false, TreeM1#tictactree.level1 == TreeZ4#tictactree.level1). +exportable_test() -> + {Int1, Int2} = tictac_hash(<<"key">>, <<"value">>, true), + ?assertMatch({true, true}, {Int1 >= 0, Int2 >=0}). + -endif. diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index e59a7b1..cbb6b5d 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -580,22 +580,59 @@ space_clear_ondelete(_Config) -> io:format("This should cause a final ledger merge event~n"), io:format("Will require the penciller to resolve the issue of creating" ++ " an empty file as all keys compact on merge~n"), - timer:sleep(12000), + + CheckFun = + fun(X, FileCount) -> + case FileCount of + 0 -> + 0; + _ -> + timer:sleep(X), + {ok, NewFC} = + file:list_dir(RootPath ++ "/ledger/ledger_files"), + io:format("Looping with ledger file count ~w~n", + [length(NewFC)]), + length(strip_nonsst(NewFC)) + end + end, + + FC = lists:foldl(CheckFun, infinity, [2000, 3000, 5000, 8000]), ok = leveled_bookie:book_close(Book3), + case FC of + 0 -> + ok; + _ -> + {ok, Book4} = leveled_bookie:book_start(StartOpts1), + lists:foldl(CheckFun, infinity, [2000, 3000, 5000, 8000]), + leveled_bookie:book_close(Book4) + end, + {ok, FNsD_L} = file:list_dir(RootPath ++ "/ledger/ledger_files"), io:format("FNsD - Bookie has ~w ledger files " ++ - "after second close~n", [length(FNsD_L)]), + "after second close~n", [length(strip_nonsst(FNsD_L))]), lists:foreach(fun(FN) -> io:format("FNsD - Ledger file is ~s~n", [FN]) end, FNsD_L), true = PointB_Journals < length(FNsA_J), - true = length(FNsD_L) < length(FNsA_L), - true = length(FNsD_L) < length(FNsB_L), - true = length(FNsD_L) < length(FNsC_L), - true = length(FNsD_L) == 0. + true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsA_L)), + true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsB_L)), + true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsC_L)), + true = length(strip_nonsst(FNsD_L)) == 0. +strip_nonsst(FileList) -> + SSTOnlyFun = + fun(FN, Acc) -> + case filename:extension(FN) of + ".sst" -> + [FN|Acc]; + _ -> + Acc + end + end, + lists:foldl(SSTOnlyFun, [], FileList). + is_empty_test(_Config) -> RootPath = testutil:reset_filestructure(), diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index c78b27e..2215b6b 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -114,19 +114,16 @@ many_put_compare(_Config) -> % Now run the same query by putting the tree-building responsibility onto % the fold_objects_fun - ApplyHash = - fun(HashFun) -> - fun(_Key, Value) -> - {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value), - <> = HeadBin, - <> = Rest, - HashFun(lists:sort(binary_to_term(VclockBin))) - end - end, + ExtractClockFun = + fun(Key, Value) -> + {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value), + <> = HeadBin, + {Key, lists:sort(binary_to_term(VclockBin))} + end, FoldObjectsFun = fun(_Bucket, Key, Value, Acc) -> - leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(fun erlang:phash2/1)) + leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun, false) end, FoldQ0 = {foldheads_bybucket, @@ -157,22 +154,25 @@ many_put_compare(_Config) -> [timer:now_diff(os:timestamp(), SWB1Obj)]), true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj1)) == 0, - % AAE trees within riak are based on a sha of the vector clock. So to - % compare with an AAE tree we need to compare outputs when we're hashing - % a hash - AltHashFun = - fun(Term) -> - erlang:phash2(crypto:hash(sha, term_to_binary(Term))) + % For an exportable comparison, want hash to be based on something not + % coupled to erlang language - so use exportable query + AltExtractFun = + fun(K, V) -> + {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(V), + <> = HeadBin, + {term_to_binary(K), VclockBin} end, AltFoldObjectsFun = fun(_Bucket, Key, Value, Acc) -> - leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(AltHashFun)) + leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun, true) end, AltFoldQ0 = {foldheads_bybucket, - o_rkv, - "Bucket", - {AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, - false, true}, + o_rkv, + "Bucket", + {AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, + false, + true}, {async, TreeAAltObjFolder0} = leveled_bookie:book_returnfolder(Bookie2, AltFoldQ0), SWB2Obj = os:timestamp(), @@ -187,15 +187,19 @@ many_put_compare(_Config) -> io:format("Build tictac tree via object fold with no "++ "presence check and 200K objects and alt hash in ~w~n", [timer:now_diff(os:timestamp(), SWB3Obj)]), - true = - length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)) == 1, + DL_ExportFold = + length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)), + io:format("Found dirty leaves with exportable comparison of ~w~n", + [DL_ExportFold]), + true = DL_ExportFold == 1, %% Finding differing keys FoldKeysFun = fun(SegListToFind) -> fun(_B, K, Acc) -> - Seg = leveled_tictac:get_segment(K, SegmentCount), + Seg = + leveled_tictac:get_segment(erlang:phash2(K), SegmentCount), case lists:member(Seg, SegListToFind) of true -> [K|Acc]; @@ -469,7 +473,8 @@ index_compare(_Config) -> FoldKeysIndexQFun = fun(_Bucket, {Term, Key}, Acc) -> - Seg = leveled_tictac:get_segment(Key, SegmentCount), + Seg = + leveled_tictac:get_segment(erlang:phash2(Key), SegmentCount), case lists:member(Seg, DL3_0) of true -> [{Term, Key}|Acc];