From 389694b11bf6aa3269d21fc9e2afc27b9bc6a2a8 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 26 Sep 2017 22:49:40 +0100 Subject: [PATCH 1/5] Add exportable option to tictac Idea being that sometimes you may wish to compare a tictac tree between leveled and something that doesn't understand erlang:phash or term_to_binary. So allow the magic_hash to be used instead - and perhaps an extract function that does base64 encoding or something similar. --- src/leveled_bookie.erl | 16 ++-- src/leveled_codec.erl | 3 +- src/leveled_tictac.erl | 122 ++++++++++++++++++++----------- test/end_to_end/tictac_SUITE.erl | 57 ++++++++------- 4 files changed, 117 insertions(+), 81 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index d7b73d5..eb422e1 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -934,14 +934,10 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) -> fun() -> % The start key and end key will vary depending on whether the % fold is to fold over an index or a key range - {StartKey, EndKey, HashFun} = + {StartKey, EndKey, ExtractFun} = case Tag of ?IDX_TAG -> {IdxField, StartIdx, EndIdx} = Query, - HashIdxValFun = - fun(_Key, IdxValue) -> - erlang:phash2(IdxValue) - end, {leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, @@ -952,23 +948,21 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) -> ?IDX_TAG, IdxField, EndIdx), - HashIdxValFun}; + fun(K, T) -> {K, T} end}; _ -> {StartObjKey, EndObjKey} = Query, - PassHashFun = fun(_Key, Hash) -> Hash end, {leveled_codec:to_ledgerkey(Bucket, StartObjKey, Tag), leveled_codec:to_ledgerkey(Bucket, EndObjKey, Tag), - PassHashFun} + fun(K, H) -> {K, {is_hash, H}} end} end, - AccFun = accumulate_tree(Filter, JournalCheck, JournalSnapshot, - HashFun), + ExtractFun), Acc = leveled_penciller:pcl_fetchkeys(LedgerSnapshot, StartKey, EndKey, @@ -1263,7 +1257,7 @@ accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) -> fun(B, K, H, Tree) -> case FilterFun(B, K) of accumulate -> - leveled_tictac:add_kv(Tree, K, H, HashFun); + leveled_tictac:add_kv(Tree, K, H, HashFun, false); pass -> Tree end diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 9ed2a2c..6be90d7 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -464,7 +464,8 @@ aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) -> {LMD1, TTL} -> TreeSize = AAE#recent_aae.tree_size, SegID = - leveled_tictac:get_segment(Key, TreeSize), + leveled_tictac:get_segment(erlang:phash2(Key), + TreeSize), IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin", IdxTrmStr = string:right(integer_to_list(SegID), 8, $0) ++ diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 39d7db8..d25132c 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -57,14 +57,14 @@ -export([ new_tree/1, new_tree/2, - add_kv/4, + add_kv/5, find_dirtyleaves/2, find_dirtysegments/2, fetch_root/1, fetch_leaves/2, merge_trees/2, get_segment/2, - tictac_hash/2, + tictac_hash/3, export_tree/1, import_tree/1 ]). @@ -161,13 +161,24 @@ import_tree(ExportedTree) -> level2 = Lv2}. -spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree(). +add_kv(TicTacTree, Key, Value, BinExtractFun) -> + add_kv(TicTacTree, Key, Value, BinExtractFun, false). + +-spec add_kv(tictactree(), tuple(), tuple(), fun(), boolean()) -> tictactree(). %% @doc -%% Add a Key and value to a tictactree using the HashFun to calculate the Hash -%% based on that key and value -add_kv(TicTacTree, Key, Value, HashFun) -> - HashV = HashFun(Key, Value), - SegChangeHash = tictac_hash(Key, HashV), - Segment = get_segment(Key, TicTacTree#tictactree.segment_count), +%% Add a Key and value to a tictactree using the BinExtractFun to extract a +%% binary from the Key and value from which to generate the hash. The +%% BinExtractFun will also need to do any canonicalisation necessary to make +%% the hash consistent (such as whitespace removal, or sorting) +%% +%% For exportable trees the hash function will be based on the CJ Bernstein +%% magic hash. For non-exportable trees erlang:phash2 will be used, and so +%% non-binary Keys and Values can be returned from the BinExtractFun in this +%% case. +add_kv(TicTacTree, Key, Value, BinExtractFun, Exportable) -> + {BinK, BinV} = BinExtractFun(Key, Value), + {SegHash, SegChangeHash} = tictac_hash(BinK, BinV, Exportable), + Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count), Level2Pos = Segment band (TicTacTree#tictactree.width - 1), @@ -275,21 +286,33 @@ merge_trees(TreeA, TreeB) -> MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}. --spec get_segment(any(), integer()|small|medium|large|xlarge) -> integer(). +-spec get_segment(integer(), integer()|small|medium|large|xlarge) -> integer(). %% @doc %% Return the segment ID for a Key. Can pass the tree size or the actual %% segment count derived from the size -get_segment(Key, SegmentCount) when is_integer(SegmentCount) -> - erlang:phash2(Key) band (SegmentCount - 1); -get_segment(Key, TreeSize) -> - get_segment(Key, element(3, get_size(TreeSize))). +get_segment(Hash, SegmentCount) when is_integer(SegmentCount) -> + Hash band (SegmentCount - 1); +get_segment(Hash, TreeSize) -> + get_segment(Hash, element(3, get_size(TreeSize))). --spec tictac_hash(tuple(), any()) -> integer(). +-spec tictac_hash(any(), any(), boolean()) -> integer(). %% @doc -%% Hash the key and term -tictac_hash(Key, Term) -> - erlang:phash2({Key, Term}). +%% Hash the key and term, to either something repetable in Erlang, or using +%% the DJ Bernstein hash if it is the tree needs to be compared with one +%% calculated with a non-Erlang store +%% +%% Boolean is Exportable. does the hash need to be repetable by a non-Erlang +%% machine +tictac_hash(BinKey, BinVal, true) + when is_binary(BinKey) and is_binary(BinVal) -> + HashKey = leveled_codec:magic_hash({binary, BinKey}), + HashVal = leveled_codec:magic_hash({binary, BinVal}), + {HashKey, HashKey bxor HashVal}; +tictac_hash(BinKey, {is_hash, HashedVal}, false) -> + {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor HashedVal}; +tictac_hash(BinKey, BinVal, false) -> + {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor erlang:phash2(BinVal)}. %%%============================================================================ %%% Internal functions @@ -363,13 +386,17 @@ simple_bysize_test() -> simple_test_withsize(xlarge). simple_test_withsize(Size) -> - HashFun = fun(_K, V) -> erlang:phash2(V) end, + BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end, + K1 = {o, "B1", "K1", null}, + K2 = {o, "B1", "K2", null}, + K3 = {o, "B1", "K3", null}, + Tree0 = new_tree(0, Size), - Tree1 = add_kv(Tree0, {o, "B1", "K1", null}, {caine, 1}, HashFun), - Tree2 = add_kv(Tree1, {o, "B1", "K2", null}, {caine, 2}, HashFun), - Tree3 = add_kv(Tree2, {o, "B1", "K3", null}, {caine, 3}, HashFun), - Tree3A = add_kv(Tree3, {o, "B1", "K3", null}, {caine, 4}, HashFun), + Tree1 = add_kv(Tree0, K1, {caine, 1}, BinFun), + Tree2 = add_kv(Tree1, K2, {caine, 2}, BinFun), + Tree3 = add_kv(Tree2, K3, {caine, 3}, BinFun), + Tree3A = add_kv(Tree3, K3, {caine, 4}, BinFun), ?assertMatch(true, Tree0#tictactree.level1 == Tree0#tictactree.level1), ?assertMatch(false, Tree0#tictactree.level1 == Tree1#tictactree.level1), ?assertMatch(false, Tree1#tictactree.level1 == Tree2#tictactree.level1), @@ -377,23 +404,28 @@ simple_test_withsize(Size) -> ?assertMatch(false, Tree3#tictactree.level1 == Tree3A#tictactree.level1), Tree0X = new_tree(0, Size), - Tree1X = add_kv(Tree0X, {o, "B1", "K3", null}, {caine, 3}, HashFun), - Tree2X = add_kv(Tree1X, {o, "B1", "K1", null}, {caine, 1}, HashFun), - Tree3X = add_kv(Tree2X, {o, "B1", "K2", null}, {caine, 2}, HashFun), - Tree3XA = add_kv(Tree3X, {o, "B1", "K3", null}, {caine, 4}, HashFun), + Tree1X = add_kv(Tree0X, K3, {caine, 3}, BinFun), + Tree2X = add_kv(Tree1X, K1, {caine, 1}, BinFun), + Tree3X = add_kv(Tree2X, K2, {caine, 2}, BinFun), + Tree3XA = add_kv(Tree3X, K3, {caine, 4}, BinFun), ?assertMatch(false, Tree1#tictactree.level1 == Tree1X#tictactree.level1), ?assertMatch(false, Tree2#tictactree.level1 == Tree2X#tictactree.level1), ?assertMatch(true, Tree3#tictactree.level1 == Tree3X#tictactree.level1), ?assertMatch(true, Tree3XA#tictactree.level1 == Tree3XA#tictactree.level1), SC = Tree0#tictactree.segment_count, + + GetSegFun = + fun(TK) -> + get_segment(erlang:phash2(term_to_binary(TK)), SC) + end, DL0 = find_dirtyleaves(Tree1, Tree0), - ?assertMatch(true, lists:member(get_segment({o, "B1", "K1", null}, SC), DL0)), + ?assertMatch(true, lists:member(GetSegFun(K1), DL0)), DL1 = find_dirtyleaves(Tree3, Tree1), - ?assertMatch(true, lists:member(get_segment({o, "B1", "K2", null}, SC), DL1)), - ?assertMatch(true, lists:member(get_segment({o, "B1", "K3", null}, SC), DL1)), - ?assertMatch(false, lists:member(get_segment({o, "B1", "K1", null}, SC), DL1)), + ?assertMatch(true, lists:member(GetSegFun(K2), DL1)), + ?assertMatch(true, lists:member(GetSegFun(K3), DL1)), + ?assertMatch(false, lists:member(GetSegFun(K1), DL1)), % Export and import tree to confirm no difference ExpTree3 = export_tree(Tree3), @@ -416,24 +448,24 @@ merge_bysize_xlarge_test2() -> merge_test_withsize(xlarge). merge_test_withsize(Size) -> - HashFun = fun(_K, V) -> erlang:phash2(V) end, + BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end, TreeX0 = new_tree(0, Size), - TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, HashFun), - TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, HashFun), - TreeX3 = add_kv(TreeX2, {o, "B1", "X3", null}, {caine, 3}, HashFun), - TreeX4 = add_kv(TreeX3, {o, "B1", "X3", null}, {caine, 4}, HashFun), + TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, BinFun), + TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, BinFun), + TreeX3 = add_kv(TreeX2, {o, "B1", "X3", null}, {caine, 3}, BinFun), + TreeX4 = add_kv(TreeX3, {o, "B1", "X3", null}, {caine, 4}, BinFun), TreeY0 = new_tree(0, Size), - TreeY1 = add_kv(TreeY0, {o, "B1", "Y1", null}, {caine, 101}, HashFun), - TreeY2 = add_kv(TreeY1, {o, "B1", "Y2", null}, {caine, 102}, HashFun), - TreeY3 = add_kv(TreeY2, {o, "B1", "Y3", null}, {caine, 103}, HashFun), - TreeY4 = add_kv(TreeY3, {o, "B1", "Y3", null}, {caine, 104}, HashFun), + TreeY1 = add_kv(TreeY0, {o, "B1", "Y1", null}, {caine, 101}, BinFun), + TreeY2 = add_kv(TreeY1, {o, "B1", "Y2", null}, {caine, 102}, BinFun), + TreeY3 = add_kv(TreeY2, {o, "B1", "Y3", null}, {caine, 103}, BinFun), + TreeY4 = add_kv(TreeY3, {o, "B1", "Y3", null}, {caine, 104}, BinFun), - TreeZ1 = add_kv(TreeX4, {o, "B1", "Y1", null}, {caine, 101}, HashFun), - TreeZ2 = add_kv(TreeZ1, {o, "B1", "Y2", null}, {caine, 102}, HashFun), - TreeZ3 = add_kv(TreeZ2, {o, "B1", "Y3", null}, {caine, 103}, HashFun), - TreeZ4 = add_kv(TreeZ3, {o, "B1", "Y3", null}, {caine, 104}, HashFun), + TreeZ1 = add_kv(TreeX4, {o, "B1", "Y1", null}, {caine, 101}, BinFun), + TreeZ2 = add_kv(TreeZ1, {o, "B1", "Y2", null}, {caine, 102}, BinFun), + TreeZ3 = add_kv(TreeZ2, {o, "B1", "Y3", null}, {caine, 103}, BinFun), + TreeZ4 = add_kv(TreeZ3, {o, "B1", "Y3", null}, {caine, 104}, BinFun), TreeM0 = merge_trees(TreeX4, TreeY4), checktree(TreeM0), @@ -443,6 +475,10 @@ merge_test_withsize(Size) -> checktree(TreeM1), ?assertMatch(false, TreeM1#tictactree.level1 == TreeZ4#tictactree.level1). +exportable_test() -> + {Int1, Int2} = tictac_hash(<<"key">>, <<"value">>, true), + ?assertMatch({true, true}, {is_integer(Int1), is_integer(Int2)}). + -endif. diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index a837738..6e7ba2d 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -114,19 +114,16 @@ many_put_compare(_Config) -> % Now run the same query by putting the tree-building responsibility onto % the fold_objects_fun - ApplyHash = - fun(HashFun) -> - fun(_Key, Value) -> - {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value), - <> = HeadBin, - <> = Rest, - HashFun(lists:sort(binary_to_term(VclockBin))) - end - end, + ExtractClockFun = + fun(Key, Value) -> + {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value), + <> = HeadBin, + {Key, lists:sort(binary_to_term(VclockBin))} + end, FoldObjectsFun = fun(_Bucket, Key, Value, Acc) -> - leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(fun erlang:phash2/1)) + leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun, false) end, FoldQ0 = {foldheads_bybucket, @@ -157,22 +154,25 @@ many_put_compare(_Config) -> [timer:now_diff(os:timestamp(), SWB1Obj)]), true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj1)) == 0, - % AAE trees within riak are based on a sha of the vector clock. So to - % compare with an AAE tree we need to compare outputs when we're hashing - % a hash - AltHashFun = - fun(Term) -> - erlang:phash2(crypto:hash(sha, term_to_binary(Term))) + % For an exportable comparison, want hash to be based on something not + % coupled to erlang language - so use exportable query + AltExtractFun = + fun(K, V) -> + {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(V), + <> = HeadBin, + {term_to_binary(K), VclockBin} end, AltFoldObjectsFun = fun(_Bucket, Key, Value, Acc) -> - leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(AltHashFun)) + leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun, true) end, AltFoldQ0 = {foldheads_bybucket, - o_rkv, - "Bucket", - {AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, - false, true}, + o_rkv, + "Bucket", + {AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, + false, + true}, {async, TreeAAltObjFolder0} = leveled_bookie:book_returnfolder(Bookie2, AltFoldQ0), SWB2Obj = os:timestamp(), @@ -187,15 +187,19 @@ many_put_compare(_Config) -> io:format("Build tictac tree via object fold with no "++ "presence check and 200K objects and alt hash in ~w~n", [timer:now_diff(os:timestamp(), SWB3Obj)]), - true = - length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)) == 1, + DL_ExportFold = + length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)), + io:format("Found dirty leaves with exportable comparison of ~w~n", + [DL_ExportFold]), + true = DL_ExportFold == 1, %% Finding differing keys FoldKeysFun = fun(SegListToFind) -> fun(_B, K, Acc) -> - Seg = leveled_tictac:get_segment(K, SegmentCount), + Seg = + leveled_tictac:get_segment(erlang:phash2(K), SegmentCount), case lists:member(Seg, SegListToFind) of true -> [K|Acc]; @@ -469,7 +473,8 @@ index_compare(_Config) -> FoldKeysIndexQFun = fun(_Bucket, {Term, Key}, Acc) -> - Seg = leveled_tictac:get_segment(Key, SegmentCount), + Seg = + leveled_tictac:get_segment(erlang:phash2(Key), SegmentCount), case lists:member(Seg, DL3_0) of true -> [{Term, Key}|Acc]; From 2e5b9c80f445dbb03e43dad11dfecc9296113916 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 27 Sep 2017 12:15:18 +0100 Subject: [PATCH 2/5] Add max LMD to Riak metadata This is an interim stage towwards enhancing the proxy object so that it contains more helper information (other than size). The aim is to be able to run more efficient fold_heads queries that might filter on LMD range (so as not to have to co-ordinate the running of comparative queries). For example if producing a tictactree to compare between two different offsets, a max LMD could be passed in so that changes beyond the time the first query was requested can be ignored. --- src/leveled_codec.erl | 25 ++++++++++++++++++++++--- src/leveled_tictac.erl | 4 ++-- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 6be90d7..e3ca2f4 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -74,6 +74,11 @@ -define(ALL_BUCKETS, <<"$all">>). -type recent_aae() :: #recent_aae{}. +-type riak_metadata() :: {binary()|delete, % Sibling Metadata + binary()|null, % Vclock Metadata + integer()|null, % Hash of vclock + {integer(), integer(), integer()}|null, % LMOD TS + integer()}. % Size in bytes of real object -spec magic_hash(any()) -> integer(). %% @doc @@ -583,7 +588,7 @@ get_keyandobjhash(LK, Value) -> get_objhash(Tag, ObjMetaData) -> case Tag of ?RIAK_TAG -> - {_RMD, _VC, Hash, _Size} = ObjMetaData, + {_RMD, _VC, Hash, _LMD, _Size} = ObjMetaData, Hash; ?STD_TAG -> {Hash, _Size} = ObjMetaData, @@ -595,20 +600,34 @@ build_metadata_object(PrimaryKey, MD) -> {Tag, _Bucket, _Key, null} = PrimaryKey, case Tag of ?RIAK_TAG -> - {SibData, Vclock, _Hash, _Size} = MD, + {SibData, Vclock, _Hash, _LMD, _Size} = MD, riak_metadata_to_binary(Vclock, SibData); ?STD_TAG -> MD end. +-spec riak_extract_metadata(binary()|delete, non_neg_integer()) -> + {riak_metadata(), list()}. +%% @doc +%% Riak extract metadata should extract a metadata object which is a +%% five-tuple of: +%% - Binary of sibling Metadata +%% - Binary of vector clock metadata +%% - Non-exportable hash of the vector clock metadata +%% - The largest last modified date of the object +%% - Size of the object +%% +%% The metadata object should be returned with the full list of last +%% modified dates (which will be used for recent anti-entropy index creation) riak_extract_metadata(delete, Size) -> - {{delete, null, null, Size}, []}; + {{delete, null, null, null, Size}, []}; riak_extract_metadata(ObjBin, Size) -> {VclockBin, SibBin, LastMods} = riak_metadata_from_binary(ObjBin), {{SibBin, VclockBin, erlang:phash2(lists:sort(binary_to_term(VclockBin))), + lists:max(LastMods), Size}, LastMods}. diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index d25132c..0ba615b 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -296,7 +296,7 @@ get_segment(Hash, TreeSize) -> get_segment(Hash, element(3, get_size(TreeSize))). --spec tictac_hash(any(), any(), boolean()) -> integer(). +-spec tictac_hash(any(), any(), boolean()) -> {integer(), integer()}. %% @doc %% Hash the key and term, to either something repetable in Erlang, or using %% the DJ Bernstein hash if it is the tree needs to be compared with one @@ -477,7 +477,7 @@ merge_test_withsize(Size) -> exportable_test() -> {Int1, Int2} = tictac_hash(<<"key">>, <<"value">>, true), - ?assertMatch({true, true}, {is_integer(Int1), is_integer(Int2)}). + ?assertMatch({true, true}, {Int1 >= 0, Int2 >=0}). -endif. From 433cc37eb62422e6cff47fd83f87bd0e93241116 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 27 Sep 2017 12:26:12 +0100 Subject: [PATCH 3/5] Rolled back LMD in metadata Because there's no sensible way of using it if objects are mutable - you still end up with the same false positives in the tictactree. Didn't fully rollback the change as spec and docs were added which chould be useful going forward. --- src/leveled_codec.erl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index e3ca2f4..3dc2a33 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -76,8 +76,7 @@ -type recent_aae() :: #recent_aae{}. -type riak_metadata() :: {binary()|delete, % Sibling Metadata binary()|null, % Vclock Metadata - integer()|null, % Hash of vclock - {integer(), integer(), integer()}|null, % LMOD TS + integer()|null, % Hash of vclock - non-exportable integer()}. % Size in bytes of real object -spec magic_hash(any()) -> integer(). @@ -588,19 +587,19 @@ get_keyandobjhash(LK, Value) -> get_objhash(Tag, ObjMetaData) -> case Tag of ?RIAK_TAG -> - {_RMD, _VC, Hash, _LMD, _Size} = ObjMetaData, + {_RMD, _VC, Hash, _Size} = ObjMetaData, Hash; ?STD_TAG -> {Hash, _Size} = ObjMetaData, Hash end. - + build_metadata_object(PrimaryKey, MD) -> {Tag, _Bucket, _Key, null} = PrimaryKey, case Tag of ?RIAK_TAG -> - {SibData, Vclock, _Hash, _LMD, _Size} = MD, + {SibData, Vclock, _Hash, _Size} = MD, riak_metadata_to_binary(Vclock, SibData); ?STD_TAG -> MD @@ -627,7 +626,6 @@ riak_extract_metadata(ObjBin, Size) -> {{SibBin, VclockBin, erlang:phash2(lists:sort(binary_to_term(VclockBin))), - lists:max(LastMods), Size}, LastMods}. From 3950942da38df6f97c87dfb75a41ab5f50cf61ae Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 27 Sep 2017 23:52:49 +0100 Subject: [PATCH 4/5] Roll in fix for intermittently failing test As descibed in https://github.com/martinsumner/leveled/issues/92 Only the first fix was made. Just to eb safe - archiving means renaming to another file with a different extension. Assumption is that renamed files cna be manually reaped if necessary. --- src/leveled_codec.erl | 2 +- src/leveled_log.erl | 5 +- src/leveled_penciller.erl | 99 ++++++++++++++++++++++----------- src/leveled_pmanifest.erl | 35 +++++++----- test/end_to_end/basic_SUITE.erl | 49 ++++++++++++++-- 5 files changed, 138 insertions(+), 52 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 3dc2a33..26963d0 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -620,7 +620,7 @@ build_metadata_object(PrimaryKey, MD) -> %% The metadata object should be returned with the full list of last %% modified dates (which will be used for recent anti-entropy index creation) riak_extract_metadata(delete, Size) -> - {{delete, null, null, null, Size}, []}; + {{delete, null, null, Size}, []}; riak_extract_metadata(ObjBin, Size) -> {VclockBin, SibBin, LastMods} = riak_metadata_from_binary(ObjBin), {{SibBin, diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 5432874..59892dd 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -120,7 +120,8 @@ {"P0030", {warn, "We're doomed - intention recorded to destroy all files"}}, {"P0031", - {info, "Completion of update to levelzero"}}, + {info, "Completion of update to levelzero" + ++ " with cache size status ~w ~w"}}, {"P0032", {info, "Head timing for result ~w is sample ~w total ~w and max ~w"}}, {"P0033", @@ -141,6 +142,8 @@ {"P0039", {info, "Failed to release pid=~w " ++ "leaving SnapshotCount=~w and MinSQN=~w"}}, + {"P0040", + {info, "Archiving filename ~s as unused at startup"}}, {"PC001", {info, "Penciller's clerk ~w started with owner ~w"}}, diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 8330235..e5f759a 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -206,6 +206,8 @@ -define(FILES_FP, "ledger_files"). -define(CURRENT_FILEX, "crr"). -define(PENDING_FILEX, "pnd"). +-define(SST_FILEX, ".sst"). +-define(ARCHIVE_FILEX, ".bak"). -define(MEMTABLE, mem). -define(MAX_TABLESIZE, 28000). % This is less than max - but COIN_SIDECOUNT -define(SUPER_MAX_TABLE_SIZE, 40000). @@ -819,7 +821,8 @@ sst_rootpath(RootPath) -> FP. sst_filename(ManSQN, Level, Count) -> - lists:flatten(io_lib:format("./~w_~w_~w.sst", [ManSQN, Level, Count])). + lists:flatten(io_lib:format("./~w_~w_~w" ++ ?SST_FILEX, + [ManSQN, Level, Count])). %%%============================================================================ @@ -859,41 +862,73 @@ start_from_file(PCLopts) -> Pid end, SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1, - {MaxSQN, Manifest1} = leveled_pmanifest:load_manifest(Manifest0, - OpenFun, - SQNFun), + {MaxSQN, Manifest1, FileList} = + leveled_pmanifest:load_manifest(Manifest0, OpenFun, SQNFun), leveled_log:log("P0014", [MaxSQN]), ManSQN = leveled_pmanifest:get_manifest_sqn(Manifest1), leveled_log:log("P0035", [ManSQN]), %% Find any L0 files L0FN = sst_filename(ManSQN + 1, 0, 0), - case filelib:is_file(filename:join(sst_rootpath(RootPath), L0FN)) of - true -> - leveled_log:log("P0015", [L0FN]), - L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN), - {ok, L0Pid, {L0StartKey, L0EndKey}} = L0Open, - L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid), - L0Entry = #manifest_entry{start_key = L0StartKey, - end_key = L0EndKey, - filename = L0FN, - owner = L0Pid}, - Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1, - ManSQN + 1, - 0, - L0Entry), - leveled_log:log("P0016", [L0SQN]), - LedgerSQN = max(MaxSQN, L0SQN), - {ok, - InitState#state{manifest = Manifest2, + {State0, FileList0} = + case filelib:is_file(filename:join(sst_rootpath(RootPath), L0FN)) of + true -> + leveled_log:log("P0015", [L0FN]), + L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN), + {ok, L0Pid, {L0StartKey, L0EndKey}} = L0Open, + L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid), + L0Entry = #manifest_entry{start_key = L0StartKey, + end_key = L0EndKey, + filename = L0FN, + owner = L0Pid}, + Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1, + ManSQN + 1, + 0, + L0Entry), + leveled_log:log("P0016", [L0SQN]), + LedgerSQN = max(MaxSQN, L0SQN), + {InitState#state{manifest = Manifest2, ledger_sqn = LedgerSQN, - persisted_sqn = LedgerSQN}}; - false -> - leveled_log:log("P0017", []), - {ok, - InitState#state{manifest = Manifest1, + persisted_sqn = LedgerSQN}, + [L0FN|FileList]}; + false -> + leveled_log:log("P0017", []), + {InitState#state{manifest = Manifest1, ledger_sqn = MaxSQN, - persisted_sqn = MaxSQN}} - end. + persisted_sqn = MaxSQN}, + FileList} + end, + ok = archive_files(RootPath, FileList0), + {ok, State0}. + +archive_files(RootPath, FileList) -> + {ok, AllFiles} = file:list_dir(sst_rootpath(RootPath)), + FileCheckFun = + fun(FN, UnusedFiles) -> + FN0 = "./" ++ FN, + case filename:extension(FN0) of + ?SST_FILEX -> + case lists:member(FN0, FileList) of + true -> + UnusedFiles; + false -> + leveled_log:log("P0040", [FN0]), + [FN0|UnusedFiles] + end; + _ -> + UnusedFiles + end + end, + RenameFun = + fun(FN) -> + AltName = filename:join(sst_rootpath(RootPath), + filename:basename(FN, ?SST_FILEX)) + ++ ?ARCHIVE_FILEX, + file:rename(filename:join(sst_rootpath(RootPath), FN), + AltName) + end, + FilesToArchive = lists:foldl(FileCheckFun, [], AllFiles), + lists:foreach(RenameFun, FilesToArchive), + ok. update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN}, @@ -934,11 +969,13 @@ update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN}, case {CacheTooBig, L0Free, JitterCheck, NoPendingManifestChange} of {true, true, true, true} -> L0Constructor = roll_memory(UpdState, false), - leveled_log:log_timer("P0031", [], SW), + leveled_log:log_timer("P0031", [true, true], SW), UpdState#state{levelzero_pending=true, levelzero_constructor=L0Constructor}; _ -> - leveled_log:log_timer("P0031", [], SW), + leveled_log:log_timer("P0031", + [CacheTooBig, JitterCheck], + SW), UpdState end end. diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index f6e919e..3970c7e 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -136,7 +136,8 @@ copy_manifest(Manifest) -> % about is switched to undefined Manifest#manifest{snapshots = undefined, pending_deletes = undefined}. --spec load_manifest(manifest(), fun(), fun()) -> {integer(), manifest()}. +-spec load_manifest(manifest(), fun(), fun()) -> + {integer(), manifest(), list()}. %% @doc %% Roll over the manifest starting a process to manage each file in the %% manifest. The PidFun should be able to return the Pid of a file process @@ -144,13 +145,15 @@ copy_manifest(Manifest) -> %% of that file, if passed the Pid that owns it. load_manifest(Manifest, PidFun, SQNFun) -> UpdateLevelFun = - fun(LevelIdx, {AccMaxSQN, AccMan}) -> + fun(LevelIdx, {AccMaxSQN, AccMan, AccFL}) -> L0 = array:get(LevelIdx, AccMan#manifest.levels), - {L1, SQN1} = load_level(LevelIdx, L0, PidFun, SQNFun), + {L1, SQN1, FileList} = load_level(LevelIdx, L0, PidFun, SQNFun), UpdLevels = array:set(LevelIdx, L1, AccMan#manifest.levels), - {max(AccMaxSQN, SQN1), AccMan#manifest{levels = UpdLevels}} + {max(AccMaxSQN, SQN1), + AccMan#manifest{levels = UpdLevels}, + AccFL ++ FileList} end, - lists:foldl(UpdateLevelFun, {0, Manifest}, + lists:foldl(UpdateLevelFun, {0, Manifest, []}, lists:seq(0, Manifest#manifest.basement)). -spec close_manifest(manifest(), fun()) -> ok. @@ -488,27 +491,33 @@ levelzero_present(Manifest) -> load_level(LevelIdx, Level, PidFun, SQNFun) -> HigherLevelLoadFun = - fun(ME, {L_Out, L_MaxSQN}) -> + fun(ME, {L_Out, L_MaxSQN, FileList}) -> FN = ME#manifest_entry.filename, P = PidFun(FN), SQN = SQNFun(P), - {[ME#manifest_entry{owner=P}|L_Out], max(SQN, L_MaxSQN)} + {[ME#manifest_entry{owner=P}|L_Out], + max(SQN, L_MaxSQN), + [FN|FileList]} end, LowerLevelLoadFun = - fun({EK, ME}, {L_Out, L_MaxSQN}) -> + fun({EK, ME}, {L_Out, L_MaxSQN, FileList}) -> FN = ME#manifest_entry.filename, P = PidFun(FN), SQN = SQNFun(P), - {[{EK, ME#manifest_entry{owner=P}}|L_Out], max(SQN, L_MaxSQN)} + {[{EK, ME#manifest_entry{owner=P}}|L_Out], + max(SQN, L_MaxSQN), + [FN|FileList]} end, case LevelIdx =< 1 of true -> - lists:foldr(HigherLevelLoadFun, {[], 0}, Level); + lists:foldr(HigherLevelLoadFun, {[], 0, []}, Level); false -> - {L0, MaxSQN} = lists:foldr(LowerLevelLoadFun, - {[], 0}, + {L0, MaxSQN, Flist} = lists:foldr(LowerLevelLoadFun, + {[], 0, []}, leveled_tree:to_list(Level)), - {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), MaxSQN} + {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), + MaxSQN, + Flist} end. close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 -> diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index e59a7b1..cbb6b5d 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -580,22 +580,59 @@ space_clear_ondelete(_Config) -> io:format("This should cause a final ledger merge event~n"), io:format("Will require the penciller to resolve the issue of creating" ++ " an empty file as all keys compact on merge~n"), - timer:sleep(12000), + + CheckFun = + fun(X, FileCount) -> + case FileCount of + 0 -> + 0; + _ -> + timer:sleep(X), + {ok, NewFC} = + file:list_dir(RootPath ++ "/ledger/ledger_files"), + io:format("Looping with ledger file count ~w~n", + [length(NewFC)]), + length(strip_nonsst(NewFC)) + end + end, + + FC = lists:foldl(CheckFun, infinity, [2000, 3000, 5000, 8000]), ok = leveled_bookie:book_close(Book3), + case FC of + 0 -> + ok; + _ -> + {ok, Book4} = leveled_bookie:book_start(StartOpts1), + lists:foldl(CheckFun, infinity, [2000, 3000, 5000, 8000]), + leveled_bookie:book_close(Book4) + end, + {ok, FNsD_L} = file:list_dir(RootPath ++ "/ledger/ledger_files"), io:format("FNsD - Bookie has ~w ledger files " ++ - "after second close~n", [length(FNsD_L)]), + "after second close~n", [length(strip_nonsst(FNsD_L))]), lists:foreach(fun(FN) -> io:format("FNsD - Ledger file is ~s~n", [FN]) end, FNsD_L), true = PointB_Journals < length(FNsA_J), - true = length(FNsD_L) < length(FNsA_L), - true = length(FNsD_L) < length(FNsB_L), - true = length(FNsD_L) < length(FNsC_L), - true = length(FNsD_L) == 0. + true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsA_L)), + true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsB_L)), + true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsC_L)), + true = length(strip_nonsst(FNsD_L)) == 0. +strip_nonsst(FileList) -> + SSTOnlyFun = + fun(FN, Acc) -> + case filename:extension(FN) of + ".sst" -> + [FN|Acc]; + _ -> + Acc + end + end, + lists:foldl(SSTOnlyFun, [], FileList). + is_empty_test(_Config) -> RootPath = testutil:reset_filestructure(), From 0f5911ab707ef2dde7e7350a30c046ffcde67238 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 28 Sep 2017 10:50:54 +0100 Subject: [PATCH 5/5] Add unit test of archive files --- src/leveled_penciller.erl | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index e5f759a..a45184e 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -900,14 +900,14 @@ start_from_file(PCLopts) -> ok = archive_files(RootPath, FileList0), {ok, State0}. -archive_files(RootPath, FileList) -> +archive_files(RootPath, UsedFileList) -> {ok, AllFiles} = file:list_dir(sst_rootpath(RootPath)), FileCheckFun = fun(FN, UnusedFiles) -> FN0 = "./" ++ FN, case filename:extension(FN0) of ?SST_FILEX -> - case lists:member(FN0, FileList) of + case lists:member(FN0, UsedFileList) of true -> UnusedFiles; false -> @@ -1377,6 +1377,23 @@ clean_dir_test() -> ok = clean_subdir(RootPath ++ "/test.bob"), ok = file:delete(RootPath ++ "/test.bob"). + +archive_files_test() -> + RootPath = "../test/ledger", + SSTPath = sst_rootpath(RootPath), + ok = filelib:ensure_dir(SSTPath), + ok = file:write_file(SSTPath ++ "/test1.sst", "hello_world"), + ok = file:write_file(SSTPath ++ "/test2.sst", "hello_world"), + ok = file:write_file(SSTPath ++ "/test3.bob", "hello_world"), + UsedFiles = ["./test1.sst"], + ok = archive_files(RootPath, UsedFiles), + {ok, AllFiles} = file:list_dir(SSTPath), + ?assertMatch(true, lists:member("test1.sst", AllFiles)), + ?assertMatch(false, lists:member("test2.sst", AllFiles)), + ?assertMatch(true, lists:member("test3.bob", AllFiles)), + ?assertMatch(true, lists:member("test2.bak", AllFiles)), + ok = clean_subdir(SSTPath). + simple_server_test() -> RootPath = "../test/ledger", clean_testdir(RootPath),