Merge branch 'master' into mas-riakaae-impl-2
commit 61724cfedb
8 changed files with 290 additions and 134 deletions
@@ -934,14 +934,10 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) ->
         fun() ->
             % The start key and end key will vary depending on whether the
             % fold is to fold over an index or a key range
-            {StartKey, EndKey, HashFun} =
+            {StartKey, EndKey, ExtractFun} =
                 case Tag of
                     ?IDX_TAG ->
                         {IdxField, StartIdx, EndIdx} = Query,
-                        HashIdxValFun =
-                            fun(_Key, IdxValue) ->
-                                erlang:phash2(IdxValue)
-                            end,
                         {leveled_codec:to_ledgerkey(Bucket,
                                                     null,
                                                     ?IDX_TAG,
@@ -952,23 +948,21 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) ->
                                                     ?IDX_TAG,
                                                     IdxField,
                                                     EndIdx),
-                         HashIdxValFun};
+                         fun(K, T) -> {K, T} end};
                     _ ->
                         {StartObjKey, EndObjKey} = Query,
-                        PassHashFun = fun(_Key, Hash) -> Hash end,
                         {leveled_codec:to_ledgerkey(Bucket,
                                                     StartObjKey,
                                                     Tag),
                          leveled_codec:to_ledgerkey(Bucket,
                                                     EndObjKey,
                                                     Tag),
-                         PassHashFun}
+                         fun(K, H) -> {K, {is_hash, H}} end}
                 end,
-
             AccFun = accumulate_tree(Filter,
                                      JournalCheck,
                                      JournalSnapshot,
-                                     HashFun),
+                                     ExtractFun),
             Acc = leveled_penciller:pcl_fetchkeys(LedgerSnapshot,
                                                   StartKey,
                                                   EndKey,
@@ -1263,7 +1257,7 @@ accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) ->
     fun(B, K, H, Tree) ->
         case FilterFun(B, K) of
             accumulate ->
-                leveled_tictac:add_kv(Tree, K, H, HashFun);
+                leveled_tictac:add_kv(Tree, K, H, HashFun, false);
             pass ->
                 Tree
         end

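Editorial note, not part of the commit: the ExtractFun introduced above replaces the old HashFun. For a key-range fold the hash already held in the ledger metadata is passed through wrapped as {is_hash, Hash}, and the new tictac_hash/3 clause in leveled_tictac (later in this commit) XORs that value with a hash of the key instead of re-hashing the object. A minimal sketch of that path, with invented key and hash values:

%% Illustrative sketch only - the Key and Hash values are made up.
ExtractFun = fun(K, H) -> {K, {is_hash, H}} end,        % key-range case above
{BinK, BinV} = ExtractFun({o, "B1", "K1", null}, 5000),
%% add_kv/5 with Exportable = false then calls:
{SegHash, SegChangeHash} = leveled_tictac:tictac_hash(BinK, BinV, false),
%% ... returning {erlang:phash2(BinK), erlang:phash2(BinK) bxor 5000},
%% so SegHash selects the segment and SegChangeHash updates the leaf.
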
@@ -74,6 +74,10 @@
 -define(ALL_BUCKETS, <<"$all">>).

 -type recent_aae() :: #recent_aae{}.
+-type riak_metadata() :: {binary()|delete, % Sibling Metadata
+                            binary()|null, % Vclock Metadata
+                            integer()|null, % Hash of vclock - non-exportable
+                            integer()}. % Size in bytes of real object

 -spec magic_hash(any()) -> integer().
 %% @doc
@@ -466,7 +470,8 @@ aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) ->
         {LMD1, TTL} ->
             TreeSize = AAE#recent_aae.tree_size,
             SegID =
-                leveled_tictac:get_segment(Key, TreeSize),
+                leveled_tictac:get_segment(erlang:phash2(Key),
+                                            TreeSize),
             IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin",
             IdxTrmStr =
                 string:right(integer_to_list(SegID), 8, $0) ++
@@ -603,6 +608,19 @@ build_metadata_object(PrimaryKey, MD) ->
     end.


+-spec riak_extract_metadata(binary()|delete, non_neg_integer()) ->
+                                            {riak_metadata(), list()}.
+%% @doc
+%% Riak extract metadata should extract a metadata object which is a
+%% five-tuple of:
+%% - Binary of sibling Metadata
+%% - Binary of vector clock metadata
+%% - Non-exportable hash of the vector clock metadata
+%% - The largest last modified date of the object
+%% - Size of the object
+%%
+%% The metadata object should be returned with the full list of last
+%% modified dates (which will be used for recent anti-entropy index creation)
 riak_extract_metadata(delete, Size) ->
     {{delete, null, null, Size}, []};
 riak_extract_metadata(ObjBin, Size) ->

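Editorial sketch, not part of the commit: in the implementation the riak_metadata() value is a 4-tuple, with the last-modified dates carried separately as the second element of the result. The delete clause shown above illustrates this directly (assuming a call from within the module):

%% Sketch: a deleted object pairs a riak_metadata() tuple with an
%% empty list of last-modified dates.
{{delete, null, null, 100}, []} = riak_extract_metadata(delete, 100).
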
@@ -120,7 +120,8 @@
     {"P0030",
         {warn, "We're doomed - intention recorded to destroy all files"}},
     {"P0031",
-        {info, "Completion of update to levelzero"}},
+        {info, "Completion of update to levelzero"
+                ++ " with cache size status ~w ~w"}},
     {"P0032",
         {info, "Head timing for result ~w is sample ~w total ~w and max ~w"}},
     {"P0033",
@@ -141,6 +142,8 @@
     {"P0039",
         {info, "Failed to release pid=~w "
                 ++ "leaving SnapshotCount=~w and MinSQN=~w"}},
+    {"P0040",
+        {info, "Archiving filename ~s as unused at startup"}},

     {"PC001",
         {info, "Penciller's clerk ~w started with owner ~w"}},

@@ -206,6 +206,8 @@
 -define(FILES_FP, "ledger_files").
 -define(CURRENT_FILEX, "crr").
 -define(PENDING_FILEX, "pnd").
+-define(SST_FILEX, ".sst").
+-define(ARCHIVE_FILEX, ".bak").
 -define(MEMTABLE, mem).
 -define(MAX_TABLESIZE, 28000). % This is less than max - but COIN_SIDECOUNT
 -define(SUPER_MAX_TABLE_SIZE, 40000).
@@ -819,7 +821,8 @@ sst_rootpath(RootPath) ->
     FP.

 sst_filename(ManSQN, Level, Count) ->
-    lists:flatten(io_lib:format("./~w_~w_~w.sst", [ManSQN, Level, Count])).
+    lists:flatten(io_lib:format("./~w_~w_~w" ++ ?SST_FILEX,
+                                [ManSQN, Level, Count])).


 %%%============================================================================
@@ -859,41 +862,73 @@ start_from_file(PCLopts) ->
                     Pid
             end,
     SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1,
-    {MaxSQN, Manifest1} = leveled_pmanifest:load_manifest(Manifest0,
-                                                            OpenFun,
-                                                            SQNFun),
+    {MaxSQN, Manifest1, FileList} =
+        leveled_pmanifest:load_manifest(Manifest0, OpenFun, SQNFun),
     leveled_log:log("P0014", [MaxSQN]),
     ManSQN = leveled_pmanifest:get_manifest_sqn(Manifest1),
     leveled_log:log("P0035", [ManSQN]),
     %% Find any L0 files
     L0FN = sst_filename(ManSQN + 1, 0, 0),
-    case filelib:is_file(filename:join(sst_rootpath(RootPath), L0FN)) of
-        true ->
-            leveled_log:log("P0015", [L0FN]),
-            L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN),
-            {ok, L0Pid, {L0StartKey, L0EndKey}} = L0Open,
-            L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid),
-            L0Entry = #manifest_entry{start_key = L0StartKey,
-                                        end_key = L0EndKey,
-                                        filename = L0FN,
-                                        owner = L0Pid},
-            Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1,
-                                                                ManSQN + 1,
-                                                                0,
-                                                                L0Entry),
-            leveled_log:log("P0016", [L0SQN]),
-            LedgerSQN = max(MaxSQN, L0SQN),
-            {ok,
-                InitState#state{manifest = Manifest2,
+    {State0, FileList0} =
+        case filelib:is_file(filename:join(sst_rootpath(RootPath), L0FN)) of
+            true ->
+                leveled_log:log("P0015", [L0FN]),
+                L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN),
+                {ok, L0Pid, {L0StartKey, L0EndKey}} = L0Open,
+                L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid),
+                L0Entry = #manifest_entry{start_key = L0StartKey,
+                                            end_key = L0EndKey,
+                                            filename = L0FN,
+                                            owner = L0Pid},
+                Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1,
+                                                                    ManSQN + 1,
+                                                                    0,
+                                                                    L0Entry),
+                leveled_log:log("P0016", [L0SQN]),
+                LedgerSQN = max(MaxSQN, L0SQN),
+                {InitState#state{manifest = Manifest2,
                                 ledger_sqn = LedgerSQN,
-                                persisted_sqn = LedgerSQN}};
-        false ->
-            leveled_log:log("P0017", []),
-            {ok,
-                InitState#state{manifest = Manifest1,
+                                persisted_sqn = LedgerSQN},
+                    [L0FN|FileList]};
+            false ->
+                leveled_log:log("P0017", []),
+                {InitState#state{manifest = Manifest1,
                                 ledger_sqn = MaxSQN,
-                                persisted_sqn = MaxSQN}}
-    end.
+                                persisted_sqn = MaxSQN},
+                    FileList}
+        end,
+    ok = archive_files(RootPath, FileList0),
+    {ok, State0}.
+
+archive_files(RootPath, UsedFileList) ->
+    {ok, AllFiles} = file:list_dir(sst_rootpath(RootPath)),
+    FileCheckFun =
+        fun(FN, UnusedFiles) ->
+            FN0 = "./" ++ FN,
+            case filename:extension(FN0) of
+                ?SST_FILEX ->
+                    case lists:member(FN0, UsedFileList) of
+                        true ->
+                            UnusedFiles;
+                        false ->
+                            leveled_log:log("P0040", [FN0]),
+                            [FN0|UnusedFiles]
+                    end;
+                _ ->
+                    UnusedFiles
+            end
+        end,
+    RenameFun =
+        fun(FN) ->
+            AltName = filename:join(sst_rootpath(RootPath),
+                                    filename:basename(FN, ?SST_FILEX))
+                        ++ ?ARCHIVE_FILEX,
+            file:rename(filename:join(sst_rootpath(RootPath), FN),
+                        AltName)
+        end,
+    FilesToArchive = lists:foldl(FileCheckFun, [], AllFiles),
+    lists:foreach(RenameFun, FilesToArchive),
+    ok.


 update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN},
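Editorial sketch, not part of the commit: archive_files/2 only acts on files with the ?SST_FILEX extension that are missing from UsedFileList, and RenameFun swaps the extension rather than deleting anything. With the macros defined above, a leftover file would be handled roughly like this (paths are invented for illustration):

%% Sketch with invented paths; mirrors FileCheckFun/RenameFun above.
%% Suppose sst_rootpath(RootPath) is "path/ledger/ledger_files" and it
%% holds "2_0_0.sst", which is not in UsedFileList.
FN = "2_0_0.sst",
AltName = filename:join("path/ledger/ledger_files",
                        filename:basename(FN, ".sst")) ++ ".bak",
%% AltName = "path/ledger/ledger_files/2_0_0.bak"
ok = file:rename(filename:join("path/ledger/ledger_files", FN), AltName).
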
@@ -934,11 +969,13 @@ update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN},
             case {CacheTooBig, L0Free, JitterCheck, NoPendingManifestChange} of
                 {true, true, true, true} ->
                     L0Constructor = roll_memory(UpdState, false),
-                    leveled_log:log_timer("P0031", [], SW),
+                    leveled_log:log_timer("P0031", [true, true], SW),
                     UpdState#state{levelzero_pending=true,
                                     levelzero_constructor=L0Constructor};
                 _ ->
-                    leveled_log:log_timer("P0031", [], SW),
+                    leveled_log:log_timer("P0031",
+                                            [CacheTooBig, JitterCheck],
+                                            SW),
                     UpdState
             end
     end.
@@ -1340,6 +1377,23 @@ clean_dir_test() ->
     ok = clean_subdir(RootPath ++ "/test.bob"),
     ok = file:delete(RootPath ++ "/test.bob").

+
+archive_files_test() ->
+    RootPath = "../test/ledger",
+    SSTPath = sst_rootpath(RootPath),
+    ok = filelib:ensure_dir(SSTPath),
+    ok = file:write_file(SSTPath ++ "/test1.sst", "hello_world"),
+    ok = file:write_file(SSTPath ++ "/test2.sst", "hello_world"),
+    ok = file:write_file(SSTPath ++ "/test3.bob", "hello_world"),
+    UsedFiles = ["./test1.sst"],
+    ok = archive_files(RootPath, UsedFiles),
+    {ok, AllFiles} = file:list_dir(SSTPath),
+    ?assertMatch(true, lists:member("test1.sst", AllFiles)),
+    ?assertMatch(false, lists:member("test2.sst", AllFiles)),
+    ?assertMatch(true, lists:member("test3.bob", AllFiles)),
+    ?assertMatch(true, lists:member("test2.bak", AllFiles)),
+    ok = clean_subdir(SSTPath).
+
 simple_server_test() ->
     RootPath = "../test/ledger",
     clean_testdir(RootPath),

@@ -136,7 +136,8 @@ copy_manifest(Manifest) ->
     % about is switched to undefined
     Manifest#manifest{snapshots = undefined, pending_deletes = undefined}.

--spec load_manifest(manifest(), fun(), fun()) -> {integer(), manifest()}.
+-spec load_manifest(manifest(), fun(), fun()) ->
+                                    {integer(), manifest(), list()}.
 %% @doc
 %% Roll over the manifest starting a process to manage each file in the
 %% manifest. The PidFun should be able to return the Pid of a file process
@@ -144,13 +145,15 @@ copy_manifest(Manifest) ->
 %% of that file, if passed the Pid that owns it.
 load_manifest(Manifest, PidFun, SQNFun) ->
     UpdateLevelFun =
-        fun(LevelIdx, {AccMaxSQN, AccMan}) ->
+        fun(LevelIdx, {AccMaxSQN, AccMan, AccFL}) ->
             L0 = array:get(LevelIdx, AccMan#manifest.levels),
-            {L1, SQN1} = load_level(LevelIdx, L0, PidFun, SQNFun),
+            {L1, SQN1, FileList} = load_level(LevelIdx, L0, PidFun, SQNFun),
             UpdLevels = array:set(LevelIdx, L1, AccMan#manifest.levels),
-            {max(AccMaxSQN, SQN1), AccMan#manifest{levels = UpdLevels}}
+            {max(AccMaxSQN, SQN1),
+                AccMan#manifest{levels = UpdLevels},
+                AccFL ++ FileList}
         end,
-    lists:foldl(UpdateLevelFun, {0, Manifest},
+    lists:foldl(UpdateLevelFun, {0, Manifest, []},
                 lists:seq(0, Manifest#manifest.basement)).

 -spec close_manifest(manifest(), fun()) -> ok.
@@ -488,27 +491,33 @@ levelzero_present(Manifest) ->

 load_level(LevelIdx, Level, PidFun, SQNFun) ->
     HigherLevelLoadFun =
-        fun(ME, {L_Out, L_MaxSQN}) ->
+        fun(ME, {L_Out, L_MaxSQN, FileList}) ->
             FN = ME#manifest_entry.filename,
             P = PidFun(FN),
             SQN = SQNFun(P),
-            {[ME#manifest_entry{owner=P}|L_Out], max(SQN, L_MaxSQN)}
+            {[ME#manifest_entry{owner=P}|L_Out],
+                max(SQN, L_MaxSQN),
+                [FN|FileList]}
         end,
     LowerLevelLoadFun =
-        fun({EK, ME}, {L_Out, L_MaxSQN}) ->
+        fun({EK, ME}, {L_Out, L_MaxSQN, FileList}) ->
             FN = ME#manifest_entry.filename,
             P = PidFun(FN),
             SQN = SQNFun(P),
-            {[{EK, ME#manifest_entry{owner=P}}|L_Out], max(SQN, L_MaxSQN)}
+            {[{EK, ME#manifest_entry{owner=P}}|L_Out],
+                max(SQN, L_MaxSQN),
+                [FN|FileList]}
         end,
     case LevelIdx =< 1 of
         true ->
-            lists:foldr(HigherLevelLoadFun, {[], 0}, Level);
+            lists:foldr(HigherLevelLoadFun, {[], 0, []}, Level);
         false ->
-            {L0, MaxSQN} = lists:foldr(LowerLevelLoadFun,
-                                        {[], 0},
+            {L0, MaxSQN, Flist} = lists:foldr(LowerLevelLoadFun,
+                                                {[], 0, []},
                                         leveled_tree:to_list(Level)),
-            {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), MaxSQN}
+            {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH),
+                MaxSQN,
+                Flist}
     end.

 close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 ->

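Editorial note, not part of the commit: the extra element threaded through load_level/4 and load_manifest/3 above is the list of .sst filenames belonging to live manifest entries, which leveled_penciller:start_from_file/1 (earlier in this commit) consumes roughly as follows:

%% Sketch of the consuming side, matching the penciller hunk above.
{MaxSQN, Manifest1, FileList} =
    leveled_pmanifest:load_manifest(Manifest0, OpenFun, SQNFun),
%% FileList is extended with any Level-0 file and handed to
%% archive_files/2, which renames every on-disk .sst not in the
%% list to a .bak file at startup.
ok = archive_files(RootPath, [L0FN|FileList]).
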
@@ -57,14 +57,14 @@
 -export([
         new_tree/1,
         new_tree/2,
-        add_kv/4,
+        add_kv/5,
         find_dirtyleaves/2,
         find_dirtysegments/2,
         fetch_root/1,
         fetch_leaves/2,
         merge_trees/2,
         get_segment/2,
-        tictac_hash/2,
+        tictac_hash/3,
         export_tree/1,
         import_tree/1
         ]).
@@ -162,13 +162,24 @@ import_tree(ExportedTree) ->
                 level2 = Lv2}.

 -spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree().
+add_kv(TicTacTree, Key, Value, BinExtractFun) ->
+    add_kv(TicTacTree, Key, Value, BinExtractFun, false).
+
+-spec add_kv(tictactree(), tuple(), tuple(), fun(), boolean()) -> tictactree().
 %% @doc
-%% Add a Key and value to a tictactree using the HashFun to calculate the Hash
-%% based on that key and value
-add_kv(TicTacTree, Key, Value, HashFun) ->
-    HashV = HashFun(Key, Value),
-    SegChangeHash = tictac_hash(Key, HashV),
-    Segment = get_segment(Key, TicTacTree#tictactree.segment_count),
+%% Add a Key and value to a tictactree using the BinExtractFun to extract a
+%% binary from the Key and value from which to generate the hash. The
+%% BinExtractFun will also need to do any canonicalisation necessary to make
+%% the hash consistent (such as whitespace removal, or sorting)
+%%
+%% For exportable trees the hash function will be based on the CJ Bernstein
+%% magic hash. For non-exportable trees erlang:phash2 will be used, and so
+%% non-binary Keys and Values can be returned from the BinExtractFun in this
+%% case.
+add_kv(TicTacTree, Key, Value, BinExtractFun, Exportable) ->
+    {BinK, BinV} = BinExtractFun(Key, Value),
+    {SegHash, SegChangeHash} = tictac_hash(BinK, BinV, Exportable),
+    Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count),

     Level2Pos =
         Segment band (TicTacTree#tictactree.width - 1),

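Editorial usage sketch, not part of the commit, using the same BinExtractFun shape as the module's own tests further down; term_to_binary/1 is the simplest canonical extraction for an Erlang-only (non-exportable) tree:

%% Sketch: build a small tree with the new add_kv/5.
BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end,
Tree0 = leveled_tictac:new_tree(0, small),
Tree1 = leveled_tictac:add_kv(Tree0, {o, "B1", "K1", null}, {caine, 1},
                                BinFun, false),
%% add_kv/4 simply defaults Exportable to false, so this is equivalent:
Tree1 = leveled_tictac:add_kv(Tree0, {o, "B1", "K1", null}, {caine, 1}, BinFun).
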
@@ -276,23 +287,35 @@ merge_trees(TreeA, TreeB) ->

     MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}.

--spec get_segment(any(),
-                    integer()|xsmall|xxsmall|small|medium|large|xlarge) ->
+-spec get_segment(integer(),
+                    integer()|xxsmall|xsmall|small|medium|large|xlarge) ->
                         integer().
 %% @doc
 %% Return the segment ID for a Key. Can pass the tree size or the actual
 %% segment count derived from the size
-get_segment(Key, SegmentCount) when is_integer(SegmentCount) ->
-    erlang:phash2(Key) band (SegmentCount - 1);
-get_segment(Key, TreeSize) ->
-    get_segment(Key, element(3, get_size(TreeSize))).
+get_segment(Hash, SegmentCount) when is_integer(SegmentCount) ->
+    Hash band (SegmentCount - 1);
+get_segment(Hash, TreeSize) ->
+    get_segment(Hash, element(3, get_size(TreeSize))).


--spec tictac_hash(tuple(), any()) -> integer().
+-spec tictac_hash(any(), any(), boolean()) -> {integer(), integer()}.
 %% @doc
-%% Hash the key and term
-tictac_hash(Key, Term) ->
-    erlang:phash2({Key, Term}).
+%% Hash the key and term, to either something repetable in Erlang, or using
+%% the DJ Bernstein hash if it is the tree needs to be compared with one
+%% calculated with a non-Erlang store
+%%
+%% Boolean is Exportable. does the hash need to be repetable by a non-Erlang
+%% machine
+tictac_hash(BinKey, BinVal, true)
+                        when is_binary(BinKey) and is_binary(BinVal) ->
+    HashKey = leveled_codec:magic_hash({binary, BinKey}),
+    HashVal = leveled_codec:magic_hash({binary, BinVal}),
+    {HashKey, HashKey bxor HashVal};
+tictac_hash(BinKey, {is_hash, HashedVal}, false) ->
+    {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor HashedVal};
+tictac_hash(BinKey, BinVal, false) ->
+    {erlang:phash2(BinKey), erlang:phash2(BinKey) bxor erlang:phash2(BinVal)}.

 %%%============================================================================
 %%% Internal functions

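Editorial sketch, not part of the commit, of the two hashing modes and the revised get_segment/2 contract, which now expects a pre-computed hash rather than a key - the same pattern the test suites in this commit use (get_segment(erlang:phash2(Key), SegmentCount)):

%% Non-exportable mode: phash2-based, works on any terms once extracted.
{SegHash1, _Change1} = tictac_hash(<<"Key1">>, <<"Value1">>, false),
%% Exportable mode: both inputs must be binaries; magic (Bernstein) hash.
{SegHash2, _Change2} = tictac_hash(<<"Key1">>, <<"Value1">>, true),
%% get_segment/2 now takes the hash, plus either a segment count or a
%% tree size atom such as small.
Seg1 = get_segment(SegHash1, 256),
Seg2 = get_segment(SegHash2, small),
true = Seg1 band 255 =:= Seg1.
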
@@ -372,13 +395,17 @@ simple_bysize_test() ->
     simple_test_withsize(xlarge).

 simple_test_withsize(Size) ->
-    HashFun = fun(_K, V) -> erlang:phash2(V) end,
+    BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end,
+
+    K1 = {o, "B1", "K1", null},
+    K2 = {o, "B1", "K2", null},
+    K3 = {o, "B1", "K3", null},

     Tree0 = new_tree(0, Size),
-    Tree1 = add_kv(Tree0, {o, "B1", "K1", null}, {caine, 1}, HashFun),
-    Tree2 = add_kv(Tree1, {o, "B1", "K2", null}, {caine, 2}, HashFun),
-    Tree3 = add_kv(Tree2, {o, "B1", "K3", null}, {caine, 3}, HashFun),
-    Tree3A = add_kv(Tree3, {o, "B1", "K3", null}, {caine, 4}, HashFun),
+    Tree1 = add_kv(Tree0, K1, {caine, 1}, BinFun),
+    Tree2 = add_kv(Tree1, K2, {caine, 2}, BinFun),
+    Tree3 = add_kv(Tree2, K3, {caine, 3}, BinFun),
+    Tree3A = add_kv(Tree3, K3, {caine, 4}, BinFun),
     ?assertMatch(true, Tree0#tictactree.level1 == Tree0#tictactree.level1),
     ?assertMatch(false, Tree0#tictactree.level1 == Tree1#tictactree.level1),
     ?assertMatch(false, Tree1#tictactree.level1 == Tree2#tictactree.level1),
@@ -386,10 +413,10 @@ simple_test_withsize(Size) ->
     ?assertMatch(false, Tree3#tictactree.level1 == Tree3A#tictactree.level1),

     Tree0X = new_tree(0, Size),
-    Tree1X = add_kv(Tree0X, {o, "B1", "K3", null}, {caine, 3}, HashFun),
-    Tree2X = add_kv(Tree1X, {o, "B1", "K1", null}, {caine, 1}, HashFun),
-    Tree3X = add_kv(Tree2X, {o, "B1", "K2", null}, {caine, 2}, HashFun),
-    Tree3XA = add_kv(Tree3X, {o, "B1", "K3", null}, {caine, 4}, HashFun),
+    Tree1X = add_kv(Tree0X, K3, {caine, 3}, BinFun),
+    Tree2X = add_kv(Tree1X, K1, {caine, 1}, BinFun),
+    Tree3X = add_kv(Tree2X, K2, {caine, 2}, BinFun),
+    Tree3XA = add_kv(Tree3X, K3, {caine, 4}, BinFun),
     ?assertMatch(false, Tree1#tictactree.level1 == Tree1X#tictactree.level1),
     ?assertMatch(false, Tree2#tictactree.level1 == Tree2X#tictactree.level1),
     ?assertMatch(true, Tree3#tictactree.level1 == Tree3X#tictactree.level1),
@@ -397,12 +424,17 @@ simple_test_withsize(Size) ->

     SC = Tree0#tictactree.segment_count,

+    GetSegFun =
+        fun(TK) ->
+            get_segment(erlang:phash2(term_to_binary(TK)), SC)
+        end,
+
     DL0 = find_dirtyleaves(Tree1, Tree0),
-    ?assertMatch(true, lists:member(get_segment({o, "B1", "K1", null}, SC), DL0)),
+    ?assertMatch(true, lists:member(GetSegFun(K1), DL0)),
     DL1 = find_dirtyleaves(Tree3, Tree1),
-    ?assertMatch(true, lists:member(get_segment({o, "B1", "K2", null}, SC), DL1)),
-    ?assertMatch(true, lists:member(get_segment({o, "B1", "K3", null}, SC), DL1)),
-    ?assertMatch(false, lists:member(get_segment({o, "B1", "K1", null}, SC), DL1)),
+    ?assertMatch(true, lists:member(GetSegFun(K2), DL1)),
+    ?assertMatch(true, lists:member(GetSegFun(K3), DL1)),
+    ?assertMatch(false, lists:member(GetSegFun(K1), DL1)),

     % Export and import tree to confirm no difference
     ExpTree3 = export_tree(Tree3),
@@ -425,24 +457,24 @@ merge_bysize_xlarge_test2() ->
     merge_test_withsize(xlarge).

 merge_test_withsize(Size) ->
-    HashFun = fun(_K, V) -> erlang:phash2(V) end,
+    BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end,

     TreeX0 = new_tree(0, Size),
-    TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, HashFun),
-    TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, HashFun),
-    TreeX3 = add_kv(TreeX2, {o, "B1", "X3", null}, {caine, 3}, HashFun),
-    TreeX4 = add_kv(TreeX3, {o, "B1", "X3", null}, {caine, 4}, HashFun),
+    TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, BinFun),
+    TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, BinFun),
+    TreeX3 = add_kv(TreeX2, {o, "B1", "X3", null}, {caine, 3}, BinFun),
+    TreeX4 = add_kv(TreeX3, {o, "B1", "X3", null}, {caine, 4}, BinFun),

     TreeY0 = new_tree(0, Size),
-    TreeY1 = add_kv(TreeY0, {o, "B1", "Y1", null}, {caine, 101}, HashFun),
-    TreeY2 = add_kv(TreeY1, {o, "B1", "Y2", null}, {caine, 102}, HashFun),
-    TreeY3 = add_kv(TreeY2, {o, "B1", "Y3", null}, {caine, 103}, HashFun),
-    TreeY4 = add_kv(TreeY3, {o, "B1", "Y3", null}, {caine, 104}, HashFun),
+    TreeY1 = add_kv(TreeY0, {o, "B1", "Y1", null}, {caine, 101}, BinFun),
+    TreeY2 = add_kv(TreeY1, {o, "B1", "Y2", null}, {caine, 102}, BinFun),
+    TreeY3 = add_kv(TreeY2, {o, "B1", "Y3", null}, {caine, 103}, BinFun),
+    TreeY4 = add_kv(TreeY3, {o, "B1", "Y3", null}, {caine, 104}, BinFun),

-    TreeZ1 = add_kv(TreeX4, {o, "B1", "Y1", null}, {caine, 101}, HashFun),
-    TreeZ2 = add_kv(TreeZ1, {o, "B1", "Y2", null}, {caine, 102}, HashFun),
-    TreeZ3 = add_kv(TreeZ2, {o, "B1", "Y3", null}, {caine, 103}, HashFun),
-    TreeZ4 = add_kv(TreeZ3, {o, "B1", "Y3", null}, {caine, 104}, HashFun),
+    TreeZ1 = add_kv(TreeX4, {o, "B1", "Y1", null}, {caine, 101}, BinFun),
+    TreeZ2 = add_kv(TreeZ1, {o, "B1", "Y2", null}, {caine, 102}, BinFun),
+    TreeZ3 = add_kv(TreeZ2, {o, "B1", "Y3", null}, {caine, 103}, BinFun),
+    TreeZ4 = add_kv(TreeZ3, {o, "B1", "Y3", null}, {caine, 104}, BinFun),

     TreeM0 = merge_trees(TreeX4, TreeY4),
     checktree(TreeM0),
@@ -452,6 +484,10 @@ merge_test_withsize(Size) ->
     checktree(TreeM1),
     ?assertMatch(false, TreeM1#tictactree.level1 == TreeZ4#tictactree.level1).

+exportable_test() ->
+    {Int1, Int2} = tictac_hash(<<"key">>, <<"value">>, true),
+    ?assertMatch({true, true}, {Int1 >= 0, Int2 >=0}).
+
 -endif.


@@ -580,22 +580,59 @@ space_clear_ondelete(_Config) ->
     io:format("This should cause a final ledger merge event~n"),
     io:format("Will require the penciller to resolve the issue of creating" ++
                 " an empty file as all keys compact on merge~n"),
     timer:sleep(12000),

+    CheckFun =
+        fun(X, FileCount) ->
+            case FileCount of
+                0 ->
+                    0;
+                _ ->
+                    timer:sleep(X),
+                    {ok, NewFC} =
+                        file:list_dir(RootPath ++ "/ledger/ledger_files"),
+                    io:format("Looping with ledger file count ~w~n",
+                                [length(NewFC)]),
+                    length(strip_nonsst(NewFC))
+            end
+        end,
+
+    FC = lists:foldl(CheckFun, infinity, [2000, 3000, 5000, 8000]),
     ok = leveled_bookie:book_close(Book3),
+    case FC of
+        0 ->
+            ok;
+        _ ->
+            {ok, Book4} = leveled_bookie:book_start(StartOpts1),
+            lists:foldl(CheckFun, infinity, [2000, 3000, 5000, 8000]),
+            leveled_bookie:book_close(Book4)
+    end,
+
     {ok, FNsD_L} = file:list_dir(RootPath ++ "/ledger/ledger_files"),
     io:format("FNsD - Bookie has ~w ledger files " ++
-                "after second close~n", [length(FNsD_L)]),
+                "after second close~n", [length(strip_nonsst(FNsD_L))]),
     lists:foreach(fun(FN) ->
                     io:format("FNsD - Ledger file is ~s~n", [FN])
                     end,
                     FNsD_L),
     true = PointB_Journals < length(FNsA_J),
-    true = length(FNsD_L) < length(FNsA_L),
-    true = length(FNsD_L) < length(FNsB_L),
-    true = length(FNsD_L) < length(FNsC_L),
-    true = length(FNsD_L) == 0.
+    true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsA_L)),
+    true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsB_L)),
+    true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsC_L)),
+    true = length(strip_nonsst(FNsD_L)) == 0.
+
+
+strip_nonsst(FileList) ->
+    SSTOnlyFun =
+        fun(FN, Acc) ->
+            case filename:extension(FN) of
+                ".sst" ->
+                    [FN|Acc];
+                _ ->
+                    Acc
+            end
+        end,
+    lists:foldl(SSTOnlyFun, [], FileList).


 is_empty_test(_Config) ->
     RootPath = testutil:reset_filestructure(),

@@ -114,19 +114,16 @@ many_put_compare(_Config) ->
     % Now run the same query by putting the tree-building responsibility onto
     % the fold_objects_fun

-    ApplyHash =
-        fun(HashFun) ->
-            fun(_Key, Value) ->
-                {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value),
-                <<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
-                    Rest/binary>> = HeadBin,
-                <<VclockBin:VclockLen/binary, _NotNeeded/binary>> = Rest,
-                HashFun(lists:sort(binary_to_term(VclockBin)))
-            end
-        end,
+    ExtractClockFun =
+        fun(Key, Value) ->
+            {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value),
+            <<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
+                VclockBin:VclockLen/binary, _Rest/binary>> = HeadBin,
+            {Key, lists:sort(binary_to_term(VclockBin))}
+        end,
     FoldObjectsFun =
         fun(_Bucket, Key, Value, Acc) ->
-            leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(fun erlang:phash2/1))
+            leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun, false)
         end,

     FoldQ0 = {foldheads_bybucket,
|
@ -157,22 +154,25 @@ many_put_compare(_Config) ->
|
|||
[timer:now_diff(os:timestamp(), SWB1Obj)]),
|
||||
true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj1)) == 0,
|
||||
|
||||
% AAE trees within riak are based on a sha of the vector clock. So to
|
||||
% compare with an AAE tree we need to compare outputs when we're hashing
|
||||
% a hash
|
||||
AltHashFun =
|
||||
fun(Term) ->
|
||||
erlang:phash2(crypto:hash(sha, term_to_binary(Term)))
|
||||
% For an exportable comparison, want hash to be based on something not
|
||||
% coupled to erlang language - so use exportable query
|
||||
AltExtractFun =
|
||||
fun(K, V) ->
|
||||
{proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(V),
|
||||
<<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
|
||||
VclockBin:VclockLen/binary, _Rest/binary>> = HeadBin,
|
||||
{term_to_binary(K), VclockBin}
|
||||
end,
|
||||
AltFoldObjectsFun =
|
||||
fun(_Bucket, Key, Value, Acc) ->
|
||||
leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(AltHashFun))
|
||||
leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun, true)
|
||||
end,
|
||||
AltFoldQ0 = {foldheads_bybucket,
|
||||
o_rkv,
|
||||
"Bucket",
|
||||
{AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)},
|
||||
false, true},
|
||||
o_rkv,
|
||||
"Bucket",
|
||||
{AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)},
|
||||
false,
|
||||
true},
|
||||
{async, TreeAAltObjFolder0} =
|
||||
leveled_bookie:book_returnfolder(Bookie2, AltFoldQ0),
|
||||
SWB2Obj = os:timestamp(),
|
||||
|
@@ -187,15 +187,19 @@ many_put_compare(_Config) ->
     io:format("Build tictac tree via object fold with no "++
                 "presence check and 200K objects and alt hash in ~w~n",
                 [timer:now_diff(os:timestamp(), SWB3Obj)]),
-    true =
-        length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)) == 1,
+    DL_ExportFold =
+        length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)),
+    io:format("Found dirty leaves with exportable comparison of ~w~n",
+                [DL_ExportFold]),
+    true = DL_ExportFold == 1,


     %% Finding differing keys
     FoldKeysFun =
         fun(SegListToFind) ->
             fun(_B, K, Acc) ->
-                Seg = leveled_tictac:get_segment(K, SegmentCount),
+                Seg =
+                    leveled_tictac:get_segment(erlang:phash2(K), SegmentCount),
                 case lists:member(Seg, SegListToFind) of
                     true ->
                         [K|Acc];
@@ -469,7 +473,8 @@ index_compare(_Config) ->

     FoldKeysIndexQFun =
         fun(_Bucket, {Term, Key}, Acc) ->
-            Seg = leveled_tictac:get_segment(Key, SegmentCount),
+            Seg =
+                leveled_tictac:get_segment(erlang:phash2(Key), SegmentCount),
             case lists:member(Seg, DL3_0) of
                 true ->
                     [{Term, Key}|Acc];