Merge pull request #96 from martinsumner/mas-riakaae-impl-2

Mas riakaae impl 2
This commit is contained in:
Martin Sumner 2017-10-17 09:39:12 +01:00 committed by GitHub
commit d0b8e47f77
5 changed files with 141 additions and 32 deletions

Binary file not shown.

View file

@ -604,10 +604,11 @@ handle_call({return_folder, FolderType}, _From, State) ->
CheckPresence, CheckPresence,
SnapPreFold), SnapPreFold),
State}; State};
{foldheads_bybucket, Tag, Bucket, FoldHeadsFun, {foldheads_bybucket, Tag, Bucket, KeyRange, FoldHeadsFun,
CheckPresence, SnapPreFold} -> CheckPresence, SnapPreFold} ->
{reply, {reply,
foldheads_bybucket(State, Tag, Bucket, foldheads_bybucket(State, Tag, Bucket,
KeyRange,
FoldHeadsFun, FoldHeadsFun,
CheckPresence, CheckPresence,
SnapPreFold), SnapPreFold),
@ -1000,9 +1001,20 @@ foldobjects_bybucket(State, Tag, Bucket, FoldObjectsFun) ->
foldobjects(State, Tag, StartKey, EndKey, FoldObjectsFun, foldobjects(State, Tag, StartKey, EndKey, FoldObjectsFun,
false, true). false, true).
foldheads_bybucket(State, Tag, Bucket, FoldHeadsFun, CheckPresence, SnapPreFold) -> foldheads_bybucket(State, Tag, Bucket, KeyRange, FoldHeadsFun,
StartKey = leveled_codec:to_ledgerkey(Bucket, null, Tag), CheckPresence, SnapPreFold) ->
EndKey = leveled_codec:to_ledgerkey(Bucket, null, Tag), {StartKey, EndKey} =
case KeyRange of
all ->
{leveled_codec:to_ledgerkey(Bucket, null, Tag),
leveled_codec:to_ledgerkey(Bucket, null, Tag)};
{StartTerm, <<"$all">>} ->
{leveled_codec:to_ledgerkey(Bucket, StartTerm, Tag),
leveled_codec:to_ledgerkey(Bucket, null, Tag)};
{StartTerm, EndTerm} ->
{leveled_codec:to_ledgerkey(Bucket, StartTerm, Tag),
leveled_codec:to_ledgerkey(Bucket, EndTerm, Tag)}
end,
foldobjects(State, Tag, StartKey, EndKey, FoldHeadsFun, foldobjects(State, Tag, StartKey, EndKey, FoldHeadsFun,
{true, CheckPresence}, SnapPreFold). {true, CheckPresence}, SnapPreFold).
@ -1876,6 +1888,7 @@ foldobjects_vs_foldheads_bybucket_testto() ->
{foldheads_bybucket, {foldheads_bybucket,
?STD_TAG, ?STD_TAG,
"BucketA", "BucketA",
all,
FoldHeadsFun, FoldHeadsFun,
true, true,
true}), true}),
@ -1885,15 +1898,69 @@ foldobjects_vs_foldheads_bybucket_testto() ->
{foldheads_bybucket, {foldheads_bybucket,
?STD_TAG, ?STD_TAG,
"BucketB", "BucketB",
all,
FoldHeadsFun, FoldHeadsFun,
false, false,
false}), false}),
KeyHashList2B = HTFolder2B(), KeyHashList2B = HTFolder2B(),
?assertMatch(true, ?assertMatch(true,
lists:usort(KeyHashList1A) == lists:usort(KeyHashList2A)), lists:usort(KeyHashList1A) == lists:usort(KeyHashList2A)),
?assertMatch(true, ?assertMatch(true,
lists:usort(KeyHashList1B) == lists:usort(KeyHashList2B)), lists:usort(KeyHashList1B) == lists:usort(KeyHashList2B)),
{async, HTFolder2C} =
book_returnfolder(Bookie1,
{foldheads_bybucket,
?STD_TAG,
"BucketB",
{"Key", <<"$all">>},
FoldHeadsFun,
false,
false}),
KeyHashList2C = HTFolder2C(),
{async, HTFolder2D} =
book_returnfolder(Bookie1,
{foldheads_bybucket,
?STD_TAG,
"BucketB",
{"Key", "Keyzzzzz"},
FoldHeadsFun,
false,
false}),
KeyHashList2D = HTFolder2D(),
?assertMatch(true,
lists:usort(KeyHashList2B) == lists:usort(KeyHashList2C)),
?assertMatch(true,
lists:usort(KeyHashList2B) == lists:usort(KeyHashList2D)),
{async, HTFolder2E} =
book_returnfolder(Bookie1,
{foldheads_bybucket,
?STD_TAG,
"BucketB",
{"Key", "Key4zzzz"},
FoldHeadsFun,
false,
false}),
KeyHashList2E = HTFolder2E(),
{async, HTFolder2F} =
book_returnfolder(Bookie1,
{foldheads_bybucket,
?STD_TAG,
"BucketB",
{"Key5", <<"all">>},
FoldHeadsFun,
false,
false}),
KeyHashList2F = HTFolder2F(),
?assertMatch(true, length(KeyHashList2E) > 0),
?assertMatch(true, length(KeyHashList2F) > 0),
?assertMatch(true,
lists:usort(KeyHashList2B) ==
lists:usort(KeyHashList2E ++ KeyHashList2F)),
ok = book_close(Bookie1), ok = book_close(Bookie1),
reset_filestructure(). reset_filestructure().

View file

@ -91,10 +91,12 @@ magic_hash({?RIAK_TAG, Bucket, Key, _SubKey}) ->
magic_hash({Bucket, Key}); magic_hash({Bucket, Key});
magic_hash({?STD_TAG, Bucket, Key, _SubKey}) -> magic_hash({?STD_TAG, Bucket, Key, _SubKey}) ->
magic_hash({Bucket, Key}); magic_hash({Bucket, Key});
magic_hash({binary, BinaryKey}) ->
H = 5381,
hash1(H, BinaryKey) band 16#FFFFFFFF;
magic_hash(AnyKey) -> magic_hash(AnyKey) ->
BK = term_to_binary(AnyKey), BK = term_to_binary(AnyKey),
H = 5381, magic_hash({binary, BK}).
hash1(H, BK) band 16#FFFFFFFF.
hash1(H, <<>>) -> hash1(H, <<>>) ->
H; H;

View file

@ -66,20 +66,25 @@
get_segment/2, get_segment/2,
tictac_hash/3, tictac_hash/3,
export_tree/1, export_tree/1,
import_tree/1 import_tree/1,
valid_size/1
]). ]).
-include_lib("eunit/include/eunit.hrl"). -include_lib("eunit/include/eunit.hrl").
-define(HASH_SIZE, 4). -define(HASH_SIZE, 4).
-define(XXSMALL, {6, 64, 64 * 64}).
-define(XSMALL, {7, 128, 128 * 128}).
-define(SMALL, {8, 256, 256 * 256}). -define(SMALL, {8, 256, 256 * 256}).
-define(MEDIUM, {9, 512, 512 * 512}). -define(MEDIUM, {9, 512, 512 * 512}).
-define(LARGE, {10, 1024, 1024 * 1024}). -define(LARGE, {10, 1024, 1024 * 1024}).
-define(XLARGE, {11, 2048, 2048 * 2048}). -define(XLARGE, {11, 2048, 2048 * 2048}).
-define(EMPTY, <<0:8/integer>>).
-define(VALID_SIZES, [xxsmall, xsmall, small, medium, large, xlarge]).
-record(tictactree, {treeID :: any(), -record(tictactree, {treeID :: any(),
size :: small|medium|large|xlarge, size :: xxsmall|xsmall|small|medium|large|xlarge,
width :: integer(), width :: integer(),
bitwidth :: integer(), bitwidth :: integer(),
segment_count :: integer(), segment_count :: integer(),
@ -93,6 +98,12 @@
%%% External functions %%% External functions
%%%============================================================================ %%%============================================================================
-spec valid_size(any()) -> boolean().
%% @doc
%% Input validation helper.  Returns true only when Size is exactly one of the
%% size tags listed in ?VALID_SIZES; any other term yields false.
valid_size(Size) ->
    IsThisSize = fun(SizeTag) -> SizeTag =:= Size end,
    lists:any(IsThisSize, ?VALID_SIZES).
-spec new_tree(any()) -> tictactree(). -spec new_tree(any()) -> tictactree().
%% @doc %% @doc
%% Create a new tree, zeroed out. %% Create a new tree, zeroed out.
@ -103,9 +114,7 @@ new_tree(TreeID, Size) ->
{BitWidth, Width, SegmentCount} = get_size(Size), {BitWidth, Width, SegmentCount} = get_size(Size),
Lv1Width = Width * ?HASH_SIZE * 8, Lv1Width = Width * ?HASH_SIZE * 8,
Lv1Init = <<0:Lv1Width/integer>>, Lv1Init = <<0:Lv1Width/integer>>,
Lv2SegBinSize = Width * ?HASH_SIZE * 8, Lv2Init = array:new([{size, Width}, {default, ?EMPTY}]),
Lv2SegBinInit = <<0:Lv2SegBinSize/integer>>,
Lv2Init = array:new([{size, Width}, {default, Lv2SegBinInit}]),
#tictactree{treeID = TreeID, #tictactree{treeID = TreeID,
size = Size, size = Size,
width = Width, width = Width,
@ -119,13 +128,13 @@ new_tree(TreeID, Size) ->
%% Export the tree into a tuple list, with the level1 binary, and then for %% Export the tree into a tuple list, with the level1 binary, and then for
%% level2 {branchID, binary()} %% level2 {branchID, binary()}
export_tree(Tree) -> export_tree(Tree) ->
L2 = EncodeL2Fun =
lists:foldl(fun(X, L2Acc) -> fun(X, L2Acc) ->
[{integer_to_binary(X), L2Element = zlib:compress(array:get(X, Tree#tictactree.level2)),
array:get(X, Tree#tictactree.level2)}|L2Acc] [{integer_to_binary(X), base64:encode_to_string(L2Element)}|L2Acc]
end, end,
[], L2 =
lists:seq(0, Tree#tictactree.width - 1)), lists:foldl(EncodeL2Fun, [], lists:seq(0, Tree#tictactree.width - 1)),
{struct, {struct,
[{<<"level1">>, base64:encode_to_string(Tree#tictactree.level1)}, [{<<"level1">>, base64:encode_to_string(Tree#tictactree.level1)},
{<<"level2">>, {struct, lists:reverse(L2)}} {<<"level2">>, {struct, lists:reverse(L2)}}
@ -139,16 +148,16 @@ import_tree(ExportedTree) ->
[{<<"level1">>, L1Base64}, [{<<"level1">>, L1Base64},
{<<"level2">>, {struct, L2List}}]} = ExportedTree, {<<"level2">>, {struct, L2List}}]} = ExportedTree,
L1Bin = base64:decode(L1Base64), L1Bin = base64:decode(L1Base64),
Sizes = [{small, element(2, ?SMALL)}, Sizes =
{medium, element(2, ?MEDIUM)}, lists:map(fun(SizeTag) -> {SizeTag, element(2, get_size(SizeTag))} end,
{large, element(2, ?LARGE)}, ?VALID_SIZES),
{xlarge, element(2, ?XLARGE)}],
Width = byte_size(L1Bin) div ?HASH_SIZE, Width = byte_size(L1Bin) div ?HASH_SIZE,
{Size, Width} = lists:keyfind(Width, 2, Sizes), {Size, Width} = lists:keyfind(Width, 2, Sizes),
{BitWidth, Width, SegmentCount} = get_size(Size), {BitWidth, Width, SegmentCount} = get_size(Size),
Lv2Init = array:new([{size, Width}]), Lv2Init = array:new([{size, Width}]),
FoldFun = FoldFun =
fun({X, L2SegBin}, L2Array) -> fun({X, EncodedL2SegBin}, L2Array) ->
L2SegBin = zlib:uncompress(base64:decode(EncodedL2SegBin)),
array:set(binary_to_integer(X), L2SegBin, L2Array) array:set(binary_to_integer(X), L2SegBin, L2Array)
end, end,
Lv2 = lists:foldl(FoldFun, Lv2Init, L2List), Lv2 = lists:foldl(FoldFun, Lv2Init, L2List),
@ -188,7 +197,7 @@ add_kv(TicTacTree, Key, Value, BinExtractFun, Exportable) ->
Level2BytePos = ?HASH_SIZE * Level2Pos, Level2BytePos = ?HASH_SIZE * Level2Pos,
Level1BytePos = ?HASH_SIZE * Level1Pos, Level1BytePos = ?HASH_SIZE * Level1Pos,
Level2 = array:get(Level1Pos, TicTacTree#tictactree.level2), Level2 = get_level2(TicTacTree, Level1Pos),
HashIntLength = ?HASH_SIZE * 8, HashIntLength = ?HASH_SIZE * 8,
<<PreL2:Level2BytePos/binary, <<PreL2:Level2BytePos/binary,
@ -254,7 +263,7 @@ fetch_root(TicTacTree) ->
fetch_leaves(TicTacTree, BranchList) -> fetch_leaves(TicTacTree, BranchList) ->
MapFun = MapFun =
fun(Idx) -> fun(Idx) ->
{Idx, array:get(Idx, TicTacTree#tictactree.level2)} {Idx, get_level2(TicTacTree, Idx)}
end, end,
lists:map(MapFun, BranchList). lists:map(MapFun, BranchList).
@ -275,9 +284,16 @@ merge_trees(TreeA, TreeB) ->
MergeFun = MergeFun =
fun(SQN, MergeL2) -> fun(SQN, MergeL2) ->
L2A = array:get(SQN, TreeA#tictactree.level2), L2A = get_level2(TreeA, SQN),
L2B = array:get(SQN, TreeB#tictactree.level2), L2B = get_level2(TreeB, SQN),
NewLevel2 = merge_binaries(L2A, L2B), BothEmpty = (L2A == ?EMPTY) and (L2B == ?EMPTY),
NewLevel2 =
case BothEmpty of
true ->
?EMPTY;
false ->
merge_binaries(L2A, L2B)
end,
array:set(SQN, NewLevel2, MergeL2) array:set(SQN, NewLevel2, MergeL2)
end, end,
NewLevel2 = lists:foldl(MergeFun, NewLevel2 = lists:foldl(MergeFun,
@ -286,7 +302,9 @@ merge_trees(TreeA, TreeB) ->
MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}. MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}.
-spec get_segment(integer(), integer()|small|medium|large|xlarge) -> integer(). -spec get_segment(integer(),
integer()|xxsmall|xsmall|small|medium|large|xlarge) ->
integer().
%% @doc %% @doc
%% Return the segment ID for a Key. Can pass the tree size or the actual %% Return the segment ID for a Key. Can pass the tree size or the actual
%% segment count derived from the size %% segment count derived from the size
@ -318,8 +336,21 @@ tictac_hash(BinKey, BinVal, false) ->
%%% Internal functions %%% Internal functions
%%%============================================================================ %%%============================================================================
%% Fetch the level2 segment binary held at position L1Pos of the tree's
%% level2 array.  A slot holding the ?EMPTY placeholder is expanded into an
%% all-zero binary of (width * ?HASH_SIZE * 8) bits; any other stored value is
%% returned unchanged.
get_level2(TicTacTree, L1Pos) ->
    StoredBin = array:get(L1Pos, TicTacTree#tictactree.level2),
    if
        StoredBin =:= ?EMPTY ->
            %% Placeholder slot - build the zeroed segment binary on demand
            ZeroBitCount = TicTacTree#tictactree.width * ?HASH_SIZE * 8,
            <<0:ZeroBitCount/integer>>;
        true ->
            StoredBin
    end.
get_size(Size) -> get_size(Size) ->
case Size of case Size of
xxsmall ->
?XXSMALL;
xsmall ->
?XSMALL;
small -> small ->
?SMALL; ?SMALL;
medium -> medium ->
@ -353,7 +384,7 @@ checktree(<<>>, TicTacTree, Counter) ->
checktree(Level1Bin, TicTacTree, Counter) -> checktree(Level1Bin, TicTacTree, Counter) ->
BitSize = ?HASH_SIZE * 8, BitSize = ?HASH_SIZE * 8,
<<TopHash:BitSize/integer, Tail/binary>> = Level1Bin, <<TopHash:BitSize/integer, Tail/binary>> = Level1Bin,
L2Bin = array:get(Counter, TicTacTree#tictactree.level2), L2Bin = get_level2(TicTacTree, Counter),
true = TopHash == segmentsummarise(L2Bin, 0), true = TopHash == segmentsummarise(L2Bin, 0),
checktree(Tail, TicTacTree, Counter + 1). checktree(Tail, TicTacTree, Counter + 1).
@ -379,13 +410,19 @@ merge_binaries(BinA, BinB) ->
-ifdef(TEST). -ifdef(TEST).
simple_bysize_test() -> simple_bysize_test_() ->
{timeout, 60, fun simple_bysize_test_allsizes/0}.
simple_bysize_test_allsizes() ->
simple_test_withsize(xxsmall),
simple_test_withsize(xsmall),
simple_test_withsize(small), simple_test_withsize(small),
simple_test_withsize(medium), simple_test_withsize(medium),
simple_test_withsize(large), simple_test_withsize(large),
simple_test_withsize(xlarge). simple_test_withsize(xlarge).
simple_test_withsize(Size) -> simple_test_withsize(Size) ->
?assertMatch(true, valid_size(Size)),
BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end, BinFun = fun(K, V) -> {term_to_binary(K), term_to_binary(V)} end,
K1 = {o, "B1", "K1", null}, K1 = {o, "B1", "K1", null},

View file

@ -129,6 +129,7 @@ many_put_compare(_Config) ->
FoldQ0 = {foldheads_bybucket, FoldQ0 = {foldheads_bybucket,
o_rkv, o_rkv,
"Bucket", "Bucket",
all,
{FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, {FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)},
false, true}, false, true},
{async, TreeAObjFolder0} = {async, TreeAObjFolder0} =
@ -143,6 +144,7 @@ many_put_compare(_Config) ->
FoldQ1 = {foldheads_bybucket, FoldQ1 = {foldheads_bybucket,
o_rkv, o_rkv,
"Bucket", "Bucket",
all,
{FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, {FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)},
true, true}, true, true},
{async, TreeAObjFolder1} = {async, TreeAObjFolder1} =
@ -170,6 +172,7 @@ many_put_compare(_Config) ->
AltFoldQ0 = {foldheads_bybucket, AltFoldQ0 = {foldheads_bybucket,
o_rkv, o_rkv,
"Bucket", "Bucket",
all,
{AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, {AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)},
false, false,
true}, true},
@ -309,11 +312,11 @@ many_put_compare(_Config) ->
index_compare(_Config) -> index_compare(_Config) ->
TreeSize = small, TreeSize = xxsmall,
LS = 2000, LS = 2000,
JS = 50000000, JS = 50000000,
SS = testutil:sync_strategy(), SS = testutil:sync_strategy(),
SegmentCount = 256 * 256, SegmentCount = 64 * 64,
% Test requires multiple different databases, so want to mount them all % Test requires multiple different databases, so want to mount them all
% on individual file paths % on individual file paths