Add tests using fold_heads

Comparing the inbuilt tictac_tree fold to using "proper" abstraction and achieving the same thing through fold_heads.

The fold_heads method is slower (a lot more manipulation is required in the fold) - expect it to require more than 2x the CPU.

However, this does give the flexibility to change the hash algorithm.  This would allow a fold over a database of AAE trees (where the hash has been pre-computed using sha) to be compared with a fold over a database of leveled backends.

The fold_heads query can also vary whether it checks for presence of the object in the Inker.  So normally we can get the speed advantage of not checking the Journal for presence, but periodically we can check.
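
For illustration, a minimal sketch of requesting the extended fold_heads folder, with the two new booleans (CheckPresence, SnapPreFold) appended to the folder request - assuming a running Bookie and a {FoldHeadsFun, InitAcc} pair as used in the tests in this commit:

    {async, Folder} =
        leveled_bookie:book_returnfolder(Bookie,
                                            {foldheads_bybucket,
                                                o_rkv,
                                                "Bucket",
                                                {FoldHeadsFun, InitAcc},
                                                false,  % CheckPresence
                                                true}), % SnapPreFold
    Tree = Folder().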
Martin Sumner 2017-08-07 10:45:41 +01:00
parent dd20132892
commit 53ddc8950b
2 changed files with 207 additions and 121 deletions

@ -16,7 +16,7 @@
%%
%%
%% -------- Actors ---------
%%
%% The store is fronted by a Bookie, who takes support from different actors:
%% - An Inker who persists new data into the journal, and returns items from
%% the journal based on sequence number
@ -70,7 +70,7 @@
loadqueue_ledgercache/1,
push_ledgercache/2,
snapshot_store/5,
fetch_value/2]).
-include_lib("eunit/include/eunit.hrl").
@ -102,6 +102,7 @@
put_timing :: tuple() | undefined,
get_timing :: tuple() | undefined}).
-type book_state() :: #state{}.
%%%============================================================================
%%% API
@ -113,7 +114,7 @@
%% provide this startup method. This will start a KV store from the previous
%% store at root path - or an empty one if there is no store at the path.
%%
%% Fiddling with the LedgerCacheSize and JournalSize may improve performance,
%% but these are primarily exposed to support special situations (e.g. very
%% low memory installations), there should not be huge variance in outcomes
%% from modifying these numbers.
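%%
%% An illustrative start-up with explicit sizes (the path and numbers here
%% are example values only, not recommendations):
%%   {ok, Bookie} = book_start("/tmp/leveled", 2000, 500000000, sync)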
@ -158,7 +159,7 @@ book_start(RootPath, LedgerCacheSize, JournalSize, SyncStrategy) ->
%% up before deletion)
%%
%% TODO:
%% The reload_strategy is exposed as currently no firm decision has been made
%% about how recovery should work. For instance if we were to trust everything
%% as permanent in the Ledger once it is persisted, then there would be no
%% need to retain a skinny history of key changes in the Journal after
@ -200,7 +201,7 @@ book_tempput(Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL)
%% - A Primary Key and a Value
%% - IndexSpecs - a set of secondary key changes associated with the
%% transaction
%% - A tag indicating the type of object. Behaviour for metadata extraction,
%% and ledger compaction will vary by type. There are three currently
%% implemented types i (Index), o (Standard), o_rkv (Riak). Keys added with
%% Index tags are not fetchable (as they will not be hashed), but are
@ -314,9 +315,9 @@ book_head(Pid, Bucket, Key, Tag) ->
%% objects with a given tag
%% {tictactree_idx,
%% {Bucket, IdxField, StartValue, EndValue},
%%                  TreeSize,
%%                  PartitionFilter}
%% -> compile a hashtree for the items on the index. A partition filter is
%% required to avoid adding an index entry in this vnode as a fallback.
%% There is no de-duplication of results; duplicate results corrupt the tree.
%% {tictactree_obj,
@ -384,7 +385,7 @@ init([Opts]) ->
undefined ->
% Start from file not snapshot
{InkerOpts, PencillerOpts} = set_options(Opts),
CacheJitter = ?CACHE_SIZE div (100 div ?CACHE_SIZE_JITTER),
CacheSize = get_opt(cache_size, Opts, ?CACHE_SIZE)
+ erlang:phash2(self()) rem CacheJitter,
@ -398,9 +399,9 @@ init([Opts]) ->
limit_minutes = LimitMinutes,
unit_minutes = UnitMinutes}
end,
{Inker, Penciller} = startup(InkerOpts, PencillerOpts, RecentAAE),
NewETS = ets:new(mem, [ordered_set]),
leveled_log:log("B0001", [Inker, Penciller]),
{ok, #state{inker=Inker,
@ -467,12 +468,12 @@ handle_call({get, Bucket, Key, Tag}, _From, State) ->
State#state.penciller,
State#state.ledger_cache) of
not_present ->
GT0 = leveled_log:get_timing(State#state.get_timing,
SWh,
head_not_present),
{reply, not_found, State#state{get_timing=GT0}};
Head ->
GT0 = leveled_log:get_timing(State#state.get_timing,
SWh,
head_found),
SWg = os:timestamp(),
@ -518,8 +519,8 @@ handle_call({head, Bucket, Key, Tag}, _From, State) ->
end
end;
handle_call({snapshot, _Requestor, SnapType, _Timeout}, _From, State) ->
% TODO: clean-up passing of Requestor (which was previously just used in
% logs) and so can now be ignored, and timeout which is ignored - but
% probably shouldn't be.
Reply = snapshot_store(State, SnapType),
{reply, Reply, State};
@ -595,13 +596,21 @@ handle_call({return_folder, FolderType}, _From, State) ->
TreeSize,
PartitionFilter),
State};
{foldheads_allkeys, Tag, FoldHeadsFun,
        CheckPresence, SnapPreFold} ->
    {reply,
        foldheads_allkeys(State, Tag,
                            FoldHeadsFun,
                            CheckPresence,
                            SnapPreFold),
        State};
{foldheads_bybucket, Tag, Bucket, FoldHeadsFun,
        CheckPresence, SnapPreFold} ->
    {reply,
        foldheads_bybucket(State, Tag, Bucket,
                            FoldHeadsFun,
                            CheckPresence,
                            SnapPreFold),
        State};
{foldobjects_allkeys, Tag, FoldObjectsFun} ->
{reply,
@ -622,7 +631,7 @@ handle_call({return_folder, FolderType}, _From, State) ->
Field, FromTerm, ToTerm,
FoldObjectsFun),
State}
end;
handle_call({compact_journal, Timeout}, _From, State) ->
ok = leveled_inker:ink_compactjournal(State#state.inker,
@ -703,9 +712,9 @@ snapshot_store(LedgerCache0, Penciller, Inker, SnapType, Query) ->
LedgerCache#ledger_cache.index,
LedgerCache#ledger_cache.min_sqn,
LedgerCache#ledger_cache.max_sqn},
LongRunning =
    case Query of
        undefined ->
true;
no_lookup ->
true;
@ -728,7 +737,7 @@ snapshot_store(LedgerCache0, Penciller, Inker, SnapType, Query) ->
{ok, LedgerSnapshot, JournalSnapshot};
ledger ->
{ok, LedgerSnapshot, null}
end.
snapshot_store(State, SnapType) ->
snapshot_store(State, SnapType, undefined).
@ -837,7 +846,7 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList) ->
leveled_log:log("B0010",[NB]),
[]
end.
index_query(State,
Bucket,
@ -955,7 +964,7 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) ->
Tag),
PassHashFun}
end,
AccFun = accumulate_tree(Filter,
JournalCheck,
JournalSnapshot,
@ -965,7 +974,7 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) ->
EndKey,
AccFun,
Tree),
% Close down snapshot when complete so as not to hold removed
% files open
ok = leveled_penciller:pcl_close(LedgerSnapshot),
@ -982,22 +991,26 @@ tictactree(State, Tag, Bucket, Query, JournalCheck, TreeSize, Filter) ->
foldobjects_allkeys(State, Tag, FoldObjectsFun) ->
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
foldobjects(State, Tag, StartKey, EndKey, FoldObjectsFun,
                false, true).
foldheads_allkeys(State, Tag, FoldHeadsFun, CheckPresence, SnapPreFold) ->
    StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
    EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
    foldobjects(State, Tag, StartKey, EndKey, FoldHeadsFun,
                {true, CheckPresence}, SnapPreFold).
foldobjects_bybucket(State, Tag, Bucket, FoldObjectsFun) ->
StartKey = leveled_codec:to_ledgerkey(Bucket, null, Tag),
EndKey = leveled_codec:to_ledgerkey(Bucket, null, Tag),
foldobjects(State, Tag, StartKey, EndKey, FoldObjectsFun,
                false, true).
foldheads_bybucket(State, Tag, Bucket, FoldHeadsFun,
                    CheckPresence, SnapPreFold) ->
    StartKey = leveled_codec:to_ledgerkey(Bucket, null, Tag),
    EndKey = leveled_codec:to_ledgerkey(Bucket, null, Tag),
    foldobjects(State, Tag, StartKey, EndKey, FoldHeadsFun,
                {true, CheckPresence}, SnapPreFold).
foldobjects_byindex(State, Tag, Bucket,
Field, FromTerm, ToTerm, FoldObjectsFun) ->
@ -1005,34 +1018,53 @@ foldobjects_byindex(State, Tag, Bucket,
leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, Field, FromTerm),
EndKey =
leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, Field, ToTerm),
foldobjects(State, Tag, StartKey, EndKey, FoldObjectsFun,
                false, true).
% fold_objects and fold_heads are called by the riak_kv_sweeper in Riak,
% and the sweeper prompts the fold before checking to see if the fold is
% ready to be run.  This may lead to the fold being called on an old
% snapshot - hence the option to defer the snapshot until the Folder
% function is called (SnapPreFold=false).
-spec foldobjects(book_state(), atom(), tuple(), tuple(), fun(),
false|{true, boolean()}, boolean()) ->
{async, fun()}.
%% @doc
%% The object folder should be passed DeferredFetch and SnapPreFold.
%% DeferredFetch can either be false (which will return to the fold function
%% the full object), or {true, CheckPresence} - in which case a proxy object
%% will be created that, if understood by the fold function, will allow the
%% fold function to work on the head of the object, and defer fetching the
%% body in case such a fetch is unnecessary.
%% SnapPreFold determines if the snapshot of the database is taken prior to
%% returning the Folder function (SnapPreFold=true) or when the Folder
%% function is called as Folder() (SnapPreFold=false).
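%%
%% An illustrative (hypothetical) call: DeferredFetch={true, false} folds
%% over proxy objects without checking the Journal for presence, and
%% SnapPreFold=true takes the snapshot before the folder is returned:
%%   {async, Folder} =
%%       foldobjects(State, ?STD_TAG, StartKey, EndKey,
%%                   {FoldFun, []}, {true, false}, true),
%%   Results = Folder()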
foldobjects(State, Tag, StartKey, EndKey, FoldObjectsFun,
DeferredFetch, SnapPreFold) ->
{FoldFun, InitAcc} =
case is_tuple(FoldObjectsFun) of
true ->
% FoldObjectsFun is already a tuple with a Fold function and an
% initial accumulator
FoldObjectsFun;
false ->
% no initial accumulator passed, so default to an empty list
{FoldObjectsFun, []}
end,
SnapFun =
case SnapPreFold of
true ->
{ok, LS, JS} = snapshot_store(State, store, undefined),
fun() -> {ok, LS, JS} end;
false ->
Self = self(),
% Timeout will be ignored, as will Requestor
%
% This uses the external snapshot - as the snapshot will need
% to have consistent state between Bookie and Penciller when
% it is made.
fun() -> book_snapshotstore(Self, Self, 5400) end
end,
Folder =
fun() ->
{ok, LedgerSnapshot, JournalSnapshot} = SnapFun(),
AccFun = accumulate_objects(FoldFun,
JournalSnapshot,
Tag,
@ -1099,7 +1131,7 @@ readycache_forsnapshot(LedgerCache, Query) ->
min_sqn=LedgerCache#ledger_cache.min_sqn,
max_sqn=LedgerCache#ledger_cache.max_sqn};
_ ->
Idx =
case Query of
no_lookup ->
empty_index;
@ -1141,21 +1173,21 @@ set_options(Opts) ->
JournalSizeJitter = MaxJournalSize0 div (100 div ?JOURNAL_SIZE_JITTER),
MaxJournalSize = MaxJournalSize0 -
erlang:phash2(self()) rem JournalSizeJitter,
SyncStrat = get_opt(sync_strategy, Opts, sync),
WRP = get_opt(waste_retention_period, Opts),
AltStrategy = get_opt(reload_strategy, Opts, []),
ReloadStrategy = leveled_codec:inker_reload_strategy(AltStrategy),
PCLL0CacheSize = get_opt(max_pencillercachesize, Opts),
RootPath = get_opt(root_path, Opts),
JournalFP = RootPath ++ "/" ++ ?JOURNAL_FP,
LedgerFP = RootPath ++ "/" ++ ?LEDGER_FP,
ok = filelib:ensure_dir(JournalFP),
ok = filelib:ensure_dir(LedgerFP),
{#inker_options{root_path = JournalFP,
reload_strategy = ReloadStrategy,
max_run_length = get_opt(max_run_length, Opts),
@ -1181,7 +1213,7 @@ startup(InkerOpts, PencillerOpts, RecentAAE) ->
fetch_head(Key, Penciller, LedgerCache) ->
SW = os:timestamp(),
CacheResult =
case LedgerCache#ledger_cache.mem of
undefined ->
[];
@ -1239,7 +1271,7 @@ accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) ->
get_hashaccumulator(JournalCheck,
InkerClone,
AddKeyFun).
get_hashaccumulator(JournalCheck, InkerClone, AddKeyFun) ->
Now = leveled_codec:integer_now(),
AccFun =
@ -1256,7 +1288,7 @@ get_hashaccumulator(JournalCheck, InkerClone, AddKeyFun) ->
false ->
Acc
end;
_ ->
AddKeyFun(B, K, H, Acc)
end;
false ->
@ -1292,30 +1324,25 @@ accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) ->
end,
JK = {leveled_codec:to_ledgerkey(B, K, Tag), SQN},
case DeferredFetch of
{true, true} ->
InJournal =
leveled_inker:ink_keycheck(InkerClone,
LK,
SQN),
case InJournal of
probably ->
                            ProxyObj = make_proxy_object(LK, JK,
                                                            MD, V,
                                                            InkerClone),
                            FoldObjectsFun(B, K, ProxyObj, Acc);
missing ->
Acc
end;
{true, false} ->
ProxyObj = make_proxy_object(LK, JK,
MD, V,
InkerClone),
FoldObjectsFun(B, K, ProxyObj, Acc);
false ->
R = fetch_value(InkerClone, JK),
case R of
@ -1323,7 +1350,7 @@ accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) ->
Acc;
Value ->
FoldObjectsFun(B, K, Value, Acc)
end
end;
false ->
@ -1332,6 +1359,13 @@ accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) ->
end,
AccFun.
make_proxy_object(LK, JK, MD, V, InkerClone) ->
Size = leveled_codec:get_size(LK, V),
MDBin = leveled_codec:build_metadata_object(LK, MD),
term_to_binary({proxy_object,
MDBin,
Size,
{fun fetch_value/2, InkerClone, JK}}).
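
%% A fold function which understands the proxy can work on the head only,
%% or fetch the body on demand - e.g. (as in foldobjects_vs_hashtree_test):
%%   {proxy_object, MDBin, _Size, {FetchFun, Clone, JK}} =
%%       binary_to_term(ProxyObj),
%%   V = FetchFun(Clone, JK)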
check_presence(Key, Value, InkerClone) ->
{LedgerKey, SQN} = leveled_codec:strip_to_keyseqonly({Key, Value}),
@ -1455,7 +1489,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) ->
end.
maybe_withjitter(CacheSize, MaxCacheSize) ->
if
CacheSize > MaxCacheSize ->
R = leveled_rand:uniform(7 * MaxCacheSize),
@ -1471,7 +1505,7 @@ maybe_withjitter(CacheSize, MaxCacheSize) ->
get_loadfun(RecentAAE) ->
PrepareFun =
fun(Tag, PK, SQN, Obj, VS, IdxSpecs) ->
preparefor_ledgercache(Tag, PK, SQN, Obj, VS, IdxSpecs, RecentAAE)
end,
@ -1480,11 +1514,11 @@ get_loadfun(RecentAAE) ->
{MinSQN, MaxSQN, OutputTree} = Acc0,
{SQN, InkTag, PK} = KeyInJournal,
% VBin may already be a term
{VBin, VSize} = ExtractFun(ValueInJournal),
{Obj, IdxSpecs} = leveled_codec:split_inkvalue(VBin),
case SQN of
SQN when SQN < MinSQN ->
{loop, Acc0};
SQN when SQN < MaxSQN ->
Chngs = PrepareFun(InkTag, PK, SQN, Obj, VSize, IdxSpecs),
{loop,
@ -1549,7 +1583,7 @@ generate_multiple_objects(Count, KeyNumber, ObjL) ->
KeyNumber + 1,
ObjL ++ [{Key, Value, IndexSpec}]).
ttl_test() ->
RootPath = reset_filestructure(),
{ok, Bookie1} = book_start([{root_path, RootPath}]),
@ -1569,7 +1603,7 @@ ttl_test() ->
{ok, _} = book_head(Bookie1, "Bucket", K, ?STD_TAG)
end,
ObjL1),
ObjL2 = generate_multiple_objects(100, 101),
Past = leveled_codec:integer_now() - 300,
lists:foreach(fun({K, V, S}) -> ok = book_tempput(Bookie1,
@ -1585,7 +1619,7 @@ ttl_test() ->
not_found = book_head(Bookie1, "Bucket", K, ?STD_TAG)
end,
ObjL2),
{async, BucketFolder} = book_returnfolder(Bookie1,
{bucket_stats, "Bucket"}),
{_Size, Count} = BucketFolder(),
@ -1600,7 +1634,7 @@ ttl_test() ->
{false, undefined}}),
KeyList = IndexFolder(),
?assertMatch(20, length(KeyList)),
{ok, Regex} = re:compile("f8"),
{async,
IndexFolderTR} = book_returnfolder(Bookie1,
@ -1611,10 +1645,10 @@ ttl_test() ->
{true, Regex}}),
TermKeyList = IndexFolderTR(),
?assertMatch(10, length(TermKeyList)),
ok = book_close(Bookie1),
{ok, Bookie2} = book_start([{root_path, RootPath}]),
{async,
IndexFolderTR2} = book_returnfolder(Bookie2,
{index_query,
@ -1624,7 +1658,7 @@ ttl_test() ->
{false, Regex}}),
KeyList2 = IndexFolderTR2(),
?assertMatch(10, length(KeyList2)),
lists:foreach(fun({K, _V, _S}) ->
not_found = book_get(Bookie2, "Bucket", K, ?STD_TAG)
end,
@ -1633,7 +1667,7 @@ ttl_test() ->
not_found = book_head(Bookie2, "Bucket", K, ?STD_TAG)
end,
ObjL2),
ok = book_close(Bookie2),
reset_filestructure().
@ -1729,7 +1763,7 @@ foldobjects_vs_hashtree_test() ->
?STD_TAG,
false}),
KeyHashList1 = lists:usort(HTFolder1()),
FoldObjectsFun = fun(B, K, V, Acc) ->
[{B, K, erlang:phash2(term_to_binary(V))}|Acc] end,
{async, HTFolder2} =
@ -1737,7 +1771,7 @@ foldobjects_vs_hashtree_test() ->
{foldobjects_allkeys, ?STD_TAG, FoldObjectsFun}),
KeyHashList2 = HTFolder2(),
?assertMatch(KeyHashList1, lists:usort(KeyHashList2)),
FoldHeadsFun =
fun(B, K, ProxyV, Acc) ->
{proxy_object,
@ -1747,14 +1781,14 @@ foldobjects_vs_hashtree_test() ->
V = FetchFun(Clone, JK),
[{B, K, erlang:phash2(term_to_binary(V))}|Acc]
end,
{async, HTFolder3} =
book_returnfolder(Bookie1,
{foldheads_allkeys, ?STD_TAG, FoldHeadsFun}),
KeyHashList3 = HTFolder3(),
?assertMatch(KeyHashList1, lists:usort(KeyHashList3)),
FoldHeadsFun2 =
fun(B, K, ProxyV, Acc) ->
{proxy_object,
MD,
@ -1763,13 +1797,13 @@ foldobjects_vs_hashtree_test() ->
{Hash, _Size} = MD,
[{B, K, Hash}|Acc]
end,
{async, HTFolder4} =
book_returnfolder(Bookie1,
{foldheads_allkeys, ?STD_TAG, FoldHeadsFun2}),
KeyHashList4 = HTFolder4(),
?assertMatch(KeyHashList1, lists:usort(KeyHashList4)),
ok = book_close(Bookie1),
reset_filestructure().
@ -1793,7 +1827,7 @@ foldobjects_vs_foldheads_bybucket_test() ->
?STD_TAG,
Future) end,
ObjL2),
FoldObjectsFun = fun(B, K, V, Acc) ->
[{B, K, erlang:phash2(term_to_binary(V))}|Acc] end,
{async, HTFolder1A} =
@ -1812,7 +1846,7 @@ foldobjects_vs_foldheads_bybucket_test() ->
KeyHashList1B = HTFolder1B(),
?assertMatch(false,
lists:usort(KeyHashList1A) == lists:usort(KeyHashList1B)),
FoldHeadsFun =
fun(B, K, ProxyV, Acc) ->
{proxy_object,
@ -1822,7 +1856,7 @@ foldobjects_vs_foldheads_bybucket_test() ->
V = FetchFun(Clone, JK),
[{B, K, erlang:phash2(term_to_binary(V))}|Acc]
end,
{async, HTFolder2A} =
book_returnfolder(Bookie1,
{foldheads_bybucket,
@ -1841,7 +1875,7 @@ foldobjects_vs_foldheads_bybucket_test() ->
lists:usort(KeyHashList1A) == lists:usort(KeyHashList2A)),
?assertMatch(true,
lists:usort(KeyHashList1B) == lists:usort(KeyHashList2B)),
ok = book_close(Bookie1),
reset_filestructure().
@ -1873,7 +1907,7 @@ scan_table_test() ->
<<"F1-bin">>,
<<"AA2">>),
Tab0 = ets:new(mem, [ordered_set]),
SK_A0 = leveled_codec:to_ledgerkey(<<"B1">>,
null,
?IDX_TAG,

@ -113,31 +113,83 @@ many_put_compare(_Config) ->
% Now run the same query by putting the tree-building responsibility onto
% the fold_objects_fun
ApplyHash =
fun(HashFun) ->
fun(_Key, Value) ->
{proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value),
<<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
Rest/binary>> = HeadBin,
<<VclockBin:VclockLen/binary, _NotNeeded/binary>> = Rest,
HashFun(lists:sort(binary_to_term(VclockBin)))
end
end,
FoldObjectsFun =
fun(_Bucket, Key, Value, Acc) ->
leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(fun erlang:phash2/1))
end,
FoldQ0 = {foldheads_bybucket,
            o_rkv,
            "Bucket",
            {FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)},
            false, true},
{async, TreeAObjFolder0} =
    leveled_bookie:book_returnfolder(Bookie2, FoldQ0),
SWB0Obj = os:timestamp(),
TreeAObj0 = TreeAObjFolder0(),
io:format("Build tictac tree via object fold with no "++
"presence check and 200K objects in ~w~n",
[timer:now_diff(os:timestamp(), SWB0Obj)]),
true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj0)) == 0,
FoldQ1 = {foldheads_bybucket,
o_rkv,
"Bucket",
{FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)},
true, true},
{async, TreeAObjFolder1} =
leveled_bookie:book_returnfolder(Bookie2, FoldQ1),
SWB1Obj = os:timestamp(),
TreeAObj1 = TreeAObjFolder1(),
io:format("Build tictac tree via object fold with "++
"presence check and 200K objects in ~w~n",
[timer:now_diff(os:timestamp(), SWB1Obj)]),
true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj1)) == 0,
% AAE trees within riak are based on a sha of the vector clock. So to
% compare with an AAE tree we need to compare outputs when we're hashing
% a hash
AltHashFun =
fun(Term) ->
erlang:phash2(crypto:hash(sha, term_to_binary(Term)))
end,
AltFoldObjectsFun =
fun(_Bucket, Key, Value, Acc) ->
leveled_tictac:add_kv(Acc, Key, Value, ApplyHash(AltHashFun))
end,
AltFoldQ0 = {foldheads_bybucket,
o_rkv,
"Bucket",
{AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)},
false, true},
{async, TreeAAltObjFolder0} =
leveled_bookie:book_returnfolder(Bookie2, AltFoldQ0),
SWB2Obj = os:timestamp(),
TreeAAltObj = TreeAAltObjFolder0(),
io:format("Build tictac tree via object fold with no "++
"presence check and 200K objects and alt hash in ~w~n",
[timer:now_diff(os:timestamp(), SWB2Obj)]),
{async, TreeBAltObjFolder0} =
leveled_bookie:book_returnfolder(Bookie3, AltFoldQ0),
SWB3Obj = os:timestamp(),
TreeBAltObj = TreeBAltObjFolder0(),
io:format("Build tictac tree via object fold with no "++
"presence check and 200K objects and alt hash in ~w~n",
[timer:now_diff(os:timestamp(), SWB3Obj)]),
true =
length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)) == 1,
%% Finding differing keys
FoldKeysFun =