Make ink fold more generic
Also makes the fold_from_sequence loop much easier to follow
This commit is contained in:
parent
39ad5c9680
commit
50c81d0626
4 changed files with 116 additions and 81 deletions
|
@ -653,7 +653,9 @@ loadqueue_ledgercache(Cache) ->
|
||||||
%% Query can be no_lookup, indicating the snapshot will be used for non-specific
|
%% Query can be no_lookup, indicating the snapshot will be used for non-specific
|
||||||
%% range queries and not direct fetch requests. {StartKey, EndKey} if the
|
%% range queries and not direct fetch requests. {StartKey, EndKey} if the
|
||||||
%% snapshot is to be used for one specific query only (this is much quicker to
|
%% snapshot is to be used for one specific query only (this is much quicker to
|
||||||
%% setup, assuming the range is a small subset of the overall key space).
|
%% setup, assuming the range is a small subset of the overall key space). If
|
||||||
|
%% lookup is required but the range isn't defined then 'undefined' should be
|
||||||
|
%% passed as the query
|
||||||
snapshot_store(LedgerCache, Penciller, Inker, SnapType, Query, LongRunning) ->
|
snapshot_store(LedgerCache, Penciller, Inker, SnapType, Query, LongRunning) ->
|
||||||
LedgerCacheReady = readycache_forsnapshot(LedgerCache, Query),
|
LedgerCacheReady = readycache_forsnapshot(LedgerCache, Query),
|
||||||
BookiesMem = {LedgerCacheReady#ledger_cache.loader,
|
BookiesMem = {LedgerCacheReady#ledger_cache.loader,
|
||||||
|
@ -760,10 +762,18 @@ get_runner(State,
|
||||||
leveled_runner:foldheads_allkeys(SnapFun,
|
leveled_runner:foldheads_allkeys(SnapFun,
|
||||||
Tag, FoldFun,
|
Tag, FoldFun,
|
||||||
JournalCheck, SegmentList);
|
JournalCheck, SegmentList);
|
||||||
get_runner(State,
|
get_runner(State,
|
||||||
{foldobjects_allkeys, Tag, FoldFun, SnapPreFold}) ->
|
{foldobjects_allkeys, Tag, FoldFun, SnapPreFold}) ->
|
||||||
|
get_runner(State,
|
||||||
|
{foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order});
|
||||||
|
get_runner(State,
|
||||||
|
{foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order}) ->
|
||||||
SnapFun = return_snapfun(State, store, no_lookup, true, SnapPreFold),
|
SnapFun = return_snapfun(State, store, no_lookup, true, SnapPreFold),
|
||||||
leveled_runner:foldobjects_allkeys(SnapFun, Tag, FoldFun);
|
leveled_runner:foldobjects_allkeys(SnapFun, Tag, FoldFun, key_order);
|
||||||
|
get_runner(State,
|
||||||
|
{foldobjects_allkeys, Tag, FoldFun, SnapPreFold, sqn_order}) ->
|
||||||
|
SnapFun = return_snapfun(State, store, undefined, true, SnapPreFold),
|
||||||
|
leveled_runner:foldobjects_allkeys(SnapFun, Tag, FoldFun, sqn_order);
|
||||||
get_runner(State,
|
get_runner(State,
|
||||||
{foldheads_bybucket,
|
{foldheads_bybucket,
|
||||||
Tag, Bucket, KeyRange,
|
Tag, Bucket, KeyRange,
|
||||||
|
|
|
@ -46,6 +46,7 @@
|
||||||
to_ledgerkey/3,
|
to_ledgerkey/3,
|
||||||
to_ledgerkey/5,
|
to_ledgerkey/5,
|
||||||
from_ledgerkey/1,
|
from_ledgerkey/1,
|
||||||
|
from_ledgerkey/2,
|
||||||
to_inkerkv/3,
|
to_inkerkv/3,
|
||||||
to_inkerkv/6,
|
to_inkerkv/6,
|
||||||
from_inkerkv/1,
|
from_inkerkv/1,
|
||||||
|
@ -204,6 +205,19 @@ is_active(Key, Value, Now) ->
|
||||||
false
|
false
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
-spec from_ledgerkey(atom(), tuple()) -> false|tuple().
|
||||||
|
%% @doc
|
||||||
|
%% Return the "significant information" from the Ledger Key (normally the
|
||||||
|
%% {Bucket, Key} pair) if and only if the ExpectedTag matched the tag -
|
||||||
|
%% otherwise return false
|
||||||
|
from_ledgerkey(ExpectedTag, {ExpectedTag, Bucket, Key, SubKey}) ->
|
||||||
|
from_ledgerkey({ExpectedTag, Bucket, Key, SubKey});
|
||||||
|
from_ledgerkey(_ExpectedTag, _OtherKey) ->
|
||||||
|
false.
|
||||||
|
|
||||||
|
-spec from_ledgerkey(tuple()) -> tuple().
|
||||||
|
%% @doc
|
||||||
|
%% Return identifying information from the LedgerKey
|
||||||
from_ledgerkey({?IDX_TAG, ?ALL_BUCKETS, {_IdxFld, IdxVal}, {Bucket, Key}}) ->
|
from_ledgerkey({?IDX_TAG, ?ALL_BUCKETS, {_IdxFld, IdxVal}, {Bucket, Key}}) ->
|
||||||
{Bucket, Key, IdxVal};
|
{Bucket, Key, IdxVal};
|
||||||
from_ledgerkey({?IDX_TAG, Bucket, {_IdxFld, IdxVal}, Key}) ->
|
from_ledgerkey({?IDX_TAG, Bucket, {_IdxFld, IdxVal}, Key}) ->
|
||||||
|
|
|
@ -253,7 +253,7 @@ ink_close(Pid) ->
|
||||||
ink_doom(Pid) ->
|
ink_doom(Pid) ->
|
||||||
gen_server:call(Pid, doom, 60000).
|
gen_server:call(Pid, doom, 60000).
|
||||||
|
|
||||||
-spec ink_fold(pid(), integer(), {fun(), fun(), fun()}, pid()) -> ok.
|
%% The result is the final overall accumulator returned by the fold (the
%% inker replies with the value produced by fold_from_sequence/4), not 'ok'.
-spec ink_fold(pid(), integer(), {fun(), fun(), fun()}, any()) -> any().
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Fold over the journal from a starting sequence number (MinSQN), passing
|
%% Fold over the journal from a starting sequence number (MinSQN), passing
|
||||||
%% in three functions and a snapshot of the penciller. The Fold functions
|
%% in three functions and an initial overall accumulator. The Fold functions
|
||||||
|
@ -270,7 +270,7 @@ ink_doom(Pid) ->
|
||||||
%% KeyInJournal
|
%% KeyInJournal
|
||||||
%% ValueInJournal
|
%% ValueInJournal
|
||||||
%% Position - the actual position within the CDB file of the object
|
%% Position - the actual position within the CDB file of the object
|
||||||
%% Acc - the accumulator
|
%% Acc - the batch accumulator
|
||||||
%% ExtractFun - a single arity function which can be applied to ValueInJournal
|
%% ExtractFun - a single arity function which can be applied to ValueInJournal
|
||||||
%% to extract the actual object, and the size of the object,
|
%% to extract the actual object, and the size of the object,
|
||||||
%%
|
%%
|
||||||
|
@ -279,13 +279,13 @@ ink_doom(Pid) ->
|
||||||
%% {stop, {MinSQN, MaxSQN, UpdAcc}}
|
%% {stop, {MinSQN, MaxSQN, UpdAcc}}
|
||||||
%% The FilterFun is required to call stop when MaxSQN is reached
|
%% The FilterFun is required to call stop when MaxSQN is reached
|
||||||
%%
|
%%
|
||||||
%% The InitAccFun should return an initial accumulator for each subfold.
|
%% The InitAccFun should return an initial batch accumulator for each subfold.
|
||||||
%%
|
%%
|
||||||
%% The FoldFun is a 2 arity function that should take as inputs:
|
%% The BatchFun is a two arity function that should take as inputs:
|
||||||
%% The Recipient
|
%% An overall accumulator (passed as the second argument)
|
||||||
%% The Accumulator built over the sub-fold
|
%% The batch accumulator built over the sub-fold (passed as the first argument)
|
||||||
ink_fold(Pid, MinSQN, FoldFuns, Recipient) ->
|
ink_fold(Pid, MinSQN, FoldFuns, Acc) ->
|
||||||
gen_server:call(Pid, {fold, MinSQN, FoldFuns, Recipient}, infinity).
|
gen_server:call(Pid, {fold, MinSQN, FoldFuns, Acc}, infinity).
|
||||||
|
|
||||||
-spec ink_loadpcl(pid(), integer(), fun(), pid()) -> ok.
|
-spec ink_loadpcl(pid(), integer(), fun(), pid()) -> ok.
|
||||||
%%
|
%%
|
||||||
|
@ -297,13 +297,17 @@ ink_fold(Pid, MinSQN, FoldFuns, Recipient) ->
|
||||||
%% The load fun should be a five arity function like:
|
%% The load fun should be a five arity function like:
|
||||||
%% load_fun(KeyInJournal, ValueInJournal, _Position, Acc0, ExtractFun)
|
%% load_fun(KeyInJournal, ValueInJournal, _Position, Acc0, ExtractFun)
|
||||||
ink_loadpcl(Pid, MinSQN, FilterFun, Penciller) ->
|
ink_loadpcl(Pid, MinSQN, FilterFun, Penciller) ->
|
||||||
|
BatchFun =
|
||||||
|
fun(BatchAcc, _Acc) ->
|
||||||
|
push_to_penciller(Penciller, BatchAcc)
|
||||||
|
end,
|
||||||
gen_server:call(Pid,
|
gen_server:call(Pid,
|
||||||
{fold,
|
{fold,
|
||||||
MinSQN,
|
MinSQN,
|
||||||
{FilterFun,
|
{FilterFun,
|
||||||
fun leveled_bookie:empty_ledgercache/0,
|
fun leveled_bookie:empty_ledgercache/0,
|
||||||
fun push_to_penciller/2},
|
BatchFun},
|
||||||
Penciller},
|
ok},
|
||||||
infinity).
|
infinity).
|
||||||
|
|
||||||
-spec ink_compactjournal(pid(), pid(), integer()) -> ok.
|
-spec ink_compactjournal(pid(), pid(), integer()) -> ok.
|
||||||
|
@ -426,12 +430,12 @@ handle_call({key_check, Key, SQN}, _From, State) ->
|
||||||
handle_call({fold,
|
handle_call({fold,
|
||||||
StartSQN,
|
StartSQN,
|
||||||
{FilterFun, InitAccFun, FoldFun},
|
{FilterFun, InitAccFun, FoldFun},
|
||||||
Recipient}, _From, State) ->
|
Acc}, _From, State) ->
|
||||||
Manifest = lists:reverse(leveled_imanifest:to_list(State#state.manifest)),
|
Manifest = lists:reverse(leveled_imanifest:to_list(State#state.manifest)),
|
||||||
Reply =
|
Reply =
|
||||||
fold_from_sequence(StartSQN,
|
fold_from_sequence(StartSQN,
|
||||||
{FilterFun, InitAccFun, FoldFun},
|
{FilterFun, InitAccFun, FoldFun},
|
||||||
Recipient,
|
Acc,
|
||||||
Manifest),
|
Manifest),
|
||||||
{reply, Reply, State};
|
{reply, Reply, State};
|
||||||
handle_call({register_snapshot, Requestor}, _From , State) ->
|
handle_call({register_snapshot, Requestor}, _From , State) ->
|
||||||
|
@ -793,76 +797,82 @@ start_new_activejournal(SQN, RootPath, CDBOpts) ->
|
||||||
{SQN, Filename, PidW, empty}.
|
{SQN, Filename, PidW, empty}.
|
||||||
|
|
||||||
|
|
||||||
%% Scan between sequence numbers applying FilterFun to each entry where
|
|
||||||
%% FilterFun{K, V, Acc} -> Penciller Key List
|
|
||||||
%% Load the output for the CDB file into the Penciller.
|
|
||||||
|
|
||||||
fold_from_sequence(_MinSQN, _FoldFuns, _Rec, []) ->
|
-spec fold_from_sequence(integer(), {fun(), fun(), fun()}, any(), list())
|
||||||
ok;
|
-> any().
|
||||||
fold_from_sequence(MinSQN, FoldFuns, Rec, [{LowSQN, FN, Pid, _LK}|Rest])
|
%% @doc
|
||||||
|
%%
|
||||||
|
%% Scan from the starting sequence number to the end of the Journal. Apply
|
||||||
|
%% the FilterFun as it scans over the CDB file to build up a Batch of relevant
|
||||||
|
%% objects - and then apply the FoldFun to the batch once the batch is
|
||||||
|
%% complete
|
||||||
|
%%
|
||||||
|
%% Inputs - MinSQN, FoldFuns, OverallAccumulator, Inker's Manifest
|
||||||
|
%%
|
||||||
|
%% The fold loops over all the CDB files in the Manifest. Each file is looped
|
||||||
|
%% over in batches using foldfile_between_sequence/7. The batch is a range of
|
||||||
|
%% sequence numbers (so the batch size may be << ?LOADING_BATCH) in compacted
|
||||||
|
%% files
|
||||||
|
fold_from_sequence(_MinSQN, _FoldFuns, Acc, []) ->
|
||||||
|
Acc;
|
||||||
|
fold_from_sequence(MinSQN, FoldFuns, Acc, [{LowSQN, FN, Pid, _LK}|Rest])
|
||||||
when LowSQN >= MinSQN ->
|
when LowSQN >= MinSQN ->
|
||||||
fold_between_sequence(MinSQN,
|
Acc0 = foldfile_between_sequence(MinSQN,
|
||||||
MinSQN + ?LOADING_BATCH,
|
MinSQN + ?LOADING_BATCH,
|
||||||
FoldFuns,
|
FoldFuns,
|
||||||
Rec,
|
Acc,
|
||||||
Pid,
|
Pid,
|
||||||
undefined,
|
undefined,
|
||||||
FN,
|
FN),
|
||||||
Rest);
|
fold_from_sequence(MinSQN, FoldFuns, Acc0, Rest);
|
||||||
fold_from_sequence(MinSQN, FoldFuns, Rec, [{_LowSQN, FN, Pid, _LK}|Rest]) ->
|
fold_from_sequence(MinSQN, FoldFuns, Acc, [{_LowSQN, FN, Pid, _LK}|Rest]) ->
|
||||||
case Rest of
|
% If this file has a LowSQN less than the minimum, we can skip it if the
|
||||||
[] ->
|
% next file also has a LowSQN at or below the minimum
|
||||||
fold_between_sequence(MinSQN,
|
Acc0 =
|
||||||
MinSQN + ?LOADING_BATCH,
|
case Rest of
|
||||||
FoldFuns,
|
[] ->
|
||||||
Rec,
|
foldfile_between_sequence(MinSQN,
|
||||||
Pid,
|
MinSQN + ?LOADING_BATCH,
|
||||||
undefined,
|
FoldFuns,
|
||||||
FN,
|
Acc,
|
||||||
Rest);
|
Pid,
|
||||||
[{NextSQN, _NxtFN, _NxtPid, _NxtLK}|_Rest] when NextSQN > MinSQN ->
|
undefined,
|
||||||
fold_between_sequence(MinSQN,
|
FN);
|
||||||
MinSQN + ?LOADING_BATCH,
|
[{NextSQN, _NxtFN, _NxtPid, _NxtLK}|_Rest] when NextSQN > MinSQN ->
|
||||||
FoldFuns,
|
foldfile_between_sequence(MinSQN,
|
||||||
Rec,
|
MinSQN + ?LOADING_BATCH,
|
||||||
Pid,
|
FoldFuns,
|
||||||
undefined,
|
Acc,
|
||||||
FN,
|
Pid,
|
||||||
Rest);
|
undefined,
|
||||||
_ ->
|
FN);
|
||||||
fold_from_sequence(MinSQN, FoldFuns, Rec, Rest)
|
_ ->
|
||||||
end.
|
Acc
|
||||||
|
end,
|
||||||
|
fold_from_sequence(MinSQN, FoldFuns, Acc0, Rest).
|
||||||
|
|
||||||
|
foldfile_between_sequence(MinSQN, MaxSQN, FoldFuns,
|
||||||
|
Acc, CDBpid, StartPos, FN) ->
|
||||||
fold_between_sequence(MinSQN, MaxSQN, FoldFuns,
|
|
||||||
Recipient, CDBpid, StartPos, FN, Rest) ->
|
|
||||||
leveled_log:log("I0014", [FN, MinSQN]),
|
leveled_log:log("I0014", [FN, MinSQN]),
|
||||||
{FilterFun, InitAccFun, FoldFun} = FoldFuns,
|
{FilterFun, InitAccFun, FoldFun} = FoldFuns,
|
||||||
InitAcc = {MinSQN, MaxSQN, InitAccFun()},
|
InitBatchAcc = {MinSQN, MaxSQN, InitAccFun()},
|
||||||
Res = case leveled_cdb:cdb_scan(CDBpid, FilterFun, InitAcc, StartPos) of
|
|
||||||
{eof, {AccMinSQN, _AccMaxSQN, AccLC}} ->
|
case leveled_cdb:cdb_scan(CDBpid, FilterFun, InitBatchAcc, StartPos) of
|
||||||
ok = FoldFun(Recipient, AccLC),
|
{eof, {_AccMinSQN, _AccMaxSQN, BatchAcc}} ->
|
||||||
{ok, AccMinSQN};
|
FoldFun(BatchAcc, Acc);
|
||||||
{LastPosition, {_AccMinSQN, _AccMaxSQN, AccLC}} ->
|
{LastPosition, {_AccMinSQN, _AccMaxSQN, BatchAcc}} ->
|
||||||
ok = FoldFun(Recipient, AccLC),
|
UpdAcc = FoldFun(BatchAcc, Acc),
|
||||||
NextSQN = MaxSQN + 1,
|
NextSQN = MaxSQN + 1,
|
||||||
fold_between_sequence(NextSQN,
|
foldfile_between_sequence(NextSQN,
|
||||||
NextSQN + ?LOADING_BATCH,
|
NextSQN + ?LOADING_BATCH,
|
||||||
FoldFuns,
|
FoldFuns,
|
||||||
Recipient,
|
UpdAcc,
|
||||||
CDBpid,
|
CDBpid,
|
||||||
LastPosition,
|
LastPosition,
|
||||||
FN,
|
FN)
|
||||||
Rest)
|
|
||||||
end,
|
|
||||||
case Res of
|
|
||||||
{ok, LMSQN} ->
|
|
||||||
fold_from_sequence(LMSQN, FoldFuns, Recipient, Rest);
|
|
||||||
ok ->
|
|
||||||
ok
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
push_to_penciller(Penciller, LedgerCache) ->
|
push_to_penciller(Penciller, LedgerCache) ->
|
||||||
% The push to penciller must start as a tree to correctly de-duplicate
|
% The push to penciller must start as a tree to correctly de-duplicate
|
||||||
% the list by order before becoming a de-duplicated list for loading
|
% the list by order before becoming a de-duplicated list for loading
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
hashlist_query/3,
|
hashlist_query/3,
|
||||||
tictactree/5,
|
tictactree/5,
|
||||||
foldheads_allkeys/5,
|
foldheads_allkeys/5,
|
||||||
foldobjects_allkeys/3,
|
foldobjects_allkeys/4,
|
||||||
foldheads_bybucket/5,
|
foldheads_bybucket/5,
|
||||||
foldobjects_bybucket/3,
|
foldobjects_bybucket/3,
|
||||||
foldobjects_byindex/3
|
foldobjects_byindex/3
|
||||||
|
@ -226,10 +226,11 @@ foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck, SegmentList) ->
|
||||||
FoldFun,
|
FoldFun,
|
||||||
{true, JournalCheck}, SegmentList).
|
{true, JournalCheck}, SegmentList).
|
||||||
|
|
||||||
-spec foldobjects_allkeys(fun(), atom(), fun()) -> {async, fun()}.
|
-spec foldobjects_allkeys(fun(), atom(), fun(), key_order|sqn_order)
|
||||||
|
-> {async, fun()}.
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Fold over all objects for a given tag
|
%% Fold over all objects for a given tag
|
||||||
foldobjects_allkeys(SnapFun, Tag, FoldFun) ->
|
foldobjects_allkeys(SnapFun, Tag, FoldFun, _Order) ->
|
||||||
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
|
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
|
||||||
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
|
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
|
||||||
foldobjects(SnapFun,
|
foldobjects(SnapFun,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue