Allow for segment-acceleration of folds
Initially with basic tests. If the SlotIndex has been cached, we can now use the slot index, as it is based on the Segment hash algorithm. This looks like it should lead to an order of magnitude improvement in querying for keys/clocks by segment ID. This also required a slight tweak to the penciller keyfolder. It now caches the next answer from the SSTiter, rather than restarting the iterator. When the IMMiter has many more entries than the SSTiter (as the SSTiter is being filtered but not the IMMiter), restarting the iterator could lead to lots of repeated folding.
This commit is contained in:
parent
f5878548f9
commit
b141dd199c
6 changed files with 519 additions and 202 deletions
|
@ -28,7 +28,7 @@
|
|||
bucketkey_query/4,
|
||||
hashlist_query/3,
|
||||
tictactree/5,
|
||||
foldheads_allkeys/4,
|
||||
foldheads_allkeys/5,
|
||||
foldobjects_allkeys/3,
|
||||
foldheads_bybucket/4,
|
||||
foldobjects_bybucket/3,
|
||||
|
@ -213,14 +213,18 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) ->
|
|||
end,
|
||||
{async, Runner}.
|
||||
|
||||
-spec foldheads_allkeys(fun(), atom(), fun(), boolean()) -> {async, fun()}.
|
||||
-spec foldheads_allkeys(fun(), atom(), fun(), boolean(), false|list(integer()))
|
||||
-> {async, fun()}.
|
||||
%% @doc
|
||||
%% Fold over all heads in the store for a given tag - applying the passed
|
||||
%% function to each proxy object
|
||||
foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck) ->
|
||||
foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck, SegmentList) ->
|
||||
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
|
||||
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
|
||||
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, {true, JournalCheck}).
|
||||
foldobjects(SnapFun,
|
||||
Tag, StartKey, EndKey,
|
||||
FoldFun,
|
||||
{true, JournalCheck}, SegmentList).
|
||||
|
||||
-spec foldobjects_allkeys(fun(), atom(), fun()) -> {async, fun()}.
|
||||
%% @doc
|
||||
|
@ -228,21 +232,30 @@ foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck) ->
|
|||
foldobjects_allkeys(SnapFun, Tag, FoldFun) ->
|
||||
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
|
||||
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
|
||||
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, false).
|
||||
foldobjects(SnapFun,
|
||||
Tag, StartKey, EndKey,
|
||||
FoldFun,
|
||||
false, false).
|
||||
|
||||
-spec foldobjects_bybucket(fun(), {atom(), any(), any()}, fun()) ->
|
||||
{async, fun()}.
|
||||
%% @doc
|
||||
%% Fold over all objects within a given key range in a bucket
|
||||
foldobjects_bybucket(SnapFun, {Tag, StartKey, EndKey}, FoldFun) ->
|
||||
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, false).
|
||||
foldobjects(SnapFun,
|
||||
Tag, StartKey, EndKey,
|
||||
FoldFun,
|
||||
false, false).
|
||||
|
||||
-spec foldheads_bybucket(fun(), {atom(), any(), any()}, fun(), boolean()) ->
|
||||
{async, fun()}.
|
||||
%% @doc
|
||||
%% Fold over all object metadata within a given key range in a bucket
|
||||
foldheads_bybucket(SnapFun, {Tag, StartKey, EndKey}, FoldFun, JournalCheck) ->
|
||||
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, {true, JournalCheck}).
|
||||
foldobjects(SnapFun,
|
||||
Tag, StartKey, EndKey,
|
||||
FoldFun,
|
||||
{true, JournalCheck}, false).
|
||||
|
||||
-spec foldobjects_byindex(fun(), tuple(), fun()) -> {async, fun()}.
|
||||
%% @doc
|
||||
|
@ -253,7 +266,10 @@ foldobjects_byindex(SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm}, FoldFun) ->
|
|||
leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, Field, FromTerm),
|
||||
EndKey =
|
||||
leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, Field, ToTerm),
|
||||
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, false).
|
||||
foldobjects(SnapFun,
|
||||
Tag, StartKey, EndKey,
|
||||
FoldFun,
|
||||
false, false).
|
||||
|
||||
|
||||
|
||||
|
@ -302,8 +318,8 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList) ->
|
|||
|
||||
|
||||
-spec foldobjects(fun(), atom(), tuple(), tuple(), fun(),
|
||||
false|{true, boolean()}) ->
|
||||
{async, fun()}.
|
||||
false|{true, boolean()}, false|list(integer())) ->
|
||||
{async, fun()}.
|
||||
%% @doc
|
||||
%% The object folder should be passed DeferredFetch.
|
||||
%% DeferredFetch can either be false (which will return to the fold function
|
||||
|
@ -311,7 +327,10 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList) ->
|
|||
%% will be created that if understood by the fold function will allow the fold
|
||||
%% function to work on the head of the object, and defer fetching the body in
|
||||
%% case such a fetch is unnecessary.
|
||||
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldObjectsFun, DeferredFetch) ->
|
||||
foldobjects(SnapFun,
|
||||
Tag, StartKey, EndKey,
|
||||
FoldObjectsFun,
|
||||
DeferredFetch, SegmentList) ->
|
||||
{FoldFun, InitAcc} =
|
||||
case is_tuple(FoldObjectsFun) of
|
||||
true ->
|
||||
|
@ -331,11 +350,12 @@ foldobjects(SnapFun, Tag, StartKey, EndKey, FoldObjectsFun, DeferredFetch) ->
|
|||
JournalSnapshot,
|
||||
Tag,
|
||||
DeferredFetch),
|
||||
Acc = leveled_penciller:pcl_fetchkeys(LedgerSnapshot,
|
||||
StartKey,
|
||||
EndKey,
|
||||
AccFun,
|
||||
InitAcc),
|
||||
Acc = leveled_penciller:pcl_fetchkeysbysegment(LedgerSnapshot,
|
||||
StartKey,
|
||||
EndKey,
|
||||
AccFun,
|
||||
InitAcc,
|
||||
SegmentList),
|
||||
ok = leveled_penciller:pcl_close(LedgerSnapshot),
|
||||
case DeferredFetch of
|
||||
{true, false} ->
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue