Allow for segment-acceleration of folds

Initially with basic tests.  If the SlotIndex has been cached, we can now use the slot index as it is based on the Segment hash algorithm.

This looks like it should lead to an order of magnitude improvement in querying for keys/clocks by segment ID.

This also required a slight tweak to the penciller keyfolder.  It now caches the next answer from the SSTiter, rather than restarting the iterator.  When the IMMiter has many more entries than the SSTiter (as the SSTiter is being filtered but not the IMMiter) this could lead to lots of repeated folding.
This commit is contained in:
Martin Sumner 2017-10-31 23:28:35 +00:00
parent f5878548f9
commit b141dd199c
6 changed files with 519 additions and 202 deletions

View file

@ -28,7 +28,7 @@
bucketkey_query/4,
hashlist_query/3,
tictactree/5,
foldheads_allkeys/4,
foldheads_allkeys/5,
foldobjects_allkeys/3,
foldheads_bybucket/4,
foldobjects_bybucket/3,
@ -213,14 +213,18 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) ->
end,
{async, Runner}.
-spec foldheads_allkeys(fun(), atom(), fun(), boolean()) -> {async, fun()}.
-spec foldheads_allkeys(fun(), atom(), fun(), boolean(), false|list(integer()))
-> {async, fun()}.
%% @doc
%% Fold over all heads in the store for a given tag - applying the passed
%% function to each proxy object
%%
%% StartKey and EndKey are both built from the same (null, null, Tag) ledger
%% key.  JournalCheck is wrapped as {true, JournalCheck} to form the
%% DeferredFetch argument of foldobjects.
%%
%% SegmentList is either false or a list of integers (per the spec) and is
%% handed on unchanged to foldobjects/7.  Presumably false means that no
%% segment filtering is applied - confirm against leveled_penciller.
foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck) ->
foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck, SegmentList) ->
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, {true, JournalCheck}).
foldobjects(SnapFun,
Tag, StartKey, EndKey,
FoldFun,
{true, JournalCheck}, SegmentList).
-spec foldobjects_allkeys(fun(), atom(), fun()) -> {async, fun()}.
%% @doc
@ -228,21 +232,30 @@ foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck) ->
foldobjects_allkeys(SnapFun, Tag, FoldFun) ->
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, false).
foldobjects(SnapFun,
Tag, StartKey, EndKey,
FoldFun,
false, false).
-spec foldobjects_bybucket(fun(), {atom(), any(), any()}, fun()) ->
{async, fun()}.
%% @doc
%% Fold over all objects within a given key range in a bucket
%%
%% The second argument is a {Tag, StartKey, EndKey} tuple; the keys are passed
%% to foldobjects as given.  Both DeferredFetch and the segment list are
%% passed as false.
foldobjects_bybucket(SnapFun, {Tag, StartKey, EndKey}, FoldFun) ->
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, false).
foldobjects(SnapFun,
Tag, StartKey, EndKey,
FoldFun,
false, false).
-spec foldheads_bybucket(fun(), {atom(), any(), any()}, fun(), boolean()) ->
{async, fun()}.
%% @doc
%% Fold over all object metadata within a given key range in a bucket
%%
%% The second argument is a {Tag, StartKey, EndKey} tuple; the keys are passed
%% to foldobjects as given.  JournalCheck is wrapped as {true, JournalCheck}
%% to form the DeferredFetch argument, and the segment list is passed as
%% false.
foldheads_bybucket(SnapFun, {Tag, StartKey, EndKey}, FoldFun, JournalCheck) ->
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, {true, JournalCheck}).
foldobjects(SnapFun,
Tag, StartKey, EndKey,
FoldFun,
{true, JournalCheck}, false).
-spec foldobjects_byindex(fun(), tuple(), fun()) -> {async, fun()}.
%% @doc
@ -253,7 +266,10 @@ foldobjects_byindex(SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm}, FoldFun) ->
leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, Field, FromTerm),
EndKey =
leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, Field, ToTerm),
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldFun, false).
foldobjects(SnapFun,
Tag, StartKey, EndKey,
FoldFun,
false, false).
@ -302,8 +318,8 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList) ->
-spec foldobjects(fun(), atom(), tuple(), tuple(), fun(),
false|{true, boolean()}) ->
{async, fun()}.
false|{true, boolean()}, false|list(integer())) ->
{async, fun()}.
%% @doc
%% The object folder should be passed DeferredFetch.
%% DeferredFetch can either be false (which will return to the fold function
@ -311,7 +327,10 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList) ->
%% will be created that if understood by the fold function will allow the fold
%% function to work on the head of the object, and defer fetching the body in
%% case such a fetch is unnecessary.
foldobjects(SnapFun, Tag, StartKey, EndKey, FoldObjectsFun, DeferredFetch) ->
foldobjects(SnapFun,
Tag, StartKey, EndKey,
FoldObjectsFun,
DeferredFetch, SegmentList) ->
{FoldFun, InitAcc} =
case is_tuple(FoldObjectsFun) of
true ->
@ -331,11 +350,12 @@ foldobjects(SnapFun, Tag, StartKey, EndKey, FoldObjectsFun, DeferredFetch) ->
JournalSnapshot,
Tag,
DeferredFetch),
Acc = leveled_penciller:pcl_fetchkeys(LedgerSnapshot,
StartKey,
EndKey,
AccFun,
InitAcc),
Acc = leveled_penciller:pcl_fetchkeysbysegment(LedgerSnapshot,
StartKey,
EndKey,
AccFun,
InitAcc,
SegmentList),
ok = leveled_penciller:pcl_close(LedgerSnapshot),
case DeferredFetch of
{true, false} ->