Add regex support to $key index

Regex to be applied to key only
This commit is contained in:
Martin Sumner 2018-09-21 12:04:32 +01:00
parent d27be07f67
commit 1a3d3daa89
3 changed files with 84 additions and 23 deletions

View file

@ -76,6 +76,7 @@
book_keylist/3,
book_keylist/4,
book_keylist/5,
book_keylist/6,
book_objectfold/4,
book_objectfold/5,
book_objectfold/6,
@ -677,7 +678,8 @@ book_keylist(Pid, Tag, Bucket, FoldAccT) ->
%% is `StartKey', the first key in the range and `EndKey' the last,
%% (inclusive.) Or the atom `all', which will return all keys in the
%% `Bucket'.
-spec book_keylist(pid(), Tag, Bucket, KeyRange, FoldAccT) -> {async, Runner} when
-spec book_keylist(pid(), Tag, Bucket, KeyRange, FoldAccT) ->
{async, Runner} when
Tag :: leveled_codec:tag(),
FoldAccT :: {FoldFun, Acc},
FoldFun :: fun((Bucket, Key, Acc) -> Acc),
@ -689,9 +691,30 @@ book_keylist(Pid, Tag, Bucket, FoldAccT) ->
Key :: term(),
Runner :: fun(() -> Acc).
book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT) ->
RunnerType = {keylist, Tag, Bucket, KeyRange, FoldAccT},
RunnerType = {keylist, Tag, Bucket, KeyRange, FoldAccT, undefined},
book_returnfolder(Pid, RunnerType).
%% @doc As for book_keylist/5, with the additional constraint that a compiled
%% regular expression (`TermRegex') is passed, to be applied against any key
%% that is in the range. The expression is always applied to the Key and only
%% the Key, not to any SubKey.
%%
%% The request is handed to book_returnfolder/2 as a 6-element `keylist'
%% tuple whose final element is the regular expression.
-spec book_keylist(pid(), Tag, Bucket, KeyRange, FoldAccT, TermRegex) ->
    {async, Runner} when
      Tag :: leveled_codec:tag(),
      FoldAccT :: {FoldFun, Acc},
      FoldFun :: fun((Bucket, Key, Acc) -> Acc),
      Acc :: term(),
      Bucket :: term(),
      KeyRange :: {StartKey, EndKey} | all,
      StartKey :: Key,
      EndKey :: Key,
      Key :: term(),
      TermRegex :: re:mp(),
      Runner :: fun(() -> Acc).
book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT, TermRegex) ->
    book_returnfolder(Pid,
                      {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}).
%% @doc fold over all the objects/values in the store in key
%% order. `Tag' is the tagged type of object. `FoldAccT' is a 2-tuple,
%% the first element being a 4-arity fun, that is called once for each
@ -1490,9 +1513,11 @@ get_runner(State, {keylist, Tag, FoldAccT}) ->
get_runner(State, {keylist, Tag, Bucket, FoldAccT}) ->
SnapFun = return_snapfun(State, ledger, no_lookup, true, true),
leveled_runner:bucketkey_query(SnapFun, Tag, Bucket, FoldAccT);
get_runner(State, {keylist, Tag, Bucket, KeyRange, FoldAccT}) ->
get_runner(State, {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}) ->
SnapFun = return_snapfun(State, ledger, no_lookup, true, true),
leveled_runner:bucketkey_query(SnapFun, Tag, Bucket, KeyRange, FoldAccT);
leveled_runner:bucketkey_query(SnapFun,
Tag, Bucket, KeyRange,
FoldAccT, TermRegex);
%% Set of runners for object or metadata folds
get_runner(State,
{foldheads_allkeys,

View file

@ -27,7 +27,7 @@
bucket_list/5,
index_query/3,
bucketkey_query/4,
bucketkey_query/5,
bucketkey_query/6,
hashlist_query/3,
tictactree/5,
foldheads_allkeys/5,
@ -47,6 +47,7 @@
leveled_codec:ledger_key()|null}.
-type fun_and_acc()
:: {fun(), any()}.
-type term_regex() :: re:mp()|undefined.
%%%============================================================================
%%% External functions
@ -132,15 +133,16 @@ index_query(SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT) ->
{async, Runner}.
-spec bucketkey_query(fun(), leveled_codec:tag(), any(),
key_range(), fun_and_acc()) -> {async, fun()}.
key_range(), fun_and_acc(), term_regex()) -> {async, fun()}.
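%% @doc
%% Fold over all keys in `KeyRange' under tag (restricted to a given bucket).
%% If `TermRegex' is `undefined' every active key is passed to the fold fun;
%% otherwise only keys for which re:run/2 against `TermRegex' finds a match.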
bucketkey_query(SnapFun, Tag, Bucket,
{StartKey, EndKey},
{FoldKeysFun, InitAcc}) ->
{FoldKeysFun, InitAcc},
TermRegex) ->
SK = leveled_codec:to_ledgerkey(Bucket, StartKey, Tag),
EK = leveled_codec:to_ledgerkey(Bucket, EndKey, Tag),
AccFun = accumulate_keys(FoldKeysFun),
AccFun = accumulate_keys(FoldKeysFun, TermRegex),
Runner =
fun() ->
{ok, LedgerSnapshot, _JournalSnapshot} = SnapFun(),
@ -159,7 +161,7 @@ bucketkey_query(SnapFun, Tag, Bucket,
%% @doc
%% Fold over all keys under tag (potentially restricted to a given bucket)
bucketkey_query(SnapFun, Tag, Bucket, FunAcc) ->
bucketkey_query(SnapFun, Tag, Bucket, {null, null}, FunAcc).
bucketkey_query(SnapFun, Tag, Bucket, {null, null}, FunAcc, undefined).
-spec hashlist_query(fun(), leveled_codec:tag(), boolean()) -> {async, fun()}.
%% @doc
@ -660,13 +662,24 @@ check_presence(Key, Value, InkerClone) ->
false
end.
accumulate_keys(FoldKeysFun) ->
accumulate_keys(FoldKeysFun, TermRegex) ->
Now = leveled_util:integer_now(),
AccFun = fun(Key, Value, Acc) ->
AccFun =
fun(Key, Value, Acc) ->
case leveled_codec:is_active(Key, Value, Now) of
true ->
{B, K} = leveled_codec:from_ledgerkey(Key),
case TermRegex of
undefined ->
FoldKeysFun(B, K, Acc);
Re ->
case re:run(K, Re) of
nomatch ->
Acc;
_ ->
FoldKeysFun(B, K, Acc)
end
end;
false ->
Acc
end

View file

@ -525,15 +525,38 @@ dollar_key_index(_Config) ->
EndKey = testutil:fixed_bin_key(779),
{async, Folder} = leveled_bookie:book_keylist(Bookie1,
{async, Folder} =
leveled_bookie:book_keylist(Bookie1,
?RIAK_TAG,
<<"Bucket1">>,
{StartKey, EndKey}, {FoldKeysFun, []}
{StartKey, EndKey},
{FoldKeysFun, []}
),
ResLen = length(Folder()),
io:format("Length of Result of folder ~w~n", [ResLen]),
true = 657 == ResLen,
{ok, REMatch} = re:compile("K.y"),
{ok, REMiss} = re:compile("key"),
{async, FolderREMatch} =
leveled_bookie:book_keylist(Bookie1,
?RIAK_TAG,
<<"Bucket1">>,
{StartKey, EndKey},
{FoldKeysFun, []},
REMatch),
{async, FolderREMiss} =
leveled_bookie:book_keylist(Bookie1,
?RIAK_TAG,
<<"Bucket1">>,
{StartKey, EndKey},
{FoldKeysFun, []},
REMiss),
true = 657 == length(FolderREMatch()),
true = 0 == length(FolderREMiss()),
ok = leveled_bookie:book_close(Bookie1),
testutil:reset_filestructure().