From a9aa23bc9c04a7f7c1cb73bef9da0ef3f39ad1a8 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 23 Nov 2018 18:56:30 +0000 Subject: [PATCH] Bucket list update the docs to advertise throw capability. Test it for bucket list (and fix ordering of bucket lists) --- src/leveled_bookie.erl | 70 ++++++++++-------------------- src/leveled_runner.erl | 20 +++++++-- test/end_to_end/iterator_SUITE.erl | 52 +++++++++++++++++++++- 3 files changed, 91 insertions(+), 51 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index d48d8d8..0875a56 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -520,57 +520,35 @@ book_headonly(Pid, Bucket, Key, SubKey) -> -spec book_returnfolder(pid(), tuple()) -> {async, fun()}. -%% @doc Snapshots/Clones +%% @doc Folds over store - deprecated +%% The tuple() is a query, and book_returnfolder will return an {async, Folder} +%% whereby calling Folder() will run a particular fold over a snapshot of the +%% store, and close the snapshot when complete %% -%% If there is a snapshot request (e.g. to iterate over the keys) the Bookie -%% may request a clone of the Penciller, or clones of both the Penciller and -%% the Inker should values also need to be accessed. The snapshot clone is -%% made available through a "runner" - a new trasnportable PID through which -%% the previous state of the store can be queried. So, for example, a -%% riak_kv_vnode_worker in the pool could host the runner. -%% -%% The clone is seeded with the manifest SQN. The clone should be registered -%% with the real Inker/Penciller, so that the real Inker/Penciller may prevent -%% the deletion of files still in use by a snapshot clone. -%% -%% Iterators should de-register themselves from the Penciller on completion. -%% Iterators should be automatically release after a timeout period. A file -%% can only be deleted from the Ledger if it is no longer in the manifest, and -%% there are no registered iterators from before the point the file was -%% removed from the manifest. -%% -%% Clones are simply new gen_servers with copies of the relevant -%% StateData. -%% -%% There are a series of specific folders implemented that provide pre-canned -%% snapshot functionality, more folders can be seen in the get_runner/2 -%% function: -%% -%% {bucket_stats, Bucket} -> return a key count and total object size within -%% a bucket -%% {riakbucket_stats, Bucket} -> as above, but for buckets with the Riak Tag -%% {bucket_list, Tag, {FoldKeysFun, Acc}} -> if we assume buckets and -%% keys are binaries, provides a fast bucket list function -%% {index_query, -%% Constraint, -%% {FoldKeysFun, Acc}, -%% {IdxField, StartValue, EndValue}, -%% {ReturnTerms, TermRegex}} -> secondray index query -%% {keylist, Tag, {FoldKeysFun, Acc}} -> list all keys with tag -%% {keylist, Tag, Bucket, {FoldKeysFun, Acc}} -> list all keys within given -%% bucket -%% {foldobjects_bybucket, Tag, Bucket, FoldObjectsFun} -> fold over all objects -%% in a given bucket -%% {foldobjects_byindex, -%% Tag, -%% Bucket, -%% {Field, FromTerm, ToTerm}, -%% FoldObjectsFun} -> fold over all objects with an entry in a given -%% range on a given index +%% For any new application requiring a fold - use the API below instead, and +%% one of: +%% - book_indexfold +%% - book_bucketlist +%% - book_keylist +%% - book_headfold +%% - book_objectfold book_returnfolder(Pid, RunnerType) -> gen_server:call(Pid, {return_runner, RunnerType}, infinity). +%% Different runner types for async queries: +%% - book_indexfold +%% - book_bucketlist +%% - book_keylist +%% - book_headfold +%% - book_objectfold +%% +%% See individual instructions for each one. All folds can be completed early +%% by using a fold_function that throws an exception when some threshold is +%% reached - and a worker that catches that exception. +%% +%% See test/end_to_end/iterator_SUITE:breaking_folds/1 + %% @doc Builds and returns an `{async, Runner}' pair for secondary %% index queries. Calling `Runner' will fold over keys (ledger) tagged %% with the index `?IDX_TAG' and Constrain the fold to a specific diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index 9351086..45e3518 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -95,10 +95,22 @@ bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) -> BucketAcc = get_nextbucket(null, null, Tag, LedgerSnapshot, [], {0, MaxBuckets}), - ok = leveled_penciller:pcl_close(LedgerSnapshot), - lists:foldl(fun({B, _K}, Acc) -> FoldBucketsFun(B, Acc) end, - InitAcc, - BucketAcc) + AfterFun = + fun() -> + ok = leveled_penciller:pcl_close(LedgerSnapshot) + end, + FoldRunner = + fun() -> + lists:foldr(fun({B, _K}, Acc) -> FoldBucketsFun(B, Acc) end, + InitAcc, + BucketAcc) + % Buckets in reverse alphabetical order so foldr + end, + % For this fold, the fold over the store is actually completed + % before results are passed to the FoldBucketsFun to be + % accumulated. Using a throw to exit the fold early will not + % in this case save significant time. + wrap_runner(FoldRunner, AfterFun) end, {async, Runner}. diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl index 04c5c46..3bab574 100644 --- a/test/end_to_end/iterator_SUITE.erl +++ b/test/end_to_end/iterator_SUITE.erl @@ -182,7 +182,57 @@ breaking_folds(_Config) -> sqn_order), ObjSizeList2_SO = lists:reverse(CatchingFold(ObjFolderTo1K)), io:format("Object fold with result size ~w~n", [length(ObjSizeList2_SO)]), - true = 1000 == length(ObjSizeList2_SO), + true = 1000 == length(ObjSizeList2_SO), + + ObjL2 = testutil:generate_objects(10, + binary_uuid, + [], + ObjectGen, + IndexGen, + "B2"), + ObjL3 = testutil:generate_objects(10, + binary_uuid, + [], + ObjectGen, + IndexGen, + "B3"), + ObjL4 = testutil:generate_objects(10, + binary_uuid, + [], + ObjectGen, + IndexGen, + "B4"), + testutil:riakload(Bookie1, ObjL2), + testutil:riakload(Bookie1, ObjL3), + testutil:riakload(Bookie1, ObjL4), + + FBAccT = {fun(B, Acc) -> [B|Acc] end, []}, + {async, BucketFolder} = + leveled_bookie:book_bucketlist(Bookie1, ?RIAK_TAG, FBAccT, all), + BucketList1 = lists:reverse(BucketFolder()), + io:format("bucket list with result size ~w~n", [length(BucketList1)]), + true = 4 == length(BucketList1), + + StopAt3Fun = + fun(B, Acc) -> + Acc0 = [B|Acc], + case B of + <<"B3">> -> + throw({stop_fold, Acc0}); + _ -> + Acc0 + end + end, + + {async, StopAt3BucketFolder} = + leveled_bookie:book_bucketlist(Bookie1, + ?RIAK_TAG, + {StopAt3Fun, []}, + all), + BucketListSA3 = lists:reverse(CatchingFold(StopAt3BucketFolder)), + io:format("bucket list with result ~w~n", [BucketListSA3]), + true = [<<"B2">>, <<"B3">>] == BucketListSA3, + ok = leveled_bookie:book_close(Bookie1), testutil:reset_filestructure().