Merge pull request #163 from martinsumner/mas-i162-isempty

Mas i162 isempty
Martin Sumner 2018-09-03 17:00:55 +01:00 committed by GitHub
commit a1269e5274
7 changed files with 143 additions and 42 deletions


@@ -1,4 +1,4 @@
 ## Design
 The store is written in Erlang using the actor model, the primary actors being:
@@ -118,11 +118,17 @@ Three potential recovery strategies are supported to provide some flexibility fo
 - retain - on compaction KeyDeltas are retained in the Journal, only values are removed.
 - recalc (not yet implemented) - the compaction rules assume that on recovery the key changes will be recalculated by comparing the change with the current database state.
+## Head only
+Leveled can be started in `head_only` mode. This is a special mode which dispenses with the long-term role of the Journal in retaining data. It is a mode to be used in *special circumstances* when values are small and Key/Value pairs are added in batches.
+In `head_only` mode, batches of keys and values are stored first in the Journal; however, once the last element received by a Journal file has been persisted into the Ledger, that Journal file can be deleted. The values are never returned from the Journal except during startup, to recover the in-memory part of the Ledger (the Bookie and Penciller's memory).
+There are two ways in which `head_only` mode can be enabled - `with_lookup` and `no_lookup`. In `with_lookup` mode an individual value can be fetched from Leveled using a HEAD request. In `no_lookup` mode, HEAD requests are not supported - values can only be returned using `fold_heads`. The `no_lookup` mode is marginally more efficient in terms of CPU usage when under write pressure (it avoids generating key hashes and hash-based lookup indexes within the Penciller).
+The `head_only` mode was created so that it could be used as an AAE store in Riak - where values may simply be a version vector or a hash, and retention of data is not critical (it is not the primary store of real users' data). Leveled is not optimised for small values, and `head_only` mode improves on this; the intention, however, is that Leveled should remain for the long term an LSM tree designed for scenarios with larger values. Features, testing and support for the `head_only` modes will be limited compared to support for Leveled running in its standard mode of operation. For use cases where `head_only` behaviour is needed in the primary data store, an alternative store would be a safer choice.
+There is no ability to mix `head_only` behaviour with standard behaviour. There is no expected behaviour when switching databases between different `head_only` modes, and data loss is highly likely.
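A minimal sketch of driving `head_only` mode, modelled on the unit tests added in this commit (the root path is illustrative, ?HEAD_TAG is assumed to be in scope from leveled's header include, and only API calls that appear in this diff are used):

    %% Start a store that keeps only heads, with no individual lookup
    {ok, Bookie} = leveled_bookie:book_start([{root_path, "/tmp/ho_store"},
                                              {head_only, no_lookup}]),
    %% Batches are ObjectSpecs of the form {add|remove, Bucket, Key, SubKey, Value}
    ok = leveled_bookie:book_mput(Bookie,
                                  [{add, <<"B1">>, <<"K1">>, <<"SK1">>, 99}]),
    false = leveled_bookie:book_isempty(Bookie, ?HEAD_TAG),
    ok = leveled_bookie:book_close(Bookie).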


@@ -162,6 +162,9 @@
 -type head_timings() :: no_timing|#head_timings{}.
 -type timing_types() :: head|get|put|fold.
 -type recent_aae() :: false|#recent_aae{}|undefined.
+-type key() :: binary()|string().
+    % Keys SHOULD be binary()
+    % string() support is a legacy of old tests
 -type open_options() ::
     %% For full description of options see ../docs/STARTUP_OPTIONS.md
     [{root_path, string()|undefined} |
@@ -278,6 +281,8 @@
         % Defaults to ?COMPRESSION_POINT
     ].
+
+-export_type([key/0]).
 %%%============================================================================
 %%% API
@@ -329,7 +334,7 @@ book_start(Opts) ->
     gen_server:start_link(?MODULE, [set_defaults(Opts)], []).
--spec book_tempput(pid(), any(), any(), any(),
+-spec book_tempput(pid(), key(), key(), any(),
                     leveled_codec:index_specs(),
                     leveled_codec:tag(), integer()) -> ok|pause.
@@ -396,7 +401,7 @@ book_put(Pid, Bucket, Key, Object, IndexSpecs) ->
 book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag) ->
     book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, infinity).
--spec book_put(pid(), any(), any(), any(),
+-spec book_put(pid(), key(), key(), any(),
                 leveled_codec:index_specs(),
                 leveled_codec:tag(), infinity|integer()) -> ok|pause.
@@ -432,7 +437,7 @@ book_mput(Pid, ObjectSpecs) ->
 book_mput(Pid, ObjectSpecs, TTL) ->
     gen_server:call(Pid, {mput, ObjectSpecs, TTL}, infinity).
--spec book_delete(pid(), any(), any(), leveled_codec:index_specs())
+-spec book_delete(pid(), key(), key(), leveled_codec:index_specs())
                     -> ok|pause.
 %% @doc
@@ -444,9 +449,9 @@ book_delete(Pid, Bucket, Key, IndexSpecs) ->
     book_put(Pid, Bucket, Key, delete, IndexSpecs, ?STD_TAG).
--spec book_get(pid(), any(), any(), leveled_codec:tag())
+-spec book_get(pid(), key(), key(), leveled_codec:tag())
     -> {ok, any()}|not_found.
--spec book_head(pid(), any(), any(), leveled_codec:tag())
+-spec book_head(pid(), key(), key(), leveled_codec:tag())
     -> {ok, any()}|not_found.
 %% @doc - GET and HEAD requests
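As a quick, hedged illustration of the PUT/GET pair these specs describe (Bookie pid assumed started; bucket and key given as binaries, per the new `key()` guidance; ?STD_TAG assumed in scope):

    ok = leveled_bookie:book_put(Bookie, <<"B">>, <<"K">>, <<"V">>, [], ?STD_TAG),
    {ok, <<"V">>} = leveled_bookie:book_get(Bookie, <<"B">>, <<"K">>, ?STD_TAG).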
@@ -503,7 +508,7 @@ book_head(Pid, Bucket, Key) ->
 %% {bucket_stats, Bucket} -> return a key count and total object size within
 %% a bucket
 %% {riakbucket_stats, Bucket} -> as above, but for buckets with the Riak Tag
-%% {binary_bucketlist, Tag, {FoldKeysFun, Acc}} -> if we assume buckets and
+%% {bucket_list, Tag, {FoldKeysFun, Acc}} -> if we assume buckets and
 %% keys are binaries, provides a fast bucket list function
 %% {index_query,
 %%        Constraint,
@@ -1200,14 +1205,14 @@ get_runner(State,
     leveled_runner:foldobjects_byindex(SnapFun,
                                        {Tag, Bucket, Field, FromTerm, ToTerm},
                                        FoldObjectsFun);
-get_runner(State, {binary_bucketlist, Tag, FoldAccT}) ->
+get_runner(State, {bucket_list, Tag, FoldAccT}) ->
     {FoldBucketsFun, Acc} = FoldAccT,
     SnapFun = return_snapfun(State, ledger, no_lookup, false, false),
-    leveled_runner:binary_bucketlist(SnapFun, Tag, FoldBucketsFun, Acc);
+    leveled_runner:bucket_list(SnapFun, Tag, FoldBucketsFun, Acc);
 get_runner(State, {first_bucket, Tag, FoldAccT}) ->
     {FoldBucketsFun, Acc} = FoldAccT,
     SnapFun = return_snapfun(State, ledger, no_lookup, false, false),
-    leveled_runner:binary_bucketlist(SnapFun, Tag, FoldBucketsFun, Acc, 1);
+    leveled_runner:bucket_list(SnapFun, Tag, FoldBucketsFun, Acc, 1);
 %% Set of specific runners, primarily used as examples for tests
 get_runner(State, DeprecatedQuery) ->
     get_deprecatedrunner(State, DeprecatedQuery).
@@ -2166,6 +2171,88 @@ is_empty_test() ->
     ok = leveled_bookie:book_close(Bookie1).
 
+is_empty_headonly_test() ->
+    RootPath = reset_filestructure(),
+    {ok, Bookie1} = book_start([{root_path, RootPath},
+                                {max_journalsize, 1000000},
+                                {cache_size, 500},
+                                {head_only, no_lookup}]),
+    ?assertMatch(true, book_isempty(Bookie1, ?HEAD_TAG)),
+    ObjSpecs =
+        [{add, <<"B1">>, <<"K1">>, <<1:8/integer>>, 100},
+         {remove, <<"B1">>, <<"K1">>, <<0:8/integer>>, null}],
+    ok = book_mput(Bookie1, ObjSpecs),
+    ?assertMatch(false, book_isempty(Bookie1, ?HEAD_TAG)),
+    ok = book_close(Bookie1).
+
+foldkeys_headonly_test() ->
+    foldkeys_headonly_tester(5000, 25, "BucketStr"),
+    foldkeys_headonly_tester(2000, 25, <<"B0">>).
+
+foldkeys_headonly_tester(ObjectCount, BlockSize, BStr) ->
+    RootPath = reset_filestructure(),
+    {ok, Bookie1} = book_start([{root_path, RootPath},
+                                {max_journalsize, 1000000},
+                                {cache_size, 500},
+                                {head_only, no_lookup}]),
+    GenObjSpecFun =
+        fun(I) ->
+            Key = I rem 6,
+            {add, BStr, <<Key:8/integer>>, integer_to_list(I), I}
+        end,
+    ObjSpecs = lists:map(GenObjSpecFun, lists:seq(1, ObjectCount)),
+    ObjSpecBlocks =
+        lists:map(fun(I) ->
+                      lists:sublist(ObjSpecs, I * BlockSize + 1, BlockSize)
+                  end,
+                  lists:seq(0, ObjectCount div BlockSize - 1)),
+    lists:map(fun(Block) -> book_mput(Bookie1, Block) end, ObjSpecBlocks),
+    ?assertMatch(false, book_isempty(Bookie1, ?HEAD_TAG)),
+
+    FolderT =
+        {keylist,
+            ?HEAD_TAG, BStr,
+            {fun(_B, {K, SK}, Acc) -> [{K, SK}|Acc] end, []}
+        },
+    {async, Folder1} = book_returnfolder(Bookie1, FolderT),
+    Key_SKL1 = lists:reverse(Folder1()),
+    Key_SKL_Compare =
+        lists:usort(lists:map(fun({add, _B, K, SK, _V}) -> {K, SK} end,
+                              ObjSpecs)),
+    ?assertMatch(Key_SKL_Compare, Key_SKL1),
+
+    ok = book_close(Bookie1),
+
+    {ok, Bookie2} = book_start([{root_path, RootPath},
+                                {max_journalsize, 1000000},
+                                {cache_size, 500},
+                                {head_only, no_lookup}]),
+    {async, Folder2} = book_returnfolder(Bookie2, FolderT),
+    Key_SKL2 = lists:reverse(Folder2()),
+    ?assertMatch(Key_SKL_Compare, Key_SKL2),
+    ok = book_close(Bookie2).
+
+is_empty_stringkey_test() ->
+    RootPath = reset_filestructure(),
+    {ok, Bookie1} = book_start([{root_path, RootPath},
+                                {max_journalsize, 1000000},
+                                {cache_size, 500}]),
+    ?assertMatch(true, book_isempty(Bookie1, ?STD_TAG)),
+    Past = leveled_util:integer_now() - 300,
+    ?assertMatch(true, leveled_bookie:book_isempty(Bookie1, ?STD_TAG)),
+    ok = book_tempput(Bookie1,
+                      "B", "K", {value, <<"V">>}, [],
+                      ?STD_TAG, Past),
+    ok = book_put(Bookie1,
+                  "B", "K0", {value, <<"V">>}, [],
+                  ?STD_TAG),
+    ?assertMatch(false, book_isempty(Bookie1, ?STD_TAG)),
+    ok = book_close(Bookie1).
+
 scan_table_test() ->
     K1 = leveled_codec:to_ledgerkey(<<"B1">>,
                                     <<"K1">>,


@@ -67,7 +67,8 @@
          riak_extract_metadata/2,
          segment_hash/1,
          to_lookup/1,
-         riak_metadata_to_binary/2]).
+         riak_metadata_to_binary/2,
+         next_key/1]).
 
 -define(V1_VERS, 1).
 -define(MAGIC, 53). % riak_kv -> riak_object
@@ -252,6 +253,8 @@ from_ledgerkey({?IDX_TAG, ?ALL_BUCKETS, {_IdxFld, IdxVal}, {Bucket, Key}}) ->
     {Bucket, Key, IdxVal};
 from_ledgerkey({?IDX_TAG, Bucket, {_IdxFld, IdxVal}, Key}) ->
     {Bucket, Key, IdxVal};
+from_ledgerkey({?HEAD_TAG, Bucket, Key, SubKey}) ->
+    {Bucket, {Key, SubKey}};
 from_ledgerkey({_Tag, Bucket, Key, _SubKey}) ->
     {Bucket, Key}.
@@ -834,7 +837,13 @@ get_metadata_from_siblings(<<ValLen:32/integer, Rest0/binary>>,
                               MetaBin:MetaLen/binary>>,
                            [LastMod|LastMods]).
 
+-spec next_key(leveled_bookie:key()) -> leveled_bookie:key().
+%% @doc
+%% Get the next key to iterate from a given point
+next_key(Key) when is_binary(Key) ->
+    <<Key/binary, 0>>;
+next_key(Key) when is_list(Key) ->
+    Key ++ [0].
+
 %%%============================================================================
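`next_key/1` appends a zero byte to a binary key (or the integer 0 to a list key), producing the smallest binary that sorts strictly after the input - which is what lets the bucket fold in `leveled_runner` resume just past a bucket or key. Illustrative assertions (not part of the commit):

    <<"B1", 0>> = leveled_codec:next_key(<<"B1">>),
    "B1" ++ [0] = leveled_codec:next_key("B1"),
    %% Nothing can sort between a binary key and its next_key
    true = <<"B1">> < <<"B1", 0>>.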


@@ -40,8 +40,6 @@
         {info, "Bucket list finds no more results"}},
     {"B0009",
         {info, "Bucket list finds Bucket ~w"}},
-    {"B0010",
-        {info, "Bucket list finds non-binary Bucket ~w"}},
     {"B0011",
         {warn, "Call to destroy the store and so all files to be removed"}},
     {"B0013",


@@ -23,8 +23,8 @@
 -export([
         bucket_sizestats/3,
-        binary_bucketlist/4,
-        binary_bucketlist/5,
+        bucket_list/4,
+        bucket_list/5,
         index_query/3,
         bucketkey_query/4,
         bucketkey_query/5,
@@ -73,19 +73,20 @@ bucket_sizestats(SnapFun, Bucket, Tag) ->
         end,
     {async, Runner}.
 
--spec binary_bucketlist(fun(), leveled_codec:tag(), fun(), any())
+-spec bucket_list(fun(), leveled_codec:tag(), fun(), any())
                             -> {async, fun()}.
 %% @doc
-%% List buckets for tag, assuming bucket names are all binary type
-binary_bucketlist(SnapFun, Tag, FoldBucketsFun, InitAcc) ->
-    binary_bucketlist(SnapFun, Tag, FoldBucketsFun, InitAcc, -1).
+%% List buckets for tag, assuming bucket names are all either binary, ascii
+%% strings or integers
+bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc) ->
+    bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, -1).
 
--spec binary_bucketlist(fun(), leveled_codec:tag(), fun(), any(), integer())
+-spec bucket_list(fun(), leveled_codec:tag(), fun(), any(), integer())
                             -> {async, fun()}.
 %% @doc
 %% set Max Buckets to -1 to list all buckets, otherwise will only return
 %% MaxBuckets (use 1 to confirm that there exists any bucket for a given Tag)
-binary_bucketlist(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) ->
+bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) ->
     Runner =
         fun() ->
             {ok, LedgerSnapshot, _JournalSnapshot} = SnapFun(),
@@ -437,27 +438,27 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList, {C, L}) ->
         null ->
             leveled_log:log("B0008",[]),
             BKList;
-        {{B, K}, V} when is_binary(B), is_binary(K) ->
+        {{B, K}, V} ->
             case leveled_codec:is_active({Tag, B, K, null}, V, Now) of
                 true ->
                     leveled_log:log("B0009",[B]),
-                    get_nextbucket(<<B/binary, 0>>,
+                    get_nextbucket(leveled_codec:next_key(B),
                                     null,
                                     Tag,
                                     LedgerSnapshot,
                                     [{B, K}|BKList],
                                     {C + 1, L});
                 false ->
-                    get_nextbucket(B,
-                                    <<K/binary, 0>>,
-                                    Tag,
-                                    LedgerSnapshot,
-                                    BKList,
-                                    {C, L})
-            end;
-        {NB, _V} ->
-            leveled_log:log("B0010",[NB]),
-            []
+                    NK =
+                        case Tag of
+                            ?HEAD_TAG ->
+                                {PK, SK} = K,
+                                {PK, leveled_codec:next_key(SK)};
+                            _ ->
+                                leveled_codec:next_key(K)
+                        end,
+                    get_nextbucket(B, NK, Tag, LedgerSnapshot, BKList, {C, L})
+            end
     end.
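For orientation, a hedged sketch of reaching this runner through the public API, mirroring the CT tests updated below (Bookie pid assumed started; ?RIAK_TAG assumed in scope):

    FoldBucketsFun = fun(B, Acc) -> [B|Acc] end,
    {async, Folder} =
        leveled_bookie:book_returnfolder(Bookie,
                                         {bucket_list,
                                          ?RIAK_TAG,
                                          {FoldBucketsFun, []}}),
    BucketList = Folder().

The `first_bucket` query runs the same fold with MaxBuckets set to 1 - per the doc comment above, just enough to confirm whether any bucket exists for a given Tag.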


@@ -778,7 +778,7 @@ is_empty_test(_Config) ->
     ok = testutil:book_riakput(Bookie1, TestObject3, TestSpec3),
     FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end,
-    BucketListQuery = {binary_bucketlist,
+    BucketListQuery = {bucket_list,
                         ?RIAK_TAG,
                         {FoldBucketsFun, sets:new()}},
     {async, BL} = leveled_bookie:book_returnfolder(Bookie1, BucketListQuery),


@@ -173,13 +173,13 @@ small_load_with2i(_Config) ->
     true = Total2 == Total1,
     FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end,
-    % Should not find any buckets - as there is a non-binary bucket, and no
-    % binary ones
-    BucketListQuery = {binary_bucketlist,
+    % this should find Bucket and Bucket1 - as we can now find string-based
+    % buckets using bucket_list - i.e. it isn't just binary buckets now
+    BucketListQuery = {bucket_list,
                         ?RIAK_TAG,
                         {FoldBucketsFun, sets:new()}},
     {async, BL} = leveled_bookie:book_returnfolder(Bookie2, BucketListQuery),
-    true = sets:size(BL()) == 0,
+    true = sets:size(BL()) == 2,
     ok = leveled_bookie:book_close(Bookie2),
     testutil:reset_filestructure().
@@ -394,7 +394,7 @@ query_count(_Config) ->
     testutil:check_forobject(Book4, TestObject),
     FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end,
-    BucketListQuery = {binary_bucketlist,
+    BucketListQuery = {bucket_list,
                         ?RIAK_TAG,
                         {FoldBucketsFun, sets:new()}},
     {async, BLF1} = leveled_bookie:book_returnfolder(Book4, BucketListQuery),