diff --git a/docs/DESIGN.md b/docs/DESIGN.md
index 7e3b730..23142a5 100644
--- a/docs/DESIGN.md
+++ b/docs/DESIGN.md
@@ -1,4 +1,4 @@
-## Design 
+## Design
 
 The store is written in Erlang using the actor model, the primary actors being:
 
@@ -118,11 +118,17 @@ Three potential recovery strategies are supported to provide some flexibility fo
 
 - retain - on compaction KeyDeltas are retained in the Journal, only values are removed.
 
-- recalc (not yet implemented) - the compaction rules assume that on recovery the key changes will be recalculated by comparing the change with the current database state.
+- recalc (not yet implemented) - the compaction rules assume that on recovery the key changes will be recalculated by comparing each change with the current database state.
+
+## Head only
+
+Leveled can be started in `head_only` mode. This is a special mode which dispenses with the long-term role of the Journal in retaining data. It is a mode to be used in *special circumstances* when values are small, and Key/Value pairs are added in batches.
 
-n recovery the key changes will be recalculated by comparing the change with the current database state.
+In `head_only` mode, batches of keys and values are stored first in the Journal; however, once the last element received by a Journal file has been persisted into the Ledger, that Journal file can be deleted. The values are never returned from the
+Journal except during startup, to recover the in-memory part of the Ledger (the Bookie's and Penciller's memory).
+
+There are two ways in which `head_only` mode can be enabled - `with_lookup` and `no_lookup`. In `with_lookup` mode an individual value can be fetched from Leveled using a HEAD request. In `no_lookup` mode, HEAD requests are not supported - values can only be returned using `fold_heads`. The `no_lookup` mode is marginally more efficient in terms of CPU usage when under write pressure (it avoids generating key hashes and hash-based lookup indexes within the Penciller).
+
+The `head_only` mode was created so that it could be used as an AAE store in Riak - where values may simply be a version vector or a hash, and retention of data is not critical (it is not the primary store of real users' data). Leveled is not optimised for small values; the `head_only` mode improves its efficiency where values are small. However, the intention is that Leveled should remain for the long term an LSM tree designed for scenarios with larger values. Features, testing and support for `head_only` modes will be limited compared to support for Leveled running in its standard mode of operation. For use cases that need `head_only` behaviour in the primary data store, an alternative store would be a safer choice.
+
+There is no ability to mix `head_only` behaviour with standard behaviour. There is no defined behaviour when switching databases between different `head_only` modes, and data loss is highly likely.
diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl
index f5d3a6b..901f6b8 100644
--- a/src/leveled_bookie.erl
+++ b/src/leveled_bookie.erl
@@ -162,6 +162,9 @@
 -type head_timings() :: no_timing|#head_timings{}.
 -type timing_types() :: head|get|put|fold.
 -type recent_aae() :: false|#recent_aae{}|undefined.
+-type key() :: binary()|string().
+    % Keys SHOULD be binary()
+    % string() support is a legacy of old tests
 -type open_options() ::
     %% For full description of options see ../docs/STARTUP_OPTIONS.md
     [{root_path, string()|undefined} |
@@ -278,6 +281,8 @@
         % Defaults to ?COMPRESSION_POINT
         ].
 
+-export_type([key/0]).
+
 
 %%%============================================================================
 %%% API
@@ -329,7 +334,7 @@ book_start(Opts) ->
     gen_server:start_link(?MODULE, [set_defaults(Opts)], []).
 
--spec book_tempput(pid(), any(), any(), any(),
+-spec book_tempput(pid(), key(), key(), any(),
                     leveled_codec:index_specs(),
                     leveled_codec:tag(), integer()) -> ok|pause.
@@ -396,7 +401,7 @@ book_put(Pid, Bucket, Key, Object, IndexSpecs) ->
 book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag) ->
     book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, infinity).
 
--spec book_put(pid(), any(), any(), any(),
+-spec book_put(pid(), key(), key(), any(),
                 leveled_codec:index_specs(),
                 leveled_codec:tag(), infinity|integer()) -> ok|pause.
@@ -432,7 +437,7 @@ book_mput(Pid, ObjectSpecs) ->
 book_mput(Pid, ObjectSpecs, TTL) ->
     gen_server:call(Pid, {mput, ObjectSpecs, TTL}, infinity).
 
--spec book_delete(pid(), any(), any(), leveled_codec:index_specs())
+-spec book_delete(pid(), key(), key(), leveled_codec:index_specs())
                                                     -> ok|pause.
@@ -444,9 +449,9 @@ book_delete(Pid, Bucket, Key, IndexSpecs) ->
     book_put(Pid, Bucket, Key, delete, IndexSpecs, ?STD_TAG).
 
--spec book_get(pid(), any(), any(), leveled_codec:tag())
+-spec book_get(pid(), key(), key(), leveled_codec:tag())
                                             -> {ok, any()}|not_found.
--spec book_head(pid(), any(), any(), leveled_codec:tag())
+-spec book_head(pid(), key(), key(), leveled_codec:tag())
                                             -> {ok, any()}|not_found.
 
 %% @doc - GET and HEAD requests
@@ -503,7 +508,7 @@ book_head(Pid, Bucket, Key) ->
 %% {bucket_stats, Bucket} -> return a key count and total object size within
 %% a bucket
 %% {riakbucket_stats, Bucket} -> as above, but for buckets with the Riak Tag
-%% {binary_bucketlist, Tag, {FoldKeysFun, Acc}} -> if we assume buckets and
+%% {bucket_list, Tag, {FoldKeysFun, Acc}} -> if we assume buckets and
 %% keys are binaries, provides a fast bucket list function
 %% {index_query,
 %%        Constraint,
@@ -1200,14 +1205,14 @@ get_runner(State,
     leveled_runner:foldobjects_byindex(SnapFun,
                                         {Tag, Bucket, Field, FromTerm, ToTerm},
                                         FoldObjectsFun);
-get_runner(State, {binary_bucketlist, Tag, FoldAccT}) ->
+get_runner(State, {bucket_list, Tag, FoldAccT}) ->
     {FoldBucketsFun, Acc} = FoldAccT,
     SnapFun = return_snapfun(State, ledger, no_lookup, false, false),
-    leveled_runner:binary_bucketlist(SnapFun, Tag, FoldBucketsFun, Acc);
+    leveled_runner:bucket_list(SnapFun, Tag, FoldBucketsFun, Acc);
 get_runner(State, {first_bucket, Tag, FoldAccT}) ->
     {FoldBucketsFun, Acc} = FoldAccT,
     SnapFun = return_snapfun(State, ledger, no_lookup, false, false),
-    leveled_runner:binary_bucketlist(SnapFun, Tag, FoldBucketsFun, Acc, 1);
+    leveled_runner:bucket_list(SnapFun, Tag, FoldBucketsFun, Acc, 1);
 %% Set of specific runners, primarily used as examples for tests
 get_runner(State, DeprecatedQuery) ->
     get_deprecatedrunner(State, DeprecatedQuery).
@@ -2166,6 +2171,88 @@ is_empty_test() ->
     ok = leveled_bookie:book_close(Bookie1).
 
+is_empty_headonly_test() ->
+    RootPath = reset_filestructure(),
+    {ok, Bookie1} = book_start([{root_path, RootPath},
+                                {max_journalsize, 1000000},
+                                {cache_size, 500},
+                                {head_only, no_lookup}]),
+    ?assertMatch(true, book_isempty(Bookie1, ?HEAD_TAG)),
+    ObjSpecs =
+        [{add, <<"B1">>, <<"K1">>, <<1:8/integer>>, 100},
+         {remove, <<"B1">>, <<"K1">>, <<0:8/integer>>, null}],
+    ok = book_mput(Bookie1, ObjSpecs),
+    ?assertMatch(false, book_isempty(Bookie1, ?HEAD_TAG)),
+    ok = book_close(Bookie1).
+
+
+foldkeys_headonly_test() ->
+    foldkeys_headonly_tester(5000, 25, "BucketStr"),
+    foldkeys_headonly_tester(2000, 25, <<"B0">>).
+
+
+foldkeys_headonly_tester(ObjectCount, BlockSize, BStr) ->
+    RootPath = reset_filestructure(),
+
+    {ok, Bookie1} = book_start([{root_path, RootPath},
+                                {max_journalsize, 1000000},
+                                {cache_size, 500},
+                                {head_only, no_lookup}]),
+    GenObjSpecFun =
+        fun(I) ->
+            Key = I rem 6,
+            {add, BStr, <<Key:8/integer>>, integer_to_list(I), I}
+        end,
+    ObjSpecs = lists:map(GenObjSpecFun, lists:seq(1, ObjectCount)),
+    ObjSpecBlocks =
+        lists:map(fun(I) ->
+                      lists:sublist(ObjSpecs, I * BlockSize + 1, BlockSize)
+                  end,
+                  lists:seq(0, ObjectCount div BlockSize - 1)),
+    lists:map(fun(Block) -> book_mput(Bookie1, Block) end, ObjSpecBlocks),
+    ?assertMatch(false, book_isempty(Bookie1, ?HEAD_TAG)),
+
+    FolderT =
+        {keylist,
+            ?HEAD_TAG, BStr,
+            {fun(_B, {K, SK}, Acc) -> [{K, SK}|Acc] end, []}
+        },
+    {async, Folder1} = book_returnfolder(Bookie1, FolderT),
+    Key_SKL1 = lists:reverse(Folder1()),
+    Key_SKL_Compare =
+        lists:usort(lists:map(fun({add, _B, K, SK, _V}) -> {K, SK} end,
+                              ObjSpecs)),
+    ?assertMatch(Key_SKL_Compare, Key_SKL1),
+
+    ok = book_close(Bookie1),
+
+    {ok, Bookie2} = book_start([{root_path, RootPath},
+                                {max_journalsize, 1000000},
+                                {cache_size, 500},
+                                {head_only, no_lookup}]),
+    {async, Folder2} = book_returnfolder(Bookie2, FolderT),
+    Key_SKL2 = lists:reverse(Folder2()),
+    ?assertMatch(Key_SKL_Compare, Key_SKL2),
+
+    ok = book_close(Bookie2).
+
+
+is_empty_stringkey_test() ->
+    RootPath = reset_filestructure(),
+    {ok, Bookie1} = book_start([{root_path, RootPath},
+                                {max_journalsize, 1000000},
+                                {cache_size, 500}]),
+    ?assertMatch(true, book_isempty(Bookie1, ?STD_TAG)),
+    Past = leveled_util:integer_now() - 300,
+    ?assertMatch(true, leveled_bookie:book_isempty(Bookie1, ?STD_TAG)),
+    ok = book_tempput(Bookie1,
+                      "B", "K", {value, <<"V">>}, [],
+                      ?STD_TAG, Past),
+    ok = book_put(Bookie1,
+                  "B", "K0", {value, <<"V">>}, [],
+                  ?STD_TAG),
+    ?assertMatch(false, book_isempty(Bookie1, ?STD_TAG)),
+    ok = book_close(Bookie1).
+
 
 scan_table_test() ->
     K1 = leveled_codec:to_ledgerkey(<<"B1">>, <<"K1">>,
diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index 2386336..e23f04c 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -67,7 +67,8 @@ riak_extract_metadata/2,
         segment_hash/1,
         to_lookup/1,
-        riak_metadata_to_binary/2]).
+        riak_metadata_to_binary/2,
+        next_key/1]).
 
 -define(V1_VERS, 1).
 -define(MAGIC, 53). % riak_kv -> riak_object
@@ -252,6 +253,8 @@ from_ledgerkey({?IDX_TAG, ?ALL_BUCKETS, {_IdxFld, IdxVal}, {Bucket, Key}}) ->
     {Bucket, Key, IdxVal};
 from_ledgerkey({?IDX_TAG, Bucket, {_IdxFld, IdxVal}, Key}) ->
     {Bucket, Key, IdxVal};
+from_ledgerkey({?HEAD_TAG, Bucket, Key, SubKey}) ->
+    {Bucket, {Key, SubKey}};
 from_ledgerkey({_Tag, Bucket, Key, _SubKey}) ->
     {Bucket, Key}.
 
@@ -834,7 +837,13 @@ get_metadata_from_siblings(<>,
                                 MetaBin:MetaLen/binary>>,
                             [LastMod|LastMods]).
 
-
+-spec next_key(leveled_bookie:key()) -> leveled_bookie:key().
+%% @doc
+%% Get the next key to iterate from a given point
+next_key(Key) when is_binary(Key) ->
+    <<Key/binary, 0>>;
+next_key(Key) when is_list(Key) ->
+    Key ++ [0].
 
 
 %%%============================================================================
diff --git a/src/leveled_log.erl b/src/leveled_log.erl
index e5295ae..b52f1cc 100644
--- a/src/leveled_log.erl
+++ b/src/leveled_log.erl
@@ -40,8 +40,6 @@
         {info, "Bucket list finds no more results"}},
     {"B0009",
         {info, "Bucket list finds Bucket ~w"}},
-    {"B0010",
-        {info, "Bucket list finds non-binary Bucket ~w"}},
     {"B0011",
         {warn, "Call to destroy the store and so all files to be removed"}},
     {"B0013",
diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl
index b8b34dc..7cf720f 100644
--- a/src/leveled_runner.erl
+++ b/src/leveled_runner.erl
@@ -23,8 +23,8 @@
 -export([
         bucket_sizestats/3,
-        binary_bucketlist/4,
-        binary_bucketlist/5,
+        bucket_list/4,
+        bucket_list/5,
         index_query/3,
         bucketkey_query/4,
         bucketkey_query/5,
@@ -73,19 +73,20 @@ bucket_sizestats(SnapFun, Bucket, Tag) ->
     end,
     {async, Runner}.
 
--spec binary_bucketlist(fun(), leveled_codec:tag(), fun(), any())
+-spec bucket_list(fun(), leveled_codec:tag(), fun(), any())
                                                     -> {async, fun()}.
 %% @doc
-%% List buckets for tag, assuming bucket names are all binary type
-binary_bucketlist(SnapFun, Tag, FoldBucketsFun, InitAcc) ->
-    binary_bucketlist(SnapFun, Tag, FoldBucketsFun, InitAcc, -1).
+%% List buckets for tag, assuming bucket names are all either binary, ASCII
+%% strings or integers
+bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc) ->
+    bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, -1).
 
--spec binary_bucketlist(fun(), leveled_codec:tag(), fun(), any(), integer())
+-spec bucket_list(fun(), leveled_codec:tag(), fun(), any(), integer())
                                                     -> {async, fun()}.
 %% @doc
 %% set Max Buckets to -1 to list all buckets, otherwise will only return
 %% MaxBuckets (use 1 to confirm that there exists any bucket for a given Tag)
-binary_bucketlist(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) ->
+bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) ->
     Runner =
         fun() ->
             {ok, LedgerSnapshot, _JournalSnapshot} = SnapFun(),
@@ -437,27 +438,27 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList, {C, L}) ->
         null ->
             leveled_log:log("B0008",[]),
             BKList;
-        {{B, K}, V} when is_binary(B), is_binary(K) ->
+        {{B, K}, V} ->
            case leveled_codec:is_active({Tag, B, K, null}, V, Now) of
                true ->
                    leveled_log:log("B0009",[B]),
-                   get_nextbucket(<<B/binary, 0>>,
+                   get_nextbucket(leveled_codec:next_key(B),
                                    null,
                                    Tag,
                                    LedgerSnapshot,
                                    [{B, K}|BKList],
                                    {C + 1, L});
                false ->
-                   get_nextbucket(B,
-                                   <<K/binary, 0>>,
-                                   Tag,
-                                   LedgerSnapshot,
-                                   BKList,
-                                   {C, L})
-           end;
-        {NB, _V} ->
-            leveled_log:log("B0010",[NB]),
-            []
+                    NK =
+                        case Tag of
+                            ?HEAD_TAG ->
+                                {PK, SK} = K,
+                                {PK, leveled_codec:next_key(SK)};
+                            _ ->
+                                leveled_codec:next_key(K)
+                        end,
+                    get_nextbucket(B, NK, Tag, LedgerSnapshot, BKList, {C, L})
+           end
     end.
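Note on the get_nextbucket change above: next_key/1 returns the smallest key that sorts strictly after the given key, so the fold can jump straight past the remaining keys in a bucket (or, for ?HEAD_TAG, past the remaining sub-keys). A minimal sketch of this property, assuming only the next_key/1 implementation added in this diff (the module and function names here are illustrative, not part of the patch):

    %% Sketch only - demonstrates the successor-key property that
    %% get_nextbucket relies upon.
    -module(next_key_sketch).
    -export([check/0]).

    %% Mirrors leveled_codec:next_key/1 from this diff: appending a zero
    %% byte (or zero character) yields the immediate successor in the
    %% Ledger's key ordering, for binary and string keys alike.
    next_key(Key) when is_binary(Key) ->
        <<Key/binary, 0>>;
    next_key(Key) when is_list(Key) ->
        Key ++ [0].

    check() ->
        true = next_key(<<"B1">>) > <<"B1">>,
        %% nothing sorts between a key and its successor, so seeking to
        %% next_key(B) lands on the first key of the next bucket (if any)
        true = next_key(<<"B1">>) < <<"B2">>,
        true = next_key("Bucket") > "Bucket",
        ok.
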
diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl
index 14aca22..72028be 100644
--- a/test/end_to_end/basic_SUITE.erl
+++ b/test/end_to_end/basic_SUITE.erl
@@ -778,7 +778,7 @@ is_empty_test(_Config) ->
     ok = testutil:book_riakput(Bookie1, TestObject3, TestSpec3),
 
     FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end,
-    BucketListQuery = {binary_bucketlist,
+    BucketListQuery = {bucket_list,
                         ?RIAK_TAG,
                         {FoldBucketsFun, sets:new()}},
     {async, BL} = leveled_bookie:book_returnfolder(Bookie1, BucketListQuery),
diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl
index 3a235c7..4a78ed6 100644
--- a/test/end_to_end/iterator_SUITE.erl
+++ b/test/end_to_end/iterator_SUITE.erl
@@ -173,13 +173,13 @@ small_load_with2i(_Config) ->
     true = Total2 == Total1,
 
     FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end,
-    % Should not find any buckets - as there is a non-binary bucket, and no
-    % binary ones
-    BucketListQuery = {binary_bucketlist,
+    % This should find Bucket and Bucket1 - as bucket_list can now find
+    % string-based buckets as well as binary ones
+    BucketListQuery = {bucket_list,
                         ?RIAK_TAG,
                         {FoldBucketsFun, sets:new()}},
     {async, BL} = leveled_bookie:book_returnfolder(Bookie2, BucketListQuery),
-    true = sets:size(BL()) == 0,
+    true = sets:size(BL()) == 2,
 
     ok = leveled_bookie:book_close(Bookie2),
     testutil:reset_filestructure().
@@ -394,7 +394,7 @@ query_count(_Config) ->
     testutil:check_forobject(Book4, TestObject),
 
     FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end,
-    BucketListQuery = {binary_bucketlist,
+    BucketListQuery = {bucket_list,
                         ?RIAK_TAG,
                         {FoldBucketsFun, sets:new()}},
     {async, BLF1} = leveled_bookie:book_returnfolder(Book4, BucketListQuery),
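To tie the DESIGN.md additions back to the API, a minimal usage sketch for `head_only` mode, assembled only from calls that appear in this diff (the book_start options, book_mput/2 object specs, and the keylist folder used by the new unit tests). The wrapper function name is illustrative, and HeadTag = h is an assumption about the value behind the ?HEAD_TAG macro:

    %% Sketch only - batch writes then a fold in head_only mode, following
    %% the pattern of is_empty_headonly_test and foldkeys_headonly_tester.
    head_only_sketch(RootPath) ->
        {ok, Bookie} = leveled_bookie:book_start([{root_path, RootPath},
                                                  {head_only, no_lookup}]),
        %% Object specs take the form {add|remove, Bucket, Key, SubKey, Value}
        ObjSpecs = [{add, <<"B1">>, <<"K1">>, <<"SK1">>, 1},
                    {add, <<"B1">>, <<"K2">>, <<"SK1">>, 2}],
        ok = leveled_bookie:book_mput(Bookie, ObjSpecs),
        %% In no_lookup mode HEAD requests are unsupported, so keys can
        %% only be reached through folds such as this keylist folder
        HeadTag = h,  % assumed value of ?HEAD_TAG
        FolderT = {keylist,
                   HeadTag, <<"B1">>,
                   {fun(_B, {K, SK}, Acc) -> [{K, SK}|Acc] end, []}},
        {async, Folder} = leveled_bookie:book_returnfolder(Bookie, FolderT),
        KeySubKeys = Folder(),
        ok = leveled_bookie:book_close(Bookie),
        KeySubKeys.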