From 28f45749d75a1c05b224146d6e6b7a97263fc9d6 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Wed, 17 May 2017 12:54:02 +0100 Subject: [PATCH 1/3] Add specs to exported API of CDB files Time to give the dialyzer some help --- src/leveled_cdb.erl | 106 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 2 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 19d660e..9342a77 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -112,20 +112,41 @@ waste_path :: string(), sync_strategy = none}). +-type cdb_options() :: #cdb_options{}. + + %%%============================================================================ %%% API %%%============================================================================ +-spec cdb_open_writer(string()) -> {ok, pid()}. +%% @doc +%% Open a file for writing using default options cdb_open_writer(Filename) -> %% No options passed cdb_open_writer(Filename, #cdb_options{binary_mode=true}). +-spec cdb_open_writer(string(), cdb_options()) -> {ok, pid()}. +%% @doc +%% The filename should be a full file system reference to an existing CDB +%% file, and it will be opened and a FSM started to manage the file - with the +%% hashtree cached in memory (the file will need to be scanned to build the +%% hashtree) cdb_open_writer(Filename, Opts) -> {ok, Pid} = gen_fsm:start(?MODULE, [Opts], []), ok = gen_fsm:sync_send_event(Pid, {open_writer, Filename}, infinity), {ok, Pid}. +-spec cdb_reopen_reader(string(), binary()) -> {ok, pid()}. +%% @doc +%% Open an existing file that has already been moved into read-only mode. The +%% LastKey should be known, as it has been stored in the manifest. Knowing the +%% LastKey stops the file from needing to be scanned on start-up to discover +%% the LastKey. +%% +%% The LastKey is the Key of the last object added to the file - and is used to +%% determine when scans over a file have completed. 
cdb_reopen_reader(Filename, LastKey) -> {ok, Pid} = gen_fsm:start(?MODULE, [#cdb_options{binary_mode=true}], []), ok = gen_fsm:sync_send_event(Pid, @@ -133,48 +154,112 @@ cdb_reopen_reader(Filename, LastKey) -> infinity), {ok, Pid}. +-spec cdb_open_reader(string()) -> {ok, pid()}. +%% @doc +%% Open an existing file that has already been moved into read-only mode. +%% Don't use this if the LastKey is known, as this requires an expensive scan +%% to discover the LastKey. cdb_open_reader(Filename) -> cdb_open_reader(Filename, #cdb_options{binary_mode=true}). +-spec cdb_open_reader(string(), #cdb_options{}) -> {ok, pid()}. +%% @doc +%% Open an existing file that has already been moved into read-only mode. +%% Don't use this if the LastKey is known, as this requires an expensive scan +%% to discover the LastKey. +%% Allows non-default cdb_options to be passed cdb_open_reader(Filename, Opts) -> {ok, Pid} = gen_fsm:start(?MODULE, [Opts], []), ok = gen_fsm:sync_send_event(Pid, {open_reader, Filename}, infinity), {ok, Pid}. +-spec cdb_get(pid(), any()) -> {any(), any()}|missing. +%% @doc +%% Extract a Key and Value from a CDB file by passing in a Key. cdb_get(Pid, Key) -> gen_fsm:sync_send_event(Pid, {get_kv, Key}, infinity). +-spec cdb_put(pid(), any(), any()) -> ok|roll. +%% @doc +%% Put a key and value into a cdb file that is open as a writer, will fail +%% if the FSM is in any other state. +%% +%% Response can be roll - if there is no space to put this value in the file. +%% It is assumed that the response to a "roll" will be to roll the file, which +%% will close this file for writing after persisting the hashtree. cdb_put(Pid, Key, Value) -> gen_fsm:sync_send_event(Pid, {put_kv, Key, Value}, infinity). +-spec cdb_mput(pid(), list()) -> ok|roll. +%% @doc +%% Add multiple keys and values in one call. The file will request a roll if +%% all of the keys and values cannot be written (and in this case none of them +%% will). Mput is an all_or_nothing operation. 
+%% +%% It may be preferable to respond to roll by trying individual PUTs until +%% roll is returned again cdb_mput(Pid, KVList) -> gen_fsm:sync_send_event(Pid, {mput_kv, KVList}, infinity). -%% SampleSize can be an integer or the atom all +-spec cdb_getpositions(pid(), integer()|all) -> list(). +%% @doc +%% Get the positions in the file of a random sample of Keys. cdb_directfetch +%% can then be used to fetch those keys. SampleSize can be an integer or the +%% atom all. To be used for sampling queries, for example to assess the +%% potential for compaction. cdb_getpositions(Pid, SampleSize) -> gen_fsm:sync_send_event(Pid, {get_positions, SampleSize}, infinity). +-spec cdb_directfetch(pid(), list(), key_only|key_size|key_value_check) -> + list(). +%% @doc %% Info can be key_only, key_size (size being the size of the value) or %% key_value_check (with the check part indicating if the CRC is correct for %% the value) cdb_directfetch(Pid, PositionList, Info) -> gen_fsm:sync_send_event(Pid, {direct_fetch, PositionList, Info}, infinity). +-spec cdb_close(pid()) -> ok. +%% @doc +%% RONSEAL cdb_close(Pid) -> gen_fsm:sync_send_all_state_event(Pid, cdb_close, infinity). +-spec cdb_complete(pid()) -> {ok, string()}. +%% @doc +%% Persists the hashtable to the end of the file, to close it for further +%% writing then exit. Returns the filename that was saved. cdb_complete(Pid) -> gen_fsm:sync_send_event(Pid, cdb_complete, infinity). +-spec cdb_roll(pid()) -> ok. +%% @doc +%% Persists the hashtable to the end of the file, to close it for further +%% writing but do not exit, this will continue to service requests in the +%% rolling state whilst the hashtable is being written, and will become a +%% reader (read-only) CDB file process on completion cdb_roll(Pid) -> gen_fsm:send_event(Pid, cdb_roll). +-spec cdb_returnhashtable(pid(), list(), binary()) -> ok. 
+%% @doc +%% Used for handling the return of a calculated hashtable from a spawned +%% process - the building of the hashtable should not block the servicing of +%% requests. Returned is the binary for writing and the IndexList +%% [{Index, CurrPos, IndexLength}] which can be used to locate the slices of +%% the hashtree within that binary cdb_returnhashtable(Pid, IndexList, HashTreeBin) -> gen_fsm:sync_send_event(Pid, {return_hashtable, IndexList, HashTreeBin}, infinity). +-spec cdb_checkhashtable(pid()) -> boolean(). +%% @doc +%% Has the hashtable been written for this file? cdb_checkhashtable(Pid) -> gen_fsm:sync_send_event(Pid, check_hashtable). +-spec cdb_destroy(pid()) -> ok. +%% @doc +%% If the file is in a delete_pending state close (and will destroy) cdb_destroy(Pid) -> gen_fsm:send_event(Pid, destroy). @@ -182,16 +267,26 @@ cdb_deletepending(Pid) -> % Only used in unit tests cdb_deletepending(Pid, 0, no_poll). +-spec cdb_deletepending(pid(), integer(), pid()|no_poll) -> ok. +%% @doc +%% Puts the file in a delete_pending state. From that state the Inker will be +%% polled to discover if the Manifest SQN at which the file is deleted now +%% means that the file can safely be destroyed (as there are no snapshots with +%% any outstanding dependencies). +%% Passing no_poll means there's no inker to poll, and the process will close +%% on timeout rather than poll. cdb_deletepending(Pid, ManSQN, Inker) -> gen_fsm:send_event(Pid, {delete_pending, ManSQN, Inker}). +-spec cdb_scan(pid(), fun(), any(), integer()|undefined) -> + {integer()|eof, any()}. +%% @doc %% cdb_scan returns {LastPosition, Acc}. Use LastPosition as StartPosiiton to %% continue from that point (calling function has to protect against) double %% counting. %% %% LastPosition could be the atom complete when the last key processed was at %% the end of the file. last_key must be defined in LoopState. 
- cdb_scan(Pid, FilterFun, InitAcc, StartPosition) -> gen_fsm:sync_send_all_state_event(Pid, {cdb_scan, @@ -200,18 +295,25 @@ cdb_scan(Pid, FilterFun, InitAcc, StartPosition) -> StartPosition}, infinity). +-spec cdb_lastkey(pid()) -> any(). +%% @doc %% Get the last key to be added to the file (which will have the highest %% sequence number) cdb_lastkey(Pid) -> gen_fsm:sync_send_all_state_event(Pid, cdb_lastkey, infinity). +-spec cdb_firstkey(pid()) -> any(). cdb_firstkey(Pid) -> gen_fsm:sync_send_all_state_event(Pid, cdb_firstkey, infinity). +-spec cdb_filename(pid()) -> string(). +%% @doc %% Get the filename of the database cdb_filename(Pid) -> gen_fsm:sync_send_all_state_event(Pid, cdb_filename, infinity). +-spec cdb_keycheck(pid(), any()) -> probably|missing. +%% @doc %% Check to see if the key is probably present, will return either %% probably or missing. Does not do a definitive check cdb_keycheck(Pid, Key) -> From 8b3ca78d49d5ac2d1e9173460d101ae33c834972 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Thu, 18 May 2017 12:29:56 +0100 Subject: [PATCH 2/3] spec help for SST file --- src/leveled_pclerk.erl | 10 ++--- src/leveled_sst.erl | 87 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 12944f8..2d942eb 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -265,31 +265,31 @@ merge_file_test() -> "KL1_L1.sst", 1, KL1_L1, - undefined), + 999999), KL1_L2 = lists:sort(generate_randomkeys(8000, 0, 250)), {ok, PidL2_1, _} = leveled_sst:sst_new("../test/", "KL1_L2.sst", 2, KL1_L2, - undefined), + 999999), KL2_L2 = lists:sort(generate_randomkeys(8000, 250, 250)), {ok, PidL2_2, _} = leveled_sst:sst_new("../test/", "KL2_L2.sst", 2, KL2_L2, - undefined), + 999999), KL3_L2 = lists:sort(generate_randomkeys(8000, 500, 250)), {ok, PidL2_3, _} = leveled_sst:sst_new("../test/", "KL3_L2.sst", 2, KL3_L2, - undefined), + 999999), KL4_L2 = 
lists:sort(generate_randomkeys(8000, 750, 250)), {ok, PidL2_4, _} = leveled_sst:sst_new("../test/", "KL4_L2.sst", 2, KL4_L2, - undefined), + 999999), E1 = #manifest_entry{owner = PidL1_1, filename = "./KL1_L1.sst", diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 73896ff..74f84db 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -142,6 +142,15 @@ %%% API %%%============================================================================ +-spec sst_open(string(), string()) -> {ok, pid(), {tuple(), tuple()}}. +%% @doc +%% Open an SST file at a given path and filename. The first and last keys +%% are returned in response to the request - so that those keys can be used +%% in manifests to understand what range of keys are covered by the SST file. +%% All keys in the file should be between the first and last key in erlang +%% term order. +%% +%% The filename should include the file extension. sst_open(RootPath, Filename) -> {ok, Pid} = gen_fsm:start(?MODULE, [], []), case gen_fsm:sync_send_event(Pid, @@ -151,6 +160,12 @@ sst_open(RootPath, Filename) -> {ok, Pid, {SK, EK}} end. +-spec sst_new(string(), string(), integer(), list(), integer()) -> + {ok, pid(), {tuple(), tuple()}}. +%% @doc +%% Start a new SST file at the assigned level passing in a list of Key, Value +%% pairs. This should not be used for basement levels or unexpanded Key/Value +%% lists as merge_lists will not be called. sst_new(RootPath, Filename, Level, KVList, MaxSQN) -> {ok, Pid} = gen_fsm:start(?MODULE, [], []), {[], [], SlotList, FK} = merge_lists(KVList), @@ -166,6 +181,20 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN) -> {ok, Pid, {SK, EK}} end. +-spec sst_new(string(), string(), list(), list(), + boolean(), integer(), integer()) -> + empty|{ok, pid(), {{list(), list()}, tuple(), tuple()}}. +%% @doc +%% Start a new SST file at the assigned level passing in a two lists of +%% {Key, Value} pairs to be merged. 
The merge_lists function will use the +%% IsBasement boolean to determine if expired keys or tombstones can be +%% deleted. +%% +%% The remainder of the lists is returned along with the StartKey and EndKey +%% so that the remainder can be used in the next file in the merge. It might +%% be that the merge_lists returns nothing (for example when a basement file is +%% all tombstones) - and the atom empty is returned in this case so that the +%% file is not added to the manifest. sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN) -> {Rem1, Rem2, SlotList, FK} = merge_lists(KVL1, KVL2, {IsBasement, Level}), case SlotList of @@ -186,6 +215,13 @@ sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN) -> end end. +-spec sst_newlevelzero(string(), string(), + integer(), fun(), pid()|undefined, integer()) -> + {ok, pid(), noreply}. +%% @doc +%% Start a new file at level zero. At this level the file size is not fixed - +%% it will be as big as the input. Also the KVList is not passed in, it is +%% fetched slot by slot using the FetchFun sst_newlevelzero(RootPath, Filename, Slots, FetchFun, Penciller, MaxSQN) -> {ok, Pid} = gen_fsm:start(?MODULE, [], []), gen_fsm:send_event(Pid, @@ -198,12 +234,30 @@ sst_newlevelzero(RootPath, Filename, Slots, FetchFun, Penciller, MaxSQN) -> MaxSQN}), {ok, Pid, noreply}. +-spec sst_get(pid(), tuple()) -> tuple()|not_present. +%% @doc +%% Return a Key, Value pair matching a Key or not_present if the Key is not in +%% the store. The magic_hash function is used to accelerate the seeking of +%% keys, sst_get/3 should be used directly if this has already been calculated sst_get(Pid, LedgerKey) -> sst_get(Pid, LedgerKey, leveled_codec:magic_hash(LedgerKey)). +-spec sst_get(pid(), tuple(), integer()) -> tuple()|not_present. +%% @doc +%% Return a Key, Value pair matching a Key or not_present if the Key is not in +%% the store (with the magic hash precalculated). 
sst_get(Pid, LedgerKey, Hash) -> gen_fsm:sync_send_event(Pid, {get_kv, LedgerKey, Hash}, infinity). +-spec sst_getkvrange(pid(), tuple()|all, tuple()|all, integer()) -> list(). +%% @doc +%% Get a range of {Key, Value} pairs as a list between StartKey and EndKey +%% (inclusive). The ScanWidth is the maximum size of the range, a pointer +%% will be placed on the tail of the resulting list if results expand beyond +%% the Scan Width +%% +%% To make the range open-ended (either at start, end or both) the all atom +%% can be used in place of the Key tuple. sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) -> case gen_fsm:sync_send_event(Pid, {get_kvrange, StartKey, EndKey, ScanWidth}, @@ -218,6 +272,10 @@ sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) -> Reply end. +-spec sst_getslots(pid(), list()) -> list(). +%% @doc +%% Get a list of slots by their ID. The slot will be converted from the binary +%% to term form outside of the FSM loop sst_getslots(Pid, SlotList) -> SlotBins = gen_fsm:sync_send_event(Pid, {get_slots, SlotList}, infinity), FetchFun = @@ -226,27 +284,54 @@ sst_getslots(Pid, SlotList) -> end, lists:foldl(FetchFun, [], SlotBins). +-spec sst_getmaxsequencenumber(pid()) -> integer(). +%% @doc +%% Get the maximum sequence number for this SST file sst_getmaxsequencenumber(Pid) -> gen_fsm:sync_send_event(Pid, get_maxsequencenumber, infinity). +-spec sst_setfordelete(pid(), pid()|false) -> ok. +%% @doc +%% If the SST is no longer in use in the active ledger it can be set for +%% delete. Once set for delete it will poll the Penciller pid to see if +%% it is yet safe to be deleted (i.e. because all snapshots which depend +%% on it have finished). No polling will be done if the Penciller pid +%% is 'false' sst_setfordelete(Pid, Penciller) -> gen_fsm:sync_send_event(Pid, {set_for_delete, Penciller}, infinity). +-spec sst_clear(pid()) -> ok. 
+%% @doc +%% For this file to be closed and deleted sst_clear(Pid) -> gen_fsm:sync_send_event(Pid, {set_for_delete, false}, infinity), gen_fsm:sync_send_event(Pid, close, 1000). +-spec sst_deleteconfirmed(pid()) -> ok. +%% @doc +%% Allows a penciller to confirm to a SST file that it can be cleared, as it +%% is no longer in use sst_deleteconfirmed(Pid) -> gen_fsm:send_event(Pid, close). +-spec sst_checkready(pid()) -> {ok, string(), tuple(), tuple()}. +%% @doc +%% If a file has been set to be built, check that it has been built. Returns +%% the filename and the {startKey, EndKey} for the manifest. sst_checkready(Pid) -> %% Only used in test gen_fsm:sync_send_event(Pid, background_complete, 100). - +-spec sst_close(pid()) -> ok. +%% @doc +%% Close the file sst_close(Pid) -> gen_fsm:sync_send_event(Pid, close, 2000). +-spec sst_printtimings(pid()) -> ok. +%% @doc +%% The state of the FSM keeps track of timings of operations, and this can +%% be forced to be printed. %% Used in unit tests to force the printing of timings sst_printtimings(Pid) -> gen_fsm:sync_send_event(Pid, print_timings, 1000). From c90e0f824d501cafc6661a73b7eba77fb85e34f1 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 22 May 2017 10:00:34 +0100 Subject: [PATCH 3/3] Spelling error change --- src/leveled_cdb.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 40fa46f..f8db410 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -13,7 +13,7 @@ %% - Support for incrementally writing a CDB file while keeping the hash table %% in memory %% - The ability to scan a database in blocks of sequence numbers -%% - The applictaion of a CRC chekc by default to all values +%% - The applictaion of a CRC check by default to all values %% %% This module provides functions to create and query a CDB (constant database). %% A CDB implements a two-level hashtable which provides fast {key,value}