%% -------- PENCILLER ---------
%%
%% The penciller is responsible for writing and re-writing the ledger - a
%% persisted, ordered view of non-recent Keys and Metadata which have been
%% added to the store.
%% - The penciller maintains a manifest of all the files within the current
%% Ledger.
%% - The Penciller provides re-write (compaction) work to be managed by
%% the Penciller's Clerk.
%% - The Penciller can be cloned, and maintains a register of clones which
%% have requested snapshots of the Ledger.
%% - The Penciller accepts new dumps (in the form of a leveled_tree
%% accompanied by an array of hash-listing binaries) from the Bookie, and
%% responds either 'ok' to the Bookie if the information is accepted and the
%% Bookie can refresh its memory, or 'returned' if the Bookie must continue
%% without refreshing as the Penciller is not currently able to accept the
%% update (potentially due to a backlog of compaction work).
%% - The Penciller's persistence of the ledger may not be reliable, in that
%% it may lose data, but only in sequence from a particular sequence number.
%% On startup the Penciller will inform the Bookie of the highest sequence
%% number it has, and the Bookie should load any missing data from that
%% point out of the journal.
%%
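%% A minimal sketch (not part of this module) of how a caller in the Bookie
%% role might drive the 'ok'|'returned' push protocol described above; the
%% fixed back-off interval is illustrative only:
%%
%%   push_until_accepted(Penciller, Cache) ->
%%       case leveled_penciller:pcl_pushmem(Penciller, Cache) of
%%           ok ->
%%               % Accepted - the Bookie may now refresh its in-memory cache
%%               ok;
%%           returned ->
%%               % Rejected - keep the cache, back off, and try again
%%               timer:sleep(1000),
%%               push_until_accepted(Penciller, Cache)
%%       end.
%%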
%% -------- LEDGER ---------
%%
%% The Ledger is divided into many levels
%% - L0: New keys are received from the Bookie and kept in the levelzero
%% cache, until that cache is the size of a SST file, and it is then
%% persisted as a SST file at this level.  L0 SST files can be larger than
%% the normal maximum size - so we don't have to consider problems of either
%% having more than one L0 file (and handling what happens on a crash between
%% writing the files when the second may have overlapping sequence numbers),
%% or having a remainder with overlapping sequence numbers in memory after
%% the file is written.  Once the persistence is completed, the L0 cache can
%% be erased.  There can be only one SST file at Level 0, so the work to
%% merge that file to the lower level must be the highest priority, as
%% otherwise writes to the ledger will stall when there is next a need to
%% persist.
%% - L1 TO L7: May contain multiple processes managing non-overlapping SST
%% files.  Compaction work should be scheduled if the number of files exceeds
%% the target size of the level, where the target size is 8 ^ n.
%%
%% The most recent revision of a Key can be found by checking each level
%% until the key is found.  To check a level the correct file must be sought
%% from the manifest for that level, and then a call is made to that file.
%% If the Key is not present then every level should be checked.
%%
%% If a compaction change takes the size of a level beyond the target size,
%% then compaction work for that level + 1 should be added to the compaction
%% work queue.
%% Compaction work is fetched by the Penciller's Clerk because:
%% - it has timed out due to a period of inactivity
%% - it has been triggered by a cast to indicate the arrival of high
%% priority compaction work
%% The Penciller's Clerk (which performs the compaction work) will always
%% call the Penciller to find out the highest priority work currently
%% required, whenever it has either completed work or a timeout has occurred
%% since it was informed there was no work to do.
%%
%% When the clerk picks work it will take the current manifest, and the
%% Penciller assumes the manifest sequence number is to be incremented.
%% When the clerk has completed the work it can request that the manifest
%% change be committed by the Penciller.  The commit is made through changing
%% the filename of the new manifest - so the Penciller is not held up by the
%% process of writing a file, just altering file system metadata.
%%
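%% As an illustrative sketch (the real scheduling logic lives in
%% leveled_pmanifest:check_for_work/2), the per-level file-count target
%% described above could be computed as:
%%
%%   target_size(Level) when Level > 0 ->
%%       trunc(math:pow(8, Level)).
%%
%% e.g. target_size(1) = 8, target_size(2) = 64, target_size(3) = 512 -
%% matching the ?LEVEL_SCALEFACTOR constants defined later in this module
%% (Level 0 and the basement level are special-cased).
%%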
%% ---------- PUSH ----------
%%
%% The Penciller must support the PUSH of a dump of keys from the Bookie.
%% The call to PUSH should be immediately acknowledged, and then work should
%% be completed to merge the cache update into the L0 cache.
%%
%% The Penciller MUST NOT accept a new PUSH if the Clerk has commenced the
%% conversion of the current L0 cache into a SST file, but has not completed
%% this change.  The Penciller in this case returns the push, and the Bookie
%% should continue to grow the cache before trying again.
%%
%% ---------- FETCH ----------
%%
%% On request to fetch a key the Penciller should look first in the in-memory
%% L0 tree, then look in the SST files Level by Level (including Level 0),
%% consulting the Manifest to determine which file should be checked at each
%% level.
%%
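%% A simplified sketch of that lookup cascade (the real implementation is
%% fetch_mem/5 and fetch/5 later in this module; check_levelzero/2 and
%% lookup_in_level/3 here are hypothetical stand-ins):
%%
%%   find_key(Key, L0Cache, Manifest) ->
%%       case check_levelzero(Key, L0Cache) of
%%           {true, KV} -> KV;
%%           false -> find_in_levels(Key, Manifest, 0)
%%       end.
%%
%%   find_in_levels(_Key, _Manifest, Level) when Level >= 8 ->
%%       not_present;
%%   find_in_levels(Key, Manifest, Level) ->
%%       case lookup_in_level(Key, Manifest, Level) of
%%           not_present -> find_in_levels(Key, Manifest, Level + 1);
%%           KV -> KV
%%       end.
%%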
%% ---------- SNAPSHOT ----------
%%
%% Iterators may request a snapshot of the database.  A snapshot is a cloned
%% Penciller seeded not from disk, but by the in-memory L0 gb_tree and the
%% in-memory manifest, allowing for direct reference to the SST file
%% processes.
%%
%% Clones formed to support snapshots are registered by the Penciller, so
%% that SST files remain valid at the point of the snapshot until either the
%% iterator is completed or has timed out.
%%
%% ---------- ON STARTUP ----------
%%
%% On Startup the Bookie will ask the Penciller to initiate the Ledger first.
%% To initiate the Ledger the Penciller must consult the manifest, and then
%% start a SST management process for each file in the manifest.
%%
%% The penciller should then try and read any Level 0 file which has a
%% manifest sequence number one higher than the last sequence number stored
%% in the manifest.
%%
%% The Bookie will ask the Inker for any Keys seen beyond that sequence
%% number before the startup of the overall store can be completed.
%%
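%% A hedged sketch of that reconciliation step from the Bookie's side - the
%% reload_from_journal/2 helper is hypothetical, but
%% pcl_getstartupsequencenumber/1 is the API used to find the point from
%% which the Journal must be replayed:
%%
%%   recover_ledger(Penciller, Inker) ->
%%       PersistedSQN =
%%           leveled_penciller:pcl_getstartupsequencenumber(Penciller),
%%       % Replay Journal entries with SQN > PersistedSQN into the Ledger
%%       reload_from_journal(Inker, PersistedSQN).
%%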
%% ---------- ON SHUTDOWN ----------
%%
%% On a controlled shutdown the Penciller should attempt to write any
%% in-memory ETS table to a L0 SST file, assuming one is not already pending.
%% If one is already pending then the Penciller will not persist this part of
%% the Ledger.
%%
%% ---------- FOLDER STRUCTURE ----------
%%
%% The following folders are used by the Penciller
%% $ROOT/ledger/ledger_manifest/ - used for keeping manifest files
%% $ROOT/ledger/ledger_files/ - containing individual SST files
%%
%% In larger stores there could be a large number of files in the
%% ledger_files folder - perhaps O(1000).  It is assumed that modern file
%% systems should handle this efficiently.
%%
%% ---------- COMPACTION & MANIFEST UPDATES ----------
%%
%% The Penciller can have one and only one Clerk for performing compaction
%% work.  When the Clerk has requested and taken work, it should perform the
%% compaction work, starting the new SST process to manage the new Ledger
%% state and then writing a new manifest file that represents that state,
%% using the next Manifest sequence number as the filename:
%% - nonzero_<ManifestSQN#>.pnd
%%
%% The Penciller on accepting the change should rename the manifest file to -
%% - nonzero_<ManifestSQN#>.crr
%%
%% On startup, the Penciller should look for the nonzero_*.crr file with the
%% highest such manifest sequence number.  This will be started as the
%% manifest, together with any _0_0.sst file found at that Manifest SQN.
%% Level zero files are not kept in the persisted manifest, and adding a L0
%% file does not advance the Manifest SQN.
%%
%% The pace at which the store can accept updates will be dependent on the
%% speed at which the Penciller's Clerk can merge files at lower levels plus
%% the time it takes to merge from Level 0.  This is because, if a clerk has
%% commenced compaction work at a lower level and then immediately a L0 SST
%% file is written, the Penciller will need to wait for this compaction work
%% to complete and the L0 file to be compacted before the ETS table can be
%% allowed to again reach capacity.
%%
%% The writing of L0 files does not require the involvement of the clerk.
%% The L0 files are prompted directly by the penciller when the in-memory
%% tree has reached capacity.  This places the penciller in a
%% levelzero_pending state, and in this state it must return new pushes.
%% Once the SST file has been completed it will confirm completion to the
%% penciller, which can then revert the levelzero_pending state, add the
%% file to the manifest and clear the current level zero in-memory view.
%%

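%% As a sketch of the commit-by-rename described above (the real logic lives
%% in the manifest handling; the paths here are illustrative only):
%%
%%   commit_manifest(RootPath, ManifestSQN) ->
%%       FP = RootPath ++ "/ledger_manifest/",
%%       Base = "nonzero_" ++ integer_to_list(ManifestSQN),
%%       % An atomic file-system metadata change, rather than a file write
%%       file:rename(FP ++ Base ++ ".pnd", FP ++ Base ++ ".crr").
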
-module(leveled_penciller).

-behaviour(gen_server).

-include("include/leveled.hrl").

-export([
        init/1,
        handle_call/3,
        handle_cast/2,
        handle_info/2,
        terminate/2,
        code_change/3]).

-export([
        pcl_start/1,
        pcl_pushmem/2,
        pcl_fetchlevelzero/2,
        pcl_fetch/2,
        pcl_fetch/3,
        pcl_fetchkeys/5,
        pcl_fetchkeysbysegment/6,
        pcl_fetchnextkey/5,
        pcl_checksequencenumber/3,
        pcl_workforclerk/1,
        pcl_manifestchange/2,
        pcl_confirml0complete/5,
        pcl_confirmdelete/3,
        pcl_close/1,
        pcl_doom/1,
        pcl_releasesnapshot/2,
        pcl_registersnapshot/5,
        pcl_getstartupsequencenumber/1]).

-export([
        sst_rootpath/1,
        sst_filename/3]).

-export([
        clean_testdir/1]).

-include_lib("eunit/include/eunit.hrl").

-define(LEVEL_SCALEFACTOR, [{0, 0}, {1, 8}, {2, 64}, {3, 512},
                                {4, 4096}, {5, 32768}, {6, 262144},
                                {7, infinity}]).
-define(MAX_LEVELS, 8).
-define(MAX_WORK_WAIT, 300).
-define(MANIFEST_FP, "ledger_manifest").
-define(FILES_FP, "ledger_files").
-define(CURRENT_FILEX, "crr").
-define(PENDING_FILEX, "pnd").
-define(SST_FILEX, ".sst").
-define(ARCHIVE_FILEX, ".bak").
-define(MEMTABLE, mem).
-define(MAX_TABLESIZE, 28000). % This is less than max - but COIN_SIDECOUNT
-define(SUPER_MAX_TABLE_SIZE, 40000).
-define(PROMPT_WAIT_ONL0, 5).
-define(WORKQUEUE_BACKLOG_TOLERANCE, 4).
-define(COIN_SIDECOUNT, 5).
-define(SLOW_FETCH, 20000).
-define(ITERATOR_SCANWIDTH, 4).
-define(SNAPSHOT_TIMEOUT_LONG, 3600).
-define(SNAPSHOT_TIMEOUT_SHORT, 600).
-define(TIMING_SAMPLECOUNTDOWN, 10000).
-define(TIMING_SAMPLESIZE, 100).

-record(state, {manifest, % a manifest record from the leveled_manifest module
                persisted_sqn = 0 :: integer(), % The highest SQN persisted
                ledger_sqn = 0 :: integer(), % The highest SQN added to L0
                root_path = "../test" :: string(),

                clerk :: pid() | undefined,

                levelzero_pending = false :: boolean(),
                levelzero_constructor :: pid() | undefined,
                levelzero_cache = [] :: list(), % a list of trees
                levelzero_size = 0 :: integer(),
                levelzero_maxcachesize :: integer() | undefined,
                levelzero_cointoss = false :: boolean(),
                levelzero_index, % An array

                is_snapshot = false :: boolean(),
                snapshot_fully_loaded = false :: boolean(),
                source_penciller :: pid() | undefined,
                levelzero_astree :: list() | undefined,

                work_ongoing = false :: boolean(), % i.e. compaction work
                work_backlog = false :: boolean(), % i.e. compaction work

                timings = no_timing :: pcl_timings(),
                timings_countdown = 0 :: integer(),

                compression_method = native :: lz4|native}).

-record(pcl_timings,
                    {sample_count = 0 :: integer(),
                        foundmem_time = 0 :: integer(),
                        found0_time = 0 :: integer(),
                        found1_time = 0 :: integer(),
                        found2_time = 0 :: integer(),
                        foundlower_time = 0 :: integer(),
                        missed_time = 0 :: integer(),
                        foundmem_count = 0 :: integer(),
                        found0_count = 0 :: integer(),
                        found1_count = 0 :: integer(),
                        found2_count = 0 :: integer(),
                        foundlower_count = 0 :: integer(),
                        missed_count = 0 :: integer()}).

-type penciller_options() :: #penciller_options{}.
-type bookies_memory() :: {tuple()|empty_cache,
                            % array:array()|empty_array,
                            any()|empty_array, % Issue of type compatibility with OTP16
                            integer()|infinity,
                            integer()}.
-type pcl_state() :: #state{}.
-type pcl_timings() :: no_timing|#pcl_timings{}.

%%%============================================================================
%%% API
%%%============================================================================

-spec pcl_start(penciller_options()) -> {ok, pid()}.
%% @doc
%% Start a penciller using a penciller options record.  The start_snapshot
%% option should be used if this is to be a clone of an existing penciller,
%% otherwise the penciller will look in root path for a manifest and
%% associated sst files to start-up from a previous persisted state.
%%
%% When starting a clone a query can also be passed.  This prevents the whole
%% Level Zero memory space from being copied to the snapshot, instead the
%% query is run against the level zero space and just the query results are
%% copied into the clone.
pcl_start(PCLopts) ->
    gen_server:start(?MODULE, [PCLopts], []).

-spec pcl_pushmem(pid(), bookies_memory()) -> ok|returned.
%% @doc
%% Load the contents of the Bookie's memory of recent additions to the Ledger
%% to the Ledger proper.
%%
%% The load is made up of a cache in the form of a leveled_skiplist tuple (or
%% the atom empty_cache if no cache is present), an index of entries in the
%% skiplist in the form of leveled_pmem index (or empty_index), the minimum
%% sequence number in the cache and the maximum sequence number.
%%
%% If the penciller does not have capacity for the pushed cache it will
%% respond with the atom 'returned'.  This is a signal to hold the memory
%% at the Bookie, and try again soon.  This normally only occurs when there
%% is a backlog of merges - so the bookie should backoff for longer each time.
pcl_pushmem(Pid, LedgerCache) ->
    %% Bookie to dump memory onto penciller
    gen_server:call(Pid, {push_mem, LedgerCache}, infinity).

-spec pcl_fetchlevelzero(pid(), integer()) -> tuple().
%% @doc
%% Allows a single slot of the penciller's levelzero cache to be fetched.
%% The levelzero cache can be up to 40K keys - sending this to the process
%% that is persisting this in a SST file in a single cast will lock the
%% process for 30-40ms.  This allows that process to fetch this slot by
%% slot, so that this is split into a series of smaller events.
%%
%% The return value will be a leveled_skiplist that forms that part of the
%% cache
pcl_fetchlevelzero(Pid, Slot) ->
    % Timeout to cause crash of L0 file when it can't get the close signal
    % as it is deadlocked making this call.
    %
    % If the timeout gets hit outside of close scenario the Penciller will
    % be stuck in L0 pending
    gen_server:call(Pid, {fetch_levelzero, Slot}, 60000).

-spec pcl_fetch(pid(), tuple()) -> {tuple(), tuple()}|not_present.
%% @doc
%% Fetch a key, return the first (highest SQN) occurrence of that Key along
%% with the value.
%%
%% The Key needs to be hashable (i.e. have a tag which indicates that the key
%% can be looked up) - index entries are not hashable for example.
%%
%% If the hash is already known, call pcl_fetch/3 as segment_hash is a
%% relatively expensive hash function
pcl_fetch(Pid, Key) ->
    Hash = leveled_codec:segment_hash(Key),
    if
        Hash /= no_lookup ->
            gen_server:call(Pid, {fetch, Key, Hash}, infinity)
    end.

-spec pcl_fetch(pid(), tuple(), {integer(), integer()}) ->
                                            {tuple(), tuple()}|not_present.
%% @doc
%% Fetch a key, return the first (highest SQN) occurrence of that Key along
%% with the value.
%%
%% Hash should be result of leveled_codec:segment_hash(Key)
pcl_fetch(Pid, Key, Hash) ->
    gen_server:call(Pid, {fetch, Key, Hash}, infinity).

-spec pcl_fetchkeys(pid(), tuple(), tuple(), fun(), any()) -> any().
%% @doc
%% Run a range query between StartKey and EndKey (inclusive).  This will
%% cover all keys in the range - so must only be run against snapshots of the
%% penciller to avoid blocking behaviour.
%%
%% Comparison with the upper-end of the range (EndKey) is done using
%% leveled_codec:endkey_passed/2 - so use nulls within the tuple to manage
%% the top of the range.  Comparison with the start of the range is based on
%% Erlang term order.
pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc) ->
    gen_server:call(Pid,
                    {fetch_keys,
                        StartKey, EndKey,
                        AccFun, InitAcc,
                        false, -1},
                    infinity).

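%% A hedged usage sketch - the SnapPid and key values are illustrative,
%% assuming the accumulator fun takes Key, Value and the accumulator (as in
%% the fetch_keys fold below):
%%
%%   count_keys(SnapPid, StartKey, EndKey) ->
%%       AccFun = fun(_Key, _Value, Acc) -> Acc + 1 end,
%%       leveled_penciller:pcl_fetchkeys(SnapPid,
%%                                       StartKey, EndKey,
%%                                       AccFun, 0).
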
-spec pcl_fetchkeysbysegment(pid(), tuple(), tuple(), fun(), any(),
                                false|list(integer())) -> any().
%% @doc
%% Run a range query between StartKey and EndKey (inclusive).  This will
%% cover all keys in the range - so must only be run against snapshots of the
%% penciller to avoid blocking behaviour.
%%
%% This version allows an additional input of a SegmentList.  This is a list
%% of 16-bit integers representing the segment IDs band ((2 ^ 16) - 1) that
%% are interesting to the fetch
%%
%% Note that segment must be false unless the object Tag supports additional
%% indexing by segment.  This cannot be used on ?IDX_TAG and other tags that
%% use the no_lookup hash
pcl_fetchkeysbysegment(Pid, StartKey, EndKey, AccFun, InitAcc, SegmentList) ->
    gen_server:call(Pid,
                    {fetch_keys,
                        StartKey, EndKey,
                        AccFun, InitAcc,
                        SegmentList, -1},
                    infinity).

-spec pcl_fetchnextkey(pid(), tuple(), tuple(), fun(), any()) -> any().
%% @doc
%% Run a range query between StartKey and EndKey (inclusive).  This has the
%% same constraints as pcl_fetchkeys/5, but will only return the first key
%% found in erlang term order.
pcl_fetchnextkey(Pid, StartKey, EndKey, AccFun, InitAcc) ->
    gen_server:call(Pid,
                    {fetch_keys,
                        StartKey, EndKey,
                        AccFun, InitAcc,
                        false, 1},
                    infinity).

-spec pcl_checksequencenumber(pid(), tuple(), integer()) -> boolean().
%% @doc
%% Check if the sequence number of the passed key is not replaced by a change
%% after the passed sequence number.  Will return true if the Key is present
%% and either is equal to, or prior to the passed SQN.
%%
%% If the key is not present, it will be assumed that a higher sequence
%% number tombstone once existed, and false will be returned.
pcl_checksequencenumber(Pid, Key, SQN) ->
    Hash = leveled_codec:segment_hash(Key),
    if
        Hash /= no_lookup ->
            gen_server:call(Pid, {check_sqn, Key, Hash, SQN}, infinity)
    end.

-spec pcl_workforclerk(pid()) -> ok.
%% @doc
%% A request from the clerk to check for work.  If work is present the
%% Penciller will cast back to the clerk, no response is sent to this
%% request.
pcl_workforclerk(Pid) ->
    gen_server:cast(Pid, work_for_clerk).

-spec pcl_manifestchange(pid(), tuple()) -> ok.
%% @doc
%% Provide a manifest record (i.e. the output of the leveled_pmanifest
%% module) that is required to become the new manifest.
pcl_manifestchange(Pid, Manifest) ->
    gen_server:cast(Pid, {manifest_change, Manifest}).

-spec pcl_confirml0complete(pid(), string(), tuple(), tuple(), binary()) -> ok.
%% @doc
%% Allows a SST writer that has written a L0 file to confirm that the file
%% is now complete, so the filename and key ranges can be added to the
%% manifest and the file can be used in place of the in-memory levelzero
%% cache.
pcl_confirml0complete(Pid, FN, StartKey, EndKey, Bloom) ->
    gen_server:cast(Pid, {levelzero_complete, FN, StartKey, EndKey, Bloom}).

-spec pcl_confirmdelete(pid(), string(), pid()) -> ok.
%% @doc
%% Poll from a delete_pending file requesting a message if the file is now
%% ready for deletion (i.e. all snapshots which depend on the file have
%% finished)
pcl_confirmdelete(Pid, FileName, FilePid) ->
    gen_server:cast(Pid, {confirm_delete, FileName, FilePid}).

-spec pcl_getstartupsequencenumber(pid()) -> integer().
%% @doc
%% At startup the penciller will get the largest sequence number that is
%% within the persisted files.  This function allows for this sequence number
%% to be fetched - so that it can be used to determine parts of the Ledger
%% which may have been lost in the last shutdown (so that the ledger can
%% be reloaded from that point in the Journal)
pcl_getstartupsequencenumber(Pid) ->
    gen_server:call(Pid, get_startup_sqn, infinity).

-spec pcl_registersnapshot(pid(),
                            pid(),
                            no_lookup|{tuple(), tuple()}|undefined,
                            bookies_memory(),
                            boolean())
                                -> {ok, pcl_state()}.
%% @doc
%% Register a snapshot of the penciller, returning a state record from the
%% penciller for the snapshot to use as its LoopData
pcl_registersnapshot(Pid, Snapshot, Query, BookiesMem, LR) ->
    gen_server:call(Pid,
                    {register_snapshot, Snapshot, Query, BookiesMem, LR},
                    infinity).

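%% A hedged sketch of the snapshot lifecycle - in practice clones are started
%% via pcl_start/1 with the start_snapshot option (which calls
%% pcl_registersnapshot/5 from init/1), and release their registration on
%% termination; WorkFun and SnapOpts are illustrative:
%%
%%   with_snapshot(SnapOpts, WorkFun) ->
%%       {ok, SnapPid} = leveled_penciller:pcl_start(SnapOpts),
%%       try
%%           WorkFun(SnapPid)
%%       after
%%           % Closing the clone prompts pcl_releasesnapshot/2 via terminate/2
%%           ok = leveled_penciller:pcl_close(SnapPid)
%%       end.
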
-spec pcl_releasesnapshot(pid(), pid()) -> ok.
%% @doc
%% Inform the primary penciller that a snapshot is finished, so that the
%% penciller can allow deletes to proceed if appropriate.
pcl_releasesnapshot(Pid, Snapshot) ->
    gen_server:cast(Pid, {release_snapshot, Snapshot}).

-spec pcl_close(pid()) -> ok.
%% @doc
%% Close the penciller neatly, trying to persist to disk anything in the
%% memory
pcl_close(Pid) ->
    gen_server:call(Pid, close, 60000).

-spec pcl_doom(pid()) -> {ok, list()}.
%% @doc
%% Close the penciller neatly, trying to persist to disk anything in the
%% memory.  Return a list of filepaths from where files exist for this
%% penciller (should the calling process wish to erase the store).
pcl_doom(Pid) ->
    gen_server:call(Pid, doom, 60000).

%%%============================================================================
%%% gen_server callbacks
%%%============================================================================

init([PCLopts]) ->
    leveled_rand:seed(),
    case {PCLopts#penciller_options.root_path,
            PCLopts#penciller_options.start_snapshot,
            PCLopts#penciller_options.snapshot_query,
            PCLopts#penciller_options.bookies_mem} of
        {undefined, _Snapshot=true, Query, BookiesMem} ->
            SrcPenciller = PCLopts#penciller_options.source_penciller,
            LongRunning = PCLopts#penciller_options.snapshot_longrunning,
            {ok, State} = pcl_registersnapshot(SrcPenciller,
                                                self(),
                                                Query,
                                                BookiesMem,
                                                LongRunning),
            leveled_log:log("P0001", [self()]),
            {ok, State#state{is_snapshot=true,
                                source_penciller=SrcPenciller}};
        {_RootPath, _Snapshot=false, _Q, _BM} ->
            start_from_file(PCLopts)
    end.

handle_call({push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}},
                From,
                State=#state{is_snapshot=Snap}) when Snap == false ->
    % The push_mem process is as follows:
    %
    % 1 - Receive a cache.  The cache has four parts: a tree of keys and
    % values, an array of 256 binaries listing the hashes present in the
    % tree, a min SQN and a max SQN
    %
    % 2 - Check to see if there is a levelzero file pending.  If so, the
    % update must be returned.  If not the update can be accepted
    %
    % 3 - The Penciller can now reply to the Bookie to show if the push has
    % been accepted
    %
    % 4 - Update the cache:
    % a) Append the cache to the list
    % b) Add each of the 256 hash-listing binaries to the master L0 index
    % array
    %
    % Check the approximate size of the cache.  If it is over the maximum
    % size, trigger a background L0 file write and update the state of
    % levelzero_pending.
    case State#state.levelzero_pending or State#state.work_backlog of
        true ->
            leveled_log:log("P0018", [returned,
                                        State#state.levelzero_pending,
                                        State#state.work_backlog]),
            {reply, returned, State};
        false ->
            leveled_log:log("P0018", [ok, false, false]),
            PushedTree =
                case is_tuple(LedgerTable) of
                    true ->
                        LedgerTable;
                    false ->
                        leveled_tree:from_orderedset(LedgerTable,
                                                        ?CACHE_TYPE)
                end,
            % Reply must happen after the table has been converted
            gen_server:reply(From, ok),
            {noreply,
                update_levelzero(State#state.levelzero_size,
                                    {PushedTree, PushedIdx, MinSQN, MaxSQN},
                                    State#state.ledger_sqn,
                                    State#state.levelzero_cache,
                                    State)}
    end;
handle_call({fetch, Key, Hash}, _From, State) ->
    {R, UpdTimings} = timed_fetch_mem(Key,
                                        Hash,
                                        State#state.manifest,
                                        State#state.levelzero_cache,
                                        State#state.levelzero_index,
                                        State#state.timings),
    {UpdTimings0, CountDown} =
        update_statetimings(UpdTimings, State#state.timings_countdown),
    {reply, R, State#state{timings=UpdTimings0, timings_countdown=CountDown}};
handle_call({check_sqn, Key, Hash, SQN}, _From, State) ->
    {reply,
        compare_to_sqn(plain_fetch_mem(Key,
                                        Hash,
                                        State#state.manifest,
                                        State#state.levelzero_cache,
                                        State#state.levelzero_index),
                        SQN),
        State};
handle_call({fetch_keys,
                    StartKey, EndKey,
                    AccFun, InitAcc,
                    SegmentList, MaxKeys},
                _From,
                State=#state{snapshot_fully_loaded=Ready})
                                                        when Ready == true ->
    SW = os:timestamp(),
    L0AsList =
        case State#state.levelzero_astree of
            undefined ->
                leveled_pmem:merge_trees(StartKey,
                                            EndKey,
                                            State#state.levelzero_cache,
                                            leveled_tree:empty(?CACHE_TYPE));
            List ->
                List
        end,
    leveled_log:log_randomtimer("P0037",
                                [State#state.levelzero_size],
                                SW,
                                0.01),
    SetupFoldFun =
        fun(Level, Acc) ->
            Pointers = leveled_pmanifest:range_lookup(State#state.manifest,
                                                        Level,
                                                        StartKey,
                                                        EndKey),
            case Pointers of
                [] -> Acc;
                PL -> Acc ++ [{Level, PL}]
            end
        end,
    SSTiter = lists:foldl(SetupFoldFun, [], lists:seq(0, ?MAX_LEVELS - 1)),

    Acc = keyfolder({L0AsList, SSTiter},
                    {StartKey, EndKey},
                    {AccFun, InitAcc},
                    {SegmentList, MaxKeys}),

    {reply, Acc, State#state{levelzero_astree = L0AsList}};
handle_call(get_startup_sqn, _From, State) ->
    {reply, State#state.persisted_sqn, State};
handle_call({register_snapshot, Snapshot, Query, BookiesMem, LR}, _From, State) ->
    % Register and load a snapshot
    %
    % For setup of the snapshot to be efficient should pass a query
    % of (StartKey, EndKey) - this will avoid a full copy of the penciller's
    % memory being required to be transferred to the clone.  However, this
    % will not be a valid clone for fetch
    Timeout =
        case LR of
            true ->
                ?SNAPSHOT_TIMEOUT_LONG;
            false ->
                ?SNAPSHOT_TIMEOUT_SHORT
        end,

    Manifest0 = leveled_pmanifest:add_snapshot(State#state.manifest,
                                                Snapshot,
                                                Timeout),

    {BookieIncrTree, BookieIdx, MinSQN, MaxSQN} = BookiesMem,
    LM1Cache =
        case BookieIncrTree of
            empty_cache ->
                leveled_tree:empty(?CACHE_TYPE);
            _ ->
                BookieIncrTree
        end,

    CloneState =
        case Query of
            no_lookup ->
                {UpdMaxSQN, UpdSize, L0Cache} =
                    leveled_pmem:add_to_cache(State#state.levelzero_size,
                                                {LM1Cache, MinSQN, MaxSQN},
                                                State#state.ledger_sqn,
                                                State#state.levelzero_cache),
                #state{levelzero_cache = L0Cache,
                        ledger_sqn = UpdMaxSQN,
                        levelzero_size = UpdSize,
                        persisted_sqn = State#state.persisted_sqn};
            {StartKey, EndKey} ->
                SW = os:timestamp(),
                L0AsTree =
                    leveled_pmem:merge_trees(StartKey,
                                                EndKey,
                                                State#state.levelzero_cache,
                                                LM1Cache),
                leveled_log:log_randomtimer("P0037",
                                            [State#state.levelzero_size],
                                            SW,
                                            0.1),
                #state{levelzero_astree = L0AsTree,
                        ledger_sqn = MaxSQN,
                        persisted_sqn = State#state.persisted_sqn};
            undefined ->
                {UpdMaxSQN, UpdSize, L0Cache} =
                    leveled_pmem:add_to_cache(State#state.levelzero_size,
                                                {LM1Cache, MinSQN, MaxSQN},
                                                State#state.ledger_sqn,
                                                State#state.levelzero_cache),
                L0Index =
                    case BookieIdx of
                        empty_index ->
                            State#state.levelzero_index;
                        _ ->
                            leveled_pmem:add_to_index(BookieIdx,
                                                        State#state.levelzero_index,
                                                        length(L0Cache))
                    end,
                #state{levelzero_cache = L0Cache,
                        levelzero_index = L0Index,
                        levelzero_size = UpdSize,
                        ledger_sqn = UpdMaxSQN,
                        persisted_sqn = State#state.persisted_sqn}
        end,
    ManifestClone = leveled_pmanifest:copy_manifest(State#state.manifest),
    {reply,
        {ok,
            CloneState#state{snapshot_fully_loaded=true,
                                manifest=ManifestClone}},
        State#state{manifest = Manifest0}};
handle_call({fetch_levelzero, Slot}, _From, State) ->
    {reply, lists:nth(Slot, State#state.levelzero_cache), State};
handle_call(close, _From, State) ->
    {stop, normal, ok, State};
handle_call(doom, _From, State) ->
    leveled_log:log("P0030", []),
    ManifestFP = State#state.root_path ++ "/" ++ ?MANIFEST_FP ++ "/",
    FilesFP = State#state.root_path ++ "/" ++ ?FILES_FP ++ "/",
    {stop, normal, {ok, [ManifestFP, FilesFP]}, State}.

handle_cast({manifest_change, NewManifest}, State) ->
    NewManSQN = leveled_pmanifest:get_manifest_sqn(NewManifest),
    OldManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest),
    leveled_log:log("P0041", [OldManSQN, NewManSQN]),
    ok = leveled_pclerk:clerk_promptdeletions(State#state.clerk, NewManSQN),
    UpdManifest = leveled_pmanifest:merge_snapshot(State#state.manifest,
                                                    NewManifest),
    {noreply, State#state{manifest = UpdManifest, work_ongoing=false}};
handle_cast({release_snapshot, Snapshot}, State) ->
    Manifest0 = leveled_pmanifest:release_snapshot(State#state.manifest,
                                                    Snapshot),
    leveled_log:log("P0003", [Snapshot]),
    {noreply, State#state{manifest=Manifest0}};
handle_cast({confirm_delete, Filename, FilePid}, State=#state{is_snapshot=Snap})
                                                        when Snap == false ->
    case State#state.work_ongoing of
        false ->
            R2D = leveled_pmanifest:ready_to_delete(State#state.manifest,
                                                        Filename),
            case R2D of
                {true, M0} ->
                    leveled_log:log("P0005", [Filename]),
                    ok = leveled_sst:sst_deleteconfirmed(FilePid),
                    {noreply, State#state{manifest=M0}};
                {false, _M0} ->
                    {noreply, State}
            end;
        true ->
            % If there is ongoing work, then we can't safely update the pidmap
            % as any change will be reverted when the manifest is passed back
            % from the Clerk
            {noreply, State}
    end;
handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) ->
    leveled_log:log("P0029", []),
    ManEntry = #manifest_entry{start_key=StartKey,
                                end_key=EndKey,
                                owner=State#state.levelzero_constructor,
                                filename=FN,
                                bloom=Bloom},
    ManifestSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1,
    UpdMan = leveled_pmanifest:insert_manifest_entry(State#state.manifest,
                                                        ManifestSQN,
                                                        0,
                                                        ManEntry),
    % Prompt clerk to ask about work - do this for every L0 roll
    UpdIndex = leveled_pmem:clear_index(State#state.levelzero_index),
    ok = leveled_pclerk:clerk_prompt(State#state.clerk),
    {noreply, State#state{levelzero_cache=[],
                            levelzero_index=UpdIndex,
                            levelzero_pending=false,
                            levelzero_constructor=undefined,
                            levelzero_size=0,
                            manifest=UpdMan,
                            persisted_sqn=State#state.ledger_sqn}};
handle_cast(work_for_clerk, State) ->
    case {State#state.levelzero_pending, State#state.work_ongoing} of
        {false, false} ->
            % TODO - as part of supervision tree and retry work:
            % Need to check for work_ongoing as well as levelzero_pending as
            % there may be a race that could lead to the clerk doing the same
            % thing twice.
            %
            % This has implications though if we auto-restart the pclerk in
            % the future, without altering this state - it may never be able
            % to request work due to ongoing work that crashed the previous
            % clerk
            %
            % Perhaps the pclerk should not be restarted because of this, and
            % the failure should ripple up
            {WL, WC} = leveled_pmanifest:check_for_work(State#state.manifest,
                                                        ?LEVEL_SCALEFACTOR),
            case WC of
                0 ->
                    {noreply, State#state{work_backlog=false}};
                N ->
                    Backlog = N > ?WORKQUEUE_BACKLOG_TOLERANCE,
                    leveled_log:log("P0024", [N, Backlog]),
                    [TL|_Tail] = WL,
                    ok = leveled_pclerk:clerk_push(State#state.clerk,
                                                    {TL, State#state.manifest}),
                    {noreply,
                        State#state{work_backlog=Backlog, work_ongoing=true}}
            end;
        _ ->
            {noreply, State}
    end.

handle_info(_Info, State) ->
    {noreply, State}.

terminate(Reason, State=#state{is_snapshot=Snap}) when Snap == true ->
    ok = pcl_releasesnapshot(State#state.source_penciller, self()),
    leveled_log:log("P0007", [Reason]),
    ok;
terminate(Reason, State) ->
    %% Level 0 files lie outside of the manifest, and so if there is no L0
    %% file present it is safe to write the current contents of memory.  If
    %% there is a L0 file present - then the memory can be dropped (it is
    %% recoverable from the ledger, and there should not be a lot to recover
    %% as presumably the ETS table has been recently flushed, hence the
    %% presence of a L0 file).
    %%
    %% The penciller should close each file in the manifest, and cast a close
    %% on the clerk.
    ok = leveled_pclerk:clerk_close(State#state.clerk),

    leveled_log:log("P0008", [Reason]),
    L0_Present = leveled_pmanifest:key_lookup(State#state.manifest, 0, all),
    L0_Left = State#state.levelzero_size > 0,
    case {State#state.levelzero_pending, L0_Present, L0_Left} of
        {false, false, true} ->
            {L0Pid, _L0Bloom} = roll_memory(State, true),
            ok = leveled_sst:sst_close(L0Pid);
        StatusTuple ->
            leveled_log:log("P0010", [StatusTuple])
    end,

    % Tidy shutdown of individual files
    EntryCloseFun =
        fun(ME) ->
            case is_record(ME, manifest_entry) of
                true ->
                    ok = leveled_sst:sst_close(ME#manifest_entry.owner);
                false ->
                    {_SK, ME0} = ME,
                    ok = leveled_sst:sst_close(ME0#manifest_entry.owner)
            end
        end,
    leveled_pmanifest:close_manifest(State#state.manifest, EntryCloseFun),
    leveled_log:log("P0011", []),
    ok.


code_change(_OldVsn, State, _Extra) ->
    {ok, State}.


%%%============================================================================
%%% Path functions
%%%============================================================================

sst_rootpath(RootPath) ->
    FP = RootPath ++ "/" ++ ?FILES_FP,
    filelib:ensure_dir(FP ++ "/"),
    FP.

sst_filename(ManSQN, Level, Count) ->
    lists:flatten(io_lib:format("./~w_~w_~w" ++ ?SST_FILEX,
                                [ManSQN, Level, Count])).

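%% For example (a worked call, not part of the module's tests):
%%
%%   1> leveled_penciller:sst_filename(3, 0, 0).
%%   "./3_0_0.sst"
%%
%% i.e. ManifestSQN 3, Level 0, Count 0, with the ?SST_FILEX extension.
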
%%%============================================================================
%%% Internal functions
%%%============================================================================

start_from_file(PCLopts) ->
    RootPath = PCLopts#penciller_options.root_path,
    MaxTableSize =
        case PCLopts#penciller_options.max_inmemory_tablesize of
            undefined ->
                ?MAX_TABLESIZE;
            M ->
                M
        end,
    PressMethod = PCLopts#penciller_options.compression_method,

    {ok, MergeClerk} = leveled_pclerk:clerk_new(self(), RootPath, PressMethod),

    CoinToss = PCLopts#penciller_options.levelzero_cointoss,
    % Used to randomly defer the writing of L0 file.  Intended to help with
    % vnode synchronisation issues (e.g. stop them all by default merging to
    % level zero concurrently)

    InitState = #state{clerk = MergeClerk,
                        root_path = RootPath,
                        levelzero_maxcachesize = MaxTableSize,
                        levelzero_cointoss = CoinToss,
                        levelzero_index = leveled_pmem:new_index(),
                        compression_method = PressMethod},

    %% Open manifest
    Manifest0 = leveled_pmanifest:open_manifest(RootPath),
    OpenFun =
        fun(FN) ->
            {ok, Pid, {_FK, _LK}, Bloom} =
                leveled_sst:sst_open(sst_rootpath(RootPath), FN),
            {Pid, Bloom}
        end,
    SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1,
    {MaxSQN, Manifest1, FileList} =
        leveled_pmanifest:load_manifest(Manifest0, OpenFun, SQNFun),
    leveled_log:log("P0014", [MaxSQN]),
    ManSQN = leveled_pmanifest:get_manifest_sqn(Manifest1),
    leveled_log:log("P0035", [ManSQN]),
    %% Find any L0 files
    L0FN = sst_filename(ManSQN + 1, 0, 0),
    {State0, FileList0} =
        case filelib:is_file(filename:join(sst_rootpath(RootPath), L0FN)) of
            true ->
                leveled_log:log("P0015", [L0FN]),
                L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN),
                {ok, L0Pid, {L0StartKey, L0EndKey}, Bloom} = L0Open,
                L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid),
                L0Entry = #manifest_entry{start_key = L0StartKey,
                                            end_key = L0EndKey,
                                            filename = L0FN,
                                            owner = L0Pid,
                                            bloom = Bloom},
                Manifest2 = leveled_pmanifest:insert_manifest_entry(Manifest1,
                                                                    ManSQN + 1,
                                                                    0,
                                                                    L0Entry),
                leveled_log:log("P0016", [L0SQN]),
                LedgerSQN = max(MaxSQN, L0SQN),
                {InitState#state{manifest = Manifest2,
                                    ledger_sqn = LedgerSQN,
                                    persisted_sqn = LedgerSQN},
                    [L0FN|FileList]};
            false ->
                leveled_log:log("P0017", []),
                {InitState#state{manifest = Manifest1,
                                    ledger_sqn = MaxSQN,
                                    persisted_sqn = MaxSQN},
                    FileList}
        end,
    ok = archive_files(RootPath, FileList0),
    {ok, State0}.

archive_files(RootPath, UsedFileList) ->
    {ok, AllFiles} = file:list_dir(sst_rootpath(RootPath)),
    FileCheckFun =
        fun(FN, UnusedFiles) ->
            FN0 = "./" ++ FN,
            case filename:extension(FN0) of
                ?SST_FILEX ->
                    case lists:member(FN0, UsedFileList) of
                        true ->
                            UnusedFiles;
                        false ->
                            leveled_log:log("P0040", [FN0]),
                            [FN0|UnusedFiles]
                    end;
                _ ->
                    UnusedFiles
            end
        end,
    RenameFun =
        fun(FN) ->
            AltName = filename:join(sst_rootpath(RootPath),
                                    filename:basename(FN, ?SST_FILEX))
                        ++ ?ARCHIVE_FILEX,
            file:rename(filename:join(sst_rootpath(RootPath), FN),
                        AltName)
        end,
    FilesToArchive = lists:foldl(FileCheckFun, [], AllFiles),
    lists:foreach(RenameFun, FilesToArchive),
    ok.

update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN},
                                            LedgerSQN, L0Cache, State) ->
    SW = os:timestamp(),
    Update = leveled_pmem:add_to_cache(L0Size,
                                        {PushedTree, MinSQN, MaxSQN},
                                        LedgerSQN,
                                        L0Cache),
    UpdL0Index = leveled_pmem:add_to_index(PushedIdx,
                                            State#state.levelzero_index,
                                            length(L0Cache) + 1),

    {UpdMaxSQN, NewL0Size, UpdL0Cache} = Update,
    if
        UpdMaxSQN >= LedgerSQN ->
            UpdState = State#state{levelzero_cache=UpdL0Cache,
                                    levelzero_size=NewL0Size,
                                    levelzero_index=UpdL0Index,
                                    ledger_sqn=UpdMaxSQN},
            CacheTooBig = NewL0Size > State#state.levelzero_maxcachesize,
            CacheMuchTooBig = NewL0Size > ?SUPER_MAX_TABLE_SIZE,
            L0Free =
                not leveled_pmanifest:levelzero_present(State#state.manifest),
            RandomFactor =
                case State#state.levelzero_cointoss of
                    true ->
                        case leveled_rand:uniform(?COIN_SIDECOUNT) of
                            1 ->
                                true;
                            _ ->
                                false
                        end;
                    false ->
                        true
                end,
            NoPendingManifestChange = not State#state.work_ongoing,
            JitterCheck = RandomFactor or CacheMuchTooBig,
            case {CacheTooBig, L0Free, JitterCheck, NoPendingManifestChange} of
                {true, true, true, true} ->
                    {L0Constructor, none} = roll_memory(UpdState, false),
                    leveled_log:log_timer("P0031", [true, true], SW),
                    UpdState#state{levelzero_pending=true,
                                    levelzero_constructor=L0Constructor};
                _ ->
                    leveled_log:log_timer("P0031",
                                            [CacheTooBig, JitterCheck],
                                            SW),
                    UpdState
            end
    end.

%% Casting a large object (the levelzero cache) to the gen_server did not lead
|
|
|
|
%% to an immediate return as expected. With 32K keys in the TreeList it could
|
|
|
|
%% take around 35-40ms.
|
|
|
|
%%
|
2016-12-29 02:07:14 +00:00
|
|
|
%% To avoid blocking this gen_server, the SST file can request each item of the
|
2016-10-31 01:33:33 +00:00
|
|
|
%% cache one at a time.
|
|
|
|
%%
|
|
|
|
%% The Wait is set to false to use a cast when calling this in normal operation
|
|
|
|
%% where as the Wait of true is used at shutdown
|
|
|
|
|
|
|
|
roll_memory(State, false) ->
    ManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1,
    RootPath = sst_rootpath(State#state.root_path),
    FileName = sst_filename(ManSQN, 0, 0),
    leveled_log:log("P0019", [FileName, State#state.ledger_sqn]),
    PCL = self(),
    FetchFun = fun(Slot) -> pcl_fetchlevelzero(PCL, Slot) end,
    R = leveled_sst:sst_newlevelzero(RootPath,
                                        FileName,
                                        length(State#state.levelzero_cache),
                                        FetchFun,
                                        PCL,
                                        State#state.ledger_sqn,
                                        State#state.compression_method),
    {ok, Constructor, _} = R,
    {Constructor, none};
roll_memory(State, true) ->
    ManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1,
    RootPath = sst_rootpath(State#state.root_path),
    FileName = sst_filename(ManSQN, 0, 0),
    FetchFun = fun(Slot) -> lists:nth(Slot, State#state.levelzero_cache) end,
    KVList = leveled_pmem:to_list(length(State#state.levelzero_cache),
                                    FetchFun),
    R = leveled_sst:sst_new(RootPath,
                            FileName,
                            0,
                            KVList,
                            State#state.ledger_sqn,
                            State#state.compression_method),
    {ok, Constructor, _, Bloom} = R,
    {Constructor, Bloom}.
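
%% Illustrative call patterns (a sketch of the two return shapes above, not
%% additional API): in normal operation the roll is asynchronous and no bloom
%% is returned -
%%   {Constructor, none} = roll_memory(State, false)
%% whereas at shutdown the SST is built synchronously from the full KV list -
%%   {Constructor, Bloom} = roll_memory(State, true)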

timed_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, Timings) ->
    SW = os:timestamp(),
    {R, Level} = fetch_mem(Key, Hash, Manifest, L0Cache, L0Index),
    UpdTimings = update_timings(SW, Timings, R, Level),
    {R, UpdTimings}.

plain_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index) ->
    R = fetch_mem(Key, Hash, Manifest, L0Cache, L0Index),
    element(1, R).

fetch_mem(Key, Hash, Manifest, L0Cache, L0Index) ->
    PosList = leveled_pmem:check_index(Hash, L0Index),
    L0Check = leveled_pmem:check_levelzero(Key, Hash, PosList, L0Cache),
    case L0Check of
        {false, not_found} ->
            fetch(Key, Hash, Manifest, 0, fun timed_sst_get/4);
        {true, KV} ->
            {KV, memory}
    end.
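
%% fetch_mem/5 therefore returns one of (an illustrative summary of the
%% clauses above and in fetch/5 below):
%%   {KV, memory}            - found in the levelzero cache
%%   {KV, Level}             - found in a persisted file at that level
%%   {not_present, basement} - not found at any level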

fetch(_Key, _Hash, _Manifest, ?MAX_LEVELS + 1, _FetchFun) ->
    {not_present, basement};
fetch(Key, Hash, Manifest, Level, FetchFun) ->
    case leveled_pmanifest:key_lookup(Manifest, Level, Key) of
        false ->
            fetch(Key, Hash, Manifest, Level + 1, FetchFun);
        FP ->
            case leveled_pmanifest:check_bloom(Manifest, FP, Hash) of
                true ->
                    case FetchFun(FP, Key, Hash, Level) of
                        not_present ->
                            fetch(Key, Hash, Manifest, Level + 1, FetchFun);
                        ObjectFound ->
                            {ObjectFound, Level}
                    end;
                false ->
                    fetch(Key, Hash, Manifest, Level + 1, FetchFun)
            end
    end.
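
%% The descent through the levels can be pictured as (a sketch):
%%   Level N -> key_lookup -> bloom check -> FetchFun -> found?     return
%%                                                    -> not found? Level + 1
%% The bloom check allows a level to be skipped without a message to the SST
%% process when the hash is known not to be present in that file.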

timed_sst_get(PID, Key, Hash, Level) ->
    SW = os:timestamp(),
    R = leveled_sst:sst_get(PID, Key, Hash),
    T0 = timer:now_diff(os:timestamp(), SW),
    log_slowfetch(T0, R, PID, Level, ?SLOW_FETCH).

log_slowfetch(T0, R, PID, Level, FetchTolerance) ->
    case {T0, R} of
        {T, R} when T < FetchTolerance ->
            R;
        {T, not_present} ->
            leveled_log:log("PC016", [PID, T, Level, not_present]),
            not_present;
        {T, R} ->
            leveled_log:log("PC016", [PID, T, Level, found]),
            R
    end.
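
%% For example (values as in slow_fetch_test/0 below): a 2 microsecond fetch
%% against a tolerance of 1 microsecond is logged as slow but still returns
%% the result -
%%   "value" = log_slowfetch(2, "value", Pid, 0, 1)
%% whereas a fetch under the tolerance returns silently.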

compare_to_sqn(Obj, SQN) ->
    case Obj of
        not_present ->
            false;
        Obj ->
            SQNToCompare = leveled_codec:strip_to_seqonly(Obj),
            if
                SQNToCompare > SQN ->
                    false;
                true ->
                    true
            end
    end.
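
%% So compare_to_sqn/2 answers "is the current version of this object at or
%% below the given sequence number?" - e.g. (an illustrative sketch using the
%% key/value format of the tests below):
%%   Obj = {{o, "B1", "K1", null}, {5, {active, infinity}, 0, null}},
%%   true = compare_to_sqn(Obj, 5),
%%   false = compare_to_sqn(Obj, 4),
%%   false = compare_to_sqn(not_present, 1)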


%%%============================================================================
%%% Iterator functions
%%%
%%% TODO - move to dedicated module with extended unit testing
%%%============================================================================

-spec keyfolder(list(), list(), tuple(), tuple(), {fun(), any()}) -> any().
%% @doc
%% The keyfolder will compare an iterator across the immutable in-memory cache
%% of the Penciller (the IMMiter), with an iterator across the persisted part
%% (the SSTiter).
%%
%% A Segment List and a MaxKeys may be passed.  Every time something is added
%% to the accumulator MaxKeys is reduced - so set MaxKeys to -1 if it is
%% intended to be infinite.
%%
%% The basic principle is to take the next key in the IMMiter and compare it
%% to the next key in the SSTiter, and decide which one should be added to the
%% accumulator.  The iterators are advanced if they either win (i.e. are the
%% next key), or are dominated.  This goes on until the iterators are empty.
%%
%% To advance the SSTiter the find_nextkey/4 function is used, as the SSTiter
%% is an iterator across multiple levels - and so needs to do its own
%% comparisons to pop the next result.
keyfolder(IMMiter, SSTiter, StartKey, EndKey, {AccFun, Acc}) ->
    keyfolder({IMMiter, SSTiter},
                {StartKey, EndKey},
                {AccFun, Acc},
                {false, -1}).

keyfolder(_Iterators, _KeyRange, {_AccFun, Acc}, {_SegmentList, MaxKeys})
                                                        when MaxKeys == 0 ->
    Acc;
keyfolder({[], SSTiter}, KeyRange, {AccFun, Acc}, {SegmentList, MaxKeys}) ->
    {StartKey, EndKey} = KeyRange,
    case find_nextkey(SSTiter, StartKey, EndKey, SegmentList) of
        no_more_keys ->
            Acc;
        {NxSSTiter, {SSTKey, SSTVal}} ->
            Acc1 = AccFun(SSTKey, SSTVal, Acc),
            keyfolder({[], NxSSTiter},
                        KeyRange,
                        {AccFun, Acc1},
                        {SegmentList, MaxKeys - 1})
    end;
keyfolder({[{IMMKey, IMMVal}|NxIMMiterator], SSTiterator},
                KeyRange,
                {AccFun, Acc},
                {SegmentList, MaxKeys}) ->
    {StartKey, EndKey} = KeyRange,
    case {IMMKey < StartKey, leveled_codec:endkey_passed(EndKey, IMMKey)} of
        {true, _} ->
            % Normally everything is pre-filtered, but the IMM iterator can
            % be re-used and so may be behind the StartKey if the StartKey has
            % advanced from the previous use
            keyfolder({NxIMMiterator, SSTiterator},
                        KeyRange,
                        {AccFun, Acc},
                        {SegmentList, MaxKeys});
        {false, true} ->
            % There are no more keys in-range in the in-memory
            % iterator, so take action as if this iterator is empty
            % (see above)
            keyfolder({[], SSTiterator},
                        KeyRange,
                        {AccFun, Acc},
                        {SegmentList, MaxKeys});
        {false, false} ->
            case find_nextkey(SSTiterator, StartKey, EndKey, SegmentList) of
                no_more_keys ->
                    % No more keys in range in the persisted store, so use the
                    % in-memory KV as the next
                    Acc1 = AccFun(IMMKey, IMMVal, Acc),
                    keyfolder({NxIMMiterator, []},
                                KeyRange,
                                {AccFun, Acc1},
                                {SegmentList, MaxKeys - 1});
                {NxSSTiterator, {SSTKey, SSTVal}} ->
                    % There is a next key, so need to know which is the
                    % next key between the two (and handle two keys
                    % with different sequence numbers).
                    case leveled_codec:key_dominates({IMMKey, IMMVal},
                                                        {SSTKey, SSTVal}) of
                        left_hand_first ->
                            Acc1 = AccFun(IMMKey, IMMVal, Acc),
                            % Stow the previous best result away at Level -1
                            % so that there is no need to iterate to it again
                            NewEntry = {-1, [{SSTKey, SSTVal}]},
                            keyfolder({NxIMMiterator,
                                            lists:keystore(-1,
                                                            1,
                                                            NxSSTiterator,
                                                            NewEntry)},
                                        KeyRange,
                                        {AccFun, Acc1},
                                        {SegmentList, MaxKeys - 1});
                        right_hand_first ->
                            Acc1 = AccFun(SSTKey, SSTVal, Acc),
                            keyfolder({[{IMMKey, IMMVal}|NxIMMiterator],
                                            NxSSTiterator},
                                        KeyRange,
                                        {AccFun, Acc1},
                                        {SegmentList, MaxKeys - 1});
                        left_hand_dominant ->
                            Acc1 = AccFun(IMMKey, IMMVal, Acc),
                            % We can add to the accumulator here.  The SST
                            % key was the most dominant across all SST levels,
                            % so there is no need to hold off until the IMMKey
                            % is left hand first.
                            keyfolder({NxIMMiterator, NxSSTiterator},
                                        KeyRange,
                                        {AccFun, Acc1},
                                        {SegmentList, MaxKeys - 1})
                    end
            end
    end.
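
%% An illustrative fold (a sketch along the lines of foldwithimm_simple_test/0
%% below), collecting {Key, SQN} pairs from both the in-memory and persisted
%% iterators:
%%   AccFun = fun(K, V, Acc) ->
%%                Acc ++ [{K, leveled_codec:strip_to_seqonly({K, V})}]
%%            end,
%%   Acc = keyfolder(IMMiter, SSTiter, StartKey, EndKey, {AccFun, []})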

%% Looks to find the best choice for the next key across the levels (other
%% than in-memory table)
%% In finding the best choice, the next key in a given level may be a next
%% block or next file pointer which will need to be expanded
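%%
%% The QueryArray is a list of {Level, [KV|Pointers]} entries, e.g. (the
%% shape used in simple_findnextkey_test/0 below):
%%   [{2, [{K1, V1}, {K5, V5}]}, {3, [{K3, V3}]}, {5, [{K2, V2}]}]
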
find_nextkey(QueryArray, StartKey, EndKey) ->
    find_nextkey(QueryArray, StartKey, EndKey, false).

find_nextkey(QueryArray, StartKey, EndKey, SegmentList) ->
    find_nextkey(QueryArray,
                    -1,
                    {null, null},
                    StartKey, EndKey,
                    SegmentList, ?ITERATOR_SCANWIDTH).

find_nextkey(_QueryArray, LCnt,
                {null, null},
                _StartKey, _EndKey,
                _SegList, _Width) when LCnt > ?MAX_LEVELS ->
    % The array has been scanned without finding a best key - must be
    % exhausted - respond to indicate no more keys to be found by the
    % iterator
    no_more_keys;
find_nextkey(QueryArray, LCnt,
                {BKL, BestKV},
                _StartKey, _EndKey,
                _SegList, _Width) when LCnt > ?MAX_LEVELS ->
    % All levels have been scanned, so need to remove the best result from
    % the array, and return that array along with the best key/sqn/status
    % combination
    {BKL, [BestKV|Tail]} = lists:keyfind(BKL, 1, QueryArray),
    {lists:keyreplace(BKL, 1, QueryArray, {BKL, Tail}), BestKV};
find_nextkey(QueryArray, LCnt,
                {BestKeyLevel, BestKV},
                StartKey, EndKey,
                SegList, Width) ->
    % Get the next key at this level
    {NextKey, RestOfKeys} =
        case lists:keyfind(LCnt, 1, QueryArray) of
            false ->
                {null, null};
            {LCnt, []} ->
                {null, null};
            {LCnt, [NK|ROfKs]} ->
                {NK, ROfKs}
        end,
    % Compare the next key at this level with the best key
    case {NextKey, BestKeyLevel, BestKV} of
        {null, BKL, BKV} ->
            % There is no key at this level - go to the next level
            find_nextkey(QueryArray,
                            LCnt + 1,
                            {BKL, BKV},
                            StartKey, EndKey,
                            SegList, Width);
        {{next, Owner, _SK}, BKL, BKV} ->
            % The first key at this level is a pointer to a file - need to
            % query the file to expand this level out before proceeding
            Pointer = {next, Owner, StartKey, EndKey},
            UpdList = leveled_sst:expand_list_by_pointer(Pointer,
                                                            RestOfKeys,
                                                            Width,
                                                            SegList),
            NewEntry = {LCnt, UpdList},
            % Need to loop around at this level (LCnt) as we have not yet
            % examined a real key at this level
            find_nextkey(lists:keyreplace(LCnt, 1, QueryArray, NewEntry),
                            LCnt,
                            {BKL, BKV},
                            StartKey, EndKey,
                            SegList, Width);
        {{pointer, SSTPid, Slot, PSK, PEK}, BKL, BKV} ->
            % The first key at this level is a pointer within a file - need to
            % query the file to expand this level out before proceeding
            Pointer = {pointer, SSTPid, Slot, PSK, PEK},
            UpdList = leveled_sst:expand_list_by_pointer(Pointer,
                                                            RestOfKeys,
                                                            Width,
                                                            SegList),
            NewEntry = {LCnt, UpdList},
            % Need to loop around at this level (LCnt) as we have not yet
            % examined a real key at this level
            find_nextkey(lists:keyreplace(LCnt, 1, QueryArray, NewEntry),
                            LCnt,
                            {BKL, BKV},
                            StartKey, EndKey,
                            SegList, Width);
        {{Key, Val}, null, null} ->
            % No best key set - so can assume that this key is the best key,
            % and check the lower levels
            find_nextkey(QueryArray,
                            LCnt + 1,
                            {LCnt, {Key, Val}},
                            StartKey, EndKey,
                            SegList, Width);
        {{Key, Val}, _BKL, {BestKey, _BestVal}} when Key < BestKey ->
            % There is a real key and a best key to compare, and the real key
            % at this level is before the best key, and so is now the new best
            % key
            % The QueryArray is not modified until we have checked all levels
            find_nextkey(QueryArray,
                            LCnt + 1,
                            {LCnt, {Key, Val}},
                            StartKey, EndKey,
                            SegList, Width);
        {{Key, Val}, BKL, {BestKey, BestVal}} when Key == BestKey ->
            SQN = leveled_codec:strip_to_seqonly({Key, Val}),
            BestSQN = leveled_codec:strip_to_seqonly({BestKey, BestVal}),
            if
                SQN =< BestSQN ->
                    % This is a dominated key, so we need to skip over it
                    NewQArray = lists:keyreplace(LCnt,
                                                    1,
                                                    QueryArray,
                                                    {LCnt, RestOfKeys}),
                    find_nextkey(NewQArray,
                                    LCnt + 1,
                                    {BKL, {BestKey, BestVal}},
                                    StartKey, EndKey,
                                    SegList, Width);
                SQN > BestSQN ->
                    % There is a real key at the front of this level and it has
                    % a higher SQN than the best key, so we should use this as
                    % the best key
                    % But we also need to remove the dominated key from the
                    % lower level in the query array
                    OldBestEntry = lists:keyfind(BKL, 1, QueryArray),
                    {BKL, [{BestKey, BestVal}|BestTail]} = OldBestEntry,
                    find_nextkey(lists:keyreplace(BKL,
                                                    1,
                                                    QueryArray,
                                                    {BKL, BestTail}),
                                    LCnt + 1,
                                    {LCnt, {Key, Val}},
                                    StartKey, EndKey,
                                    SegList, Width)
            end;
        {_, BKL, BKV} ->
            % This is not the best key
            find_nextkey(QueryArray,
                            LCnt + 1,
                            {BKL, BKV},
                            StartKey, EndKey,
                            SegList, Width)
    end.


%%%============================================================================
%%% Timing Functions
%%%============================================================================

-spec update_statetimings(pcl_timings(), integer())
                                            -> {pcl_timings(), integer()}.
%% @doc
%%
%% The timings state is either in countdown to the next set of samples, or
%% we are actively collecting a sample.  Active collection takes place when
%% the countdown is 0.  Once the sample has reached the expected count then
%% there is a log of that sample, and the countdown is restarted.
%%
%% Outside of sample windows the timings object should be set to the atom
%% no_timing.  no_timing is a valid state for the pcl_timings type.
update_statetimings(no_timing, 0) ->
    {#pcl_timings{}, 0};
update_statetimings(Timings, 0) ->
    case Timings#pcl_timings.sample_count of
        SC when SC >= ?TIMING_SAMPLESIZE ->
            log_timings(Timings),
            {no_timing, leveled_rand:uniform(2 * ?TIMING_SAMPLECOUNTDOWN)};
        _SC ->
            {Timings, 0}
    end;
update_statetimings(no_timing, N) ->
    {no_timing, N - 1}.
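
%% The lifecycle is therefore (an illustrative sketch):
%%   {no_timing, N} -> ... -> {no_timing, 0} -> {#pcl_timings{}, 0}
%%     -> samples accumulate via update_timings/4
%%     -> at ?TIMING_SAMPLESIZE samples the P0032 log is written and a fresh
%%        random countdown (up to 2 * ?TIMING_SAMPLECOUNTDOWN) begins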

log_timings(Timings) ->
    leveled_log:log("P0032", [Timings#pcl_timings.sample_count,
                                Timings#pcl_timings.foundmem_time,
                                Timings#pcl_timings.found0_time,
                                Timings#pcl_timings.found1_time,
                                Timings#pcl_timings.found2_time,
                                Timings#pcl_timings.foundlower_time,
                                Timings#pcl_timings.missed_time,
                                Timings#pcl_timings.foundmem_count,
                                Timings#pcl_timings.found0_count,
                                Timings#pcl_timings.found1_count,
                                Timings#pcl_timings.found2_count,
                                Timings#pcl_timings.foundlower_count,
                                Timings#pcl_timings.missed_count]).

-spec update_timings(erlang:timestamp(), pcl_timings(),
                        not_present|tuple(), integer()|basement)
                                                        -> pcl_timings().
%% @doc
%%
%% Update the timings record unless the current record object is the atom
%% no_timing.
update_timings(_SW, no_timing, _Result, _Stage) ->
    no_timing;
update_timings(SW, Timings, Result, Stage) ->
    Timer = timer:now_diff(os:timestamp(), SW),
    SC = Timings#pcl_timings.sample_count + 1,
    Timings0 = Timings#pcl_timings{sample_count = SC},
    case {Result, Stage} of
        {not_present, _} ->
            NFT = Timings#pcl_timings.missed_time + Timer,
            NFC = Timings#pcl_timings.missed_count + 1,
            Timings0#pcl_timings{missed_time = NFT, missed_count = NFC};
        {_, memory} ->
            PMT = Timings#pcl_timings.foundmem_time + Timer,
            PMC = Timings#pcl_timings.foundmem_count + 1,
            Timings0#pcl_timings{foundmem_time = PMT, foundmem_count = PMC};
        {_, 0} ->
            L0T = Timings#pcl_timings.found0_time + Timer,
            L0C = Timings#pcl_timings.found0_count + 1,
            Timings0#pcl_timings{found0_time = L0T, found0_count = L0C};
        {_, 1} ->
            L1T = Timings#pcl_timings.found1_time + Timer,
            L1C = Timings#pcl_timings.found1_count + 1,
            Timings0#pcl_timings{found1_time = L1T, found1_count = L1C};
        {_, 2} ->
            L2T = Timings#pcl_timings.found2_time + Timer,
            L2C = Timings#pcl_timings.found2_count + 1,
            Timings0#pcl_timings{found2_time = L2T, found2_count = L2C};
        _ ->
            LLT = Timings#pcl_timings.foundlower_time + Timer,
            LLC = Timings#pcl_timings.foundlower_count + 1,
            Timings0#pcl_timings{foundlower_time = LLT, foundlower_count = LLC}
    end.


%%%============================================================================
%%% Test
%%%============================================================================

-ifdef(TEST).

generate_randomkeys({Count, StartSQN}) ->
    generate_randomkeys(Count, StartSQN, []).

generate_randomkeys(0, _SQN, Acc) ->
    lists:reverse(Acc);
generate_randomkeys(Count, SQN, Acc) ->
    K = {o,
            lists:concat(["Bucket", leveled_rand:uniform(1024)]),
            lists:concat(["Key", leveled_rand:uniform(1024)]),
            null},
    RandKey = {K,
                {SQN,
                    {active, infinity},
                    leveled_codec:segment_hash(K),
                    null}},
    generate_randomkeys(Count - 1, SQN + 1, [RandKey|Acc]).

clean_testdir(RootPath) ->
    clean_subdir(sst_rootpath(RootPath)),
    clean_subdir(filename:join(RootPath, ?MANIFEST_FP)).

clean_subdir(DirPath) ->
    case filelib:is_dir(DirPath) of
        true ->
            {ok, Files} = file:list_dir(DirPath),
            lists:foreach(fun(FN) ->
                                File = filename:join(DirPath, FN),
                                ok = file:delete(File),
                                io:format("Success deleting ~s~n", [File])
                            end,
                            Files);
        false ->
            ok
    end.

maybe_pause_push(PCL, KL) ->
    T0 = [],
    I0 = leveled_pmem:new_index(),
    T1 = lists:foldl(fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) ->
                            UpdSL = [{K, V}|AccSL],
                            SQN = leveled_codec:strip_to_seqonly({K, V}),
                            H = leveled_codec:segment_hash(K),
                            UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H),
                            {UpdSL, UpdIdx, min(SQN, MinSQN), max(SQN, MaxSQN)}
                        end,
                        {T0, I0, infinity, 0},
                        KL),
    SL = element(1, T1),
    Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, SL), ?CACHE_TYPE),
    T2 = setelement(1, T1, Tree),
    case pcl_pushmem(PCL, T2) of
        returned ->
            timer:sleep(50),
            maybe_pause_push(PCL, KL);
        ok ->
            ok
    end.

%% old test data doesn't have the magic hash
add_missing_hash({K, {SQN, ST, MD}}) ->
    {K, {SQN, ST, leveled_codec:segment_hash(K), MD}}.

clean_dir_test() ->
    % Pointless gesture to test coverage
    RootPath = "../test/ledger",
    ok = filelib:ensure_dir(RootPath),
    ?assertMatch(ok, file:write_file(RootPath ++ "/test.bob", "hello")),
    ok = clean_subdir(RootPath ++ "/test.bob"),
    ok = file:delete(RootPath ++ "/test.bob").

archive_files_test() ->
    RootPath = "../test/ledger",
    SSTPath = sst_rootpath(RootPath),
    ok = filelib:ensure_dir(SSTPath),
    ok = file:write_file(SSTPath ++ "/test1.sst", "hello_world"),
    ok = file:write_file(SSTPath ++ "/test2.sst", "hello_world"),
    ok = file:write_file(SSTPath ++ "/test3.bob", "hello_world"),
    UsedFiles = ["./test1.sst"],
    ok = archive_files(RootPath, UsedFiles),
    {ok, AllFiles} = file:list_dir(SSTPath),
    ?assertMatch(true, lists:member("test1.sst", AllFiles)),
    ?assertMatch(false, lists:member("test2.sst", AllFiles)),
    ?assertMatch(true, lists:member("test3.bob", AllFiles)),
    ?assertMatch(true, lists:member("test2.bak", AllFiles)),
    ok = clean_subdir(SSTPath).

simple_server_test() ->
    RootPath = "../test/ledger",
    clean_testdir(RootPath),
    {ok, PCL} = pcl_start(#penciller_options{root_path=RootPath,
                                                max_inmemory_tablesize=1000,
                                                compression_method=native}),
    Key1_Pre = {{o,"Bucket0001", "Key0001", null},
                    {1, {active, infinity}, null}},
    Key1 = add_missing_hash(Key1_Pre),
    KL1 = generate_randomkeys({1000, 2}),
    Key2_Pre = {{o,"Bucket0002", "Key0002", null},
                    {1002, {active, infinity}, null}},
    Key2 = add_missing_hash(Key2_Pre),
    KL2 = generate_randomkeys({900, 1003}),
    % Keep below the max table size by having 900 not 1000
    Key3_Pre = {{o,"Bucket0003", "Key0003", null},
                    {2003, {active, infinity}, null}},
    Key3 = add_missing_hash(Key3_Pre),
    KL3 = generate_randomkeys({1000, 2004}),
    Key4_Pre = {{o,"Bucket0004", "Key0004", null},
                    {3004, {active, infinity}, null}},
    Key4 = add_missing_hash(Key4_Pre),
    KL4 = generate_randomkeys({1000, 3005}),
    ok = maybe_pause_push(PCL, [Key1]),
    ?assertMatch(Key1, pcl_fetch(PCL, {o,"Bucket0001", "Key0001", null})),
    ok = maybe_pause_push(PCL, KL1),
    ?assertMatch(Key1, pcl_fetch(PCL, {o,"Bucket0001", "Key0001", null})),
    ok = maybe_pause_push(PCL, [Key2]),
    ?assertMatch(Key1, pcl_fetch(PCL, {o,"Bucket0001", "Key0001", null})),
    ?assertMatch(Key2, pcl_fetch(PCL, {o,"Bucket0002", "Key0002", null})),

    ok = maybe_pause_push(PCL, KL2),
    ?assertMatch(Key2, pcl_fetch(PCL, {o,"Bucket0002", "Key0002", null})),
    ok = maybe_pause_push(PCL, [Key3]),

    ?assertMatch(Key1, pcl_fetch(PCL, {o,"Bucket0001", "Key0001", null})),
    ?assertMatch(Key2, pcl_fetch(PCL, {o,"Bucket0002", "Key0002", null})),
    ?assertMatch(Key3, pcl_fetch(PCL, {o,"Bucket0003", "Key0003", null})),
    timer:sleep(200),
    % This sleep should make sure that the merge to L1 has occurred
    % This will free up the L0 slot for the remainder to be written in
    % shutdown
    ok = pcl_close(PCL),

    {ok, PCLr} = pcl_start(#penciller_options{root_path=RootPath,
                                                max_inmemory_tablesize=1000,
                                                compression_method=native}),
    ?assertMatch(2003, pcl_getstartupsequencenumber(PCLr)),
    % ok = maybe_pause_push(PCLr, [Key2] ++ KL2 ++ [Key3]),

    ?assertMatch(Key1, pcl_fetch(PCLr, {o,"Bucket0001", "Key0001", null})),
    ?assertMatch(Key2, pcl_fetch(PCLr, {o,"Bucket0002", "Key0002", null})),
    ?assertMatch(Key3, pcl_fetch(PCLr, {o,"Bucket0003", "Key0003", null})),
    ok = maybe_pause_push(PCLr, KL3),
    ok = maybe_pause_push(PCLr, [Key4]),
    ok = maybe_pause_push(PCLr, KL4),
    ?assertMatch(Key1, pcl_fetch(PCLr, {o,"Bucket0001", "Key0001", null})),
    ?assertMatch(Key2, pcl_fetch(PCLr, {o,"Bucket0002", "Key0002", null})),
    ?assertMatch(Key3, pcl_fetch(PCLr, {o,"Bucket0003", "Key0003", null})),
    ?assertMatch(Key4, pcl_fetch(PCLr, {o,"Bucket0004", "Key0004", null})),

    {ok, PclSnap, null} =
        leveled_bookie:snapshot_store(leveled_bookie:empty_ledgercache(),
                                        PCLr,
                                        null,
                                        ledger,
                                        undefined,
                                        false),

    ?assertMatch(Key1, pcl_fetch(PclSnap, {o,"Bucket0001", "Key0001", null})),
    ?assertMatch(Key2, pcl_fetch(PclSnap, {o,"Bucket0002", "Key0002", null})),
    ?assertMatch(Key3, pcl_fetch(PclSnap, {o,"Bucket0003", "Key0003", null})),
    ?assertMatch(Key4, pcl_fetch(PclSnap, {o,"Bucket0004", "Key0004", null})),
    ?assertMatch(true, pcl_checksequencenumber(PclSnap,
                                                {o,
                                                    "Bucket0001",
                                                    "Key0001",
                                                    null},
                                                1)),
    ?assertMatch(true, pcl_checksequencenumber(PclSnap,
                                                {o,
                                                    "Bucket0002",
                                                    "Key0002",
                                                    null},
                                                1002)),
    ?assertMatch(true, pcl_checksequencenumber(PclSnap,
                                                {o,
                                                    "Bucket0003",
                                                    "Key0003",
                                                    null},
                                                2003)),
    ?assertMatch(true, pcl_checksequencenumber(PclSnap,
                                                {o,
                                                    "Bucket0004",
                                                    "Key0004",
                                                    null},
                                                3004)),
    % Add some more keys and confirm that check sequence number still
    % sees the old version in the previous snapshot, but will see the new
    % version in a new snapshot

    Key1A_Pre = {{o,"Bucket0001", "Key0001", null},
                    {4005, {active, infinity}, null}},
    Key1A = add_missing_hash(Key1A_Pre),
    KL1A = generate_randomkeys({2000, 4006}),
    ok = maybe_pause_push(PCLr, [Key1A]),
    ok = maybe_pause_push(PCLr, KL1A),
    ?assertMatch(true, pcl_checksequencenumber(PclSnap,
                                                {o,
                                                    "Bucket0001",
                                                    "Key0001",
                                                    null},
                                                1)),
    ok = pcl_close(PclSnap),

    {ok, PclSnap2, null} =
        leveled_bookie:snapshot_store(leveled_bookie:empty_ledgercache(),
                                        PCLr,
                                        null,
                                        ledger,
                                        undefined,
                                        false),

    ?assertMatch(false, pcl_checksequencenumber(PclSnap2,
                                                {o,
                                                    "Bucket0001",
                                                    "Key0001",
                                                    null},
                                                1)),
    ?assertMatch(true, pcl_checksequencenumber(PclSnap2,
                                                {o,
                                                    "Bucket0001",
                                                    "Key0001",
                                                    null},
                                                4005)),
    ?assertMatch(true, pcl_checksequencenumber(PclSnap2,
                                                {o,
                                                    "Bucket0002",
                                                    "Key0002",
                                                    null},
                                                1002)),
    ok = pcl_close(PclSnap2),
    ok = pcl_close(PCLr),
    clean_testdir(RootPath).

simple_findnextkey_test() ->
    QueryArray = [
        {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, null}},
                {{o, "Bucket1", "Key5"}, {4, {active, infinity}, null}}]},
        {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}]},
        {5, [{{o, "Bucket1", "Key2"}, {2, {active, infinity}, null}}]}
    ],
    {Array2, KV1} = find_nextkey(QueryArray,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, null}}, KV1),
    {Array3, KV2} = find_nextkey(Array2,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key2"}, {2, {active, infinity}, null}}, KV2),
    {Array4, KV3} = find_nextkey(Array3,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}, KV3),
    {Array5, KV4} = find_nextkey(Array4,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key5"}, {4, {active, infinity}, null}}, KV4),
    ER = find_nextkey(Array5,
                        {o, "Bucket1", "Key0"},
                        {o, "Bucket1", "Key5"}),
    ?assertMatch(no_more_keys, ER).

sqnoverlap_findnextkey_test() ->
    QueryArray = [
        {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}},
                {{o, "Bucket1", "Key5"}, {4, {active, infinity}, 0, null}}]},
        {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]},
        {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]}
    ],
    {Array2, KV1} = find_nextkey(QueryArray,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}},
                    KV1),
    {Array3, KV2} = find_nextkey(Array2,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}},
                    KV2),
    {Array4, KV3} = find_nextkey(Array3,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key5"}, {4, {active, infinity}, 0, null}},
                    KV3),
    ER = find_nextkey(Array4,
                        {o, "Bucket1", "Key0"},
                        {o, "Bucket1", "Key5"}),
    ?assertMatch(no_more_keys, ER).

sqnoverlap_otherway_findnextkey_test() ->
    QueryArray = [
        {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}},
                {{o, "Bucket1", "Key5"}, {1, {active, infinity}, 0, null}}]},
        {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]},
        {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]}
    ],
    {Array2, KV1} = find_nextkey(QueryArray,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}},
                    KV1),
    {Array3, KV2} = find_nextkey(Array2,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}},
                    KV2),
    {Array4, KV3} = find_nextkey(Array3,
                                    {o, "Bucket1", "Key0"},
                                    {o, "Bucket1", "Key5"}),
    ?assertMatch({{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}},
                    KV3),
    ER = find_nextkey(Array4,
                        {o, "Bucket1", "Key0"},
                        {o, "Bucket1", "Key5"}),
    ?assertMatch(no_more_keys, ER).

foldwithimm_simple_test() ->
    QueryArray = [
        {2, [{{o, "Bucket1", "Key1", null},
                    {5, {active, infinity}, 0, null}},
                {{o, "Bucket1", "Key5", null},
                    {1, {active, infinity}, 0, null}}]},
        {3, [{{o, "Bucket1", "Key3", null},
                {3, {active, infinity}, 0, null}}]},
        {5, [{{o, "Bucket1", "Key5", null},
                {2, {active, infinity}, 0, null}}]}
    ],
    KL1A = [{{o, "Bucket1", "Key6", null}, {7, {active, infinity}, 0, null}},
            {{o, "Bucket1", "Key1", null}, {8, {active, infinity}, 0, null}},
            {{o, "Bucket1", "Key8", null}, {9, {active, infinity}, 0, null}}],
    IMM2 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1A), ?CACHE_TYPE),
    IMMiter = leveled_tree:match_range({o, "Bucket1", "Key1", null},
                                        {o, null, null, null},
                                        IMM2),
    AccFun = fun(K, V, Acc) -> SQN = leveled_codec:strip_to_seqonly({K, V}),
                                Acc ++ [{K, SQN}] end,
    Acc = keyfolder(IMMiter,
                    QueryArray,
                    {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null},
                    {AccFun, []}),
    ?assertMatch([{{o, "Bucket1", "Key1", null}, 8},
                    {{o, "Bucket1", "Key3", null}, 3},
                    {{o, "Bucket1", "Key5", null}, 2},
                    {{o, "Bucket1", "Key6", null}, 7}], Acc),

    IMMiterA = [{{o, "Bucket1", "Key1", null},
                    {8, {active, infinity}, 0, null}}],
    AccA = keyfolder(IMMiterA,
                        QueryArray,
                        {o, "Bucket1", "Key1", null},
                        {o, "Bucket1", "Key6", null},
                        {AccFun, []}),
    ?assertMatch([{{o, "Bucket1", "Key1", null}, 8},
                    {{o, "Bucket1", "Key3", null}, 3},
                    {{o, "Bucket1", "Key5", null}, 2}], AccA),

    AddKV = {{o, "Bucket1", "Key4", null}, {10, {active, infinity}, 0, null}},
    KL1B = [AddKV|KL1A],
    IMM3 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1B), ?CACHE_TYPE),
    IMMiterB = leveled_tree:match_range({o, "Bucket1", "Key1", null},
                                        {o, null, null, null},
                                        IMM3),
    io:format("Compare IMM3 with QueryArray~n"),
    AccB = keyfolder(IMMiterB,
                        QueryArray,
                        {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null},
                        {AccFun, []}),
    ?assertMatch([{{o, "Bucket1", "Key1", null}, 8},
                    {{o, "Bucket1", "Key3", null}, 3},
                    {{o, "Bucket1", "Key4", null}, 10},
                    {{o, "Bucket1", "Key5", null}, 2},
                    {{o, "Bucket1", "Key6", null}, 7}], AccB).

create_file_test() ->
    {RP, Filename} = {"../test/", "new_file.sst"},
    ok = file:write_file(filename:join(RP, Filename), term_to_binary("hello")),
    KVL = lists:usort(generate_randomkeys({10000, 0})),
    Tree = leveled_tree:from_orderedlist(KVL, ?CACHE_TYPE),
    FetchFun = fun(Slot) -> lists:nth(Slot, [Tree]) end,
    {ok,
        SP,
        noreply} = leveled_sst:sst_newlevelzero(RP,
                                                Filename,
                                                1,
                                                FetchFun,
                                                undefined,
                                                10000,
                                                native),
    lists:foreach(fun(X) ->
                        case checkready(SP) of
                            timeout ->
                                timer:sleep(X);
                            _ ->
                                ok
                        end end,
                    [50, 50, 50, 50, 50]),
    {ok, SrcFN, StartKey, EndKey} = checkready(SP),
    io:format("StartKey ~w EndKey ~w~n", [StartKey, EndKey]),
    ?assertMatch({o, _, _, _}, StartKey),
    ?assertMatch({o, _, _, _}, EndKey),
    ?assertMatch("./new_file.sst", SrcFN),
    ok = leveled_sst:sst_clear(SP),
    {ok, Bin} = file:read_file("../test/new_file.sst.discarded"),
    ?assertMatch("hello", binary_to_term(Bin)).

slow_fetch_test() ->
    ?assertMatch(not_present, log_slowfetch(2, not_present, "fake", 0, 1)),
    ?assertMatch("value", log_slowfetch(2, "value", "fake", 0, 1)).

checkready(Pid) ->
    try
        leveled_sst:sst_checkready(Pid)
    catch
        exit:{timeout, _} ->
            timeout
    end.

timings_test() ->
    SW = os:timestamp(),
    timer:sleep(1),
    T0 = update_timings(SW, #pcl_timings{}, {"K", "V"}, 2),
    timer:sleep(1),
    T1 = update_timings(SW, T0, {"K", "V"}, 3),
    T2 = update_timings(SW, T1, {"K", "V"}, basement),
    ?assertMatch(3, T2#pcl_timings.sample_count),
    ?assertMatch(true, T2#pcl_timings.foundlower_time > T2#pcl_timings.found2_time),
    ?assertMatch(1, T2#pcl_timings.found2_count),
    ?assertMatch(2, T2#pcl_timings.foundlower_count).

coverage_cheat_test() ->
    {noreply, _State0} = handle_info(timeout, #state{}),
    {ok, _State1} = code_change(null, #state{}, null).

-endif.