From 8bf36214e11563c1e9c554df4d5f54fa5fcc70aa Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 14 Dec 2018 11:23:04 +0000 Subject: [PATCH 1/4] Make snapshot timeout configurable Also increase defaults. In riak snapshots may be used to fold over all objects, and with delays at the receiving end, this could take significant time --- include/leveled.hrl | 4 +++- src/leveled_bookie.erl | 24 +++++++++++++++++++++--- src/leveled_penciller.erl | 37 ++++++++++++++++++++++--------------- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/include/leveled.hrl b/include/leveled.hrl index 8106c02..9c792ca 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -78,7 +78,9 @@ source_penciller :: pid() | undefined, snapshot_longrunning = true :: boolean(), compression_method = native :: lz4|native, - levelzero_cointoss = false :: boolean()}). + levelzero_cointoss = false :: boolean(), + snaptimeout_short :: pos_integer() | undefined, + snaptimeout_long :: pos_integer() | undefined}). -record(iclerk_options, {inker :: pid() | undefined, diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 9cfba69..9938bed 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -104,7 +104,6 @@ -define(MIN_PCL_CACHE_SIZE, 400). -define(MAX_PCL_CACHE_SIZE, 28000). % This is less than actual max - but COIN_SIDECOUNT --define(SNAPSHOT_TIMEOUT, 300000). -define(CACHE_SIZE_JITTER, 25). -define(JOURNAL_SIZE_JITTER, 20). -define(ABSOLUTEMAX_JOURNALSIZE, 4000000000). @@ -118,6 +117,8 @@ -define(MAX_KEYCHECK_FREQUENCY, 100). -define(MIN_KEYCHECK_FREQUENCY, 1). -define(OPEN_LASTMOD_RANGE, {0, infinity}). +-define(PCL_SNAPTIMEOUT_SHORT, 900). % 15 minutes +-define(PCL_SNAPTIMEOUT_LONG, 43200). % 12 hours -define(OPTION_DEFAULTS, [{root_path, undefined}, {snapshot_bookie, undefined}, @@ -135,7 +136,9 @@ {compression_point, ?COMPRESSION_POINT}, {log_level, ?LOG_LEVEL}, {forced_logs, []}, - {override_functions, []}]). 
+ {override_functions, []}, + {pcl_snapshottimeout_short, ?PCL_SNAPTIMEOUT_SHORT}, + {pcl_snapshottimeout_long, ?PCL_SNAPTIMEOUT_LONG}]). -record(ledger_cache, {mem :: ets:tab(), loader = leveled_tree:empty(?CACHE_TYPE) @@ -328,9 +331,19 @@ % "P0032", "SST12", "CDB19", "SST13", "I0019"]} % Will log all timing points even when log_level is not set to % support info - {override_functions, list(leveled_head:appdefinable_function_tuple())} + {override_functions, list(leveled_head:appdefinable_function_tuple())} | % Provide a list of override functions that will be used for % user-defined tags + {pcl_snapshottimeout_short, pos_integer()} | + % Time in seconds before a snapshot that has not been shutdown is + % assumed to have failed, and so requires to be torndown. The + % short timeout is applied to queries where long_running is set to + % false + {pcl_snapshottimeout_long, pos_integer()} + % Time in seconds before a snapshot that has not been shutdown is + % assumed to have failed, and so requires to be torndown. The + % short timeout is applied to queries where long_running is set to + % true ]. 
@@ -1564,6 +1577,9 @@ set_options(Opts) -> SyncStrat = proplists:get_value(sync_strategy, Opts), WRP = proplists:get_value(waste_retention_period, Opts), + SnapTimeoutShort = proplists:get_value(pcl_snapshottimeout_short, Opts), + SnapTimeoutLong = proplists:get_value(pcl_snapshottimeout_long, Opts), + AltStrategy = proplists:get_value(reload_strategy, Opts), ReloadStrategy = leveled_codec:inker_reload_strategy(AltStrategy), @@ -1613,6 +1629,8 @@ set_options(Opts) -> #penciller_options{root_path = LedgerFP, max_inmemory_tablesize = PCLL0CacheSize, levelzero_cointoss = true, + snaptimeout_short = SnapTimeoutShort, + snaptimeout_long = SnapTimeoutLong, sst_options = #sst_options{press_method = CompressionMethod, log_options=leveled_log:get_opts()}} diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index c1e9fdc..3480124 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -236,8 +236,6 @@ -define(COIN_SIDECOUNT, 5). -define(SLOW_FETCH, 20000). -define(ITERATOR_SCANWIDTH, 4). --define(SNAPSHOT_TIMEOUT_LONG, 3600). --define(SNAPSHOT_TIMEOUT_SHORT, 600). -define(TIMING_SAMPLECOUNTDOWN, 10000). -define(TIMING_SAMPLESIZE, 100). -define(OPEN_LASTMOD_RANGE, {0, infinity}). @@ -270,6 +268,9 @@ timings = no_timing :: pcl_timings(), timings_countdown = 0 :: integer(), + snaptimeout_short :: pos_integer()|undefined, + snaptimeout_long :: pos_integer()|undefined, + sst_options = #sst_options{} :: #sst_options{}}). 
-record(pcl_timings, @@ -633,7 +634,8 @@ init([LogOpts, PCLopts]) -> LongRunning = PCLopts#penciller_options.snapshot_longrunning, %% monitor the bookie, and close the snapshot when bookie %% exits - BookieMonitor = erlang:monitor(process, PCLopts#penciller_options.bookies_pid), + BookieMonitor = + erlang:monitor(process, PCLopts#penciller_options.bookies_pid), {ok, State} = pcl_registersnapshot(SrcPenciller, self(), @@ -793,25 +795,25 @@ handle_call({fetch_keys, end; handle_call(get_startup_sqn, _From, State) -> {reply, State#state.persisted_sqn, State}; -handle_call({register_snapshot, Snapshot, Query, BookiesMem, LR}, _From, State) -> +handle_call({register_snapshot, Snapshot, Query, BookiesMem, LongRunning}, + _From, State) -> % Register and load a snapshot % % For setup of the snapshot to be efficient should pass a query % of (StartKey, EndKey) - this will avoid a fully copy of the penciller's % memory being required to be trasnferred to the clone. However, this % will not be a valid clone for fetch - Timeout = - case LR of - true -> - ?SNAPSHOT_TIMEOUT_LONG; - false -> - ?SNAPSHOT_TIMEOUT_SHORT - end, - - Manifest0 = leveled_pmanifest:add_snapshot(State#state.manifest, - Snapshot, - Timeout), + TimeO = + case LongRunning of + true -> + State#state.snaptimeout_long; + false -> + State#state.snaptimeout_short + end, + Manifest0 = + leveled_pmanifest:add_snapshot(State#state.manifest, Snapshot, TimeO), + {BookieIncrTree, BookieIdx, MinSQN, MaxSQN} = BookiesMem, LM1Cache = case BookieIncrTree of @@ -1090,6 +1092,9 @@ start_from_file(PCLopts) -> RootPath = PCLopts#penciller_options.root_path, MaxTableSize = PCLopts#penciller_options.max_inmemory_tablesize, OptsSST = PCLopts#penciller_options.sst_options, + + SnapTimeoutShort = PCLopts#penciller_options.snaptimeout_short, + SnapTimeoutLong = PCLopts#penciller_options.snaptimeout_long, {ok, MergeClerk} = leveled_pclerk:clerk_new(self(), RootPath, OptsSST), @@ -1103,6 +1108,8 @@ start_from_file(PCLopts) -> 
levelzero_maxcachesize = MaxTableSize, levelzero_cointoss = CoinToss, levelzero_index = leveled_pmem:new_index(), + snaptimeout_short = SnapTimeoutShort, + snaptimeout_long = SnapTimeoutLong, sst_options = OptsSST}, %% Open manifest From 2741c46daa58639967801b2d52f4227356b99b88 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 14 Dec 2018 13:53:36 +0000 Subject: [PATCH 2/4] Add timeout for inker snapshots So that they can also be released if they silently crash without closing neatly. --- docs/STARTUP_OPTIONS.md | 12 ++++++++++++ include/leveled.hrl | 3 ++- src/leveled_bookie.erl | 17 +++++++++-------- src/leveled_inker.erl | 29 ++++++++++++++++++++++++----- 4 files changed, 47 insertions(+), 14 deletions(-) diff --git a/docs/STARTUP_OPTIONS.md b/docs/STARTUP_OPTIONS.md index b946385..e73a021 100644 --- a/docs/STARTUP_OPTIONS.md +++ b/docs/STARTUP_OPTIONS.md @@ -107,3 +107,15 @@ The `compaction_runs_perday` indicates for the leveled store how many times eahc The `compaction_low_hour` and `compaction_high_hour` are the hours of the day which support the compaction window - set to 0 and 23 respectively if compaction is required to be a continuous process. The `max_run_length` controls how many files can be compacted in a single compaction run. The scoring of files and runs is controlled through `maxrunlength_compactionpercentage` and `singlefile_compactionpercentage`. + + +## Snapshot Timeouts + +There are two snapshot timeouts that can be configured: + +- `snapshot_timeout_short` +- `snapshot_timeout_long` + +These set the period in seconds before a snapshot which has not shut down is declared to have been released - so that any file deletions which are awaiting the snapshot's completion can go ahead. + +This covers only silently failing snapshots. Snapshots that shut down neatly will be released from locking deleted files when they shut down. The 'short' timeout is used for snapshots which support index queries and bucket listing. 
The 'long' timeout is used for all other folds (e.g. key lists, head folds and object folds). diff --git a/include/leveled.hrl b/include/leveled.hrl index 9c792ca..a945f0c 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -65,7 +65,8 @@ compress_on_receipt = false :: boolean(), max_run_length, singlefile_compactionperc :: float()|undefined, - maxrunlength_compactionperc :: float()|undefined}). + maxrunlength_compactionperc :: float()|undefined, + snaptimeout_long :: pos_integer() | undefined}). -record(penciller_options, {root_path :: string() | undefined, diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 9938bed..c6027a0 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -117,8 +117,8 @@ -define(MAX_KEYCHECK_FREQUENCY, 100). -define(MIN_KEYCHECK_FREQUENCY, 1). -define(OPEN_LASTMOD_RANGE, {0, infinity}). --define(PCL_SNAPTIMEOUT_SHORT, 900). % 15 minutes --define(PCL_SNAPTIMEOUT_LONG, 43200). % 12 hours +-define(SNAPTIMEOUT_SHORT, 900). % 15 minutes +-define(SNAPTIMEOUT_LONG, 43200). % 12 hours -define(OPTION_DEFAULTS, [{root_path, undefined}, {snapshot_bookie, undefined}, @@ -137,8 +137,8 @@ {log_level, ?LOG_LEVEL}, {forced_logs, []}, {override_functions, []}, - {pcl_snapshottimeout_short, ?PCL_SNAPTIMEOUT_SHORT}, - {pcl_snapshottimeout_long, ?PCL_SNAPTIMEOUT_LONG}]). + {snapshot_timeout_short, ?SNAPTIMEOUT_SHORT}, + {snapshot_timeout_long, ?SNAPTIMEOUT_LONG}]). -record(ledger_cache, {mem :: ets:tab(), loader = leveled_tree:empty(?CACHE_TYPE) @@ -334,12 +334,12 @@ {override_functions, list(leveled_head:appdefinable_function_tuple())} | % Provide a list of override functions that will be used for % user-defined tags - {pcl_snapshottimeout_short, pos_integer()} | + {snapshot_timeout_short, pos_integer()} | % Time in seconds before a snapshot that has not been shutdown is % assumed to have failed, and so requires to be torndown. 
The % short timeout is applied to queries where long_running is set to % false - {pcl_snapshottimeout_long, pos_integer()} + {snapshot_timeout_long, pos_integer()} % Time in seconds before a snapshot that has not been shutdown is % assumed to have failed, and so requires to be torndown. The % short timeout is applied to queries where long_running is set to @@ -1577,8 +1577,8 @@ set_options(Opts) -> SyncStrat = proplists:get_value(sync_strategy, Opts), WRP = proplists:get_value(waste_retention_period, Opts), - SnapTimeoutShort = proplists:get_value(pcl_snapshottimeout_short, Opts), - SnapTimeoutLong = proplists:get_value(pcl_snapshottimeout_long, Opts), + SnapTimeoutShort = proplists:get_value(snapshot_timeout_short, Opts), + SnapTimeoutLong = proplists:get_value(snapshot_timeout_long, Opts), AltStrategy = proplists:get_value(reload_strategy, Opts), ReloadStrategy = leveled_codec:inker_reload_strategy(AltStrategy), @@ -1619,6 +1619,7 @@ set_options(Opts) -> singlefile_compactionperc = SFL_CompPerc, maxrunlength_compactionperc = MRL_CompPerc, waste_retention_period = WRP, + snaptimeout_long = SnapTimeoutLong, compression_method = CompressionMethod, compress_on_receipt = CompressOnReceipt, cdb_options = diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index 01571f6..994e78d 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -151,6 +151,7 @@ is_snapshot = false :: boolean(), compression_method = native :: lz4|native, compress_on_receipt = false :: boolean(), + snap_timeout :: pos_integer() | undefined, % in seconds source_inker :: pid() | undefined}). 
@@ -541,6 +542,7 @@ handle_call({fold, end; handle_call({register_snapshot, Requestor}, _From , State) -> Rs = [{Requestor, + os:timestamp(), State#state.manifest_sqn}|State#state.registered_snapshots], leveled_log:log("I0002", [Requestor, State#state.manifest_sqn]), {reply, {State#state.manifest, @@ -548,13 +550,28 @@ handle_call({register_snapshot, Requestor}, _From , State) -> State#state.journal_sqn}, State#state{registered_snapshots=Rs}}; handle_call({confirm_delete, ManSQN}, _From, State) -> + % Check there are no snapshots that may be aware of the file process that + % is waiting to delete itself. CheckSQNFun = - fun({_R, SnapSQN}, Bool) -> + fun({_R, _TS, SnapSQN}, Bool) -> + % If the Snapshot SQN was at the same point the file was set to + % delete (or after), then the snapshot would not have been told + % of the file, and the snapshot should not hold up its deletion (SnapSQN >= ManSQN) and Bool end, + CheckSnapshotExpiryFun = + fun({_R, TS, _SnapSQN}) -> + Expiry = leveled_util:integer_time(TS) + State#state.snap_timeout, + % If Expiry has passed this will be false, and the snapshot + % will be removed from the list of registered snapshots and + % so will no longer block deletes + leveled_util:integer_now() < Expiry + end, + RegisteredSnapshots0 = + lists:filter(CheckSnapshotExpiryFun, State#state.registered_snapshots), {reply, - lists:foldl(CheckSQNFun, true, State#state.registered_snapshots), - State}; + lists:foldl(CheckSQNFun, true, RegisteredSnapshots0), + State#state{registered_snapshots = RegisteredSnapshots0}}; handle_call(get_manifest, _From, State) -> {reply, leveled_imanifest:to_list(State#state.manifest), State}; handle_call({update_manifest, @@ -791,6 +808,8 @@ start_from_file(InkOpts) -> MRL_CompactPerc = InkOpts#inker_options.maxrunlength_compactionperc, PressMethod = InkOpts#inker_options.compression_method, PressOnReceipt = InkOpts#inker_options.compress_on_receipt, + SnapTimeout = InkOpts#inker_options.snaptimeout_long, + IClerkOpts = 
#iclerk_options{inker = self(), cdb_options=IClerkCDBOpts, @@ -799,8 +818,7 @@ start_from_file(InkOpts) -> compression_method = PressMethod, max_run_length = MRL, singlefile_compactionperc = SFL_CompactPerc, - maxrunlength_compactionperc = MRL_CompactPerc - }, + maxrunlength_compactionperc = MRL_CompactPerc}, {ok, Clerk} = leveled_iclerk:clerk_new(IClerkOpts), @@ -821,6 +839,7 @@ start_from_file(InkOpts) -> cdb_options = CDBopts, compression_method = PressMethod, compress_on_receipt = PressOnReceipt, + snap_timeout = SnapTimeout, clerk = Clerk}}. From ef068326a02c7c3b4ddd04afc0340abf5191319a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 14 Dec 2018 14:13:44 +0000 Subject: [PATCH 3/4] Update priv/leveled.schema --- priv/leveled.schema | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/priv/leveled.schema b/priv/leveled.schema index 198a9b4..7134d77 100644 --- a/priv/leveled.schema +++ b/priv/leveled.schema @@ -116,6 +116,23 @@ ]}. +%% @doc Snapshot timeout (short) +%% Maximum expected time for an index query. A query which is taking longer +%% than this may fail as it will be released - potentially allowing for some +%% file processes to delete. Timeout is in seconds. +{mapping, "leveled.snapshot_timeout_short", "leveled.snapshot_timeout_short", [ + {default, 1800}, + {datatype, integer} +]}. + +%% @doc Snapshot timeout (long) +%% Maximum expected time for any other fold. A fold which is taking longer +%% than this may fail as it will be released - potentially allowing for some +%% file processes to delete. Timeout is in seconds. +{mapping, "leveled.snapshot_timeout_long", "leveled.snapshot_timeout_long", [ + {default, 86400}, + {datatype, integer} +]}. 
From ceea196cc03d8f0f13ef58e8cfebf62c43e023ce Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 14 Dec 2018 14:27:44 +0000 Subject: [PATCH 4/4] Update priv/leveled.schema The snapshot timeouts would not normally be changed - so make them hidden --- priv/leveled.schema | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/priv/leveled.schema b/priv/leveled.schema index 7134d77..626302b 100644 --- a/priv/leveled.schema +++ b/priv/leveled.schema @@ -122,7 +122,8 @@ %% file processes to delete. Timeout is in seconds. {mapping, "leveled.snapshot_timeout_short", "leveled.snapshot_timeout_short", [ {default, 1800}, - {datatype, integer} + {datatype, integer}, + hidden ]}. %% @doc Snapshot timeout (long) @@ -131,7 +132,8 @@ %% file processes to delete. Timeout is in seconds. {mapping, "leveled.snapshot_timeout_long", "leveled.snapshot_timeout_long", [ {default, 86400}, - {datatype, integer} + {datatype, integer}, + hidden ]}.