From 8bf36214e11563c1e9c554df4d5f54fa5fcc70aa Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 14 Dec 2018 11:23:04 +0000 Subject: [PATCH] Make snapshot timeout configurable Also increase defaults. In riak snapshots may be used to fold over all objects, and with delays at the receiving end, this could take significant time --- include/leveled.hrl | 4 +++- src/leveled_bookie.erl | 24 +++++++++++++++++++++--- src/leveled_penciller.erl | 37 ++++++++++++++++++++++--------------- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/include/leveled.hrl b/include/leveled.hrl index 8106c02..9c792ca 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -78,7 +78,9 @@ source_penciller :: pid() | undefined, snapshot_longrunning = true :: boolean(), compression_method = native :: lz4|native, - levelzero_cointoss = false :: boolean()}). + levelzero_cointoss = false :: boolean(), + snaptimeout_short :: pos_integer() | undefined, + snaptimeout_long :: pos_integer() | undefined}). -record(iclerk_options, {inker :: pid() | undefined, diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 9cfba69..9938bed 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -104,7 +104,6 @@ -define(MIN_PCL_CACHE_SIZE, 400). -define(MAX_PCL_CACHE_SIZE, 28000). % This is less than actual max - but COIN_SIDECOUNT --define(SNAPSHOT_TIMEOUT, 300000). -define(CACHE_SIZE_JITTER, 25). -define(JOURNAL_SIZE_JITTER, 20). -define(ABSOLUTEMAX_JOURNALSIZE, 4000000000). @@ -118,6 +117,8 @@ -define(MAX_KEYCHECK_FREQUENCY, 100). -define(MIN_KEYCHECK_FREQUENCY, 1). -define(OPEN_LASTMOD_RANGE, {0, infinity}). +-define(PCL_SNAPTIMEOUT_SHORT, 900). % 15 minutes +-define(PCL_SNAPTIMEOUT_LONG, 43200). % 12 hours -define(OPTION_DEFAULTS, [{root_path, undefined}, {snapshot_bookie, undefined}, @@ -135,7 +136,9 @@ {compression_point, ?COMPRESSION_POINT}, {log_level, ?LOG_LEVEL}, {forced_logs, []}, - {override_functions, []}]). 
+ {override_functions, []}, + {pcl_snapshottimeout_short, ?PCL_SNAPTIMEOUT_SHORT}, + {pcl_snapshottimeout_long, ?PCL_SNAPTIMEOUT_LONG}]). -record(ledger_cache, {mem :: ets:tab(), loader = leveled_tree:empty(?CACHE_TYPE) @@ -328,9 +331,19 @@ % "P0032", "SST12", "CDB19", "SST13", "I0019"]} % Will log all timing points even when log_level is not set to % support info - {override_functions, list(leveled_head:appdefinable_function_tuple())} + {override_functions, list(leveled_head:appdefinable_function_tuple())} | % Provide a list of override functions that will be used for % user-defined tags + {pcl_snapshottimeout_short, pos_integer()} | + % Time in seconds before a snapshot that has not been shutdown is + % assumed to have failed, and so requires to be torn down. The + % short timeout is applied to queries where long_running is set to + % false + {pcl_snapshottimeout_long, pos_integer()} + % Time in seconds before a snapshot that has not been shutdown is + % assumed to have failed, and so requires to be torn down. The + % long timeout is applied to queries where long_running is set to + % true ]. 
@@ -1564,6 +1577,9 @@ set_options(Opts) -> SyncStrat = proplists:get_value(sync_strategy, Opts), WRP = proplists:get_value(waste_retention_period, Opts), + SnapTimeoutShort = proplists:get_value(pcl_snapshottimeout_short, Opts), + SnapTimeoutLong = proplists:get_value(pcl_snapshottimeout_long, Opts), + AltStrategy = proplists:get_value(reload_strategy, Opts), ReloadStrategy = leveled_codec:inker_reload_strategy(AltStrategy), @@ -1613,6 +1629,8 @@ set_options(Opts) -> #penciller_options{root_path = LedgerFP, max_inmemory_tablesize = PCLL0CacheSize, levelzero_cointoss = true, + snaptimeout_short = SnapTimeoutShort, + snaptimeout_long = SnapTimeoutLong, sst_options = #sst_options{press_method = CompressionMethod, log_options=leveled_log:get_opts()}} diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index c1e9fdc..3480124 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -236,8 +236,6 @@ -define(COIN_SIDECOUNT, 5). -define(SLOW_FETCH, 20000). -define(ITERATOR_SCANWIDTH, 4). --define(SNAPSHOT_TIMEOUT_LONG, 3600). --define(SNAPSHOT_TIMEOUT_SHORT, 600). -define(TIMING_SAMPLECOUNTDOWN, 10000). -define(TIMING_SAMPLESIZE, 100). -define(OPEN_LASTMOD_RANGE, {0, infinity}). @@ -270,6 +268,9 @@ timings = no_timing :: pcl_timings(), timings_countdown = 0 :: integer(), + snaptimeout_short :: pos_integer()|undefined, + snaptimeout_long :: pos_integer()|undefined, + sst_options = #sst_options{} :: #sst_options{}}). 
-record(pcl_timings, @@ -633,7 +634,8 @@ init([LogOpts, PCLopts]) -> LongRunning = PCLopts#penciller_options.snapshot_longrunning, %% monitor the bookie, and close the snapshot when bookie %% exits - BookieMonitor = erlang:monitor(process, PCLopts#penciller_options.bookies_pid), + BookieMonitor = + erlang:monitor(process, PCLopts#penciller_options.bookies_pid), {ok, State} = pcl_registersnapshot(SrcPenciller, self(), @@ -793,25 +795,25 @@ handle_call({fetch_keys, end; handle_call(get_startup_sqn, _From, State) -> {reply, State#state.persisted_sqn, State}; -handle_call({register_snapshot, Snapshot, Query, BookiesMem, LR}, _From, State) -> +handle_call({register_snapshot, Snapshot, Query, BookiesMem, LongRunning}, + _From, State) -> % Register and load a snapshot % % For setup of the snapshot to be efficient should pass a query % of (StartKey, EndKey) - this will avoid a fully copy of the penciller's % memory being required to be trasnferred to the clone. However, this % will not be a valid clone for fetch - Timeout = - case LR of - true -> - ?SNAPSHOT_TIMEOUT_LONG; - false -> - ?SNAPSHOT_TIMEOUT_SHORT - end, - - Manifest0 = leveled_pmanifest:add_snapshot(State#state.manifest, - Snapshot, - Timeout), + TimeO = + case LongRunning of + true -> + State#state.snaptimeout_long; + false -> + State#state.snaptimeout_short + end, + Manifest0 = + leveled_pmanifest:add_snapshot(State#state.manifest, Snapshot, TimeO), + {BookieIncrTree, BookieIdx, MinSQN, MaxSQN} = BookiesMem, LM1Cache = case BookieIncrTree of @@ -1090,6 +1092,9 @@ start_from_file(PCLopts) -> RootPath = PCLopts#penciller_options.root_path, MaxTableSize = PCLopts#penciller_options.max_inmemory_tablesize, OptsSST = PCLopts#penciller_options.sst_options, + + SnapTimeoutShort = PCLopts#penciller_options.snaptimeout_short, + SnapTimeoutLong = PCLopts#penciller_options.snaptimeout_long, {ok, MergeClerk} = leveled_pclerk:clerk_new(self(), RootPath, OptsSST), @@ -1103,6 +1108,8 @@ start_from_file(PCLopts) -> 
levelzero_maxcachesize = MaxTableSize, levelzero_cointoss = CoinToss, levelzero_index = leveled_pmem:new_index(), + snaptimeout_short = SnapTimeoutShort, + snaptimeout_long = SnapTimeoutLong, sst_options = OptsSST}, %% Open manifest