Mas d31 i410looptoclose (#421)
* Mas i410 looptoclose (#420) * Stop waiting full SHUTDOWN_PAUSE If there was a snapshot outstanding at shutdown time, there was a wait of the full SHUTDOWN_PAUSE to give the snapshot time to close down. This causes an issue in kv_index_tictactree when a rebuild completes while an exchange is in flight - the aae_controller becomes blocked for the full shutdown pause, whilst it waits for the replaced key store to be closed. This change loops within the shutdown pause, so that if the snapshot supporting the exchange is closed, the paused bookie can close more quickly (unblocking the controller). Without this fix, there are intermittent issues in kv_index_tictactree's mockvnode_SUITE tests. * Address test reliability Be a bit clearer about waiting for round seconds. This was intermittently failing on QR4 previously (but QR5, 1s later, was always OK). * Update iterator_SUITE.erl * Refine test assertion At Stage C there might be 0 files left, in which case equality with the Stage D result is OK.
This commit is contained in:
parent
d544db5461
commit
6223b801f3
5 changed files with 88 additions and 27 deletions
|
@ -221,6 +221,7 @@
|
|||
-define(ITERATOR_SCANWIDTH, 4).
|
||||
-define(TIMING_SAMPLECOUNTDOWN, 10000).
|
||||
-define(TIMING_SAMPLESIZE, 100).
|
||||
-define(SHUTDOWN_LOOPS, 10).
|
||||
-define(SHUTDOWN_PAUSE, 10000).
|
||||
% How long to wait for snapshots to be released on shutdown
|
||||
% before forcing closure of snapshots
|
||||
|
@ -270,7 +271,10 @@
|
|||
|
||||
monitor = {no_monitor, 0} :: leveled_monitor:monitor(),
|
||||
|
||||
sst_options = #sst_options{} :: sst_options()}).
|
||||
sst_options = #sst_options{} :: sst_options(),
|
||||
|
||||
shutdown_loops = ?SHUTDOWN_LOOPS :: non_neg_integer()
|
||||
}).
|
||||
|
||||
|
||||
-type penciller_options() :: #penciller_options{}.
|
||||
|
@ -1153,16 +1157,25 @@ handle_cast({remove_logs, ForcedLogs}, State) ->
|
|||
handle_cast({maybe_defer_shutdown, ShutdownType, From}, State) ->
|
||||
case length(leveled_pmanifest:snapshot_pids(State#state.manifest)) of
|
||||
0 ->
|
||||
ok;
|
||||
gen_server:cast(self(), {complete_shutdown, ShutdownType, From}),
|
||||
{noreply, State};
|
||||
N ->
|
||||
% Whilst this process sleeps, then any remaining snapshots may
|
||||
% release and have their release messages queued before the
|
||||
% complete_shutdown cast is sent
|
||||
leveled_log:log(p0042, [N]),
|
||||
timer:sleep(?SHUTDOWN_PAUSE)
|
||||
end,
|
||||
gen_server:cast(self(), {complete_shutdown, ShutdownType, From}),
|
||||
{noreply, State};
|
||||
case State#state.shutdown_loops of
|
||||
LoopCount when LoopCount > 0 ->
|
||||
leveled_log:log(p0042, [N]),
|
||||
timer:sleep(?SHUTDOWN_PAUSE div ?SHUTDOWN_LOOPS),
|
||||
gen_server:cast(
|
||||
self(), {maybe_defer_shutdown, ShutdownType, From}),
|
||||
{noreply, State#state{shutdown_loops = LoopCount - 1}};
|
||||
0 ->
|
||||
gen_server:cast(
|
||||
self(), {complete_shutdown, ShutdownType, From}),
|
||||
{noreply, State}
|
||||
end
|
||||
end;
|
||||
handle_cast({complete_shutdown, ShutdownType, From}, State) ->
|
||||
lists:foreach(
|
||||
fun(Snap) -> ok = pcl_snapclose(Snap) end,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue