Close in stages - waiting for releases (#411)
* Close in stages - waiting for releases: Have a consistent approach to closing the inker and the penciller - so that the close can be interrupted by the releasing of snapshots. Then any unreleased snapshots are closed before shutdown - with a 10s pause to give queries a short opportunity to finish. This should address some issues, primarily seen (but very rarely) in test, whereby post-rebuild destruction of parallel AAE keystores causes the crashing of aae_folds. The primary benefit is that an attempt to release a snapshot that has in fact already finished does not cause a crash of the database on normal stop. This was primarily an issue when shutdown is delayed by an ongoing journal compaction job. * Boost default test budget for EQC * Update test to use correct type * Update following review: Avoid filtering out exited PIDs when closing snapshots by catching the exit exception when the Pid is down
This commit is contained in:
parent
bc87273c76
commit
7a5cf251b3
6 changed files with 256 additions and 88 deletions
|
@ -133,6 +133,11 @@
|
|||
-define(JOURNAL_FILEX, "cdb").
|
||||
-define(PENDING_FILEX, "pnd").
|
||||
-define(TEST_KC, {[], infinity}).
|
||||
-define(SHUTDOWN_PAUSE, 10000).
|
||||
% How long to wait for snapshots to be released on shutdown
|
||||
% before forcing closure of snapshots
|
||||
% 10s may not be long enough for all snapshots, but avoids crashes of
|
||||
% short-lived queries racing with the shutdown
|
||||
|
||||
-record(state, {manifest = [] :: list(),
|
||||
manifest_sqn = 0 :: integer(),
|
||||
|
@ -281,6 +286,18 @@ ink_confirmdelete(Pid, ManSQN, CDBpid) ->
|
|||
ink_close(Pid) ->
|
||||
gen_server:call(Pid, close, infinity).
|
||||
|
||||
-spec ink_snapclose(pid()) -> ok.
|
||||
%% @doc
|
||||
%% Specifically to be used when closing snapshots on shutdown, will handle a
|
||||
%% scenario where a snapshot has already exited
|
||||
ink_snapclose(Pid) ->
|
||||
try
|
||||
ink_close(Pid)
|
||||
catch
|
||||
exit:{noproc, _CallDetails} ->
|
||||
ok
|
||||
end.
|
||||
|
||||
-spec ink_doom(pid()) -> {ok, [{string(), string(), string(), string()}]}.
|
||||
%% @doc
|
||||
%% Test function used to close a file, and return all file paths (potentially
|
||||
|
@ -654,33 +671,23 @@ handle_call({check_sqn, LedgerSQN}, _From, State) ->
|
|||
end;
|
||||
handle_call(get_journalsqn, _From, State) ->
|
||||
{reply, {ok, State#state.journal_sqn}, State};
|
||||
handle_call(close, _From, State) ->
|
||||
case State#state.is_snapshot of
|
||||
true ->
|
||||
ok = ink_releasesnapshot(State#state.source_inker, self());
|
||||
false ->
|
||||
leveled_log:log(i0005, [close]),
|
||||
leveled_log:log(
|
||||
i0006, [State#state.journal_sqn, State#state.manifest_sqn]),
|
||||
ok = leveled_iclerk:clerk_stop(State#state.clerk),
|
||||
shutdown_snapshots(State#state.registered_snapshots),
|
||||
shutdown_manifest(State#state.manifest)
|
||||
end,
|
||||
handle_call(close, _From, State=#state{is_snapshot=Snap}) when Snap == true ->
|
||||
ok = ink_releasesnapshot(State#state.source_inker, self()),
|
||||
{stop, normal, ok, State};
|
||||
handle_call(doom, _From, State) ->
|
||||
FPs = [filepath(State#state.root_path, journal_dir),
|
||||
filepath(State#state.root_path, manifest_dir),
|
||||
filepath(State#state.root_path, journal_compact_dir),
|
||||
filepath(State#state.root_path, journal_waste_dir)],
|
||||
leveled_log:log(i0018, []),
|
||||
|
||||
leveled_log:log(i0005, [doom]),
|
||||
handle_call(ShutdownType, From, State)
|
||||
when ShutdownType == close; ShutdownType == doom ->
|
||||
case ShutdownType of
|
||||
doom ->
|
||||
leveled_log:log(i0018, []);
|
||||
_ ->
|
||||
ok
|
||||
end,
|
||||
leveled_log:log(i0005, [ShutdownType]),
|
||||
leveled_log:log(
|
||||
i0006, [State#state.journal_sqn, State#state.manifest_sqn]),
|
||||
ok = leveled_iclerk:clerk_stop(State#state.clerk),
|
||||
shutdown_snapshots(State#state.registered_snapshots),
|
||||
shutdown_manifest(State#state.manifest),
|
||||
{stop, normal, {ok, FPs}, State}.
|
||||
gen_server:cast(self(), {maybe_defer_shutdown, ShutdownType, From}),
|
||||
{noreply, State}.
|
||||
|
||||
|
||||
handle_cast({clerk_complete, ManifestSnippet, FilesToDelete}, State) ->
|
||||
|
@ -766,8 +773,39 @@ handle_cast({remove_logs, ForcedLogs}, State) ->
|
|||
ok = leveled_log:remove_forcedlogs(ForcedLogs),
|
||||
CDBopts = State#state.cdb_options,
|
||||
CDBopts0 = CDBopts#cdb_options{log_options = leveled_log:get_opts()},
|
||||
{noreply, State#state{cdb_options = CDBopts0}}.
|
||||
|
||||
{noreply, State#state{cdb_options = CDBopts0}};
|
||||
handle_cast({maybe_defer_shutdown, ShutdownType, From}, State) ->
|
||||
case length(State#state.registered_snapshots) of
|
||||
0 ->
|
||||
ok;
|
||||
N ->
|
||||
% Whilst this process sleeps, then any remaining snapshots may
|
||||
% release and have their release messages queued before the
|
||||
% complete_shutdown cast is sent
|
||||
leveled_log:log(i0026, [N]),
|
||||
timer:sleep(?SHUTDOWN_PAUSE)
|
||||
end,
|
||||
gen_server:cast(self(), {complete_shutdown, ShutdownType, From}),
|
||||
{noreply, State};
|
||||
handle_cast({complete_shutdown, ShutdownType, From}, State) ->
|
||||
lists:foreach(
|
||||
fun(SnapPid) -> ok = ink_snapclose(SnapPid) end,
|
||||
lists:map(
|
||||
fun(Snapshot) -> element(1, Snapshot) end,
|
||||
State#state.registered_snapshots)),
|
||||
shutdown_manifest(State#state.manifest),
|
||||
case ShutdownType of
|
||||
doom ->
|
||||
FPs =
|
||||
[filepath(State#state.root_path, journal_dir),
|
||||
filepath(State#state.root_path, manifest_dir),
|
||||
filepath(State#state.root_path, journal_compact_dir),
|
||||
filepath(State#state.root_path, journal_waste_dir)],
|
||||
gen_server:reply(From, {ok, FPs});
|
||||
close ->
|
||||
gen_server:reply(From, ok)
|
||||
end,
|
||||
{stop, normal, State}.
|
||||
|
||||
%% handle the bookie stopping and stop this snapshot
|
||||
handle_info({'DOWN', BookieMonRef, process, _BookiePid, _Info},
|
||||
|
@ -789,6 +827,7 @@ code_change(_OldVsn, State, _Extra) ->
|
|||
%%% Internal functions
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
-spec start_from_file(inker_options()) -> {ok, ink_state()}.
|
||||
%% @doc
|
||||
%% Start an Inker from the state on disk (i.e. not a snapshot).
|
||||
|
@ -854,13 +893,6 @@ start_from_file(InkOpts) ->
|
|||
clerk = Clerk}}.
|
||||
|
||||
|
||||
-spec shutdown_snapshots(list(registered_snapshot())) -> ok.
|
||||
%% @doc
|
||||
%% Shutdown any snapshots before closing the store
|
||||
shutdown_snapshots(Snapshots) ->
|
||||
lists:foreach(fun({Snap, _TS, _SQN}) -> ok = ink_close(Snap) end,
|
||||
Snapshots).
|
||||
|
||||
-spec shutdown_manifest(leveled_imanifest:manifest()) -> ok.
|
||||
%% @doc
|
||||
%% Shutdown all files in the manifest
|
||||
|
@ -1603,4 +1635,28 @@ loop() ->
|
|||
ok
|
||||
end.
|
||||
|
||||
close_no_crash_test_() ->
|
||||
{timeout, 60, fun close_no_crash_tester/0}.
|
||||
|
||||
close_no_crash_tester() ->
|
||||
RootPath = "test/test_area/journal",
|
||||
build_dummy_journal(),
|
||||
CDBopts = #cdb_options{max_size=300000, binary_mode=true},
|
||||
{ok, Inker} =
|
||||
ink_start(
|
||||
#inker_options{
|
||||
root_path=RootPath,
|
||||
cdb_options=CDBopts,
|
||||
compression_method=native,
|
||||
compress_on_receipt=true}),
|
||||
|
||||
SnapOpts =
|
||||
#inker_options{
|
||||
start_snapshot=true, bookies_pid = self(), source_inker=Inker},
|
||||
{ok, InkSnap} = ink_snapstart(SnapOpts),
|
||||
|
||||
exit(InkSnap, kill),
|
||||
ok = ink_close(Inker),
|
||||
clean_testdir(RootPath).
|
||||
|
||||
-endif.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue