Mas d31 i410looptoclose (#421)

* Mas i410 looptoclose (#420)

* Stop waiting full SHUTDOWN_PAUSE

If there is a snapshot outstanding at shutdown time, there was a wait of SHUTDOWN_PAUSE to give the snapshot time to close down.

This causes an issue in kv_index_tictactree when rebuilds complete, when an exchange was in flight at the point the rebuild completed - the aae_controller will become blocked for the full shutdown pause, whilst it waits for the replaced key store to be closed.

This change is to loop within the shutdown pause, so that if the snapshot supporting the exchange is closed, the paused bookie can close more quickly (unblocking the controller).

Without this fix, there are intermittent issues in kv_index_tictactree's mockvnode_SUITE tests.

* Address test reliability

Be a bit clearer with waiting round seconds,  Was intermittently failing on QR4 previously (but QR5 1s later was always OK).

* Update iterator_SUITE.erl

* Refine test assertion

At Stage C there might be 0 files left, in which case equality with Stage D result is ok.
This commit is contained in:
Martin Sumner 2023-11-10 15:04:47 +00:00 committed by GitHub
parent d544db5461
commit 6223b801f3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 88 additions and 27 deletions

View file

@ -2614,6 +2614,37 @@ generate_multiple_objects(Count, KeyNumber, ObjL) ->
ObjL ++ [{Key, Value, IndexSpec}]). ObjL ++ [{Key, Value, IndexSpec}]).
shutdown_test_() ->
{timeout, 10, fun shutdown_tester/0}.
shutdown_tester() ->
RootPath = reset_filestructure(),
{ok, Bookie1} = book_start([{root_path, RootPath}]),
lists:foreach(
fun({K, V, S}) ->
ok = book_put(Bookie1, <<"Bucket">>, K, V, S, ?STD_TAG)
end,
generate_multiple_objects(5000, 1)),
{ok, SnpPCL1, SnpJrnl1} =
leveled_bookie:book_snapshot(Bookie1, store, undefined, true),
TestPid = self(),
spawn(
fun() ->
ok = leveled_bookie:book_close(Bookie1),
TestPid ! ok
end),
timer:sleep(2000),
ok = leveled_penciller:pcl_close(SnpPCL1),
ok = leveled_inker:ink_close(SnpJrnl1),
SW = os:timestamp(),
receive ok -> ok end,
WaitForShutDown = timer:now_diff(SW, os:timestamp()) div 1000,
?assert(WaitForShutDown =< (1000 + 1)),
_ = reset_filestructure().
ttl_test() -> ttl_test() ->
RootPath = reset_filestructure(), RootPath = reset_filestructure(),
{ok, Bookie1} = book_start([{root_path, RootPath}]), {ok, Bookie1} = book_start([{root_path, RootPath}]),

View file

@ -133,6 +133,7 @@
-define(JOURNAL_FILEX, "cdb"). -define(JOURNAL_FILEX, "cdb").
-define(PENDING_FILEX, "pnd"). -define(PENDING_FILEX, "pnd").
-define(TEST_KC, {[], infinity}). -define(TEST_KC, {[], infinity}).
-define(SHUTDOWN_LOOPS, 10).
-define(SHUTDOWN_PAUSE, 10000). -define(SHUTDOWN_PAUSE, 10000).
% How long to wait for snapshots to be released on shutdown % How long to wait for snapshots to be released on shutdown
% before forcing closure of snapshots % before forcing closure of snapshots
@ -154,7 +155,8 @@
compression_method = native :: lz4|native|none, compression_method = native :: lz4|native|none,
compress_on_receipt = false :: boolean(), compress_on_receipt = false :: boolean(),
snap_timeout :: pos_integer() | undefined, % in seconds snap_timeout :: pos_integer() | undefined, % in seconds
source_inker :: pid() | undefined}). source_inker :: pid() | undefined,
shutdown_loops = ?SHUTDOWN_LOOPS :: non_neg_integer()}).
-type inker_options() :: #inker_options{}. -type inker_options() :: #inker_options{}.
@ -786,16 +788,25 @@ handle_cast({remove_logs, ForcedLogs}, State) ->
handle_cast({maybe_defer_shutdown, ShutdownType, From}, State) -> handle_cast({maybe_defer_shutdown, ShutdownType, From}, State) ->
case length(State#state.registered_snapshots) of case length(State#state.registered_snapshots) of
0 -> 0 ->
ok; gen_server:cast(self(), {complete_shutdown, ShutdownType, From}),
{noreply, State};
N -> N ->
% Whilst this process sleeps, then any remaining snapshots may % Whilst this process sleeps, then any remaining snapshots may
% release and have their release messages queued before the % release and have their release messages queued before the
% complete_shutdown cast is sent % complete_shutdown cast is sent
leveled_log:log(i0026, [N]), case State#state.shutdown_loops of
timer:sleep(?SHUTDOWN_PAUSE) LoopCount when LoopCount > 0 ->
end, leveled_log:log(i0026, [N]),
gen_server:cast(self(), {complete_shutdown, ShutdownType, From}), timer:sleep(?SHUTDOWN_PAUSE div ?SHUTDOWN_LOOPS),
{noreply, State}; gen_server:cast(
self(), {maybe_defer_shutdown, ShutdownType, From}),
{noreply, State#state{shutdown_loops = LoopCount - 1}};
0 ->
gen_server:cast(
self(), {complete_shutdown, ShutdownType, From}),
{noreply, State}
end
end;
handle_cast({complete_shutdown, ShutdownType, From}, State) -> handle_cast({complete_shutdown, ShutdownType, From}, State) ->
lists:foreach( lists:foreach(
fun(SnapPid) -> ok = ink_snapclose(SnapPid) end, fun(SnapPid) -> ok = ink_snapclose(SnapPid) end,

View file

@ -221,6 +221,7 @@
-define(ITERATOR_SCANWIDTH, 4). -define(ITERATOR_SCANWIDTH, 4).
-define(TIMING_SAMPLECOUNTDOWN, 10000). -define(TIMING_SAMPLECOUNTDOWN, 10000).
-define(TIMING_SAMPLESIZE, 100). -define(TIMING_SAMPLESIZE, 100).
-define(SHUTDOWN_LOOPS, 10).
-define(SHUTDOWN_PAUSE, 10000). -define(SHUTDOWN_PAUSE, 10000).
% How long to wait for snapshots to be released on shutdown % How long to wait for snapshots to be released on shutdown
% before forcing closure of snapshots % before forcing closure of snapshots
@ -270,7 +271,10 @@
monitor = {no_monitor, 0} :: leveled_monitor:monitor(), monitor = {no_monitor, 0} :: leveled_monitor:monitor(),
sst_options = #sst_options{} :: sst_options()}). sst_options = #sst_options{} :: sst_options(),
shutdown_loops = ?SHUTDOWN_LOOPS :: non_neg_integer()
}).
-type penciller_options() :: #penciller_options{}. -type penciller_options() :: #penciller_options{}.
@ -1153,16 +1157,25 @@ handle_cast({remove_logs, ForcedLogs}, State) ->
handle_cast({maybe_defer_shutdown, ShutdownType, From}, State) -> handle_cast({maybe_defer_shutdown, ShutdownType, From}, State) ->
case length(leveled_pmanifest:snapshot_pids(State#state.manifest)) of case length(leveled_pmanifest:snapshot_pids(State#state.manifest)) of
0 -> 0 ->
ok; gen_server:cast(self(), {complete_shutdown, ShutdownType, From}),
{noreply, State};
N -> N ->
% Whilst this process sleeps, then any remaining snapshots may % Whilst this process sleeps, then any remaining snapshots may
% release and have their release messages queued before the % release and have their release messages queued before the
% complete_shutdown cast is sent % complete_shutdown cast is sent
leveled_log:log(p0042, [N]), case State#state.shutdown_loops of
timer:sleep(?SHUTDOWN_PAUSE) LoopCount when LoopCount > 0 ->
end, leveled_log:log(p0042, [N]),
gen_server:cast(self(), {complete_shutdown, ShutdownType, From}), timer:sleep(?SHUTDOWN_PAUSE div ?SHUTDOWN_LOOPS),
{noreply, State}; gen_server:cast(
self(), {maybe_defer_shutdown, ShutdownType, From}),
{noreply, State#state{shutdown_loops = LoopCount - 1}};
0 ->
gen_server:cast(
self(), {complete_shutdown, ShutdownType, From}),
{noreply, State}
end
end;
handle_cast({complete_shutdown, ShutdownType, From}, State) -> handle_cast({complete_shutdown, ShutdownType, From}, State) ->
lists:foreach( lists:foreach(
fun(Snap) -> ok = pcl_snapclose(Snap) end, fun(Snap) -> ok = pcl_snapclose(Snap) end,

View file

@ -929,7 +929,7 @@ space_clear_ondelete(_Config) ->
true = PointB_Journals < length(FNsA_J), true = PointB_Journals < length(FNsA_J),
true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsA_L)), true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsA_L)),
true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsB_L)), true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsB_L)),
true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsC_L)), true = length(strip_nonsst(FNsD_L)) =< length(strip_nonsst(FNsC_L)),
true = length(strip_nonsst(FNsD_L)) == 0. true = length(strip_nonsst(FNsD_L)) == 0.

View file

@ -47,8 +47,11 @@ expiring_indexes(_Config) ->
SW1 = os:timestamp(), SW1 = os:timestamp(),
IBKL1 = testutil:stdload_expiring(Bookie1, KeyCount, Future), IBKL1 = testutil:stdload_expiring(Bookie1, KeyCount, Future),
timer:sleep(1000),
% Wait a second after last key so that none loaded in the last second
LoadTime = timer:now_diff(os:timestamp(), SW1)/1000000, LoadTime = timer:now_diff(os:timestamp(), SW1)/1000000,
io:format("Load of ~w std objects in ~w seconds~n", [KeyCount, LoadTime]), io:format("Load of ~w std objects in ~w seconds~n", [KeyCount, LoadTime]),
SW2 = os:timestamp(),
FilterFun = fun({I, _B, _K}) -> lists:member(I, [5, 6, 7, 8]) end, FilterFun = fun({I, _B, _K}) -> lists:member(I, [5, 6, 7, 8]) end,
LoadedEntriesInRange = lists:sort(lists:filter(FilterFun, IBKL1)), LoadedEntriesInRange = lists:sort(lists:filter(FilterFun, IBKL1)),
@ -101,6 +104,7 @@ expiring_indexes(_Config) ->
% this time index value of 6 % this time index value of 6
testutil:stdload_object( testutil:stdload_object(
Bookie1, B0, K0, 5, <<"value">>, leveled_util:integer_now() + 10), Bookie1, B0, K0, 5, <<"value">>, leveled_util:integer_now() + 10),
timer:sleep(1000),
{async, Folder2} = IndexFold(), {async, Folder2} = IndexFold(),
leveled_bookie:book_indexfold(Bookie1, leveled_bookie:book_indexfold(Bookie1,
B0, B0,
@ -121,23 +125,25 @@ expiring_indexes(_Config) ->
FoldTime = timer:now_diff(os:timestamp(), SW1)/1000000 - LoadTime, FoldTime = timer:now_diff(os:timestamp(), SW1)/1000000 - LoadTime,
io:format("Query returned ~w entries in ~w seconds - 3 queries + 10s wait~n", io:format("Query returned ~w entries in ~w seconds - 3 queries + 10s wait~n",
[length(QR1), FoldTime]), [length(QR3), FoldTime]),
true = (LoadTime + FoldTime) < Future,
SleepTime = round((Future - (LoadTime + FoldTime)) * 1000), SleepTime =
io:format("Sleeping ~w s for all to expire~n", [SleepTime/1000]), (Future - (timer:now_diff(os:timestamp(), SW2) div (1000 * 1000))) + 1,
timer:sleep(SleepTime + 1000), % add a second
io:format("Sleeping ~w s for all to expire~n", [SleepTime]),
timer:sleep(SleepTime * 1000),
% Index entries should now have expired % Index entries should now have expired
{async, Folder4} = IndexFold(), {async, Folder4} = IndexFold(),
QR4 = Folder4(), QR4 = Folder4(),
io:format("Unexpired indexes of length ~w~n", [length(QR4)]), io:format("QR4 Unexpired indexes of length ~w~n", [length(QR4)]),
lists:foreach( timer:sleep(1000),
fun(I) -> {async, Folder5} = IndexFold(),
io:format("Unexpired index ~p~n", [I]) QR5 = Folder5(),
end, io:format("QR5 Unexpired indexes of length ~w~n", [length(QR5)]),
QR4
),
true = QR4 == [], true = QR4 == [],
true = QR5 == [],
ok = leveled_bookie:book_close(Bookie1), ok = leveled_bookie:book_close(Bookie1),
testutil:reset_filestructure(). testutil:reset_filestructure().