Mas i370 deletepending (#377)

Previously delete_confirmation was blocked on work_ongoing.

However, if the penciller has a work backlog, work_ongoing may be a recurring problem ... and some files, may remain undeleted long after their use - lifetimes for L0 fails in particular have seen to rise from 10-15s to 5m +.

Letting L0 files linger can have a significant impact on memory. In put-heavy tests (e.g. when testing riak-admin transfers) the memory footprint of a riak node has bene observed peaking more than 80% above normal levels, when compared to using this patch.

This PR allows for deletes to be confirmed even when there is work ongoing, by postponing the updating of the manifest until the manifest is next returned from the clerk.

Co-authored-by: Thomas Arts <thomas.arts@quviq.com>
This commit is contained in:
Martin Sumner 2022-05-24 09:42:54 +01:00
parent f8485210ed
commit 234e0066e8
3 changed files with 107 additions and 44 deletions

View file

@ -3055,8 +3055,10 @@ is_empty_headonly_test() ->
undefined_rootpath_test() ->
Opts = [{max_journalsize, 1000000}, {cache_size, 500}],
error_logger:tty(false),
R = gen_server:start(?MODULE, [set_defaults(Opts)], []),
?assertMatch({error, no_root_path}, R).
?assertMatch({error, no_root_path}, R),
error_logger:tty(true).
foldkeys_headonly_test() ->

View file

@ -267,6 +267,9 @@
work_ongoing = false :: boolean(), % i.e. compaction work
work_backlog = false :: boolean(), % i.e. compaction work
pending_removals = [] :: list(string()),
maybe_release = false :: boolean(),
timings = no_timing :: pcl_timings(),
timings_countdown = 0 :: integer(),
@ -992,36 +995,76 @@ handle_cast({manifest_change, Manifest}, State) ->
ok =
leveled_pclerk:clerk_promptdeletions(State#state.clerk, NewManSQN),
% This is accepted as the new manifest, files may be deleted
UpdManifest =
UpdManifest0 =
leveled_pmanifest:merge_snapshot(State#state.manifest, Manifest),
% Need to preserve the penciller's view of snapshots stored in
% the manifest
{noreply, State#state{manifest=UpdManifest, work_ongoing=false}}
UpdManifest1 =
leveled_pmanifest:clear_pending(
UpdManifest0,
State#state.pending_removals,
State#state.maybe_release),
{noreply,
State#state{
manifest=UpdManifest1,
pending_removals = [],
maybe_release = false,
work_ongoing=false}}
end;
handle_cast({release_snapshot, Snapshot}, State) ->
Manifest0 = leveled_pmanifest:release_snapshot(State#state.manifest,
Snapshot),
leveled_log:log("P0003", [Snapshot]),
{noreply, State#state{manifest=Manifest0}};
handle_cast({confirm_delete, Filename, FilePid}, State=#state{is_snapshot=Snap})
when Snap == false ->
case State#state.work_ongoing of
false ->
R2D = leveled_pmanifest:ready_to_delete(State#state.manifest,
Filename),
case R2D of
{true, M0} ->
leveled_log:log("P0005", [Filename]),
ok = leveled_sst:sst_deleteconfirmed(FilePid),
{noreply, State#state{manifest=M0}};
{false, _M0} ->
{noreply, State}
end;
handle_cast({confirm_delete, PDFN, FilePid}, State=#state{is_snapshot=Snap})
when Snap == false ->
% This is a two stage process. A file that is ready for deletion can be
% checked against the manifest to prompt the deletion, however it must also
% be removed from the manifest's list of pending deletes. This is only
% possible when the manifest is in control of the penciller not the clerk.
% When work is ongoing (i.e. the manifest is under control of the clerk),
% any removals from the manifest need to be stored temporarily (in
% pending_removals) until such time that the manifest is in control of the
% penciller and can be updated.
% The maybe_release boolean on state is used if any file is not ready to
% delete, and there is work ongoing. This will then trigger a check to
% ensure any timed out snapshots are released, in case this is the factor
% blocking the delete confirmation
% When an updated manifest is submitted by the clerk, the pending_removals
% will be cleared from pending using the maybe_release boolean
case leveled_pmanifest:ready_to_delete(State#state.manifest, PDFN) of
true ->
% If there is ongoing work, then we can't safely update the pidmap
% as any change will be reverted when the manifest is passed back
% from the Clerk
{noreply, State}
leveled_log:log("P0005", [PDFN]),
ok = leveled_sst:sst_deleteconfirmed(FilePid),
case State#state.work_ongoing of
true ->
UpdRemovals =
lists:usort([PDFN|State#state.pending_removals]),
{noreply,
State#state{
pending_removals = UpdRemovals}};
false ->
UpdManifest =
leveled_pmanifest:clear_pending(
State#state.manifest,
[PDFN],
false),
{noreply,
State#state{manifest = UpdManifest}}
end;
false ->
case State#state.work_ongoing of
true ->
{noreply, State#state{maybe_release = true}};
false ->
UpdManifest =
leveled_pmanifest:clear_pending(
State#state.manifest,
[],
true),
{noreply,
State#state{manifest = UpdManifest}}
end
end;
handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) ->
leveled_log:log("P0029", []),

View file

@ -39,6 +39,7 @@
release_snapshot/2,
merge_snapshot/2,
ready_to_delete/2,
clear_pending/3,
check_for_work/2,
is_basement/2,
levelzero_present/1,
@ -540,32 +541,41 @@ release_snapshot(Manifest, Pid) ->
min_snapshot_sqn = MinSnapSQN}
end.
-spec ready_to_delete(manifest(), string()) -> {boolean(), manifest()}.
%% @doc
%% A SST file which is in the delete_pending state can check to see if it is
%% ready to delete against the manifest.
%% This does not update the manifest, a call is required to clear_pending to
%% remove the file from the manifest's list of pending_deletes.
-spec ready_to_delete(manifest(), string()) -> boolean().
ready_to_delete(Manifest, Filename) ->
PendingDelete = dict:find(Filename, Manifest#manifest.pending_deletes),
case {PendingDelete, Manifest#manifest.min_snapshot_sqn} of
{{ok, _}, 0} ->
% no shapshots
PDs = dict:erase(Filename, Manifest#manifest.pending_deletes),
{true, Manifest#manifest{pending_deletes = PDs}};
true;
{{ok, {ChangeSQN, _ME}}, N} when N >= ChangeSQN ->
% Every snapshot is looking at a version of history after this
% was removed
PDs = dict:erase(Filename, Manifest#manifest.pending_deletes),
{true, Manifest#manifest{pending_deletes = PDs}};
true;
_ ->
% If failed to delete then we should release a phantom pid
% in case this is necessary to timeout any snapshots
% This wll also trigger a log
% This may also be because of i355 - a race condition when a second
% confirm_delete may be sent prior to the first being processed
{false, release_snapshot(Manifest, ?PHANTOM_PID)}
false
end.
-spec clear_pending(manifest(), list(string()), boolean()) -> manifest().
clear_pending(Manifest, [], true) ->
% If the penciller got a confirm_delete that resulted in no pending
% removals, then maybe we need to timeout a snapshot via release
release_snapshot(Manifest, ?PHANTOM_PID);
clear_pending(Manifest, [], false) ->
Manifest;
clear_pending(Manifest, [FN|RestFN], MaybeRelease) ->
PDs = dict:erase(FN, Manifest#manifest.pending_deletes),
clear_pending(
Manifest#manifest{pending_deletes = PDs},
RestFN,
MaybeRelease).
-spec check_for_work(manifest(), list()) -> {list(), integer()}.
%% @doc
%% Check for compaction work in the manifest - look at levels which contain
@ -1303,6 +1313,14 @@ levelzero_present_test() ->
Man1 = insert_manifest_entry(Man0, 1, 0, E0),
?assertMatch(true, levelzero_present(Man1)).
ready_to_delete_combined(Manifest, Filename) ->
case ready_to_delete(Manifest, Filename) of
true ->
{true, clear_pending(Manifest, [Filename], false)};
false ->
{false, clear_pending(Manifest, [], true)}
end.
snapshot_release_test() ->
Man6 = element(7, initial_setup()),
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
@ -1329,31 +1347,31 @@ snapshot_release_test() ->
Man12 = remove_manifest_entry(Man11, 4, 1, E3),
Man13 = add_snapshot(Man12, pid_a4, 3600),
?assertMatch(false, element(1, ready_to_delete(Man8, "Z1"))),
?assertMatch(false, element(1, ready_to_delete(Man10, "Z2"))),
?assertMatch(false, element(1, ready_to_delete(Man12, "Z3"))),
?assertMatch(false, element(1, ready_to_delete_combined(Man8, "Z1"))),
?assertMatch(false, element(1, ready_to_delete_combined(Man10, "Z2"))),
?assertMatch(false, element(1, ready_to_delete_combined(Man12, "Z3"))),
Man14 = release_snapshot(Man13, pid_a1),
?assertMatch(false, element(1, ready_to_delete(Man14, "Z2"))),
?assertMatch(false, element(1, ready_to_delete(Man14, "Z3"))),
{Bool14, Man15} = ready_to_delete(Man14, "Z1"),
?assertMatch(false, element(1, ready_to_delete_combined(Man14, "Z2"))),
?assertMatch(false, element(1, ready_to_delete_combined(Man14, "Z3"))),
{Bool14, Man15} = ready_to_delete_combined(Man14, "Z1"),
?assertMatch(true, Bool14),
%This doesn't change anything - released snaphsot not the min
Man16 = release_snapshot(Man15, pid_a4),
?assertMatch(false, element(1, ready_to_delete(Man16, "Z2"))),
?assertMatch(false, element(1, ready_to_delete(Man16, "Z3"))),
?assertMatch(false, element(1, ready_to_delete_combined(Man16, "Z2"))),
?assertMatch(false, element(1, ready_to_delete_combined(Man16, "Z3"))),
Man17 = release_snapshot(Man16, pid_a2),
?assertMatch(false, element(1, ready_to_delete(Man17, "Z3"))),
{Bool17, Man18} = ready_to_delete(Man17, "Z2"),
?assertMatch(false, element(1, ready_to_delete_combined(Man17, "Z3"))),
{Bool17, Man18} = ready_to_delete_combined(Man17, "Z2"),
?assertMatch(true, Bool17),
Man19 = release_snapshot(Man18, pid_a3),
io:format("MinSnapSQN ~w~n", [Man19#manifest.min_snapshot_sqn]),
{Bool19, _Man20} = ready_to_delete(Man19, "Z3"),
{Bool19, _Man20} = ready_to_delete_combined(Man19, "Z3"),
?assertMatch(true, Bool19).