Merge pull request #23 from martinsumner/mas-pclerkcrash-3
Mas pclerkcrash 3
commit 5ae93ecb17
2 changed files with 26 additions and 20 deletions
@@ -225,14 +225,16 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, Additions) ->
 
 return_deletions(ManifestSQN, PendingDeletionD) ->
-    case dict:find(ManifestSQN, PendingDeletionD) of
-        {ok, PendingDeletions} ->
+    % The returning of deletions had been separated out as a failure to fetch
+    % here had caused crashes of the clerk. The root cause of the failure to
+    % fetch was the same clerk being asked to do the same work twice - and this
+    % should be blocked now by the ongoing_work boolean in the Penciller
+    % LoopData
+    %
+    % So this is now allowed to crash again
+    PendingDeletions = dict:fetch(ManifestSQN, PendingDeletionD),
     leveled_log:log("PC021", [ManifestSQN]),
-            {PendingDeletions, dict:erase(ManifestSQN, PendingDeletionD)};
-        error ->
-            leveled_log:log("PC020", [ManifestSQN]),
-            {[], PendingDeletionD}
-    end.
+    {PendingDeletions, dict:erase(ManifestSQN, PendingDeletionD)}.
 
 %%%============================================================================
 %%% Test
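The change above leans on the difference between the two dict lookups: dict:find/2 returns the atom error for a missing key, whereas dict:fetch/2 raises an exception (badarg) and so takes the calling process down. A minimal sketch of that distinction, illustrative only and not part of the commit:

-module(fetch_vs_find_sketch).
-export([demo/0]).

demo() ->
    D = dict:store(5, ["a_file"], dict:new()),
    %% find/2 is tolerant: an unknown ManifestSQN simply returns error
    error = dict:find(6, D),
    {ok, Files} = dict:find(5, D),
    %% fetch/2 assumes the key is present and raises badarg otherwise -
    %% this is what now lets the clerk crash on an unexpected deletion prompt
    Files = dict:fetch(5, D),
    ok.

Under the old code the error branch logged PC020 and returned {[], PendingDeletionD}; under the new code the same situation crashes the clerk, on the expectation that the Penciller's work_ongoing flag now prevents the duplicate prompt that used to cause it.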
@@ -240,13 +242,6 @@ return_deletions(ManifestSQN, PendingDeletionD) ->
 
 -ifdef(TEST).
 
-return_deletions_test() ->
-    % During volume tests there would occasionally be a deletion prompt with
-    % an empty pending deletions dictionary. Don't understand why this would
-    % happen - so we check here that at least it does not kill the clerk
-    R = {[], dict:new()},
-    ?assertMatch(R, return_deletions(20, dict:new())).
-
 generate_randomkeys(Count, BucketRangeLow, BucketRangeHigh) ->
     generate_randomkeys(Count, [], BucketRangeLow, BucketRangeHigh).
 
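The removed test asserted the old, tolerant behaviour (an empty pending-deletions dictionary returned {[], PendingDeletionD}), which no longer holds. If coverage of the new behaviour were wanted, a hypothetical replacement inside the same -ifdef(TEST) section could assert the crash instead, assuming eunit's assert macros are available as they already are for ?assertMatch:

return_deletions_crash_test() ->
    %% With dict:fetch/2 a deletion prompt for an unknown ManifestSQN
    %% should now raise badarg rather than be absorbed
    ?assertException(error, badarg, return_deletions(20, dict:new())).

This is a sketch only; the commit itself simply drops the test.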
@@ -511,10 +511,19 @@ handle_cast({levelzero_complete, FN, StartKey, EndKey}, State) ->
                             manifest=UpdMan,
                             persisted_sqn=State#state.ledger_sqn}};
 handle_cast(work_for_clerk, State) ->
-    case State#state.levelzero_pending of
-        true ->
-            {noreply, State};
-        false ->
+    case {State#state.levelzero_pending, State#state.work_ongoing} of
+        {false, false} ->
+            % TODO - as part of supervision tree and retry work:
+            % Need to check for work_ongoing as well as levelzero_pending as
+            % there may be a race that could lead to the clerk doing the same
+            % thing twice.
+            %
+            % This has implications though if we auto-restart the pclerk in the
+            % future, without altering this state - it may never be able to
+            % request work due to ongoing work that crashed the previous clerk
+            %
+            % Perhaps the pclerk should not be restarted because of this, and
+            % the failure should ripple up
             {WL, WC} = leveled_pmanifest:check_for_work(State#state.manifest,
                                                         ?LEVEL_SCALEFACTOR),
             case WC of
@@ -534,7 +543,9 @@ handle_cast(work_for_clerk, State) ->
                                                    {TL, State#state.manifest}),
                     {noreply,
+                        State#state{work_backlog=false, work_ongoing=true}}
-            end
+            end;
+        _ ->
+            {noreply, State}
     end.
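The Penciller side of the fix is the guard in work_for_clerk: work is only handed out when neither an L0 persist nor an earlier merge is in flight, and work_ongoing is set to true at the point the work is pushed to the clerk. A simplified sketch of that gate, using a hypothetical state record rather than the Penciller's real loop state:

-module(work_gate_sketch).
-export([new/0, maybe_hand_out_work/1]).

-record(sketch_state, {levelzero_pending = false :: boolean(),
                       work_ongoing = false :: boolean()}).

new() -> #sketch_state{}.

maybe_hand_out_work(State) ->
    case {State#sketch_state.levelzero_pending,
            State#sketch_state.work_ongoing} of
        {false, false} ->
            %% Safe to prompt the clerk; setting work_ongoing means a second
            %% work_for_clerk cast cannot schedule the same merge twice
            {hand_out_work, State#sketch_state{work_ongoing = true}};
        _ ->
            %% Either an L0 persist or an earlier merge is still in flight
            {no_work, State}
    end.

As the TODO in the diff notes, an auto-restarted pclerk could find work_ongoing still set from the work that crashed its predecessor and so never be offered further work, which is why the comment suggests letting the failure ripple up rather than restarting the clerk in isolation.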