Allow for caching of compaction scores

Potentially reduces the overhead of scoring each file on every run.

The change also alters the default thresholds for compaction to favour longer runs (which will tend towards greater storage efficiency).
Martin Sumner 2020-11-27 02:35:27 +00:00
parent e3bcd7eaec
commit b4c79caf7a
9 changed files with 153 additions and 17 deletions

View file

@@ -106,8 +106,9 @@ The `compaction_runs_perday` indicates for the leveled store how many times each
 The `compaction_low_hour` and `compaction_high_hour` are the hours of the day which support the compaction window - set to 0 and 23 respectively if compaction is required to be a continuous process.
-The `max_run_length` controls how many files can be compacted in a single compaction run. The scoring of files and runs is controlled through `maxrunlength_compactionpercentage` and `singlefile_compactionpercentage`.
+The `max_run_length` controls how many files can be compacted in a single compaction run. The scoring of files and runs is controlled through `maxrunlength_compactionpercentage` and `singlefile_compactionpercentage`. The `singlefile_compactionpercentage` is the compaction score at which a file becomes eligible for compaction on its own, whereas the `maxrunlength_compactionpercentage` is the score required for a run of `max_run_length` files to be considered eligible. The higher the `maxrunlength_compactionpercentage` and the lower the `singlefile_compactionpercentage`, the more likely a longer run will be chosen over a shorter run.
+The `journalcompaction_scoreonein` option controls how frequently a file will be scored. If this is set to one, every file will be scored on every compaction run. If it is set to an integer greater than one ('n'), then on average any given file will only be scored on one in 'n' runs; on other runs a cached score for the file will be used. On startup all files will be scored on the first run. As journals get very large, and where frequent compaction is required due to mutating objects, this can save significant resource.
 ## Snapshot Timeouts
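For context, these options are passed to `leveled_bookie:book_start/1` as a proplist, exactly as the new `simple_cachescoring` test below does. A minimal sketch of enabling cached scoring, where the root path and the specific numeric values are illustrative assumptions rather than recommendations:

    %% Sketch only: start a bookie with the compaction options documented
    %% above. The path and the numeric values are illustrative.
    start_with_cached_scoring() ->
        StartOpts =
            [{root_path, "/tmp/leveled_data"},         % assumed path
             {max_run_length, 8},                      % files per compaction run
             {singlefile_compactionpercentage, 30.0},  % the new default
             {maxrunlength_compactionpercentage, 70.0},
             {journalcompaction_scoreonein, 8}],       % score a file ~1 in 8 runs
        {ok, Bookie} = leveled_bookie:book_start(StartOpts),
        Bookie.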

View file

@@ -69,6 +69,7 @@
     max_run_length,
     singlefile_compactionperc :: float()|undefined,
     maxrunlength_compactionperc :: float()|undefined,
+    score_onein = 1 :: pos_integer(),
     snaptimeout_long :: pos_integer() | undefined}).
 -record(penciller_options,
@@ -94,4 +95,5 @@
     compression_method = native :: lz4|native,
     singlefile_compactionperc :: float()|undefined,
     maxrunlength_compactionperc :: float()|undefined,
+    score_onein = 1 :: pos_integer(),
     reload_strategy = [] :: list()}).

View file

@@ -100,6 +100,13 @@
     {datatype, integer}
 ]}.
+%% @doc The number of times per day to score an individual file for compaction
+{mapping, "leveled.compaction_scores_perday", "leveled.compaction_scores_perday", [
+    {default, 1},
+    {datatype, integer},
+    hidden
+]}.
 %% @doc Compaction Low Hour
 %% The hour of the day in which journal compaction can start. Use Low hour
 %% of 0 and High hour of 23 to have no compaction window (i.e. always compact
@@ -143,7 +150,7 @@
 %% then it is a candidate (e.g. in default case if 50% of space would be
 %% recovered)
 {mapping, "leveled.singlefile_compactionpercentage", "leveled.singlefile_compactionpercentage", [
-    {default, 50.0},
+    {default, 30.0},
     {datatype, float},
     hidden
 ]}.
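Assuming the standard cuttlefish `name = value` syntax that these schema mappings imply, the new (hidden) setting would be written in the generated configuration file as below; the value 24 is purely illustrative:

    ## Score each journal file for compaction 24 times per day (illustrative)
    leveled.compaction_scores_perday = 24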

View file

@@ -140,8 +140,9 @@
     {head_only, false},
     {waste_retention_period, undefined},
     {max_run_length, undefined},
-    {singlefile_compactionpercentage, 50.0},
+    {singlefile_compactionpercentage, 30.0},
     {maxrunlength_compactionpercentage, 70.0},
+    {journalcompaction_scoreonein, 1},
     {reload_strategy, []},
     {max_pencillercachesize, ?MAX_PCL_CACHE_SIZE},
     {ledger_preloadpagecache_level, ?SST_PAGECACHELEVEL_LOOKUP},
@@ -292,6 +293,11 @@
     % a run of max_run_length, before that run can be a compaction
     % candidate. For runs between 1 and max_run_length, a
     % proportionate score is calculated
+    {journalcompaction_scoreonein, pos_integer()} |
+        % When scoring for a compaction run, there is a 1 in x probability
+        % that any given file will be freshly scored on this run. If not,
+        % a cached score will be used; the cached score is the average of
+        % the latest score and the rolling average of previous scores
     {reload_strategy, list()} |
     % The reload_strategy is exposed as an option as currently no firm
     % decision has been made about how recovery from failure should
@@ -1757,6 +1763,8 @@ set_options(Opts) ->
     MaxSSTSlots = proplists:get_value(max_sstslots, Opts),
+    ScoreOneIn = proplists:get_value(journalcompaction_scoreonein, Opts),
     {#inker_options{root_path = JournalFP,
                     reload_strategy = ReloadStrategy,
                     max_run_length = proplists:get_value(max_run_length, Opts),
@@ -1766,6 +1774,7 @@ set_options(Opts) ->
                     snaptimeout_long = SnapTimeoutLong,
                     compression_method = CompressionMethod,
                     compress_on_receipt = CompressOnReceipt,
+                    score_onein = ScoreOneIn,
                     cdb_options =
                         #cdb_options{max_size=MaxJournalSize,
                                      max_count=MaxJournalCount,
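To make the one-in-n semantics concrete: the expected number of times a given journal file is freshly scored per day is the number of compaction runs per day divided by the score_onein setting. A back-of-envelope helper, not part of leveled, with illustrative numbers in the comment:

    %% Expected fresh scorings of a file per day, given the number of
    %% compaction runs per day and the one-in-N scoring probability.
    %% e.g. expected_scores_per_day(24, 8) =:= 3.0
    expected_scores_per_day(RunsPerDay, ScoreOneIn) ->
        RunsPerDay / ScoreOneIn.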

View file

@@ -113,7 +113,9 @@
     cdb_deletepending/1,
     cdb_deletepending/3,
     cdb_isrolling/1,
-    cdb_clerkcomplete/1]).
+    cdb_clerkcomplete/1,
+    cdb_getcachedscore/1,
+    cdb_putcachedscore/2]).
 -export([finished_rolling/1,
          hashtable_calc/2]).
@@ -152,7 +154,8 @@
     timings = no_timing :: cdb_timings(),
     timings_countdown = 0 :: integer(),
     log_options = leveled_log:get_opts()
-        :: leveled_log:log_options()}).
+        :: leveled_log:log_options(),
+    cached_score :: float()|undefined}).
 -record(cdb_timings, {sample_count = 0 :: integer(),
                       sample_cyclecount = 0 :: integer(),
@@ -164,6 +167,9 @@
 -type cdb_timings() :: no_timing|#cdb_timings{}.
 -type hashtable_index() :: tuple().
 -type file_location() :: integer()|eof.
+-type filter_fun() ::
+    fun((any(), binary(), integer(), any(), fun((binary()) -> any())) ->
+        {stop|loop, any()}).
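The new `filter_fun()` type codifies the contract of the fun passed to `cdb_scan/4` below. A hedged illustration of a conforming fun, where the argument names are my reading of the spec and `CDBPid` is assumed to be the pid of an open CDB file process; the example simply counts entries without ever stopping the scan early:

    %% Illustrative filter fun matching filter_fun(): count entries,
    %% always continuing the scan ({loop, Acc}) rather than stopping.
    CountFun =
        fun(_Key, _ValueBin, _Position, Acc, _ExtractFun) ->
            {loop, Acc + 1}
        end,
    %% Scan the whole file from the start (StartPosition = undefined):
    {_LastPosition, EntryCount} =
        leveled_cdb:cdb_scan(CDBPid, CountFun, 0, undefined)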
@@ -369,7 +375,7 @@ cdb_deletepending(Pid) ->
 cdb_deletepending(Pid, ManSQN, Inker) ->
     gen_fsm:send_event(Pid, {delete_pending, ManSQN, Inker}).
--spec cdb_scan(pid(), fun(), any(), integer()|undefined) ->
+-spec cdb_scan(pid(), filter_fun(), any(), integer()|undefined) ->
         {integer()|eof, any()}.
 %% @doc
 %% cdb_scan returns {LastPosition, Acc}. Use LastPosition as StartPosition to
@@ -424,6 +430,20 @@ cdb_isrolling(Pid) ->
 cdb_clerkcomplete(Pid) ->
     gen_fsm:send_all_state_event(Pid, clerk_complete).
+-spec cdb_getcachedscore(pid()) -> undefined|float().
+%% @doc
+%% Return the cached score for a CDB file
+cdb_getcachedscore(Pid) ->
+    gen_fsm:sync_send_all_state_event(Pid, get_cachedscore, infinity).
+-spec cdb_putcachedscore(pid(), float()) -> ok.
+%% @doc
+%% Store the cached score for a CDB file
+cdb_putcachedscore(Pid, Score) ->
+    gen_fsm:sync_send_all_state_event(Pid, {put_cachedscore, Score}, infinity).
%%%============================================================================
%%% gen_server callbacks
@@ -829,6 +849,10 @@ handle_sync_event(cdb_filename, _From, StateName, State) ->
     {reply, State#state.filename, StateName, State};
 handle_sync_event(cdb_isrolling, _From, StateName, State) ->
     {reply, StateName == rolling, StateName, State};
+handle_sync_event(get_cachedscore, _From, StateName, State) ->
+    {reply, State#state.cached_score, StateName, State};
+handle_sync_event({put_cachedscore, Score}, _From, StateName, State) ->
+    {reply, ok, StateName, State#state{cached_score = Score}};
 handle_sync_event(cdb_close, _From, delete_pending, State) ->
     leveled_log:log("CDB05",
                     [State#state.filename, delete_pending, cdb_close]),
@@ -836,8 +860,7 @@ handle_sync_event(cdb_close, _From, delete_pending, State) ->
                      State#state.filename,
                      State#state.waste_path),
     {stop, normal, ok, State};
-handle_sync_event(cdb_close, _From, StateName, State) ->
-    leveled_log:log("CDB05", [State#state.filename, StateName, cdb_close]),
+handle_sync_event(cdb_close, _From, _StateName, State) ->
     file:close(State#state.handle),
     {stop, normal, ok, State}.
@@ -2396,6 +2419,10 @@ get_keys_byposition_manykeys_test_to() ->
     SampleList3 = cdb_getpositions(P2, KeyCount + 1),
     ?assertMatch(KeyCount, length(SampleList3)),
+    ?assertMatch(undefined, cdb_getcachedscore(P2)),
+    ok = cdb_putcachedscore(P2, 80.0),
+    ?assertMatch(80.0, cdb_getcachedscore(P2)),
     ok = cdb_close(P2),
     ok = file:delete(F2).

View file

@@ -117,7 +117,8 @@
     maxrunlength_compactionperc = ?MAXRUNLENGTH_COMPACTION_TARGET ::float(),
     compression_method = native :: lz4|native,
     scored_files = [] :: list(candidate()),
-    scoring_state :: scoring_state()|undefined}).
+    scoring_state :: scoring_state()|undefined,
+    score_onein = 1 :: pos_integer()}).
 -record(candidate, {low_sqn :: integer() | undefined,
                     filename :: string() | undefined,
@@ -280,7 +281,10 @@ init([LogOpts, IClerkOpts]) ->
     singlefile_compactionperc = SFL_CompPerc,
     maxrunlength_compactionperc = MRL_CompPerc,
     compression_method =
-        IClerkOpts#iclerk_options.compression_method}}.
+        IClerkOpts#iclerk_options.compression_method,
+    score_onein =
+        IClerkOpts#iclerk_options.score_onein
+    }}.
 handle_call(stop, _From, State) ->
     case State#state.scoring_state of
@@ -325,13 +329,22 @@ handle_cast({score_filelist, [Entry|Tail]}, State) ->
     Candidates = State#state.scored_files,
     {LowSQN, FN, JournalP, _LK} = Entry,
     ScoringState = State#state.scoring_state,
-    CpctPerc = check_single_file(JournalP,
+    CpctPerc =
+        case {leveled_cdb:cdb_getcachedscore(JournalP),
+                leveled_rand:uniform(State#state.score_onein) == 1} of
+            {CachedScore, UseNewScore}
+                    when CachedScore == undefined; UseNewScore ->
+                check_single_file(JournalP,
                     ScoringState#scoring_state.filter_fun,
                     ScoringState#scoring_state.filter_server,
                     ScoringState#scoring_state.max_sqn,
                     ?SAMPLE_SIZE,
                     ?BATCH_SIZE,
-                    State#state.reload_strategy),
+                    State#state.reload_strategy);
+            {CachedScore, false} ->
+                CachedScore
+        end,
+    ok = leveled_cdb:cdb_putcachedscore(JournalP, CpctPerc),
     Candidate =
         #candidate{low_sqn = LowSQN,
                    filename = FN,
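The heart of the change is the case expression above: a file is freshly scored when it has no cached score yet, or when a 1-in-score_onein random draw succeeds; otherwise the cached score is reused, and the chosen score is always written back to the CDB process. A minimal standalone sketch of that decision, noting that the helper name and its extraction from handle_cast/2 are my own:

    %% Hypothetical helper mirroring the decision in the diff above.
    %% Returns true when the file should be freshly scored.
    -spec should_rescore(float()|undefined, pos_integer()) -> boolean().
    should_rescore(undefined, _ScoreOneIn) ->
        true;                                  % no cached score yet: must score
    should_rescore(_CachedScore, ScoreOneIn) ->
        leveled_rand:uniform(ScoreOneIn) == 1. % 1-in-N chance of re-scoring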

View file

@@ -806,6 +806,7 @@ start_from_file(InkOpts) ->
     PressMethod = InkOpts#inker_options.compression_method,
     PressOnReceipt = InkOpts#inker_options.compress_on_receipt,
     SnapTimeout = InkOpts#inker_options.snaptimeout_long,
+    ScoreOneIn = InkOpts#inker_options.score_onein,
     IClerkOpts =
         #iclerk_options{inker = self(),
@@ -815,7 +816,8 @@ start_from_file(InkOpts) ->
             compression_method = PressMethod,
             max_run_length = MRL,
             singlefile_compactionperc = SFL_CompactPerc,
-            maxrunlength_compactionperc = MRL_CompactPerc},
+            maxrunlength_compactionperc = MRL_CompactPerc,
+            score_onein = ScoreOneIn},
     {ok, Clerk} = leveled_iclerk:clerk_new(IClerkOpts),

View file

@@ -299,7 +299,7 @@ journal_compaction_tester(Restart, WRP) ->
     end,
     ok = leveled_penciller:pcl_close(PclClone),
     ok = leveled_inker:ink_close(InkClone),
-    % Snapshot released so deletes shoudl occur at next timeout
+    % Snapshot released so deletes should occur at next timeout
     case WRP of
         undefined ->
             timer:sleep(10100); % wait for delete_pending timeout

View file

@@ -16,7 +16,8 @@
     journal_compaction_bustedjournal/1,
     close_duringcompaction/1,
     allkeydelta_journal_multicompact/1,
-    recompact_keydeltas/1
+    recompact_keydeltas/1,
+    simple_cachescoring/1
     ]).
 all() -> [
@@ -33,7 +34,8 @@ all() -> [
     close_duringcompaction,
     allkeydelta_journal_multicompact,
     recompact_keydeltas,
-    stdtag_recalc
+    stdtag_recalc,
+    simple_cachescoring
     ].
@@ -555,6 +557,79 @@ aae_missingjournal(_Config) ->
     ok = leveled_bookie:book_close(Bookie2),
     testutil:reset_filestructure().
+simple_cachescoring(_Config) ->
+    RootPath = testutil:reset_filestructure(),
+    StartOpts = [{root_path, RootPath},
+                 {max_journalobjectcount, 2000},
+                 {sync_strategy, testutil:sync_strategy()}],
+    {ok, Bookie1} =
+        leveled_bookie:book_start(StartOpts ++
+                                  [{journalcompaction_scoreonein, 8}]),
+    {TestObject, TestSpec} = testutil:generate_testobject(),
+    ok = testutil:book_riakput(Bookie1, TestObject, TestSpec),
+    testutil:check_forobject(Bookie1, TestObject),
+    GenList = [2, 32002, 64002, 96002],
+    _CLs = testutil:load_objects(32000, GenList, Bookie1, TestObject,
+                                 fun testutil:generate_objects/2),
+    F = fun leveled_bookie:book_islastcompactionpending/1,
+    WaitForCompaction =
+        fun(B) ->
+            fun(X, Pending) ->
+                case X of
+                    1 ->
+                        leveled_bookie:book_compactjournal(B, 30000);
+                    _ ->
+                        ok
+                end,
+                case Pending of
+                    false ->
+                        false;
+                    true ->
+                        io:format("Loop ~w waiting for journal "
+                                  ++ "compaction to complete~n", [X]),
+                        timer:sleep(100),
+                        F(B)
+                end
+            end
+        end,
+    io:format("Scoring for first time - every file should need scoring~n"),
+    Args1 = [WaitForCompaction(Bookie1), true, lists:seq(1, 300)],
+    {TC0, false} = timer:tc(lists, foldl, Args1),
+    io:format("Score four more times with cached scoring~n"),
+    {TC1, false} = timer:tc(lists, foldl, Args1),
+    {TC2, false} = timer:tc(lists, foldl, Args1),
+    {TC3, false} = timer:tc(lists, foldl, Args1),
+    {TC4, false} = timer:tc(lists, foldl, Args1),
+    ok = leveled_bookie:book_close(Bookie1),
+    {ok, Bookie2} =
+        leveled_bookie:book_start(StartOpts),
+    io:format("Re-opened bookie without caching - re-compare compaction time~n"),
+    io:format("Scoring for first time - every file should need scoring~n"),
+    Args2 = [WaitForCompaction(Bookie2), true, lists:seq(1, 300)],
+    {TN0, false} = timer:tc(lists, foldl, Args2),
+    io:format("Score four more times without cached scoring~n"),
+    {TN1, false} = timer:tc(lists, foldl, Args2),
+    {TN2, false} = timer:tc(lists, foldl, Args2),
+    {TN3, false} = timer:tc(lists, foldl, Args2),
+    {TN4, false} = timer:tc(lists, foldl, Args2),
+    AvgSecondRunCache = (TC1 + TC2 + TC3 + TC4) div 4000,
+    AvgSecondRunNoCache = (TN1 + TN2 + TN3 + TN4) div 4000,
+    io:format("With caching ~w first run ~w average other runs~n",
+              [TC0 div 1000, AvgSecondRunCache]),
+    io:format("Without caching ~w first run ~w average other runs~n",
+              [TN0 div 1000, AvgSecondRunNoCache]),
+    true = (TC0 > AvgSecondRunCache),
+    true = (TC0 / AvgSecondRunCache) > (TN0 / AvgSecondRunNoCache),
+    ok = leveled_bookie:book_close(Bookie2),
+    io:format("Exit having shown simply that cached scoring is faster~n"),
+    testutil:reset_filestructure().
 aae_bustedjournal(_Config) ->
     RootPath = testutil:reset_filestructure(),
     StartOpts = [{root_path, RootPath},