From b4c79caf7a83e280ea5481a19e14572ec4be203c Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 27 Nov 2020 02:35:27 +0000 Subject: [PATCH] Allow for caching of compaction scores Potentially reduce the overheads of scoring each file on every run. The change also alters the default thresholds for compaction to favour longer runs (which will tend towards greater storage efficiency). --- docs/STARTUP_OPTIONS.md | 3 +- include/leveled.hrl | 2 + priv/leveled.schema | 9 +++- src/leveled_bookie.erl | 11 ++++- src/leveled_cdb.erl | 37 ++++++++++++-- src/leveled_iclerk.erl | 23 +++++++-- src/leveled_inker.erl | 4 +- test/end_to_end/basic_SUITE.erl | 2 +- test/end_to_end/recovery_SUITE.erl | 79 +++++++++++++++++++++++++++++- 9 files changed, 153 insertions(+), 17 deletions(-) diff --git a/docs/STARTUP_OPTIONS.md b/docs/STARTUP_OPTIONS.md index c802f73..4dad87e 100644 --- a/docs/STARTUP_OPTIONS.md +++ b/docs/STARTUP_OPTIONS.md @@ -106,8 +106,9 @@ The `compaction_runs_perday` indicates for the leveled store how many times eahc The `compaction_low_hour` and `compaction_high_hour` are the hours of the day which support the compaction window - set to 0 and 23 respectively if compaction is required to be a continuous process. -The `max_run_length` controls how many files can be compacted in a single compaction run. The scoring of files and runs is controlled through `maxrunlength_compactionpercentage` and `singlefile_compactionpercentage`. +The `max_run_length` controls how many files can be compacted in a single compaction run. The scoring of files and runs is controlled through `maxrunlength_compactionpercentage` and `singlefile_compactionpercentage`. The `singlefile_compactionpercentage` is an acceptable compaction score for a file to be eligible for compaction on its own, where as the `maxrunlength_compactionpercentage` is the score required for a run of the `max_run_length` to be considered eligible. The higher the `maxrunlength_compactionpercentage` and the lower the `singlefile_compactionpercentage` - the more likely a longer run will be chosen over a shorter run. +The `journalcompaction_scoreonein` option controls how frequently a file will be scored. If this is set to one, then each and every file will be scored each and every compaction run. If this is set to an integer greater than one ('n'), then on average any given file will only be score on one in 'n' runs. On other runs. a cached score for the file will be used. On startup all files will be scored on the first run. As journals get very large, and where frequent comapction is required due to mutating objects, this can save significant resource. ## Snapshot Timeouts diff --git a/include/leveled.hrl b/include/leveled.hrl index 761b9a6..9db7941 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -69,6 +69,7 @@ max_run_length, singlefile_compactionperc :: float()|undefined, maxrunlength_compactionperc :: float()|undefined, + score_onein = 1 :: pos_integer(), snaptimeout_long :: pos_integer() | undefined}). -record(penciller_options, @@ -94,4 +95,5 @@ compression_method = native :: lz4|native, singlefile_compactionperc :: float()|undefined, maxrunlength_compactionperc :: float()|undefined, + score_onein = 1 :: pos_integer(), reload_strategy = [] :: list()}). diff --git a/priv/leveled.schema b/priv/leveled.schema index 7c259dc..ab7300b 100644 --- a/priv/leveled.schema +++ b/priv/leveled.schema @@ -100,6 +100,13 @@ {datatype, integer} ]}. +%% @doc The number of times per day to score an individual file for compaction +{mapping, "leveled.compaction_scores_perday", "leveled.compaction_scores_perday", [ + {default, 1}, + {datatype, integer}, + hidden +]}. + %% @doc Compaction Low Hour %% The hour of the day in which journal compaction can start. Use Low hour %% of 0 and High hour of 23 to have no compaction window (i.e. always compact @@ -143,7 +150,7 @@ %% then it is a candidate (e.g. in default case if 50% of space would be %% recovered) {mapping, "leveled.singlefile_compactionpercentage", "leveled.singlefile_compactionpercentage", [ - {default, 50.0}, + {default, 30.0}, {datatype, float}, hidden ]}. diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index e1a2728..b2d9629 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -140,8 +140,9 @@ {head_only, false}, {waste_retention_period, undefined}, {max_run_length, undefined}, - {singlefile_compactionpercentage, 50.0}, + {singlefile_compactionpercentage, 30.0}, {maxrunlength_compactionpercentage, 70.0}, + {journalcompaction_scoreonein, 1}, {reload_strategy, []}, {max_pencillercachesize, ?MAX_PCL_CACHE_SIZE}, {ledger_preloadpagecache_level, ?SST_PAGECACHELEVEL_LOOKUP}, @@ -292,6 +293,11 @@ % a run of max_run_length, before that run can be a compaction % candidate. For runs between 1 and max_run_length, a % proportionate score is calculated + {journalcompaction_scoreonein, pos_integer()} | + % When scoring for compaction run a probability (1 in x) of whether + % any file will be scored this run. If not scored a cached score + % will be used, and the cached score is the average of the latest + % score and the rolling average of previous scores {reload_strategy, list()} | % The reload_strategy is exposed as an option as currently no firm % decision has been made about how recovery from failure should @@ -1757,6 +1763,8 @@ set_options(Opts) -> MaxSSTSlots = proplists:get_value(max_sstslots, Opts), + ScoreOneIn = proplists:get_value(journalcompaction_scoreonein, Opts), + {#inker_options{root_path = JournalFP, reload_strategy = ReloadStrategy, max_run_length = proplists:get_value(max_run_length, Opts), @@ -1766,6 +1774,7 @@ set_options(Opts) -> snaptimeout_long = SnapTimeoutLong, compression_method = CompressionMethod, compress_on_receipt = CompressOnReceipt, + score_onein = ScoreOneIn, cdb_options = #cdb_options{max_size=MaxJournalSize, max_count=MaxJournalCount, diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 3de322b..d91d2b7 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -113,7 +113,9 @@ cdb_deletepending/1, cdb_deletepending/3, cdb_isrolling/1, - cdb_clerkcomplete/1]). + cdb_clerkcomplete/1, + cdb_getcachedscore/1, + cdb_putcachedscore/2]). -export([finished_rolling/1, hashtable_calc/2]). @@ -152,7 +154,8 @@ timings = no_timing :: cdb_timings(), timings_countdown = 0 :: integer(), log_options = leveled_log:get_opts() - :: leveled_log:log_options()}). + :: leveled_log:log_options(), + cached_score :: float()|undefined}). -record(cdb_timings, {sample_count = 0 :: integer(), sample_cyclecount = 0 :: integer(), @@ -164,6 +167,9 @@ -type cdb_timings() :: no_timing|#cdb_timings{}. -type hashtable_index() :: tuple(). -type file_location() :: integer()|eof. +-type filter_fun() :: + fun((any(), binary(), integer(), any(), fun((binary()) -> any())) -> + {stop|loop, any()}). @@ -369,7 +375,7 @@ cdb_deletepending(Pid) -> cdb_deletepending(Pid, ManSQN, Inker) -> gen_fsm:send_event(Pid, {delete_pending, ManSQN, Inker}). --spec cdb_scan(pid(), fun(), any(), integer()|undefined) -> +-spec cdb_scan(pid(), filter_fun(), any(), integer()|undefined) -> {integer()|eof, any()}. %% @doc %% cdb_scan returns {LastPosition, Acc}. Use LastPosition as StartPosiiton to @@ -424,6 +430,20 @@ cdb_isrolling(Pid) -> cdb_clerkcomplete(Pid) -> gen_fsm:send_all_state_event(Pid, clerk_complete). +-spec cdb_getcachedscore(pid()) -> undefined|float(). +%% @doc +%% Return the cached score for a CDB file +cdb_getcachedscore(Pid) -> + gen_fsm:sync_send_all_state_event(Pid, get_cachedscore, infinity). + + +-spec cdb_putcachedscore(pid(), float()) -> ok. +%% @doc +%% Return the cached score for a CDB file +cdb_putcachedscore(Pid, Score) -> + gen_fsm:sync_send_all_state_event(Pid, {put_cachedscore, Score}, infinity). + + %%%============================================================================ %%% gen_server callbacks @@ -829,6 +849,10 @@ handle_sync_event(cdb_filename, _From, StateName, State) -> {reply, State#state.filename, StateName, State}; handle_sync_event(cdb_isrolling, _From, StateName, State) -> {reply, StateName == rolling, StateName, State}; +handle_sync_event(get_cachedscore, _From, StateName, State) -> + {reply, State#state.cached_score, StateName, State}; +handle_sync_event({put_cachedscore, Score}, _From, StateName, State) -> + {reply, ok, StateName, State#state{cached_score = Score}}; handle_sync_event(cdb_close, _From, delete_pending, State) -> leveled_log:log("CDB05", [State#state.filename, delete_pending, cdb_close]), @@ -836,8 +860,7 @@ handle_sync_event(cdb_close, _From, delete_pending, State) -> State#state.filename, State#state.waste_path), {stop, normal, ok, State}; -handle_sync_event(cdb_close, _From, StateName, State) -> - leveled_log:log("CDB05", [State#state.filename, StateName, cdb_close]), +handle_sync_event(cdb_close, _From, _StateName, State) -> file:close(State#state.handle), {stop, normal, ok, State}. @@ -2396,6 +2419,10 @@ get_keys_byposition_manykeys_test_to() -> SampleList3 = cdb_getpositions(P2, KeyCount + 1), ?assertMatch(KeyCount, length(SampleList3)), + ?assertMatch(undefined, cdb_getcachedscore(P2)), + ok = cdb_putcachedscore(P2, 80.0), + ?assertMatch(80.0, cdb_getcachedscore(P2)), + ok = cdb_close(P2), ok = file:delete(F2). diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 9f1256d..c2c34d1 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -117,7 +117,8 @@ maxrunlength_compactionperc = ?MAXRUNLENGTH_COMPACTION_TARGET ::float(), compression_method = native :: lz4|native, scored_files = [] :: list(candidate()), - scoring_state :: scoring_state()|undefined}). + scoring_state :: scoring_state()|undefined, + score_onein = 1 :: pos_integer()}). -record(candidate, {low_sqn :: integer() | undefined, filename :: string() | undefined, @@ -270,7 +271,7 @@ init([LogOpts, IClerkOpts]) -> MRLCP when is_float(MRLCP) -> MRLCP end, - + {ok, #state{max_run_length = MRL, inker = IClerkOpts#iclerk_options.inker, cdb_options = CDBopts, @@ -280,7 +281,10 @@ init([LogOpts, IClerkOpts]) -> singlefile_compactionperc = SFL_CompPerc, maxrunlength_compactionperc = MRL_CompPerc, compression_method = - IClerkOpts#iclerk_options.compression_method}}. + IClerkOpts#iclerk_options.compression_method, + score_onein = + IClerkOpts#iclerk_options.score_onein + }}. handle_call(stop, _From, State) -> case State#state.scoring_state of @@ -325,13 +329,22 @@ handle_cast({score_filelist, [Entry|Tail]}, State) -> Candidates = State#state.scored_files, {LowSQN, FN, JournalP, _LK} = Entry, ScoringState = State#state.scoring_state, - CpctPerc = check_single_file(JournalP, + CpctPerc = + case {leveled_cdb:cdb_getcachedscore(JournalP), + leveled_rand:uniform(State#state.score_onein) == 1} of + {CachedScore, UseNewScore} + when CachedScore == undefined; UseNewScore -> + check_single_file(JournalP, ScoringState#scoring_state.filter_fun, ScoringState#scoring_state.filter_server, ScoringState#scoring_state.max_sqn, ?SAMPLE_SIZE, ?BATCH_SIZE, - State#state.reload_strategy), + State#state.reload_strategy); + {CachedScore, false} -> + CachedScore + end, + ok = leveled_cdb:cdb_putcachedscore(JournalP, CpctPerc), Candidate = #candidate{low_sqn = LowSQN, filename = FN, diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index 432006c..8391007 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -806,6 +806,7 @@ start_from_file(InkOpts) -> PressMethod = InkOpts#inker_options.compression_method, PressOnReceipt = InkOpts#inker_options.compress_on_receipt, SnapTimeout = InkOpts#inker_options.snaptimeout_long, + ScoreOneIn = InkOpts#inker_options.score_onein, IClerkOpts = #iclerk_options{inker = self(), @@ -815,7 +816,8 @@ start_from_file(InkOpts) -> compression_method = PressMethod, max_run_length = MRL, singlefile_compactionperc = SFL_CompactPerc, - maxrunlength_compactionperc = MRL_CompactPerc}, + maxrunlength_compactionperc = MRL_CompactPerc, + score_onein = ScoreOneIn}, {ok, Clerk} = leveled_iclerk:clerk_new(IClerkOpts), diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 2495eb8..ac1a5ea 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -299,7 +299,7 @@ journal_compaction_tester(Restart, WRP) -> end, ok = leveled_penciller:pcl_close(PclClone), ok = leveled_inker:ink_close(InkClone), - % Snapshot released so deletes shoudl occur at next timeout + % Snapshot released so deletes should occur at next timeout case WRP of undefined -> timer:sleep(10100); % wait for delete_pending timeout diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 44d9418..162900d 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -16,7 +16,8 @@ journal_compaction_bustedjournal/1, close_duringcompaction/1, allkeydelta_journal_multicompact/1, - recompact_keydeltas/1 + recompact_keydeltas/1, + simple_cachescoring/1 ]). all() -> [ @@ -33,7 +34,8 @@ all() -> [ close_duringcompaction, allkeydelta_journal_multicompact, recompact_keydeltas, - stdtag_recalc + stdtag_recalc, + simple_cachescoring ]. @@ -555,6 +557,79 @@ aae_missingjournal(_Config) -> ok = leveled_bookie:book_close(Bookie2), testutil:reset_filestructure(). +simple_cachescoring(_Config) -> + RootPath = testutil:reset_filestructure(), + StartOpts = [{root_path, RootPath}, + {max_journalobjectcount, 2000}, + {sync_strategy, testutil:sync_strategy()}], + {ok, Bookie1} = + leveled_bookie:book_start(StartOpts ++ + [{journalcompaction_scoreonein, 8}]), + {TestObject, TestSpec} = testutil:generate_testobject(), + ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), + testutil:check_forobject(Bookie1, TestObject), + GenList = [2, 32002, 64002, 96002], + _CLs = testutil:load_objects(32000, GenList, Bookie1, TestObject, + fun testutil:generate_objects/2), + + F = fun leveled_bookie:book_islastcompactionpending/1, + WaitForCompaction = + fun(B) -> + fun(X, Pending) -> + case X of + 1 -> + leveled_bookie:book_compactjournal(B, 30000); + _ -> + ok + end, + case Pending of + false -> + false; + true -> + io:format("Loop ~w waiting for journal " + ++ "compaction to complete~n", [X]), + timer:sleep(100), + F(B) + end + end + end, + io:format("Scoring for first time - every file should need scoring~n"), + Args1 = [WaitForCompaction(Bookie1), true, lists:seq(1, 300)], + {TC0, false} = timer:tc(lists, foldl, Args1), + io:format("Score four more times with cached scoring~n"), + {TC1, false} = timer:tc(lists, foldl, Args1), + {TC2, false} = timer:tc(lists, foldl, Args1), + {TC3, false} = timer:tc(lists, foldl, Args1), + {TC4, false} = timer:tc(lists, foldl, Args1), + + ok = leveled_bookie:book_close(Bookie1), + {ok, Bookie2} = + leveled_bookie:book_start(StartOpts), + io:format("Re-opened bookie withour caching - re-compare compaction time~n"), + io:format("Scoring for first time - every file should need scoring~n"), + Args2 = [WaitForCompaction(Bookie2), true, lists:seq(1, 300)], + {TN0, false} = timer:tc(lists, foldl, Args2), + io:format("Score four more times with cached scoring~n"), + {TN1, false} = timer:tc(lists, foldl, Args2), + {TN2, false} = timer:tc(lists, foldl, Args2), + {TN3, false} = timer:tc(lists, foldl, Args2), + {TN4, false} = timer:tc(lists, foldl, Args2), + + AvgSecondRunCache = (TC1 + TC2 +TC3 + TC4) div 4000, + AvgSecondRunNoCache = (TN1 + TN2 +TN3 + TN4) div 4000, + + io:format("With caching ~w first run ~w average other runs~n", + [TC0 div 1000, AvgSecondRunCache]), + io:format("Without caching ~w first run ~w average other runs~n", + [TN0 div 1000, AvgSecondRunNoCache]), + true = (TC0 > AvgSecondRunCache), + true = (TC0/AvgSecondRunCache) > (TN0/AvgSecondRunNoCache), + ok = leveled_bookie:book_close(Bookie2), + + io:format("Exit having proven simply that caching score is faster~n"), + testutil:reset_filestructure(). + + aae_bustedjournal(_Config) -> RootPath = testutil:reset_filestructure(), StartOpts = [{root_path, RootPath},