From c3a4f5118d983ca0a28ee22f2f52a19138d21024 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 13 Jun 2019 11:50:02 +0100 Subject: [PATCH 1/2] Each merge log details of the level below Help with troubleshooting memory problems, and potential issues with GC --- src/leveled_log.erl | 3 +++ src/leveled_pclerk.erl | 7 +++++ src/leveled_pmanifest.erl | 56 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 97102d0..64d58b5 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -222,6 +222,9 @@ {info, "Prompting deletions at ManifestSQN=~w"}}, {"PC022", {info, "Storing reference to deletions at ManifestSQN=~w"}}, + {"PC023", + {info, "At level=~w file_count=~w avg_mem=~w " + ++ "file with most memory fn=~s p=~w mem=~w"}}, {"PM002", {info, "Completed dump of L0 cache to list of l0cache_size=~w"}}, diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 027eb44..3e697a3 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -176,6 +176,13 @@ handle_work({SrcLevel, Manifest}, State) -> {leveled_pmanifest:get_manifest_sqn(UpdManifest), EntriesToDelete}. merge(SrcLevel, Manifest, RootPath, OptsSST) -> + case leveled_pmanifest:report_manifest_level(Manifest, SrcLevel + 1) of + {0, 0, undefined} -> + ok; + {FCnt, AvgMem, {MaxFN, MaxP, MaxMem}} -> + leveled_log:log("PC023", + [SrcLevel + 1, FCnt, AvgMem, MaxFN, MaxP, MaxMem]) + end, Src = leveled_pmanifest:mergefile_selector(Manifest, SrcLevel), NewSQN = leveled_pmanifest:get_manifest_sqn(Manifest) + 1, SinkList = leveled_pmanifest:merge_lookup(Manifest, diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 031ba8a..84c02e3 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -42,7 +42,8 @@ check_for_work/2, is_basement/2, levelzero_present/1, - check_bloom/3 + check_bloom/3, + report_manifest_level/2 ]). -export([ @@ -225,6 +226,50 @@ remove_manifest(RootPath, GC_SQN) -> end. +-spec report_manifest_level(manifest(), non_neg_integer()) -> + {non_neg_integer(), + non_neg_integer(), + {string(), pid(), non_neg_integer()} | + undefined}. +%% @doc +%% Report on a level in the manifest +%% - How many files in the level +%% - The average size of the memory occupied by a files in the level +%% - The file with the largest memory footprint {Filename, Pid, Memory} +report_manifest_level(Manifest, LevelIdx) -> + Levels = Manifest#manifest.levels, + Level = array:get(LevelIdx, Levels), + {LevelSize, LevelList} = + case is_list(Level) of + true -> + {length(Level), Level}; + _ -> + {leveled_tree:tsize(Level), leveled_tree:to_list(Level)} + end, + AccMemFun = + fun(MaybeME, {MemAcc, Max}) -> + ME = get_manifest_entry(MaybeME), + P = ME#manifest_entry.owner, + {memory, PM} = process_info(P, memory), + UpdMax = + case Max of + {_MaxFN, _MaxP, MaxPM} when MaxPM > PM -> + Max; + _ -> + {ME#manifest_entry.filename, P, PM} + end, + {MemAcc + PM, UpdMax} + end, + case LevelSize of + 0 -> + {0, 0, undefined}; + _ -> + {TotalMem, BiggestMem} = + lists:foldl(AccMemFun, {0, undefined}, LevelList), + {LevelSize, TotalMem div LevelSize, BiggestMem} + end. + + -spec replace_manifest_entry(manifest(), integer(), integer(), list()|manifest_entry(), list()|manifest_entry()) -> manifest(). @@ -555,6 +600,15 @@ check_bloom(Manifest, FP, Hash) -> %%% Internal Functions %%%============================================================================ +-spec get_manifest_entry({tuple(), manifest_entry()}|manifest_entry()) + -> manifest_entry(). +%% @doc +%% Manifest levels can have entries of two forms, use this if only interested +%% in the latter form +get_manifest_entry({_EndKey, ManifestEntry}) -> + ManifestEntry; +get_manifest_entry(ManifestEntry) -> + ManifestEntry. %% All these internal functions that work on a level are also passed LeveIdx %% even if this is not presently relevant. Currnetly levels are lists, but From 876a023db15356c91fe22cec1cb0d680c9bfc25a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 13 Jun 2019 14:58:32 +0100 Subject: [PATCH 2/2] Add database_id to options So that this can be recorded in logs --- src/leveled_bookie.erl | 5 +++++ src/leveled_log.erl | 41 +++++++++++++++++++++++++--------- test/end_to_end/riak_SUITE.erl | 3 ++- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index acfccc7..72d4d25 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -140,6 +140,7 @@ {compression_point, ?COMPRESSION_POINT}, {log_level, ?LOG_LEVEL}, {forced_logs, []}, + {database_id, 0}, {override_functions, []}, {snapshot_timeout_short, ?SNAPTIMEOUT_SHORT}, {snapshot_timeout_long, ?SNAPTIMEOUT_LONG}]). @@ -339,6 +340,8 @@ % "P0032", "SST12", "CDB19", "SST13", "I0019"]} % Will log all timing points even when log_level is not set to % support info + {database_id, non_neg_integer()} | + % Integer database ID to be used in logs {override_functions, list(leveled_head:appdefinable_function_tuple())} | % Provide a list of override functions that will be used for % user-defined tags @@ -1131,6 +1134,8 @@ init([Opts]) -> leveled_log:set_loglevel(LogLevel), ForcedLogs = proplists:get_value(forced_logs, Opts), leveled_log:add_forcedlogs(ForcedLogs), + DatabaseID = proplists:get_value(database_id, Opts), + leveled_log:set_databaseid(DatabaseID), {InkerOpts, PencillerOpts} = set_options(Opts), diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 64d58b5..fd59366 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -12,6 +12,7 @@ log_randomtimer/4]). -export([set_loglevel/1, + set_databaseid/1, add_forcedlogs/1, remove_forcedlogs/1, get_opts/0, @@ -20,7 +21,8 @@ -record(log_options, {log_level = info :: log_level(), - forced_logs = [] :: [string()]}). + forced_logs = [] :: [string()], + database_id = 0 :: non_neg_integer()}). -type log_level() :: debug | info | warn | error | critical. -type log_options() :: #log_options{}. @@ -416,6 +418,14 @@ set_loglevel(LogLevel) when is_atom(LogLevel) -> UpdLO = LO#log_options{log_level = LogLevel}, save(UpdLO). +-spec set_databaseid(non_neg_integer()) -> ok. +%% @doc +%% Set the Database ID for this PID +set_databaseid(DBid) when is_integer(DBid) -> + LO = get_opts(), + UpdLO = LO#log_options{database_id = DBid}, + save(UpdLO). + -spec add_forcedlogs(list(string())) -> ok. %% @doc %% Add a forced log to the list of forced logs. this will cause the log of this @@ -474,13 +484,16 @@ log(LogReference, Subs) -> log(LogRef, Subs, SupportedLogLevels) -> case lists:keyfind(LogRef, 1, ?LOGBASE) of {LogRef, {LogLevel, LogText}} -> - case should_i_log(LogLevel, SupportedLogLevels, LogRef) of + LogOpts = get_opts(), + case should_i_log(LogLevel, SupportedLogLevels, LogRef, LogOpts) of true -> + DBid = LogOpts#log_options.database_id, io:format(format_time() ++ " " - ++ atom_to_list(LogLevel) ++ " " - ++ LogRef ++ " ~w " + ++ " log_level=" + ++ atom_to_list(LogLevel) ++ " log_ref=" + ++ LogRef ++ " db_id=~w pid=~w " ++ LogText ++ "~n", - [self()|Subs]); + [DBid|[self()|Subs]]); false -> ok end; @@ -489,7 +502,10 @@ log(LogRef, Subs, SupportedLogLevels) -> end. should_i_log(LogLevel, Levels, LogRef) -> - #log_options{log_level = CurLevel, forced_logs = ForcedLogs} = get_opts(), + should_i_log(LogLevel, Levels, LogRef, get_opts()). + +should_i_log(LogLevel, Levels, LogRef, LogOpts) -> + #log_options{log_level = CurLevel, forced_logs = ForcedLogs} = LogOpts, case lists:member(LogRef, ForcedLogs) of true -> true; @@ -508,10 +524,11 @@ is_active_level([_|T], C, L) -> is_active_level(T, C, L). log_timer(LogReference, Subs, StartTime) -> log_timer(LogReference, Subs, StartTime, ?LOG_LEVELS). -log_timer(LogRef, Subs, StartTime, SupportedLogLevels) -> +log_timer(LogRef, Subs, StartTime, SupportedLevels) -> case lists:keyfind(LogRef, 1, ?LOGBASE) of {LogRef, {LogLevel, LogText}} -> - case should_i_log(LogLevel, SupportedLogLevels, LogRef) of + LogOpts = get_opts(), + case should_i_log(LogLevel, SupportedLevels, LogRef, LogOpts) of true -> DurationText = case timer:now_diff(os:timestamp(), StartTime) of @@ -522,12 +539,14 @@ log_timer(LogRef, Subs, StartTime, SupportedLogLevels) -> US -> " with us_duration=" ++ integer_to_list(US) end, + DBid = LogOpts#log_options.database_id, io:format(format_time() ++ " " - ++ atom_to_list(LogLevel) ++ " " - ++ LogRef ++ " ~w " + ++ " log_level=" + ++ atom_to_list(LogLevel) ++ " log_ref=" + ++ LogRef ++ " db_id=~w pid=~w " ++ LogText ++ DurationText ++ "~n", - [self()|Subs]); + [DBid|[self()|Subs]]); false -> ok end; diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index aadf13f..abc93cc 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -38,7 +38,8 @@ basic_riak_tester(Bucket, KeyCount) -> StartOpts1 = [{root_path, RootPath}, {max_journalsize, 500000000}, {max_pencillercachesize, 24000}, - {sync_strategy, testutil:sync_strategy()}], + {sync_strategy, testutil:sync_strategy()}, + {database_id, 32}], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), IndexGenFun =