diff --git a/docs/STARTUP_OPTIONS.md b/docs/STARTUP_OPTIONS.md index d09ebd3..870b0db 100644 --- a/docs/STARTUP_OPTIONS.md +++ b/docs/STARTUP_OPTIONS.md @@ -120,3 +120,9 @@ There are two snapshot timeouts that can be configured: These set the period in seconds before a snapshot which has not shutdown, is declared to have been released - so that any file deletions which are awaiting the snapshot's completion can go ahead. This covers only silently failing snapshots. Snapshots that shutdown neatly will be released from locking deleted files when they shutdown. The 'short' timeout is used for snapshots which support index queries and bucket listing. The 'long' timeout is used for all other folds (e.g. key lists, head folds and object folds). + +## Statistic gathering + +Leveled will gather monitoring statistics on HEAD/GET/PUT requests, with timing points taken throughout the store. These timings are gathered by the `leveled_monitor`, and there are three configuration options. The two primary options are: `stats_percentage`, an integer between 0 and 100 which informs the store of the proportion of requests which should be timed at each timing point; and `stats_logfrequency`, which controls the frequency (in seconds) with which the leveled_monitor will write a log (for one of the stats types in its queue). + +The specific stats types logged can be found in the ?LOG_LIST within the leveled_monitor. If only a subset is of interest, then this list can be modified by setting `monitor_loglist`. This can also be used to increase the frequency of individual log types by adding them to the list multiple times. \ No newline at end of file diff --git a/include/leveled.hrl b/include/leveled.hrl index e17698e..97eaebc 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -48,7 +48,8 @@ binary_mode = false :: boolean(), sync_strategy = sync, log_options = leveled_log:get_opts() - :: leveled_log:log_options()}). + :: leveled_log:log_options(), + monitor = {no_monitor, 0} :: leveled_monitor:monitor()}). -record(sst_options, {press_method = native @@ -56,7 +57,8 @@ log_options = leveled_log:get_opts() :: leveled_log:log_options(), max_sstslots = 256 :: pos_integer(), - pagecache_level = 1 :: pos_integer()}). + pagecache_level = 1 :: pos_integer(), + monitor = {no_monitor, 0} :: leveled_monitor:monitor()}). -record(inker_options, {cdb_max_size :: integer() | undefined, @@ -73,7 +75,8 @@ singlefile_compactionperc :: float()|undefined, maxrunlength_compactionperc :: float()|undefined, score_onein = 1 :: pos_integer(), - snaptimeout_long :: pos_integer() | undefined}). + snaptimeout_long :: pos_integer() | undefined, + monitor = {no_monitor, 0} :: leveled_monitor:monitor()}). -record(penciller_options, {root_path :: string() | undefined, @@ -88,7 +91,8 @@ compression_method = native :: lz4|native, levelzero_cointoss = false :: boolean(), snaptimeout_short :: pos_integer() | undefined, - snaptimeout_long :: pos_integer() | undefined}). + snaptimeout_long :: pos_integer() | undefined, + monitor = {no_monitor, 0} :: leveled_monitor:monitor()}). -record(iclerk_options, {inker :: pid() | undefined, diff --git a/priv/leveled.schema b/priv/leveled.schema index 1582f41..85e4a39 100644 --- a/priv/leveled.schema +++ b/priv/leveled.schema @@ -26,6 +26,15 @@ hidden ]}. +%% @doc The maximum size of the bookie's cache before each new PUT results in +%% a slow-offer pause.
Prior to Riak 3.0.10 this defaulted to 4 +{mapping, "leveled.cache_multiple", "leveled.cache_multiple", [ + {default, 2}, + {datatype, integer}, + hidden +]}. + + %% @doc The key size of the Penciller's in-memory cache {mapping, "leveled.penciller_cache_size", "leveled.penciller_cache_size", [ {default, 20000}, @@ -179,7 +188,20 @@ hidden ]}. +%% @doc Statistic monitoring proportion +%% The proportion of requests to be covered by stats, an integer between +%% 0 and 100. There is no flow control, so setting this too high could +%% possibly overflow the leveled_monitor mailbox. +{mapping, "leveled.stats_percentage", "leveled.stats_percentage", [ + {default, 10}, + {datatype, integer}, + {validators, ["range:0-100"]} +]}. - - - +%% @doc Statistic log frequency (seconds) +%% The wait in seconds between logs from each leveled_monitor (there is one +%% monitor per vnode) +{mapping, "leveled.stats_logfrequency", "leveled.stats_logfrequency", [ + {default, 30}, + {datatype, integer} +]}. \ No newline at end of file diff --git a/priv/leveled_multi.schema b/priv/leveled_multi.schema index 0b4c854..ff0f4a4 100644 --- a/priv/leveled_multi.schema +++ b/priv/leveled_multi.schema @@ -178,7 +178,25 @@ hidden ]}. - +%% @doc Statistic monitoring proportion +%% The proportion of requests to be covered by stats, an integer between +%% 0 and 100. There is no flow control, so setting this too high could +%% possibly overflow the leveled_monitor mailbox. +{mapping, "multi_backend.$name.leveled.stats_percentage", "riak_kv.multi_backend", [ + {default, 10}, + {datatype, integer}, + {validators, ["range:0-100"]}, + hidden +]}. + +%% @doc Statistic log frequency (seconds) +%% The wait in seconds between logs from each leveled_monitor (there is one +%% monitor per vnode) +{mapping, "multi_backend.$name.leveled.stats_logfrequency", "riak_kv.multi_backend", [ + {default, 30}, + {datatype, integer}, + hidden +]}. diff --git a/rebar.config b/rebar.config index a28f223..e1112ba 100644 --- a/rebar.config +++ b/rebar.config @@ -1,11 +1,9 @@ -{erl_opts, [warnings_as_errors, +{minimum_otp_vsn, "22.0"}. + +{erl_opts, + [warnings_as_errors, {platform_define, "^2[0-5]{1}", fsm_deprecated}, - {platform_define, "^1[7-8]{1}", old_rand}, - {platform_define, "^17", no_log2}, - {platform_define, "^R", no_sync}, - {platform_define, "^R", old_rand}, - {platform_define, "^R", no_log2}, - {platform_define, "^R", slow_test}]}. + {platform_define, "^2[2-5]{1}", if_check}]}. {xref_checks, [undefined_function_calls,undefined_functions]}. diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index bd51dd2..2f9877b 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -94,7 +94,7 @@ ]). -export([empty_ledgercache/0, - snapshot_store/6, + snapshot_store/7, fetch_value/2, journal_notfound/4]). @@ -106,6 +106,7 @@ -define(LOADING_PAUSE, 1000). -define(CACHE_SIZE, 2500). +-define(MAX_CACHE_MULTIPLE, 2). -define(MIN_CACHE_SIZE, 100). -define(MIN_PCL_CACHE_SIZE, 400). -define(MAX_PCL_CACHE_SIZE, 28000). @@ -130,10 +131,12 @@ -define(SST_PAGECACHELEVEL_NOLOOKUP, 1). -define(SST_PAGECACHELEVEL_LOOKUP, 4). -define(CACHE_LOGPOINT, 50000). +-define(DEFAULT_STATS_PERC, 10).
-define(OPTION_DEFAULTS, [{root_path, undefined}, {snapshot_bookie, undefined}, {cache_size, ?CACHE_SIZE}, + {cache_multiple, ?MAX_CACHE_MULTIPLE}, {max_journalsize, 1000000000}, {max_journalobjectcount, 200000}, {max_sstslots, 256}, @@ -154,70 +157,37 @@ {database_id, ?DEFAULT_DBID}, {override_functions, []}, {snapshot_timeout_short, ?SNAPTIMEOUT_SHORT}, - {snapshot_timeout_long, ?SNAPTIMEOUT_LONG}]). + {snapshot_timeout_long, ?SNAPTIMEOUT_LONG}, + {stats_percentage, ?DEFAULT_STATS_PERC}, + {stats_logfrequency, + element(1, leveled_monitor:get_defaults())}, + {monitor_loglist, + element(2, leveled_monitor:get_defaults())}]). -record(ledger_cache, {mem :: ets:tab(), loader = leveled_tree:empty(?CACHE_TYPE) :: tuple()|empty_cache, load_queue = [] :: list(), - index = leveled_pmem:new_index(), % array or empty_index + index = leveled_pmem:new_index(), min_sqn = infinity :: integer()|infinity, max_sqn = 0 :: integer()}). -record(state, {inker :: pid() | undefined, penciller :: pid() | undefined, - cache_size :: integer() | undefined, + cache_size :: pos_integer() | undefined, + cache_multiple :: pos_integer() | undefined, ledger_cache = #ledger_cache{} :: ledger_cache(), is_snapshot :: boolean() | undefined, slow_offer = false :: boolean(), - head_only = false :: boolean(), head_lookup = true :: boolean(), - ink_checking = ?MAX_KEYCHECK_FREQUENCY :: integer(), - - put_countdown = 0 :: integer(), - get_countdown = 0 :: integer(), - snapshot_countdown = 0 :: integer(), - head_countdown = 0 :: integer(), - cache_ratio = {0, 0, 0} :: cache_ratio(), - get_timings = no_timing :: get_timings(), - put_timings = no_timing :: put_timings(), - snapshot_timings = no_timing :: snapshot_timings(), - head_timings = no_timing :: head_timings()}). - - --record(get_timings, {sample_count = 0 :: integer(), - head_time = 0 :: integer(), - body_time = 0 :: integer(), - fetch_count = 0 :: integer()}). - --record(head_timings, {sample_count = 0 :: integer(), - pcl_time = 0 :: integer(), - buildhead_time = 0 :: integer()}). - --record(put_timings, {sample_count = 0 :: integer(), - mem_time = 0 :: integer(), - ink_time = 0 :: integer(), - total_size = 0 :: integer()}). - --record(snapshot_timings, {sample_count = 0 :: integer(), - bookie_time = 0 :: integer(), - pcl_time = 0 :: integer()}). + monitor = {no_monitor, 0} :: leveled_monitor:monitor()}). -type book_state() :: #state{}. -type sync_mode() :: sync|none|riak_sync. -type ledger_cache() :: #ledger_cache{}. --type get_timings() :: no_timing|#get_timings{}. --type put_timings() :: no_timing|#put_timings{}. --type snapshot_timings() :: no_timing|#snapshot_timings{}. --type head_timings() :: no_timing|#head_timings{}. --type timings() :: - put_timings()|get_timings()|snapshot_timings()|head_timings(). --type timing_types() :: head|get|put|snapshot. --type cache_ratio() :: - {non_neg_integer(), non_neg_integer(), non_neg_integer()}. -type open_options() :: @@ -239,6 +209,10 @@ % randomised jitter (randomised jitter will still be added to % configured values) % The minimum value is 100 - any lower value will be ignored + {cache_multiple, pos_integer()} | + % A multiple of the cache size beyond which the cache should not + % grow even if the penciller is busy. A pause will be returned for + % every PUT when this multiple of the cache_size is reached {max_journalsize, pos_integer()} | % The maximum size of a journal file in bytes.
The absolute % maximum must be 4GB due to 4 byte file pointers being used @@ -365,13 +339,12 @@ % moving to higher log levels will at present make the operator % blind to sample performance statistics of leveled sub-components % etc - {forced_logs, list(string())} | + {forced_logs, list(atom())} | % Forced logs allow for specific info level logs, such as those % logging stats to be logged even when the default log level has % been set to a higher log level. Using: % {forced_logs, - % ["B0015", "B0016", "B0017", "B0018", - % "P0032", "SST12", "CDB19", "SST13", "I0019"]} + % [b0015, b0016, b0017, b0018, p0032, sst12]} % Will log all timing points even when log_level is not set to % support info {database_id, non_neg_integer()} | @@ -384,11 +357,20 @@ % assumed to have failed, and so requires to be torndown. The % short timeout is applied to queries where long_running is set to % false - {snapshot_timeout_long, pos_integer()} + {snapshot_timeout_long, pos_integer()} | % Time in seconds before a snapshot that has not been shutdown is % assumed to have failed, and so requires to be torndown. The % short timeout is applied to queries where long_running is set to % true + {stats_percentage, 0..100} | + % Probability that stats will be collected for an individual + % request. + {stats_logfrequency, pos_integer()} | + % Time in seconds before logging the next timing log. This covers + % the logs associated with the timing of GET/PUTs in various parts + % of the system. There are 7 such logs - so setting to 30s will + % mean that each individual log will occur every 210s + {monitor_loglist, list(leveled_monitor:log_type())} ]. -type initial_loadfun() :: @@ -736,7 +718,7 @@ book_indexfold(Pid, Bucket, FoldAccT, Range, TermHandling) -> % future release this code branch may be removed, and such queries may % instead return `error`. For now null is assumed to be lower than any % key - leveled_log:log("B0019", [Bucket]), + leveled_log:log(b0019, [Bucket]), book_indexfold(Pid, {Bucket, null}, FoldAccT, Range, TermHandling). @@ -1081,7 +1063,6 @@ book_headfold(Pid, Tag, all, FoldAccT, JournalCheck, SnapPreFold, book_snapshot(Pid, SnapType, Query, LongRunning) -> gen_server:call(Pid, {snapshot, SnapType, Query, LongRunning}, infinity). - -spec book_compactjournal(pid(), integer()) -> ok|busy. -spec book_islastcompactionpending(pid()) -> boolean(). -spec book_trimjournal(pid()) -> ok.
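A minimal sketch of passing the new options above when opening a store directly, assuming the existing leveled_bookie:book_start/1 options-list API (the path and chosen values are illustrative only; monitor_loglist is omitted here as the valid log types live in the ?LOG_LIST within leveled_monitor, which is not part of this diff):

    {ok, Bookie} =
        leveled_bookie:book_start(
            [{root_path, "/tmp/leveled_example"},
             {cache_multiple, 2},
             {stats_percentage, 25},
             {stats_logfrequency, 60}]).
    %% ... PUTs/GETs/HEADs as normal - roughly 1 in 4 will now be timed ...
    ok = leveled_bookie:book_close(Bookie).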
@@ -1196,7 +1177,15 @@ init([Opts]) -> DatabaseID = proplists:get_value(database_id, Opts), leveled_log:set_databaseid(DatabaseID), - {InkerOpts, PencillerOpts} = set_options(Opts), + {ok, Monitor} = + leveled_monitor:monitor_start( + proplists:get_value(stats_logfrequency, Opts), + proplists:get_value(monitor_loglist, Opts) + ), + StatsPercentage = proplists:get_value(stats_percentage, Opts), + + {InkerOpts, PencillerOpts} = + set_options(Opts, {Monitor, StatsPercentage}), OverrideFunctions = proplists:get_value(override_functions, Opts), SetFun = @@ -1211,6 +1200,8 @@ init([Opts]) -> max(1, ConfiguredCacheSize div (100 div ?CACHE_SIZE_JITTER)), CacheSize = ConfiguredCacheSize + erlang:phash2(self()) rem CacheJitter, + MaxCacheMultiple = + proplists:get_value(cache_multiple, Opts), PCLMaxSize = PencillerOpts#penciller_options.max_inmemory_tablesize, CacheRatio = PCLMaxSize div ConfiguredCacheSize, @@ -1221,7 +1212,7 @@ init([Opts]) -> % and performance may be unpredictable case CacheRatio > 32 of true -> - leveled_log:log("B0020", [PCLMaxSize, ConfiguredCacheSize]); + leveled_log:log(b0020, [PCLMaxSize, ConfiguredCacheSize]); false -> ok end, @@ -1243,76 +1234,68 @@ init([Opts]) -> SSTOpts = PencillerOpts#penciller_options.sst_options, SSTOpts0 = SSTOpts#sst_options{pagecache_level = SSTPageCacheLevel}, PencillerOpts0 = - PencillerOpts#penciller_options{sst_options = SSTOpts0}, - - State0 = #state{cache_size=CacheSize, - is_snapshot=false, - head_only=HeadOnly, - head_lookup = HeadLookup}, + PencillerOpts#penciller_options{sst_options = SSTOpts0}, - {Inker, Penciller} = - startup(InkerOpts, PencillerOpts0, State0), + {Inker, Penciller} = startup(InkerOpts, PencillerOpts0), NewETS = ets:new(mem, [ordered_set]), - leveled_log:log("B0001", [Inker, Penciller]), - {ok, State0#state{inker=Inker, - penciller=Penciller, - ledger_cache=#ledger_cache{mem = NewETS}}}; + leveled_log:log(b0001, [Inker, Penciller]), + {ok, + #state{ + cache_size = CacheSize, + cache_multiple = MaxCacheMultiple, + is_snapshot = false, + head_only = HeadOnly, + head_lookup = HeadLookup, + inker = Inker, + penciller = Penciller, + ledger_cache = #ledger_cache{mem = NewETS}, + monitor = {Monitor, StatsPercentage}}}; {Bookie, undefined} -> {ok, Penciller, Inker} = book_snapshot(Bookie, store, undefined, true), - leveled_log:log("B0002", [Inker, Penciller]), - {ok, #state{penciller=Penciller, - inker=Inker, - is_snapshot=true}} + leveled_log:log(b0002, [Inker, Penciller]), + {ok, + #state{penciller = Penciller, + inker = Inker, + is_snapshot = true}} end.
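The handle_call clauses that follow all use the same sampling pattern; the sketch below shows the leveled_monitor contract as assumed by this diff (the helper name timed_apply and WorkFun are illustrative, not part of the change): maybe_time/1 returns a start timestamp only when the request is sampled (no_timing otherwise), and step_time/1 returns the elapsed time with a fresh timestamp, passing no_timing straight through so unsampled requests skip the stats path entirely.

    %% Illustrative only - not part of this change
    timed_apply(Monitor, WorkFun) ->
        SW0 = leveled_monitor:maybe_time(Monitor),
        Result = WorkFun(),
        {Elapsed, _SW1} = leveled_monitor:step_time(SW0),
        {Elapsed, Result}.  % Elapsed is no_timing for unsampled requests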
handle_call({put, Bucket, Key, Object, IndexSpecs, Tag, TTL, DataSync}, From, State) when State#state.head_only == false -> LedgerKey = leveled_codec:to_ledgerkey(Bucket, Key, Tag), - SW0 = os:timestamp(), + SWLR = os:timestamp(), + SW0 = leveled_monitor:maybe_time(State#state.monitor), {ok, SQN, ObjSize} = leveled_inker:ink_put(State#state.inker, LedgerKey, Object, {IndexSpecs, TTL}, DataSync), - {SW1, Timings1} = - update_timings(SW0, {put, {inker, ObjSize}}, State#state.put_timings), - Changes = preparefor_ledgercache(null, - LedgerKey, - SQN, - Object, - ObjSize, - {IndexSpecs, TTL}), + {T0, SW1} = leveled_monitor:step_time(SW0), + Changes = + preparefor_ledgercache( + null, LedgerKey, SQN, Object, ObjSize, {IndexSpecs, TTL}), + {T1, SW2} = leveled_monitor:step_time(SW1), Cache0 = addto_ledgercache(Changes, State#state.ledger_cache), - {_SW2, Timings2} = update_timings(SW1, {put, mem}, Timings1), - - {Timings, CountDown} = - update_statetimings(put, Timings2, State#state.put_countdown), - % If the previous push to memory was returned then punish this PUT with - % a delay. If the back-pressure in the Penciller continues, these - % delays will beocme more frequent + {T2, _SW3} = leveled_monitor:step_time(SW2), case State#state.slow_offer of true -> gen_server:reply(From, pause); false -> gen_server:reply(From, ok) end, - maybe_longrunning(SW0, overall_put), - case maybepush_ledgercache(State#state.cache_size, - Cache0, - State#state.penciller) of - {ok, NewCache} -> - {noreply, State#state{ledger_cache = NewCache, - put_timings = Timings, - put_countdown = CountDown, - slow_offer = false}}; - {returned, NewCache} -> - {noreply, State#state{ledger_cache = NewCache, - put_timings = Timings, - put_countdown = CountDown, - slow_offer = true}} + maybe_longrunning(SWLR, overall_put), + maybelog_put_timing(State#state.monitor, T0, T1, T2, ObjSize), + case maybepush_ledgercache( + State#state.cache_size, + State#state.cache_multiple, + Cache0, + State#state.penciller) of + {ok, Cache} -> + {noreply, State#state{slow_offer = false, ledger_cache = Cache}}; + {returned, Cache} -> + {noreply, State#state{slow_offer = true, ledger_cache = Cache}} end; handle_call({mput, ObjectSpecs, TTL}, From, State) when State#state.head_only == true -> @@ -1329,25 +1312,24 @@ handle_call({mput, ObjectSpecs, TTL}, From, State) false -> gen_server:reply(From, ok) end, - case maybepush_ledgercache(State#state.cache_size, - Cache0, - State#state.penciller) of - {ok, NewCache} -> - {noreply, State#state{ledger_cache = NewCache, - slow_offer = false}}; - {returned, NewCache} -> - {noreply, State#state{ledger_cache = NewCache, - slow_offer = true}} + case maybepush_ledgercache( + State#state.cache_size, + State#state.cache_multiple, + Cache0, + State#state.penciller) of + {ok, Cache} -> + {noreply, State#state{ledger_cache = Cache, slow_offer = false}}; + {returned, Cache} -> + {noreply, State#state{ledger_cache = Cache, slow_offer = true}} end; handle_call({get, Bucket, Key, Tag}, _From, State) when State#state.head_only == false -> LedgerKey = leveled_codec:to_ledgerkey(Bucket, Key, Tag), - SWh = os:timestamp(), - {H0, UpdCR} = + SW0 = leveled_monitor:maybe_time(State#state.monitor), + {H0, _CacheHit} = fetch_head(LedgerKey, State#state.penciller, - State#state.ledger_cache, - State#state.cache_ratio), + State#state.ledger_cache), HeadResult = case H0 of not_present -> @@ -1367,72 +1349,62 @@ handle_call({get, Bucket, Key, Tag}, _From, State) end end end, - {SWb, Timings1} = - update_timings(SWh, {get, head}, 
State#state.get_timings), - {Reply, Timings2} = + {TS0, SW1} = leveled_monitor:step_time(SW0), + GetResult = case HeadResult of not_found -> - {not_found, Timings1}; + not_found; {LK, SQN} -> Object = fetch_value(State#state.inker, {LK, SQN}), - {_SW, UpdTimingsB} = - update_timings(SWb, {get, body}, Timings1), case Object of not_present -> - {not_found, UpdTimingsB}; + not_found; _ -> - {{ok, Object}, UpdTimingsB} + {ok, Object} end end, - {Timings, CountDown} = - update_statetimings(get, Timings2, State#state.get_countdown), - {reply, - Reply, - State#state{get_timings = Timings, - get_countdown = CountDown, - cache_ratio = - maybelog_cacheratio(UpdCR, State#state.is_snapshot)}}; + {TS1, _SW2} = leveled_monitor:step_time(SW1), + maybelog_get_timing( + State#state.monitor, TS0, TS1, GetResult == not_found), + {reply, GetResult, State}; handle_call({head, Bucket, Key, Tag, SQNOnly}, _From, State) when State#state.head_lookup == true -> - SWp = os:timestamp(), + SW0 = leveled_monitor:maybe_time(State#state.monitor), LK = leveled_codec:to_ledgerkey(Bucket, Key, Tag), - {Head, UpdCR} = + {Head, CacheHit} = fetch_head(LK, State#state.penciller, State#state.ledger_cache, - State#state.cache_ratio, State#state.head_only), - {SWr, UpdTimingsP} = - update_timings(SWp, {head, pcl}, State#state.head_timings), - {LedgerMD, SQN, JournalCheckFrequency} = + {TS0, SW1} = leveled_monitor:step_time(SW0), + JrnalCheckFreq = + case State#state.head_only of + true -> + 0; + false -> + State#state.ink_checking + end, + {LedgerMD, SQN, UpdJrnalCheckFreq} = case Head of not_present -> - {not_found, null, State#state.ink_checking}; + {not_found, null, JrnalCheckFreq}; Head -> case leveled_codec:striphead_to_v1details(Head) of {_SeqN, tomb, _MH, _MD} -> - {not_found, null, State#state.ink_checking}; + {not_found, null, JrnalCheckFreq}; {SeqN, {active, TS}, _MH, MD} -> case TS >= leveled_util:integer_now() of true -> - CheckFrequency = - case State#state.head_only of - true -> - 0; - false -> - State#state.ink_checking - end, - case journal_notfound(CheckFrequency, - State#state.inker, - LK, - SeqN) of + I = State#state.inker, + case journal_notfound( + JrnalCheckFreq, I, LK, SeqN) of {true, UppedFrequency} -> {not_found, null, UppedFrequency}; {false, ReducedFrequency} -> {MD, SeqN, ReducedFrequency} end; false -> - {not_found, null, State#state.ink_checking} + {not_found, null, JrnalCheckFreq} end end end, @@ -1445,50 +1417,61 @@ handle_call({head, Bucket, Key, Tag, SQNOnly}, _From, State) {_, true} -> {ok, SQN} end, - {_SW, UpdTimingsR} = - update_timings(SWr, {head, rsp}, UpdTimingsP), - {UpdTimings, CountDown} = - update_statetimings(head, - UpdTimingsR, - State#state.head_countdown), - - {reply, - Reply, - State#state{head_timings = UpdTimings, - head_countdown = CountDown, - ink_checking = JournalCheckFrequency, - cache_ratio = - maybelog_cacheratio(UpdCR, State#state.is_snapshot)}}; + {TS1, _SW2} = leveled_monitor:step_time(SW1), + maybelog_head_timing( + State#state.monitor, TS0, TS1, LedgerMD == not_found, CacheHit), + case UpdJrnalCheckFreq of + JrnalCheckFreq -> + {reply, Reply, State}; + UpdJrnalCheckFreq -> + {reply, Reply, State#state{ink_checking = UpdJrnalCheckFreq}} + end; handle_call({snapshot, SnapType, Query, LongRunning}, _From, State) -> % Snapshot the store, specifying if the snapshot should be long running % (i.e. will the snapshot be queued or be required for an extended period % e.g. 
many minutes) - {ok, PclSnap, InkSnap, Timings} = - snapshot_store(State, SnapType, Query, LongRunning), - {UpdTimings, CountDown} = - update_statetimings(snapshot, Timings, State#state.snapshot_countdown), - {reply, - {ok, PclSnap, InkSnap}, - State#state{ - snapshot_timings = UpdTimings, - snapshot_countdown = CountDown}}; + {ok, PclSnap, InkSnap} = + snapshot_store( + State#state.ledger_cache, + State#state.penciller, + State#state.inker, + State#state.monitor, + SnapType, + Query, + LongRunning), + {reply, {ok, PclSnap, InkSnap},State}; handle_call(log_settings, _From, State) -> {reply, leveled_log:return_settings(), State}; handle_call({return_runner, QueryType}, _From, State) -> Runner = get_runner(State, QueryType), {reply, Runner, State}; -handle_call({compact_journal, Timeout}, _From, State) +handle_call({compact_journal, Timeout}, From, State) when State#state.head_only == false -> case leveled_inker:ink_compactionpending(State#state.inker) of true -> {reply, {busy, undefined}, State}; false -> - {ok, PclSnap, null, _Timings} = - snapshot_store(State, ledger, undefined, true), + {ok, PclSnap, null} = + snapshot_store( + State#state.ledger_cache, + State#state.penciller, + State#state.inker, + State#state.monitor, + ledger, + undefined, + true), R = leveled_inker:ink_compactjournal(State#state.inker, PclSnap, Timeout), - {reply, R, State} + gen_server:reply(From, R), + case maybepush_ledgercache( + State#state.cache_size, + State#state.cache_multiple, + State#state.ledger_cache, + State#state.penciller) of + {_, NewCache} -> + {noreply, State#state{ledger_cache = NewCache}} + end end; handle_call(confirm_compact, _From, State) when State#state.head_only == false -> @@ -1514,11 +1497,13 @@ handle_call(hot_backup, _From, State) when State#state.head_only == false -> handle_call(close, _From, State) -> leveled_inker:ink_close(State#state.inker), leveled_penciller:pcl_close(State#state.penciller), + leveled_monitor:monitor_close(element(1, State#state.monitor)), {stop, normal, ok, State}; handle_call(destroy, _From, State=#state{is_snapshot=Snp}) when Snp == false -> - leveled_log:log("B0011", []), + leveled_log:log(b0011, []), {ok, InkPathList} = leveled_inker:ink_doom(State#state.inker), {ok, PCLPathList} = leveled_penciller:pcl_doom(State#state.penciller), + leveled_monitor:monitor_close(element(1, State#state.monitor)), lists:foreach(fun(DirPath) -> delete_path(DirPath) end, InkPathList), lists:foreach(fun(DirPath) -> delete_path(DirPath) end, PCLPathList), {stop, normal, ok, State}; @@ -1531,22 +1516,28 @@ handle_call(Msg, _From, State) -> handle_cast({log_level, LogLevel}, State) -> PCL = State#state.penciller, INK = State#state.inker, + Monitor = element(1, State#state.monitor), ok = leveled_penciller:pcl_loglevel(PCL, LogLevel), ok = leveled_inker:ink_loglevel(INK, LogLevel), + ok = leveled_monitor:log_level(Monitor, LogLevel), ok = leveled_log:set_loglevel(LogLevel), {noreply, State}; handle_cast({add_logs, ForcedLogs}, State) -> PCL = State#state.penciller, INK = State#state.inker, + Monitor = element(1, State#state.monitor), ok = leveled_penciller:pcl_addlogs(PCL, ForcedLogs), ok = leveled_inker:ink_addlogs(INK, ForcedLogs), + ok = leveled_monitor:log_add(Monitor, ForcedLogs), ok = leveled_log:add_forcedlogs(ForcedLogs), {noreply, State}; handle_cast({remove_logs, ForcedLogs}, State) -> PCL = State#state.penciller, INK = State#state.inker, + Monitor = element(1, State#state.monitor), ok = leveled_penciller:pcl_removelogs(PCL, ForcedLogs), ok = 
leveled_inker:ink_removelogs(INK, ForcedLogs), + ok = leveled_monitor:log_remove(Monitor, ForcedLogs), ok = leveled_log:remove_forcedlogs(ForcedLogs), {noreply, State}. @@ -1555,7 +1546,7 @@ handle_info(_Info, State) -> {noreply, State}. terminate(Reason, _State) -> - leveled_log:log("B0003", [Reason]). + leveled_log:log(b0003, [Reason]). code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -1615,11 +1606,11 @@ loadqueue_ledgercache(Cache) -> -spec snapshot_store(ledger_cache(), pid(), null|pid(), - snapshot_timings(), + leveled_monitor:monitor(), store|ledger, undefined|tuple(), undefined|boolean()) -> - {ok, pid(), pid()|null, snapshot_timings()}. + {ok, pid(), pid()|null}. %% @doc %% Allow all a snapshot to be created from part of the store, preferably %% passing in a query filter so that all of the LoopState does not need to @@ -1635,8 +1626,8 @@ loadqueue_ledgercache(Cache) -> %% lookup is required but the range isn't defined then 'undefined' should be %% passed as the query snapshot_store( - LedgerCache, Penciller, Inker, Timings, SnapType, Query, LongRunning) -> - TS0 = os:timestamp(), + LedgerCache, Penciller, Inker, Monitor, SnapType, Query, LongRunning) -> + SW0 = leveled_monitor:maybe_time(Monitor), LedgerCacheReady = readycache_forsnapshot(LedgerCache, Query), BookiesMem = {LedgerCacheReady#ledger_cache.loader, LedgerCacheReady#ledger_cache.index, @@ -1649,33 +1640,21 @@ snapshot_store( snapshot_longrunning = LongRunning, bookies_pid = self(), bookies_mem = BookiesMem}, - {TS1, Timings1} = update_timings(TS0, {snapshot, bookie}, Timings), + {TS0, SW1} = leveled_monitor:step_time(SW0), {ok, LedgerSnapshot} = leveled_penciller:pcl_snapstart(PCLopts), - {_TS2, Timings2} = update_timings(TS1, {snapshot, pcl}, Timings1), + {TS1, _SW2} = leveled_monitor:step_time(SW1), + ok = maybelog_snap_timing(Monitor, TS0, TS1), case SnapType of store -> - InkerOpts = #inker_options{start_snapshot=true, + InkerOpts = #inker_options{start_snapshot = true, bookies_pid = self(), - source_inker=Inker}, + source_inker = Inker}, {ok, JournalSnapshot} = leveled_inker:ink_snapstart(InkerOpts), - {ok, LedgerSnapshot, JournalSnapshot, Timings2}; + {ok, LedgerSnapshot, JournalSnapshot}; ledger -> - {ok, LedgerSnapshot, null, Timings2} + {ok, LedgerSnapshot, null} end. -snapshot_store(LedgerCache, Penciller, Inker, SnapType, Query, LongRunning) -> - snapshot_store( - LedgerCache, Penciller, Inker, no_timing, SnapType, Query, LongRunning). - -snapshot_store(State, SnapType, Query, LongRunning) -> - snapshot_store(State#state.ledger_cache, - State#state.penciller, - State#state.inker, - State#state.snapshot_timings, - SnapType, - Query, - LongRunning). - -spec fetch_value(pid(), leveled_codec:journal_ref()) -> not_present|any(). %% @doc @@ -1695,27 +1674,26 @@ fetch_value(Inker, {Key, SQN}) -> %%% Internal functions %%%============================================================================ --spec startup(#inker_options{}, #penciller_options{}, book_state()) - -> {pid(), pid()}. +-spec startup(#inker_options{}, #penciller_options{}) -> {pid(), pid()}. %% @doc %% Startup the Inker and the Penciller, and prompt the loading of the Penciller %% from the Inker. The Penciller may be shutdown without the latest data %% having been persisted: and so the Iker must be able to update the Penciller %% on startup with anything that happened but wasn't flushed to disk. 
-startup(InkerOpts, PencillerOpts, State) -> +startup(InkerOpts, PencillerOpts) -> {ok, Inker} = leveled_inker:ink_start(InkerOpts), {ok, Penciller} = leveled_penciller:pcl_start(PencillerOpts), LedgerSQN = leveled_penciller:pcl_getstartupsequencenumber(Penciller), - leveled_log:log("B0005", [LedgerSQN]), + leveled_log:log(b0005, [LedgerSQN]), ReloadStrategy = InkerOpts#inker_options.reload_strategy, - LoadFun = get_loadfun(ReloadStrategy, Penciller, State), + LoadFun = get_loadfun(ReloadStrategy, Penciller), BatchFun = fun(BatchAcc, _Acc) -> push_to_penciller(Penciller, BatchAcc) end, InitAccFun = fun(FN, CurrentMinSQN) -> - leveled_log:log("I0014", [FN, CurrentMinSQN]), + leveled_log:log(i0014, [FN, CurrentMinSQN]), empty_ledgercache() end, ok = leveled_inker:ink_loadpcl(Inker, @@ -1736,11 +1714,13 @@ set_defaults(Opts) -> lists:ukeysort(1, Opts), lists:ukeysort(1, ?OPTION_DEFAULTS)). --spec set_options(open_options()) -> {#inker_options{}, #penciller_options{}}. +-spec set_options( + open_options(), leveled_monitor:monitor()) -> + {#inker_options{}, #penciller_options{}}. %% @doc %% Take the passed in property list of operations and extract out any relevant %% options to the Inker or the Penciller -set_options(Opts) -> +set_options(Opts, Monitor) -> MaxJournalSize0 = min(?ABSOLUTEMAX_JOURNALSIZE, proplists:get_value(max_journalsize, Opts)), @@ -1798,30 +1778,36 @@ set_options(Opts) -> ScoreOneIn = proplists:get_value(journalcompaction_scoreonein, Opts), {#inker_options{root_path = JournalFP, - reload_strategy = ReloadStrategy, - max_run_length = proplists:get_value(max_run_length, Opts), - singlefile_compactionperc = SFL_CompPerc, - maxrunlength_compactionperc = MRL_CompPerc, - waste_retention_period = WRP, - snaptimeout_long = SnapTimeoutLong, - compression_method = CompressionMethod, - compress_on_receipt = CompressOnReceipt, - score_onein = ScoreOneIn, - cdb_options = - #cdb_options{max_size=MaxJournalSize, - max_count=MaxJournalCount, - binary_mode=true, - sync_strategy=SyncStrat, - log_options=leveled_log:get_opts()}}, + reload_strategy = ReloadStrategy, + max_run_length = proplists:get_value(max_run_length, Opts), + singlefile_compactionperc = SFL_CompPerc, + maxrunlength_compactionperc = MRL_CompPerc, + waste_retention_period = WRP, + snaptimeout_long = SnapTimeoutLong, + compression_method = CompressionMethod, + compress_on_receipt = CompressOnReceipt, + score_onein = ScoreOneIn, + cdb_options = + #cdb_options{ + max_size = MaxJournalSize, + max_count = MaxJournalCount, + binary_mode = true, + sync_strategy = SyncStrat, + log_options = leveled_log:get_opts(), + monitor = Monitor}, + monitor = Monitor}, #penciller_options{root_path = LedgerFP, max_inmemory_tablesize = PCLL0CacheSize, levelzero_cointoss = true, snaptimeout_short = SnapTimeoutShort, snaptimeout_long = SnapTimeoutLong, sst_options = - #sst_options{press_method=CompressionMethod, - log_options=leveled_log:get_opts(), - max_sstslots=MaxSSTSlots}} + #sst_options{ + press_method = CompressionMethod, + log_options = leveled_log:get_opts(), + max_sstslots = MaxSSTSlots, + monitor = Monitor}, + monitor = Monitor} }. 
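As snapshot_store is exported and its arity changes from 6 to 7 in this diff, any external caller now has to supply a monitor as the fourth argument; a hedged sketch of such a call (the bound variables are illustrative), with callers that do not sample passing the null monitor defaulted in the option records above:

    {ok, LedgerSnap, JournalSnap} =
        leveled_bookie:snapshot_store(
            LedgerCache, Penciller, Inker,
            {no_monitor, 0},
            store, undefined, true).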
@@ -1841,8 +1827,15 @@ set_options(Opts) -> return_snapfun(State, SnapType, Query, LongRunning, SnapPreFold) -> case SnapPreFold of true -> - {ok, LS, JS, _Timings} = - snapshot_store(State, SnapType, Query, LongRunning), + {ok, LS, JS} = + snapshot_store( + State#state.ledger_cache, + State#state.penciller, + State#state.inker, + State#state.monitor, + SnapType, + Query, + LongRunning), fun() -> {ok, LS, JS} end; false -> Self = self(), @@ -2074,7 +2067,7 @@ return_ledger_keyrange(Tag, Bucket, KeyRange) -> maybe_longrunning(SW, Aspect) -> case timer:now_diff(os:timestamp(), SW) of N when N > ?LONG_RUNNING -> - leveled_log:log("B0013", [N, Aspect]); + leveled_log:log(b0013, [N, Aspect]); _ -> ok end. @@ -2167,24 +2160,20 @@ scan_table(Table, StartKey, EndKey, Acc, MinSQN, MaxSQN) -> end. --spec fetch_head(leveled_codec:ledger_key(), pid(), ledger_cache(), - cache_ratio()) -> - {not_present|leveled_codec:ledger_value(), - cache_ratio()}. +-spec fetch_head(leveled_codec:ledger_key(), pid(), ledger_cache()) + -> {not_present|leveled_codec:ledger_value(), boolean()}. %% @doc %% Fetch only the head of the object from the Ledger (or the bookie's recent %% ledger cache if it has just been updated). not_present is returned if the %% Key is not found -fetch_head(Key, Penciller, LedgerCache, CacheRatio) -> - fetch_head(Key, Penciller, LedgerCache, CacheRatio, false). +fetch_head(Key, Penciller, LedgerCache) -> + fetch_head(Key, Penciller, LedgerCache, false). --spec fetch_head(leveled_codec:ledger_key(), pid(), ledger_cache(), - cache_ratio(), boolean()) - -> {not_present|leveled_codec:ledger_value(), - cache_ratio()}. +-spec fetch_head(leveled_codec:ledger_key(), pid(), ledger_cache(), boolean()) + -> {not_present|leveled_codec:ledger_value(), boolean()}. %% doc %% The L0Index needs to be bypassed when running head_only -fetch_head(Key, Penciller, LedgerCache, {RC, CC, HC}, HeadOnly) -> +fetch_head(Key, Penciller, LedgerCache, HeadOnly) -> SW = os:timestamp(), CacheResult = case LedgerCache#ledger_cache.mem of @@ -2195,7 +2184,7 @@ fetch_head(Key, Penciller, LedgerCache, {RC, CC, HC}, HeadOnly) -> end, case CacheResult of [{Key, Head}] -> - {Head, {RC + 1, CC + 1, HC + 1}}; + {Head, true}; [] -> Hash = leveled_codec:segment_hash(Key), UseL0Idx = not HeadOnly, @@ -2204,10 +2193,10 @@ fetch_head(Key, Penciller, LedgerCache, {RC, CC, HC}, HeadOnly) -> case leveled_penciller:pcl_fetch(Penciller, Key, Hash, UseL0Idx) of {Key, Head} -> maybe_longrunning(SW, pcl_head), - {Head, {RC + 1, CC, HC + 1}}; + {Head, false}; not_present -> maybe_longrunning(SW, pcl_head), - {not_present, {RC + 1, CC, HC}} + {not_present, false} end end. @@ -2368,7 +2357,7 @@ addto_ledgercache({H, SQN, KeyChanges}, Cache, loader) -> %% Check the ledger cache for a Key, when the ledger cache is in loader mode %% and so is populating a queue not an ETS table check_in_ledgercache(PK, Hash, Cache, loader) -> - case leveled_pmem:check_index(Hash, Cache#ledger_cache.index) of + case leveled_pmem:check_index(Hash, [Cache#ledger_cache.index]) of [] -> false; _ -> @@ -2376,8 +2365,9 @@ check_in_ledgercache(PK, Hash, Cache, loader) -> end. --spec maybepush_ledgercache(integer(), ledger_cache(), pid()) - -> {ok|returned, ledger_cache()}. +-spec maybepush_ledgercache( + pos_integer(), pos_integer(), ledger_cache(), pid()) + -> {ok|returned, ledger_cache()}. %% @doc %% Following an update to the ledger cache, check if this now big enough to be %% pushed down to the Penciller. 
There is some random jittering here, to @@ -2388,10 +2378,10 @@ check_in_ledgercache(PK, Hash, Cache, loader) -> %% in the reply. Try again later when it isn't busy (and also potentially %% implement a slow_offer state to slow down the pace at which PUTs are being %% received) -maybepush_ledgercache(MaxCacheSize, Cache, Penciller) -> +maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller) -> Tab = Cache#ledger_cache.mem, CacheSize = ets:info(Tab, size), - TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize), + TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize, MaxCacheMult), if TimeToPush -> CacheToLoad = {Tab, @@ -2408,26 +2398,28 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) -> {returned, Cache} end; true -> - {ok, Cache} + {ok, Cache} end. --spec maybe_withjitter(integer(), integer()) -> boolean(). +-spec maybe_withjitter( + non_neg_integer(), pos_integer(), pos_integer()) -> boolean(). %% @doc %% Push down randomly, but the closer to 4 * the maximum size, the more likely %% a push should be -maybe_withjitter(CacheSize, MaxCacheSize) when CacheSize > MaxCacheSize -> - R = leveled_rand:uniform(4 * MaxCacheSize), +maybe_withjitter( + CacheSize, MaxCacheSize, MaxCacheMult) when CacheSize > MaxCacheSize -> + R = leveled_rand:uniform(MaxCacheMult * MaxCacheSize), (CacheSize - MaxCacheSize) > R; -maybe_withjitter(_CacheSize, _MaxCacheSize) -> +maybe_withjitter(_CacheSize, _MaxCacheSize, _MaxCacheMult) -> false. --spec get_loadfun(leveled_codec:compaction_strategy(), pid(), book_state()) - -> initial_loadfun(). +-spec get_loadfun( + leveled_codec:compaction_strategy(), pid()) -> initial_loadfun(). %% @doc %% The LoadFun will be used by the Inker when walking across the Journal to %% load the Penciller at startup. -get_loadfun(ReloadStrat, Penciller, _State) -> +get_loadfun(ReloadStrat, Penciller) -> fun(KeyInJournal, ValueInJournal, _Pos, Acc0, ExtractFun) -> {MinSQN, MaxSQN, LedgerCache} = Acc0, {SQN, InkTag, PK} = KeyInJournal, @@ -2435,7 +2427,7 @@ get_loadfun(ReloadStrat, Penciller, _State) -> SQN when SQN < MinSQN -> {loop, Acc0}; SQN when SQN > MaxSQN -> - leveled_log:log("B0007", [MaxSQN, SQN]), + leveled_log:log(b0007, [MaxSQN, SQN]), {stop, Acc0}; _ -> {VBin, ValSize} = ExtractFun(ValueInJournal), @@ -2454,7 +2446,7 @@ get_loadfun(ReloadStrat, Penciller, _State) -> end, case SQN of MaxSQN -> - leveled_log:log("B0006", [SQN]), + leveled_log:log(b0006, [SQN]), LC0 = addto_ledgercache(Chngs, LedgerCache, loader), {stop, {MinSQN, MaxSQN, LC0}}; _ -> @@ -2471,153 +2463,56 @@ delete_path(DirPath) -> [file:delete(filename:join([DirPath, File])) || File <- Files], file:del_dir(DirPath). +-spec maybelog_put_timing( + leveled_monitor:monitor(), + leveled_monitor:timing(), + leveled_monitor:timing(), + leveled_monitor:timing(), + pos_integer()) -> ok. +maybelog_put_timing(_Monitor, no_timing, no_timing, no_timing, _Size) -> + ok; +maybelog_put_timing({Pid, _StatsFreq}, InkTime, PrepTime, MemTime, Size) -> + leveled_monitor:add_stat( + Pid, {bookie_put_update, InkTime, PrepTime, MemTime, Size}). + +-spec maybelog_head_timing( + leveled_monitor:monitor(), + leveled_monitor:timing(), + leveled_monitor:timing(), + boolean(), + boolean()) -> ok. 
+maybelog_head_timing(_Monitor, no_timing, no_timing, _NF, _CH) -> + ok; +maybelog_head_timing({Pid, _StatsFreq}, FetchTime, _, true, _CH) -> + leveled_monitor:add_stat( + Pid, {bookie_head_update, FetchTime, not_found, 0}); +maybelog_head_timing({Pid, _StatsFreq}, FetchTime, RspTime, _NF, CH) -> + CH0 = case CH of true -> 1; false -> 0 end, + leveled_monitor:add_stat( + Pid, {bookie_head_update, FetchTime, RspTime, CH0}). + +-spec maybelog_get_timing( + leveled_monitor:monitor(), + leveled_monitor:timing(), + leveled_monitor:timing(), + boolean()) -> ok. +maybelog_get_timing(_Monitor, no_timing, no_timing, _NF) -> + ok; +maybelog_get_timing({Pid, _StatsFreq}, HeadTime, _BodyTime, true) -> + leveled_monitor:add_stat(Pid, {bookie_get_update, HeadTime, not_found}); +maybelog_get_timing({Pid, _StatsFreq}, HeadTime, BodyTime, false) -> + leveled_monitor:add_stat(Pid, {bookie_get_update, HeadTime, BodyTime}). -%%%============================================================================ -%%% Timing Functions -%%%============================================================================ +-spec maybelog_snap_timing( + leveled_monitor:monitor(), + leveled_monitor:timing(), + leveled_monitor:timing()) -> ok. +maybelog_snap_timing(_Monitor, no_timing, no_timing) -> + ok; +maybelog_snap_timing({Pid, _StatsFreq}, BookieTime, PCLTime) -> + leveled_monitor:add_stat(Pid, {bookie_snap_update, BookieTime, PCLTime}). --spec update_statetimings(timing_types(), timings(), integer()) -> - {timings(), integer()}. -%% @doc -%% -%% The timings state is either in countdown to the next set of samples of -%% we are actively collecting a sample. Active collection take place -%% when the countdown is 0. Once the sample has reached the expected count -%% then there is a log of that sample, and the countdown is restarted. -%% -%% Outside of sample windows the timings object should be set to the atom -%% no_timing. no_timing is a valid state for each timings type. -update_statetimings(head, no_timing, 0) -> - {#head_timings{}, 0}; -update_statetimings(put, no_timing, 0) -> - {#put_timings{}, 0}; -update_statetimings(get, no_timing, 0) -> - {#get_timings{}, 0}; -update_statetimings(snapshot, no_timing, 0) -> - {#snapshot_timings{}, 0}; -update_statetimings(head, Timings, 0) -> - case Timings#head_timings.sample_count of - SC when SC >= ?TIMING_SAMPLESIZE -> - log_timings(head, Timings), - {no_timing, leveled_rand:uniform(10 * ?TIMING_SAMPLECOUNTDOWN)}; - _SC -> - {Timings, 0} - end; -update_statetimings(put, Timings, 0) -> - case Timings#put_timings.sample_count of - SC when SC >= ?TIMING_SAMPLESIZE -> - log_timings(put, Timings), - {no_timing, leveled_rand:uniform(2 * ?TIMING_SAMPLECOUNTDOWN)}; - _SC -> - {Timings, 0} - end; -update_statetimings(get, Timings, 0) -> - case Timings#get_timings.sample_count of - SC when SC >= ?TIMING_SAMPLESIZE -> - log_timings(get, Timings), - {no_timing, leveled_rand:uniform(2 * ?TIMING_SAMPLECOUNTDOWN)}; - _SC -> - {Timings, 0} - end; -update_statetimings(snapshot, Timings, 0) -> - case Timings#snapshot_timings.sample_count of - SC when SC >= ?TIMING_SAMPLESIZE -> - log_timings(snapshot, Timings), - {no_timing, - leveled_rand:uniform(2 * ?TIMING_SAMPLECOUNTDOWN)}; - _SC -> - {Timings, 0} - end; -update_statetimings(_, no_timing, N) -> - {no_timing, N - 1}. 
- -log_timings(head, Timings) -> - leveled_log:log("B0018", - [Timings#head_timings.sample_count, - Timings#head_timings.pcl_time, - Timings#head_timings.buildhead_time]); -log_timings(put, Timings) -> - leveled_log:log("B0015", [Timings#put_timings.sample_count, - Timings#put_timings.mem_time, - Timings#put_timings.ink_time, - Timings#put_timings.total_size]); -log_timings(get, Timings) -> - leveled_log:log("B0016", [Timings#get_timings.sample_count, - Timings#get_timings.head_time, - Timings#get_timings.body_time, - Timings#get_timings.fetch_count]); -log_timings(snapshot, Timings) -> - leveled_log:log("B0017", [Timings#snapshot_timings.sample_count, - Timings#snapshot_timings.bookie_time, - Timings#snapshot_timings.pcl_time]). - - -update_timings(_SW, _Stage, no_timing) -> - {no_timing, no_timing}; -update_timings(SW, {head, Stage}, Timings) -> - NextSW = os:timestamp(), - Timer = timer:now_diff(NextSW, SW), - Timings0 = - case Stage of - pcl -> - PCT = Timings#head_timings.pcl_time + Timer, - Timings#head_timings{pcl_time = PCT}; - rsp -> - BHT = Timings#head_timings.buildhead_time + Timer, - CNT = Timings#head_timings.sample_count + 1, - Timings#head_timings{buildhead_time = BHT, sample_count = CNT} - end, - {NextSW, Timings0}; -update_timings(SW, {put, Stage}, Timings) -> - NextSW = os:timestamp(), - Timer = timer:now_diff(NextSW, SW), - Timings0 = - case Stage of - {inker, ObjectSize} -> - INT = Timings#put_timings.ink_time + Timer, - TSZ = Timings#put_timings.total_size + ObjectSize, - Timings#put_timings{ink_time = INT, total_size = TSZ}; - mem -> - PCT = Timings#put_timings.mem_time + Timer, - CNT = Timings#put_timings.sample_count + 1, - Timings#put_timings{mem_time = PCT, sample_count = CNT} - end, - {NextSW, Timings0}; -update_timings(SW, {get, head}, Timings) -> - NextSW = os:timestamp(), - Timer = timer:now_diff(NextSW, SW), - GHT = Timings#get_timings.head_time + Timer, - CNT = Timings#get_timings.sample_count + 1, - Timings0 = Timings#get_timings{head_time = GHT, sample_count = CNT}, - {NextSW, Timings0}; -update_timings(SW, {get, body}, Timings) -> - Timer = timer:now_diff(os:timestamp(), SW), - GBT = Timings#get_timings.body_time + Timer, - FCNT = Timings#get_timings.fetch_count + 1, - Timings0 = Timings#get_timings{body_time = GBT, fetch_count = FCNT}, - {no_timing, Timings0}; -update_timings(SW, {snapshot, bookie}, Timings) -> - NextSW = os:timestamp(), - Timer = timer:now_diff(NextSW, SW), - BST = Timings#snapshot_timings.bookie_time + Timer, - CNT = Timings#snapshot_timings.sample_count + 1, - Timings0 = Timings#snapshot_timings{bookie_time = BST, sample_count = CNT}, - {NextSW, Timings0}; -update_timings(SW, {snapshot, pcl}, Timings) -> - NextSW = os:timestamp(), - Timer = timer:now_diff(NextSW, SW), - PST = Timings#snapshot_timings.pcl_time + Timer, - Timings0 = Timings#snapshot_timings{pcl_time = PST}, - {no_timing, Timings0}. - - --spec maybelog_cacheratio(cache_ratio(), boolean()) -> cache_ratio(). -maybelog_cacheratio({?CACHE_LOGPOINT, CC, HC}, false) -> - leveled_log:log("B0021", [?CACHE_LOGPOINT, CC, HC]), - {0, 0, 0}; -maybelog_cacheratio(CR, _IsSnap) -> - CR. 
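To make the effect of the new cache_multiple concrete: with the default cache_size of 2500 and cache_multiple of 2, maybe_withjitter/3 above draws R uniformly from 1..5000, so a ledger cache of 3000 entries is pushed with probability of roughly (3000 - 2500)/5000 = 10%, and a push becomes certain once the cache exceeds (cache_multiple + 1) * cache_size = 7500 entries. An illustrative helper (not part of this change) capturing that implied probability:

    %% Illustrative only: approximate push probability implied by
    %% maybe_withjitter/3, given leveled_rand:uniform/1 returns 1..N
    push_probability(CacheSize, MaxCacheSize, MaxCacheMult)
            when CacheSize > MaxCacheSize ->
        min(1.0, (CacheSize - MaxCacheSize) / (MaxCacheMult * MaxCacheSize));
    push_probability(_CacheSize, _MaxCacheSize, _MaxCacheMult) ->
        0.0.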
%%%============================================================================ %%% Test %%%============================================================================ @@ -3228,14 +3123,16 @@ erase_journal_test() -> {cache_size, 100}]), ObjL1 = generate_multiple_objects(500, 1), % Put in all the objects with a TTL in the future - lists:foreach(fun({K, V, S}) -> ok = book_put(Bookie1, - "Bucket", K, V, S, - ?STD_TAG) end, - ObjL1), - lists:foreach(fun({K, V, _S}) -> - {ok, V} = book_get(Bookie1, "Bucket", K, ?STD_TAG) - end, - ObjL1), + lists:foreach( + fun({K, V, S}) -> + ok = book_put(Bookie1, "Bucket", K, V, S, ?STD_TAG) + end, + ObjL1), + lists:foreach( + fun({K, V, _S}) -> + {ok, V} = book_get(Bookie1, "Bucket", K, ?STD_TAG) + end, + ObjL1), CheckHeadFun = fun(Book) -> diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index de549e6..5596a37 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -62,16 +62,6 @@ {gen_fsm, reply, 2}]}). -endif. --ifdef(slow_test). --define(SPECIAL_DELFUN, fun(_F) -> ok end). - % There are problems with the pendingdelete_test/0 in riak make test - % The deletion of the file causes the process to crash and the test to - % fail, but thisis not an issue tetsing outside of riak make test. - % Workaround this problem by not performing the delete when running unit - % tests in R16 --else. --define(SPECIAL_DELFUN, fun(F) -> file:delete(F) end). --endif. -export([init/1, handle_sync_event/4, @@ -155,20 +145,12 @@ deferred_delete = false :: boolean(), waste_path :: string() | undefined, sync_strategy = none, - timings = no_timing :: cdb_timings(), - timings_countdown = 0 :: integer(), log_options = leveled_log:get_opts() :: leveled_log:log_options(), - cached_score :: {float(), erlang:timestamp()}|undefined}). - --record(cdb_timings, {sample_count = 0 :: integer(), - sample_cyclecount = 0 :: integer(), - sample_indextime = 0 :: integer(), - sample_fetchtime = 0 :: integer(), - fetchloop_starttime :: undefined|erlang:timestamp()}). + cached_score :: {float(), erlang:timestamp()}|undefined, + monitor = {no_monitor, 0} :: leveled_monitor:monitor()}). -type cdb_options() :: #cdb_options{}. --type cdb_timings() :: no_timing|#cdb_timings{}. -type hashtable_index() :: tuple(). -type file_location() :: integer()|eof. -type filter_fun() :: @@ -489,14 +471,15 @@ init([Opts]) -> binary_mode=Opts#cdb_options.binary_mode, waste_path=Opts#cdb_options.waste_path, sync_strategy=Opts#cdb_options.sync_strategy, - log_options=Opts#cdb_options.log_options}}. + log_options=Opts#cdb_options.log_options, + monitor=Opts#cdb_options.monitor}}. 
starting({open_writer, Filename}, _From, State) -> leveled_log:save(State#state.log_options), - leveled_log:log("CDB01", [Filename]), + leveled_log:log(cdb01, [Filename]), {LastPosition, HashTree, LastKey} = open_active_file(Filename), {WriteOps, UpdStrategy} = set_writeops(State#state.sync_strategy), - leveled_log:log("CDB13", [WriteOps]), + leveled_log:log(cdb13, [WriteOps]), {ok, Handle} = file:open(Filename, WriteOps), State0 = State#state{handle=Handle, current_count = size_hashtree(HashTree), @@ -508,7 +491,7 @@ starting({open_writer, Filename}, _From, State) -> {reply, ok, writer, State0, hibernate}; starting({open_reader, Filename}, _From, State) -> leveled_log:save(State#state.log_options), - leveled_log:log("CDB02", [Filename]), + leveled_log:log(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, false), State0 = State#state{handle=Handle, last_key=LastKey, @@ -517,7 +500,7 @@ starting({open_reader, Filename}, _From, State) -> {reply, ok, reader, State0, hibernate}; starting({open_reader, Filename, LastKey}, _From, State) -> leveled_log:save(State#state.log_options), - leveled_log:log("CDB02", [Filename]), + leveled_log:log(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, LastKey), State0 = State#state{handle=Handle, last_key=LastKey, @@ -646,7 +629,7 @@ rolling({return_hashtable, IndexList, HashTreeBin}, _From, State) -> ok = write_top_index_table(Handle, BasePos, IndexList), file:close(Handle), ok = rename_for_read(State#state.filename, NewName), - leveled_log:log("CDB03", [NewName]), + leveled_log:log(cdb03, [NewName]), ets:delete(State#state.hashtree), {NewHandle, Index, LastKey} = open_for_readonly(NewName, State#state.last_key), @@ -658,7 +641,7 @@ rolling({return_hashtable, IndexList, HashTreeBin}, _From, State) -> true -> {reply, ok, delete_pending, State0}; false -> - leveled_log:log_timer("CDB18", [], SW), + leveled_log:log_timer(cdb18, [], SW), {reply, ok, reader, State0, hibernate} end; rolling(check_hashtable, _From, State) -> @@ -670,26 +653,21 @@ rolling({delete_pending, ManSQN, Inker}, State) -> State#state{delete_point=ManSQN, inker=Inker, deferred_delete=true}}. 
reader({get_kv, Key}, _From, State) -> - {UpdTimings, Result} = + Result = get_withcache(State#state.handle, Key, State#state.hash_index, State#state.binary_mode, - State#state.timings), - {UpdTimings0, CountDown} = - update_statetimings(UpdTimings, State#state.timings_countdown), - {reply, - Result, - reader, - State#state{timings = UpdTimings0, timings_countdown = CountDown}}; + State#state.monitor), + {reply, Result, reader, State}; reader({key_check, Key}, _From, State) -> - {no_timing, Result} = + Result = get_withcache(State#state.handle, Key, State#state.hash_index, loose_presence, State#state.binary_mode, - no_timing), + {no_monitor, 0}), {reply, Result, reader, State}; reader({get_positions, SampleSize, Index, Acc}, _From, State) -> {Pos, Count} = element(Index + 1, State#state.hash_index), @@ -735,7 +713,7 @@ reader({direct_fetch, PositionList, Info}, From, State) -> {next_state, reader, State} end; reader(cdb_complete, _From, State) -> - leveled_log:log("CDB05", [State#state.filename, reader, cdb_ccomplete]), + leveled_log:log(cdb05, [State#state.filename, reader, cdb_ccomplete]), ok = file:close(State#state.handle), {stop, normal, {ok, State#state.filename}, State#state{handle=undefined}}; reader(check_hashtable, _From, State) -> @@ -754,27 +732,21 @@ reader({delete_pending, ManSQN, Inker}, State) -> delete_pending({get_kv, Key}, _From, State) -> - {UpdTimings, Result} = + Result = get_withcache(State#state.handle, Key, State#state.hash_index, State#state.binary_mode, - State#state.timings), - {UpdTimings0, CountDown} = - update_statetimings(UpdTimings, State#state.timings_countdown), - {reply, - Result, - delete_pending, - State#state{timings = UpdTimings0, timings_countdown = CountDown}, - ?DELETE_TIMEOUT}; + State#state.monitor), + {reply, Result, delete_pending, State, ?DELETE_TIMEOUT}; delete_pending({key_check, Key}, _From, State) -> - {no_timing, Result} = + Result = get_withcache(State#state.handle, Key, State#state.hash_index, loose_presence, State#state.binary_mode, - no_timing), + {no_monitor, 0}), {reply, Result, delete_pending, State, ?DELETE_TIMEOUT}. 
delete_pending(timeout, State=#state{delete_point=ManSQN}) when ManSQN > 0 -> @@ -786,20 +758,20 @@ delete_pending(timeout, State=#state{delete_point=ManSQN}) when ManSQN > 0 -> self()), {next_state, delete_pending, State, ?DELETE_TIMEOUT}; false -> - leveled_log:log("CDB04", [State#state.filename, ManSQN]), + leveled_log:log(cdb04, [State#state.filename, ManSQN]), close_pendingdelete(State#state.handle, State#state.filename, State#state.waste_path), {stop, normal, State} end; delete_pending(delete_confirmed, State=#state{delete_point=ManSQN}) -> - leveled_log:log("CDB04", [State#state.filename, ManSQN]), + leveled_log:log(cdb04, [State#state.filename, ManSQN]), close_pendingdelete(State#state.handle, State#state.filename, State#state.waste_path), {stop, normal, State}; delete_pending(destroy, State) -> - leveled_log:log("CDB05", [State#state.filename, delete_pending, destroy]), + leveled_log:log(cdb05, [State#state.filename, delete_pending, destroy]), close_pendingdelete(State#state.handle, State#state.filename, State#state.waste_path), @@ -886,8 +858,7 @@ handle_sync_event({get_cachedscore, {NowMega, NowSecs, _}}, handle_sync_event({put_cachedscore, Score}, _From, StateName, State) -> {reply, ok, StateName, State#state{cached_score = {Score,os:timestamp()}}}; handle_sync_event(cdb_close, _From, delete_pending, State) -> - leveled_log:log("CDB05", - [State#state.filename, delete_pending, cdb_close]), + leveled_log:log(cdb05, [State#state.filename, delete_pending, cdb_close]), close_pendingdelete(State#state.handle, State#state.filename, State#state.waste_path), @@ -953,31 +924,14 @@ close_pendingdelete(Handle, Filename, WasteFP) -> false -> % This may happen when there has been a destroy while files are % still pending deletion - leveled_log:log("CDB21", [Filename]) + leveled_log:log(cdb21, [Filename]) end. -spec set_writeops(sync|riak_sync|none) -> {list(), sync|riak_sync|none}. -%% Assumption is that sync should be used - it is a transaction log. -%% -%% However this flag is not supported in OTP 16. Bitcask appears to pass an -%% o_sync flag, but this isn't supported either (maybe it works with the -%% bitcask nif fileops). -%% -%% To get round this will try and datasync on each PUT with riak_sync --ifdef(no_sync). - -set_writeops(SyncStrategy) -> - case SyncStrategy of - sync -> - {?WRITE_OPS, riak_sync}; - riak_sync -> - {?WRITE_OPS, riak_sync}; - none -> - {?WRITE_OPS, none} - end. - --else. - +%% @doc +%% Sync should be used - it is a transaction log - in single node +%% implementations. `riak_sync` is a legacy of earlier OTP versions when +%% passing the sync option was not supported set_writeops(SyncStrategy) -> case SyncStrategy of sync -> @@ -988,8 +942,6 @@ set_writeops(SyncStrategy) -> {?WRITE_OPS, none} end. --endif. - -spec open_active_file(list()) -> {integer(), ets:tid(), any()}. %% @doc %% Open an active file - one for which it is assumed the hash tables have not @@ -1014,7 +966,7 @@ open_active_file(FileName) when is_list(FileName) -> {?BASE_POSITION, 0} -> ok; _ -> - leveled_log:log("CDB06", [LastPosition, EndPosition]) + leveled_log:log(cdb06, [LastPosition, EndPosition]) end, {ok, _LastPosition} = file:position(Handle, LastPosition), ok = file:truncate(Handle), @@ -1087,36 +1039,31 @@ mput(Handle, KVList, {LastPosition, HashTree0}, BinaryMode, MaxSize) -> end. --spec get_withcache(file:io_device(), - any(), - tuple(), - boolean(), - cdb_timings()) - -> {cdb_timings(), missing|probably|tuple()}. 
+-spec get_withcache( + file:io_device(), any(), tuple(), boolean(), + leveled_monitor:monitor()) -> missing|probably|tuple(). %% @doc %% %% Using a cache of the Index array - get a K/V pair from the file using the %% Key. should return an updated timings object (if timings are being taken) %% along with the result (which may be missing if the no matching entry is %% found, or probably in QuickCheck scenarios) -get_withcache(Handle, Key, Cache, BinaryMode, Timings) -> - get(Handle, Key, Cache, true, BinaryMode, Timings). +get_withcache(Handle, Key, Cache, BinaryMode, Monitor) -> + get(Handle, Key, Cache, true, BinaryMode, Monitor). -get_withcache(Handle, Key, Cache, QuickCheck, BinaryMode, Timings) -> - get(Handle, Key, Cache, QuickCheck, BinaryMode, Timings). +get_withcache(Handle, Key, Cache, QuickCheck, BinaryMode, Monitor) -> + get(Handle, Key, Cache, QuickCheck, BinaryMode, Monitor). get(FileNameOrHandle, Key, BinaryMode) -> - {no_timing, R} = - get(FileNameOrHandle, Key, no_cache, true, BinaryMode, no_timing), - R. + get(FileNameOrHandle, Key, no_cache, true, BinaryMode, {no_monitor, 0}). --spec get(list()|file:io_device(), - any(), no_cache|tuple(), - loose_presence|any(), - boolean(), - cdb_timings()) - -> {cdb_timings(), tuple()|probably|missing}. +-spec get( + list()|file:io_device(), + any(), no_cache|tuple(), + loose_presence|any(), + boolean(), + leveled_monitor:monitor()) -> tuple()|probably|missing. %% @doc %% %% Get a K/V pair from the file using the Key. QuickCheck can be set to @@ -1125,38 +1072,37 @@ get(FileNameOrHandle, Key, BinaryMode) -> %% that Key) %% %% Timings also passed in and can be updated based on results -get(FileName, Key, Cache, QuickCheck, BinaryMode, Timings) +get(FileName, Key, Cache, QuickCheck, BinaryMode, Monitor) when is_list(FileName) -> {ok, Handle} = file:open(FileName,[binary, raw, read]), - get(Handle, Key, Cache, QuickCheck, BinaryMode, Timings); -get(Handle, Key, Cache, QuickCheck, BinaryMode, Timings) + get(Handle, Key, Cache, QuickCheck, BinaryMode, Monitor); +get(Handle, Key, Cache, QuickCheck, BinaryMode, Monitor) when is_tuple(Handle) -> - SW = os:timestamp(), - + SW0 = leveled_monitor:maybe_time(Monitor), Hash = hash(Key), Index = hash_to_index(Hash), {HashTable, Count} = get_index(Handle, Index, Cache), - + {TS0, SW1} = leveled_monitor:step_time(SW0), % If the count is 0 for that index - key must be missing case Count of 0 -> - {Timings, missing}; + missing; _ -> % Get starting slot in hashtable {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}), Slot = hash_to_slot(Hash, Count), - UpdTimings = update_indextimings(Timings, SW), - search_hash_table(Handle, - {FirstHashPosition, - Slot, - 1, - Count}, - Hash, - Key, - QuickCheck, - BinaryMode, - UpdTimings) + {CycleCount, Result} = + search_hash_table( + Handle, + {FirstHashPosition, Slot, 1, Count}, + Hash, + Key, + QuickCheck, + BinaryMode), + {TS1, _SW2} = leveled_monitor:step_time(SW1), + maybelog_get_timing(Monitor, TS0, TS1, CycleCount), + Result end. get_index(Handle, Index, no_cache) -> @@ -1195,7 +1141,7 @@ hashtable_calc(HashTree, StartPos) -> Seq = lists:seq(0, 255), SWC = os:timestamp(), {IndexList, HashTreeBin} = write_hash_tables(Seq, HashTree, StartPos), - leveled_log:log_timer("CDB07", [], SWC), + leveled_log:log_timer(cdb07, [], SWC), {IndexList, HashTreeBin}. 
%%%%%%%%%%%%%%%%%%%% @@ -1208,7 +1154,7 @@ determine_new_filename(Filename) -> rename_for_read(Filename, NewName) -> %% Rename file - leveled_log:log("CDB08", [Filename, NewName, filelib:is_file(NewName)]), + leveled_log:log(cdb08, [Filename, NewName, filelib:is_file(NewName)]), file:rename(Filename, NewName). @@ -1401,7 +1347,7 @@ scan_over_file(Handle, Position, FilterFun, Output, LastKey) -> % Not interesting that we've nothing to read at base ok; _ -> - leveled_log:log("CDB09", [Position]) + leveled_log:log(cdb09, [Position]) end, % Bring file back to that position {ok, Position} = file:position(Handle, {bof, Position}), @@ -1514,7 +1460,7 @@ safe_read_next(Handle, Length, ReadFun) -> loose_read(Handle, Length, ReadFun) catch error:ReadError -> - leveled_log:log("CDB20", [ReadError, Length]), + leveled_log:log(cdb20, [ReadError, Length]), false end. @@ -1541,11 +1487,11 @@ crccheck(<>, KeyBin) when is_binary(KeyBin) -> CRC -> Value; _ -> - leveled_log:log("CDB10", []), + leveled_log:log(cdb10, ["mismatch"]), false end; crccheck(_V, _KB) -> - leveled_log:log("CDB11", []), + leveled_log:log(cdb10, ["size"]), false. @@ -1595,10 +1541,10 @@ read_integerpairs(< - {cdb_timings(), missing|probably|tuple()}. +-spec search_hash_table( + file:io_device(), tuple(), integer(), any(), + loose_presence|boolean(), boolean()) + -> {pos_integer(), missing|probably|tuple()}. %% @doc %% %% Seach the hash table for the matching hash and key. Be prepared for @@ -1609,16 +1555,15 @@ read_integerpairs(< + _QuickCheck, _BinaryMode) -> % We have done the full loop - value must not be present - {Timings, missing}; + {TotalSlots, missing}; search_hash_table(Handle, {FirstHashPosition, Slot, CycleCount, TotalSlots}, Hash, Key, - QuickCheck, BinaryMode, Timings) -> - + QuickCheck, BinaryMode) -> % Read the next 2 integers at current position, see if it matches the hash % we're after Offset = @@ -1628,7 +1573,7 @@ search_hash_table(Handle, case read_next_2_integers(Handle) of {0, 0} -> - {Timings, missing}; + {CycleCount, missing}; {Hash, DataLoc} -> KV = case QuickCheck of @@ -1639,92 +1584,34 @@ search_hash_table(Handle, end, case KV of missing -> - leveled_log:log("CDB15", [Hash]), - search_hash_table(Handle, - {FirstHashPosition, - Slot, - CycleCount + 1, - TotalSlots}, - Hash, Key, - QuickCheck, BinaryMode, - Timings); + leveled_log:log(cdb15, [Hash]), + search_hash_table( + Handle, + {FirstHashPosition, Slot, CycleCount + 1, TotalSlots}, + Hash, Key, + QuickCheck, BinaryMode); _ -> - UpdTimings = update_fetchtimings(Timings, CycleCount), - {UpdTimings, KV} + {CycleCount, KV} end; _ -> - search_hash_table(Handle, - {FirstHashPosition, - Slot, - CycleCount + 1, - TotalSlots}, - Hash, Key, - QuickCheck, BinaryMode, - Timings) + search_hash_table( + Handle, + {FirstHashPosition, Slot, CycleCount + 1, TotalSlots}, + Hash, Key, + QuickCheck, BinaryMode) end. --spec update_fetchtimings(no_timing|cdb_timings(), integer()) -> - no_timing|cdb_timings(). -%% @doc -%% -%% Update the timings record if sample timings currently being taken -%% (otherwise the timngs record will be set to no_timing) -update_fetchtimings(no_timing, _CycleCount) -> - no_timing; -update_fetchtimings(Timings, CycleCount) -> - FetchTime = - timer:now_diff(os:timestamp(), - Timings#cdb_timings.fetchloop_starttime), - Timings#cdb_timings{sample_fetchtime = - Timings#cdb_timings.sample_fetchtime + FetchTime, - sample_cyclecount = - Timings#cdb_timings.sample_cyclecount + CycleCount, - sample_count = - Timings#cdb_timings.sample_count + 1}. 
- --spec update_indextimings(no_timing|cdb_timings(), erlang:timestamp()) -> - no_timing|cdb_timings(). -%% @doc -%% -%% Update the timings record with the time spent looking up the position -%% list to check from the index -update_indextimings(no_timing, _SW) -> - no_timing; -update_indextimings(Timings, SW) -> - IdxTime = timer:now_diff(os:timestamp(), SW), - Timings#cdb_timings{sample_indextime = - Timings#cdb_timings.sample_indextime - + IdxTime, - fetchloop_starttime = - os:timestamp()}. - --spec update_statetimings(cdb_timings(), integer()) - -> {cdb_timings(), integer()}. -%% @doc -%% -%% The timings state is either in countdown to the next set of samples of -%% we are actively collecting a sample. Active collection take place -%% when the countdown is 0. Once the sample has reached the expected count -%% then there is a log of that sample, and the countdown is restarted. -%% -%% Outside of sample windows the timings object should be set to the atom -%% no_timing. no_timing is a valid state for the cdb_timings type. -update_statetimings(no_timing, 0) -> - {#cdb_timings{}, 0}; -update_statetimings(Timings, 0) -> - case Timings#cdb_timings.sample_count of - SC when SC >= ?TIMING_SAMPLESIZE -> - leveled_log:log("CDB19", [Timings#cdb_timings.sample_count, - Timings#cdb_timings.sample_cyclecount, - Timings#cdb_timings.sample_fetchtime, - Timings#cdb_timings.sample_indextime]), - {no_timing, leveled_rand:uniform(2 * ?TIMING_SAMPLECOUNTDOWN)}; - _SC -> - {Timings, 0} - end; -update_statetimings(no_timing, N) -> - {no_timing, N - 1}. +-spec maybelog_get_timing( + leveled_monitor:monitor(), + leveled_monitor:timing(), + leveled_monitor:timing(), + pos_integer()) -> ok. +maybelog_get_timing(_Monitor, no_timing, no_timing, _CC) -> + ok; +maybelog_get_timing({Pid, _StatsFreq}, IndexTime, ReadTime, CycleCount) -> + leveled_monitor:add_stat( + Pid, {cdb_get_update, CycleCount, IndexTime, ReadTime}). % Write Key and Value tuples into the CDB. Each tuple consists of a @@ -1762,7 +1649,7 @@ perform_write_hash_tables(Handle, HashTreeBin, StartPos) -> ok = file:write(Handle, HashTreeBin), {ok, EndPos} = file:position(Handle, cur), ok = file:advise(Handle, StartPos, EndPos - StartPos, will_need), - leveled_log:log_timer("CDB12", [], SWW), + leveled_log:log_timer(cdb12, [], SWW), ok. 
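[Editor's note - a sketch, not the actual leveled_monitor implementation (which sits outside this hunk): maybelog_get_timing/4 above casts a {cdb_get_update, CycleCount, IndexTime, ReadTime} tuple to the monitor, which is then expected to accumulate it into the #cdb_get_timings{} record defined in leveled_monitor.erl. The function name accumulate_cdb_get below is hypothetical.]

%% Hypothetical accumulator - field names are those of the cdb_get_timings
%% record in leveled_monitor.erl.
accumulate_cdb_get({cdb_get_update, CC, IndexTime, ReadTime}, Timings) ->
    Timings#cdb_get_timings{
        sample_count = Timings#cdb_get_timings.sample_count + 1,
        cycle_count = Timings#cdb_get_timings.cycle_count + CC,
        index_time = Timings#cdb_get_timings.index_time + IndexTime,
        read_time = Timings#cdb_get_timings.read_time + ReadTime}.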
@@ -1932,7 +1819,7 @@ write_hash_tables(Indexes, HashTree, CurrPos) -> write_hash_tables([], _HashTree, _CurrPos, _BasePos, IndexList, HT_BinList, {T1, T2, T3}) -> - leveled_log:log("CDB14", [T1, T2, T3]), + leveled_log:log(cdb14, [T1, T2, T3]), IL = lists:reverse(IndexList), {IL, list_to_binary(HT_BinList)}; write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos, @@ -2336,12 +2223,13 @@ search_hash_table_findinslot_test() -> io:format("Slot 2 has Hash ~w Position ~w~n", [ReadH4, ReadP4]), ?assertMatch(0, ReadH4), ?assertMatch({"key1", "value1"}, get(Handle, Key1, false)), - ?assertMatch({no_timing, probably}, - get(Handle, Key1, - no_cache, loose_presence, false, no_timing)), - ?assertMatch({no_timing, missing}, - get(Handle, "Key99", - no_cache, loose_presence, false, no_timing)), + NoMonitor = {no_monitor, 0}, + ?assertMatch( + probably, + get(Handle, Key1, no_cache, loose_presence, false, NoMonitor)), + ?assertMatch( + missing, + get(Handle, "Key99", no_cache, loose_presence, false, NoMonitor)), {ok, _} = file:position(Handle, FirstHashPosition), FlipH3 = endian_flip(ReadH3), FlipP3 = endian_flip(ReadP3), @@ -2814,7 +2702,7 @@ pendingdelete_test() -> {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), ?assertMatch({"Key1", "Value1"}, cdb_get(P2, "Key1")), ?assertMatch({"Key100", "Value100"}, cdb_get(P2, "Key100")), - ?SPECIAL_DELFUN(F2), + ok = file:delete(F2), ok = cdb_deletepending(P2), % No issues destroying even though the file has already been removed ok = cdb_destroy(P2). diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index e8be244..ca1a3dd 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -80,6 +80,8 @@ tomb|{active, non_neg_integer()|infinity}. -type ledger_key() :: {tag(), any(), any(), any()}|all. +-type slimmed_key() :: + {binary(), binary()|null}|binary()|null|all. -type ledger_value() :: ledger_value_v1()|ledger_value_v2(). -type ledger_value_v1() :: @@ -342,20 +344,46 @@ isvalid_ledgerkey({Tag, _B, _K, _SK}) -> isvalid_ledgerkey(_LK) -> false. --spec endkey_passed(ledger_key(), ledger_key()) -> boolean(). -%% @oc +-spec endkey_passed( + ledger_key()|slimmed_key(), + ledger_key()|slimmed_key()) -> boolean(). +%% @doc %% Compare a key against a query key, only comparing elements that are non-null -%% in the Query key. This is used for comparing against end keys in queries. +%% in the Query key. +%% +%% Query key of `all` matches all keys +%% Query key element of `null` matches all keys less than or equal in previous +%% elements +%% +%% This function is required to make sense of this with erlang term order, +%% where otherwise atom() < binary() +%% +%% endkey_passed means "Query End Key has been passed when scanning this range" +%% +%% If the Query End Key is within the range ending in RangeEndkey then +%% endkey_passed is true. This range extends beyond the end of the Query +%% range, and so no further ranges need to be added to the Query results. +%% If the Query End Key is beyond the Range End Key, then endkey_passed is +%% false and further results may be required from further ranges. endkey_passed(all, _) -> false; -endkey_passed({EK1, null, null, null}, {CK1, _, _, _}) -> - EK1 < CK1; -endkey_passed({EK1, EK2, null, null}, {CK1, CK2, _, _}) -> - {EK1, EK2} < {CK1, CK2}; -endkey_passed({EK1, EK2, EK3, null}, {CK1, CK2, CK3, _}) -> - {EK1, EK2, EK3} < {CK1, CK2, CK3}; -endkey_passed(EndKey, CheckingKey) -> - EndKey < CheckingKey. 
+endkey_passed({K1, null, null, null}, {K1, _, _, _}) -> +    false; +endkey_passed({K1, K2, null, null}, {K1, K2, _, _}) -> +    false; +endkey_passed({K1, K2, K3, null}, {K1, K2, K3, _}) -> +    false; +endkey_passed({K1, null}, {K1, _}) -> +    % See leveled_sst SlotIndex implementation. Here keys may be slimmed to +    % single binaries or two element tuples before forming the index. +    false; +endkey_passed(null, _) -> +    false; +endkey_passed(QueryEndKey, RangeEndKey) -> +    % i.e. false = keep searching, this range does not yet extend beyond the +    % query end key +    % true = this range extends beyond the end of the query, so no further +    % results are required +    QueryEndKey < RangeEndKey. %%%============================================================================
@@ -386,7 +414,7 @@ get_tagstrategy(Tag, Strategy) -> {Tag, TagStrat} -> TagStrat; false -> - leveled_log:log("IC012", [Tag, Strategy]), + leveled_log:log(ic012, [Tag, Strategy]), retain end.
diff --git a/src/leveled_head.erl b/src/leveled_head.erl index 3ee66c2..7224e7f 100644 --- a/src/leveled_head.erl +++ b/src/leveled_head.erl
@@ -75,8 +75,21 @@ key_to_canonicalbinary | build_head | extract_metadata | diff_indexspecs. % Functions for which default behaviour can be over-written for the % application's own tags +-type appdefinable_keyfun() :: + fun((tuple()) -> binary()). +-type appdefinable_headfun() :: + fun((object_tag(), object_metadata()) -> head()). +-type appdefinable_metadatafun() :: + fun(({leveled_codec:tag(), non_neg_integer(), any()}) -> + {object_metadata(), list(erlang:timestamp())}). +-type appdefinable_indexspecsfun() :: + fun((object_tag(), object_metadata(), object_metadata()|not_present) -> + leveled_codec:index_specs()). +-type appdefinable_function_fun() :: + appdefinable_keyfun() | appdefinable_headfun() | + appdefinable_metadatafun() | appdefinable_indexspecsfun(). -type appdefinable_function_tuple() :: - {appdefinable_function(), fun()}. + {appdefinable_function(), appdefinable_function_fun()}. -type index_op() :: add | remove. -type index_value() :: integer() | binary().
@@ -265,9 +278,9 @@ standard_hash(Obj) -> %%% Handling Override Functions %%%============================================================================ --spec get_appdefined_function(appdefinable_function(), - fun(), - non_neg_integer()) -> fun(). +-spec get_appdefined_function( + appdefinable_function(), appdefinable_function_fun(), non_neg_integer()) -> + appdefinable_function_fun().
%% @doc %% If a keylist of [{function_name, fun()}] has been set as an environment %% variable for a tag, then this FunctionName can be used instead of the diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index d7edc73..7ae3b44 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -304,7 +304,7 @@ handle_call(stop, _From, State) -> handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Manifest0}, State) -> - leveled_log:log("IC014", [State#state.reload_strategy, + leveled_log:log(ic014, [State#state.reload_strategy, State#state.max_run_length]), % Empty the waste folder clear_waste(State), @@ -386,7 +386,7 @@ handle_cast(scoring_complete, State) -> State#state.maxrunlength_compactionperc, State#state.singlefile_compactionperc}, {BestRun0, Score} = assess_candidates(Candidates, ScoreParams), - leveled_log:log_timer("IC003", [Score, length(BestRun0)], SW), + leveled_log:log_timer(ic003, [Score, length(BestRun0)], SW), case Score > 0.0 of true -> BestRun1 = sort_run(BestRun0), @@ -405,7 +405,7 @@ handle_cast(scoring_complete, State) -> undefined} end, BestRun1), - leveled_log:log("IC002", [length(FilesToDelete)]), + leveled_log:log(ic002, [length(FilesToDelete)]), ok = CloseFun(FilterServer), ok = leveled_inker:ink_clerkcomplete(State#state.inker, ManifestSlice, @@ -418,7 +418,7 @@ handle_cast(scoring_complete, State) -> handle_cast({trim, PersistedSQN, ManifestAsList}, State) -> FilesToDelete = leveled_imanifest:find_persistedentries(PersistedSQN, ManifestAsList), - leveled_log:log("IC007", []), + leveled_log:log(ic007, []), ok = leveled_inker:ink_clerkcomplete(State#state.inker, [], FilesToDelete), {noreply, State}; handle_cast({prompt_deletions, ManifestSQN, FilesToDelete}, State) -> @@ -455,7 +455,7 @@ handle_info(_Info, State) -> terminate(normal, _State) -> ok; terminate(Reason, _State) -> - leveled_log:log("IC001", [Reason]). + leveled_log:log(ic001, [Reason]). code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -576,12 +576,12 @@ check_single_file(CDB, FilterFun, FilterServer, MaxSQN, Score. safely_log_filescore([], FN, Score, SW) -> - leveled_log:log_timer("IC004", [Score, empty, FN], SW); + leveled_log:log_timer(ic004, [Score, empty, FN], SW); safely_log_filescore(PositionList, FN, Score, SW) -> AvgJump = (lists:last(PositionList) - lists:nth(1, PositionList)) div length(PositionList), - leveled_log:log_timer("IC004", [Score, AvgJump, FN], SW). + leveled_log:log_timer(ic004, [Score, AvgJump, FN], SW). -spec size_comparison_score(list(key_size() | corrupted_test_key_size()), leveled_inker:filterfun(), @@ -732,12 +732,13 @@ score_run(Run, {MaxRunLength, MR_CT, SF_CT}) -> print_compaction_run(BestRun, ScoreParams) -> - leveled_log:log("IC005", [length(BestRun), - score_run(BestRun, ScoreParams)]), - lists:foreach(fun(File) -> - leveled_log:log("IC006", [File#candidate.filename]) - end, - BestRun). + leveled_log:log( + ic005, [length(BestRun), score_run(BestRun, ScoreParams)]), + lists:foreach( + fun(File) -> + leveled_log:log(ic006, [File#candidate.filename]) + end, + BestRun). 
sort_run(RunOfFiles) -> CompareFun = fun(Cand1, Cand2) -> @@ -795,7 +796,7 @@ get_all_positions([], PositionBatches) -> get_all_positions([HeadRef|RestOfBest], PositionBatches) -> SrcJournal = HeadRef#candidate.journal, Positions = leveled_cdb:cdb_getpositions(SrcJournal, all), - leveled_log:log("IC008", [HeadRef#candidate.filename, length(Positions)]), + leveled_log:log(ic008, [HeadRef#candidate.filename, length(Positions)]), Batches = split_positions_into_batches(lists:sort(Positions), SrcJournal, []), @@ -913,7 +914,7 @@ write_values(KVCList, CDBopts, Journal0, ManSlice0, PressMethod) -> {SQN, _LK} = leveled_codec:from_journalkey(TK), FP = CDBopts#cdb_options.file_path, FN = leveled_inker:filepath(FP, SQN, compact_journal), - leveled_log:log("IC009", [FN]), + leveled_log:log(ic009, [FN]), leveled_cdb:cdb_open_writer(FN, CDBopts); _ -> {ok, Journal0} @@ -941,9 +942,9 @@ clear_waste(State) -> case N - calendar:datetime_to_gregorian_seconds(LMD) of LMD_Delta when LMD_Delta >= WRP -> ok = file:delete(WP ++ DelJ), - leveled_log:log("IC010", [WP ++ DelJ]); + leveled_log:log(ic010, [WP ++ DelJ]); LMD_Delta -> - leveled_log:log("IC011", [WP ++ DelJ, LMD_Delta]), + leveled_log:log(ic011, [WP ++ DelJ, LMD_Delta]), ok end end, diff --git a/src/leveled_imanifest.erl b/src/leveled_imanifest.erl index e278ad7..115e5f9 100644 --- a/src/leveled_imanifest.erl +++ b/src/leveled_imanifest.erl @@ -58,7 +58,7 @@ generate_entry(Journal) -> [{StartSQN, NewFN, PidR, LastKey}]; empty -> ok = leveled_cdb:cdb_close(PidR), - leveled_log:log("IC013", [NewFN]), + leveled_log:log(ic013, [NewFN]), [] end. @@ -100,7 +100,7 @@ append_lastkey(Manifest, Pid, LastKey) -> %% Remove an entry from a manifest (after compaction) remove_entry(Manifest, Entry) -> {SQN, FN, _PidR, _LastKey} = Entry, - leveled_log:log("I0013", [FN]), + leveled_log:log(i0013, [FN]), Man0 = lists:keydelete(SQN, 1, to_list(Manifest)), from_list(Man0). @@ -157,7 +157,7 @@ to_list(Manifest) -> %% loss on rollback. reader(SQN, RootPath) -> ManifestPath = leveled_inker:filepath(RootPath, manifest_dir), - leveled_log:log("I0015", [ManifestPath, SQN]), + leveled_log:log(i0015, [ManifestPath, SQN]), {ok, MBin} = file:read_file(filename:join(ManifestPath, integer_to_list(SQN) ++ ".man")), @@ -179,7 +179,7 @@ writer(Manifest, ManSQN, RootPath) -> %% check backwards compatible (so that the reader can read manifests both %% with and without a CRC check) MBin = term_to_binary(to_list(Manifest), [compressed]), - leveled_log:log("I0016", [ManSQN]), + leveled_log:log(i0016, [ManSQN]), ok = leveled_util:safe_rename(TmpFN, NewFN, MBin, true), GC_SQN = ManSQN - ?MANIFESTS_TO_RETAIN, GC_Man = filename:join(ManPath, @@ -196,9 +196,11 @@ writer(Manifest, ManSQN, RootPath) -> %% @doc %% Print the manifest to the log printer(Manifest) -> - lists:foreach(fun({SQN, FN, _PID, _LK}) -> - leveled_log:log("I0017", [SQN, FN]) end, - to_list(Manifest)). + lists:foreach( + fun({SQN, FN, _PID, _LK}) -> + leveled_log:log(i0017, [SQN, FN]) + end, + to_list(Manifest)). -spec complete_filex() -> string(). 
%% @doc diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index 08f1629..ae6b7d1 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -517,7 +517,7 @@ handle_call({fetch, Key, SQN}, _From, State) -> {{SQN, Key}, {Value, _IndexSpecs}} -> {reply, {ok, Value}, State}; Other -> - leveled_log:log("I0001", [Key, SQN, Other]), + leveled_log:log(i0001, [Key, SQN, Other]), {reply, not_present, State} end; handle_call({get, Key, SQN}, _From, State) -> @@ -546,7 +546,7 @@ handle_call({register_snapshot, Requestor}, Rs = [{Requestor, os:timestamp(), State#state.manifest_sqn}|State#state.registered_snapshots], - leveled_log:log("I0002", [Requestor, State#state.manifest_sqn]), + leveled_log:log(i0002, [Requestor, State#state.manifest_sqn]), {reply, {State#state.manifest, State#state.active_journaldb, State#state.journal_sqn}, @@ -592,7 +592,7 @@ handle_call(roll, _From, State=#state{is_snapshot=Snap}) when Snap == false -> State#state.cdb_options, State#state.root_path, State#state.manifest_sqn), - leveled_log:log_timer("I0024", [NewSQN], SWroll), + leveled_log:log_timer(i0024, [NewSQN], SWroll), {reply, ok, State#state{journal_sqn = NewSQN, manifest = Manifest1, manifest_sqn = NewManSQN, @@ -604,7 +604,7 @@ handle_call({backup, BackupPath}, _from, State) BackupJFP = filepath(filename:join(BackupPath, ?JOURNAL_FP), journal_dir), ok = filelib:ensure_dir(BackupJFP), {ok, CurrentFNs} = file:list_dir(BackupJFP), - leveled_log:log("I0023", [length(CurrentFNs)]), + leveled_log:log(i0023, [length(CurrentFNs)]), BackupFun = fun({SQN, FN, PidR, LastKey}, {ManAcc, FTRAcc}) -> case SQN < State#state.journal_sqn of @@ -623,7 +623,7 @@ handle_call({backup, BackupPath}, _from, State) {[{SQN, BackupName, PidR, LastKey}|ManAcc], [ExtendedBaseFN|FTRAcc]}; false -> - leveled_log:log("I0021", [FN, SQN, State#state.journal_sqn]), + leveled_log:log(i0021, [FN, SQN, State#state.journal_sqn]), {ManAcc, FTRAcc} end end, @@ -635,7 +635,7 @@ handle_call({backup, BackupPath}, _from, State) FilesToRemove = lists:subtract(CurrentFNs, FilesToRetain), RemoveFun = fun(RFN) -> - leveled_log:log("I0022", [RFN]), + leveled_log:log(i0022, [RFN]), RemoveFile = filename:join(BackupJFP, RFN), case filelib:is_file(RemoveFile) and not filelib:is_dir(RemoveFile) of @@ -649,15 +649,15 @@ handle_call({backup, BackupPath}, _from, State) leveled_imanifest:writer(leveled_imanifest:from_list(BackupManifest), State#state.manifest_sqn, filename:join(BackupPath, ?JOURNAL_FP)), - leveled_log:log_timer("I0020", - [filename:join(BackupPath, ?JOURNAL_FP), - length(BackupManifest)], - SW), + leveled_log:log_timer( + i0020, + [filename:join(BackupPath, ?JOURNAL_FP), length(BackupManifest)], + SW), {reply, ok, State}; handle_call({check_sqn, LedgerSQN}, _From, State) -> case State#state.journal_sqn of JSQN when JSQN < LedgerSQN -> - leveled_log:log("I0025", [JSQN, LedgerSQN]), + leveled_log:log(i0025, [JSQN, LedgerSQN]), {reply, ok, State#state{journal_sqn = LedgerSQN}}; _JSQN -> {reply, ok, State} @@ -669,9 +669,9 @@ handle_call(close, _From, State) -> true -> ok = ink_releasesnapshot(State#state.source_inker, self()); false -> - leveled_log:log("I0005", [close]), - leveled_log:log("I0006", [State#state.journal_sqn, - State#state.manifest_sqn]), + leveled_log:log(i0005, [close]), + leveled_log:log( + i0006, [State#state.journal_sqn, State#state.manifest_sqn]), ok = leveled_iclerk:clerk_stop(State#state.clerk), shutdown_snapshots(State#state.registered_snapshots), shutdown_manifest(State#state.manifest) @@ -682,11 +682,11 @@ 
handle_call(doom, _From, State) -> filepath(State#state.root_path, manifest_dir), filepath(State#state.root_path, journal_compact_dir), filepath(State#state.root_path, journal_waste_dir)], - leveled_log:log("I0018", []), + leveled_log:log(i0018, []), - leveled_log:log("I0005", [doom]), - leveled_log:log("I0006", [State#state.journal_sqn, - State#state.manifest_sqn]), + leveled_log:log(i0005, [doom]), + leveled_log:log( + i0006, [State#state.journal_sqn, State#state.manifest_sqn]), ok = leveled_iclerk:clerk_stop(State#state.clerk), shutdown_snapshots(State#state.registered_snapshots), shutdown_manifest(State#state.manifest), @@ -748,12 +748,12 @@ handle_cast({confirm_delete, ManSQN, CDB}, State) -> end, {noreply, State#state{registered_snapshots = RegisteredSnapshots0}}; handle_cast({release_snapshot, Snapshot}, State) -> - leveled_log:log("I0003", [Snapshot]), + leveled_log:log(i0003, [Snapshot]), case lists:keydelete(Snapshot, 1, State#state.registered_snapshots) of [] -> {noreply, State#state{registered_snapshots=[]}}; Rs -> - leveled_log:log("I0004", [length(Rs)]), + leveled_log:log(i0004, [length(Rs)]), {noreply, State#state{registered_snapshots=Rs}} end; handle_cast({log_level, LogLevel}, State) -> @@ -875,7 +875,7 @@ shutdown_snapshots(Snapshots) -> %% @doc %% Shutdown all files in the manifest shutdown_manifest(Manifest) -> - leveled_log:log("I0007", []), + leveled_log:log(i0007, []), leveled_imanifest:printer(Manifest), ManAsList = leveled_imanifest:to_list(Manifest), close_allmanifest(ManAsList). @@ -940,7 +940,7 @@ put_object(LedgerKey, Object, KeyChanges, Sync, State) -> State#state.cdb_options, State#state.root_path, State#state.manifest_sqn), - leveled_log:log_timer("I0008", [], SWroll), + leveled_log:log_timer(i0008, [], SWroll), ok = leveled_cdb:cdb_put(NewJournalP, JournalKey, JournalBin), @@ -1051,13 +1051,13 @@ build_manifest(ManifestFilenames, UpdManifestSQN = if length(OpenManifest) > length(Manifest) -> - leveled_log:log("I0009", []), + leveled_log:log(i0009, []), leveled_imanifest:printer(OpenManifest), NextSQN = ManifestSQN + 1, leveled_imanifest:writer(OpenManifest, NextSQN, RootPath), NextSQN; true -> - leveled_log:log("I0010", []), + leveled_log:log(i0010, []), leveled_imanifest:printer(OpenManifest), ManifestSQN end, @@ -1082,7 +1082,7 @@ close_allmanifest([H|ManifestT]) -> %% Open all the files in the manifets, and updating the manifest with the PIDs %% of the opened files open_all_manifest([], RootPath, CDBOpts) -> - leveled_log:log("I0011", []), + leveled_log:log(i0011, []), leveled_imanifest:add_entry([], start_new_activejournal(0, RootPath, CDBOpts), true); @@ -1113,7 +1113,7 @@ open_all_manifest(Man0, RootPath, CDBOpts) -> PendingHeadFN = HeadFN ++ "." ++ ?PENDING_FILEX, case filelib:is_file(CompleteHeadFN) of true -> - leveled_log:log("I0012", [HeadFN]), + leveled_log:log(i0012, [HeadFN]), {ok, HeadR} = leveled_cdb:cdb_open_reader(CompleteHeadFN), LastKey = leveled_cdb:cdb_lastkey(HeadR), LastSQN = element(1, LastKey), diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 88d0058..d609f77 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -19,10 +19,14 @@ save/1, return_settings/0]). +-ifdef(TEST). +-export([format_time/1, log_prefix/5]). +-endif. --record(log_options, {log_level = info :: log_level(), - forced_logs = [] :: [string()], - database_id = 0 :: non_neg_integer()}). +-record(log_options, + {log_level = info :: log_level(), + forced_logs = [] :: [atom()], + database_id = 0 :: non_neg_integer()}). 
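[Editor's note - usage sketch, not part of the change: with log references now atoms, the forced-log and log-level API (specs updated later in this diff to list(atom())) is driven with atoms. This assumes set_loglevel/1, add_forcedlogs/1 and remove_forcedlogs/1 remain exported as before.]

%% In the Erlang shell, force the cdb04 deletion-confirmation log to be
%% emitted even when the configured log level is error:
%% 1> leveled_log:set_loglevel(error).
%% ok
%% 2> leveled_log:add_forcedlogs([cdb04]).
%% ok
%% 3> leveled_log:remove_forcedlogs([cdb04]).
%% ok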
-type log_level() :: debug | info | warn | error | critical. -type log_options() :: #log_options{}. @@ -32,385 +36,279 @@ -define(LOG_LEVELS, [debug, info, warn, error, critical]). -define(DEFAULT_LOG_LEVEL, error). --define(LOGBASE, [ - - {"G0001", - {info, "Generic log point"}}, - {"G0002", - {info, "Generic log point with term ~w"}}, - {"D0001", - {debug, "Generic debug log"}}, - - {"B0001", - {info, "Bookie starting with Ink ~w Pcl ~w"}}, - {"B0002", - {info, "Snapshot starting with Ink ~w Pcl ~w"}}, - {"B0003", - {info, "Bookie closing for reason ~w"}}, - {"B0004", - {info, "Initialised PCL clone and length of increment in snapshot is ~w"}}, - {"B0005", - {info, "LedgerSQN=~w at startup"}}, - {"B0006", - {info, "Reached end of load batch with SQN ~w"}}, - {"B0007", - {info, "Skipping as exceeded MaxSQN ~w with SQN ~w"}}, - {"B0008", - {info, "Bucket list finds no more results"}}, - {"B0009", - {debug, "Bucket list finds Bucket ~w"}}, - {"B0011", - {warn, "Call to destroy the store and so all files to be removed"}}, - {"B0013", - {warn, "Long running task took ~w microseconds with task_type=~w"}}, - {"B0015", - {info, "Put timing with sample_count=~w and mem_time=~w ink_time=~w" - ++ " with total_object_size=~w"}}, - {"B0016", - {info, "Get timing with sample_count=~w and head_time=~w body_time=~w" - ++ " with fetch_count=~w"}}, - {"B0017", - {info, "Snapshot timing with sample_count=~w and bookie_time=~w pcl_time=~w"}}, - {"B0018", - {info, "Positive HEAD responses timed with sample_count=~w and " - ++ " pcl_time=~w rsp_time=~w"}}, - {"B0019", - {warn, "Use of book_indexfold with constraint of Bucket ~w with " - ++ "no StartKey is deprecated"}}, - {"B0020", - {warn, "Ratio of penciller cache size ~w to bookie's memory " - ++ "cache size ~w is larger than expected"}}, - {"B0021", - {info, "Bookie fetch RequestCount=~w and CacheCount=~w and " - ++ "ObjectFoundCount=~w"}}, - - {"R0001", - {debug, "Object fold to process batch of ~w objects"}}, - - {"P0001", - {debug, "Ledger snapshot ~w registered"}}, - {"P0003", - {debug, "Ledger snapshot ~w released"}}, - {"P0004", - {debug, "Remaining ledger snapshots are ~w"}}, - {"P0005", - {debug, "Delete confirmed as file ~s is removed from Manifest"}}, - {"P0006", - {info, "Orphaned reply after timeout on L0 file write ~s"}}, - {"P0007", - {debug, "Sent release message for cloned Penciller following close for " - ++ "reason ~w"}}, - {"P0008", - {info, "Penciller closing for reason ~w"}}, - {"P0010", - {info, "discarded=~w level zero on close of Penciller"}}, - {"P0011", - {info, "Shutdown complete for Penciller for reason ~w"}}, - {"P0012", - {info, "Store to be started based on manifest sequence number of ~w"}}, - {"P0013", - {warn, "Seqence number of 0 indicates no valid manifest"}}, - {"P0014", - {info, "Maximum sequence number of ~w found in nonzero levels"}}, - {"P0015", - {info, "L0 file found ~s"}}, - {"P0016", - {info, "L0 file had maximum sequence number of ~w"}}, - {"P0017", - {info, "No L0 file found"}}, - {"P0018", - {info, "Response to push_mem of ~w with " - ++ "L0 pending ~w and merge backlog ~w"}}, - {"P0019", - {info, "Rolling level zero to filename ~s at ledger sqn ~w"}}, - {"P0021", - {info, "Allocation of work blocked as L0 pending"}}, - {"P0022", - {info, "Manifest at Level ~w"}}, - {"P0023", - {info, "Manifest entry of startkey ~s ~s ~s endkey ~s ~s ~s " - ++ "filename=~s~n"}}, - {"P0024", - {info, "Outstanding compaction work items of ~w with backlog status " - ++ "of ~w"}}, - {"P0025", - {info, "Merge to sqn ~w from Level 
~w completed"}}, - {"P0026", - {info, "Merge has been commmitted at sequence number ~w"}}, - {"P0027", - {info, "Rename of manifest from ~s ~w to ~s ~w"}}, - {"P0028", - {debug, "Adding cleared file ~s to deletion list"}}, - {"P0029", - {info, "L0 completion confirmed and will transition to not pending"}}, - {"P0030", - {warn, "We're doomed - intention recorded to destroy all files"}}, - {"P0031", - {info, "Completion of update to levelzero" - ++ " with cache_size=~w level0_due=~w" - ++ " and change_pending=~w"}}, - {"P0032", - {info, "Fetch head timing with sample_count=~w and level timings of" - ++ " foundmem_time=~w found0_time=~w found1_time=~w" - ++ " found2_time=~w found3_time=~w foundlower_time=~w" - ++ " missed_time=~w" - ++ " with counts of" - ++ " foundmem_count=~w found0_count=~w found1_count=~w" - ++ " found2_count=~w found3_count=~w foundlower_count=~w" - ++ " missed_count=~w"}}, - {"P0033", - {error, "Corrupted manifest file at path ~s to be ignored " - ++ "due to error ~w"}}, - {"P0034", - {warn, "Snapshot with pid ~w timed out and so deletion will " - ++ "continue regardless"}}, - {"P0035", - {info, "Startup with Manifest SQN of ~w"}}, - {"P0036", - {info, "Garbage collection on manifest removes key for filename ~s"}}, - {"P0037", - {debug, "Merging of penciller L0 tree from size ~w complete"}}, - {"P0038", - {info, "Timeout of snapshot with pid=~w at SQN=~w at TS ~w " - ++ "set to timeout=~w"}}, - {"P0039", - {debug, "Failed to release pid=~w " - ++ "leaving SnapshotCount=~w and MinSQN=~w"}}, - {"P0040", - {info, "Archiving filename ~s as unused at startup"}}, - {"P0041", - {info, "Penciller manifest switched from SQN ~w to ~w"}}, - {"P0042", - {warn, "Cache full so attempting roll memory with l0_size=~w"}}, - - {"PC001", - {info, "Penciller's clerk ~w started with owner ~w"}}, - {"PC002", - {info, "Request for manifest change from clerk on closing"}}, - {"PC003", - {info, "Confirmation of manifest change on closing"}}, - {"PC004", - {info, "Prompted confirmation of manifest change"}}, - {"PC005", - {info, "Penciller's Clerk ~w shutdown now complete for reason ~w"}}, - {"PC006", - {debug, "Work prompted but none needed"}}, - {"PC007", - {debug, "Clerk prompting Penciller regarding manifest change"}}, - {"PC008", - {info, "Merge from level ~w to merge into ~w files below"}}, - {"PC009", - {debug, "File ~s to simply switch levels to level ~w"}}, - {"PC010", - {info, "Merge to be commenced for FileToMerge=~s with MSN=~w"}}, - {"PC011", - {info, "Merge completed with MSN=~w to Level=~w and FileCounter=~w"}}, - {"PC012", - {debug, "File to be created as part of MSN=~w Filename=~s " - ++ "IsBasement=~w"}}, - {"PC013", - {warn, "Merge resulted in empty file ~s"}}, - {"PC015", - {info, "File created"}}, - {"PC016", - {info, "Slow fetch from SFT ~w of ~w us at level ~w " - ++ "with result ~w"}}, - {"PC017", - {debug, "Notified clerk of manifest change"}}, - {"PC018", - {info, "Saved manifest file"}}, - {"PC019", - {debug, "After ~s level ~w is ~w"}}, - {"PC020", - {warn, "Empty prompt deletions at ManifestSQN=~w"}}, - {"PC021", - {debug, "Prompting deletions at ManifestSQN=~w"}}, - {"PC022", - {debug, "Storing reference to deletions at ManifestSQN=~w"}}, - {"PC023", - {info, "At level=~w file_count=~w avg_mem=~w " - ++ "file with most memory fn=~s p=~w mem=~w"}}, - {"PC024", - {info, "Grooming compaction picked file with tomb_count=~w"}}, - {"PM002", - {info, "Completed dump of L0 cache to list of l0cache_size=~w"}}, - - {"SST01", - {info, "SST timing for result=~w is sample=~w 
total=~w and max=~w"}}, - {"SST02", - {error, "False result returned from SST with filename ~s as " - ++ "slot ~w has failed crc check"}}, - {"SST03", - {info, "Opening SST file with filename ~s slot_count=~w and" - ++ " max sqn ~w"}}, - {"SST04", - {debug, "Exit called for reason ~w on filename ~s"}}, - {"SST05", - {warn, "Rename rogue filename ~s to ~s"}}, - {"SST06", - {debug, "File ~s has been set for delete"}}, - {"SST07", - {info, "Exit called and now clearing ~s"}}, - {"SST08", - {info, "Completed creation of ~s at level ~w with max sqn ~w"}}, - {"SST09", - {warn, "Read request exposes slot with bad CRC"}}, - {"SST10", - {debug, "Expansion sought to support pointer to pid ~w status ~w"}}, - {"SST11", - {info, "Level zero creation timings in microseconds " - ++ "pmem_fetch=~w merge_lists=~w build_slots=~w " - ++ "build_summary=~w read_switch=~w"}}, - {"SST12", - {info, "SST Timings at level=~w for sample_count=~w" - ++ " at timing points index_query_time=~w" - ++ " lookup_cache_time=~w slot_index_time=~w " - ++ " fetch_cache_time=~w slot_fetch_time=~w" - ++ " noncached_block_fetch_time=~w" - ++ " exiting at points slot_index=~w" - ++ " fetch_cache=~w slot_fetch=~w noncached_block_fetch=~w"}}, - {"SST13", - {info, "SST merge list build timings of" - ++ " fold_toslot=~w slot_hashlist=~w" - ++ " slot_serialise=~w slot_finish=~w" - ++ " is_basement=~w level=~w"}}, - - {"I0001", - {info, "Unexpected failure to fetch value for Key=~w SQN=~w " - ++ "with reason ~w"}}, - {"I0002", - {debug, "Journal snapshot ~w registered at SQN ~w"}}, - {"I0003", - {debug, "Journal snapshot ~w released"}}, - {"I0004", - {info, "Remaining number of journal snapshots is ~w"}}, - {"I0005", - {info, "Inker closing journal for reason ~w"}}, - {"I0006", - {info, "Close triggered with journal_sqn=~w and manifest_sqn=~w"}}, - {"I0007", - {info, "Inker manifest when closing is:"}}, - {"I0008", - {info, "Put to new active journal required roll and manifest write"}}, - {"I0009", - {info, "Updated manifest on startup:"}}, - {"I0010", - {info, "Unchanged manifest on startup:"}}, - {"I0011", - {info, "Manifest is empty, starting from manifest SQN 1"}}, - {"I0012", - {info, "Head manifest entry ~s is complete so new active journal " - ++ "required"}}, - {"I0013", - {info, "File ~s to be removed from manifest"}}, - {"I0014", - {info, "On startup loading from filename ~s from SQN ~w"}}, - {"I0015", - {info, "Opening manifest file at ~s with SQN ~w"}}, - {"I0016", - {info, "Writing new version of manifest for manifestSQN=~w"}}, - {"I0017", - {debug, "At SQN=~w journal has filename ~s"}}, - {"I0018", - {warn, "We're doomed - intention recorded to destroy all files"}}, - {"I0019", - {info, "After ~w PUTs total prepare time is ~w total cdb time is ~w " - ++ "and max prepare time is ~w and max cdb time is ~w"}}, - {"I0020", - {info, "Journal backup completed to path=~s with file_count=~w"}}, - {"I0021", - {info, "Ingoring filename=~s with SQN=~w and JournalSQN=~w"}}, - {"I0022", - {info, "Removing filename=~s from backup folder as not in backup"}}, - {"I0023", - {info, "Backup commencing into folder with ~w existing files"}}, - {"I0024", - {info, "Prompted roll at NewSQN=~w"}}, - {"I0025", - {warn, "Journal SQN of ~w is below Ledger SQN of ~w " ++ - "anti-entropy will be required"}}, - - {"IC001", - {info, "Closed for reason ~w so maybe leaving garbage"}}, - {"IC002", - {info, "Clerk updating Inker as compaction complete of ~w files"}}, - {"IC003", - {info, "Scoring of compaction runs complete with highest score=~w " - ++ "with 
run of run_length=~w"}}, - {"IC004", - {info, "Score=~w with mean_byte_jump=~w for filename ~s"}}, - {"IC005", - {info, "Compaction to be performed on ~w files with score of ~w"}}, - {"IC006", - {info, "Filename ~s is part of compaction run"}}, - {"IC007", - {info, "Clerk has completed compaction process"}}, - {"IC008", - {info, "Compaction source ~s has yielded ~w positions"}}, - {"IC009", - {info, "Generate journal for compaction with filename ~s"}}, - {"IC010", - {info, "Clearing journal with filename ~s"}}, - {"IC011", - {info, "Not clearing filename ~s as modified delta is only ~w seconds"}}, - {"IC012", - {warn, "Tag ~w not found in Strategy ~w - maybe corrupted"}}, - {"IC013", - {warn, "File with name ~s to be ignored in manifest as scanning for " - ++ "first key returned empty - maybe corrupted"}}, - {"IC014", - {info, "Compaction to be run with strategy ~w and max_run_length ~w"}}, - - {"CDB01", - {info, "Opening file for writing with filename ~s"}}, - {"CDB02", - {info, "Opening file for reading with filename ~s"}}, - {"CDB03", - {info, "Re-opening file for reading with filename ~s"}}, - {"CDB04", - {info, "Deletion confirmed for file ~s at ManifestSQN ~w"}}, - {"CDB05", - {info, "Closing of filename ~s from state ~w for reason ~w"}}, - {"CDB06", - {warn, "File to be truncated at last position of ~w with end of " - ++ "file at ~w"}}, - {"CDB07", - {info, "Hashtree computed"}}, - {"CDB08", - {info, "Renaming file from ~s to ~s for which existence is ~w"}}, - {"CDB09", - {info, "Failure to read Key/Value at Position ~w in scan " ++ - "this may be the end of the file"}}, - {"CDB10", - {info, "CRC check failed due to mismatch"}}, - {"CDB11", - {info, "CRC check failed due to size"}}, - {"CDB12", - {info, "HashTree written"}}, - {"CDB13", - {debug, "Write options of ~w"}}, - {"CDB14", - {info, "Microsecond timings for hashtree build of " - ++ "to_list=~w sort=~w build=~w"}}, - {"CDB15", - {info, "Collision in search for hash ~w"}}, - {"CDB16", - {info, "CDB scan from start ~w in file with end ~w and last_key ~w"}}, - {"CDB17", - {info, "After ~w PUTs total_write_time=~w total_sync_time=~w " - ++ "and max_write_time=~w and max_sync_time=~w"}}, - {"CDB18", - {info, "Handled return and write of hashtable"}}, - {"CDB19", - {info, "Sample timings in microseconds for sample_count=~w " - ++ "with totals of cycle_count=~w " - ++ "fetch_time=~w index_time=~w"}}, - {"CDB20", - {warn, "Error ~w caught when safe reading a file to length ~w"}}, - {"CDB21", - {warn, "File ~s to be deleted but already gone"}} - - ]). 
+-define(LOGBASE, + #{ + g0001 => + {info, <<"Generic log point">>}, + g0002 => + {info, <<"Generic log point with term ~w">>}, + d0001 => + {info, <<"Generic debug log">>}, + b0001 => + {info, <<"Bookie starting with Ink ~w Pcl ~w">>}, + b0002 => + {info, <<"Snapshot starting with Ink ~w Pcl ~w">>}, + b0003 => + {info, <<"Bookie closing for reason ~w">>}, + b0005 => + {info, <<"LedgerSQN=~w at startup">>}, + b0006 => + {info, <<"Reached end of load batch with SQN ~w">>}, + b0007 => + {info, <<"Skipping as exceeded MaxSQN ~w with SQN ~w">>}, + b0008 => + {info, <<"Bucket list finds no more results">>}, + b0009 => + {debug, <<"Bucket list finds Bucket ~w">>}, + b0011 => + {warn, <<"Call to destroy the store and so all files to be removed">>}, + b0013 => + {warn, <<"Long running task took ~w microseconds with task_type=~w">>}, + b0015 => + {info, <<"Put timing with sample_count=~w ink_time=~w prep_time=~w mem_time=~w with total_object_size=~w with sample_period=~w seconds">>}, + b0016 => + {info, <<"Get timing with sample_count=~w and head_time=~w body_time=~w with fetch_count=~w with sample_period=~w seconds">>}, + b0017 => + {info, <<"Snapshot timing with sample_count=~w and bookie_time=~w pcl_time=~w with sample_period=~w seconds">>}, + b0018 => + {info, <<"Positive HEAD responses timed with sample_count=~w and cache_count=~w found_count=~w fetch_ledger_time=~w fetch_ledgercache_time=~w rsp_time=~w notfound_time=~w with sample_period=~w seconds">>}, + b0019 => + {warn, <<"Use of book_indexfold with constraint of Bucket ~w with no StartKey is deprecated">>}, + b0020 => + {warn, <<"Ratio of penciller cache size ~w to bookie's memory cache size ~w is larger than expected">>}, + r0001 => + {debug, <<"Object fold to process batch of ~w objects">>}, + p0001 => + {debug, <<"Ledger snapshot ~w registered">>}, + p0003 => + {debug, <<"Ledger snapshot ~w released">>}, + p0004 => + {debug, <<"Remaining ledger snapshots are ~w">>}, + p0005 => + {debug, <<"Delete confirmed as file ~s is removed from Manifest">>}, + p0007 => + {debug, <<"Sent release message for cloned Penciller following close for reason ~w">>}, + p0008 => + {info, <<"Penciller closing for reason ~w">>}, + p0010 => + {info, <<"level zero discarded_count=~w on close of Penciller">>}, + p0011 => + {info, <<"Shutdown complete for Penciller for reason ~w">>}, + p0012 => + {info, <<"Store to be started based on manifest sequence number of ~w">>}, + p0013 => + {warn, <<"Seqence number of 0 indicates no valid manifest">>}, + p0014 => + {info, <<"Maximum sequence number of ~w found in nonzero levels">>}, + p0015 => + {info, <<"L0 file found ~s">>}, + p0016 => + {info, <<"L0 file had maximum sequence number of ~w">>}, + p0017 => + {info, <<"No L0 file found">>}, + p0018 => + {info, <<"Response to push_mem of returned with cache_size=~w L0_pending=~w merge_backlog=~w cachelines_full=~w">>}, + p0019 => + {info, <<"Rolling level zero to filename ~s at ledger sqn ~w">>}, + p0024 => + {info, <<"Outstanding compaction work items of ~w with backlog status of ~w">>}, + p0029 => + {info, <<"L0 completion confirmed and will transition to not pending">>}, + p0030 => + {warn, <<"We're doomed - intention recorded to destroy all files">>}, + p0031 => + {info, <<"Completion of update to levelzero with cache_size=~w level0_due=~w change_pending=~w MinSQN=~w MaxSQN=~w">>}, + p0032 => + {info, <<"Fetch head timing with sample_count=~w and level timings of foundmem_time=~w found0_time=~w found1_time=~w found2_time=~w found3_time=~w foundlower_time=~w 
missed_time=~w with counts of foundmem_count=~w found0_count=~w found1_count=~w found2_count=~w found3_count=~w foundlower_count=~w missed_count=~w with sample_period=~w seconds">>}, + p0033 => + {error, <<"Corrupted manifest file at path ~s to be ignored due to error ~s">>}, + p0035 => + {info, <<"Startup with Manifest SQN of ~w">>}, + p0037 => + {debug, <<"Merging of penciller L0 tree from size ~w complete">>}, + p0038 => + {info, <<"Timeout of snapshot with pid=~w at SQN=~w at TS ~w set to timeout=~w">>}, + p0039 => + {debug, <<"Failed to release pid=~w leaving SnapshotCount=~w and MinSQN=~w">>}, + p0040 => + {info, <<"Archiving filename ~s as unused at startup">>}, + p0041 => + {info, <<"Penciller manifest switched from SQN ~w to ~w">>}, + pc001 => + {info, <<"Penciller's clerk ~w started with owner ~w">>}, + pc005 => + {info, <<"Penciller's Clerk ~w shutdown now complete for reason ~w">>}, + pc007 => + {debug, <<"Clerk prompting Penciller regarding manifest change">>}, + pc008 => + {info, <<"Merge from level ~w to merge into ~w files below">>}, + pc009 => + {debug, <<"File ~s to simply switch levels to level ~w">>}, + pc010 => + {info, <<"Merge to be commenced for FileToMerge=~s with MSN=~w">>}, + pc011 => + {info, <<"Merge completed with MSN=~w to Level=~w and FileCounter=~w">>}, + pc012 => + {debug, <<"File to be created as part of MSN=~w Filename=~s IsBasement=~w">>}, + pc013 => + {warn, <<"Merge resulted in empty file ~s">>}, + pc015 => + {info, <<"File created">>}, + pc016 => + {info, <<"Slow fetch from SFT ~w of ~w us at level ~w with result ~w">>}, + pc017 => + {debug, <<"Notified clerk of manifest change">>}, + pc018 => + {info, <<"Saved manifest file">>}, + pc019 => + {debug, <<"After ~s level ~w is ~w">>}, + pc021 => + {debug, <<"Prompting deletions at ManifestSQN=~w">>}, + pc022 => + {debug, <<"Storing reference to deletions at ManifestSQN=~w">>}, + pc023 => + {info, <<"At level=~w file_count=~w avg_mem=~w file with most memory fn=~s p=~w mem=~w">>}, + pc024 => + {info, <<"Grooming compaction picked file with tomb_count=~w">>}, + pm002 => + {info, <<"Completed dump of L0 cache to list of l0cache_size=~w">>}, + sst03 => + {info, <<"Opening SST file with filename ~s slot_count=~w and max sqn ~w">>}, + sst04 => + {debug, <<"Exit called for reason ~w on filename ~s">>}, + sst05 => + {warn, <<"Rename rogue filename ~s to ~s">>}, + sst06 => + {debug, <<"File ~s has been set for delete">>}, + sst07 => + {info, <<"Exit called and now clearing ~s">>}, + sst08 => + {info, <<"Completed creation of ~s at level ~w with max sqn ~w">>}, + sst09 => + {warn, <<"Read request exposes slot with bad CRC">>}, + sst10 => + {debug, <<"Expansion sought to support pointer to pid ~w status ~w">>}, + sst11 => + {info, <<"Level zero creation timings in microseconds pmem_fetch=~w merge_lists=~w build_slots=~w build_summary=~w read_switch=~w">>}, + sst12 => + {info, <<"SST Timings at level=~w for sample_count=~w at timing points notfound_time=~w fetchcache_time=~w slotcached_time=~w slotnoncached_time=~w exiting at points notfound_count=~w fetchcache_count=~w slotcached_count=~w slotnoncached_count=~w with sample_period=~w seconds">>}, + sst13 => + {info, <<"SST merge list build timings of fold_toslot=~w slot_hashlist=~w slot_serialise=~w slot_finish=~w is_basement=~w level=~w">>}, + sst14 => + {debug, <<"File ~s has completed BIC">>}, + i0001 => + {info, <<"Unexpected failure to fetch value for Key=~w SQN=~w with reason ~w">>}, + i0002 => + {debug, <<"Journal snapshot ~w registered at SQN ~w">>}, + 
i0003 => + {debug, <<"Journal snapshot ~w released">>}, + i0004 => + {info, <<"Remaining number of journal snapshots is ~w">>}, + i0005 => + {info, <<"Inker closing journal for reason ~w">>}, + i0006 => + {info, <<"Close triggered with journal_sqn=~w and manifest_sqn=~w">>}, + i0007 => + {info, <<"Inker manifest when closing is:">>}, + i0008 => + {info, <<"Put to new active journal required roll and manifest write">>}, + i0009 => + {info, <<"Updated manifest on startup:">>}, + i0010 => + {info, <<"Unchanged manifest on startup:">>}, + i0011 => + {info, <<"Manifest is empty, starting from manifest SQN 1">>}, + i0012 => + {info, <<"Head manifest entry ~s is complete so new active journal required">>}, + i0013 => + {info, <<"File ~s to be removed from manifest">>}, + i0014 => + {info, <<"On startup loading from filename ~s from SQN ~w">>}, + i0015 => + {info, <<"Opening manifest file at ~s with SQN ~w">>}, + i0016 => + {info, <<"Writing new version of manifest for manifestSQN=~w">>}, + i0017 => + {debug, <<"At SQN=~w journal has filename ~s">>}, + i0018 => + {warn, <<"We're doomed - intention recorded to destroy all files">>}, + i0020 => + {info, <<"Journal backup completed to path=~s with file_count=~w">>}, + i0021 => + {info, <<"Ingoring filename=~s with SQN=~w and JournalSQN=~w">>}, + i0022 => + {info, <<"Removing filename=~s from backup folder as not in backup">>}, + i0023 => + {info, <<"Backup commencing into folder with ~w existing files">>}, + i0024 => + {info, <<"Prompted roll at NewSQN=~w">>}, + i0025 => + {warn, <<"Journal SQN of ~w is below Ledger SQN of ~w anti-entropy will be required">>}, + ic001 => + {info, <<"Closed for reason ~w so maybe leaving garbage">>}, + ic002 => + {info, <<"Clerk updating Inker as compaction complete of ~w files">>}, + ic003 => + {info, <<"Scoring of compaction runs complete with highest score=~w with run of run_length=~w">>}, + ic004 => + {info, <<"Score=~w with mean_byte_jump=~w for filename ~s">>}, + ic005 => + {info, <<"Compaction to be performed on file_count=~w with compaction_score=~w">>}, + ic006 => + {info, <<"Filename ~s is part of compaction run">>}, + ic007 => + {info, <<"Clerk has completed compaction process">>}, + ic008 => + {info, <<"Compaction source ~s has yielded ~w positions">>}, + ic009 => + {info, <<"Generate journal for compaction with filename ~s">>}, + ic010 => + {info, <<"Clearing journal with filename ~s">>}, + ic011 => + {info, <<"Not clearing filename ~s as modified delta is only ~w seconds">>}, + ic012 => + {warn, <<"Tag ~w not found in Strategy ~w - maybe corrupted">>}, + ic013 => + {warn, "File with name ~s to be ignored in manifest as scanning for first key returned empty - maybe corrupted"}, + ic014 => + {info, <<"Compaction to be run with strategy ~w and max_run_length ~w">>}, + cdb01 => + {info, <<"Opening file for writing with filename ~s">>}, + cdb02 => + {info, <<"Opening file for reading with filename ~s">>}, + cdb03 => + {info, <<"Re-opening file for reading with filename ~s">>}, + cdb04 => + {info, <<"Deletion confirmed for file ~s at ManifestSQN ~w">>}, + cdb05 => + {info, <<"Closing of filename ~s from state ~w for reason ~w">>}, + cdb06 => + {warn, <<"File to be truncated at last position of ~w with end of file at ~w">>}, + cdb07 => + {info, <<"Hashtree index computed">>}, + cdb08 => + {info, <<"Renaming file from ~s to ~s for which existence is ~w">>}, + cdb09 => + {info, <<"Failure to read Key/Value at Position ~w in scan this may be the end of the file">>}, + cdb10 => + {warn, <<"CRC check failed due to 
error=~s">>}, + cdb12 => + {info, <<"Hashtree index written">>}, + cdb13 => + {debug, <<"Write options of ~w">>}, + cdb14 => + {info, <<"Microsecond timings for hashtree build of to_list=~w sort=~w build=~w">>}, + cdb15 => + {info, <<"Collision in search for hash ~w">>}, + cdb18 => + {info, <<"Handled return and write of hashtable">>}, + cdb19 => + {info, <<"Sample timings in microseconds for sample_count=~w with totals of cycle_count=~w index_time=~w read_time=~w with sample_period=~w seconds">>}, + cdb20 => + {warn, <<"Error ~w caught when safe reading a file to length ~w">>}, + cdb21 => + {warn, <<"File ~s to be deleted but already gone">>} + }). %%%============================================================================ @@ -433,7 +331,7 @@ set_databaseid(DBid) when is_integer(DBid) -> UpdLO = LO#log_options{database_id = DBid}, save(UpdLO). --spec add_forcedlogs(list(string())) -> ok. +-spec add_forcedlogs(list(atom())) -> ok. %% @doc %% Add a forced log to the list of forced logs. this will cause the log of this %% logReference to be logged even if the log_level of the process would not @@ -445,7 +343,7 @@ add_forcedlogs(LogRefs) -> UpdLO = LO#log_options{forced_logs = lists:usort(LogRefs ++ ForcedLogs)}, save(UpdLO). --spec remove_forcedlogs(list(string())) -> ok. +-spec remove_forcedlogs(list(atom())) -> ok. %% @doc %% Remove a forced log from the list of forced logs remove_forcedlogs(LogRefs) -> @@ -484,26 +382,21 @@ return_settings() -> %%% Prompt Logs %%%============================================================================ - +-spec log(atom(), list()) -> ok. log(LogReference, Subs) -> log(LogReference, Subs, ?LOG_LEVELS). log(LogRef, Subs, SupportedLogLevels) -> - case lists:keyfind(LogRef, 1, ?LOGBASE) of - {LogRef, {LogLevel, LogText}} -> - LogOpts = get_opts(), - case should_i_log(LogLevel, SupportedLogLevels, LogRef, LogOpts) of - true -> - DBid = LogOpts#log_options.database_id, - io:format(format_time() ++ " " - ++ " log_level=" - ++ atom_to_list(LogLevel) ++ " log_ref=" - ++ LogRef ++ " db_id=~w pid=~w " - ++ LogText ++ "~n", - [DBid|[self()|Subs]]); - false -> - ok - end; + {LogLevel, Log} = maps:get(LogRef, ?LOGBASE), + LogOpts = get_opts(), + case should_i_log(LogLevel, SupportedLogLevels, LogRef, LogOpts) of + true -> + DBid = LogOpts#log_options.database_id, + Prefix = + log_prefix( + localtime_ms(), LogLevel, LogRef, DBid, self()), + Suffix = <<"~n">>, + io:format(iolist_to_binary([Prefix, Log, Suffix]), Subs); false -> ok end. @@ -528,39 +421,29 @@ is_active_level([L|_], L, _) -> true; is_active_level([L|_], _, L) -> false; is_active_level([_|T], C, L) -> is_active_level(T, C, L). +-spec log_timer(atom(), list(), erlang:timestamp()) -> ok. log_timer(LogReference, Subs, StartTime) -> log_timer(LogReference, Subs, StartTime, ?LOG_LEVELS). 
log_timer(LogRef, Subs, StartTime, SupportedLevels) -> - case lists:keyfind(LogRef, 1, ?LOGBASE) of - {LogRef, {LogLevel, LogText}} -> - LogOpts = get_opts(), - case should_i_log(LogLevel, SupportedLevels, LogRef, LogOpts) of - true -> - DurationText = - case timer:now_diff(os:timestamp(), StartTime) of - US when US > 1000 -> - " with us_duration=" ++ integer_to_list(US) ++ - " or ms_duration=" - ++ integer_to_list(US div 1000); - US -> - " with us_duration=" ++ integer_to_list(US) - end, - DBid = LogOpts#log_options.database_id, - io:format(format_time() ++ " " - ++ " log_level=" - ++ atom_to_list(LogLevel) ++ " log_ref=" - ++ LogRef ++ " db_id=~w pid=~w " - ++ LogText - ++ DurationText ++ "~n", - [DBid|[self()|Subs]]); - false -> - ok - end; + {LogLevel, Log} = maps:get(LogRef, ?LOGBASE), + LogOpts = get_opts(), + case should_i_log(LogLevel, SupportedLevels, LogRef, LogOpts) of + true -> + DBid = LogOpts#log_options.database_id, + Prefix = + log_prefix( + localtime_ms(), LogLevel, LogRef, DBid, self()), + Suffix = <<"~n">>, + Duration = duration_text(StartTime), + io:format( + iolist_to_binary([Prefix, Log, Duration, Suffix]), + Subs); false -> ok end. +-spec log_randomtimer(atom(), list(), erlang:timestamp(), float()) -> ok. log_randomtimer(LogReference, Subs, StartTime, RandomProb) -> R = leveled_rand:uniform(), case R < RandomProb of @@ -570,53 +453,125 @@ log_randomtimer(LogReference, Subs, StartTime, RandomProb) -> ok end. -format_time() -> - format_time(localtime_ms()). - localtime_ms() -> {_, _, Micro} = Now = os:timestamp(), {Date, {Hours, Minutes, Seconds}} = calendar:now_to_local_time(Now), {Date, {Hours, Minutes, Seconds, Micro div 1000 rem 1000}}. -format_time({{Y, M, D}, {H, Mi, S, Ms}}) -> - io_lib:format("~b-~2..0b-~2..0b", [Y, M, D]) ++ "T" ++ - io_lib:format("~2..0b:~2..0b:~2..0b.~3..0b", [H, Mi, S, Ms]). +-spec log_prefix( + tuple(), atom(), atom(), non_neg_integer(), pid()) -> io_lib:chars(). +log_prefix({{Y, M, D}, {H, Mi, S, Ms}}, LogLevel, LogRef, DBid, Pid) -> + [integer_to_list(Y), $-, i2l(M), $-, i2l(D), + $T, i2l(H), $:, i2l(Mi), $:, i2l(S), $., i3l(Ms), + " log_level=", atom_to_list(LogLevel), " log_ref=", atom_to_list(LogRef), + " db_id=", integer_to_list(DBid), " pid=", pid_to_list(Pid), " "]. +-spec i2l(non_neg_integer()) -> list(). +i2l(I) when I < 10 -> + [$0, $0+I]; +i2l(I) -> + integer_to_list(I). + +-spec i3l(non_neg_integer()) -> list(). +i3l(I) when I < 100 -> + [$0 | i2l(I)]; +i3l(I) -> + integer_to_list(I). + +-spec duration_text(erlang:timestamp()) -> io_lib:chars(). +duration_text(StartTime) -> + case timer:now_diff(os:timestamp(), StartTime) of + US when US > 1000 -> + [" with us_duration=", integer_to_list(US), + " or ms_duration=", integer_to_list(US div 1000)]; + US -> + [" with us_duration=", integer_to_list(US)] + end. %%%============================================================================ %%% Test %%%============================================================================ - - -ifdef(TEST). +format_time({{Y, M, D}, {H, Mi, S, Ms}}) -> + io_lib:format("~b-~2..0b-~2..0b", [Y, M, D]) ++ "T" ++ + io_lib:format("~2..0b:~2..0b:~2..0b.~3..0b", [H, Mi, S, Ms]). 
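[Editor's note - worked values for the refactored duration_text/1 above, to make the iolist output concrete; the microsecond figures are illustrative.]

%% timer:now_diff/2 result of 2500us -> " with us_duration=2500 or ms_duration=2"
%% timer:now_diff/2 result of 800us  -> " with us_duration=800"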
+ +prefix_compare_test() -> + Time = localtime_ms(), + DBid = 64, + LogLevel = info, + LogRef = b0001, + {TS0, OldTS} = + timer:tc(?MODULE, format_time, [Time]), + {TS1, NewPrefix} = + timer:tc(?MODULE, log_prefix, [Time, LogLevel, LogRef, DBid, self()]), + {NewTS, _Rest} = lists:split(23, lists:flatten(NewPrefix)), + ?assertMatch(OldTS, NewTS), + io:format(user, "~nTimestamp timings old ~w new ~w~n", [TS0, TS1]). + log_test() -> - log("D0001", []), - log_timer("D0001", [], os:timestamp()). + log(d0001, []), + log_timer(d0001, [], os:timestamp()). log_warn_test() -> - ok = log("G0001", [], [warn, error]), - ok = log("G8888", [], [info, warn, error]), - ok = log_timer("G0001", [], os:timestamp(), [warn, error]), - ok = log_timer("G8888", [], os:timestamp(), [info, warn, error]). + ok = log(g0001, [], [warn, error]), + ok = log_timer(g0001, [], os:timestamp(), [warn, error]). shouldilog_test() -> ok = set_loglevel(debug), - ?assertMatch(true, should_i_log(info, ?LOG_LEVELS, "G0001")), + ?assertMatch(true, should_i_log(info, ?LOG_LEVELS, g0001)), ok = set_loglevel(info), - ?assertMatch(true, should_i_log(info, ?LOG_LEVELS, "G0001")), - ok = add_forcedlogs(["G0001"]), + ?assertMatch(true, should_i_log(info, ?LOG_LEVELS, g0001)), + ok = add_forcedlogs([g0001]), ok = set_loglevel(error), - ?assertMatch(true, should_i_log(info, ?LOG_LEVELS, "G0001")), - ?assertMatch(false, should_i_log(info, ?LOG_LEVELS, "G0002")), - ok = remove_forcedlogs(["G0001"]), + ?assertMatch(true, should_i_log(info, ?LOG_LEVELS, g0001)), + ?assertMatch(false, should_i_log(info, ?LOG_LEVELS, g0002)), + ok = remove_forcedlogs([g0001]), ok = set_loglevel(info), - ?assertMatch(false, should_i_log(debug, ?LOG_LEVELS, "D0001")). + ?assertMatch(false, should_i_log(debug, ?LOG_LEVELS, d0001)). badloglevel_test() -> % Set a bad log level - and everything logs ?assertMatch(true, is_active_level(?LOG_LEVELS, debug, unsupported)), ?assertMatch(true, is_active_level(?LOG_LEVELS, critical, unsupported)). +timing_test() -> + % Timing test + % Previous LOGBASE used list with string-based keys and values + % The size of the LOGBASE was 19,342 words (>150KB), and logs took + % o(100) microseconds. + % Changing the LOGBASE ot a map with binary-based keys and values does not + % appear to improve the speed of logging, but does reduce the size of the + % LOGBASE to just over 2,000 words (so an order of magnitude improvement) + timer:sleep(10), + io:format(user, "Log timings:~n", []), + io:format(user, "Logbase size ~w~n", [erts_debug:flat_size(?LOGBASE)]), + io:format( + user, + "Front log timing ~p~n", + [timer:tc(fun() -> log(cdb21, ["test_file"]) end)] + ), + io:format( + user, + "Mid log timing ~p~n", + [timer:tc(fun() -> log(pc013, ["test_file"]) end)] + ), + io:format( + user, + "End log timing ~p~n", + [timer:tc(fun() -> log(b0003, ["testing"]) end)] + ), + io:format( + user, + "Big log timing ~p~n", + [timer:tc(fun() -> log(sst13, [100,100,100,100,true,1]) end)] + ), + io:format( + user, + "Timer log timing ~p~n", + [timer:tc(fun() -> log_timer(pc015, [], os:timestamp()) end)] + ). + -endif. diff --git a/src/leveled_math.erl b/src/leveled_math.erl deleted file mode 100644 index f88ed4f..0000000 --- a/src/leveled_math.erl +++ /dev/null @@ -1,38 +0,0 @@ -%% Handle missing log2 prior to OTP18 - --module(leveled_math). - -%% API --export([ - log2/1 - ]). - - --include_lib("eunit/include/eunit.hrl"). 
-
-%%%===================================================================
-%%% Use log2
-%%%===================================================================
--ifndef(no_log2).
-
-log2(X) ->
-    math:log2(X).
-
--else.
-%%%===================================================================
-%%% Old (r18) random style functions
-%%%===================================================================
-
-log2(X) ->
-    math:log(X) / 0.6931471805599453.
-
--endif.
-
-
--ifdef(TEST).
-
-log2_test() ->
-    ?assertMatch(8, round(log2(256))),
-    ?assertMatch(16, round(log2(65536))).
-
--endif.
diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl
new file mode 100644
index 0000000..0727c72
--- /dev/null
+++ b/src/leveled_monitor.erl
@@ -0,0 +1,617 @@
+%% -------- MONITOR ---------
+%%
+%% The bookie's monitor is a process dedicated to gathering and reporting
+%% stats related to performance of the leveled store.
+%%
+%% Depending on the sample frequency, a process will randomly determine whether
+%% or not to take a timing of a transaction. If a timing is taken the result
+%% is cast to the monitor.
+%%
+%% The monitor gathers stats across the store, and then on a timing loop logs
+%% out the gathered stats for one of the monitored stat types once every
+%% ?LOG_FREQUENCY_SECONDS. On each timing trigger the monitor should move on
+%% to the next timing stat in its list.
+%%
+%% The different types of timing stats are defined within the ?LOG_LIST. Each
+%% type of timing stat has its own record maintained in the monitor loop state.
+
+-module(leveled_monitor).
+
+-behaviour(gen_server).
+
+-export([
+    init/1,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2,
+    terminate/2,
+    code_change/3]).
+
+-export([
+    monitor_start/2,
+    add_stat/2,
+    report_stats/2,
+    monitor_close/1,
+    maybe_time/1,
+    step_time/1,
+    log_level/2,
+    log_add/2,
+    log_remove/2,
+    get_defaults/0]).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(LOG_LIST,
+    [bookie_get, bookie_put, bookie_head, bookie_snap,
+        pcl_fetch, sst_fetch, cdb_get]).
+-define(LOG_FREQUENCY_SECONDS, 30).
+
+
+-record(bookie_get_timings,
+    {sample_count = 0 :: non_neg_integer(),
+    head_time = 0 :: non_neg_integer(),
+    body_time = 0 :: non_neg_integer(),
+    fetch_count = 0 :: non_neg_integer(),
+    sample_start_time = os:timestamp() :: erlang:timestamp()}).
+
+-record(bookie_head_timings,
+    {sample_count = 0 :: non_neg_integer(),
+    cache_count = 0 :: non_neg_integer(),
+    found_count = 0 :: non_neg_integer(),
+    cache_hits = 0 :: non_neg_integer(),
+    fetch_ledger_time = 0 :: non_neg_integer(),
+    fetch_ledgercache_time = 0 :: non_neg_integer(),
+    rsp_time = 0 :: non_neg_integer(),
+    notfound_time = 0 :: non_neg_integer(),
+    sample_start_time = os:timestamp() :: erlang:timestamp()}).
+
+-record(bookie_put_timings,
+    {sample_count = 0 :: non_neg_integer(),
+    ink_time = 0 :: non_neg_integer(),
+    prep_time = 0 :: non_neg_integer(),
+    mem_time = 0 :: non_neg_integer(),
+    total_size = 0 :: non_neg_integer(),
+    sample_start_time = os:timestamp() :: erlang:timestamp()}).
+
+-record(bookie_snap_timings,
+    {sample_count = 0 :: non_neg_integer(),
+    bookie_time = 0 :: non_neg_integer(),
+    pcl_time = 0 :: non_neg_integer(),
+    sample_start_time = os:timestamp() :: erlang:timestamp()}).
+ +-record(pcl_fetch_timings, + {sample_count = 0 :: non_neg_integer(), + foundmem_time = 0 :: non_neg_integer(), + found0_time = 0 :: non_neg_integer(), + found1_time = 0 :: non_neg_integer(), + found2_time = 0 :: non_neg_integer(), + found3_time = 0 :: non_neg_integer(), + foundlower_time = 0 :: non_neg_integer(), + notfound_time = 0 :: non_neg_integer(), + foundmem_count = 0 :: non_neg_integer(), + found0_count = 0 :: non_neg_integer(), + found1_count = 0 :: non_neg_integer(), + found2_count = 0 :: non_neg_integer(), + found3_count = 0 :: non_neg_integer(), + foundlower_count = 0 :: non_neg_integer(), + notfound_count = 0 :: non_neg_integer(), + sample_start_time = os:timestamp() :: erlang:timestamp()}). + +-record(sst_fetch_timings, + {sample_count = 0 :: non_neg_integer(), + fetchcache_time = 0 :: non_neg_integer(), + slotcached_time = 0 :: non_neg_integer(), + slotnoncached_time = 0 :: non_neg_integer(), + notfound_time = 0 :: non_neg_integer(), + fetchcache_count = 0 :: non_neg_integer(), + slotcached_count = 0 :: non_neg_integer(), + slotnoncached_count = 0 :: non_neg_integer(), + notfound_count = 0 :: non_neg_integer(), + sample_start_time = os:timestamp() :: erlang:timestamp()}). + +-record(cdb_get_timings, + {sample_count = 0 :: non_neg_integer(), + cycle_count = 0 :: non_neg_integer(), + index_time = 0 :: non_neg_integer(), + read_time = 0 :: non_neg_integer(), + sample_start_time = os:timestamp() :: erlang:timestamp()}). + +-record(state, + {bookie_get_timings = #bookie_get_timings{} :: bookie_get_timings(), + bookie_head_timings = #bookie_head_timings{} :: bookie_head_timings(), + bookie_put_timings = #bookie_put_timings{} :: bookie_put_timings(), + bookie_snap_timings = #bookie_snap_timings{} :: bookie_snap_timings(), + pcl_fetch_timings = #pcl_fetch_timings{} :: pcl_fetch_timings(), + sst_fetch_timings = [] :: list(sst_fetch_timings()), + cdb_get_timings = #cdb_get_timings{} :: cdb_get_timings(), + log_frequency = ?LOG_FREQUENCY_SECONDS :: pos_integer(), + log_order = [] :: list(log_type())}). + + +-type bookie_get_timings() :: #bookie_get_timings{}. +-type bookie_head_timings() :: #bookie_head_timings{}. +-type bookie_put_timings() :: #bookie_put_timings{}. +-type bookie_snap_timings() :: #bookie_snap_timings{}. +-type pcl_fetch_timings() :: #pcl_fetch_timings{}. +-type cdb_get_timings() :: #cdb_get_timings{}. +-type sst_fetch_timings() :: + {leveled_pmanifest:lsm_level(), #sst_fetch_timings{}}. +-type log_type() :: + bookie_head|bookie_get|bookie_put|bookie_snap|pcl_fetch|sst_fetch|cdb_get. +-type pcl_level() :: mem|leveled_pmanifest:lsm_level(). +-type sst_fetch_type() :: + fetch_cache|slot_cachedblock|slot_noncachedblock|not_found. +-type microsecs() :: pos_integer(). +-type byte_size() :: pos_integer(). +-type monitor() :: {no_monitor, 0}|{pid(), 0..100}. +-type timing() :: no_timing|pos_integer(). + + +-type bookie_get_update() :: + {bookie_get_update, microsecs(), microsecs()|not_found}. +-type bookie_head_update() :: + {bookie_head_update, microsecs(), microsecs()|not_found, 0..1}. +-type bookie_put_update() :: + {bookie_put_update, microsecs(), microsecs(), microsecs(), byte_size()}. +-type bookie_snap_update() :: + {bookie_snap_update, microsecs(), microsecs()}. +-type pcl_fetch_update() :: + {pcl_fetch_update, not_found|pcl_level(), microsecs()}. +-type sst_fetch_update() :: + {sst_fetch_update, + leveled_pmanifest:lsm_level(), sst_fetch_type(), microsecs()}. +-type cdb_get_update() :: + {cdb_get_update, pos_integer(), microsecs(), microsecs()}. 
+-type statistic() :: + bookie_get_update()|bookie_head_update()|bookie_put_update()| + bookie_snap_update()| + pcl_fetch_update()|sst_fetch_update()|cdb_get_update(). + +-export_type([monitor/0, timing/0, sst_fetch_type/0, log_type/0]). + +%%%============================================================================ +%%% API +%%%============================================================================ + +-spec monitor_start(pos_integer(), list(log_type())) -> {ok, pid()}. +monitor_start(LogFreq, LogOrder) -> + gen_server:start_link( + ?MODULE, [leveled_log:get_opts(), LogFreq, LogOrder], []). + +-spec add_stat(pid(), statistic()) -> ok. +add_stat(Watcher, Statistic) -> + gen_server:cast(Watcher, Statistic). + +-spec report_stats(pid(), log_type()) -> ok. +report_stats(Watcher, StatsType) -> + gen_server:cast(Watcher, {report_stats, StatsType}). + +-spec monitor_close(pid()|no_monitor) -> ok. +monitor_close(no_monitor) -> + ok; +monitor_close(Watcher) -> + gen_server:call(Watcher, close, 60000). + +-spec log_level(pid(), leveled_log:log_level()) -> ok. +log_level(Pid, LogLevel) -> + gen_server:cast(Pid, {log_level, LogLevel}). + +-spec log_add(pid(), list(string())) -> ok. +log_add(Pid, ForcedLogs) -> + gen_server:cast(Pid, {log_add, ForcedLogs}). + +-spec log_remove(pid(), list(string())) -> ok. +log_remove(Pid, ForcedLogs) -> + gen_server:cast(Pid, {log_remove, ForcedLogs}). + +-spec maybe_time(monitor()) -> os:timestamp()|no_timing. +maybe_time({_Pid, TimingProbability}) -> + case leveled_rand:uniform(100) of + N when N =< TimingProbability -> + os:timestamp(); + _ -> + no_timing + end. + +-spec step_time( + os:timestamp()|no_timing) -> + {pos_integer(), os:timestamp()}|{no_timing, no_timing}. +step_time(no_timing) -> + {no_timing, no_timing}; +step_time(TS) -> + Now = os:timestamp(), + {timer:now_diff(Now, TS), Now}. + +-spec get_defaults() -> {pos_integer(), list(log_type())}. +get_defaults() -> + {?LOG_FREQUENCY_SECONDS, ?LOG_LIST}. + +%%%============================================================================ +%%% gen_server callbacks +%%%============================================================================ + +init([LogOpts, LogFrequency, LogOrder]) -> + leveled_log:save(LogOpts), + leveled_rand:seed(), + RandomLogOrder = + lists:map( + fun({_R, SL}) -> SL end, + lists:keysort( + 1, + lists:map( + fun(L) -> {leveled_rand:uniform(), L} end, + LogOrder))), + InitialJitter = leveled_rand:uniform(2 * 1000 * LogFrequency), + erlang:send_after(InitialJitter, self(), report_next_stats), + {ok, #state{log_frequency = LogFrequency, log_order = RandomLogOrder}}. + +handle_call(close, _From, State) -> + {stop, normal, ok, State}. 
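%% Editor's note - illustrative sketch only, not part of the patch above. It
%% shows the calling pattern that the module documentation assumes: the
%% sampled process takes a possible start time with maybe_time/1, does the
%% work, measures the elapsed time with step_time/1, and casts the result to
%% the monitor with add_stat/2. The function name sample_fetch/2 and the use
%% of a pcl_fetch_update stat are assumptions made for this example only.
sample_fetch(WorkFun, Monitor) ->
    SW0 = leveled_monitor:maybe_time(Monitor),
    Result = WorkFun(),
    case leveled_monitor:step_time(SW0) of
        {no_timing, no_timing} ->
            % This request was not selected for sampling
            ok;
        {Duration, _Now} ->
            % Monitor is {MonitorPid, SampleProportion} when stats are enabled
            {MonitorPid, _SampleProportion} = Monitor,
            leveled_monitor:add_stat(
                MonitorPid, {pcl_fetch_update, memory, Duration})
    end,
    Result.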
+ +handle_cast({bookie_head_update, FetchTime, RspTime, CacheHit}, State) -> + Timings = State#state.bookie_head_timings, + SC0 = Timings#bookie_head_timings.sample_count + 1, + CC0 = Timings#bookie_head_timings.cache_count + CacheHit, + FC = Timings#bookie_head_timings.found_count, + FLT = Timings#bookie_head_timings.fetch_ledger_time, + FCT = Timings#bookie_head_timings.fetch_ledgercache_time, + RST = Timings#bookie_head_timings.rsp_time, + NFT = Timings#bookie_head_timings.notfound_time, + + {FC0, FLT0, FCT0, RST0, NFT0} = + case {RspTime, CacheHit} of + {not_found, _} -> + {FC, FLT, FCT, RST, NFT + FetchTime}; + {RspTime, 0} -> + {FC + 1, FLT + FetchTime, FCT, RST + RspTime, NFT}; + {RspTime, 1} -> + {FC + 1, FLT, FCT + FetchTime, RST + RspTime, NFT} + end, + UpdTimings = + #bookie_head_timings{ + sample_count = SC0, + cache_count = CC0, + found_count = FC0, + fetch_ledger_time = FLT0, + fetch_ledgercache_time = FCT0, + rsp_time = RST0, + notfound_time = NFT0 + }, + {noreply, State#state{bookie_head_timings = UpdTimings}}; +handle_cast({bookie_get_update, HeadTime, BodyTime}, State) -> + Timings = State#state.bookie_get_timings, + SC0 = Timings#bookie_get_timings.sample_count + 1, + {FC0, HT0, BT0} = + case BodyTime of + not_found -> + {Timings#bookie_get_timings.fetch_count, + Timings#bookie_get_timings.head_time + HeadTime, + Timings#bookie_get_timings.body_time}; + BodyTime -> + {Timings#bookie_get_timings.fetch_count + 1, + Timings#bookie_get_timings.head_time + HeadTime, + Timings#bookie_get_timings.body_time + BodyTime} + end, + UpdTimings = + #bookie_get_timings{ + sample_count = SC0, + head_time = HT0, + body_time = BT0, + fetch_count = FC0 + }, + {noreply, State#state{bookie_get_timings = UpdTimings}}; +handle_cast({bookie_put_update, InkTime, PrepTime, MemTime, Size}, State) -> + Timings = State#state.bookie_put_timings, + SC0 = Timings#bookie_put_timings.sample_count + 1, + SZ0 = Timings#bookie_put_timings.total_size + Size, + IT0 = Timings#bookie_put_timings.ink_time + InkTime, + PT0 = Timings#bookie_put_timings.prep_time + PrepTime, + MT0 = Timings#bookie_put_timings.mem_time + MemTime, + UpdTimings = + #bookie_put_timings{ + sample_count = SC0, + ink_time = IT0, + prep_time = PT0, + mem_time = MT0, + total_size = SZ0 + }, + {noreply, State#state{bookie_put_timings = UpdTimings}}; +handle_cast({bookie_snap_update, BookieTime, PCLTime}, State) -> + Timings = State#state.bookie_snap_timings, + SC0 = Timings#bookie_snap_timings.sample_count + 1, + BT0 = Timings#bookie_snap_timings.bookie_time + BookieTime, + PT0 = Timings#bookie_snap_timings.pcl_time + PCLTime, + UpdTimings = + #bookie_snap_timings{ + sample_count = SC0, + bookie_time = BT0, + pcl_time = PT0 + }, + {noreply, State#state{bookie_snap_timings = UpdTimings}}; +handle_cast({pcl_fetch_update, Level, FetchTime}, State) -> + Timings = State#state.pcl_fetch_timings, + SC0 = Timings#pcl_fetch_timings.sample_count + 1, + UpdTimings = + case Level of + not_found -> + Timings#pcl_fetch_timings{ + notfound_count = + Timings#pcl_fetch_timings.notfound_count + 1, + notfound_time = + Timings#pcl_fetch_timings.notfound_time + FetchTime + }; + memory -> + Timings#pcl_fetch_timings{ + foundmem_count = + Timings#pcl_fetch_timings.foundmem_count + 1, + foundmem_time = + Timings#pcl_fetch_timings.foundmem_time + FetchTime + }; + 0 -> + Timings#pcl_fetch_timings{ + found0_count = + Timings#pcl_fetch_timings.found0_count + 1, + found0_time = + Timings#pcl_fetch_timings.found0_time + FetchTime + }; + 1 -> + Timings#pcl_fetch_timings{ 
+ found1_count = + Timings#pcl_fetch_timings.found1_count + 1, + found1_time = + Timings#pcl_fetch_timings.found1_time + FetchTime + }; + 2 -> + Timings#pcl_fetch_timings{ + found2_count = + Timings#pcl_fetch_timings.found2_count + 1, + found2_time = + Timings#pcl_fetch_timings.found2_time + FetchTime + }; + 3 -> + Timings#pcl_fetch_timings{ + found3_count = + Timings#pcl_fetch_timings.found3_count + 1, + found3_time = + Timings#pcl_fetch_timings.found3_time + FetchTime + }; + N when N > 3 -> + Timings#pcl_fetch_timings{ + foundlower_count = + Timings#pcl_fetch_timings.foundlower_count + 1, + foundlower_time = + Timings#pcl_fetch_timings.foundlower_time + FetchTime + } + end, + UpdTimings0 = UpdTimings#pcl_fetch_timings{sample_count = SC0}, + {noreply, State#state{pcl_fetch_timings = UpdTimings0}}; +handle_cast({sst_fetch_update, Level, FetchPoint, FetchTime}, State) -> + Timings = + case lists:keyfind(Level, 1, State#state.sst_fetch_timings) of + {Level, PrvTimings} -> + PrvTimings; + false -> + #sst_fetch_timings{} + end, + SC0 = Timings#sst_fetch_timings.sample_count + 1, + UpdTimings = + case FetchPoint of + not_found -> + Timings#sst_fetch_timings{ + notfound_count = + Timings#sst_fetch_timings.notfound_count + 1, + notfound_time = + Timings#sst_fetch_timings.notfound_time + FetchTime + }; + fetch_cache -> + Timings#sst_fetch_timings{ + fetchcache_count = + Timings#sst_fetch_timings.fetchcache_count + 1, + fetchcache_time = + Timings#sst_fetch_timings.fetchcache_time + FetchTime + }; + slot_cachedblock -> + Timings#sst_fetch_timings{ + slotcached_count = + Timings#sst_fetch_timings.slotcached_count + 1, + slotcached_time = + Timings#sst_fetch_timings.slotcached_time + FetchTime + }; + slot_noncachedblock -> + Timings#sst_fetch_timings{ + slotnoncached_count = + Timings#sst_fetch_timings.slotnoncached_count + 1, + slotnoncached_time = + Timings#sst_fetch_timings.slotnoncached_time + FetchTime + } + end, + UpdLevel = {Level, UpdTimings#sst_fetch_timings{sample_count = SC0}}, + UpdLevels = + lists:ukeysort(1, [UpdLevel|State#state.sst_fetch_timings]), + {noreply, State#state{sst_fetch_timings = UpdLevels}}; +handle_cast({cdb_get_update, CycleCount, IndexTime, ReadTime}, State) -> + Timings = State#state.cdb_get_timings, + SC0 = Timings#cdb_get_timings.sample_count + 1, + CC0 = Timings#cdb_get_timings.cycle_count + CycleCount, + IT0 = Timings#cdb_get_timings.index_time + IndexTime, + RT0 = Timings#cdb_get_timings.read_time + ReadTime, + UpdTimings = + #cdb_get_timings{ + sample_count = SC0, + cycle_count = CC0, + index_time = IT0, + read_time = RT0 + }, + {noreply, State#state{cdb_get_timings = UpdTimings}}; +handle_cast({report_stats, bookie_get}, State) -> + Timings = State#state.bookie_get_timings, + SamplePeriod = + timer:now_diff( + os:timestamp(), + Timings#bookie_get_timings.sample_start_time) div 1000000, + leveled_log:log( + b0016, + [Timings#bookie_get_timings.sample_count, + Timings#bookie_get_timings.head_time, + Timings#bookie_get_timings.body_time, + Timings#bookie_get_timings.fetch_count, + SamplePeriod + ]), + {noreply, State#state{bookie_get_timings = #bookie_get_timings{}}}; +handle_cast({report_stats, bookie_head}, State) -> + Timings = State#state.bookie_head_timings, + SamplePeriod = + timer:now_diff( + os:timestamp(), + Timings#bookie_head_timings.sample_start_time) div 1000000, + leveled_log:log( + b0018, + [Timings#bookie_head_timings.sample_count, + Timings#bookie_head_timings.cache_count, + Timings#bookie_head_timings.found_count, + 
Timings#bookie_head_timings.fetch_ledger_time, + Timings#bookie_head_timings.fetch_ledgercache_time, + Timings#bookie_head_timings.rsp_time, + Timings#bookie_head_timings.notfound_time, + SamplePeriod + ]), + {noreply, State#state{bookie_head_timings = #bookie_head_timings{}}}; +handle_cast({report_stats, bookie_put}, State) -> + Timings = State#state.bookie_put_timings, + SamplePeriod = + timer:now_diff( + os:timestamp(), + Timings#bookie_put_timings.sample_start_time) div 1000000, + leveled_log:log( + b0015, + [Timings#bookie_put_timings.sample_count, + Timings#bookie_put_timings.ink_time, + Timings#bookie_put_timings.prep_time, + Timings#bookie_put_timings.mem_time, + Timings#bookie_put_timings.total_size, + SamplePeriod + ]), + {noreply, State#state{bookie_put_timings = #bookie_put_timings{}}}; +handle_cast({report_stats, bookie_snap}, State) -> + Timings = State#state.bookie_snap_timings, + SamplePeriod = + timer:now_diff( + os:timestamp(), + Timings#bookie_snap_timings.sample_start_time) div 1000000, + leveled_log:log( + b0017, + [Timings#bookie_snap_timings.sample_count, + Timings#bookie_snap_timings.bookie_time, + Timings#bookie_snap_timings.pcl_time, + SamplePeriod + ]), + {noreply, State#state{bookie_snap_timings = #bookie_snap_timings{}}}; +handle_cast({report_stats, pcl_fetch}, State) -> + Timings = State#state.pcl_fetch_timings, + SamplePeriod = + timer:now_diff( + os:timestamp(), + Timings#pcl_fetch_timings.sample_start_time) div 1000000, + leveled_log:log( + p0032, + [Timings#pcl_fetch_timings.sample_count, + Timings#pcl_fetch_timings.foundmem_time, + Timings#pcl_fetch_timings.found0_time, + Timings#pcl_fetch_timings.found1_time, + Timings#pcl_fetch_timings.found2_time, + Timings#pcl_fetch_timings.found3_time, + Timings#pcl_fetch_timings.foundlower_time, + Timings#pcl_fetch_timings.notfound_time, + Timings#pcl_fetch_timings.foundmem_count, + Timings#pcl_fetch_timings.found0_count, + Timings#pcl_fetch_timings.found1_count, + Timings#pcl_fetch_timings.found2_count, + Timings#pcl_fetch_timings.found3_count, + Timings#pcl_fetch_timings.foundlower_count, + Timings#pcl_fetch_timings.notfound_count, + SamplePeriod + ]), + {noreply, State#state{pcl_fetch_timings = #pcl_fetch_timings{}}}; +handle_cast({report_stats, sst_fetch}, State) -> + LogFun = + fun({Level, Timings}) -> + SamplePeriod = + timer:now_diff( + os:timestamp(), + Timings#sst_fetch_timings.sample_start_time) div 1000000, + leveled_log:log( + sst12, + [Level, + Timings#sst_fetch_timings.sample_count, + Timings#sst_fetch_timings.notfound_time, + Timings#sst_fetch_timings.fetchcache_time, + Timings#sst_fetch_timings.slotcached_time, + Timings#sst_fetch_timings.slotnoncached_time, + Timings#sst_fetch_timings.notfound_count, + Timings#sst_fetch_timings.fetchcache_count, + Timings#sst_fetch_timings.slotcached_count, + Timings#sst_fetch_timings.slotnoncached_count, + SamplePeriod + ]) + end, + lists:foreach(LogFun, State#state.sst_fetch_timings), + {noreply, State#state{sst_fetch_timings = []}}; +handle_cast({report_stats, cdb_get}, State) -> + Timings = State#state.cdb_get_timings, + SamplePeriod = + timer:now_diff( + os:timestamp(), + Timings#cdb_get_timings.sample_start_time) div 1000000, + leveled_log:log( + cdb19, + [Timings#cdb_get_timings.sample_count, + Timings#cdb_get_timings.cycle_count, + Timings#cdb_get_timings.index_time, + Timings#cdb_get_timings.read_time, + SamplePeriod + ]), + {noreply, State#state{cdb_get_timings = #cdb_get_timings{}}}; +handle_cast({log_level, LogLevel}, State) -> + ok = 
leveled_log:set_loglevel(LogLevel), + {noreply, State}; +handle_cast({log_add, ForcedLogs}, State) -> + ok = leveled_log:add_forcedlogs(ForcedLogs), + {noreply, State}; +handle_cast({log_remove, ForcedLogs}, State) -> + ok = leveled_log:remove_forcedlogs(ForcedLogs), + {noreply, State}. + +handle_info(report_next_stats, State) -> + erlang:send_after( + State#state.log_frequency * 1000, self(), report_next_stats), + case State#state.log_order of + [] -> + {noreply, State}; + [NextStat|TailLogOrder] -> + ok = report_stats(self(), NextStat), + {noreply, State#state{log_order = TailLogOrder ++ [NextStat]}} + end. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +%%%============================================================================ +%%% Test +%%%============================================================================ + +-ifdef(TEST). + +coverage_cheat_test() -> + {ok, M} = monitor_start(1, []), + timer:sleep(2000), + {ok, _State1} = code_change(null, #state{}, null), + ok = add_stat(M, {pcl_fetch_update, 4, 100}), + ok = report_stats(M, pcl_fetch), + % Can close, so empty log_order hasn't crashed + ok = monitor_close(M). + +-endif. \ No newline at end of file diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index c8f494d..c5c7618 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -53,32 +53,39 @@ -record(state, {owner :: pid() | undefined, root_path :: string() | undefined, - pending_deletions = dict:new(), % OTP 16 does not like type - sst_options :: #sst_options{} + pending_deletions = dict:new() :: dict:dict(), + sst_options :: sst_options() }). --type manifest_entry() :: #manifest_entry{}. +-type sst_options() :: #sst_options{}. %%%============================================================================ %%% API %%%============================================================================ -clerk_new(Owner, Manifest, OptsSST) -> +-spec clerk_new( + pid(), string(), sst_options()) -> {ok, pid()}. +clerk_new(Owner, RootPath, OptsSST) -> {ok, Pid} = gen_server:start_link(?MODULE, [leveled_log:get_opts(), {sst_options, OptsSST}], []), - ok = gen_server:call(Pid, {load, Owner, Manifest}, infinity), - leveled_log:log("PC001", [Pid, Owner]), + ok = gen_server:call(Pid, {load, Owner, RootPath}, infinity), + leveled_log:log(pc001, [Pid, Owner]), {ok, Pid}. +-spec clerk_prompt(pid()) -> ok. clerk_prompt(Pid) -> gen_server:cast(Pid, prompt). +-spec clerk_promptdeletions(pid(), pos_integer()) -> ok. clerk_promptdeletions(Pid, ManifestSQN) -> gen_server:cast(Pid, {prompt_deletions, ManifestSQN}). +-spec clerk_push( + pid(), {leveled_pmanifest:lsm_level(), leveled_pmanifest:manifest()}) -> + ok. clerk_push(Pid, Work) -> gen_server:cast(Pid, {push_work, Work}). @@ -100,6 +107,7 @@ clerk_addlogs(Pid, ForcedLogs) -> clerk_removelogs(Pid, ForcedLogs) -> gen_server:cast(Pid, {remove_logs, ForcedLogs}). +-spec clerk_close(pid()) -> ok. clerk_close(Pid) -> gen_server:call(Pid, close, 20000). 
@@ -119,10 +127,13 @@ handle_call(close, _From, State) ->
 handle_cast(prompt, State) ->
     handle_info(timeout, State);
 handle_cast({push_work, Work}, State) ->
-    {ManifestSQN, Deletions} = handle_work(Work, State),
+    {ManifestSQN, Deletions} =
+        handle_work(
+            Work,
+            State#state.root_path, State#state.sst_options, State#state.owner),
     PDs = dict:store(ManifestSQN, Deletions, State#state.pending_deletions),
-    leveled_log:log("PC022", [ManifestSQN]),
-    {noreply, State#state{pending_deletions = PDs}, ?MAX_TIMEOUT};
+    leveled_log:log(pc022, [ManifestSQN]),
+    {noreply, State#state{pending_deletions = PDs}, ?MIN_TIMEOUT};
 handle_cast({prompt_deletions, ManifestSQN}, State) ->
     {Deletions, UpdD} = return_deletions(ManifestSQN,
                                          State#state.pending_deletions),
@@ -145,7 +156,7 @@ handle_cast({remove_logs, ForcedLogs}, State) ->
     {noreply, State#state{sst_options = SSTopts0}}.
 
 handle_info(timeout, State) ->
-    request_work(State),
+    ok = leveled_penciller:pcl_workforclerk(State#state.owner),
     % When handling work, the clerk can collect a large number of binary
     % references, so proactively GC this process before receiving any future
     % work. In under pressure clusters, clerks with large binary memory
@@ -154,7 +165,7 @@ handle_info(timeout, State) ->
     {noreply, State, ?MAX_TIMEOUT}.
 
 terminate(Reason, _State) ->
-    leveled_log:log("PC005", [self(), Reason]).
+    leveled_log:log(pc005, [self(), Reason]).
 
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
 
@@ -164,31 +175,35 @@ code_change(_OldVsn, State, _Extra) ->
 %%% Internal functions
 %%%============================================================================
 
-request_work(State) ->
-    ok = leveled_penciller:pcl_workforclerk(State#state.owner).
-
-handle_work({SrcLevel, Manifest}, State) ->
-    {UpdManifest, EntriesToDelete} = merge(SrcLevel,
-                                            Manifest,
-                                            State#state.root_path,
-                                            State#state.sst_options),
-    leveled_log:log("PC007", []),
+-spec handle_work(
+    {leveled_pmanifest:lsm_level(), leveled_pmanifest:manifest()},
+    string(), sst_options(), pid()) ->
+        {pos_integer(),
+            list(leveled_pmanifest:manifest_entry())}.
+handle_work(
+    {SrcLevel, Manifest}, RootPath, SSTOpts, Owner) ->
+    {UpdManifest, EntriesToDelete} =
+        merge(SrcLevel, Manifest, RootPath, SSTOpts),
+    leveled_log:log(pc007, []),
     SWMC = os:timestamp(),
-    ok = leveled_penciller:pcl_manifestchange(State#state.owner,
-                                                UpdManifest),
-    leveled_log:log_timer("PC017", [], SWMC),
+    ok = leveled_penciller:pcl_manifestchange(Owner, UpdManifest),
+    leveled_log:log_timer(pc017, [], SWMC),
    SWSM = os:timestamp(),
-    ok = leveled_pmanifest:save_manifest(UpdManifest,
-                                            State#state.root_path),
-    leveled_log:log_timer("PC018", [], SWSM),
+    ok = leveled_pmanifest:save_manifest(UpdManifest, RootPath),
+    leveled_log:log_timer(pc018, [], SWSM),
     {leveled_pmanifest:get_manifest_sqn(UpdManifest), EntriesToDelete}.
 
+-spec merge(
+    leveled_pmanifest:lsm_level(), leveled_pmanifest:manifest(),
+    string(), sst_options()) ->
+        {leveled_pmanifest:manifest(),
+            list(leveled_pmanifest:manifest_entry())}.
merge(SrcLevel, Manifest, RootPath, OptsSST) -> case leveled_pmanifest:report_manifest_level(Manifest, SrcLevel + 1) of {0, 0, undefined} -> ok; {FCnt, AvgMem, {MaxFN, MaxP, MaxMem}} -> - leveled_log:log("PC023", + leveled_log:log(pc023, [SrcLevel + 1, FCnt, AvgMem, MaxFN, MaxP, MaxMem]) end, SelectMethod = @@ -206,11 +221,11 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) -> Src#manifest_entry.start_key, Src#manifest_entry.end_key), Candidates = length(SinkList), - leveled_log:log("PC008", [SrcLevel, Candidates]), + leveled_log:log(pc008, [SrcLevel, Candidates]), case Candidates of 0 -> NewLevel = SrcLevel + 1, - leveled_log:log("PC009", [Src#manifest_entry.filename, NewLevel]), + leveled_log:log(pc009, [Src#manifest_entry.filename, NewLevel]), leveled_sst:sst_switchlevels(Src#manifest_entry.owner, NewLevel), Man0 = leveled_pmanifest:switch_manifest_entry(Manifest, NewSQN, @@ -224,6 +239,7 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) -> SST_RP, NewSQN, OptsSST) end. +-spec notify_deletions(list(leveled_pmanifest:manifest_entry()), pid()) -> ok. notify_deletions([], _Penciller) -> ok; notify_deletions([Head|Tail], Penciller) -> @@ -240,7 +256,7 @@ perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN, OptsSST) -> - leveled_log:log("PC010", [Src#manifest_entry.filename, NewSQN]), + leveled_log:log(pc010, [Src#manifest_entry.filename, NewSQN]), SrcList = [{next, Src, all}], MaxSQN = leveled_sst:sst_getmaxsequencenumber(Src#manifest_entry.owner), SinkLevel = SrcLevel + 1, @@ -268,19 +284,19 @@ perform_merge(Manifest, {Man2, [Src|SinkManifestList]}. do_merge([], [], SinkLevel, _SinkB, _RP, NewSQN, _MaxSQN, _Opts, Additions) -> - leveled_log:log("PC011", [NewSQN, SinkLevel, length(Additions)]), + leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions)]), Additions; do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) -> FileName = leveled_penciller:sst_filename(NewSQN, SinkLevel, length(Additions)), - leveled_log:log("PC012", [NewSQN, FileName, SinkB]), + leveled_log:log(pc012, [NewSQN, FileName, SinkB]), TS1 = os:timestamp(), case leveled_sst:sst_newmerge(RP, FileName, KL1, KL2, SinkB, SinkLevel, MaxSQN, OptsSST) of empty -> - leveled_log:log("PC013", [FileName]), + leveled_log:log(pc013, [FileName]), do_merge([], [], SinkLevel, SinkB, RP, NewSQN, MaxSQN, @@ -293,7 +309,7 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) -> owner=Pid, filename=FileName, bloom=Bloom}, - leveled_log:log_timer("PC015", [], TS1), + leveled_log:log_timer(pc015, [], TS1), do_merge(KL1Rem, KL2Rem, SinkLevel, SinkB, RP, NewSQN, MaxSQN, @@ -301,11 +317,13 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) -> Additions ++ [Entry]) end. --spec grooming_scorer(list(manifest_entry())) -> manifest_entry(). +-spec grooming_scorer( + list(leveled_pmanifest:manifest_entry())) + -> leveled_pmanifest:manifest_entry(). grooming_scorer([ME | MEs]) -> InitTombCount = leveled_sst:sst_gettombcount(ME#manifest_entry.owner), {HighestTC, BestME} = grooming_scorer(InitTombCount, ME, MEs), - leveled_log:log("PC024", [HighestTC]), + leveled_log:log(pc024, [HighestTC]), BestME. grooming_scorer(HighestTC, BestME, []) -> @@ -328,7 +346,7 @@ return_deletions(ManifestSQN, PendingDeletionD) -> % % So this is now allowed to crash again PendingDeletions = dict:fetch(ManifestSQN, PendingDeletionD), - leveled_log:log("PC021", [ManifestSQN]), + leveled_log:log(pc021, [ManifestSQN]), {PendingDeletions, dict:erase(ManifestSQN, PendingDeletionD)}. 
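%% Editor's note - illustrative sketch only, not part of the patch above.
%% grooming_scorer/1 is intended to be used as the scoring fun within
%% leveled_pmanifest's selector_strategy() (random | {grooming, ScoringFun}).
%% How often the grooming strategy is chosen is not shown in this hunk; the
%% one-in-N coin toss below is an assumption made for illustration only.
choose_selector_example(GroomOneIn) ->
    case leveled_rand:uniform(GroomOneIn) of
        1 -> {grooming, fun grooming_scorer/1};
        _ -> random
    end.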
%%%============================================================================ diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 99c3a70..6b38077 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -233,12 +233,8 @@ % related to specific query, and the StartKey/EndKey % used to extract this part - persisted_sqn = 0 :: integer(), % The highest SQN persisted - + persisted_sqn = 0 :: integer(), % The highest SQN persisted ledger_sqn = 0 :: integer(), % The highest SQN added to L0 - root_path = "test" :: string(), - - clerk :: pid() | undefined, levelzero_pending = false :: boolean(), levelzero_constructor :: pid() | undefined, @@ -246,14 +242,18 @@ levelzero_size = 0 :: integer(), levelzero_maxcachesize :: integer() | undefined, levelzero_cointoss = false :: boolean(), - levelzero_index :: array:array() | undefined | redacted, + levelzero_index :: + leveled_pmem:index_array() | undefined | redacted, + levelzero_astree :: list() | undefined | redacted, + + root_path = "test" :: string(), + clerk :: pid() | undefined, is_snapshot = false :: boolean(), snapshot_fully_loaded = false :: boolean(), snapshot_time :: pos_integer() | undefined, source_penciller :: pid() | undefined, bookie_monref :: reference() | undefined, - levelzero_astree :: list() | undefined | redacted, work_ongoing = false :: boolean(), % i.e. compaction work work_backlog = false :: boolean(), % i.e. compaction work @@ -261,39 +261,20 @@ pending_removals = [] :: list(string()), maybe_release = false :: boolean(), - timings = no_timing :: pcl_timings(), - timings_countdown = 0 :: integer(), - snaptimeout_short :: pos_integer()|undefined, snaptimeout_long :: pos_integer()|undefined, - sst_options = #sst_options{} :: #sst_options{}}). + monitor = {no_monitor, 0} :: leveled_monitor:monitor(), + + sst_options = #sst_options{} :: sst_options()}). --record(pcl_timings, - {sample_count = 0 :: integer(), - foundmem_time = 0 :: integer(), - found0_time = 0 :: integer(), - found1_time = 0 :: integer(), - found2_time = 0 :: integer(), - found3_time = 0 :: integer(), - foundlower_time = 0 :: integer(), - missed_time = 0 :: integer(), - foundmem_count = 0 :: integer(), - found0_count = 0 :: integer(), - found1_count = 0 :: integer(), - found2_count = 0 :: integer(), - found3_count = 0 :: integer(), - foundlower_count = 0 :: integer(), - missed_count = 0 :: integer()}). -type penciller_options() :: #penciller_options{}. -type bookies_memory() :: {tuple()|empty_cache, - % array:array()|empty_array, - any()|empty_array, % Issue of type compatability with OTP16 + array:array()|empty_array, integer()|infinity, integer()}. -type pcl_state() :: #state{}. --type pcl_timings() :: no_timing|#pcl_timings{}. -type levelzero_cacheentry() :: {pos_integer(), leveled_tree:leveled_tree()}. -type levelzero_cache() :: list(levelzero_cacheentry()). -type iterator_entry() @@ -313,7 +294,7 @@ fun((leveled_codec:ledger_key(), leveled_codec:ledger_value(), any()) -> any()). - +-type sst_options() :: #sst_options{}. -export_type([levelzero_cacheentry/0, levelzero_returnfun/0, sqn_check/0]). @@ -655,73 +636,76 @@ init([LogOpts, PCLopts]) -> Query, BookiesMem, LongRunning), - leveled_log:log("P0001", [self()]), - {ok, State#state{is_snapshot=true, + leveled_log:log(p0001, [self()]), + {ok, State#state{is_snapshot = true, bookie_monref = BookieMonitor, - source_penciller=SrcPenciller}}; + source_penciller = SrcPenciller}}; {_RootPath, _Snapshot=false, _Q, _BM} -> start_from_file(PCLopts) end. 
 handle_call({push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}},
-                From,
+                _From,
                 State=#state{is_snapshot=Snap}) when Snap == false ->
     % The push_mem process is as follows:
     %
-    % 1 - Receive a cache. The cache has four parts: a tree of keys and
-    % values, an array of 256 binaries listing the hashes present in the
-    % tree, a min SQN and a max SQN
-    %
-    % 2 - Check to see if there is a levelzero file pending. If so, the
-    % update must be returned. If not the update can be accepted
-    %
-    % 3 - The Penciller can now reply to the Bookie to show if the push has
-    % been accepted
-    %
-    % 4 - Update the cache:
-    % a) Append the cache to the list
-    % b) Add each of the 256 hash-listing binaries to the master L0 index array
-    %
-    % Check the approximate size of the cache. If it is over the maximum size,
-    % trigger a background L0 file write and update state of levelzero_pending.
-    CacheUpdateBlockedByPendingWork
-        = State#state.levelzero_pending or State#state.work_backlog,
-    CacheFull = leveled_pmem:cache_full(State#state.levelzero_cache),
-    case {CacheUpdateBlockedByPendingWork, CacheFull} of
-        {true, _} ->
-            leveled_log:log("P0018", [returned,
-                                        State#state.levelzero_pending,
-                                        State#state.work_backlog]),
+    % 1. If either the penciller is still waiting on the last L0 file to be
+    % written, or there is a work backlog - the cache is returned with the
+    % expectation that PUTs should be slowed. Also if the cache has reached
+    % the maximum number of lines (by default after 31 pushes from the bookie)
+    %
+    % 2. If (1) does not apply, the bookie's cache will be added to the
+    % penciller's cache.
+    SW = os:timestamp(),
+
+    L0Pending = State#state.levelzero_pending,
+    WorkBacklog = State#state.work_backlog,
+    CacheAlreadyFull = leveled_pmem:cache_full(State#state.levelzero_cache),
+    L0Size = State#state.levelzero_size,
+
+    % The clerk is prompted into action as there may be a L0 write required
+    ok = leveled_pclerk:clerk_prompt(State#state.clerk),
+
+    case L0Pending or WorkBacklog or CacheAlreadyFull of
+        true ->
+            % Cannot update the cache, or roll the memory so reply `returned`
+            % The Bookie must now retain the ledger cache and try to push the
+            % updated cache at a later time
+            leveled_log:log(
+                p0018,
+                [L0Size, L0Pending, WorkBacklog, CacheAlreadyFull]),
             {reply, returned, State};
-        {false, true} ->
-            leveled_log:log("P0042", [State#state.levelzero_size]),
-            % The cache is full (the maximum line items have been reached), so
-            % can't accept any more. However, we need to try and roll memory
-            % otherwise cache may be permanently full.
- gen_server:reply(From, returned), - {UpdState, none} = maybe_roll_memory(State, true, false), - {noreply, UpdState}; - {false, false} -> - % leveled_log:log("P0018", [ok, false, false]), + false -> + % Return ok as cache has been updated on State and the Bookie + % should clear its ledger cache which is now with the Penciller PushedTree = case is_tuple(LedgerTable) of true -> LedgerTable; false -> - leveled_tree:from_orderedset(LedgerTable, - ?CACHE_TYPE) + leveled_tree:from_orderedset(LedgerTable, ?CACHE_TYPE) end, - % Reply must happen after the table has been converted - gen_server:reply(From, ok), - % Update LevelZero will add to the cache and maybe roll the - % cache from memory to L0 disk if the cache is too big - {noreply, - update_levelzero(State#state.levelzero_size, - {PushedTree, PushedIdx, MinSQN, MaxSQN}, - State#state.ledger_sqn, - State#state.levelzero_cache, - State)} + {UpdMaxSQN, NewL0Size, UpdL0Cache} = + leveled_pmem:add_to_cache( + L0Size, + {PushedTree, MinSQN, MaxSQN}, + State#state.ledger_sqn, + State#state.levelzero_cache), + UpdL0Index = + leveled_pmem:add_to_index( + PushedIdx, + State#state.levelzero_index, + length(State#state.levelzero_cache) + 1), + leveled_log:log_randomtimer( + p0031, [NewL0Size, true, true, MinSQN, MaxSQN], SW, 0.1), + {reply, + ok, + State#state{ + levelzero_cache = UpdL0Cache, + levelzero_size = NewL0Size, + levelzero_index = UpdL0Index, + ledger_sqn = UpdMaxSQN}} end; handle_call({fetch, Key, Hash, UseL0Index}, _From, State) -> L0Idx = @@ -731,15 +715,12 @@ handle_call({fetch, Key, Hash, UseL0Index}, _From, State) -> false -> none end, - {R, UpdTimings} = timed_fetch_mem(Key, - Hash, - State#state.manifest, - State#state.levelzero_cache, - L0Idx, - State#state.timings), - {UpdTimings0, CountDown} = - update_statetimings(UpdTimings, State#state.timings_countdown), - {reply, R, State#state{timings=UpdTimings0, timings_countdown=CountDown}}; + R = + timed_fetch_mem( + Key, Hash, State#state.manifest, + State#state.levelzero_cache, L0Idx, + State#state.monitor), + {reply, R, State}; handle_call({check_sqn, Key, Hash, SQN}, _From, State) -> {reply, compare_to_sqn( @@ -792,10 +773,8 @@ handle_call({fetch_keys, lists:filter(FilterFun, L0AsList) end, - leveled_log:log_randomtimer("P0037", - [State#state.levelzero_size], - SW, - 0.01), + leveled_log:log_randomtimer( + p0037, [State#state.levelzero_size], SW, 0.01), %% Rename any reference to loop state that may be used by the function %% to be returned - https://github.com/martinsumner/leveled/issues/326 @@ -874,10 +853,8 @@ handle_call({register_snapshot, Snapshot, Query, BookiesMem, LongRunning}, EndKey, State#state.levelzero_cache, LM1Cache), - leveled_log:log_randomtimer("P0037", - [State#state.levelzero_size], - SW, - 0.01), + leveled_log:log_randomtimer( + p0037, [State#state.levelzero_size], SW, 0.01), {#state{levelzero_astree = L0AsTree, ledger_sqn = MaxSQN, persisted_sqn = State#state.persisted_sqn}, @@ -892,15 +869,16 @@ handle_call({register_snapshot, Snapshot, Query, BookiesMem, LongRunning}, {LM1Cache, MinSQN, MaxSQN}, State#state.ledger_sqn, State#state.levelzero_cache), - L0Index = + LM1Idx = case BookieIdx of empty_index -> - State#state.levelzero_index; + leveled_pmem:new_index(); _ -> - leveled_pmem:add_to_index(BookieIdx, - State#state.levelzero_index, - length(L0Cache)) + BookieIdx end, + L0Index = + leveled_pmem:add_to_index( + LM1Idx, State#state.levelzero_index, length(L0Cache)), {#state{levelzero_cache = L0Cache, levelzero_index = L0Index, levelzero_size = UpdSize, @@ 
-930,26 +908,28 @@ handle_call(close, _From, State) -> % The penciller should close each file in the manifest, and call a close % on the clerk. ok = leveled_pclerk:clerk_close(State#state.clerk), - leveled_log:log("P0008", [close]), - L0Empty = State#state.levelzero_size == 0, - case (not State#state.levelzero_pending and not L0Empty) of + leveled_log:log(p0008, [close]), + L0Left = State#state.levelzero_size > 0, + case (not State#state.levelzero_pending and L0Left) of true -> - L0_Left = State#state.levelzero_size > 0, - {UpdState, _L0Bloom} = maybe_roll_memory(State, L0_Left, true), - L0Pid = UpdState#state.levelzero_constructor, - case is_pid(L0Pid) of - true -> - ok = leveled_sst:sst_close(L0Pid); - false -> - leveled_log:log("P0010", [State#state.levelzero_size]) - end; + Man0 = State#state.manifest, + {Constructor, _} = + roll_memory( + leveled_pmanifest:get_manifest_sqn(Man0) + 1, + State#state.ledger_sqn, + State#state.root_path, + State#state.levelzero_cache, + length(State#state.levelzero_cache), + State#state.sst_options, + true), + ok = leveled_sst:sst_close(Constructor); false -> - leveled_log:log("P0010", [State#state.levelzero_size]) + leveled_log:log(p0010, [State#state.levelzero_size]) end, shutdown_manifest(State#state.manifest, State#state.levelzero_constructor), {stop, normal, ok, State}; handle_call(doom, _From, State) -> - leveled_log:log("P0030", []), + leveled_log:log(p0030, []), ok = leveled_pclerk:clerk_close(State#state.clerk), shutdown_manifest(State#state.manifest, State#state.levelzero_constructor), @@ -983,7 +963,7 @@ handle_call(persisted_sqn, _From, State) -> handle_cast({manifest_change, Manifest}, State) -> NewManSQN = leveled_pmanifest:get_manifest_sqn(Manifest), OldManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest), - leveled_log:log("P0041", [OldManSQN, NewManSQN]), + leveled_log:log(p0041, [OldManSQN, NewManSQN]), % Only safe to update the manifest if the SQN increments if NewManSQN > OldManSQN -> ok = @@ -1008,7 +988,7 @@ handle_cast({manifest_change, Manifest}, State) -> handle_cast({release_snapshot, Snapshot}, State) -> Manifest0 = leveled_pmanifest:release_snapshot(State#state.manifest, Snapshot), - leveled_log:log("P0003", [Snapshot]), + leveled_log:log(p0003, [Snapshot]), {noreply, State#state{manifest=Manifest0}}; handle_cast({confirm_delete, PDFN, FilePid}, State=#state{is_snapshot=Snap}) when Snap == false -> @@ -1028,7 +1008,7 @@ handle_cast({confirm_delete, PDFN, FilePid}, State=#state{is_snapshot=Snap}) % will be cleared from pending using the maybe_release boolean case leveled_pmanifest:ready_to_delete(State#state.manifest, PDFN) of true -> - leveled_log:log("P0005", [PDFN]), + leveled_log:log(p0005, [PDFN]), ok = leveled_sst:sst_deleteconfirmed(FilePid), case State#state.work_ongoing of true -> @@ -1060,7 +1040,7 @@ handle_cast({confirm_delete, PDFN, FilePid}, State=#state{is_snapshot=Snap}) end end; handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) -> - leveled_log:log("P0029", []), + leveled_log:log(p0029, []), ManEntry = #manifest_entry{start_key=StartKey, end_key=EndKey, owner=State#state.levelzero_constructor, @@ -1072,53 +1052,70 @@ handle_cast({levelzero_complete, FN, StartKey, EndKey, Bloom}, State) -> 0, ManEntry), % Prompt clerk to ask about work - do this for every L0 roll - UpdIndex = leveled_pmem:clear_index(State#state.levelzero_index), ok = leveled_pclerk:clerk_prompt(State#state.clerk), {noreply, State#state{levelzero_cache=[], - levelzero_index=UpdIndex, + levelzero_index=[], 
levelzero_pending=false, levelzero_constructor=undefined, levelzero_size=0, manifest=UpdMan, persisted_sqn=State#state.ledger_sqn}}; handle_cast(work_for_clerk, State) -> - case {State#state.levelzero_pending, State#state.work_ongoing} of - {false, false} -> - % TODO - as part of supervision tree and retry work: - % Need to check for work_ongoing as well as levelzero_pending as - % there may be a race that could lead to the clerk doing the same - % thing twice. - % - % This has implications though if we auto-restart the pclerk in the - % future, without altering this state - it may never be able to - % request work due to ongoing work that crashed the previous clerk - % - % Perhaps the pclerk should not be restarted because of this, and - % the failure should ripple up - {WL, WC} = leveled_pmanifest:check_for_work(State#state.manifest), - case WC of - 0 -> - {noreply, State#state{work_backlog=false}}; - N -> - Backlog = N > ?WORKQUEUE_BACKLOG_TOLERANCE, - leveled_log:log("P0024", [N, Backlog]), - [TL|_Tail] = WL, - ok = - leveled_pclerk:clerk_push( - State#state.clerk, {TL, State#state.manifest}), - case TL of - 0 -> - % Just written a L0 so as LoopState now rewritten, - % garbage collect to free as much as possible as - % soon as possible - garbage_collect(); - _ -> - ok - end, - + case {State#state.levelzero_pending, + State#state.work_ongoing, + leveled_pmanifest:levelzero_present(State#state.manifest)} of + {false, false, false} -> + % If the penciller memory needs rolling, prompt this now + CacheOverSize = + maybe_cache_too_big( + State#state.levelzero_size, + State#state.levelzero_maxcachesize, + State#state.levelzero_cointoss), + CacheAlreadyFull = + leveled_pmem:cache_full(State#state.levelzero_cache), + case (CacheAlreadyFull or CacheOverSize) of + true -> + % Rolling the memory to create a new Level Zero file + NextSQN = + leveled_pmanifest:get_manifest_sqn( + State#state.manifest) + 1, + {Constructor, none} = + roll_memory( + NextSQN, + State#state.ledger_sqn, + State#state.root_path, + none, + length(State#state.levelzero_cache), + State#state.sst_options, + false), {noreply, - State#state{work_backlog=Backlog, work_ongoing=true}} + State#state{ + levelzero_pending=true, + levelzero_constructor=Constructor}}; + false -> + {WL, WC} = + leveled_pmanifest:check_for_work(State#state.manifest), + case WC of + 0 -> + % Should do some tidy-up work here? + {noreply, State#state{work_backlog=false}}; + N -> + Backlog = N > ?WORKQUEUE_BACKLOG_TOLERANCE, + leveled_log:log(p0024, [N, Backlog]), + [TL|_Tail] = WL, + ok = + leveled_pclerk:clerk_push( + State#state.clerk, + {TL, State#state.manifest}), + {noreply, + State#state{ + work_backlog=Backlog, work_ongoing=true}} + end end; + {false, false, true} -> + ok = leveled_pclerk:clerk_push( + State#state.clerk, {0, State#state.manifest}), + {noreply, State#state{work_ongoing=true}}; _ -> {noreply, State} end; @@ -1157,9 +1154,9 @@ handle_info(_Info, State) -> {noreply, State}. terminate(Reason, _State=#state{is_snapshot=Snap}) when Snap == true -> - leveled_log:log("P0007", [Reason]); + leveled_log:log(p0007, [Reason]); terminate(Reason, _State) -> - leveled_log:log("P0011", [Reason]). + leveled_log:log(p0011, [Reason]). 
format_status(normal, [_PDict, State]) -> State; @@ -1200,6 +1197,7 @@ start_from_file(PCLopts) -> RootPath = PCLopts#penciller_options.root_path, MaxTableSize = PCLopts#penciller_options.max_inmemory_tablesize, OptsSST = PCLopts#penciller_options.sst_options, + Monitor = PCLopts#penciller_options.monitor, SnapTimeoutShort = PCLopts#penciller_options.snaptimeout_short, SnapTimeoutLong = PCLopts#penciller_options.snaptimeout_long, @@ -1215,10 +1213,11 @@ start_from_file(PCLopts) -> root_path = RootPath, levelzero_maxcachesize = MaxTableSize, levelzero_cointoss = CoinToss, - levelzero_index = leveled_pmem:new_index(), + levelzero_index = [], snaptimeout_short = SnapTimeoutShort, snaptimeout_long = SnapTimeoutLong, - sst_options = OptsSST}, + sst_options = OptsSST, + monitor = Monitor}, %% Open manifest Manifest0 = leveled_pmanifest:open_manifest(RootPath), @@ -1232,15 +1231,15 @@ start_from_file(PCLopts) -> SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1, {MaxSQN, Manifest1, FileList} = leveled_pmanifest:load_manifest(Manifest0, OpenFun, SQNFun), - leveled_log:log("P0014", [MaxSQN]), + leveled_log:log(p0014, [MaxSQN]), ManSQN = leveled_pmanifest:get_manifest_sqn(Manifest1), - leveled_log:log("P0035", [ManSQN]), + leveled_log:log(p0035, [ManSQN]), %% Find any L0 files L0FN = sst_filename(ManSQN + 1, 0, 0), {State0, FileList0} = case filelib:is_file(filename:join(sst_rootpath(RootPath), L0FN)) of true -> - leveled_log:log("P0015", [L0FN]), + leveled_log:log(p0015, [L0FN]), L0Open = leveled_sst:sst_open(sst_rootpath(RootPath), L0FN, OptsSST, @@ -1257,14 +1256,14 @@ start_from_file(PCLopts) -> ManSQN + 1, 0, L0Entry), - leveled_log:log("P0016", [L0SQN]), + leveled_log:log(p0016, [L0SQN]), LedgerSQN = max(MaxSQN, L0SQN), {InitState#state{manifest = Manifest2, ledger_sqn = LedgerSQN, persisted_sqn = LedgerSQN}, [L0FN|FileList]}; false -> - leveled_log:log("P0017", []), + leveled_log:log(p0017, []), {InitState#state{manifest = Manifest1, ledger_sqn = MaxSQN, persisted_sqn = MaxSQN}, @@ -1330,7 +1329,7 @@ archive_files(RootPath, UsedFileList) -> true -> UnusedFiles; false -> - leveled_log:log("P0040", [FN0]), + leveled_log:log(p0040, [FN0]), [FN0|UnusedFiles] end; _ -> @@ -1350,143 +1349,76 @@ archive_files(RootPath, UsedFileList) -> ok. --spec update_levelzero(integer(), tuple(), integer(), list(), pcl_state()) - -> pcl_state(). +-spec maybe_cache_too_big( + pos_integer(), pos_integer(), boolean()) -> boolean(). %% @doc -%% Update the in-memory cache of recent changes for the penciller. This is -%% the level zero at the top of the tree. -%% Once the update is made, there needs to be a decision to potentially roll -%% the level-zero memory to an on-disk level zero sst file. This can only -%% happen when the cache has exeeded the size threshold (with some jitter -%% to prevent coordination across multiple leveled instances), and when there -%% is no level zero file already present, and when there is no manifest change -%% pending. 
-update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN}, - LedgerSQN, L0Cache, State) -> - SW = os:timestamp(), % Time this for logging purposes - Update = leveled_pmem:add_to_cache(L0Size, - {PushedTree, MinSQN, MaxSQN}, - LedgerSQN, - L0Cache), - UpdL0Index = leveled_pmem:add_to_index(PushedIdx, - State#state.levelzero_index, - length(L0Cache) + 1), - - {UpdMaxSQN, NewL0Size, UpdL0Cache} = Update, - if - UpdMaxSQN >= LedgerSQN -> - UpdState = State#state{levelzero_cache=UpdL0Cache, - levelzero_size=NewL0Size, - levelzero_index=UpdL0Index, - ledger_sqn=UpdMaxSQN}, - CacheTooBig = - NewL0Size > State#state.levelzero_maxcachesize, - CacheMuchTooBig = - NewL0Size > min(?SUPER_MAX_TABLE_SIZE, - 2 * State#state.levelzero_maxcachesize), - RandomFactor = - case State#state.levelzero_cointoss of - true -> - case leveled_rand:uniform(?COIN_SIDECOUNT) of - 1 -> - true; - _ -> - false - end; - false -> - true - end, - JitterCheck = RandomFactor or CacheMuchTooBig, - Due = CacheTooBig and JitterCheck, - {UpdState0, _L0Bloom} = maybe_roll_memory(UpdState, Due, false), - LogSubs = [NewL0Size, Due, State#state.work_ongoing], - case Due of - true -> - leveled_log:log_timer("P0031", LogSubs, SW); - _ -> - ok - end, - UpdState0 - end. +%% Is the cache too big - should it be flushed to on-disk Level 0 +%% There exists some jitter to prevent all caches from flushing concurrently +%% where there are multiple leveled instances on one machine. +maybe_cache_too_big(NewL0Size, L0MaxSize, CoinToss) -> + CacheTooBig = NewL0Size > L0MaxSize, + CacheMuchTooBig = + NewL0Size > min(?SUPER_MAX_TABLE_SIZE, 2 * L0MaxSize), + RandomFactor = + case CoinToss of + true -> + case leveled_rand:uniform(?COIN_SIDECOUNT) of + 1 -> + true; + _ -> + false + end; + false -> + true + end, + CacheTooBig and (RandomFactor or CacheMuchTooBig). - --spec maybe_roll_memory(pcl_state(), boolean(), boolean()) - -> {pcl_state(), leveled_ebloom:bloom()|none}. -%% @doc -%% Check that no L0 file is present before rolling memory. Returns a boolean -%% to indicate if memory has been rolled, the Pid of the L0 constructor and -%% The bloom of the L0 file (or none) -maybe_roll_memory(State, false, _SyncRoll) -> - {State, none}; -maybe_roll_memory(State, true, SyncRoll) -> - BlockedByL0 = leveled_pmanifest:levelzero_present(State#state.manifest), - PendingManifestChange = State#state.work_ongoing, - % It is critical that memory is not rolled if the manifest is due to be - % updated by a change by the clerk. When that manifest change is made it - % will override the addition of L0 and data will be lost. - case (BlockedByL0 or PendingManifestChange) of - true -> - {State, none}; - false -> - {L0Constructor, Bloom} = roll_memory(State, SyncRoll), - {State#state{levelzero_pending=true, - levelzero_constructor=L0Constructor}, - Bloom} - end. - --spec roll_memory(pcl_state(), boolean()) - -> {pid(), leveled_ebloom:bloom()|none}. +-spec roll_memory( + pos_integer(), non_neg_integer(), string(), + levelzero_cache()|none, pos_integer(), + sst_options(), boolean()) + -> {pid(), leveled_ebloom:bloom()|none}. %% @doc %% Roll the in-memory cache into a L0 file. If this is done synchronously, %% will return a bloom representing the contents of the file. %% -%% Casting a large object (the levelzero cache) to the gen_server did not lead -%% to an immediate return as expected. With 32K keys in the TreeList it could -%% take around 35-40ms. +%% Casting a large object (the levelzero cache) to the SST file does not lead +%% to an immediate return. 
With 32K keys in the TreeList it could take around +%% 35-40ms due to the overheads of copying. %% -%% To avoid blocking this gen_server, the SST file can request each item of the +%% To avoid blocking the penciller, the SST file can request each item of the %% cache one at a time. %% %% The Wait is set to false to use a cast when calling this in normal operation %% where as the Wait of true is used at shutdown -roll_memory(State, false) -> - ManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1, - RootPath = sst_rootpath(State#state.root_path), - FileName = sst_filename(ManSQN, 0, 0), - leveled_log:log("P0019", [FileName, State#state.ledger_sqn]), +roll_memory(NextManSQN, LedgerSQN, RootPath, none, CL, SSTOpts, false) -> + L0Path = sst_rootpath(RootPath), + L0FN = sst_filename(NextManSQN, 0, 0), + leveled_log:log(p0019, [L0FN, LedgerSQN]), PCL = self(), FetchFun = fun(Slot, ReturnFun) -> pcl_fetchlevelzero(PCL, Slot, ReturnFun) end, - R = leveled_sst:sst_newlevelzero(RootPath, - FileName, - length(State#state.levelzero_cache), - FetchFun, - PCL, - State#state.ledger_sqn, - State#state.sst_options), - {ok, Constructor, _} = R, + {ok, Constructor, _} = + leveled_sst:sst_newlevelzero( + L0Path, L0FN, CL, FetchFun, PCL, LedgerSQN, SSTOpts), {Constructor, none}; -roll_memory(State, true) -> - ManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest) + 1, - RootPath = sst_rootpath(State#state.root_path), - FileName = sst_filename(ManSQN, 0, 0), - LZC = State#state.levelzero_cache, - FetchFun = fun(Slot) -> lists:nth(Slot, LZC) end, - KVList = leveled_pmem:to_list(length(LZC), FetchFun), - R = leveled_sst:sst_new(RootPath, - FileName, - 0, - KVList, - State#state.ledger_sqn, - State#state.sst_options), - {ok, Constructor, _, Bloom} = R, +roll_memory(NextManSQN, LedgerSQN, RootPath, L0Cache, CL, SSTOpts, true) -> + L0Path = sst_rootpath(RootPath), + L0FN = sst_filename(NextManSQN, 0, 0), + FetchFun = fun(Slot) -> lists:nth(Slot, L0Cache) end, + KVList = leveled_pmem:to_list(CL, FetchFun), + {ok, Constructor, _, Bloom} = + leveled_sst:sst_new( + L0Path, L0FN, 0, KVList, LedgerSQN, SSTOpts), {Constructor, Bloom}. --spec timed_fetch_mem(tuple(), {integer(), integer()}, - leveled_pmanifest:manifest(), list(), - leveled_pmem:index_array(), pcl_timings()) - -> {tuple(), pcl_timings()}. +-spec timed_fetch_mem( + tuple(), + {integer(), integer()}, + leveled_pmanifest:manifest(), list(), + leveled_pmem:index_array(), + leveled_monitor:monitor()) -> leveled_codec:ledger_kv()|not_found. %% @doc %% Fetch the result from the penciller, starting by looking in the memory, %% and if it is not found looking down level by level through the LSM tree. @@ -1496,12 +1428,13 @@ roll_memory(State, true) -> %% the cost of requests dropping levels can be monitored. %% %% the result tuple includes the level at which the result was found. -timed_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, Timings) -> - SW = os:timestamp(), +timed_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, Monitor) -> + SW0 = leveled_monitor:maybe_time(Monitor), {R, Level} = fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, fun timed_sst_get/4), - UpdTimings = update_timings(SW, Timings, R, Level), - {R, UpdTimings}. + {TS0, _SW1} = leveled_monitor:step_time(SW0), + maybelog_fetch_timing(Monitor, Level, TS0, R == not_present), + R. 
-spec fetch_sqn( @@ -1576,10 +1509,10 @@ log_slowfetch(T0, R, PID, Level, FetchTolerance) -> {T, R} when T < FetchTolerance -> R; {T, not_present} -> - leveled_log:log("PC016", [PID, T, Level, not_present]), + leveled_log:log(pc016, [PID, T, Level, not_present]), not_present; {T, R} -> - leveled_log:log("PC016", [PID, T, Level, found]), + leveled_log:log(pc016, [PID, T, Level, found]), R end. @@ -1929,95 +1862,17 @@ find_nextkey(QueryArray, LCnt, end. - -%%%============================================================================ -%%% Timing Functions -%%%============================================================================ - --spec update_statetimings(pcl_timings(), integer()) - -> {pcl_timings(), integer()}. -%% @doc -%% -%% The timings state is either in countdown to the next set of samples of -%% we are actively collecting a sample. Active collection take place -%% when the countdown is 0. Once the sample has reached the expected count -%% then there is a log of that sample, and the countdown is restarted. -%% -%% Outside of sample windows the timings object should be set to the atom -%% no_timing. no_timing is a valid state for the pcl_timings type. -update_statetimings(no_timing, 0) -> - {#pcl_timings{}, 0}; -update_statetimings(Timings, 0) -> - case Timings#pcl_timings.sample_count of - SC when SC >= ?TIMING_SAMPLESIZE -> - log_timings(Timings), - {no_timing, leveled_rand:uniform(2 * ?TIMING_SAMPLECOUNTDOWN)}; - _SC -> - {Timings, 0} - end; -update_statetimings(no_timing, N) -> - {no_timing, N - 1}. - -log_timings(Timings) -> - leveled_log:log("P0032", [Timings#pcl_timings.sample_count, - Timings#pcl_timings.foundmem_time, - Timings#pcl_timings.found0_time, - Timings#pcl_timings.found1_time, - Timings#pcl_timings.found2_time, - Timings#pcl_timings.found3_time, - Timings#pcl_timings.foundlower_time, - Timings#pcl_timings.missed_time, - Timings#pcl_timings.foundmem_count, - Timings#pcl_timings.found0_count, - Timings#pcl_timings.found1_count, - Timings#pcl_timings.found2_count, - Timings#pcl_timings.found3_count, - Timings#pcl_timings.foundlower_count, - Timings#pcl_timings.missed_count]). - --spec update_timings(erlang:timestamp(), pcl_timings(), - not_found|tuple(), integer()|basement) - -> pcl_timings(). -%% @doc -%% -%% update the timings record unless the current record object is the atom -%% no_timing. 
-update_timings(_SW, no_timing, _Result, _Stage) -> - no_timing; -update_timings(SW, Timings, Result, Stage) -> - Timer = timer:now_diff(os:timestamp(), SW), - SC = Timings#pcl_timings.sample_count + 1, - Timings0 = Timings#pcl_timings{sample_count = SC}, - case {Result, Stage} of - {not_present, _} -> - NFT = Timings#pcl_timings.missed_time + Timer, - NFC = Timings#pcl_timings.missed_count + 1, - Timings0#pcl_timings{missed_time = NFT, missed_count = NFC}; - {_, memory} -> - PMT = Timings#pcl_timings.foundmem_time + Timer, - PMC = Timings#pcl_timings.foundmem_count + 1, - Timings0#pcl_timings{foundmem_time = PMT, foundmem_count = PMC}; - {_, 0} -> - L0T = Timings#pcl_timings.found0_time + Timer, - L0C = Timings#pcl_timings.found0_count + 1, - Timings0#pcl_timings{found0_time = L0T, found0_count = L0C}; - {_, 1} -> - L1T = Timings#pcl_timings.found1_time + Timer, - L1C = Timings#pcl_timings.found1_count + 1, - Timings0#pcl_timings{found1_time = L1T, found1_count = L1C}; - {_, 2} -> - L2T = Timings#pcl_timings.found2_time + Timer, - L2C = Timings#pcl_timings.found2_count + 1, - Timings0#pcl_timings{found2_time = L2T, found2_count = L2C}; - {_, 3} -> - L3T = Timings#pcl_timings.found3_time + Timer, - L3C = Timings#pcl_timings.found3_count + 1, - Timings0#pcl_timings{found3_time = L3T, found3_count = L3C}; - _ -> - LLT = Timings#pcl_timings.foundlower_time + Timer, - LLC = Timings#pcl_timings.foundlower_count + 1, - Timings0#pcl_timings{foundlower_time = LLT, foundlower_count = LLC} - end. +-spec maybelog_fetch_timing( + leveled_monitor:monitor(), + memory|leveled_pmanifest:lsm_level(), + leveled_monitor:timing(), + boolean()) -> ok. +maybelog_fetch_timing(_Monitor, _Level, no_timing, _NF) -> + ok; +maybelog_fetch_timing({Pid, _StatsFreq}, _Level, FetchTime, true) -> + leveled_monitor:add_stat(Pid, {pcl_fetch_update, not_found, FetchTime}); +maybelog_fetch_timing({Pid, _StatsFreq}, Level, FetchTime, _NF) -> + leveled_monitor:add_stat(Pid, {pcl_fetch_update, Level, FetchTime}). %%%============================================================================ @@ -2209,13 +2064,15 @@ simple_server_test() -> ?assertMatch(Key3, pcl_fetch(PCLr, {o,"Bucket0003", "Key0003", null})), ?assertMatch(Key4, pcl_fetch(PCLr, {o,"Bucket0004", "Key0004", null})), - {ok, PclSnap, null, _} = - leveled_bookie:snapshot_store(leveled_bookie:empty_ledgercache(), - PCLr, - null, - ledger, - undefined, - false), + {ok, PclSnap, null} = + leveled_bookie:snapshot_store( + leveled_bookie:empty_ledgercache(), + PCLr, + null, + {no_monitor, 0}, + ledger, + undefined, + false), ?assertMatch(Key1, pcl_fetch(PclSnap, {o,"Bucket0001", "Key0001", null})), ?assertMatch(Key2, pcl_fetch(PclSnap, {o,"Bucket0002", "Key0002", null})), @@ -2264,13 +2121,15 @@ simple_server_test() -> 1)), ok = pcl_close(PclSnap), - {ok, PclSnap2, null, _} = - leveled_bookie:snapshot_store(leveled_bookie:empty_ledgercache(), - PCLr, - null, - ledger, - undefined, - false), + {ok, PclSnap2, null} = + leveled_bookie:snapshot_store( + leveled_bookie:empty_ledgercache(), + PCLr, + null, + {no_monitor, 0}, + ledger, + undefined, + false), ?assertMatch(replaced, pcl_checksequencenumber(PclSnap2, {o, @@ -2476,19 +2335,6 @@ slow_fetch_test() -> ?assertMatch(not_present, log_slowfetch(2, not_present, "fake", 0, 1)), ?assertMatch("value", log_slowfetch(2, "value", "fake", 0, 1)). 
-timings_test() -> - SW = os:timestamp(), - timer:sleep(1), - T0 = update_timings(SW, #pcl_timings{}, {"K", "V"}, 2), - timer:sleep(1), - T1 = update_timings(SW, T0, {"K", "V"}, 3), - T2 = update_timings(SW, T1, {"K", "V"}, basement), - ?assertMatch(3, T2#pcl_timings.sample_count), - ?assertMatch(true, T2#pcl_timings.foundlower_time > T2#pcl_timings.found2_time), - ?assertMatch(1, T2#pcl_timings.found2_count), - ?assertMatch(1, T2#pcl_timings.found3_count), - ?assertMatch(1, T2#pcl_timings.foundlower_count). - coverage_cheat_test() -> {noreply, _State0} = handle_info(timeout, #state{}), @@ -2554,10 +2400,15 @@ handle_down_test() -> loop() -> receive {snap, PCLr, TestPid} -> - {ok, Snap, null, _Timings} = + {ok, Snap, null} = leveled_bookie:snapshot_store( leveled_bookie:empty_ledgercache(), - PCLr, null, ledger, undefined, false), + PCLr, + null, + {no_monitor, 0}, + ledger, + undefined, + false), TestPid ! {self(), {ok, Snap, null}}, loop(); stop -> diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index ea8d0dc..5286e15 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -74,9 +74,11 @@ % At o(10) trillion keys behaviour may become increasingly % difficult to predict. +-ifdef(if_check). -if(length(?LEVEL_SCALEFACTOR) /= ?MAX_LEVELS). -error("length ?LEVEL_SCALEFACTOR differs from ?MAX_LEVELS"). -endif. +-endif. -define(TREE_TYPE, idxt). -define(TREE_WIDTH, 8). @@ -105,10 +107,11 @@ -type manifest() :: #manifest{}. -type manifest_entry() :: #manifest_entry{}. -type manifest_owner() :: pid()|list(). +-type lsm_level() :: 0..7. -type selector_strategy() :: random|{grooming, fun((list(manifest_entry())) -> manifest_entry())}. --export_type([manifest/0, manifest_entry/0, manifest_owner/0]). +-export_type([manifest/0, manifest_entry/0, manifest_owner/0, lsm_level/0]). %%%============================================================================ %%% API @@ -171,8 +174,11 @@ copy_manifest(Manifest) -> % about is switched to undefined Manifest#manifest{snapshots = undefined, pending_deletes = undefined}. --spec load_manifest(manifest(), fun(), fun()) -> - {integer(), manifest(), list()}. +-spec load_manifest( + manifest(), + fun((file:name_all(), 1..7) -> {pid(), leveled_ebloom:bloom()}), + fun((pid()) -> pos_integer())) + -> {integer(), manifest(), list()}. %% @doc %% Roll over the manifest starting a process to manage each file in the %% manifest. The PidFun should be able to return the Pid of a file process @@ -204,7 +210,9 @@ load_manifest(Manifest, LoadFun, SQNFun) -> {0, Manifest, []}, lists:reverse(lists:seq(0, Manifest#manifest.basement))). --spec close_manifest(manifest(), fun()) -> ok. +-spec close_manifest( + manifest(), + fun((any()) -> ok)) -> ok. %% @doc %% Close all the files in the manifest (using CloseEntryFun to call close on %% a file). 
Firts all the files in the active manifest are called, and then @@ -317,7 +325,7 @@ replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) -> {UpdBlooms, StrippedAdditions} = update_blooms(Removals, Additions, Manifest#manifest.blooms), UpdLevel = replace_entry(LevelIdx, Level, Removals, StrippedAdditions), - leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]), + leveled_log:log(pc019, ["insert", LevelIdx, UpdLevel]), PendingDeletes = update_pendingdeletes(ManSQN, Removals, @@ -350,7 +358,7 @@ insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> {UpdBlooms, UpdEntry} = update_blooms([], Entry, Manifest#manifest.blooms), UpdLevel = add_entry(LevelIdx, Level, UpdEntry), - leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]), + leveled_log:log(pc019, ["insert", LevelIdx, UpdLevel]), Basement = max(LevelIdx, Manifest#manifest.basement), Manifest#manifest{levels = array:set(LevelIdx, UpdLevel, Levels), basement = Basement, @@ -367,7 +375,7 @@ remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> {UpdBlooms, []} = update_blooms(Entry, [], Manifest#manifest.blooms), UpdLevel = remove_entry(LevelIdx, Level, Entry), - leveled_log:log("PC019", ["remove", LevelIdx, UpdLevel]), + leveled_log:log(pc019, ["remove", LevelIdx, UpdLevel]), PendingDeletes = update_pendingdeletes(ManSQN, Entry, Manifest#manifest.pending_deletes), @@ -552,7 +560,7 @@ release_snapshot(Manifest, Pid) -> _ -> case seconds_now() > (ST + TO) of true -> - leveled_log:log("P0038", [P, SQN, ST, TO]), + leveled_log:log(p0038, [P, SQN, ST, TO]), {Acc, MinSQN, Found}; false -> {[{P, SQN, ST, TO}|Acc], min(SQN, MinSQN), Found} @@ -564,7 +572,7 @@ release_snapshot(Manifest, Pid) -> Manifest#manifest.snapshots), case Hit of false -> - leveled_log:log("P0039", [Pid, length(SnapList0), MinSnapSQN]); + leveled_log:log(p0039, [Pid, length(SnapList0), MinSnapSQN]); true -> ok end, @@ -573,7 +581,7 @@ release_snapshot(Manifest, Pid) -> Manifest#manifest{snapshots = SnapList0, min_snapshot_sqn = 0}; _ -> - leveled_log:log("P0004", [SnapList0]), + leveled_log:log(p0004, [SnapList0]), Manifest#manifest{snapshots = SnapList0, min_snapshot_sqn = MinSnapSQN} end. @@ -987,7 +995,7 @@ filepath(RootPath, NewMSN, pending_manifest) -> open_manifestfile(_RootPath, L) when L == [] orelse L == [0] -> - leveled_log:log("P0013", []), + leveled_log:log(p0013, []), new_manifest(); open_manifestfile(RootPath, [TopManSQN|Rest]) -> CurrManFile = filepath(RootPath, TopManSQN, current_manifest), @@ -995,10 +1003,10 @@ open_manifestfile(RootPath, [TopManSQN|Rest]) -> <> = FileBin, case erlang:crc32(BinaryOfTerm) of CRC -> - leveled_log:log("P0012", [TopManSQN]), + leveled_log:log(p0012, [TopManSQN]), binary_to_term(BinaryOfTerm); _ -> - leveled_log:log("P0033", [CurrManFile, "crc wonky"]), + leveled_log:log(p0033, [CurrManFile, "crc wonky"]), open_manifestfile(RootPath, Rest) end. diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index 17b9277..025fa7b 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -37,7 +37,6 @@ merge_trees/4, add_to_index/3, new_index/0, - clear_index/1, check_index/2, cache_full/1 ]). @@ -46,8 +45,7 @@ -define(MAX_CACHE_LINES, 31). % Must be less than 128 -% -type index_array() :: array:array(). --type index_array() :: any()|none. % To live with OTP16 +-type index_array() :: list(array:array())|[]|none. -export_type([index_array/0]). @@ -61,8 +59,8 @@ cache_full(L0Cache) -> length(L0Cache) == ?MAX_CACHE_LINES. 
--spec prepare_for_index(index_array(), leveled_codec:segment_hash()) - -> index_array(). +-spec prepare_for_index( + array:array(), leveled_codec:segment_hash()) -> array:array(). %% @doc %% Add the hash of a key to the index. This is 'prepared' in the sense that %% this index is not use until it is loaded into the main index. @@ -77,45 +75,39 @@ prepare_for_index(IndexArray, Hash) -> Bin = array:get(Slot, IndexArray), array:set(Slot, <>, IndexArray). --spec add_to_index(index_array(), index_array(), integer()) -> index_array(). +-spec add_to_index(array:array(), index_array(), integer()) -> index_array(). %% @doc %% Expand the penciller's current index array with the details from a new %% ledger cache tree sent from the Bookie. The tree will have a cache slot %% which is the index of this ledger_cache in the list of the ledger_caches add_to_index(LM1Array, L0Index, CacheSlot) when CacheSlot < 128 -> - IndexAddFun = - fun(Slot, Acc) -> - Bin0 = array:get(Slot, Acc), - BinLM1 = array:get(Slot, LM1Array), - array:set(Slot, - <>, - Acc) - end, - lists:foldl(IndexAddFun, L0Index, lists:seq(0, 255)). + [LM1Array|L0Index]. --spec new_index() -> index_array(). +-spec new_index() -> array:array(). %% @doc %% Create a new index array new_index() -> array:new([{size, 256}, {default, <<>>}]). --spec clear_index(index_array()) -> index_array(). -%% @doc -%% Create a new index array -clear_index(_L0Index) -> - new_index(). - --spec check_index({integer(), integer()}, index_array()) -> list(integer()). +-spec check_index(leveled_codec:segment_hash(), index_array()) + -> list(non_neg_integer()). %% @doc %% return a list of positions in the list of cache arrays that may contain the %% key associated with the hash being checked check_index(Hash, L0Index) -> {Slot, H0} = split_hash(Hash), - Bin = array:get(Slot, L0Index), - find_pos(Bin, H0, [], 0). - + {_L, Positions} = + lists:foldl( + fun(A, {SlotC, PosList}) -> + B = array:get(Slot, A), + case find_pos(B, H0) of + true -> {SlotC + 1, [SlotC|PosList]}; + false -> {SlotC + 1, PosList} + end + end, + {1, []}, + L0Index), + lists:reverse(Positions). -spec add_to_cache(integer(), {tuple(), integer(), integer()}, @@ -128,19 +120,15 @@ check_index(Hash, L0Index) -> %% the Ledger's SQN. add_to_cache(L0Size, {LevelMinus1, MinSQN, MaxSQN}, LedgerSQN, TreeList) -> LM1Size = leveled_tree:tsize(LevelMinus1), - case LM1Size of - 0 -> - {LedgerSQN, L0Size, TreeList}; - _ -> - if - MinSQN >= LedgerSQN -> - {MaxSQN, - L0Size + LM1Size, - lists:append(TreeList, [LevelMinus1])} - end + if + MinSQN >= LedgerSQN -> + {MaxSQN, + L0Size + LM1Size, + [LevelMinus1|TreeList]} end. --spec to_list(integer(), fun()) -> list(). +-spec to_list( + integer(), fun((pos_integer()) -> leveled_tree:leveled_tree())) -> list(). %% @doc %% The cache is a list of leveled_trees of length Slots. This will fetch %% each tree in turn by slot ID and then produce a merged/sorted output of @@ -150,7 +138,7 @@ add_to_cache(L0Size, {LevelMinus1, MinSQN, MaxSQN}, LedgerSQN, TreeList) -> %% does a large object copy of the whole cache. to_list(Slots, FetchFun) -> SW = os:timestamp(), - SlotList = lists:reverse(lists:seq(1, Slots)), + SlotList = lists:seq(1, Slots), FullList = lists:foldl(fun(Slot, Acc) -> Tree = FetchFun(Slot), L = leveled_tree:to_list(Tree), @@ -158,7 +146,7 @@ to_list(Slots, FetchFun) -> end, [], SlotList), - leveled_log:log_timer("PM002", [length(FullList)], SW), + leveled_log:log_timer(pm002, [length(FullList)], SW), FullList. 
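%% Illustrative sketch (not part of this change) of the index lifecycle with
%% the list-based index_array(): each ledger cache gets its own 256-slot
%% array built up with prepare_for_index/2, add_to_index/3 simply prepends
%% that array, and check_index/2 returns the (1-based) positions of the
%% caches that may hold the hash. example_index_usage/2 and its variable
%% names are illustrative only.
example_index_usage(KeysA, KeysB) ->
    BuildArray =
        fun(Keys) ->
            lists:foldl(
                fun(K, Arr) ->
                    prepare_for_index(Arr, leveled_codec:segment_hash(K))
                end,
                new_index(),
                Keys)
        end,
    L0Index0 = add_to_index(BuildArray(KeysA), [], 1),
    L0Index1 = add_to_index(BuildArray(KeysB), L0Index0, 2),
        %% L0Index1 is now [ArrayB, ArrayA] - newest cache at the head
    %% A key present only in KeysA should return [2], plus any false
    %% positives caused by 23-bit hash collisions in ArrayB
    check_index(leveled_codec:segment_hash(hd(KeysA)), L0Index1).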
-spec check_levelzero(tuple(), list(integer()), list()) @@ -193,32 +181,24 @@ check_levelzero(Key, Hash, PosList, TreeList) -> %% currently unmerged bookie's ledger cache) that are between StartKey %% and EndKey (inclusive). merge_trees(StartKey, EndKey, TreeList, LevelMinus1) -> - lists:foldl(fun(Tree, Acc) -> - R = leveled_tree:match_range(StartKey, - EndKey, - Tree), - lists:ukeymerge(1, Acc, R) end, - [], - [LevelMinus1|lists:reverse(TreeList)]). + lists:foldl( + fun(Tree, Acc) -> + R = leveled_tree:match_range(StartKey, EndKey, Tree), + lists:ukeymerge(1, Acc, R) end, + [], + [LevelMinus1|TreeList]). %%%============================================================================ %%% Internal Functions %%%============================================================================ -find_pos(<<>>, _Hash, PosList, _SlotID) -> - PosList; -find_pos(<<1:1/integer, Hash:23/integer, T/binary>>, Hash, PosList, SlotID) -> - case lists:member(SlotID, PosList) of - true -> - find_pos(T, Hash, PosList, SlotID); - false -> - find_pos(T, Hash, PosList ++ [SlotID], SlotID) - end; -find_pos(<<1:1/integer, _Miss:23/integer, T/binary>>, Hash, PosList, SlotID) -> - find_pos(T, Hash, PosList, SlotID); -find_pos(<<0:1/integer, NxtSlot:7/integer, T/binary>>, Hash, PosList, _SlotID) -> - find_pos(T, Hash, PosList, NxtSlot). +find_pos(<<>>, _Hash) -> + false; +find_pos(<<1:1/integer, Hash:23/integer, _T/binary>>, Hash) -> + true; +find_pos(<<1:1/integer, _Miss:23/integer, T/binary>>, Hash) -> + find_pos(T, Hash). split_hash({SegmentID, ExtraHash}) -> @@ -242,9 +222,7 @@ check_slotlist(Key, _Hash, CheckList, TreeList) -> end end end, - lists:foldl(SlotCheckFun, - {false, not_found}, - lists:reverse(CheckList)). + lists:foldl(SlotCheckFun, {false, not_found}, CheckList). %%%============================================================================ %%% Test @@ -325,7 +303,7 @@ compare_method_test() -> end, S0 = lists:foldl(fun({Key, _V}, Acc) -> - R0 = lists:foldr(FindKeyFun(Key), + R0 = lists:foldl(FindKeyFun(Key), {false, not_found}, TreeList), [R0|Acc] end, @@ -394,7 +372,7 @@ with_index_test2() -> {R, UpdL0Index, lists:ukeymerge(1, LM1, SrcList)} end, - R0 = lists:foldl(LoadFun, {{0, 0, []}, new_index(), []}, lists:seq(1, 16)), + R0 = lists:foldl(LoadFun, {{0, 0, []}, [], []}, lists:seq(1, 16)), {{SQN, Size, TreeList}, L0Index, SrcKVL} = R0, ?assertMatch(32000, SQN), @@ -412,4 +390,52 @@ with_index_test2() -> _R1 = lists:foldl(CheckFun, {L0Index, TreeList}, SrcKVL). 
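%% Illustrative sketch (not part of this change), relating to find_pos/2
%% above: each slot of a cache's index array holds a flat binary of 24-bit
%% entries - a 1 flag bit followed by 23 bits of hash - and find_pos/2 now
%% simply answers whether a hash is present in that binary. A worked example
%% with two packed entries (the hash values are arbitrary):
find_pos_example() ->
    SlotBin = <<1:1/integer, 12345:23/integer, 1:1/integer, 99:23/integer>>,
    true = find_pos(SlotBin, 99),
    false = find_pos(SlotBin, 12346),
    ok.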
+index_performance_test() -> + LM1 = generate_randomkeys_aslist(1, 2000, 1, 500), + LM2 = generate_randomkeys_aslist(2001, 2000, 1, 500), + HL1 = lists:map(fun({K, _V}) -> leveled_codec:segment_hash(K) end, LM1), + HL2 = lists:map(fun({K, _V}) -> leveled_codec:segment_hash(K) end, LM2), + + SWP = os:timestamp(), + A1 = + lists:foldl( + fun(H, A) -> prepare_for_index(A, H) end, + new_index(), + HL1), + io:format( + user, + "~nPrepare single index takes ~w microsec~n", + [timer:now_diff(os:timestamp(), SWP)]), + + SWL = os:timestamp(), + PMI1 = + lists:foldl( + fun(I, Idx) -> add_to_index(A1, Idx, I) end, [], lists:seq(1, 8)), + io:format( + user, + "Appending to array takes ~w microsec~n", + [timer:now_diff(os:timestamp(), SWL)]), + + SWC1 = os:timestamp(), + R0 = lists:seq(1, 8), + lists:foreach(fun(H) -> ?assertMatch(R0, check_index(H, PMI1)) end, HL1), + io:format( + user, + "Checking 2000 matches in array at each level takes ~w microsec~n", + [timer:now_diff(os:timestamp(), SWC1)]), + + SWC2 = os:timestamp(), + FPT = + lists:foldl( + fun(H, FPC) -> FPC + length(check_index(H, PMI1)) end, + 0, + HL2), + io:format( + user, + "Checking 2000 misses in array at each level takes ~w microsec " ++ + "with ~w false positives~n", + [timer:now_diff(os:timestamp(), SWC2), FPT]). + + + -endif. diff --git a/src/leveled_rand.erl b/src/leveled_rand.erl index 4aabe5e..fedaf20 100644 --- a/src/leveled_rand.erl +++ b/src/leveled_rand.erl @@ -11,13 +11,9 @@ rand_bytes/1 ]). - --include_lib("eunit/include/eunit.hrl"). - %%%=================================================================== %%% New (r19+) rand style functions %%%=================================================================== --ifndef(old_rand). uniform() -> rand:uniform(). @@ -30,29 +26,3 @@ seed() -> rand_bytes(Size) -> crypto:strong_rand_bytes(Size). --else. -%%%=================================================================== -%%% Old (r18) random style functions -%%%=================================================================== -uniform() -> - random:uniform(). - -uniform(N) -> - random:uniform(N). - -seed() -> - SW = os:timestamp(), - random:seed(erlang:phash2(self()), element(2, SW), element(3, SW)). - -rand_bytes(Size) -> - crypto:rand_bytes(Size). - --endif. - - --ifdef(TEST). - -rand_test() -> - ?assertMatch(true, uniform() < 1). - --endif. diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index f9dda1f..1d8dcc7 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -423,7 +423,7 @@ foldobjects_allkeys(SnapFun, Tag, FoldObjectsFun, sqn_order) -> Acc end end, - leveled_log:log("R0001", [length(BatchAcc)]), + leveled_log:log(r0001, [length(BatchAcc)]), lists:foldr(ObjFun, ObjAcc, BatchAcc) end, @@ -527,10 +527,10 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList, {C, L}) -> null), case R of {1, null} -> - leveled_log:log("B0008",[]), + leveled_log:log(b0008,[]), BKList; {0, {{B, K}, _V}} -> - leveled_log:log("B0009",[B]), + leveled_log:log(b0009,[B]), get_nextbucket(leveled_codec:next_key(B), null, Tag, diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index c5577c9..ea2d230 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -83,7 +83,7 @@ -define(DISCARD_EXT, ".discarded"). -define(DELETE_TIMEOUT, 10000). -define(TREE_TYPE, idxt). --define(TREE_SIZE, 4). +-define(TREE_SIZE, 16). -define(TIMING_SAMPLECOUNTDOWN, 20000). -define(TIMING_SAMPLESIZE, 100). -define(BLOCK_LENGTHS_LENGTH, 20). @@ -149,11 +149,15 @@ start_position :: integer(), length :: integer()}). 
--record(summary, {first_key :: tuple(), - last_key :: tuple(), - index :: tuple() | undefined, - size :: integer(), - max_sqn :: integer()}). +-record(summary, + {first_key :: tuple(), + last_key :: tuple(), + index :: tuple() | undefined, + size :: integer(), + max_sqn :: integer()}). + %% DO NOT CHANGE + %% The summary record is persisted as part of the sile format + %% Any chnage to this record will mean the change cannot be rolled back -type press_method() :: lz4|native|none. @@ -189,7 +193,8 @@ :: {binary(), binary(), list(integer()), leveled_codec:ledger_key()}. -type sst_summary() :: #summary{}. --type blockindex_cache() :: array:array(). +-type blockindex_cache() + :: {non_neg_integer(), array:array(), non_neg_integer()}. -type fetch_cache() :: array:array()|no_cache. -type cache_size() @@ -198,6 +203,8 @@ :: no_cache|non_neg_integer(). -type level() :: non_neg_integer(). +-type summary_filter() + :: fun((leveled_codec:ledger_key()) -> any()). %% yield_blockquery is used to determine if the work necessary to process a %% range query beyond the fetching the slot should be managed from within @@ -207,40 +214,26 @@ %% extra copying. Files at the top of the tree yield, those lower down don't. -record(state, - {summary, - handle :: file:fd() | undefined, - penciller :: pid() | undefined | false, - root_path, - filename, - yield_blockquery = false :: boolean(), - blockindex_cache :: - blockindex_cache() | undefined |redacted, - compression_method = native :: press_method(), - index_moddate = ?INDEX_MODDATE :: boolean(), - timings = no_timing :: sst_timings(), - timings_countdown = 0 :: integer(), - starting_pid :: pid()|undefined, - fetch_cache = no_cache :: fetch_cache() | redacted, - new_slots :: list()|undefined, - deferred_startup_tuple :: tuple()|undefined, - level :: level()|undefined, - tomb_count = not_counted - :: non_neg_integer()|not_counted, - high_modified_date :: non_neg_integer()|undefined}). - --record(sst_timings, - {sample_count = 0 :: integer(), - index_query_time = 0 :: integer(), - lookup_cache_time = 0 :: integer(), - slot_index_time = 0 :: integer(), - fetch_cache_time = 0 :: integer(), - slot_fetch_time = 0 :: integer(), - noncached_block_time = 0 :: integer(), - lookup_cache_count = 0 :: integer(), - slot_index_count = 0 :: integer(), - fetch_cache_count = 0 :: integer(), - slot_fetch_count = 0 :: integer(), - noncached_block_count = 0 :: integer()}). + {summary, + handle :: file:fd() | undefined, + penciller :: pid() | undefined | false, + root_path, + filename, + yield_blockquery = false :: boolean(), + blockindex_cache :: + blockindex_cache() | undefined | redacted, + compression_method = native :: press_method(), + index_moddate = ?INDEX_MODDATE :: boolean(), + starting_pid :: pid()|undefined, + fetch_cache = no_cache :: fetch_cache() | redacted, + new_slots :: list()|undefined, + deferred_startup_tuple :: tuple()|undefined, + level :: level()|undefined, + tomb_count = not_counted + :: non_neg_integer()|not_counted, + high_modified_date :: non_neg_integer()|undefined, + filter_fun :: summary_filter() | undefined, + monitor = {no_monitor, 0} :: leveled_monitor:monitor()}). -record(build_timings, {slot_hashlist = 0 :: integer(), @@ -249,7 +242,6 @@ fold_toslot = 0 :: integer()}). -type sst_state() :: #state{}. --type sst_timings() :: no_timing|#sst_timings{}. -type build_timings() :: no_timing|#build_timings{}. -export_type([expandable_pointer/0, press_method/0]). 
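%% Illustrative sketch (not part of this change): blockindex_cache() is now a
%% {CachedCount, HeaderArray, HighestLastModifiedDate} triple rather than a
%% bare array, so callers can tell whether every slot header has been cached
%% by comparing the count with the array size, without folding over the
%% array. describe_blockindex_cache/1 is a hypothetical helper shown only to
%% make the shape of the triple concrete.
describe_blockindex_cache({CachedCount, HeaderArray, HighLMD}) ->
    #{cached_slots => CachedCount,
        total_slots => array:size(HeaderArray),
        complete => CachedCount == array:size(HeaderArray),
        high_modified_date => HighLMD}.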
@@ -536,13 +528,6 @@ sst_switchlevels(Pid, NewLevel) -> sst_close(Pid) -> gen_fsm:sync_send_event(Pid, close). --spec sst_printtimings(pid()) -> ok. -%% @doc -%% The state of the FSM keeps track of timings of operations, and this can -%% forced to be printed. -%% Used in unit tests to force the printing of timings -sst_printtimings(Pid) -> - gen_fsm:sync_send_event(Pid, print_timings). %%%============================================================================ @@ -554,6 +539,7 @@ init([]) -> starting({sst_open, RootPath, Filename, OptsSST, Level}, _From, State) -> leveled_log:save(OptsSST#sst_options.log_options), + Monitor = OptsSST#sst_options.monitor, {UpdState, Bloom} = read_file(Filename, State#state{root_path=RootPath}, @@ -562,17 +548,19 @@ starting({sst_open, RootPath, Filename, OptsSST, Level}, _From, State) -> {reply, {ok, {Summary#summary.first_key, Summary#summary.last_key}, Bloom}, reader, - UpdState#state{level = Level, fetch_cache = new_cache(Level)}}; + UpdState#state{ + level = Level, fetch_cache = new_cache(Level), monitor = Monitor}}; starting({sst_new, RootPath, Filename, Level, {SlotList, FirstKey}, MaxSQN, OptsSST, IdxModDate, CountOfTombs, StartingPID}, _From, State) -> SW = os:timestamp(), leveled_log:save(OptsSST#sst_options.log_options), + Monitor = OptsSST#sst_options.monitor, PressMethod = OptsSST#sst_options.press_method, {Length, SlotIndex, BlockEntries, SlotsBin, Bloom} = build_all_slots(SlotList), - {BlockIndex, HighModDate} = + {_, BlockIndex, HighModDate} = update_blockindex_cache(true, BlockEntries, new_blockindex_cache(Length), @@ -590,20 +578,19 @@ starting({sst_new, State#state{root_path=RootPath, yield_blockquery=YBQ}, OptsSST#sst_options.pagecache_level >= Level), Summary = UpdState#state.summary, - leveled_log:log_timer("SST08", - [ActualFilename, Level, Summary#summary.max_sqn], - SW), - erlang:send_after(?STARTUP_TIMEOUT, self(), tidyup_after_startup), - % always want to have an opportunity to GC - so force the timeout to - % occur whether or not there is an intervening message + leveled_log:log_timer( + sst08, [ActualFilename, Level, Summary#summary.max_sqn], SW), + erlang:send_after(?STARTUP_TIMEOUT, self(), start_complete), {reply, {ok, {Summary#summary.first_key, Summary#summary.last_key}, Bloom}, reader, - UpdState#state{blockindex_cache = BlockIndex, - high_modified_date = HighModDate, - starting_pid = StartingPID, - level = Level, - fetch_cache = new_cache(Level)}}; + UpdState#state{ + blockindex_cache = BlockIndex, + high_modified_date = HighModDate, + starting_pid = StartingPID, + level = Level, + fetch_cache = new_cache(Level), + monitor = Monitor}}; starting({sst_newlevelzero, RootPath, Filename, Penciller, MaxSQN, OptsSST, IdxModDate}, _From, State) -> @@ -627,6 +614,7 @@ starting(complete_l0startup, State) -> SW0 = os:timestamp(), FetchedSlots = State#state.new_slots, leveled_log:save(OptsSST#sst_options.log_options), + Monitor = OptsSST#sst_options.monitor, PressMethod = OptsSST#sst_options.press_method, FetchFun = fun(Slot) -> lists:nth(Slot, FetchedSlots) end, KVList = leveled_pmem:to_list(length(FetchedSlots), FetchFun), @@ -640,7 +628,7 @@ starting(complete_l0startup, State) -> SW2 = os:timestamp(), {SlotCount, SlotIndex, BlockEntries, SlotsBin,Bloom} = build_all_slots(SlotList), - {BlockIndex, HighModDate} = + {_, BlockIndex, HighModDate} = update_blockindex_cache(true, BlockEntries, new_blockindex_cache(SlotCount), @@ -670,26 +658,28 @@ starting(complete_l0startup, State) -> Summary = UpdState#state.summary, Time4 = 
timer:now_diff(os:timestamp(), SW4), - leveled_log:log_timer("SST08", - [ActualFilename, 0, Summary#summary.max_sqn], - SW0), - leveled_log:log("SST11", [Time0, Time1, Time2, Time3, Time4]), + leveled_log:log_timer( + sst08, [ActualFilename, 0, Summary#summary.max_sqn], SW0), + leveled_log:log(sst11, [Time0, Time1, Time2, Time3, Time4]), case Penciller of undefined -> ok; _ -> - leveled_penciller:pcl_confirml0complete(Penciller, - UpdState#state.filename, - Summary#summary.first_key, - Summary#summary.last_key, - Bloom), + leveled_penciller:pcl_confirml0complete( + Penciller, + UpdState#state.filename, + Summary#summary.first_key, + Summary#summary.last_key, + Bloom), ok end, {next_state, reader, - UpdState#state{blockindex_cache = BlockIndex, - high_modified_date = HighModDate}}; + UpdState#state{ + blockindex_cache = BlockIndex, + high_modified_date = HighModDate, + monitor = Monitor}}; starting({sst_returnslot, FetchedSlot, FetchFun, SlotCount}, State) -> Self = self(), FetchedSlots = @@ -722,21 +712,47 @@ starting({sst_returnslot, FetchedSlot, FetchFun, SlotCount}, State) -> reader({get_sqn, LedgerKey, Hash}, _From, State) -> % Get a KV value and potentially take sample timings - {Result, UpdState, _UpdTimings} = - fetch(LedgerKey, Hash, State, no_timing), - {reply, sqn_only(Result), reader, UpdState, ?HIBERNATE_TIMEOUT}; + {Result, _BIC, _HMD, _FC} = + fetch( + LedgerKey, Hash, + State#state.summary, + State#state.compression_method, + State#state.high_modified_date, + State#state.index_moddate, + State#state.filter_fun, + State#state.blockindex_cache, + State#state.fetch_cache, + State#state.handle, + State#state.level, + {no_monitor, 0}), + {reply, sqn_only(Result), reader, State, ?HIBERNATE_TIMEOUT}; reader({get_kv, LedgerKey, Hash}, _From, State) -> % Get a KV value and potentially take sample timings - {Result, UpdState, UpdTimings} = - fetch(LedgerKey, Hash, State, State#state.timings), - - {UpdTimings0, CountDown} = - update_statetimings(UpdTimings, - State#state.timings_countdown, - State#state.level), - - {reply, Result, reader, UpdState#state{timings = UpdTimings0, - timings_countdown = CountDown}}; + {Result, BIC, HMD, FC} = + fetch( + LedgerKey, Hash, + State#state.summary, + State#state.compression_method, + State#state.high_modified_date, + State#state.index_moddate, + State#state.filter_fun, + State#state.blockindex_cache, + State#state.fetch_cache, + State#state.handle, + State#state.level, + State#state.monitor), + case {BIC, HMD, FC} of + {no_update, no_update, no_update} -> + {reply, Result, reader, State}; + {no_update, no_update, FC} -> + {reply, Result, reader, State#state{fetch_cache = FC}}; + {BIC, HMD, no_update} -> + {reply, + Result, + reader, + State#state{ + blockindex_cache = BIC, high_modified_date = HMD}} + end; reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList, LowLastMod}, _From, State) -> ReadNeeded = @@ -771,17 +787,23 @@ reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList, LowLastMod}, PressMethod, IdxModDate, SegList), - {BlockIdxC0, HighModDate} = + {UpdateCache, BlockIdxC0, HighModDate} = update_blockindex_cache(NeedBlockIdx, FoundBIC, State#state.blockindex_cache, State#state.high_modified_date, State#state.index_moddate), - {reply, - L ++ SlotsToPoint, - reader, - State#state{blockindex_cache = BlockIdxC0, - high_modified_date = HighModDate}} + case UpdateCache of + true -> + {reply, + L ++ SlotsToPoint, + reader, + State#state{ + blockindex_cache = BlockIdxC0, + high_modified_date = HighModDate}}; + false -> + {reply, L ++ 
SlotsToPoint, reader, State} + end end; reader({get_slots, SlotList, SegList, LowLastMod}, _From, State) -> PressMethod = State#state.compression_method, @@ -801,11 +823,8 @@ reader({get_slots, SlotList, SegList, LowLastMod}, _From, State) -> reader(get_maxsequencenumber, _From, State) -> Summary = State#state.summary, {reply, Summary#summary.max_sqn, reader, State}; -reader(print_timings, _From, State) -> - log_timings(State#state.timings, State#state.level), - {reply, ok, reader, State}; reader({set_for_delete, Penciller}, _From, State) -> - leveled_log:log("SST06", [State#state.filename]), + leveled_log:log(sst06, [State#state.filename]), {reply, ok, delete_pending, @@ -848,12 +867,35 @@ reader({switch_levels, NewLevel}, State) -> delete_pending({get_sqn, LedgerKey, Hash}, _From, State) -> % Get a KV value and potentially take sample timings - {Result, UpdState, _UpdTimings} = - fetch(LedgerKey, Hash, State, no_timing), - {reply, sqn_only(Result), delete_pending, UpdState, ?DELETE_TIMEOUT}; + {Result, _BIC, _HMD, _FC} = + fetch( + LedgerKey, Hash, + State#state.summary, + State#state.compression_method, + State#state.high_modified_date, + State#state.index_moddate, + State#state.filter_fun, + State#state.blockindex_cache, + State#state.fetch_cache, + State#state.handle, + State#state.level, + {no_monitor, 0}), + {reply, sqn_only(Result), delete_pending, State, ?DELETE_TIMEOUT}; delete_pending({get_kv, LedgerKey, Hash}, _From, State) -> - {Result, UpdState, _Ts} = fetch(LedgerKey, Hash, State, no_timing), - {reply, Result, delete_pending, UpdState, ?DELETE_TIMEOUT}; + {Result, _BIC, _HMD, _FC} = + fetch( + LedgerKey, Hash, + State#state.summary, + State#state.compression_method, + State#state.high_modified_date, + State#state.index_moddate, + State#state.filter_fun, + State#state.blockindex_cache, + State#state.fetch_cache, + State#state.handle, + State#state.level, + {no_monitor, 0}), + {reply, Result, delete_pending, State, ?DELETE_TIMEOUT}; delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SegList, LowLastMod}, _From, State) -> {_NeedBlockIdx, SlotsToFetchBinList, SlotsToPoint} = @@ -881,7 +923,7 @@ delete_pending({get_slots, SlotList, SegList, LowLastMod}, _From, State) -> State, ?DELETE_TIMEOUT}; delete_pending(close, _From, State) -> - leveled_log:log("SST07", [State#state.filename]), + leveled_log:log(sst07, [State#state.filename]), ok = file:close(State#state.handle), ok = file:delete(filename:join(State#state.root_path, State#state.filename)), @@ -899,7 +941,7 @@ delete_pending(timeout, State) -> % back-off {next_state, delete_pending, State, leveled_rand:uniform(10) * ?DELETE_TIMEOUT}; delete_pending(close, State) -> - leveled_log:log("SST07", [State#state.filename]), + leveled_log:log(sst07, [State#state.filename]), ok = file:close(State#state.handle), ok = file:delete(filename:join(State#state.root_path, State#state.filename)), @@ -909,7 +951,7 @@ handle_sync_event(_Msg, _From, StateName, State) -> {reply, undefined, StateName, State}. handle_event({update_blockindex_cache, BIC}, StateName, State) -> - {BlockIndexCache, HighModDate} = + {_, BlockIndexCache, HighModDate} = update_blockindex_cache(true, BIC, State#state.blockindex_cache, @@ -920,14 +962,27 @@ handle_event({update_blockindex_cache, BIC}, StateName, State) -> State#state{blockindex_cache = BlockIndexCache, high_modified_date = HighModDate}}. -handle_info(tidyup_after_startup, delete_pending, State) -> - % No need to GC, this file is to be shutdown. 
This message may have - % interrupted the delete timeout, so timeout straight away - {next_state, delete_pending, State, 0}; -handle_info(tidyup_after_startup, StateName, State) -> +handle_info(_Msg, delete_pending, State) -> + % Ignore messages when pending delete. The message may have interrupted + % the delete timeout, so timeout straight away + {next_state, delete_pending, State, 0}; +handle_info(bic_complete, StateName, State) -> + % The block index cache is complete, so the memory footprint should be + % relatively stable from this point. Hibernate to help minimise + % fragmentation + leveled_log:log(sst14, [State#state.filename]), + {next_state, StateName, State, hibernate}; +handle_info(start_complete, StateName, State) -> + % The SST file will be started by a clerk, but the clerk may be shut down + % prior to the manifest being updated about the existence of this SST file. + % If there is no activity after startup, check the clerk is still alive and + % otherwise assume this file is part of a closed store and shut down. + % If the clerk has crashed, the penciller will restart at the latest + % manifest, and so this file sill be restarted if and only if it is still + % part of the store case is_process_alive(State#state.starting_pid) of true -> - {next_state, StateName, State, hibernate}; + {next_state, StateName, State}; false -> {stop, normal, State} end. @@ -935,7 +990,7 @@ handle_info(tidyup_after_startup, StateName, State) -> terminate(normal, delete_pending, _State) -> ok; terminate(Reason, _StateName, State) -> - leveled_log:log("SST04", [Reason, State#state.filename]). + leveled_log:log(sst04, [Reason, State#state.filename]). code_change(_OldVsn, StateName, State, _Extra) -> {ok, StateName, State}. @@ -1004,7 +1059,7 @@ expand_list_by_pointer({pointer, SSTPid, Slot, StartKey, EndKey}, expand_list_by_pointer({next, ManEntry, StartKey, EndKey}, Tail, Width, SegList, LowLastMod) -> SSTPid = ManEntry#manifest_entry.owner, - leveled_log:log("SST10", [SSTPid, is_process_alive(SSTPid)]), + leveled_log:log(sst10, [SSTPid, is_process_alive(SSTPid)]), ExpPointer = sst_getfilteredrange(SSTPid, StartKey, EndKey, @@ -1186,7 +1241,7 @@ cache_size(N) when N < 3 -> cache_size(3) -> 32; cache_size(4) -> - 32; + 16; cache_size(5) -> 4; cache_size(6) -> @@ -1242,50 +1297,64 @@ tune_seglist(SegList) -> -spec new_blockindex_cache(pos_integer()) -> blockindex_cache(). new_blockindex_cache(Size) -> - array:new([{size, Size}, {default, none}]). + {0, array:new([{size, Size}, {default, none}]), 0}. --spec update_blockindex_cache(boolean(), - list({integer(), binary()}), - blockindex_cache(), - non_neg_integer()|undefined, - boolean()) -> - {blockindex_cache(), - non_neg_integer()|undefined}. 
-update_blockindex_cache(Needed, Entries, BIC, HighModDate, IdxModDate) - when Needed, - HighModDate == undefined -> - FoldFun = - fun(CacheEntry, Cache) -> - case CacheEntry of - {ID, Header} when is_binary(Header) -> - array:set(ID - 1, binary:copy(Header), Cache); - _ -> - Cache - end - end, - BlockIdxC0 = lists:foldl(FoldFun, BIC, Entries), - Size = array:size(BlockIdxC0), - BestModDates = - case IdxModDate of - true -> - ModDateFold = - fun(_ID, Header, Acc) when is_binary(Header) -> - [element(2, extract_header(Header, IdxModDate))|Acc] - end, - array:sparse_foldl(ModDateFold, [], BlockIdxC0); - false -> - [] - end, - BestModDate = - case length(BestModDates) of - Size -> - lists:max(BestModDates); +-spec updatebic_foldfun(boolean()) -> + fun(({integer(), binary()}, blockindex_cache()) -> blockindex_cache()). +updatebic_foldfun(HMDRequired) -> + fun(CacheEntry, {AccCount, Cache, AccHMD}) -> + case CacheEntry of + {ID, Header} when is_binary(Header) -> + case array:get(ID - 1, Cache) of + none -> + H0 = binary:copy(Header), + AccHMD0 = + case HMDRequired of + true -> + max(AccHMD, + element(2, extract_header(H0, true))); + false -> + AccHMD + end, + {AccCount + 1, array:set(ID - 1, H0, Cache), AccHMD0}; + _ -> + {AccCount, Cache, AccHMD} + end; _ -> - undefined - end, - {BlockIdxC0, BestModDate}; + {AccCount, Cache, AccHMD} + end + end. + +-spec update_blockindex_cache( + boolean(), list({integer(), binary()}), + blockindex_cache(), non_neg_integer()|undefined, + boolean()) -> + {boolean(), blockindex_cache(), non_neg_integer()|undefined}. +update_blockindex_cache(true, Entries, BIC, HighModDate, IdxModDate) -> + case {element(1, BIC), array:size(element(2, BIC))} of + {N, N} -> + {false, BIC, HighModDate}; + {N, S} when N < S -> + FoldFun = + case {HighModDate, IdxModDate} of + {undefined, true} -> + updatebic_foldfun(true); + _ -> + updatebic_foldfun(false) + end, + BIC0 = lists:foldl(FoldFun, BIC, Entries), + case {element(1, BIC0), IdxModDate} of + {N, _} -> + {false, BIC, HighModDate}; + {S, true} -> + erlang:send(self(), bic_complete), + {true, BIC0, element(3, BIC0)}; + _ -> + {true, BIC0, undefined} + end + end; update_blockindex_cache(_Needed, _Entries, BIC, HighModDate, _IdxModDate) -> - {BIC, HighModDate}. + {false, BIC, HighModDate}. -spec check_modified(non_neg_integer()|undefined, non_neg_integer(), @@ -1296,87 +1365,97 @@ check_modified(HighLastModifiedInSST, LowModDate, true) check_modified(_, _, _) -> true. --spec fetch(tuple(), - {integer(), integer()}|integer(), - sst_state(), sst_timings()) - -> {not_present|tuple(), sst_state(), sst_timings()}. +-spec fetch( + leveled_codec:ledger_key(), + leveled_codec:segment_hash(), + sst_summary(), + press_method(), + non_neg_integer()|undefined, + boolean(), + summary_filter(), + blockindex_cache(), + fetch_cache(), + file:fd(), + leveled_pmanifest:lsm_level(), + leveled_monitor:monitor()) + -> {not_present|leveled_codec:ledger_kv(), + blockindex_cache()|no_update, + non_neg_integer()|undefined|no_update, + fetch_cache()|no_update}. %% @doc %% %% Fetch a key from the store, potentially taking timings. Result should be %% not_present if the key is not in the store. 
-fetch(LedgerKey, Hash, State, Timings0) -> - SW0 = os:timestamp(), - - Summary = State#state.summary, - PressMethod = State#state.compression_method, - IdxModDate = State#state.index_moddate, - Slot = lookup_slot(LedgerKey, Summary#summary.index), - - {SW1, Timings1} = update_timings(SW0, Timings0, index_query, true), - +fetch(LedgerKey, Hash, + Summary, + PressMethod, HighModDate, IndexModDate, FilterFun, BIC, FetchCache, + Handle, Level, Monitor) -> + SW0 = leveled_monitor:maybe_time(Monitor), + Slot = + lookup_slot(LedgerKey, Summary#summary.index, FilterFun), SlotID = Slot#slot_index_value.slot_id, - CachedBlockIdx = - array:get(SlotID - 1, State#state.blockindex_cache), - {SW2, Timings2} = update_timings(SW1, Timings1, lookup_cache, true), - - case extract_header(CachedBlockIdx, IdxModDate) of + CachedBlockIdx = array:get(SlotID - 1, element(2, BIC)), + + case extract_header(CachedBlockIdx, IndexModDate) of none -> - SlotBin = read_slot(State#state.handle, Slot), + SlotBin = read_slot(Handle, Slot), {Result, Header} = - binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod, IdxModDate), - {BlockIndexCache, HighModDate} = + binaryslot_get( + SlotBin, LedgerKey, Hash, PressMethod, IndexModDate), + {_UpdateState, BIC0, HMD0} = update_blockindex_cache(true, [{SlotID, Header}], - State#state.blockindex_cache, - State#state.high_modified_date, - State#state.index_moddate), - {_SW3, Timings3} = - update_timings(SW2, Timings2, noncached_block, false), - {Result, - State#state{blockindex_cache = BlockIndexCache, - high_modified_date = HighModDate}, - Timings3}; + BIC, + HighModDate, + IndexModDate), + case Result of + not_present -> + maybelog_fetch_timing( + Monitor, Level, not_found, SW0); + _ -> + maybelog_fetch_timing( + Monitor, Level, slot_noncachedblock, SW0) + end, + {Result, BIC0, HMD0, no_update}; {BlockLengths, _LMD, PosBin} -> PosList = find_pos(PosBin, extract_hash(Hash), [], 0), case PosList of [] -> - {_SW3, Timings3} = - update_timings(SW2, Timings2, slot_index, false), - {not_present, State, Timings3}; + maybelog_fetch_timing(Monitor, Level, not_found, SW0), + {not_present, no_update, no_update, no_update}; _ -> - {SW3, Timings3} = - update_timings(SW2, Timings2, slot_index, true), - FetchCache = State#state.fetch_cache, - CacheHash = cache_hash(Hash, State#state.level), + CacheHash = cache_hash(Hash, Level), case fetch_from_cache(CacheHash, FetchCache) of {LedgerKey, V} -> - {_SW4, Timings4} = - update_timings(SW3, - Timings3, - fetch_cache, - false), - {{LedgerKey, V}, State, Timings4}; + maybelog_fetch_timing( + Monitor, Level, fetch_cache, SW0), + {{LedgerKey, V}, no_update, no_update, no_update}; _ -> StartPos = Slot#slot_index_value.start_position, Result = check_blocks(PosList, - {State#state.handle, StartPos}, + {Handle, StartPos}, BlockLengths, byte_size(PosBin), LedgerKey, PressMethod, - IdxModDate, + IndexModDate, not_present), - FetchCache0 = - add_to_cache(CacheHash, Result, FetchCache), - {_SW4, Timings4} = - update_timings(SW3, - Timings3, - slot_fetch, - false), - {Result, - State#state{fetch_cache = FetchCache0}, - Timings4} + case Result of + not_present -> + maybelog_fetch_timing( + Monitor, Level, not_found, SW0), + {not_present, + no_update, no_update, no_update}; + _ -> + FetchCache0 = + add_to_cache( + CacheHash, Result, FetchCache), + maybelog_fetch_timing( + Monitor, Level, slot_cachedblock, SW0), + {Result, + no_update, no_update, FetchCache0} + end end end end. 
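%% Illustrative sketch (not part of this change): fetch/12 now returns
%% no_update markers alongside the result, so the calling state machine only
%% rebuilds its loop state when one of the caches has actually changed (as in
%% the reader({get_kv, ...}) clause earlier in this change).
%% apply_fetch_result/2 is a hypothetical helper shown for illustration.
apply_fetch_result(State, {Result, no_update, no_update, no_update}) ->
    {Result, State};
apply_fetch_result(State, {Result, no_update, no_update, FetchCache}) ->
    {Result, State#state{fetch_cache = FetchCache}};
apply_fetch_result(State, {Result, BIC, HighModDate, no_update}) ->
    {Result,
        State#state{
            blockindex_cache = BIC, high_modified_date = HighModDate}}.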
@@ -1396,7 +1475,12 @@ fetch(LedgerKey, Hash, State, Timings0) -> fetch_range(StartKey, EndKey, ScanWidth, SegList, LowLastMod, State) -> Summary = State#state.summary, Handle = State#state.handle, - {Slots, RTrim} = lookup_slots(StartKey, EndKey, Summary#summary.index), + {Slots, RTrim} = + lookup_slots( + StartKey, + EndKey, + Summary#summary.index, + State#state.filter_fun), Self = self(), SL = length(Slots), @@ -1476,7 +1560,7 @@ write_file(RootPath, Filename, SummaryBin, SlotsBin, true -> AltName = filename:join(RootPath, filename:basename(FinalName)) ++ ?DISCARD_EXT, - leveled_log:log("SST05", [FinalName, AltName]), + leveled_log:log(sst05, [FinalName, AltName]), ok = file:rename(filename:join(RootPath, FinalName), AltName); false -> ok @@ -1502,15 +1586,17 @@ read_file(Filename, State, LoadPageCache) -> read_table_summary(SummaryBin, UpdState0#state.tomb_count), BlockIndexCache = new_blockindex_cache(Summary#summary.size), UpdState1 = UpdState0#state{blockindex_cache = BlockIndexCache}, - SlotIndex = from_list(SlotList), + {SlotIndex, FilterFun} = + from_list( + SlotList, Summary#summary.first_key, Summary#summary.last_key), UpdSummary = Summary#summary{index = SlotIndex}, - leveled_log:log("SST03", [Filename, - Summary#summary.size, - Summary#summary.max_sqn]), + leveled_log:log( + sst03, [Filename, Summary#summary.size, Summary#summary.max_sqn]), {UpdState1#state{summary = UpdSummary, handle = Handle, filename = Filename, - tomb_count = TombCount}, + tomb_count = TombCount, + filter_fun = FilterFun}, Bloom}. gen_fileversion(PressMethod, IdxModDate, CountOfTombs) -> @@ -1735,20 +1821,88 @@ hmac(Int) when is_integer(Int) -> %% %% This implementation of the SlotIndex uses leveled_tree -from_list(SlotList) -> - leveled_tree:from_orderedlist(SlotList, ?TREE_TYPE, ?TREE_SIZE). +from_list(SlotList, FirstKey, LastKey) -> + FilterFun = get_filterfun(FirstKey, LastKey), + FilteredList = + lists:map(fun({K, S}) -> {FilterFun(K), S} end, SlotList), + {leveled_tree:from_orderedlist(FilteredList, ?TREE_TYPE, ?TREE_SIZE), + FilterFun}. -lookup_slot(Key, Tree) -> +-spec get_filterfun( + leveled_codec:ledger_key(), leveled_codec:ledger_key()) -> + fun((leveled_codec:ledger_key()) + -> leveled_codec:ledger_key()|leveled_codec:slimmed_key()). +get_filterfun( + {?IDX_TAG, B, {Field, FT}, FK}, {?IDX_TAG, B, {Field, LT}, LK}) + when is_binary(Field), + is_binary(FT), is_binary(FK), is_binary(LT), is_binary(LK) -> + case {binary:longest_common_prefix([FT, LT]), byte_size(FT)} of + {N, M} when N > 0, M >= N -> + <> = FT, + term_prefix_filter(N, Prefix); + _ -> + fun term_filter/1 + end; +get_filterfun( + {Tag, B, FK, null}, {Tag, B, LK, null}) + when is_binary(FK), is_binary(LK) -> + case {binary:longest_common_prefix([FK, LK]), byte_size(FK)} of + {N, M} when N > 0, M >= N -> + <> = FK, + key_prefix_filter(N, Prefix); + _ -> + fun key_filter/1 + + end; +get_filterfun(_FirstKey, _LastKey) -> + fun null_filter/1. + +-spec null_filter(leveled_codec:ledger_key()) -> leveled_codec:ledger_key(). +null_filter(Key) -> Key. + +-spec key_filter(leveled_codec:ledger_key()) -> leveled_codec:slimmed_key(). +key_filter({_Tag, _Bucket, Key, null}) -> Key. + +-spec term_filter(leveled_codec:ledger_key()) -> leveled_codec:slimmed_key(). +term_filter({_Tag, _Bucket, {_Field, Term}, Key}) -> {Term, Key}. + +-spec key_prefix_filter( + pos_integer(), binary()) -> + fun((leveled_codec:ledger_key()) -> leveled_codec:slimmed_key()). 
+key_prefix_filter(N, Prefix) -> + fun({_Tag, _Bucket, Key, null}) -> + case Key of + <> -> + Suffix; + _ -> + null + end + end. + +-spec term_prefix_filter( + pos_integer(), binary()) -> + fun((leveled_codec:ledger_key()) -> leveled_codec:slimmed_key()). +term_prefix_filter(N, Prefix) -> + fun({_Tag, _Bucket, {_Field, Term}, Key}) -> + case Term of + <> -> + {Suffix, Key}; + _ -> + null + end + end. + +lookup_slot(Key, Tree, FilterFun) -> StartKeyFun = fun(_V) -> all end, % The penciller should never ask for presence out of range - so will - % always return a slot (As we don't compare to StartKey) - {_LK, Slot} = leveled_tree:search(Key, Tree, StartKeyFun), + % always return a slot (as we don't compare to StartKey) + {_LK, Slot} = leveled_tree:search(FilterFun(Key), Tree, StartKeyFun), Slot. -lookup_slots(StartKey, EndKey, Tree) -> +lookup_slots(StartKey, EndKey, Tree, FilterFun) -> StartKeyFun = fun(_V) -> all @@ -1757,9 +1911,25 @@ lookup_slots(StartKey, EndKey, Tree) -> fun({_LK, Slot}) -> Slot end, - SlotList = leveled_tree:search_range(StartKey, EndKey, Tree, StartKeyFun), + FilteredStartKey = + case StartKey of + all -> all; + _ -> FilterFun(StartKey) + end, + FilteredEndKey = + case EndKey of + all -> all; + _ -> FilterFun(EndKey) + end, + SlotList = + leveled_tree:search_range( + FilteredStartKey, + FilteredEndKey, + Tree, + StartKeyFun), {EK, _EndSlot} = lists:last(SlotList), - {lists:map(MapFun, SlotList), not leveled_codec:endkey_passed(EK, EndKey)}. + {lists:map(MapFun, SlotList), + leveled_codec:endkey_passed(FilteredEndKey, EK)}. %%%============================================================================ @@ -2118,7 +2288,7 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache}, BinMapFun = fun(Pointer, {NeededBlockIdx, Acc}) -> {SP, _L, ID, SK, EK} = pointer_mapfun(Pointer), - CachedHeader = array:get(ID - 1, BlockIndexCache), + CachedHeader = array:get(ID - 1, element(2, BlockIndexCache)), case extract_header(CachedHeader, IdxModDate) of none -> % If there is an attempt to use the seg list query and the @@ -2465,7 +2635,7 @@ crc_check_slot(FullBin) -> {CRC32H, CRC32PBL} -> {Header, Blocks}; _ -> - leveled_log:log("SST09", []), + leveled_log:log(sst09, []), crc_wonky end. @@ -2899,106 +3069,24 @@ update_buildtimings(SW, Timings, Stage) -> %% %% Log out the time spent during the merge lists part of the SST build log_buildtimings(Timings, LI) -> - leveled_log:log("SST13", [Timings#build_timings.fold_toslot, - Timings#build_timings.slot_hashlist, - Timings#build_timings.slot_serialise, - Timings#build_timings.slot_finish, - element(1, LI), - element(2, LI)]). + leveled_log:log( + sst13, + [Timings#build_timings.fold_toslot, + Timings#build_timings.slot_hashlist, + Timings#build_timings.slot_serialise, + Timings#build_timings.slot_finish, + element(1, LI), element(2, LI)]). - --spec update_statetimings(sst_timings(), integer(), non_neg_integer()) - -> {sst_timings(), integer()}. -%% @doc -%% -%% The timings state is either in countdown to the next set of samples of -%% we are actively collecting a sample. Active collection take place -%% when the countdown is 0. Once the sample has reached the expected count -%% then there is a log of that sample, and the countdown is restarted. -%% -%% Outside of sample windows the timings object should be set to the atom -%% no_timing. no_timing is a valid state for the cdb_timings type. 
-update_statetimings(no_timing, 0, _Level) -> - {#sst_timings{}, 0}; -update_statetimings(Timings, 0, Level) -> - case Timings#sst_timings.sample_count of - SC when SC >= ?TIMING_SAMPLESIZE -> - log_timings(Timings, Level), - % If file at lower level wait longer before tsking another - % sample - {no_timing, - leveled_rand:uniform(2 * ?TIMING_SAMPLECOUNTDOWN)}; - _SC -> - {Timings, 0} - end; -update_statetimings(no_timing, N, _Level) -> - {no_timing, N - 1}. - -log_timings(no_timing, _Level) -> +-spec maybelog_fetch_timing( + leveled_monitor:monitor(), + leveled_pmanifest:lsm_level(), + leveled_monitor:sst_fetch_type(), + erlang:timestamp()|no_timing) -> ok. +maybelog_fetch_timing(_Monitor, _Level, _Type, no_timing) -> ok; -log_timings(Timings, Level) -> - leveled_log:log("SST12", [Level, - Timings#sst_timings.sample_count, - Timings#sst_timings.index_query_time, - Timings#sst_timings.lookup_cache_time, - Timings#sst_timings.slot_index_time, - Timings#sst_timings.fetch_cache_time, - Timings#sst_timings.slot_fetch_time, - Timings#sst_timings.noncached_block_time, - Timings#sst_timings.slot_index_count, - Timings#sst_timings.fetch_cache_count, - Timings#sst_timings.slot_fetch_count, - Timings#sst_timings.noncached_block_count]). - - -update_timings(_SW, no_timing, _Stage, _Continue) -> - {no_timing, no_timing}; -update_timings(SW, Timings, Stage, Continue) -> - Timer = timer:now_diff(os:timestamp(), SW), - Timings0 = - case Stage of - index_query -> - IQT = Timings#sst_timings.index_query_time, - Timings#sst_timings{index_query_time = IQT + Timer}; - lookup_cache -> - TBT = Timings#sst_timings.lookup_cache_time, - Timings#sst_timings{lookup_cache_time = TBT + Timer}; - slot_index -> - SIT = Timings#sst_timings.slot_index_time, - Timings#sst_timings{slot_index_time = SIT + Timer}; - fetch_cache -> - FCT = Timings#sst_timings.fetch_cache_time, - Timings#sst_timings{fetch_cache_time = FCT + Timer}; - slot_fetch -> - SFT = Timings#sst_timings.slot_fetch_time, - Timings#sst_timings{slot_fetch_time = SFT + Timer}; - noncached_block -> - NCT = Timings#sst_timings.noncached_block_time, - Timings#sst_timings{noncached_block_time = NCT + Timer} - end, - case Continue of - true -> - {os:timestamp(), Timings0}; - false -> - Timings1 = - case Stage of - slot_index -> - SIC = Timings#sst_timings.slot_index_count, - Timings0#sst_timings{slot_index_count = SIC + 1}; - fetch_cache -> - FCC = Timings#sst_timings.fetch_cache_count, - Timings0#sst_timings{fetch_cache_count = FCC + 1}; - slot_fetch -> - SFC = Timings#sst_timings.slot_fetch_count, - Timings0#sst_timings{slot_fetch_count = SFC + 1}; - noncached_block -> - NCC = Timings#sst_timings.noncached_block_count, - Timings0#sst_timings{noncached_block_count = NCC + 1} - end, - SC = Timings1#sst_timings.sample_count, - {no_timing, Timings1#sst_timings{sample_count = SC + 1}} - end. - +maybelog_fetch_timing({Pid, _SlotFreq}, Level, Type, SW) -> + {TS1, _} = leveled_monitor:step_time(SW), + leveled_monitor:add_stat(Pid, {sst_fetch_update, Level, Type, TS1}). 
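%% Illustrative sketch (not part of this change) of the summary key slimming
%% performed by get_filterfun/2 above: when the first and last keys of the
%% file share a common binary prefix, only the suffix is stored in (and
%% compared against) the summary index, with lookup keys passed through the
%% same filter so comparisons still line up. The bucket and keys below are
%% illustrative only.
filterfun_example() ->
    FirstKey = {?RIAK_TAG, <<"bucket">>, <<"user00000001">>, null},
    LastKey = {?RIAK_TAG, <<"bucket">>, <<"user00009999">>, null},
    FilterFun = get_filterfun(FirstKey, LastKey),
    %% The 8-byte common prefix <<"user0000">> is stripped from every key
    <<"0001">> = FilterFun(FirstKey),
    <<"9999">> = FilterFun(LastKey),
    ok.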
%%%============================================================================ %%% Test @@ -3664,7 +3752,7 @@ simple_persisted_rangesegfilter_tester(SSTNewFun) -> additional_range_test() -> % Test fetching ranges that fall into odd situations with regards to the - % summayr index + % summary index % - ranges which fall between entries in summary % - ranges which go beyond the end of the range of the sst % - ranges which match to an end key in the summary index @@ -3702,6 +3790,7 @@ additional_range_test() -> % Testing the gap [GapSKV] = generate_indexkey(?NOLOOK_SLOTSIZE + 1, ?NOLOOK_SLOTSIZE + 1), [GapEKV] = generate_indexkey(?NOLOOK_SLOTSIZE + 2, ?NOLOOK_SLOTSIZE + 2), + io:format("Gap test between ~p and ~p", [GapSKV, GapEKV]), R3 = sst_getkvrange(P1, element(1, GapSKV), element(1, GapEKV), 1), ?assertMatch([], R3), @@ -3864,7 +3953,7 @@ fetch_status_test() -> {status, Pid, {module, gen_fsm}, SItemL} = sys:get_status(Pid), S = lists:keyfind(state, 1, lists:nth(5, SItemL)), true = is_integer(array:size(S#state.fetch_cache)), - true = is_integer(array:size(S#state.blockindex_cache)), + true = is_integer(array:size(element(2, S#state.blockindex_cache))), ST = format_status(terminate, [dict:new(), S]), ?assertMatch(redacted, ST#state.blockindex_cache), ?assertMatch(redacted, ST#state.fetch_cache), @@ -3909,7 +3998,6 @@ simple_persisted_tester(SSTNewFun) -> "Checking for ~w keys (twice) in file with cache hit took ~w " ++ "microseconds~n", [length(KVList1), timer:now_diff(os:timestamp(), SW1)]), - ok = sst_printtimings(Pid), KVList2 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 32, 1, 20), MapFun = fun({K, V}, Acc) -> @@ -3931,7 +4019,6 @@ simple_persisted_tester(SSTNewFun) -> io:format(user, "Checking for ~w missing keys took ~w microseconds~n", [length(KVList3), timer:now_diff(os:timestamp(), SW2)]), - ok = sst_printtimings(Pid), FetchList1 = sst_getkvrange(Pid, all, all, 2), FoldFun = fun(X, Acc) -> case X of @@ -4077,12 +4164,12 @@ key_dominates_test() -> key_dominates([KV7|KL2], [KV2], {true, 1})). nonsense_coverage_test() -> - ?assertMatch({ok, reader, #state{}}, code_change(nonsense, - reader, - #state{}, - nonsense)), - ?assertMatch({reply, undefined, reader, #state{}}, - handle_sync_event("hello", self(), reader, #state{})), + ?assertMatch( + {ok, reader, #state{}}, + code_change(nonsense, reader, #state{}, nonsense)), + ?assertMatch( + {reply, undefined, reader, #state{}}, + handle_sync_event("hello", self(), reader, #state{})), SampleBin = <<0:128/integer>>, FlippedBin = flip_byte(SampleBin, 0, 16), @@ -4129,19 +4216,6 @@ check_segment_match(PosBinIndex1, KVL, TreeSize) -> end, lists:foreach(CheckFun, KVL). -timings_test() -> - SW = os:timestamp(), - timer:sleep(1), - {no_timing, T1} = update_timings(SW, #sst_timings{}, slot_index, false), - {no_timing, T2} = update_timings(SW, T1, slot_fetch, false), - {no_timing, T3} = update_timings(SW, T2, noncached_block, false), - timer:sleep(1), - {_, T4} = update_timings(SW, T3, slot_fetch, true), - ?assertMatch(3, T4#sst_timings.sample_count), - ?assertMatch(1, T4#sst_timings.slot_fetch_count), - ?assertMatch(true, T4#sst_timings.slot_fetch_time > - T3#sst_timings.slot_fetch_time). 
- take_max_lastmoddate_test() -> % TODO: Remove this test % Temporarily added to make dialyzer happy (until we've made use of last @@ -4295,23 +4369,714 @@ block_index_cache_test() -> lists:seq(1, 8)), HeaderTS = <<0:160/integer, Now:32/integer, 0:32/integer>>, HeaderNoTS = <<0:192>>, - BIC = array:new([{size, 8}, {default, none}]), - {BIC0, undefined} = + BIC = new_blockindex_cache(8), + {_, BIC0, undefined} = update_blockindex_cache(false, EntriesNoTS, BIC, undefined, false), - {BIC1, undefined} = + {_, BIC1, undefined} = update_blockindex_cache(false, EntriesTS, BIC, undefined, true), - {BIC2, undefined} = + {_, BIC2, undefined} = update_blockindex_cache(true, EntriesNoTS, BIC, undefined, false), - {BIC3, LMD3} = - update_blockindex_cache(true, EntriesTS, BIC, undefined, true), + {ETSP1, ETSP2} = lists:split(6, EntriesTS), + {_, BIC3, undefined} = + update_blockindex_cache(true, ETSP1, BIC, undefined, true), + {_, BIC3, undefined} = + update_blockindex_cache(true, ETSP1, BIC3, undefined, true), + {_, BIC4, LMD4} = + update_blockindex_cache(true, ETSP2, BIC3, undefined, true), + {_, BIC4, LMD4} = + update_blockindex_cache(true, ETSP2, BIC4, LMD4, true), - ?assertMatch(none, array:get(0, BIC0)), - ?assertMatch(none, array:get(0, BIC1)), - ?assertMatch(HeaderNoTS, array:get(0, BIC2)), - ?assertMatch(HeaderTS, array:get(0, BIC3)), - ?assertMatch(Now, LMD3). + ?assertMatch(none, array:get(0, element(2, BIC0))), + ?assertMatch(none, array:get(0, element(2, BIC1))), + ?assertMatch(HeaderNoTS, array:get(0, element(2, BIC2))), + ?assertMatch(HeaderTS, array:get(0, element(2, BIC3))), + ?assertMatch(HeaderTS, array:get(0, element(2, BIC4))), + ?assertMatch(Now, LMD4). +key_matchesprefix_test() -> + FileName = "keymatchesprefix_test", + IndexKeyFun = + fun(I) -> + {{?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, + list_to_binary("19601301|" + ++ io_lib:format("~6..0w", [I]))}, + list_to_binary(io_lib:format("~6..0w", [I]))}, + {1, {active, infinity}, no_lookup, null}} + end, + IndexEntries = lists:map(IndexKeyFun, lists:seq(1, 500)), + OddIdxKey = + {{?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, + list_to_binary(io_lib:format("~6..0w", [0]))}, + {1, {active, infinity}, no_lookup, null}}, + OptsSST = + #sst_options{press_method=native, + log_options=leveled_log:get_opts()}, + {ok, P1, {_FK1, _LK1}, _Bloom1} = + sst_new( + ?TEST_AREA, FileName, 1, [OddIdxKey|IndexEntries], 6000, OptsSST), + IdxRange2 = + sst_getkvrange( + P1, + {?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1960">>}, null}, + {?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1961">>}, null}, + 16), + IdxRange4 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|000251">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1961">>}, null}, + 16), + IdxRangeX = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1961">>}, null}, + 16), + IdxRangeY = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1961">>}, null}, + 16), + IdxRangeZ = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|000500">>}, null}, + 16), + ?assertMatch(501, 
length(IdxRange2)), + ?assertMatch(250, length(IdxRange4)), + ?assertMatch(501, length(IdxRangeX)), + ?assertMatch(500, length(IdxRangeY)), + ?assertMatch(500, length(IdxRangeZ)), + ok = sst_close(P1), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), + ObjectKeyFun = + fun(I) -> + {{?RIAK_TAG, + {<<"btype">>, <<"bucket">>}, + list_to_binary("19601301|" + ++ io_lib:format("~6..0w", [I])), + null}, + {1, {active, infinity}, {0, 0}, null}} + end, + ObjectEntries = lists:map(ObjectKeyFun, lists:seq(1, 500)), + OddObjKey = + {{?RIAK_TAG, + {<<"btype">>, <<"bucket">>}, + <<"19601301">>, + null}, + {1, {active, infinity}, {100, 100}, null}}, + OptsSST = + #sst_options{press_method=native, log_options=leveled_log:get_opts()}, + {ok, P2, {_FK2, _LK2}, _Bloom2} = + sst_new( + ?TEST_AREA, FileName, 1, [OddObjKey|ObjectEntries], 6000, OptsSST), + ObjRange2 = + sst_getkvrange( + P2, + {?RIAK_TAG, + {<<"btype">>, <<"bucket">>}, + <<"1960">>, null}, + {?RIAK_TAG, + {<<"btype">>, <<"bucket">>}, + <<"1961">>, null}, + 16), + ObjRange4 = + sst_getkvrange( + P2, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000251">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"1961">>, null}, + 16), + ObjRangeX = + sst_getkvrange( + P2, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"1961">>, null}, + 16), + ObjRangeY = + sst_getkvrange( + P2, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"1961">>, null}, + 16), + ?assertMatch(501, length(ObjRange2)), + ?assertMatch(250, length(ObjRange4)), + ?assertMatch(501, length(ObjRangeX)), + ?assertMatch(500, length(ObjRangeY)), + ok = sst_close(P2), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). 
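%% Note on key construction (applies to the generators above and below):
%% io_lib:format("~6..0w", [I]) zero-pads the integer to a fixed six
%% digits, so the generated binaries sort lexicographically in the same
%% order as the integers - which is what makes the prefix-range counts in
%% these tests predictable. Illustrative check only:
%%
%%   "000042" = lists:flatten(io_lib:format("~6..0w", [42])).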
+ + +range_key_indextermmatch_test() -> + FileName = "indextermmatch_test", + IndexKeyFun = + fun(I) -> + {{?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, + <<"19601301">>}, + list_to_binary(io_lib:format("~6..0w", [I]))}, + {1, {active, infinity}, no_lookup, null}} + end, + IndexEntries = lists:map(IndexKeyFun, lists:seq(1, 500)), + OptsSST = + #sst_options{press_method=native, + log_options=leveled_log:get_opts()}, + {ok, P1, {_FK1, _LK1}, _Bloom1} = + sst_new(?TEST_AREA, FileName, 1, IndexEntries, 6000, OptsSST), + + IdxRange1 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1959">>}, null}, + all, + 16), + IdxRange2 = + sst_getkvrange( + P1, + {?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1960">>}, null}, + {?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1961">>}, null}, + 16), + IdxRange3 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, <<"000000">>}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, null}, + 16), + IdxRange4 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, <<"000100">>}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, null}, + 16), + IdxRange5 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, <<"000100">>}, + 16), + IdxRange6 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, <<"000300">>}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, null}, + 16), + IdxRange7 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, <<"000300">>}, + 16), + IdxRange8 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601302">>}, <<"000300">>}, + 16), + IdxRange9 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601300">>}, <<"000100">>}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, null}, + 16), + ?assertMatch(500, length(IdxRange1)), + ?assertMatch(500, length(IdxRange2)), + ?assertMatch(500, length(IdxRange3)), + ?assertMatch(401, length(IdxRange4)), + ?assertMatch(100, length(IdxRange5)), + ?assertMatch(201, length(IdxRange6)), + ?assertMatch(300, length(IdxRange7)), + ?assertMatch(500, length(IdxRange8)), + ?assertMatch(500, length(IdxRange9)), + ok = sst_close(P1), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). 
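%% The counts asserted above follow from inclusive bounds over the 500
%% generated keys <<"000001">>..<<"000500">>, which all share the index
%% term <<"19601301">>. For example, IdxRange4 starts at key <<"000100">>
%% with an open end, so it covers keys 000100..000500 inclusive:
%%
%%   401 = 500 - 100 + 1.
%%
%% Similarly IdxRange5 runs from an open start key to <<"000100">>,
%% covering 000001..000100, hence 100 entries.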
+ + +range_key_lestthanprefix_test() -> + FileName = "lessthanprefix_test", + IndexKeyFun = + fun(I) -> + {{?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, + list_to_binary("19601301|" + ++ io_lib:format("~6..0w", [I]))}, + list_to_binary(io_lib:format("~6..0w", [I]))}, + {1, {active, infinity}, no_lookup, null}} + end, + IndexEntries = lists:map(IndexKeyFun, lists:seq(1, 500)), + OptsSST = + #sst_options{press_method=native, + log_options=leveled_log:get_opts()}, + {ok, P1, {_FK1, _LK1}, _Bloom1} = + sst_new(?TEST_AREA, FileName, 1, IndexEntries, 6000, OptsSST), + + IndexFileStateSize = size_summary(P1), + + IdxRange1 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1959">>}, null}, + all, + 16), + IdxRange2 = + sst_getkvrange( + P1, + {?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1960">>}, null}, + {?IDX_TAG, + {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1961">>}, null}, + 16), + IdxRange3 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1960">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|000250">>}, null}, + 16), + IdxRange4 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|000251">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1961">>}, null}, + 16), + IdxRange5 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|000250">>}, <<"000251">>}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"1961">>}, null}, + 16), + IdxRange6 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|000">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|0002">>}, null}, + 16), + IdxRange7 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|000">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|0001">>}, null}, + 16), + IdxRange8 = + sst_getkvrange( + P1, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|000000">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301|000100">>}, null}, + 16), + ?assertMatch(500, length(IdxRange1)), + ?assertMatch(500, length(IdxRange2)), + ?assertMatch(250, length(IdxRange3)), + ?assertMatch(250, length(IdxRange4)), + ?assertMatch(250, length(IdxRange5)), + ?assertMatch(199, length(IdxRange6)), + ?assertMatch(99, length(IdxRange7)), + ?assertMatch(100, length(IdxRange8)), + ok = sst_close(P1), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), + + ObjectKeyFun = + fun(I) -> + {{?RIAK_TAG, + {<<"btype">>, <<"bucket">>}, + list_to_binary("19601301|" + ++ io_lib:format("~6..0w", [I])), + null}, + {1, {active, infinity}, {0, 0}, null}} + end, + ObjectEntries = lists:map(ObjectKeyFun, lists:seq(1, 500)), + OptsSST = + #sst_options{press_method=native, + log_options=leveled_log:get_opts()}, + {ok, P2, {_FK2, _LK2}, _Bloom2} = + sst_new(?TEST_AREA, FileName, 1, ObjectEntries, 6000, OptsSST), + + ObjectFileStateSize = size_summary(P2), + + ObjRange1 = + sst_getkvrange( + P2, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1959">>, null}, + all, + 16), + ObjRange2 = + sst_getkvrange( + P2, + {?RIAK_TAG, + {<<"btype">>, <<"bucket">>}, + <<"1960">>, null}, + {?RIAK_TAG, + {<<"btype">>, <<"bucket">>}, + <<"1961">>, null}, + 16), + ObjRange3 = + sst_getkvrange( + P2, + {?RIAK_TAG, {<<"btype">>, 
<<"bucket">>}, + <<"1960">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000250">>, null}, + 16), + ObjRange4 = + sst_getkvrange( + P2, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000251">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"1961">>, null}, + 16), + ObjRange6 = + sst_getkvrange( + P2, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|0002">>, null}, + 16), + ObjRange7 = + sst_getkvrange( + P2, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|0001">>, null}, + 16), + ObjRange8 = + sst_getkvrange( + P2, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000000">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000100">>, null}, + 16), + + ?assertMatch(500, length(ObjRange1)), + ?assertMatch(500, length(ObjRange2)), + ?assertMatch(250, length(ObjRange3)), + ?assertMatch(250, length(ObjRange4)), + ?assertMatch(199, length(ObjRange6)), + ?assertMatch(99, length(ObjRange7)), + ?assertMatch(100, length(ObjRange8)), + ok = sst_close(P2), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), + + HeadKeyFun = + fun(I) -> + {{?HEAD_TAG, + {<<"btype">>, <<"bucket">>}, + list_to_binary("19601301|" + ++ io_lib:format("~6..0w", [I])), + null}, + {1, {active, infinity}, {0, 0}, null, undefined}} + end, + HeadEntries = lists:map(HeadKeyFun, lists:seq(1, 500)), + {ok, P3, {_FK3, _LK3}, _Bloom3} = + sst_new(?TEST_AREA, FileName, 1, HeadEntries, 6000, OptsSST), + + HeadFileStateSize = size_summary(P3), + + HeadRange1 = + sst_getkvrange( + P3, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"1959">>, null}, + all, + 16), + HeadRange2 = + sst_getkvrange( + P3, + {?HEAD_TAG, + {<<"btype">>, <<"abucket">>}, + <<"1962">>, null}, + {?HEAD_TAG, + {<<"btype">>, <<"zbucket">>}, + <<"1960">>, null}, + 16), + HeadRange3 = + sst_getkvrange( + P3, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"1960">>, null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000250">>, null}, + 16), + HeadRange4 = + sst_getkvrange( + P3, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000251">>, null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"1961">>, null}, + 16), + HeadRange6 = + sst_getkvrange( + P3, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000">>, null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|0002">>, null}, + 16), + HeadRange7 = + sst_getkvrange( + P3, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000">>, null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|0001">>, null}, + 16), + HeadRange8 = + sst_getkvrange( + P3, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000000">>, null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + <<"19601301|000100">>, null}, + 16), + + ?assertMatch(500, length(HeadRange1)), + ?assertMatch(500, length(HeadRange2)), + ?assertMatch(250, length(HeadRange3)), + ?assertMatch(250, length(HeadRange4)), + ?assertMatch(199, length(HeadRange6)), + ?assertMatch(99, length(HeadRange7)), + ?assertMatch(100, length(HeadRange8)), + ok = sst_close(P3), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), + + [_HdO|RestObjectEntries] = ObjectEntries, + [_HdI|RestIndexEntries] = IndexEntries, + [_Hdh|RestHeadEntries] = HeadEntries, + + {ok, P4, {_FK4, _LK4}, _Bloom4} = + sst_new( + ?TEST_AREA, + FileName, 1, + [HeadKeyFun(9999)|RestIndexEntries], + 6000, OptsSST), + 
print_compare_size("Index", IndexFileStateSize, size_summary(P4)), + ok = sst_close(P4), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), + + {ok, P5, {_FK5, _LK5}, _Bloom5} = + sst_new( + ?TEST_AREA, + FileName, 1, + [HeadKeyFun(9999)|RestObjectEntries], + 6000, OptsSST), + print_compare_size("Object", ObjectFileStateSize, size_summary(P5)), + ok = sst_close(P5), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), + + {ok, P6, {_FK6, _LK6}, _Bloom6} = + sst_new( + ?TEST_AREA, + FileName, 1, + RestHeadEntries ++ [IndexKeyFun(1)], + 6000, OptsSST), + print_compare_size("Head", HeadFileStateSize, size_summary(P6)), + ok = sst_close(P6), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). + +size_summary(P) -> + Summary = element(2, element(2, sys:get_state(P))), + true = is_record(Summary, summary), + erts_debug:flat_size(Summary). + +print_compare_size(Type, OptimisedSize, UnoptimisedSize) -> + io:format( + user, + "~n~s State optimised to ~w bytes unoptimised ~w bytes~n", + [Type, OptimisedSize * 8, UnoptimisedSize * 8]), + % Reduced by at least a quarter + ?assert(OptimisedSize < (UnoptimisedSize - (UnoptimisedSize div 4))). + + +single_key_test() -> + FileName = "single_key_test", + Field = <<"t1_bin">>, + LK = leveled_codec:to_ledgerkey(<<"Bucket0">>, <<"Key0">>, ?STD_TAG), + Chunk = leveled_rand:rand_bytes(16), + {_B, _K, MV, _H, _LMs} = + leveled_codec:generate_ledgerkv(LK, 1, Chunk, 16, infinity), + OptsSST = + #sst_options{press_method=native, + log_options=leveled_log:get_opts()}, + {ok, P1, {LK, LK}, _Bloom1} = + sst_new(?TEST_AREA, FileName, 1, [{LK, MV}], 6000, OptsSST), + ?assertMatch({LK, MV}, sst_get(P1, LK)), + ok = sst_close(P1), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), + + IndexSpecs = [{add, Field, <<"20220101">>}], + [{IdxK, IdxV}] = + leveled_codec:idx_indexspecs(IndexSpecs, + <<"Bucket">>, + <<"Key">>, + 1, + infinity), + {ok, P2, {IdxK, IdxK}, _Bloom2} = + sst_new(?TEST_AREA, FileName, 1, [{IdxK, IdxV}], 6000, OptsSST), + ?assertMatch( + [{IdxK, IdxV}], + sst_getkvrange( + P2, + {?IDX_TAG, <<"Bucket">>, {Field, <<"20220100">>}, null}, + all, + 16)), + ?assertMatch( + [{IdxK, IdxV}], + sst_getkvrange( + P2, + {?IDX_TAG, <<"Bucket">>, {Field, <<"20220100">>}, null}, + {?IDX_TAG, <<"Bucket">>, {Field, <<"20220101">>}, null}, + 16)), + ?assertMatch( + [{IdxK, IdxV}], + sst_getkvrange( + P2, + {?IDX_TAG, <<"Bucket">>, {Field, <<"20220101">>}, null}, + {?IDX_TAG, <<"Bucket">>, {Field, <<"20220101">>}, null}, + 16)), + ok = sst_close(P2), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). 
+ +strange_range_test() -> + FileName = "strange_range_test", + V = leveled_head:riak_metadata_to_binary( + term_to_binary([{"actor1", 1}]), + <<1:32/integer, 0:32/integer, 0:32/integer>>), + OptsSST = + #sst_options{press_method=native, + log_options=leveled_log:get_opts()}, + + FK = leveled_codec:to_ledgerkey({<<"T0">>, <<"B0">>}, <<"K0">>, ?RIAK_TAG), + LK = leveled_codec:to_ledgerkey({<<"T0">>, <<"B0">>}, <<"K02">>, ?RIAK_TAG), + EK = leveled_codec:to_ledgerkey({<<"T0">>, <<"B0">>}, <<"K0299">>, ?RIAK_TAG), + + KL1 = + lists:map( + fun(I) -> + leveled_codec:to_ledgerkey( + {<<"T0">>, <<"B0">>}, + list_to_binary("K00" ++ integer_to_list(I)), + ?RIAK_TAG) + end, + lists:seq(1, 300)), + KL2 = + lists:map( + fun(I) -> + leveled_codec:to_ledgerkey( + {<<"T0">>, <<"B0">>}, + list_to_binary("K02" ++ integer_to_list(I)), + ?RIAK_TAG) + end, + lists:seq(1, 300)), + + GenerateValue = + fun(K) -> + element( + 3, leveled_codec:generate_ledgerkv(K, 1, V, 16, infinity)) + end, + + KVL = + lists:ukeysort( + 1, + lists:map( + fun(K) -> {K, GenerateValue(K)} end, + [FK] ++ KL1 ++ [LK] ++ KL2)), + + {ok, P1, {FK, EK}, _Bloom1} = + sst_new(?TEST_AREA, FileName, 1, KVL, 6000, OptsSST), + + ?assertMatch(LK, element(1, sst_get(P1, LK))), + ?assertMatch(FK, element(1, sst_get(P1, FK))), + ok = sst_close(P1), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), + + IndexSpecs = + lists:map( + fun(I) -> {add, <<"t1_bin">>, integer_to_binary(I)} end, + lists:seq(1, 500)), + IdxKVL = + leveled_codec:idx_indexspecs(IndexSpecs, + <<"Bucket">>, + <<"Key">>, + 1, + infinity), + {ok, P2, {_FIdxK, _EIdxK}, _Bloom2} = + sst_new( + ?TEST_AREA, FileName, 1, lists:ukeysort(1, IdxKVL), 6000, OptsSST), + [{IdxK1, _IdxV1}, {IdxK2, _IdxV2}] = + sst_getkvrange( + P2, + {?IDX_TAG, <<"Bucket">>, {<<"t1_bin">>, <<"1">>}, null}, + {?IDX_TAG, <<"Bucket">>, {<<"t1_bin">>, <<"10">>}, null}, + 16), + ?assertMatch( + {?IDX_TAG, <<"Bucket">>, {<<"t1_bin">>, <<"1">>}, <<"Key">>}, + IdxK1 + ), + ?assertMatch( + {?IDX_TAG, <<"Bucket">>, {<<"t1_bin">>, <<"10">>}, <<"Key">>}, + IdxK2 + ), + ok = sst_close(P2), + ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). + receive_fun() -> receive diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 5f5be28..827fd84 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -430,7 +430,7 @@ adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) -> StoreSizeI = get_size(StoreSize), if CompareSizeI =< StoreSizeI -> ExpItems = StoreSizeI div CompareSizeI - 1, - ShiftFactor = round(leveled_math:log2(CompareSizeI * ?L2_CHUNKSIZE)), + ShiftFactor = round(math:log2(CompareSizeI * ?L2_CHUNKSIZE)), ExpList = lists:map(fun(X) -> X bsl ShiftFactor end, lists:seq(1, ExpItems)), UpdSegmentList = diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index d38aaa6..e3b8982 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -587,14 +587,30 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> KNumber = lists:flatten( io_lib:format("K~8..0B", [leveled_rand:uniform(1000)])), - {K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null}, - {Seqn, {active, infinity}, null}}, + {K, V} = + {{o_kv, + {<<"btype">>, list_to_binary("Bucket" ++ BNumber)}, + list_to_binary("Key" ++ KNumber), + null}, + Seqn}, generate_randomkeys(Seqn + 1, Count - 1, [{K, V}|Acc], BucketLow, BRange). +generate_simplekeys(Seqn, Count) -> + generate_simplekeys(Seqn, Count, []). 
+ +generate_simplekeys(_Seqn, 0, Acc) -> + Acc; +generate_simplekeys(Seqn, Count, Acc) -> + KNumber = + list_to_binary( + lists:flatten( + io_lib:format("K~8..0B", [leveled_rand:uniform(100000)]))), + generate_simplekeys(Seqn + 1, Count - 1, [{KNumber, Seqn}|Acc]). + tree_search_test() -> search_test_by_type(tree), @@ -685,34 +701,49 @@ tolist_test_by_type(Type) -> ?assertMatch(KL, T_Reverse). tree_timing_test() -> + log_tree_test_by_(16, tree, 8000), log_tree_test_by_(16, tree, 4000), - tree_test_by_(8, tree, 1000), - tree_test_by_(4, tree, 256). + log_tree_test_by_(4, tree, 256). idxt_timing_test() -> + log_tree_test_by_(16, idxt, 8000), log_tree_test_by_(16, idxt, 4000), - tree_test_by_(8, idxt, 1000), - tree_test_by_(4, idxt, 256). + log_tree_test_by_(4, idxt, 256), + log_tree_test_by_(16, idxt, 256), + log_tree_test_by_simplekey_(16, idxt, 256). skpl_timing_test() -> - tree_test_by_(auto, skpl, 6000), + log_tree_test_by_(auto, skpl, 8000), log_tree_test_by_(auto, skpl, 4000), - tree_test_by_(auto, skpl, 1000), - tree_test_by_(auto, skpl, 256). + log_tree_test_by_simplekey_(auto, skpl, 4000), + log_tree_test_by_(auto, skpl, 512), + log_tree_test_by_simplekey_(auto, skpl, 512), + log_tree_test_by_(auto, skpl, 256), + log_tree_test_by_simplekey_(auto, skpl, 256). log_tree_test_by_(Width, Type, N) -> - erlang:statistics(runtime), - G0 = erlang:statistics(garbage_collection), - tree_test_by_(Width, Type, N), - {_, T1} = erlang:statistics(runtime), - G1 = erlang:statistics(garbage_collection), - io:format(user, "Test took ~w ms and GC transitioned from ~w to ~w~n", - [T1, G0, G1]). - -tree_test_by_(Width, Type, N) -> - io:format(user, "~nTree test for type and width: ~w ~w~n", [Type, Width]), KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), - + SW = os:timestamp(), + tree_test_by_(Width, Type, KL), + io:format(user, "Test took ~w ms", + [timer:now_diff(os:timestamp(), SW) div 1000]). + +log_tree_test_by_simplekey_(Width, Type, N) -> + KL = lists:ukeysort(1, generate_simplekeys(1, N)), + SW = os:timestamp(), + tree_test_by_(Width, Type, KL, false), + io:format(user, "Test with simple key took ~w ms", + [timer:now_diff(os:timestamp(), SW) div 1000]). + +tree_test_by_(Width, Type, KL) -> + tree_test_by_(Width, Type, KL, true). 
+ +tree_test_by_(Width, Type, KL, ComplexKey) -> + io:format( + user, + "~n~nTree test with complexkey=~w for type and width: ~w ~w~n", + [ComplexKey, Type, Width]), + OS = ets:new(test, [ordered_set, private]), ets:insert(OS, KL), SWaETS = os:timestamp(), @@ -721,6 +752,9 @@ tree_test_by_(Width, Type, N) -> " of size ~w~n", [timer:now_diff(os:timestamp(), SWaETS), tsize(Tree0)]), + io:format(user, + "Tree has footprint size ~w bytes flat_size ~w bytes~n", + [erts_debug:size(Tree0) * 8, erts_debug:flat_size(Tree0) * 8]), SWaGSL = os:timestamp(), Tree1 = from_orderedlist(KL, Type, Width), @@ -728,6 +762,10 @@ tree_test_by_(Width, Type, N) -> " of size ~w~n", [timer:now_diff(os:timestamp(), SWaGSL), tsize(Tree1)]), + io:format(user, + "Tree has footprint size ~w bytes flat_size ~w bytes~n", + [erts_debug:size(Tree1) * 8, erts_debug:flat_size(Tree1) * 8]), + SWaLUP = os:timestamp(), lists:foreach(match_fun(Tree0), KL), lists:foreach(match_fun(Tree1), KL), @@ -743,11 +781,25 @@ tree_test_by_(Width, Type, N) -> [timer:now_diff(os:timestamp(), SWaSRCH1)]), BitBiggerKeyFun = - fun(Idx) -> - {K, _V} = lists:nth(Idx, KL), - {o, B, FullKey, null} = K, - {{o, B, FullKey ++ "0", null}, lists:nth(Idx + 1, KL)} - end, + case ComplexKey of + true -> + fun(Idx) -> + {K, _V} = lists:nth(Idx, KL), + {o_kv, B, FullKey, null} = K, + {{o_kv, + B, + list_to_binary(binary_to_list(FullKey) ++ "0"), + null}, + lists:nth(Idx + 1, KL)} + end; + false -> + fun(Idx) -> + {K, _V} = lists:nth(Idx, KL), + {list_to_binary(binary_to_list(K) ++ "0"), + lists:nth(Idx + 1, KL)} + end + end, + SrchKL = lists:map(BitBiggerKeyFun, lists:seq(1, length(KL) - 1)), SWaSRCH2 = os:timestamp(), @@ -778,10 +830,14 @@ matchrange_test_by_type(Type) -> FirstKey = element(1, lists:nth(1, KL)), FinalKey = element(1, lists:last(KL)), PenultimateKey = element(1, lists:nth(length(KL) - 1, KL)), - AfterFirstKey = setelement(3, FirstKey, element(3, FirstKey) ++ "0"), - AfterPenultimateKey = setelement(3, - PenultimateKey, - element(3, PenultimateKey) ++ "0"), + AfterFirstKey = + setelement(3, + FirstKey, + list_to_binary(binary_to_list(element(3, FirstKey)) ++ "0")), + AfterPenultimateKey = + setelement(3, + PenultimateKey, + list_to_binary(binary_to_list(element(3, PenultimateKey)) ++ "0")), LengthR = fun(SK, EK, T) -> @@ -812,10 +868,12 @@ extra_matchrange_test_by_type(Type) -> fun(RangeL) -> SKeyV = lists:nth(1, RangeL), EKeyV = lists:nth(50, RangeL), - {{o, SB, SK, null}, _SV} = SKeyV, - {{o, EB, EK, null}, _EV} = EKeyV, - SRangeK = {o, SB, SK ++ "0", null}, - ERangeK = {o, EB, EK ++ "0", null}, + {{o_kv, SB, SK, null}, _SV} = SKeyV, + {{o_kv, EB, EK, null}, _EV} = EKeyV, + SRangeK = + {o_kv, SB, list_to_binary(binary_to_list(SK) ++ "0"), null}, + ERangeK = + {o_kv, EB, list_to_binary(binary_to_list(EK) ++ "0"), null}, ?assertMatch(49, length(match_range(SRangeK, ERangeK, Tree0))) end, lists:foreach(TestRangeLFun, RangeLists). 
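%% The footprint logging above reports both erts_debug:size/1 and
%% erts_debug:flat_size/1 (again scaled by 8 on the assumption of a 64-bit
%% emulator). size/1 counts shared subterms once, whereas flat_size/1
%% sizes the term as if it were fully copied (e.g. when sent in a
%% message), so flat_size is always >= size and the gap shows how much
%% internal sharing the tree representation achieves. Illustrative check,
%% not part of these tests:
%%
%%   L = lists:duplicate(100, {shared, <<"subterm">>}),
%%   true = erts_debug:flat_size(L) > erts_debug:size(L).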
@@ -840,10 +898,12 @@ extra_searchrange_test_by_type(Type) -> % start key SKeyV = lists:nth(1, RangeL), EKeyV = lists:nth(50, RangeL), - {{o, SB, SK, null}, _SV} = SKeyV, - {{o, EB, EK, null}, _EV} = EKeyV, - FRangeK = {o, SB, SK ++ "0", null}, - BRangeK = {o, EB, EK ++ "0", null}, + {{o_kv, SB, SK, null}, _SV} = SKeyV, + {{o_kv, EB, EK, null}, _EV} = EKeyV, + FRangeK = + {o_kv, SB, list_to_binary(binary_to_list(SK) ++ "0"), null}, + BRangeK = + {o_kv, EB, list_to_binary(binary_to_list(EK) ++ "0"), null}, ?assertMatch(25, length(search_range(FRangeK, BRangeK, Tree0, SKFun))) end, lists:foreach(TestRangeLFun, lists:seq(1, 50)). diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index ac1a5ea..43d3f5e 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -40,8 +40,9 @@ simple_put_fetch_head_delete(_Config) -> io:format("simple test with error and no forced logs~n"), simple_test_withlog(error, []), io:format("simple test with error and stats logs~n"), - simple_test_withlog(error, ["B0015", "B0016", "B0017", "B0018", - "P0032", "SST12", "CDB19", "SST13", "I0019"]). + simple_test_withlog( + error, + [b0015, b0016, b0017, b0018, p0032, sst12, cdb19, sst13, i0019]). simple_test_withlog(LogLevel, ForcedLogs) -> @@ -95,10 +96,12 @@ simple_test_withlog(LogLevel, ForcedLogs) -> many_put_fetch_head(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_pencillercachesize, 16000}, - {sync_strategy, riak_sync}, - {compression_point, on_compact}], + StartOpts1 = + [{root_path, RootPath}, + {max_pencillercachesize, 16000}, + {sync_strategy, riak_sync}, + {compression_point, on_compact} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), @@ -530,7 +533,8 @@ load_and_count(_Config) -> % Use artificially small files, and the load keys, counting they're all % present load_and_count(50000000, 2500, 28000), - load_and_count(200000000, 100, 300000). + load_and_count(200000000, 50, 200000), + load_and_count(50000000, 1000, 5000). 
load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> @@ -539,7 +543,9 @@ load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> {max_journalsize, JournalSize}, {cache_size, BookiesMemSize}, {max_pencillercachesize, PencillerMemSize}, - {sync_strategy, testutil:sync_strategy()}], + {sync_strategy, testutil:sync_strategy()}, + {stats_logfrequency, 5}, + {stats_probability, 80}], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), @@ -598,6 +604,9 @@ load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> lists:seq(1, 20)), testutil:check_forobject(Bookie1, TestObject), io:format("Loading more small objects~n"), + io:format("Now with unused snapshot so deletions are blocked~n"), + {ok, PclClone, null} = + leveled_bookie:book_snapshot(Bookie1, ledger, undefined, true), lists:foldl(fun(_X, Acc) -> testutil:load_objects(5000, [Acc + 2], @@ -614,6 +623,8 @@ load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> 200000, lists:seq(1, 20)), testutil:check_forobject(Bookie1, TestObject), + ok = leveled_penciller:pcl_close(PclClone), + {_S, 300000} = testutil:check_bucket_stats(Bookie1, "Bucket"), ok = leveled_bookie:book_close(Bookie1), {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), {_, 300000} = testutil:check_bucket_stats(Bookie2, "Bucket"), diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index c4881fd..9041983 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -9,7 +9,8 @@ handoff/1, dollar_bucket_index/1, dollar_key_index/1, - bigobject_memorycheck/1 + bigobject_memorycheck/1, + summarisable_sstindex/1 ]). all() -> [ @@ -19,7 +20,8 @@ all() -> [ handoff, dollar_bucket_index, dollar_key_index, - bigobject_memorycheck + bigobject_memorycheck, + summarisable_sstindex ]. -define(MAGIC, 53). % riak_kv -> riak_object @@ -41,7 +43,9 @@ basic_riak_tester(Bucket, KeyCount) -> {max_journalsize, 500000000}, {max_pencillercachesize, 24000}, {sync_strategy, testutil:sync_strategy()}, - {database_id, 32}], + {database_id, 32}, + {stats_logfrequency, 5}, + {stats_probability, 80}], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), IndexGenFun = @@ -243,6 +247,195 @@ basic_riak_tester(Bucket, KeyCount) -> ok = leveled_bookie:book_destroy(Bookie2). 
+summarisable_sstindex(_Config) -> + RootPathA = testutil:reset_filestructure("SummarisableSSTIndex"), + KeyCount = 200000, + IndexGen = fun() -> [] end, + Bucket = {<<"test_type">>, <<"test_bucket">>}, + KeyGen = fun(I) -> list_to_binary(io_lib:format("~10..0w", [I])) end, + ObjListToSort = + lists:map( + fun(I) -> + {leveled_rand:uniform(KeyCount * 10), + testutil:set_object( + Bucket, KeyGen(I), integer_to_binary(I), IndexGen, [])} + end, + lists:seq(1, KeyCount)), + UnsortedList = + lists:map( + fun({I, {O, S}}) -> {I, O, S} end, + lists:keysort(1, ObjListToSort)), + true = KeyCount == length(UnsortedList), + StartOpts1 = [{root_path, RootPathA}, + {max_journalsize, 500000000}, + {max_pencillercachesize, 8000}, + {sync_strategy, testutil:sync_strategy()}], + {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), + testutil:riakload(Bookie1, UnsortedList), + FoldAccT = {fun(_B, K, Acc) -> [K|Acc] end, []}, + KeyRangeCheckFun = + fun(SK, EK) -> + {async, FoldFun} = + leveled_bookie:book_keylist( + Bookie1, + ?RIAK_TAG, + Bucket, + {SK, EK}, + FoldAccT, + undefined), + QueryList = FoldFun(), + io:format( + "QueryCount ~w against total ~w for range ~p ~p~n", + [length(QueryList), KeyCount, SK, EK]), + QueryList + end, + + true = KeyCount == length(KeyRangeCheckFun(<<"00">>, <<"02">>)), + true = KeyCount == length(KeyRangeCheckFun(<<"000">>, <<"002">>)), + true = KeyCount == length(KeyRangeCheckFun(<<"0000">>, <<"0002">>)), + true = + (KeyCount - 1) == + length(KeyRangeCheckFun(<<"00000">>, <<"00002">>)), + true = + (KeyCount - 1) == + length(KeyRangeCheckFun(<<"00000">>, <<"000020">>)), + true = + (KeyCount - 1) == + length(KeyRangeCheckFun(<<"00000">>, <<"0000200">>)), + true = + (KeyCount div 2) == + length(KeyRangeCheckFun(<<"00001">>, <<"00002">>)), + true = + 1 == length(KeyRangeCheckFun(<<"00002">>, <<"000021">>)), + true = + ((KeyCount div 10) - 1) == + length(KeyRangeCheckFun(<<"000000">>, <<"000002">>)), + true = + (KeyCount div 20) == + length(KeyRangeCheckFun(<<"000001">>, <<"000002">>)), + true = + ((KeyCount div 100) - 1) == + length(KeyRangeCheckFun(<<"0000000">>, <<"0000002">>)), + + lists:foreach( + fun(I) -> + StartKey = KeyGen(I), + EndKey = KeyGen(I + 200 - 1), + true = 200 == length(KeyRangeCheckFun(StartKey, EndKey)) + end, + lists:map( + fun(_I) -> leveled_rand:uniform(KeyCount - 200) end, + lists:seq(1, 100))), + + IdxObjKeyCount = 50000, + TermGen = + fun(I, C) -> + list_to_binary( + lists:flatten( + io_lib:format("~10..0w", [I]) ++ integer_to_list(C))) + end, + SequentialIndexGen = + fun(I) -> + fun() -> + lists:map( + fun(C) -> + {add, <<"indexf_bin">>,TermGen(I, C)} + end, + lists:seq(1, 8)) + end + end, + IdxObjListToSort = + lists:map( + fun(I) -> + {leveled_rand:uniform(KeyCount * 10), + testutil:set_object( + Bucket, + KeyGen(I), + integer_to_binary(I - KeyCount), + SequentialIndexGen(I - KeyCount), + [])} + end, + lists:seq(KeyCount + 1, KeyCount + IdxObjKeyCount)), + UnsortedIdxObjList = + lists:map( + fun({I, {O, S}}) -> {I, O, S} end, + lists:keysort(1, IdxObjListToSort)), + testutil:riakload(Bookie1, UnsortedIdxObjList), + IdxCount = IdxObjKeyCount * 8, + + IdxQueryFun = + fun(StartTerm, EndTerm) -> + {async, FoldFun} = + leveled_bookie:book_indexfold( + Bookie1, {Bucket, <<>>}, FoldAccT, + {<<"indexf_bin">>, StartTerm, EndTerm}, + {true, undefined}), + IdxQueryList = FoldFun(), + io:format( + "IdxQueryCount ~w for range ~p ~p~n", + [length(IdxQueryList), StartTerm, EndTerm]), + IdxQueryList + end, + true = IdxCount == length(IdxQueryFun(<<"00">>, 
<<"05">>)), + true = IdxCount == length(IdxQueryFun(<<"000">>, <<"005">>)), + true = IdxCount == length(IdxQueryFun(<<"0000">>, <<"0005">>)), + true = IdxCount == length(IdxQueryFun(<<"00000">>, <<"00005">>)), + true = + (IdxCount - 8) == + length(IdxQueryFun(<<"000000">>, <<"000005">>)), + true = + (IdxCount - 8) == + length(IdxQueryFun(<<"000000">>, <<"0000050">>)), + true = + (IdxCount - 8) == + length(IdxQueryFun(<<"000000">>, <<"00000500">>)), + true = 8 == length(IdxQueryFun(<<"000005">>, <<"0000051">>)), + + lists:foreach( + fun(I) -> + StartTerm = TermGen(I, 0), + EndTerm = TermGen(I + 20, 9), + true = 168 == length(IdxQueryFun(StartTerm, EndTerm)) + end, + lists:map( + fun(_I) -> + leveled_rand:uniform(IdxObjKeyCount - 20) + end, + lists:seq(1, 100))), + lists:foreach( + fun(I) -> + StartTerm = TermGen(I, 0), + EndTerm = TermGen(I + 10, 9), + true = 88 == length(IdxQueryFun(StartTerm, EndTerm)) + end, + lists:map( + fun(_I) -> + leveled_rand:uniform(IdxObjKeyCount - 10) + end, + lists:seq(1, 100))), + + io:format("Redo object count checks:~n"), + NewKeyCount = KeyCount + IdxObjKeyCount, + true = NewKeyCount == length(KeyRangeCheckFun(<<"00">>, <<"025">>)), + true = NewKeyCount == length(KeyRangeCheckFun(<<"000">>, <<"0025">>)), + true = NewKeyCount == length(KeyRangeCheckFun(<<"0000">>, <<"00025">>)), + true = + (NewKeyCount - 1) == + length(KeyRangeCheckFun(<<"00000">>, <<"000025">>)), + true = 1 == length(KeyRangeCheckFun(<<"000025">>, <<"0000251">>)), + + lists:foreach( + fun(I) -> + StartKey = KeyGen(I), + EndKey = KeyGen(I + 200 - 1), + true = 200 == length(KeyRangeCheckFun(StartKey, EndKey)) + end, + lists:map( + fun(_I) -> leveled_rand:uniform(KeyCount - 200) end, + lists:seq(1, 100))), + + ok = leveled_bookie:book_destroy(Bookie1). + fetchclocks_modifiedbetween(_Config) -> RootPathA = testutil:reset_filestructure("fetchClockA"), @@ -436,16 +629,15 @@ fetchclocks_modifiedbetween(_Config) -> io:format("Comparing queries for Obj1 TS range ~w ~w~n", [ObjL1StartTS, ObjL1EndTS]), - PlusFilterStart = os:timestamp(), - R3A_PlusFilter = lists:foldl(FoldRangesFun(Bookie1A, - {ObjL1StartTS, ObjL1EndTS}, - 100000, - 100000), - {0, 0}, lists:seq(1, 1)), - PlusFilterTime = timer:now_diff(os:timestamp(), PlusFilterStart)/1000, - io:format("R3A_PlusFilter ~w~n", [R3A_PlusFilter]), - true = {20000, 20000} == R3A_PlusFilter, - + PlusFilterTimes = + lists:map( + fun(_I) -> + time_filtered_query( + FoldRangesFun, Bookie1A, ObjL1StartTS, ObjL1EndTS) + end, + lists:seq(1, 4)), + PlusFilterTime = lists:sum(PlusFilterTimes) div 4, + NoFilterStart = os:timestamp(), {async, R3A_NoFilterRunner} = leveled_bookie:book_headfold(Bookie1A, @@ -458,7 +650,7 @@ fetchclocks_modifiedbetween(_Config) -> true, false), R3A_NoFilter = R3A_NoFilterRunner(), - NoFilterTime = timer:now_diff(os:timestamp(), NoFilterStart)/1000, + NoFilterTime = timer:now_diff(os:timestamp(), NoFilterStart) div 1000, io:format("R3A_NoFilter ~w~n", [R3A_NoFilter]), true = {20000, 20000} == R3A_NoFilter, io:format("Filtered query ~w ms and unfiltered query ~w ms~n", @@ -586,7 +778,18 @@ fetchclocks_modifiedbetween(_Config) -> ok = leveled_bookie:book_destroy(Bookie1A), ok = leveled_bookie:book_destroy(Bookie1BS). 
- + +time_filtered_query(FoldRangesFun, Bookie, ObjL1StartTS, ObjL1EndTS) -> + PlusFilterStart = os:timestamp(), + R3A_PlusFilter = lists:foldl(FoldRangesFun(Bookie, + {ObjL1StartTS, ObjL1EndTS}, + 100000, + 100000), + {0, 0}, lists:seq(1, 1)), + PlusFilterTime = timer:now_diff(os:timestamp(), PlusFilterStart) div 1000, + io:format("R3A_PlusFilter ~w in ~w~n", [R3A_PlusFilter, PlusFilterTime]), + true = {20000, 20000} == R3A_PlusFilter, + PlusFilterTime. lmdrange_tester(Bookie1BS, SimpleCountFun, ObjL4StartTS, ObjL6StartTS, ObjL6EndTS, TooLate) -> diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index f6773f3..bd197ad 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -64,7 +64,7 @@ compact_and_wait/1]). -define(RETURN_TERMS, {true, undefined}). --define(SLOWOFFER_DELAY, 5). +-define(SLOWOFFER_DELAY, 10). -define(V1_VERS, 1). -define(MAGIC, 53). % riak_kv -> riak_object -define(MD_VTAG, <<"X-Riak-VTag">>).