Improve the quality of score
Move the average towards the current score when not scoring on each run. Score from more keys to get a better score (the overheads of scoring are now better managed by setting score_onein rather than by reducing the sample size).
parent bcc331da10
commit 00823584ec
2 changed files with 23 additions and 7 deletions
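To illustrate the behaviour described in the commit message, the following is a minimal, self-contained Erlang sketch of the score-selection logic, not the leveled API itself: choose_score/3 is a hypothetical helper, the standard rand module stands in for leveled_rand, and the scoring work is passed in as a fun. It mirrors the case clauses added in the diff below: always score when there is no cached score or when score_onein is 1, otherwise refresh on roughly one run in score_onein and move the cached score halfway towards the fresh score.

%% Illustrative sketch only (hypothetical module and function names).
%% Mirrors the decision added in this commit: score when there is no
%% cache or caching is effectively disabled; otherwise occasionally
%% refresh and roll the cached score towards the new one; else keep
%% the cached score unchanged.
-module(score_sketch).
-export([choose_score/3]).

-spec choose_score(undefined | float(), fun(() -> float()), pos_integer())
        -> float().
choose_score(undefined, ScoreFun, _ScoreOneIn) ->
    %% No cached score available - always use a freshly computed score
    ScoreFun();
choose_score(_CachedScore, ScoreFun, 1) ->
    %% A score_onein of 1 means caching is effectively off - score every run
    ScoreFun();
choose_score(CachedScore, ScoreFun, ScoreOneIn) ->
    case rand:uniform(ScoreOneIn) == 1 of
        true ->
            %% Roughly one run in ScoreOneIn: refresh, and move the
            %% cached score halfway towards the new score
            (ScoreFun() + CachedScore) / 2;
        false ->
            %% Otherwise reuse the cached score unchanged
            CachedScore
    end.

For example, choose_score(undefined, ScoreFun, 8) always calls ScoreFun, whereas with a cached score of 40.0 it returns 40.0 on most runs and the midpoint of 40.0 and the fresh score on roughly one run in eight.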
@@ -97,7 +97,7 @@
 -define(JOURNAL_FILEX, "cdb").
 -define(PENDING_FILEX, "pnd").
--define(SAMPLE_SIZE, 100).
+-define(SAMPLE_SIZE, 192).
 -define(BATCH_SIZE, 32).
 -define(BATCHES_TO_CHECK, 8).
 -define(CRC_SIZE, 4).

@@ -331,9 +331,11 @@ handle_cast({score_filelist, [Entry|Tail]}, State) ->
     ScoringState = State#state.scoring_state,
     CpctPerc =
         case {leveled_cdb:cdb_getcachedscore(JournalP, os:timestamp()),
-                leveled_rand:uniform(State#state.score_onein) == 1} of
-            {CachedScore, UseNewScore}
-                    when CachedScore == undefined; UseNewScore ->
+                leveled_rand:uniform(State#state.score_onein) == 1,
+                State#state.score_onein} of
+            {CachedScore, _UseNewScore, ScoreOneIn}
+                    when CachedScore == undefined; ScoreOneIn == 1 ->
+                % If caches are not used, always use the current score
                 check_single_file(JournalP,
                                   ScoringState#scoring_state.filter_fun,
                                   ScoringState#scoring_state.filter_server,
@@ -341,7 +343,21 @@ handle_cast({score_filelist, [Entry|Tail]}, State) ->
                                   ?SAMPLE_SIZE,
                                   ?BATCH_SIZE,
                                   State#state.reload_strategy);
-            {CachedScore, false} ->
+            {CachedScore, true, _ScoreOneIn} ->
+                % If caches are used roll the score towards the current score
+                % Expectation is that this will reduce instances of individual
+                % files being compacted when a run is missed due to cached
+                % scores being used in surrounding journals
+                NewScore =
+                    check_single_file(JournalP,
+                                      ScoringState#scoring_state.filter_fun,
+                                      ScoringState#scoring_state.filter_server,
+                                      ScoringState#scoring_state.max_sqn,
+                                      ?SAMPLE_SIZE,
+                                      ?BATCH_SIZE,
+                                      State#state.reload_strategy),
+                (NewScore + CachedScore) / 2;
+            {CachedScore, false, _ScoreOneIn} ->
                 CachedScore
         end,
     ok = leveled_cdb:cdb_putcachedscore(JournalP, CpctPerc),

@@ -850,7 +850,7 @@ code_change(_OldVsn, StateName, State, _Extra) ->
 %% @doc
 %% Expand a list of pointers, maybe ending up with a list of keys and values
 %% with a tail of pointers
-%% By defauls will not have a segment filter, or a low last_modified_date, but
+%% By default will not have a segment filter, or a low last_modified_date, but
 %% they can be used. Range checking a last modified date must still be made on
 %% the output - at this stage the low last_modified_date has been used to bulk
 %% skip those slots not containing any information over the low last modified

@@ -1867,7 +1867,7 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache},
     % List of segments passed so only {K, V} pairs matching those segments
     % should be returned. This required the {K, V} pair to have been added
     % with the appropriate hash - if the pair were added with no_lookup as
-    % the hash value this will fial unexpectedly.
+    % the hash value this will fail unexpectedly.
     BinMapFun =
         fun(Pointer, Acc) ->
             {SP, _L, ID, SK, EK} = pointer_mapfun(Pointer),