Improve the quality of the score
When files are not scored on every run, a rescore now moves the cached score towards the current score (by averaging the two) rather than replacing it. Score from more keys to get a better score (the overhead of scoring is now better controlled by setting score_onein than by reducing the sample size).
This commit is contained in:
parent
bcc331da10
commit
00823584ec
2 changed files with 23 additions and 7 deletions
|
@ -97,7 +97,7 @@
|
||||||
|
|
||||||
-define(JOURNAL_FILEX, "cdb").
|
-define(JOURNAL_FILEX, "cdb").
|
||||||
-define(PENDING_FILEX, "pnd").
|
-define(PENDING_FILEX, "pnd").
|
||||||
-define(SAMPLE_SIZE, 100).
|
-define(SAMPLE_SIZE, 192).
|
||||||
-define(BATCH_SIZE, 32).
|
-define(BATCH_SIZE, 32).
|
||||||
-define(BATCHES_TO_CHECK, 8).
|
-define(BATCHES_TO_CHECK, 8).
|
||||||
-define(CRC_SIZE, 4).
|
-define(CRC_SIZE, 4).
|
||||||
|
@ -331,9 +331,11 @@ handle_cast({score_filelist, [Entry|Tail]}, State) ->
|
||||||
ScoringState = State#state.scoring_state,
|
ScoringState = State#state.scoring_state,
|
||||||
CpctPerc =
|
CpctPerc =
|
||||||
case {leveled_cdb:cdb_getcachedscore(JournalP, os:timestamp()),
|
case {leveled_cdb:cdb_getcachedscore(JournalP, os:timestamp()),
|
||||||
leveled_rand:uniform(State#state.score_onein) == 1} of
|
leveled_rand:uniform(State#state.score_onein) == 1,
|
||||||
{CachedScore, UseNewScore}
|
State#state.score_onein} of
|
||||||
when CachedScore == undefined; UseNewScore ->
|
{CachedScore, _UseNewScore, ScoreOneIn}
|
||||||
|
when CachedScore == undefined; ScoreOneIn == 1 ->
|
||||||
|
% If caches are not used, always use the current score
|
||||||
check_single_file(JournalP,
|
check_single_file(JournalP,
|
||||||
ScoringState#scoring_state.filter_fun,
|
ScoringState#scoring_state.filter_fun,
|
||||||
ScoringState#scoring_state.filter_server,
|
ScoringState#scoring_state.filter_server,
|
||||||
|
@ -341,7 +343,21 @@ handle_cast({score_filelist, [Entry|Tail]}, State) ->
|
||||||
?SAMPLE_SIZE,
|
?SAMPLE_SIZE,
|
||||||
?BATCH_SIZE,
|
?BATCH_SIZE,
|
||||||
State#state.reload_strategy);
|
State#state.reload_strategy);
|
||||||
{CachedScore, false} ->
|
{CachedScore, true, _ScoreOneIn} ->
|
||||||
|
% If caches are used roll the score towards the current score
|
||||||
|
% Expectation is that this will reduce instances of individual
|
||||||
|
% files being compacted when a run is missed due to cached
|
||||||
|
% scores being used in surrounding journals
|
||||||
|
NewScore =
|
||||||
|
check_single_file(JournalP,
|
||||||
|
ScoringState#scoring_state.filter_fun,
|
||||||
|
ScoringState#scoring_state.filter_server,
|
||||||
|
ScoringState#scoring_state.max_sqn,
|
||||||
|
?SAMPLE_SIZE,
|
||||||
|
?BATCH_SIZE,
|
||||||
|
State#state.reload_strategy),
|
||||||
|
(NewScore + CachedScore) / 2;
|
||||||
|
{CachedScore, false, _ScoreOneIn} ->
|
||||||
CachedScore
|
CachedScore
|
||||||
end,
|
end,
|
||||||
ok = leveled_cdb:cdb_putcachedscore(JournalP, CpctPerc),
|
ok = leveled_cdb:cdb_putcachedscore(JournalP, CpctPerc),
|
||||||
|
|
|
@ -850,7 +850,7 @@ code_change(_OldVsn, StateName, State, _Extra) ->
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Expand a list of pointers, maybe ending up with a list of keys and values
|
%% Expand a list of pointers, maybe ending up with a list of keys and values
|
||||||
%% with a tail of pointers
|
%% with a tail of pointers
|
||||||
%% By defauls will not have a segment filter, or a low last_modified_date, but
|
%% By default will not have a segment filter, or a low last_modified_date, but
|
||||||
%% they can be used. Range checking a last modified date must still be made on
|
%% they can be used. Range checking a last modified date must still be made on
|
||||||
%% the output - at this stage the low last_modified_date has been used to bulk
|
%% the output - at this stage the low last_modified_date has been used to bulk
|
||||||
%% skip those slots not containing any information over the low last modified
|
%% skip those slots not containing any information over the low last modified
|
||||||
|
@ -1867,7 +1867,7 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache},
|
||||||
% List of segments passed so only {K, V} pairs matching those segments
|
% List of segments passed so only {K, V} pairs matching those segments
|
||||||
% should be returned. This required the {K, V} pair to have been added
|
% should be returned. This required the {K, V} pair to have been added
|
||||||
% with the appropriate hash - if the pair were added with no_lookup as
|
% with the appropriate hash - if the pair were added with no_lookup as
|
||||||
% the hash value this will fial unexpectedly.
|
% the hash value this will fail unexpectedly.
|
||||||
BinMapFun =
|
BinMapFun =
|
||||||
fun(Pointer, Acc) ->
|
fun(Pointer, Acc) ->
|
||||||
{SP, _L, ID, SK, EK} = pointer_mapfun(Pointer),
|
{SP, _L, ID, SK, EK} = pointer_mapfun(Pointer),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue