diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index c2a1f8a..f7cec00 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -97,7 +97,7 @@ -define(JOURNAL_FILEX, "cdb"). -define(PENDING_FILEX, "pnd"). --define(SAMPLE_SIZE, 100). +-define(SAMPLE_SIZE, 192). -define(BATCH_SIZE, 32). -define(BATCHES_TO_CHECK, 8). -define(CRC_SIZE, 4). @@ -331,9 +331,11 @@ handle_cast({score_filelist, [Entry|Tail]}, State) -> ScoringState = State#state.scoring_state, CpctPerc = case {leveled_cdb:cdb_getcachedscore(JournalP, os:timestamp()), - leveled_rand:uniform(State#state.score_onein) == 1} of - {CachedScore, UseNewScore} - when CachedScore == undefined; UseNewScore -> + leveled_rand:uniform(State#state.score_onein) == 1, + State#state.score_onein} of + {CachedScore, _UseNewScore, ScoreOneIn} + when CachedScore == undefined; ScoreOneIn == 1 -> + % If caches are not used, always use the current score check_single_file(JournalP, ScoringState#scoring_state.filter_fun, ScoringState#scoring_state.filter_server, @@ -341,7 +343,21 @@ handle_cast({score_filelist, [Entry|Tail]}, State) -> ?SAMPLE_SIZE, ?BATCH_SIZE, State#state.reload_strategy); - {CachedScore, false} -> + {CachedScore, true, _ScoreOneIn} -> + % If caches are used roll the score towards the current score + % Expectation is that this will reduce instances of individual + % files being compacted when a run is missed due to cached + % scores being used in surrounding journals + NewScore = + check_single_file(JournalP, + ScoringState#scoring_state.filter_fun, + ScoringState#scoring_state.filter_server, + ScoringState#scoring_state.max_sqn, + ?SAMPLE_SIZE, + ?BATCH_SIZE, + State#state.reload_strategy), + (NewScore + CachedScore) / 2; + {CachedScore, false, _ScoreOneIn} -> CachedScore end, ok = leveled_cdb:cdb_putcachedscore(JournalP, CpctPerc), diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 09b95f6..2f513ca 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -850,7 +850,7 @@ code_change(_OldVsn, StateName, State, _Extra) -> %% @doc %% Expand a list of pointers, maybe ending up with a list of keys and values %% with a tail of pointers -%% By defauls will not have a segment filter, or a low last_modified_date, but +%% By default will not have a segment filter, or a low last_modified_date, but %% they can be used. Range checking a last modified date must still be made on %% the output - at this stage the low last_modified_date has been used to bulk %% skip those slots not containing any information over the low last modified @@ -1867,7 +1867,7 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache}, % List of segments passed so only {K, V} pairs matching those segments % should be returned. This required the {K, V} pair to have been added % with the appropriate hash - if the pair were added with no_lookup as - % the hash value this will fial unexpectedly. + % the hash value this will fail unexpectedly. BinMapFun = fun(Pointer, Acc) -> {SP, _L, ID, SK, EK} = pointer_mapfun(Pointer),