Merge pull request #320 from martinsumner/mas-i319-cachescores

Allow for caching of compaction scores
2020-12-07 00:00:40 +00:00 · 2020-12-07 00:00:40 +00:00 · 3b305e0adb
commit 3b305e0adb
parent e3bcd7eaec 7bf67563ef
11 changed files with 642 additions and 173 deletions
--- a/docs/STARTUP_OPTIONS.md
+++ b/docs/STARTUP_OPTIONS.md
@ -106,8 +106,9 @@ The `compaction_runs_perday` indicates for the leveled store how many times eahc

 The `compaction_low_hour` and `compaction_high_hour` are the hours of the day which support the compaction window - set to 0 and 23 respectively if compaction is required to be a continuous process.

-The `max_run_length` controls how many files can be compacted in a single compaction run.  The scoring of files and runs is controlled through `maxrunlength_compactionpercentage` and `singlefile_compactionpercentage`.
+The `max_run_length` controls how many files can be compacted in a single compaction run.  The scoring of files and runs is controlled through `maxrunlength_compactionpercentage` and `singlefile_compactionpercentage`.  The `singlefile_compactionpercentage` is an acceptable compaction score for a file to be eligible for compaction on its own, where as the `maxrunlength_compactionpercentage` is the score required for a run of the `max_run_length` to be considered eligible.  The higher the `maxrunlength_compactionpercentage` and the lower the `singlefile_compactionpercentage` - the more likely a longer run will be chosen over a shorter run.

+The `journalcompaction_scoreonein` option controls how frequently a file will be scored.  If this is set to one, then each and every file will be scored each and every compaction run.  If this is set to an integer greater than one ('n'), then on average any given file will only be score on one in 'n' runs.  On other runs. a cached score for the file will be used.  On startup all files will be scored on the first run.  As journals get very large, and where frequent comapction is required due to mutating objects, this can save significant resource.  In Riak, this option is controlled via `leveled.compaction_scores_perday`, with the number of `leveled.compaction_runs_perday` being divided by this to produce the `journalcompaction_scoreonein`.  By default each file will only be scored once per day.

 ## Snapshot Timeouts

--- a/include/leveled.hrl
+++ b/include/leveled.hrl
@ -69,6 +69,7 @@
                        max_run_length,
                        singlefile_compactionperc :: float()|undefined,
                        maxrunlength_compactionperc :: float()|undefined,
+                        score_onein = 1 :: pos_integer(),
                        snaptimeout_long :: pos_integer() | undefined}).

 -record(penciller_options,
@ -94,4 +95,5 @@
                         compression_method = native :: lz4|native,
                         singlefile_compactionperc :: float()|undefined,
                         maxrunlength_compactionperc :: float()|undefined,
+                         score_onein = 1 :: pos_integer(),
                         reload_strategy = [] :: list()}).
--- a/priv/leveled.schema
+++ b/priv/leveled.schema
@ -100,6 +100,12 @@
  {datatype, integer}
 ]}.

+%% @doc The number of times per day to score an individual file for compaction
+{mapping, "leveled.compaction_scores_perday", "leveled.compaction_scores_perday", [
+  {default, 1},
+  {datatype, integer}
+]}.
+
 %% @doc Compaction Low Hour
 %% The hour of the day in which journal compaction can start.  Use Low hour 
 %% of 0 and High hour of 23 to have no compaction window (i.e. always compact 
@ -140,10 +146,10 @@
 %% @doc Target Percentage for Single File
 %% What is the target score for a run of a single file, to qualify for 
 %% compaction.  If less than this percentage would be retained after compaction
-%% then it is a candidate (e.g. in default case if 50% of space would be
+%% then it is a candidate (e.g. in default case if 70% of space would be
 %% recovered)
 {mapping, "leveled.singlefile_compactionpercentage", "leveled.singlefile_compactionpercentage", [
-  {default, 50.0},
+  {default, 30.0},
  {datatype, float},
  hidden
 ]}.
--- a/src/leveled_bookie.erl
+++ b/src/leveled_bookie.erl
@ -140,8 +140,9 @@
                {head_only, false},
                {waste_retention_period, undefined},
                {max_run_length, undefined},
-                {singlefile_compactionpercentage, 50.0},
+                {singlefile_compactionpercentage, 30.0},
                {maxrunlength_compactionpercentage, 70.0},
+                {journalcompaction_scoreonein, 1},
                {reload_strategy, []},
                {max_pencillercachesize, ?MAX_PCL_CACHE_SIZE},
                {ledger_preloadpagecache_level, ?SST_PAGECACHELEVEL_LOOKUP},
@ -292,6 +293,11 @@
            % a run of max_run_length, before that run can be a compaction 
            % candidate.  For runs between 1 and max_run_length, a 
            % proportionate score is calculated
+        {journalcompaction_scoreonein, pos_integer()} |
+            % When scoring for compaction run a probability (1 in x) of whether
+            % any file will be scored this run.  If not scored a cached score
+            % will be used, and the cached score is the average of the latest
+            % score and the rolling average of previous scores
        {reload_strategy, list()} |
            % The reload_strategy is exposed as an option as currently no firm
            % decision has been made about how recovery from failure should
@ -1757,6 +1763,8 @@ set_options(Opts) ->
    
    MaxSSTSlots = proplists:get_value(max_sstslots, Opts),

+    ScoreOneIn = proplists:get_value(journalcompaction_scoreonein, Opts),
+
    {#inker_options{root_path = JournalFP,
                        reload_strategy = ReloadStrategy,
                        max_run_length = proplists:get_value(max_run_length, Opts),
@ -1766,6 +1774,7 @@ set_options(Opts) ->
                        snaptimeout_long = SnapTimeoutLong,
                        compression_method = CompressionMethod,
                        compress_on_receipt = CompressOnReceipt,
+                        score_onein = ScoreOneIn,
                        cdb_options = 
                            #cdb_options{max_size=MaxJournalSize,
                                        max_count=MaxJournalCount,
--- a/src/leveled_cdb.erl
+++ b/src/leveled_cdb.erl
@ -113,7 +113,9 @@
            cdb_deletepending/1,
            cdb_deletepending/3,
            cdb_isrolling/1,
-            cdb_clerkcomplete/1]).
+            cdb_clerkcomplete/1,
+            cdb_getcachedscore/2,
+            cdb_putcachedscore/2]).

 -export([finished_rolling/1,
            hashtable_calc/2]).
@ -133,6 +135,8 @@
 -define(GETPOS_FACTOR, 8).
 -define(MAX_OBJECT_SIZE, 1000000000). 
    % 1GB but really should be much smaller than this
+-define(MEGA, 1000000).
+-define(CACHE_LIFE, 86400).

 -record(state, {hashtree,
                last_position :: integer() | undefined,
@ -152,7 +156,8 @@
                timings = no_timing :: cdb_timings(),
                timings_countdown = 0 :: integer(),
                log_options = leveled_log:get_opts()
-                    :: leveled_log:log_options()}).
+                    :: leveled_log:log_options(),
+                cached_score :: {float(), erlang:timestamp()}|undefined}).

 -record(cdb_timings, {sample_count = 0 :: integer(),
                        sample_cyclecount = 0 :: integer(),
@ -164,6 +169,9 @@
 -type cdb_timings() :: no_timing|#cdb_timings{}.
 -type hashtable_index() :: tuple().
 -type file_location() :: integer()|eof.
+-type filter_fun() ::
+        fun((any(), binary(), integer(), any(), fun((binary()) -> any())) ->
+            {stop|loop, any()}).



@ -369,7 +377,7 @@ cdb_deletepending(Pid) ->
 cdb_deletepending(Pid, ManSQN, Inker) ->
    gen_fsm:send_event(Pid, {delete_pending, ManSQN, Inker}).

-spec cdb_scan(pid(), fun(), any(), integer()|undefined) ->
+-spec cdb_scan(pid(), filter_fun(), any(), integer()|undefined) ->
                                                    {integer()|eof, any()}.
 %% @doc
 %% cdb_scan returns {LastPosition, Acc}.  Use LastPosition as StartPosiiton to
@ -424,6 +432,20 @@ cdb_isrolling(Pid) ->
 cdb_clerkcomplete(Pid) ->
    gen_fsm:send_all_state_event(Pid, clerk_complete).

+-spec cdb_getcachedscore(pid(), erlang:timestamp()) -> undefined|float().
+%% @doc
+%% Return the cached score for a CDB file
+cdb_getcachedscore(Pid, Now) ->
+    gen_fsm:sync_send_all_state_event(Pid, {get_cachedscore, Now}, infinity).
+
+
+-spec cdb_putcachedscore(pid(), float()) -> ok.
+%% @doc
+%% Return the cached score for a CDB file
+cdb_putcachedscore(Pid, Score) ->
+    gen_fsm:sync_send_all_state_event(Pid, {put_cachedscore, Score}, infinity).
+
+

 %%%============================================================================
 %%% gen_server callbacks
@ -829,6 +851,24 @@ handle_sync_event(cdb_filename, _From, StateName, State) ->
    {reply, State#state.filename, StateName, State};
 handle_sync_event(cdb_isrolling, _From, StateName, State) ->
    {reply, StateName == rolling, StateName, State};
+handle_sync_event({get_cachedscore, {NowMega, NowSecs, _}},
+                                                    _From, StateName, State) ->
+    ScoreToReturn =
+        case State#state.cached_score of
+            undefined ->
+                undefined;
+            {Score, {CacheMega, CacheSecs, _}} ->
+                case (NowMega * ?MEGA + NowSecs) >
+                        (CacheMega * ?MEGA + CacheSecs + ?CACHE_LIFE) of
+                    true ->
+                        undefined;
+                    false ->
+                        Score
+                end
+        end,
+    {reply, ScoreToReturn, StateName, State};
+handle_sync_event({put_cachedscore, Score}, _From, StateName, State) ->
+    {reply, ok, StateName, State#state{cached_score = {Score,os:timestamp()}}};
 handle_sync_event(cdb_close, _From, delete_pending, State) ->
    leveled_log:log("CDB05", 
                        [State#state.filename, delete_pending, cdb_close]),
@ -836,8 +876,7 @@ handle_sync_event(cdb_close, _From, delete_pending, State) ->
                        State#state.filename, 
                        State#state.waste_path),
    {stop, normal, ok, State};
-handle_sync_event(cdb_close, _From, StateName, State) ->
-    leveled_log:log("CDB05", [State#state.filename, StateName, cdb_close]),
+handle_sync_event(cdb_close, _From, _StateName, State) ->
    file:close(State#state.handle),
    {stop, normal, ok, State}.

@ -2396,6 +2435,16 @@ get_keys_byposition_manykeys_test_to() ->
    SampleList3 = cdb_getpositions(P2, KeyCount + 1),
    ?assertMatch(KeyCount, length(SampleList3)),
    
+    ?assertMatch(undefined, cdb_getcachedscore(P2, os:timestamp())),
+    ok = cdb_putcachedscore(P2, 80.0),
+    ?assertMatch(80.0, cdb_getcachedscore(P2, os:timestamp())),
+    timer:sleep(1000),
+    {NowMega, NowSecs, _} = Now = os:timestamp(),
+    ?assertMatch(80.0, cdb_getcachedscore(P2, Now)),
+    FutureEpoch = NowMega * ?MEGA + NowSecs + ?CACHE_LIFE,
+    Future = {FutureEpoch div ?MEGA, FutureEpoch rem ?MEGA, 0},
+    ?assertMatch(undefined, cdb_getcachedscore(P2, Future)),
+
    ok = cdb_close(P2),
    ok = file:delete(F2).

--- a/src/leveled_iclerk.erl
+++ b/src/leveled_iclerk.erl
@ -97,7 +97,7 @@

 -define(JOURNAL_FILEX, "cdb").
 -define(PENDING_FILEX, "pnd").
-define(SAMPLE_SIZE, 100).
+-define(SAMPLE_SIZE, 192).
 -define(BATCH_SIZE, 32).
 -define(BATCHES_TO_CHECK, 8).
 -define(CRC_SIZE, 4).
@ -117,17 +117,18 @@
                maxrunlength_compactionperc = ?MAXRUNLENGTH_COMPACTION_TARGET ::float(),
                compression_method = native :: lz4|native,
                scored_files = [] :: list(candidate()),
-                scoring_state :: scoring_state()|undefined}).
+                scoring_state :: scoring_state()|undefined,
+                score_onein = 1 :: pos_integer()}).

 -record(candidate, {low_sqn :: integer() | undefined,
                    filename :: string() | undefined,
                    journal :: pid() | undefined,
                    compaction_perc :: float() | undefined}).

-record(scoring_state, {filter_fun :: fun(),
-                        filter_server :: pid(),
+-record(scoring_state, {filter_fun :: leveled_inker:filterfun(),
+                        filter_server :: leveled_inker:filterserver(),
                        max_sqn :: non_neg_integer(),
-                        close_fun :: fun(),
+                        close_fun :: leveled_inker:filterclosefun(),
                        start_time :: erlang:timestamp()}).

 -type iclerk_options() :: #iclerk_options{}.
@ -165,8 +166,11 @@
 clerk_new(InkerClerkOpts) ->
    gen_server:start_link(?MODULE, [leveled_log:get_opts(), InkerClerkOpts], []).

-spec clerk_compact(pid(), pid(), 
-                    fun(), fun(), fun(),  
+-spec clerk_compact(pid(),
+                    pid(), 
+                    leveled_inker:filterinitfun(),
+                    leveled_inker:filterclosefun(),
+                    leveled_inker:filterfun(),  
                    list()) -> ok.
 %% @doc
 %% Trigger a compaction for this clerk if the threshold of data recovery has 
@ -280,7 +284,10 @@ init([LogOpts, IClerkOpts]) ->
                        singlefile_compactionperc = SFL_CompPerc,
                        maxrunlength_compactionperc = MRL_CompPerc,
                        compression_method = 
-                            IClerkOpts#iclerk_options.compression_method}}.
+                            IClerkOpts#iclerk_options.compression_method,
+                        score_onein = 
+                            IClerkOpts#iclerk_options.score_onein
+                        }}.

 handle_call(stop, _From, State) ->
    case State#state.scoring_state of
@ -325,13 +332,38 @@ handle_cast({score_filelist, [Entry|Tail]}, State) ->
    Candidates = State#state.scored_files,
    {LowSQN, FN, JournalP, _LK} = Entry,
    ScoringState = State#state.scoring_state,
-    CpctPerc = check_single_file(JournalP,
+    CpctPerc =
+        case {leveled_cdb:cdb_getcachedscore(JournalP, os:timestamp()),
+                leveled_rand:uniform(State#state.score_onein) == 1,
+                State#state.score_onein} of
+            {CachedScore, _UseNewScore, ScoreOneIn} 
+                    when CachedScore == undefined; ScoreOneIn == 1 ->
+                % If caches are not used, always use the current score
+                check_single_file(JournalP,
+                                    ScoringState#scoring_state.filter_fun,
+                                    ScoringState#scoring_state.filter_server,
+                                    ScoringState#scoring_state.max_sqn,
+                                    ?SAMPLE_SIZE,
+                                    ?BATCH_SIZE,
+                                    State#state.reload_strategy);
+            {CachedScore, true, _ScoreOneIn} ->
+                % If caches are used roll the score towards the current score
+                % Expectation is that this will reduce instances of individual
+                % files being compacted when a run is missed due to cached
+                % scores being used in surrounding journals
+                NewScore = 
+                    check_single_file(JournalP,
                                    ScoringState#scoring_state.filter_fun,
                                    ScoringState#scoring_state.filter_server,
                                    ScoringState#scoring_state.max_sqn,
                                    ?SAMPLE_SIZE,
                                    ?BATCH_SIZE,
                                    State#state.reload_strategy),
+                (NewScore + CachedScore) / 2;
+            {CachedScore, false, _ScoreOneIn} ->
+                CachedScore
+        end,
+    ok = leveled_cdb:cdb_putcachedscore(JournalP, CpctPerc),
    Candidate =
        #candidate{low_sqn = LowSQN,
                    filename = FN,
@ -509,7 +541,10 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) ->
 %%% Internal functions
 %%%============================================================================

-spec check_single_file(pid(), fun(), any(), non_neg_integer(),
+-spec check_single_file(pid(),
+                        leveled_inker:filterfun(),
+                        leveled_inker:filterserver(),
+                        leveled_codec:sqn(),
                        non_neg_integer(), non_neg_integer(),
                        leveled_codec:compaction_strategy()) ->
                            float().
@ -549,44 +584,31 @@ safely_log_filescore(PositionList, FN, Score, SW) ->
    leveled_log:log_timer("IC004", [Score, AvgJump, FN], SW).

 -spec size_comparison_score(list(key_size() | corrupted_test_key_size()),
-                                    fun(),
-                                    any(),
-                                    non_neg_integer(),
+                                    leveled_inker:filterfun(),
+                                    leveled_inker:filterserver(),
+                                    leveled_codec:sqn(),
                                    leveled_codec:compaction_strategy()) ->
                                        float().
 size_comparison_score(KeySizeList,
                        FilterFun, FilterServer, MaxSQN,
-                        RS) ->
+                        ReloadStrategy) ->
    FoldFunForSizeCompare =
        fun(KS, {ActSize, RplSize}) ->
            case KS of
                {{SQN, Type, PK}, Size} ->
-                    IsJournalEntry =
-                        leveled_codec:is_full_journalentry({SQN, Type, PK}),
-                    case IsJournalEntry of
-                        false ->
-                            TS = leveled_codec:get_tagstrategy(PK, RS),
-                            % If the strategy is to retain key deltas, then
-                            % scoring must reflect that.  Key deltas are
-                            % possible even if strategy does not allow as
-                            % there is support for changing strategy from
-                            % retain to recalc
-                            case TS of
-                                retain ->
-                                    {ActSize + Size - ?CRC_SIZE, RplSize};
-                                _ ->
-                                    {ActSize, RplSize + Size - ?CRC_SIZE}
-                            end;
+                    ToRetain =
+                        to_retain({SQN, Type, PK},
+                                    FilterFun,
+                                    FilterServer,
+                                    MaxSQN,
+                                    ReloadStrategy),
+                    case ToRetain of
                        true ->
-                            Check = FilterFun(FilterServer, PK, SQN),
-                            case {Check, SQN > MaxSQN} of
-                                {current, _} ->
                            {ActSize + Size - ?CRC_SIZE, RplSize};
-                                {_, true} ->
-                                    {ActSize + Size - ?CRC_SIZE, RplSize};
-                                _ ->
+                        convert ->
+                            {ActSize, RplSize + Size - ?CRC_SIZE};
+                        false ->
                            {ActSize, RplSize + Size - ?CRC_SIZE}
-                            end
                    end;
                _ ->
                    % There is a key which is not in expected format
@ -810,28 +832,49 @@ split_positions_into_batches(Positions, Journal, Batches) ->
 %% if it contains index entries.  The hot_backup approach is also not safe with
 %% a `recovr` strategy.  The recovr strategy assumes faults in the ledger will
 %% be resolved via application-level anti-entropy
-filter_output(KVCs, FilterFun, FilterServer, MaxSQN, ReloadStrategy) ->
+filter_output(KVCs, FilterFun, FilterServer, MaxSQN, Strategy) ->
    FoldFun =
-        filter_output_fun(FilterFun, FilterServer, MaxSQN, ReloadStrategy),
+        filter_output_fun(FilterFun, FilterServer, MaxSQN, Strategy),
    lists:reverse(lists:foldl(FoldFun, [], KVCs)).


-filter_output_fun(FilterFun, FilterServer, MaxSQN, ReloadStrategy) ->
+filter_output_fun(FilterFun, FilterServer, MaxSQN, Strategy) ->
    fun(KVC0, Acc) ->
        case KVC0 of
            {_InkKey, crc_wonky, false} ->
                % Bad entry, disregard, don't check
                Acc;
            {JK, JV, _Check} ->
+                ToRetain =
+                    to_retain(JK, FilterFun, FilterServer, MaxSQN, Strategy),
+                case ToRetain of
+                    true ->
+                        [KVC0|Acc];
+                    convert ->
+                        {JK0, JV0} =
+                            leveled_codec:revert_to_keydeltas(JK, JV),
+                        [{JK0, JV0, null}|Acc];
+                    false ->
+                        Acc
+                end
+        end
+    end.
+
+-spec to_retain(leveled_codec:journal_key(),
+                leveled_inker:filterfun(),
+                leveled_inker:fillter_server(),
+                leveled_codec:sqn(),
+                leveled_codec:compaction_strategy()) -> boolean()|convert.
+to_retain(JournalKey, FilterFun, FilterServer, MaxSQN, ReloadStrategy) ->
    {SQN, LK} =
-                    leveled_codec:from_journalkey(JK),
+        leveled_codec:from_journalkey(JournalKey),
    CompactStrategy =
        leveled_codec:get_tagstrategy(LK, ReloadStrategy),
    IsJournalEntry =
-                    leveled_codec:is_full_journalentry(JK),
+        leveled_codec:is_full_journalentry(JournalKey),
    case {CompactStrategy, IsJournalEntry} of
        {retain, false} ->
-                        [KVC0|Acc];
+            true;
        _ ->
            KeyCurrent = FilterFun(FilterServer, LK, SQN),
            IsInMemory = SQN > MaxSQN,
@ -839,24 +882,21 @@ filter_output_fun(FilterFun, FilterServer, MaxSQN, ReloadStrategy) ->
                {KC, InMem, _} when KC == current; InMem ->
                    % This entry may still be required
                    % regardless of strategy
-                                [KVC0|Acc];
+                    true;
                {_, _, retain} ->
                    % If we have a retain strategy, it can't be
                    % discarded - but the value part is no
                    % longer required as this version has been
                    % replaced
-                                {JK0, JV0} =
-                                    leveled_codec:revert_to_keydeltas(JK, JV),
-                                [{JK0, JV0, null}|Acc];
+                    convert;
                {_, _, _} ->
                    % This is out of date and not retained so
                    % discard
-                                Acc
-                        end
-                end
+                    false
            end
    end.

+
 write_values([], _CDBopts, Journal0, ManSlice0, _PressMethod) ->
    {Journal0, ManSlice0};
 write_values(KVCList, CDBopts, Journal0, ManSlice0, PressMethod) ->
--- a/src/leveled_inker.erl
+++ b/src/leveled_inker.erl
@ -157,6 +157,14 @@
 -type inker_options() :: #inker_options{}.
 -type ink_state() :: #state{}.
 -type registered_snapshot() :: {pid(), os:timestamp(), integer()}.
+-type filterserver() :: pid()|list(tuple()).
+-type filterfun() ::
+    fun((filterserver(), leveled_codec:ledger_key(), leveled_codec:sqn()) ->
+            current|replaced|missing).
+-type filterclosefun() :: fun((filterserver()) -> ok).
+-type filterinitfun() :: fun((pid()) -> {filterserver(), leveled_codec:sqn()}).
+
+-export_type([filterserver/0, filterfun/0, filterclosefun/0, filterinitfun/0]).

 %%%============================================================================
 %%% API
@ -806,6 +814,7 @@ start_from_file(InkOpts) ->
    PressMethod = InkOpts#inker_options.compression_method,
    PressOnReceipt = InkOpts#inker_options.compress_on_receipt,
    SnapTimeout = InkOpts#inker_options.snaptimeout_long,
+    ScoreOneIn = InkOpts#inker_options.score_onein,

    IClerkOpts = 
        #iclerk_options{inker = self(),
@ -815,7 +824,8 @@ start_from_file(InkOpts) ->
                            compression_method = PressMethod,
                            max_run_length = MRL,
                            singlefile_compactionperc = SFL_CompactPerc,
-                            maxrunlength_compactionperc = MRL_CompactPerc},
+                            maxrunlength_compactionperc = MRL_CompactPerc,
+                            score_onein = ScoreOneIn},
    
    {ok, Clerk} = leveled_iclerk:clerk_new(IClerkOpts),
    
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@ -181,6 +181,8 @@
        :: {binary(), binary(), list(integer()), leveled_codec:ledger_key()}.
 -type sst_summary()
        :: #summary{}.
+-type blockindex_cache()
+        :: any().  % An array but OTP 16 types

 %% yield_blockquery is used to determine if the work necessary to process a
 %% range query beyond the fetching the slot should be managed from within
@ -196,7 +198,7 @@
                    root_path,
                    filename,
                    yield_blockquery = false :: boolean(),
-                    blockindex_cache,
+                    blockindex_cache :: blockindex_cache()|undefined,
                    compression_method = native :: press_method(),
                    index_moddate = ?INDEX_MODDATE :: boolean(),
                    timings = no_timing :: sst_timings(),
@ -207,7 +209,8 @@
                    deferred_startup_tuple :: tuple()|undefined,
                    level :: non_neg_integer()|undefined,
                    tomb_count = not_counted
-                            :: non_neg_integer()|not_counted}).
+                            :: non_neg_integer()|not_counted,
+                    high_modified_date :: non_neg_integer()|undefined}).

 -record(sst_timings, 
                {sample_count = 0 :: integer(),
@ -526,8 +529,14 @@ starting({sst_new,
    SW = os:timestamp(),
    leveled_log:save(OptsSST#sst_options.log_options),
    PressMethod = OptsSST#sst_options.press_method,
-    {Length, SlotIndex, BlockIndex, SlotsBin, Bloom} = 
+    {Length, SlotIndex, BlockEntries, SlotsBin, Bloom} = 
        build_all_slots(SlotList),
+    {BlockIndex, HighModDate} =
+        update_blockindex_cache(true,
+                                BlockEntries,
+                                new_blockindex_cache(Length),
+                                undefined,
+                                IdxModDate),
    SummaryBin = 
        build_table_summary(SlotIndex, Level, FirstKey, Length,
                            MaxSQN, Bloom, CountOfTombs),
@ -550,6 +559,7 @@ starting({sst_new,
        {ok, {Summary#summary.first_key, Summary#summary.last_key}, Bloom},
        reader,
        UpdState#state{blockindex_cache = BlockIndex,
+                        high_modified_date = HighModDate,
                        starting_pid = StartingPID,
                        level = Level}};
 starting({sst_newlevelzero, RootPath, Filename,
@ -583,8 +593,14 @@ starting(complete_l0startup, State) ->
    Time1 = timer:now_diff(os:timestamp(), SW1),

    SW2 = os:timestamp(),
-    {SlotCount, SlotIndex, BlockIndex, SlotsBin,Bloom} =
+    {SlotCount, SlotIndex, BlockEntries, SlotsBin,Bloom} =
        build_all_slots(SlotList),
+    {BlockIndex, HighModDate} =
+        update_blockindex_cache(true,
+                                BlockEntries,
+                                new_blockindex_cache(SlotCount),
+                                undefined,
+                                IdxModDate),
    Time2 = timer:now_diff(os:timestamp(), SW2),
    
    SW3 = os:timestamp(),
@ -616,19 +632,19 @@ starting(complete_l0startup, State) ->

    case Penciller of
        undefined ->
-            {next_state, 
-                reader, 
-                UpdState#state{blockindex_cache = BlockIndex}};
+            ok;
        _ ->
            leveled_penciller:pcl_confirml0complete(Penciller,
                                                    UpdState#state.filename,
                                                    Summary#summary.first_key,
                                                    Summary#summary.last_key,
                                                    Bloom),
+            ok
+    end,
    {next_state,
        reader,
-                UpdState#state{blockindex_cache = BlockIndex}}
-    end;
+        UpdState#state{blockindex_cache = BlockIndex,
+                        high_modified_date = HighModDate}};
 starting({sst_returnslot, FetchedSlot, FetchFun, SlotCount}, State) ->
    Self = self(),
    FetchedSlots = 
@ -673,13 +689,19 @@ reader({get_kv, LedgerKey, Hash}, _From, State) ->
                                            timings_countdown = CountDown}};
 reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList, LowLastMod},
                                                            _From, State) ->
-    {SlotsToFetchBinList, SlotsToPoint} = fetch_range(StartKey,
-                                                        EndKey,
-                                                        ScanWidth,
-                                                        SegList,
+    ReadNeeded =
+        check_modified(State#state.high_modified_date,
                        LowLastMod,
-                                                        State),
-    
+                        State#state.index_moddate),
+    {NeedBlockIdx, SlotsToFetchBinList, SlotsToPoint} =
+        case ReadNeeded of
+            true -> 
+                fetch_range(StartKey, EndKey, ScanWidth,
+                            SegList, LowLastMod,
+                            State);
+            false ->
+                {false, [], []}
+        end,
    PressMethod = State#state.compression_method,
    IdxModDate = State#state.index_moddate,
    
@ -694,34 +716,38 @@ reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList, LowLastMod},
                reader,
                State};
        false ->
-            {L, BIC} = 
+            {L, FoundBIC} = 
                binaryslot_reader(SlotsToFetchBinList, 
-                                    PressMethod, IdxModDate, SegList),
-            FoldFun = 
-                fun(CacheEntry, Cache) ->
-                    case CacheEntry of
-                        {_ID, none} ->
-                            Cache;
-                        {ID, Header} ->
-                            array:set(ID - 1, binary:copy(Header), Cache)
-                    end
-                end,
-            BlockIdxC0 = lists:foldl(FoldFun, State#state.blockindex_cache, BIC),
+                                    PressMethod,
+                                    IdxModDate,
+                                    SegList),
+            {BlockIdxC0, HighModDate} =
+                update_blockindex_cache(NeedBlockIdx,
+                                        FoundBIC,
+                                        State#state.blockindex_cache,
+                                        State#state.high_modified_date,
+                                        State#state.index_moddate),
            {reply, 
                L ++ SlotsToPoint, 
                reader, 
-                State#state{blockindex_cache = BlockIdxC0}}
+                State#state{blockindex_cache = BlockIdxC0,
+                            high_modified_date = HighModDate}}
    end;
 reader({get_slots, SlotList, SegList, LowLastMod}, _From, State) ->
    PressMethod = State#state.compression_method,
    IdxModDate = State#state.index_moddate,
-    SlotBins = 
+    {NeedBlockIdx, SlotBins} = 
        read_slots(State#state.handle, 
                        SlotList, 
-                    {SegList, LowLastMod, State#state.blockindex_cache},
+                        {SegList,
+                            LowLastMod,
+                            State#state.blockindex_cache},
                        State#state.compression_method,
                        State#state.index_moddate),
-    {reply, {SlotBins, PressMethod, IdxModDate}, reader, State};
+    {reply,
+        {NeedBlockIdx, SlotBins, PressMethod, IdxModDate},
+        reader,
+        State};
 reader(get_maxsequencenumber, _From, State) ->
    Summary = State#state.summary,
    {reply, Summary#summary.max_sqn, reader, State};
@ -759,12 +785,8 @@ delete_pending({get_kv, LedgerKey, Hash}, _From, State) ->
    {reply, Result, delete_pending, UpdState, ?DELETE_TIMEOUT};
 delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SegList, LowLastMod},
                                                            _From, State) ->
-    {SlotsToFetchBinList, SlotsToPoint} = fetch_range(StartKey,
-                                                        EndKey,
-                                                        ScanWidth,
-                                                        SegList,
-                                                        LowLastMod,
-                                                        State),
+    {_NeedBlockIdx, SlotsToFetchBinList, SlotsToPoint} =
+        fetch_range(StartKey, EndKey, ScanWidth, SegList, LowLastMod, State),
    % Always yield as about to clear and de-reference
    PressMethod = State#state.compression_method,
    IdxModDate = State#state.index_moddate,
@ -776,14 +798,14 @@ delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SegList, LowLastMod},
 delete_pending({get_slots, SlotList, SegList, LowLastMod}, _From, State) ->
    PressMethod = State#state.compression_method,
    IdxModDate = State#state.index_moddate,
-    SlotBins = 
+    {_NeedBlockIdx, SlotBins} = 
        read_slots(State#state.handle, 
                    SlotList, 
                    {SegList, LowLastMod, State#state.blockindex_cache},
                    PressMethod,
                    IdxModDate),
    {reply, 
-        {SlotBins, PressMethod, IdxModDate}, 
+        {false, SlotBins, PressMethod, IdxModDate}, 
        delete_pending, 
        State, 
        ?DELETE_TIMEOUT};
@ -815,8 +837,17 @@ delete_pending(close, State) ->
 handle_sync_event(_Msg, _From, StateName, State) ->
    {reply, undefined, StateName, State}.

-handle_event(_Msg, StateName, State) ->
-    {next_state, StateName, State}.
+handle_event({update_blockindex_cache, BIC}, StateName, State) ->
+    {BlockIndexCache, HighModDate} =
+        update_blockindex_cache(true,
+                                BIC,
+                                State#state.blockindex_cache,
+                                State#state.high_modified_date,
+                                State#state.index_moddate),
+    {next_state,
+        StateName,
+        State#state{blockindex_cache = BlockIndexCache,
+                    high_modified_date = HighModDate}}.

 handle_info(tidyup_after_startup, delete_pending, State) ->
    % No need to GC, this file is to be shutdown.  This message may have
@ -850,7 +881,7 @@ code_change(_OldVsn, StateName, State, _Extra) ->
 %% @doc
 %% Expand a list of pointers, maybe ending up with a list of keys and values
 %% with a tail of pointers
-%% By defauls will not have a segment filter, or a low last_modified_date, but
+%% By default will not have a segment filter, or a low last_modified_date, but
 %% they can be used. Range checking a last modified date must still be made on
 %% the output - at this stage the low last_modified_date has been used to bulk
 %% skip those slots not containing any information over the low last modified
@ -983,11 +1014,17 @@ sst_getslots(Pid, SlotList) ->
 %% of the object, if the object is to be covered by the query
 sst_getfilteredslots(Pid, SlotList, SegList, LowLastMod) ->
    SegL0 = tune_seglist(SegList),
-    {SlotBins, PressMethod, IdxModDate} = 
+    {NeedBlockIdx, SlotBins, PressMethod, IdxModDate} = 
        gen_fsm:sync_send_event(Pid, 
                                {get_slots, SlotList, SegL0, LowLastMod},
                                infinity),
-    {L, _BIC} = binaryslot_reader(SlotBins, PressMethod, IdxModDate, SegL0),
+    {L, BIC} = binaryslot_reader(SlotBins, PressMethod, IdxModDate, SegL0),
+    case NeedBlockIdx of
+        true ->
+            gen_fsm:send_all_state_event(Pid, {update_blockindex_cache, BIC});
+        false ->
+            ok
+    end,
    L.


@ -1065,6 +1102,62 @@ tune_seglist(SegList) ->
 %%% Internal Functions
 %%%============================================================================

+-spec new_blockindex_cache(pos_integer()) -> blockindex_cache().
+new_blockindex_cache(Size) ->
+    array:new([{size, Size}, {default, none}]).
+
+-spec update_blockindex_cache(boolean(),
+                                list({integer(), binary()}),
+                                blockindex_cache(),
+                                non_neg_integer()|undefined,
+                                boolean()) ->
+                                    {blockindex_cache(),
+                                        non_neg_integer()|undefined}.
+update_blockindex_cache(Needed, Entries, BIC, HighModDate, IdxModDate)
+                                            when Needed,
+                                                    HighModDate == undefined ->
+    FoldFun = 
+        fun(CacheEntry, Cache) ->
+            case CacheEntry of
+                {ID, Header} when is_binary(Header) ->
+                    array:set(ID - 1, binary:copy(Header), Cache);
+                _ ->
+                    Cache
+            end
+        end,
+    BlockIdxC0 = lists:foldl(FoldFun, BIC, Entries),
+    Size = array:size(BlockIdxC0),
+    BestModDates =
+        case IdxModDate of
+            true ->
+                ModDateFold =
+                    fun(_ID, Header, Acc) when is_binary(Header) ->
+                        [element(2, extract_header(Header, IdxModDate))|Acc]
+                    end,
+                array:sparse_foldl(ModDateFold, [], BlockIdxC0);
+            false ->
+                []
+        end,
+    BestModDate =
+        case length(BestModDates) of
+            Size ->
+                lists:max(BestModDates);
+            _ ->
+                undefined
+        end,
+    {BlockIdxC0, BestModDate};
+update_blockindex_cache(_Needed, _Entries, BIC, HighModDate, _IdxModDate) ->
+    {BIC, HighModDate}.
+
+-spec check_modified(non_neg_integer()|undefined,
+                        non_neg_integer(),
+                        boolean())  -> boolean().
+check_modified(HighLastModifiedInSST, LowModDate, true)
+                when is_integer(HighLastModifiedInSST) ->
+    LowModDate =< HighLastModifiedInSST;
+check_modified(_, _, _) ->
+    true.
+
 -spec fetch(tuple(), 
            {integer(), integer()}|integer(), 
            sst_state(), sst_timings()) 
@ -1093,14 +1186,17 @@ fetch(LedgerKey, Hash, State, Timings0) ->
            SlotBin = read_slot(State#state.handle, Slot),
            {Result, Header} = 
                binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod, IdxModDate),
-            BlockIndexCache = 
-                array:set(SlotID - 1,
-                            binary:copy(Header),
-                            State#state.blockindex_cache),
+            {BlockIndexCache, HighModDate} =
+                update_blockindex_cache(true,
+                                        [{SlotID, Header}],
+                                        State#state.blockindex_cache,
+                                        State#state.high_modified_date,
+                                        State#state.index_moddate),
            {_SW3, Timings3} = 
                update_timings(SW2, Timings2, noncached_block, false),
            {Result, 
-                State#state{blockindex_cache = BlockIndexCache}, 
+                State#state{blockindex_cache = BlockIndexCache,
+                            high_modified_date = HighModDate}, 
                Timings3};
        {BlockLengths, _LMD, PosBin} ->
            PosList = find_pos(PosBin, extract_hash(Hash), [], 0),
@ -1150,7 +1246,8 @@ fetch(LedgerKey, Hash, State, Timings0) ->

 -spec fetch_range(tuple(), tuple(), integer(),
                    leveled_codec:segment_list(), non_neg_integer(), 
-                    sst_state()) -> {list(), list()}.
+                    sst_state()) ->
+                        {boolean(), list(), list()}.
 %% @doc
 %% Fetch the contents of the SST file for a given key range.  This will 
 %% pre-fetch some results, and append pointers for additional results.
@ -1209,13 +1306,13 @@ fetch_range(StartKey, EndKey, ScanWidth, SegList, LowLastMod, State) ->
                lists:split(ScanWidth, ExpandedSlots)
        end,

-    SlotsToFetchBinList = 
+    {NeededBlockIdx, SlotsToFetchBinList} = 
        read_slots(Handle, 
                    SlotsToFetch, 
                    {SegList, LowLastMod, State#state.blockindex_cache},
                    State#state.compression_method,
                    State#state.index_moddate),
-    {SlotsToFetchBinList, SlotsToPoint}.
+    {NeededBlockIdx, SlotsToFetchBinList, SlotsToPoint}.

 -spec compress_level(integer(), press_method()) -> press_method().
 %% @doc
@ -1258,8 +1355,7 @@ read_file(Filename, State, LoadPageCache) ->
    UpdState0 = imp_fileversion(FileVersion, State),
    {Summary, Bloom, SlotList, TombCount} =
        read_table_summary(SummaryBin, UpdState0#state.tomb_count),
-    BlockIndexCache = array:new([{size, Summary#summary.size},
-                                    {default, none}]),
+    BlockIndexCache = new_blockindex_cache(Summary#summary.size),
    UpdState1 = UpdState0#state{blockindex_cache = BlockIndexCache},
    SlotIndex = from_list(SlotList),
    UpdSummary = Summary#summary{index = SlotIndex},
@ -1389,8 +1485,7 @@ build_all_slots(SlotList) ->
                            9,
                            1,
                            [],
-                            array:new([{size, SlotCount}, 
-                                        {default, none}]),
+                            [],
                            <<>>,
                            []),
    Bloom = leveled_ebloom:create_bloom(HashLists),
@ -1410,7 +1505,7 @@ build_all_slots([SlotD|Rest], Pos, SlotID,
                    Pos + Length,
                    SlotID + 1,
                    [{LastKey, SlotIndexV}|SlotIdxAcc],
-                    array:set(SlotID - 1, BlockIdx, BlockIdxAcc),
+                    [{SlotID, BlockIdx}|BlockIdxAcc],
                    <<SlotBinAcc/binary, SlotBin/binary>>,
                    lists:append(HashLists, HashList)).

@ -1842,7 +1937,8 @@ binarysplit_mapfun(MultiSlotBin, StartPos) ->

 -spec read_slots(file:io_device(), list(), 
                    {false|list(), non_neg_integer(), binary()},
-                    press_method(), boolean()) -> list(binaryslot_element()).
+                    press_method(), boolean()) -> 
+                        {boolean(), list(binaryslot_element())}.
 %% @doc
 %% The reading of sots will return a list of either 2-tuples containing 
 %% {K, V} pairs - or 3-tuples containing {Binary, SK, EK}.  The 3 tuples 
@ -1861,15 +1957,15 @@ read_slots(Handle, SlotList, {false, 0, _BlockIndexCache},
                _PressMethod, _IdxModDate) ->
    % No list of segments passed or useful Low LastModified Date
    % Just read slots in SlotList
-    read_slotlist(SlotList, Handle);
+    {false, read_slotlist(SlotList, Handle)};
 read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache}, 
                PressMethod, IdxModDate) ->
    % List of segments passed so only {K, V} pairs matching those segments
    % should be returned.  This required the {K, V} pair to have been added 
    % with the appropriate hash - if the pair were added with no_lookup as 
-    % the hash value this will fial unexpectedly.
+    % the hash value this will fail unexpectedly.
    BinMapFun = 
-        fun(Pointer, Acc) ->
+        fun(Pointer, {NeededBlockIdx, Acc}) ->
            {SP, _L, ID, SK, EK} = pointer_mapfun(Pointer),
            CachedHeader = array:get(ID - 1, BlockIndexCache),
            case extract_header(CachedHeader, IdxModDate) of
@ -1877,7 +1973,7 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache},
                    % If there is an attempt to use the seg list query and the
                    % index block cache isn't cached for any part this may be 
                    % slower as each slot will be read in turn
-                    Acc ++ read_slotlist([Pointer], Handle);
+                    {true, Acc ++ read_slotlist([Pointer], Handle)};
                {BlockLengths, LMD, BlockIdx} ->
                    % If there is a BlockIndex cached then we can use it to 
                    % check to see if any of the expected segments are 
@ -1894,12 +1990,14 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache},
                            % LowLastMod date passed in the query - therefore
                            % there are no interesting modifications in this
                            % slot - it is all too old
-                            Acc;
+                            {NeededBlockIdx, Acc};
                        false ->
                            case SegList of
                                false ->
                                    % Need all the slot now
-                                    Acc ++ read_slotlist([Pointer], Handle);
+                                    {NeededBlockIdx,
+                                        Acc ++
+                                            read_slotlist([Pointer], Handle)};
                                _SL ->
                                    % Need to find just the right keys
                                    PositionList = 
@ -1920,12 +2018,13 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache},
                                    % to be filtered
                                    FilterFun =
                                        fun(KV) -> in_range(KV, SK, EK) end,
-                                    Acc ++ lists:filter(FilterFun, KVL)
+                                    {NeededBlockIdx,
+                                        Acc ++ lists:filter(FilterFun, KVL)}
                            end
                    end
            end 
        end,
-    lists:foldl(BinMapFun, [], SlotList).
+    lists:foldl(BinMapFun, {false, []}, SlotList).


 -spec in_range(leveled_codec:ledger_kv(),
@ -2015,7 +2114,7 @@ read_length_list(Handle, LengthList) ->


 -spec extract_header(binary()|none, boolean()) ->
-                                        {binary(), integer(), binary()}|none.
+                                {binary(), non_neg_integer(), binary()}|none.
 %% @doc
 %% Helper for extracting the binaries from the header ignoring the missing LMD
 %% if LMD is not indexed
@ -3657,8 +3756,6 @@ key_dominates_test() ->
                    key_dominates([KV7|KL2], [KV2], {true, 1})).

 nonsense_coverage_test() ->
-    {ok, Pid} = gen_fsm:start_link(?MODULE, [], []),
-    ok = gen_fsm:send_all_state_event(Pid, nonsense),
    ?assertMatch({ok, reader, #state{}}, code_change(nonsense,
                                                        reader,
                                                        #state{},
@ -3861,6 +3958,39 @@ corrupted_block_fetch_tester(PressMethod) ->
    ExpectedMisses = element(2, ?LOOK_BLOCKSIZE),
    ?assertMatch(ExpectedMisses, MissCount).

+block_index_cache_test() ->
+    {Mega, Sec, _} = os:timestamp(),
+    Now = Mega * 1000000 + Sec,
+    EntriesTS =
+        lists:map(fun(I) ->
+                        TS = Now - I + 1,
+                        {I, <<0:160/integer, TS:32/integer, 0:32/integer>>}
+                    end,
+                    lists:seq(1, 8)),
+    EntriesNoTS = 
+        lists:map(fun(I) ->
+                        {I, <<0:160/integer, 0:32/integer>>}
+                    end,
+                    lists:seq(1, 8)),
+    HeaderTS = <<0:160/integer, Now:32/integer, 0:32/integer>>,
+    HeaderNoTS = <<0:192>>,
+    BIC = array:new([{size, 8}, {default, none}]),
+    {BIC0, undefined} =
+        update_blockindex_cache(false, EntriesNoTS, BIC, undefined, false),
+    {BIC1, undefined} =
+        update_blockindex_cache(false, EntriesTS, BIC, undefined, true),
+    {BIC2, undefined} =
+        update_blockindex_cache(true, EntriesNoTS, BIC, undefined, false),
+    {BIC3, LMD3} =
+        update_blockindex_cache(true, EntriesTS, BIC, undefined, true),
+    
+    ?assertMatch(none, array:get(0, BIC0)),
+    ?assertMatch(none, array:get(0, BIC1)),
+    ?assertMatch(HeaderNoTS, array:get(0, BIC2)),
+    ?assertMatch(HeaderTS, array:get(0, BIC3)),
+    ?assertMatch(Now, LMD3).
+
+

 receive_fun() ->
    receive
--- a/test/end_to_end/basic_SUITE.erl
+++ b/test/end_to_end/basic_SUITE.erl
@ -299,7 +299,7 @@ journal_compaction_tester(Restart, WRP) ->
    end,
    ok = leveled_penciller:pcl_close(PclClone),
    ok = leveled_inker:ink_close(InkClone),
-    % Snapshot released so deletes shoudl occur at next timeout
+    % Snapshot released so deletes should occur at next timeout
    case WRP of 
        undefined ->
            timer:sleep(10100); % wait for delete_pending timeout
--- a/test/end_to_end/recovery_SUITE.erl
+++ b/test/end_to_end/recovery_SUITE.erl
@ -16,7 +16,8 @@
            journal_compaction_bustedjournal/1,
            close_duringcompaction/1,
            allkeydelta_journal_multicompact/1,
-            recompact_keydeltas/1
+            recompact_keydeltas/1,
+            simple_cachescoring/1
            ]).

 all() -> [
@ -33,7 +34,8 @@ all() -> [
            close_duringcompaction,
            allkeydelta_journal_multicompact,
            recompact_keydeltas,
-            stdtag_recalc
+            stdtag_recalc,
+            simple_cachescoring
            ].


@ -555,6 +557,79 @@ aae_missingjournal(_Config) ->
    ok = leveled_bookie:book_close(Bookie2),
    testutil:reset_filestructure().

+simple_cachescoring(_Config) ->
+    RootPath = testutil:reset_filestructure(),
+    StartOpts = [{root_path, RootPath},
+                    {max_journalobjectcount, 2000},
+                    {sync_strategy, testutil:sync_strategy()}],
+    {ok, Bookie1} =
+        leveled_bookie:book_start(StartOpts ++
+                                    [{journalcompaction_scoreonein, 8}]),
+    {TestObject, TestSpec} = testutil:generate_testobject(),
+    ok = testutil:book_riakput(Bookie1, TestObject, TestSpec),
+    testutil:check_forobject(Bookie1, TestObject),
+    GenList = [2, 32002, 64002, 96002],
+    _CLs = testutil:load_objects(32000, GenList, Bookie1, TestObject,
+                                fun testutil:generate_objects/2),
+    
+    F = fun leveled_bookie:book_islastcompactionpending/1,
+    WaitForCompaction =
+        fun(B) -> 
+            fun(X, Pending) ->
+                case X of 
+                    1 ->
+                        leveled_bookie:book_compactjournal(B, 30000);
+                    _ ->
+                        ok
+                end,
+                case Pending of
+                    false ->
+                        false;
+                    true ->
+                        io:format("Loop ~w waiting for journal "
+                            ++ "compaction to complete~n", [X]),
+                        timer:sleep(100),
+                        F(B)
+                end
+            end
+        end,
+    io:format("Scoring for first time - every file should need scoring~n"),
+    Args1 = [WaitForCompaction(Bookie1), true, lists:seq(1, 300)],
+    {TC0, false} = timer:tc(lists, foldl, Args1),
+    io:format("Score four more times with cached scoring~n"),
+    {TC1, false} = timer:tc(lists, foldl, Args1),
+    {TC2, false} = timer:tc(lists, foldl, Args1),
+    {TC3, false} = timer:tc(lists, foldl, Args1),
+    {TC4, false} = timer:tc(lists, foldl, Args1),
+    
+    ok = leveled_bookie:book_close(Bookie1),
+    {ok, Bookie2} =
+        leveled_bookie:book_start(StartOpts),
+    io:format("Re-opened bookie withour caching - re-compare compaction time~n"),
+    io:format("Scoring for first time - every file should need scoring~n"),
+    Args2 = [WaitForCompaction(Bookie2), true, lists:seq(1, 300)],
+    {TN0, false} = timer:tc(lists, foldl, Args2),
+    io:format("Score four more times with cached scoring~n"),
+    {TN1, false} = timer:tc(lists, foldl, Args2),
+    {TN2, false} = timer:tc(lists, foldl, Args2),
+    {TN3, false} = timer:tc(lists, foldl, Args2),
+    {TN4, false} = timer:tc(lists, foldl, Args2),
+    
+    AvgSecondRunCache = (TC1 + TC2 +TC3 + TC4) div 4000,
+    AvgSecondRunNoCache = (TN1 + TN2 +TN3 + TN4) div 4000,
+
+    io:format("With caching ~w first run ~w average other runs~n",
+                [TC0 div 1000, AvgSecondRunCache]),
+    io:format("Without caching ~w first run ~w average other runs~n",
+                [TN0 div 1000, AvgSecondRunNoCache]),
+    true = (TC0 > AvgSecondRunCache),
+    true = (TC0/AvgSecondRunCache) > (TN0/AvgSecondRunNoCache),
+    ok = leveled_bookie:book_close(Bookie2),
+
+    io:format("Exit having proven simply that caching score is faster~n"),
+    testutil:reset_filestructure().
+
+
 aae_bustedjournal(_Config) ->
    RootPath = testutil:reset_filestructure(),
    StartOpts = [{root_path, RootPath},
--- a/test/end_to_end/riak_SUITE.erl
+++ b/test/end_to_end/riak_SUITE.erl
@ -258,6 +258,13 @@ fetchclocks_modifiedbetween(_Config) ->
    {ok, Bookie1A} = leveled_bookie:book_start(StartOpts1A),
    {ok, Bookie1B} = leveled_bookie:book_start(StartOpts1B),

+    ObjList0 = 
+        testutil:generate_objects(100000, 
+                                    {fixed_binary, 1}, [],
+                                    leveled_rand:rand_bytes(32),
+                                    fun() -> [] end,
+                                    <<"BaselineB">>),
+
    ObjL1StartTS = testutil:convert_to_seconds(os:timestamp()),
    ObjList1 = 
        testutil:generate_objects(20000, 
@ -313,7 +320,7 @@ fetchclocks_modifiedbetween(_Config) ->
    _ObjL5EndTS = testutil:convert_to_seconds(os:timestamp()),
    timer:sleep(1000),

-    _ObjL6StartTS = testutil:convert_to_seconds(os:timestamp()),
+    ObjL6StartTS = testutil:convert_to_seconds(os:timestamp()),
    ObjList6 = 
        testutil:generate_objects(7000, 
                                    {fixed_binary, 1}, [],
@ -331,6 +338,7 @@ fetchclocks_modifiedbetween(_Config) ->
    testutil:riakload(Bookie1A, ObjList4),
    testutil:riakload(Bookie1A, ObjList6),

+    testutil:riakload(Bookie1B, ObjList0),
    testutil:riakload(Bookie1B, ObjList4),
    testutil:riakload(Bookie1B, ObjList5),
    testutil:riakload(Bookie1B, ObjList1),
@ -412,7 +420,7 @@ fetchclocks_modifiedbetween(_Config) ->
            fun(_B, K, V, {LK, AccC}) ->
                % Value is proxy_object?  Can we get the metadata and
                % read the last modified date?  The do a non-accelerated
-                % fold to chekc that it is slower
+                % fold to check that it is slower
                {proxy_object, MDBin, _Size, _Fetcher} = binary_to_term(V),
                LMDTS = testutil:get_lastmodified(MDBin),
                LMD = testutil:convert_to_seconds(LMDTS),
@ -458,13 +466,20 @@ fetchclocks_modifiedbetween(_Config) ->
    true = NoFilterTime > PlusFilterTime,

    SimpleCountFun =
-        fun(_B, _K, _V, AccC) -> AccC + 1 end,
+        fun(BucketList) ->
+            fun(B, _K, _V, AccC) -> 
+                case lists:member(B, BucketList) of
+                    true -> AccC + 1;
+                    false -> AccC
+                end
+            end
+        end,

    {async, R4A_MultiBucketRunner} = 
        leveled_bookie:book_headfold(Bookie1A,
                                        ?RIAK_TAG,
                                        {bucket_list, [<<"B0">>, <<"B2">>]},
-                                        {SimpleCountFun, 0},
+                                        {SimpleCountFun([<<"B0">>, <<"B2">>]), 0},
                                        false,
                                        true,
                                        false,
@ -482,7 +497,7 @@ fetchclocks_modifiedbetween(_Config) ->
                                        {bucket_list, [<<"B2">>, <<"B0">>]},
                                            % Reverse the buckets in the bucket
                                            % list
-                                        {SimpleCountFun, 0},
+                                        {SimpleCountFun([<<"B0">>, <<"B2">>]), 0},
                                        false,
                                        true,
                                        false,
@ -495,10 +510,10 @@ fetchclocks_modifiedbetween(_Config) ->

    {async, R5B_MultiBucketRunner} = 
        leveled_bookie:book_headfold(Bookie1B,
-                                            % Same query - other bookie
                                        ?RIAK_TAG,
-                                        {bucket_list, [<<"B2">>, <<"B0">>]},
-                                        {SimpleCountFun, 0},
+                                        {bucket_list,
+                                            [<<"BaselineB">>, <<"B2">>, <<"B0">>]},
+                                        {SimpleCountFun([<<"B0">>, <<"B2">>]), 0},
                                        false,
                                        true,
                                        false,
@ -506,7 +521,7 @@ fetchclocks_modifiedbetween(_Config) ->
                                        false),
    R5B_MultiBucket = R5B_MultiBucketRunner(),
    io:format("R5B_MultiBucket ~w ~n", [R5B_MultiBucket]),
-    true = R5A_MultiBucket == 37000,
+    true = R5B_MultiBucket == 37000,

    testutil:update_some_objects(Bookie1A, ObjList1, 1000),
    R6A_PlusFilter = lists:foldl(FoldRangesFun(Bookie1A, 
@ -523,7 +538,7 @@ fetchclocks_modifiedbetween(_Config) ->
        leveled_bookie:book_headfold(Bookie1A,
                                        ?RIAK_TAG,
                                        {bucket_list, [<<"B1">>, <<"B2">>]},
-                                        {SimpleCountFun, 0},
+                                        {SimpleCountFun([<<"B1">>, <<"B2">>]), 0},
                                        false,
                                        true,
                                        false,
@ -537,7 +552,7 @@ fetchclocks_modifiedbetween(_Config) ->
        leveled_bookie:book_headfold(Bookie1A,
                                        ?RIAK_TAG,
                                        {bucket_list, [<<"B1">>, <<"B2">>]},
-                                        {SimpleCountFun, 0},
+                                        {SimpleCountFun([<<"B1">>, <<"B2">>]), 0},
                                        false,
                                        true,
                                        false,
@ -547,9 +562,141 @@ fetchclocks_modifiedbetween(_Config) ->
    io:format("R8A_MultiBucket ~w ~n", [R8A_MultiBucket]),
    true = R8A_MultiBucket == {0, 5000},

-    ok = leveled_bookie:book_destroy(Bookie1A),
-    ok = leveled_bookie:book_destroy(Bookie1B).
+    ok = leveled_bookie:book_close(Bookie1B),

+    io:format("Double query to generate index cache and use~n"),
+    {ok, Bookie1BS} = leveled_bookie:book_start(StartOpts1B),
+    
+    TooLate = testutil:convert_to_seconds(os:timestamp()),
+
+    lmdrange_tester(Bookie1BS, SimpleCountFun,
+                    ObjL4StartTS, ObjL6StartTS, ObjL6EndTS, TooLate),
+
+    io:format("Push tested keys down levels with new objects~n"),
+    ObjList7 = 
+        testutil:generate_objects(200000, 
+                                    {fixed_binary, 1}, [],
+                                    leveled_rand:rand_bytes(32),
+                                    fun() -> [] end,
+                                    <<"B1.9">>),
+    testutil:riakload(Bookie1BS, ObjList7),
+
+    lmdrange_tester(Bookie1BS, SimpleCountFun,
+                    ObjL4StartTS, ObjL6StartTS, ObjL6EndTS, TooLate),
+
+    ok = leveled_bookie:book_destroy(Bookie1A),
+    ok = leveled_bookie:book_destroy(Bookie1BS).
+    
+
+lmdrange_tester(Bookie1BS, SimpleCountFun,
+                ObjL4StartTS, ObjL6StartTS, ObjL6EndTS, TooLate) ->
+    {async, R5B_MultiBucketRunner0} = 
+        leveled_bookie:book_headfold(Bookie1BS,
+                                        ?RIAK_TAG,
+                                        all,
+                                        {SimpleCountFun([<<"B0">>, <<"B2">>]), 0},
+                                        false,
+                                        true,
+                                        false,
+                                        {ObjL4StartTS, ObjL6EndTS},
+                                        false),
+    R5B_MultiBucket0 = R5B_MultiBucketRunner0(),
+    io:format("R5B_MultiBucket0 ~w ~n", [R5B_MultiBucket0]),
+    true = R5B_MultiBucket0 == 37000,
+    {async, R5B_MultiBucketRunner1} = 
+        leveled_bookie:book_headfold(Bookie1BS,
+                                        ?RIAK_TAG,
+                                        all,
+                                        {SimpleCountFun([<<"B0">>, <<"B2">>]), 0},
+                                        false,
+                                        true,
+                                        false,
+                                        {ObjL4StartTS, ObjL6EndTS},
+                                        false),
+    R5B_MultiBucket1 = R5B_MultiBucketRunner1(),
+    io:format("R5B_MultiBucket1 ~w ~n", [R5B_MultiBucket1]),
+    true = R5B_MultiBucket1 == 37000,
+    SimpleMinMaxFun = 
+        fun(B, K, _V, Acc) ->
+            case lists:keyfind(B, 1, Acc) of
+                {B, MinK, MaxK} ->
+                    lists:ukeysort(1, [{B, min(K, MinK), max(K, MaxK)}|Acc]);
+                false ->
+                    lists:ukeysort(1, [{B, K, K}|Acc])
+            end
+        end,
+    {async, R5B_MultiBucketRunner2} = 
+        leveled_bookie:book_headfold(Bookie1BS,
+                                        ?RIAK_TAG,
+                                        {bucket_list, [<<"B0">>, <<"B2">>]},
+                                        {SimpleMinMaxFun, []},
+                                        false,
+                                        true,
+                                        false,
+                                        {ObjL4StartTS, ObjL6EndTS},
+                                        false),
+    [{<<"B0">>, MinB0K, MaxB0K}, {<<"B2">>, MinB2K, MaxB2K}] =
+        R5B_MultiBucketRunner2(),
+    io:format("Found Min and Max Keys~n"),
+    io:format("B ~s MinK ~s MaxK ~s~n", [<<"B0">>, MinB0K, MaxB0K]),
+    io:format("B ~s MinK ~s MaxK ~s~n", [<<"B2">>, MinB2K, MaxB2K]),
+    {async, R5B_MultiBucketRunner3a} = 
+        leveled_bookie:book_headfold(Bookie1BS,
+                                        ?RIAK_TAG,
+                                        {range, <<"B0">>, {MinB0K, MaxB0K}},
+                                        {SimpleCountFun([<<"B0">>]), 0},
+                                        false,
+                                        true,
+                                        false,
+                                        {ObjL4StartTS, ObjL6EndTS},
+                                        false),
+    {async, R5B_MultiBucketRunner3b} = 
+        leveled_bookie:book_headfold(Bookie1BS,
+                                        ?RIAK_TAG,
+                                        {range, <<"B2">>, {MinB2K, MaxB2K}},
+                                        {SimpleCountFun([<<"B2">>]), 0},
+                                        false,
+                                        true,
+                                        false,
+                                        {ObjL4StartTS, ObjL6EndTS},
+                                        false),
+    R5B_MultiBucket3a = R5B_MultiBucketRunner3a(),
+    io:format("R5B_MultiBucket3a ~w ~n", [R5B_MultiBucket3a]),
+    R5B_MultiBucket3b = R5B_MultiBucketRunner3b(),
+    io:format("R5B_MultiBucket3b ~w ~n", [R5B_MultiBucket3b]),
+    true = (R5B_MultiBucket3a + R5B_MultiBucket3b) == 37000,
+
+    io:format("Query outside of time range~n"),
+    {async, R5B_MultiBucketRunner4} = 
+        leveled_bookie:book_headfold(Bookie1BS,
+                                        ?RIAK_TAG,
+                                        all,
+                                        {SimpleCountFun([<<"B0">>, <<"B2">>]), 0},
+                                        false,
+                                        true,
+                                        false,
+                                        {ObjL6EndTS,
+                                            TooLate},
+                                        false),
+    R5B_MultiBucket4 = R5B_MultiBucketRunner4(),
+    io:format("R5B_MultiBucket4 ~w ~n", [R5B_MultiBucket4]),
+    true = R5B_MultiBucket4 == 0,
+
+    io:format("Query with one foot inside of time range~n"),
+    {async, R5B_MultiBucketRunner5} = 
+        leveled_bookie:book_headfold(Bookie1BS,
+                                        ?RIAK_TAG,
+                                        all,
+                                        {SimpleCountFun([<<"B0">>, <<"B2">>]), 0},
+                                        false,
+                                        true,
+                                        false,
+                                        {ObjL6StartTS,
+                                            TooLate},
+                                        false),
+    R5B_MultiBucket5 = R5B_MultiBucketRunner5(),
+    io:format("R5B_MultiBucket5 ~w ~n", [R5B_MultiBucket5]),
+    true = R5B_MultiBucket5 == 7000.


 crossbucket_aae(_Config) ->