Add use of block index when not cached (for fetch range)

This commit is contained in:
Martin Sumner 2018-05-17 14:56:15 +01:00
parent d8dd952cba
commit 779ccd9c2a
2 changed files with 169 additions and 134 deletions

View file

@ -375,7 +375,7 @@ sst_getfilteredrange(Pid, StartKey, EndKey, ScanWidth, SegList) ->
ScanWidth, SegList0},
infinity) of
{yield, SlotsToFetchBinList, SlotsToPoint, PressMethod} ->
binaryslot_reader(SlotsToFetchBinList, PressMethod)
binaryslot_reader(SlotsToFetchBinList, PressMethod, SegList0)
++ SlotsToPoint;
Reply ->
Reply
@ -405,7 +405,7 @@ sst_getfilteredslots(Pid, SlotList, SegList) ->
SegL0 = tune_seglist(SegList),
{SlotBins, PressMethod} =
gen_fsm:sync_send_event(Pid, {get_slots, SlotList, SegL0}, infinity),
binaryslot_reader(SlotBins, PressMethod).
binaryslot_reader(SlotBins, PressMethod, SegL0).
-spec sst_getmaxsequencenumber(pid()) -> integer().
%% @doc
@ -569,13 +569,15 @@ reader({get_kv, LedgerKey, Hash}, _From, State) ->
{reply, Result, reader, UpdState#state{timings = UpdTimings0,
timings_countdown = CountDown}};
reader({get_kvrange, StartKey, EndKey, ScanWidth, SlotList}, _From, State) ->
reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList}, _From, State) ->
{SlotsToFetchBinList, SlotsToPoint} = fetch_range(StartKey,
EndKey,
ScanWidth,
SlotList,
SegList,
State),
PressMethod = State#state.compression_method,
case State#state.yield_blockquery of
true ->
{reply,
@ -587,7 +589,7 @@ reader({get_kvrange, StartKey, EndKey, ScanWidth, SlotList}, _From, State) ->
State};
false ->
{reply,
binaryslot_reader(SlotsToFetchBinList, PressMethod)
binaryslot_reader(SlotsToFetchBinList, PressMethod, SegList)
++ SlotsToPoint,
reader,
State}
@ -629,12 +631,12 @@ reader(close, _From, State) ->
delete_pending({get_kv, LedgerKey, Hash}, _From, State) ->
{Result, UpdState, _Ts} = fetch(LedgerKey, Hash, State, no_timing),
{reply, Result, delete_pending, UpdState, ?DELETE_TIMEOUT};
delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SlotList},
delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SegList},
_From, State) ->
{SlotsToFetchBinList, SlotsToPoint} = fetch_range(StartKey,
EndKey,
ScanWidth,
SlotList,
SegList,
State),
% Always yield as about to clear and de-reference
PressMethod = State#state.compression_method,
@ -756,8 +758,7 @@ fetch(LedgerKey, Hash, State, Timings0) ->
StartPos = Slot#slot_index_value.start_position,
Result =
check_blocks(PosList,
State#state.handle,
StartPos,
{State#state.handle, StartPos},
BlockLengths,
byte_size(PosBin),
LedgerKey,
@ -778,8 +779,8 @@ fetch(LedgerKey, Hash, State, Timings0) ->
end.
-spec fetch_range(tuple(), tuple(), integer(), false|list(integer()), sst_state())
-> {list(), list()}.
-spec fetch_range(tuple(), tuple(), integer(), leveled_codec:segment_list(),
sst_state()) -> {list(), list()}.
%% @doc
%% Fetch the contents of the SST file for a given key range. This will
%% pre-fetch some results, and append pointers for additional results.
@ -1254,23 +1255,30 @@ generate_binary_slot(Lookup, KVL, PressMethod, BuildTimings0) ->
{{Header, SlotBin, HashL, LastKey}, BuildTimings3}.
% Acc should start as not_present if LedgerKey is a key, and a list if
% LedgerKey is false
check_blocks([], _Handle, _StartPos, _BlockLengths, _PosBinLength,
-spec check_blocks(list(integer()),
binary()|{file:io_device(), integer()},
binary(),
integer(),
leveled_codec:ledger_key()|false,
press_methods(),
list()|not_present) -> list()|not_present.
%% @doc
%% Acc should start as not_present if LedgerKey is a key, and a list if
%% LedgerKey is false
check_blocks([], _BlockPointer, _BlockLengths, _PosBinLength,
_LedgerKeyToCheck, _PressMethod, not_present) ->
not_present;
check_blocks([], _BlockPointer, _BlockLengths, _PosBinLength,
_LedgerKeyToCheck, _PressMethod, Acc) ->
case is_list(Acc) of
true ->
lists:reverse(Acc);
false ->
Acc
end;
check_blocks([Pos|Rest], Handle, StartPos, BlockLengths, PosBinLength,
check_blocks([Pos|Rest], BlockPointer, BlockLengths, PosBinLength,
LedgerKeyToCheck, PressMethod, Acc) ->
{BlockNumber, BlockPos} = revert_position(Pos),
BlockBin =
read_block(Handle,
StartPos,
read_block(BlockPointer,
BlockLengths,
PosBinLength,
BlockNumber),
@ -1282,19 +1290,19 @@ check_blocks([Pos|Rest], Handle, StartPos, BlockLengths, PosBinLength,
_ ->
case LedgerKeyToCheck of
false ->
check_blocks(Rest, Handle, StartPos,
check_blocks(Rest, BlockPointer,
BlockLengths, PosBinLength,
LedgerKeyToCheck, PressMethod,
[{K, V}|Acc]);
_ ->
check_blocks(Rest, Handle, StartPos,
check_blocks(Rest, BlockPointer,
BlockLengths, PosBinLength,
LedgerKeyToCheck, PressMethod, Acc)
end
end.
read_block(Handle, StartPos, BlockLengths, PosBinLength, BlockID) ->
read_block({Handle, StartPos}, BlockLengths, PosBinLength, BlockID) ->
{Offset, Length} = block_offsetandlength(BlockLengths, BlockID),
{ok, BlockBin} = file:pread(Handle,
StartPos
@ -1304,6 +1312,11 @@ read_block(Handle, StartPos, BlockLengths, PosBinLength, BlockID) ->
% 4-byte CRC, 4-byte pos,
% 4-byte CRC, 5x4 byte lengths
Length),
BlockBin;
read_block(SlotBin, BlockLengths, PosBinLength, BlockID) ->
{Offset, Length} = block_offsetandlength(BlockLengths, BlockID),
StartPos = Offset + PosBinLength + 32,
<<_Pre:StartPos/binary, BlockBin:Length/binary, _Rest/binary>> = SlotBin,
BlockBin.
read_slot(Handle, Slot) ->
@ -1345,7 +1358,7 @@ binarysplit_mapfun(MultiSlotBin, StartPos) ->
%% @doc
%% The reading of slots will return a list of either 2-tuples containing
%% {K, V} pairs - or 3-tuples containing {Binary, SK, EK}. The 3 tuples
%% can be exploded into lists of {K, V} pairs using the binaryslot_reader/2
%% can be exploded into lists of {K, V} pairs using the binaryslot_reader/3
%% function
%%
%% Reading slots is generally unfiltered, but in the special case when
@ -1392,7 +1405,7 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) ->
PositionList ->
Acc ++
check_blocks(PositionList,
Handle, SP,
{Handle, SP},
BlockLengths,
byte_size(BlockIdx),
false, PressMethod,
@ -1404,23 +1417,27 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) ->
-spec binaryslot_reader(list({tuple(), tuple()}|{binary(), tuple(), tuple()}),
native|lz4) -> list({tuple(), tuple()}).
native|lz4,
leveled_codec:segment_list())
-> list({tuple(), tuple()}).
%% @doc
%% Read the binary slots converting them to {K, V} pairs if they were not
%% already {K, V} pairs
binaryslot_reader(SlotBinsToFetch, PressMethod) ->
binaryslot_reader(SlotBinsToFetch, PressMethod, []).
binaryslot_reader(SlotBinsToFetch, PressMethod, SegList) ->
binaryslot_reader(SlotBinsToFetch, PressMethod, SegList, []).
binaryslot_reader([], _PressMethod, Acc) ->
binaryslot_reader([], _PressMethod, _SegList, Acc) ->
Acc;
binaryslot_reader([{SlotBin, SK, EK}|Tail], PressMethod, Acc) ->
binaryslot_reader([{SlotBin, SK, EK}|Tail], PressMethod, SegList, Acc) ->
binaryslot_reader(Tail,
PressMethod,
SegList,
Acc ++ binaryslot_trimmedlist(SlotBin,
SK, EK,
PressMethod));
binaryslot_reader([{K, V}|Tail], PressMethod, Acc) ->
binaryslot_reader(Tail, PressMethod, Acc ++ [{K, V}]).
PressMethod,
SegList));
binaryslot_reader([{K, V}|Tail], PressMethod, SegList, Acc) ->
binaryslot_reader(Tail, PressMethod, SegList, Acc ++ [{K, V}]).
read_length_list(Handle, LengthList) ->
@ -1478,66 +1495,11 @@ binaryslot_tolist(FullBin, PressMethod) ->
Out.
binaryslot_trimmedlist(FullBin, all, all, PressMethod) ->
binaryslot_trimmedlist(FullBin, all, all, PressMethod, false) ->
binaryslot_tolist(FullBin, PressMethod);
binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod, SegList) ->
LTrimFun = fun({K, _V}) -> K < StartKey end,
RTrimFun = fun({K, _V}) -> not leveled_codec:endkey_passed(EndKey, K) end,
% It will be more efficient to check a subset of blocks. To work out
% the best subset we always look in the middle block of 5, and based on
% the first and last keys of that middle block when compared to the Start
% and EndKey of the query determines a subset of blocks
%
% This isn't perfectly efficient, especially if the query overlaps Block2
% and Block3 (as Block 1 will also be checked), but finessing this last
% scenario is hard to do in concise code
BlocksToCheck =
case crc_check_slot(FullBin) of
{Header, Blocks} ->
<<B1L:32/integer,
B2L:32/integer,
B3L:32/integer,
B4L:32/integer,
B5L:32/integer,
_PosBinIndex/binary>> = Header,
<<Block1:B1L/binary, Block2:B2L/binary,
MidBlock:B3L/binary,
Block4:B4L/binary, Block5:B5L/binary>> = Blocks,
case B3L of
0 ->
[Block1, Block2];
_ ->
MidBlockList =
deserialise_block(MidBlock, PressMethod),
{MidFirst, _} = lists:nth(1, MidBlockList),
{MidLast, _} = lists:last(MidBlockList),
Split = {StartKey > MidLast,
StartKey >= MidFirst,
leveled_codec:endkey_passed(EndKey,
MidFirst),
leveled_codec:endkey_passed(EndKey,
MidLast)},
case Split of
{true, _, _, _} ->
[Block4, Block5];
{false, true, false, true} ->
[MidBlockList];
{false, true, false, false} ->
[MidBlockList, Block4, Block5];
{false, false, true, true} ->
[Block1, Block2];
{false, false, false, true} ->
[Block1, Block2, MidBlockList];
_ ->
[Block1, Block2, MidBlockList, Block4, Block5]
end
end;
crc_wonky ->
[]
end,
BlockCheckFun =
fun(Block, {Acc, Continue}) ->
case {Block, Continue} of
@ -1572,8 +1534,72 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) ->
end
end,
case {crc_check_slot(FullBin), SegList} of
% It will be more efficient to check a subset of blocks. To work out
% the best subset we always look in the middle block of 5, and based on
% the first and last keys of that middle block when compared to the Start
% and EndKey of the query determines a subset of blocks
%
% This isn't perfectly efficient, especially if the query overlaps Block2
% and Block3 (as Block 1 will also be checked), but finessing this last
% scenario is hard to do in concise code
{{Header, Blocks}, false} ->
<<B1L:32/integer,
B2L:32/integer,
B3L:32/integer,
B4L:32/integer,
B5L:32/integer,
_PosBinIndex/binary>> = Header,
<<Block1:B1L/binary, Block2:B2L/binary,
MidBlock:B3L/binary,
Block4:B4L/binary, Block5:B5L/binary>> = Blocks,
BlocksToCheck =
case B3L of
0 ->
[Block1, Block2];
_ ->
MidBlockList =
deserialise_block(MidBlock, PressMethod),
{MidFirst, _} = lists:nth(1, MidBlockList),
{MidLast, _} = lists:last(MidBlockList),
Split = {StartKey > MidLast,
StartKey >= MidFirst,
leveled_codec:endkey_passed(EndKey,
MidFirst),
leveled_codec:endkey_passed(EndKey,
MidLast)},
case Split of
{true, _, _, _} ->
[Block4, Block5];
{false, true, false, true} ->
[MidBlockList];
{false, true, false, false} ->
[MidBlockList, Block4, Block5];
{false, false, true, true} ->
[Block1, Block2];
{false, false, false, true} ->
[Block1, Block2, MidBlockList];
_ ->
[Block1, Block2, MidBlockList, Block4, Block5]
end
end,
{Acc, _Continue} = lists:foldl(BlockCheckFun, {[], true}, BlocksToCheck),
Acc.
Acc;
{{Header, _Blocks}, SegList} ->
BL = ?BLOCK_LENGTHS_LENGTH,
<<BlockLengths:BL/binary, BlockIdx/binary>> = Header,
PosList = find_pos(BlockIdx, SegList, [], 0),
check_blocks(PosList,
FullBin,
BlockLengths,
byte_size(BlockIdx),
false,
PressMethod,
[]);
{crc_wonky, _} ->
[]
end.
crc_check_slot(FullBin) ->
@ -2267,7 +2293,8 @@ indexed_list_allindexkeys_test() ->
% "Indexed list flattened in ~w microseconds ~n",
% [timer:now_diff(os:timestamp(), SW)]),
?assertMatch(Keys, BinToList),
?assertMatch(Keys, binaryslot_trimmedlist(FullBin, all, all, native)).
?assertMatch(Keys,
binaryslot_trimmedlist(FullBin, all, all, native, false)).
indexed_list_allindexkeys_nolookup_test() ->
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)),
@ -2281,7 +2308,8 @@ indexed_list_allindexkeys_nolookup_test() ->
% "Indexed list flattened in ~w microseconds ~n",
% [timer:now_diff(os:timestamp(), SW)]),
?assertMatch(Keys, BinToList),
?assertMatch(Keys, binaryslot_trimmedlist(FullBin, all, all, native)).
?assertMatch(Keys,
binaryslot_trimmedlist(FullBin, all, all, native, false)).
indexed_list_allindexkeys_trimmed_test() ->
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
@ -2299,26 +2327,27 @@ indexed_list_allindexkeys_trimmed_test() ->
"Bucket",
{"t1_int", 99999},
null},
native)),
native,
false)),
{SK1, _} = lists:nth(10, Keys),
{EK1, _} = lists:nth(100, Keys),
R1 = lists:sublist(Keys, 10, 91),
O1 = binaryslot_trimmedlist(FullBin, SK1, EK1, native),
O1 = binaryslot_trimmedlist(FullBin, SK1, EK1, native, false),
?assertMatch(91, length(O1)),
?assertMatch(R1, O1),
{SK2, _} = lists:nth(10, Keys),
{EK2, _} = lists:nth(20, Keys),
R2 = lists:sublist(Keys, 10, 11),
O2 = binaryslot_trimmedlist(FullBin, SK2, EK2, native),
O2 = binaryslot_trimmedlist(FullBin, SK2, EK2, native, false),
?assertMatch(11, length(O2)),
?assertMatch(R2, O2),
{SK3, _} = lists:nth(?LOOK_SLOTSIZE - 1, Keys),
{EK3, _} = lists:nth(?LOOK_SLOTSIZE, Keys),
R3 = lists:sublist(Keys, ?LOOK_SLOTSIZE - 1, 2),
O3 = binaryslot_trimmedlist(FullBin, SK3, EK3, native),
O3 = binaryslot_trimmedlist(FullBin, SK3, EK3, native, false),
?assertMatch(2, length(O3)),
?assertMatch(R3, O3).
@ -2379,7 +2408,7 @@ indexed_list_mixedkeys_bitflip_test() ->
{SK1, _} = lists:nth(10, Keys),
{EK1, _} = lists:nth(20, Keys),
O1 = binaryslot_trimmedlist(SlotBin3, SK1, EK1, native),
O1 = binaryslot_trimmedlist(SlotBin3, SK1, EK1, native, false),
?assertMatch([], O1),
SlotBin4 = flip_byte(SlotBin, 0, 20),
@ -2391,8 +2420,8 @@ indexed_list_mixedkeys_bitflip_test() ->
ToList5 = binaryslot_tolist(SlotBin5, native),
?assertMatch([], ToList4),
?assertMatch([], ToList5),
O4 = binaryslot_trimmedlist(SlotBin4, SK1, EK1, native),
O5 = binaryslot_trimmedlist(SlotBin4, SK1, EK1, native),
O4 = binaryslot_trimmedlist(SlotBin4, SK1, EK1, native, false),
O5 = binaryslot_trimmedlist(SlotBin4, SK1, EK1, native, false),
?assertMatch([], O4),
?assertMatch([], O5).

View file

@ -61,6 +61,37 @@ crossbucket_aae(_Config) ->
%% Check all the objects are found - used to trigger HEAD performance log
ok = testutil:checkhead_forlist(Bookie2, lists:nth(1, CLs)),
test_segfilter_query(Bookie2, CLs),
% Start a new store, and load the same objects (except for the original
% test object) into this store
%
% This is now the comparison part of the test
StartOpts3 = [{root_path, RootPathB},
{max_journalsize, 200000000},
{max_pencillercachesize, 16000},
{sync_strategy, testutil:sync_strategy()}],
{ok, Bookie3} = leveled_bookie:book_start(StartOpts3),
lists:foreach(fun(ObjL) -> testutil:riakload(Bookie3, ObjL) end, CLs),
test_singledelta_stores(Bookie2, Bookie3, small, {B1, K1}),
test_singledelta_stores(Bookie2, Bookie3, medium, {B1, K1}),
test_singledelta_stores(Bookie2, Bookie3, xsmall, {B1, K1}),
test_singledelta_stores(Bookie2, Bookie3, xxsmall, {B1, K1}),
% Test with a newly opened book (i.e with no block indexes cached)
ok = leveled_bookie:book_close(Bookie2),
{ok, Bookie2A} = leveled_bookie:book_start(StartOpts2),
test_segfilter_query(Bookie2A, CLs),
test_singledelta_stores(Bookie2A, Bookie3, small, {B1, K1}),
ok = leveled_bookie:book_close(Bookie2A),
ok = leveled_bookie:book_close(Bookie3).
test_segfilter_query(Bookie, CLs) ->
% This part of the test tests an issue with accelerating folds by segment
% list, when there is more than one key with a matching segment in the
% slot. Previously this was not handled correctly - and this test part
@ -113,16 +144,16 @@ crossbucket_aae(_Config) ->
end,
{async, SL1Folder} =
leveled_bookie:book_returnfolder(Bookie2,
leveled_bookie:book_returnfolder(Bookie,
HeadSegmentFolderGen(SL1, BK1)),
{async, SL2Folder} =
leveled_bookie:book_returnfolder(Bookie2,
leveled_bookie:book_returnfolder(Bookie,
HeadSegmentFolderGen(SL2, BK2)),
{async, SL3Folder} =
leveled_bookie:book_returnfolder(Bookie2,
leveled_bookie:book_returnfolder(Bookie,
HeadSegmentFolderGen(SL3, BK3)),
{async, SL4Folder} =
leveled_bookie:book_returnfolder(Bookie2,
leveled_bookie:book_returnfolder(Bookie,
HeadSegmentFolderGen(SL4, BK4)),
Results = [SL1Folder(), SL2Folder(), SL3Folder(), SL4Folder()],
@ -130,32 +161,7 @@ crossbucket_aae(_Config) ->
"for SliceSize ~w in ~w ms~n",
[Results, SliceSize,
timer:now_diff(os:timestamp(), SW0)/1000]),
lists:foreach(fun(R) -> true = R == SliceSize end, Results),
% Start a new store, and load the same objects (except for the original
% test object) into this store
%
% This is now the comparison part of the test
StartOpts3 = [{root_path, RootPathB},
{max_journalsize, 200000000},
{max_pencillercachesize, 16000},
{sync_strategy, testutil:sync_strategy()}],
{ok, Bookie3} = leveled_bookie:book_start(StartOpts3),
lists:foreach(fun(ObjL) -> testutil:riakload(Bookie3, ObjL) end, CLs),
test_singledelta_stores(Bookie2, Bookie3, small, {B1, K1}),
test_singledelta_stores(Bookie2, Bookie3, medium, {B1, K1}),
test_singledelta_stores(Bookie2, Bookie3, xsmall, {B1, K1}),
test_singledelta_stores(Bookie2, Bookie3, xxsmall, {B1, K1}),
% Test with a newly opened book (i.e with no block indexes cached)
ok = leveled_bookie:book_close(Bookie2),
{ok, Bookie2A} = leveled_bookie:book_start(StartOpts2),
test_singledelta_stores(Bookie2A, Bookie3, small, {B1, K1}),
ok = leveled_bookie:book_close(Bookie2A),
ok = leveled_bookie:book_close(Bookie3).
lists:foreach(fun(R) -> true = R == SliceSize end, Results).
test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) ->