Add check for too-small trees

Provide a function for generating segmentfilter lists so that it can handle trees that are "too small".

Test those smaller trees - plus also false positives and cold caches
This commit is contained in:
Martin Sumner 2017-11-01 13:18:01 +00:00
parent 6099dd1367
commit 033cf1954d
2 changed files with 74 additions and 13 deletions

View file

@ -68,7 +68,8 @@
export_tree/1, export_tree/1,
import_tree/1, import_tree/1,
valid_size/1, valid_size/1,
keyto_segment32/1 keyto_segment32/1,
generate_segmentfilter_list/2
]). ]).
@ -76,13 +77,11 @@
-define(HASH_SIZE, 4). -define(HASH_SIZE, 4).
-define(XXSMALL, {6, 64, 64 * 64}). % DO NOT USE %% UNSUPPORTED tree sizes for accelerated segment filtering
-define(XSMALL, {7, 128, 128 * 128}). % DO NOT USE -define(XXSMALL, {6, 64, 64 * 64}).
%% DO NOT USE warnings with smaller key sizes are there as accelerated queries -define(XSMALL, {7, 128, 128 * 128}).
%% for finding segment lists will not work where the tree size is smaller than
%% 2 ^ 15
%% SUPPORTED tree sizes %% SUPPORTED tree sizes for accelerated segment filtering
-define(SMALL, {8, 256, 256 * 256}). -define(SMALL, {8, 256, 256 * 256}).
-define(MEDIUM, {9, 512, 512 * 512}). -define(MEDIUM, {9, 512, 512 * 512}).
-define(LARGE, {10, 1024, 1024 * 1024}). -define(LARGE, {10, 1024, 1024 * 1024}).
@ -343,6 +342,32 @@ keyto_segment32(BinKey) when is_binary(BinKey) ->
keyto_segment32(Key) -> keyto_segment32(Key) ->
keyto_segment32(term_to_binary(Key)). keyto_segment32(term_to_binary(Key)).
-spec generate_segmentfilter_list(list(integer()), atom())
                                            -> false|list(integer()).
%% @doc
%% Build the segment filter list to hand to an accelerated (segment-filtered)
%% fold for a tree of the given size.  Segment listing cannot be accelerated
%% for trees below certain sizes, so filter lists should be created through
%% this function rather than passed directly.
%%
%% Returns:
%% - `false' for an xxsmall tree (no filtering is possible);
%% - for an xsmall tree, `false' when more than four segments are requested,
%%   otherwise a list in which each segment X is expanded, in order, to
%%   [X, X + 2^15, X + 2^14, X + 2^15 + 2^14];
%% - the SegmentList unchanged when Size is a member of ?VALID_SIZES.
%%
%% Any other Size crashes with a case_clause error.
%%
%% NOTE(review): the xsmall expansion presumably exists because the xsmall
%% tree has only 2^14 segments while the filter compares more hash bits, so
%% every combination of the two extra bits must be listed - confirm against
%% the filter implementation.  The four-segment cap presumably keeps the
%% expanded list small (16 entries at most) - confirm.
generate_segmentfilter_list(_SegmentList, xxsmall) ->
    false;
generate_segmentfilter_list(SegmentList, xsmall) ->
    case length(SegmentList) =< 4 of
        true ->
            A0 = 1 bsl 15,
            A1 = 1 bsl 14,
            %% flatmap keeps the input order and avoids repeatedly appending
            %% to the tail of an accumulator
            lists:flatmap(
                fun(X) -> [X, X + A0, X + A1, X + A0 + A1] end,
                SegmentList);
        false ->
            false
    end;
generate_segmentfilter_list(SegmentList, Size) ->
    %% Deliberately no `false' branch: an unknown tree size is a caller bug
    %% and should crash rather than silently return a filter
    case lists:member(Size, ?VALID_SIZES) of
        true ->
            SegmentList
    end.
%%%============================================================================ %%%============================================================================
%%% Internal functions %%% Internal functions
%%%============================================================================ %%%============================================================================

View file

@ -63,7 +63,15 @@ perbucket_aae(_Config) ->
lists:foreach(fun(ObjL) -> testutil:riakload(Bookie3, ObjL) end, CLs), lists:foreach(fun(ObjL) -> testutil:riakload(Bookie3, ObjL) end, CLs),
test_singledelta_stores(Bookie2, Bookie3, small, {B1, K1}), test_singledelta_stores(Bookie2, Bookie3, small, {B1, K1}),
test_singledelta_stores(Bookie2, Bookie3, medium, {B1, K1}), test_singledelta_stores(Bookie2, Bookie3, medium, {B1, K1}),
test_singledelta_stores(Bookie2, Bookie3, xsmall, {B1, K1}),
test_singledelta_stores(Bookie2, Bookie3, xxsmall, {B1, K1}),
% Test with a newly opened book (i.e. with no block indexes cached)
ok = leveled_bookie:book_close(Bookie2), ok = leveled_bookie:book_close(Bookie2),
{ok, Bookie2A} = leveled_bookie:book_start(StartOpts2),
test_singledelta_stores(Bookie2A, Bookie3, small, {B1, K1}),
ok = leveled_bookie:book_close(Bookie2A),
ok = leveled_bookie:book_close(Bookie3). ok = leveled_bookie:book_close(Bookie3).
@ -120,18 +128,21 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) ->
BookASegList = BookASegFolder(), BookASegList = BookASegFolder(),
BookBSegList = BookBSegFolder(), BookBSegList = BookBSegFolder(),
Time_SL0 = timer:now_diff(os:timestamp(), SW_SL0)/1000, Time_SL0 = timer:now_diff(os:timestamp(), SW_SL0)/1000,
io:format("Two segment list folds took ~w milliseconds ~n", [Time_SL0]), io:format("Two unfiltered segment list folds took ~w milliseconds ~n",
[Time_SL0]),
io:format("Segment lists found of lengths ~w ~w~n", io:format("Segment lists found of lengths ~w ~w~n",
[length(BookASegList), length(BookBSegList)]), [length(BookASegList), length(BookBSegList)]),
Delta = lists:subtract(BookASegList, BookBSegList), Delta = lists:subtract(BookASegList, BookBSegList),
true = length(Delta) == 1, true = length(Delta) == 1,
SegFilterList = leveled_tictac:generate_segmentfilter_list(DLs, TreeSize),
SuperHeadSegmentFolder = SuperHeadSegmentFolder =
{foldheads_allkeys, {foldheads_allkeys,
?RIAK_TAG, ?RIAK_TAG,
{get_segment_folder(DLs, TreeSize), []}, {get_segment_folder(DLs, TreeSize), []},
false, true, DLs}, false, true, SegFilterList},
SW_SL1 = os:timestamp(), SW_SL1 = os:timestamp(),
{async, BookASegFolder1} = {async, BookASegFolder1} =
@ -141,13 +152,38 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) ->
BookASegList1 = BookASegFolder1(), BookASegList1 = BookASegFolder1(),
BookBSegList1 = BookBSegFolder1(), BookBSegList1 = BookBSegFolder1(),
Time_SL1 = timer:now_diff(os:timestamp(), SW_SL1)/1000, Time_SL1 = timer:now_diff(os:timestamp(), SW_SL1)/1000,
io:format("Two segment list folds took ~w milliseconds ~n", [Time_SL1]), io:format("Two filtered segment list folds took ~w milliseconds ~n",
[Time_SL1]),
io:format("Segment lists found of lengths ~w ~w~n", io:format("Segment lists found of lengths ~w ~w~n",
[length(BookASegList1), length(BookBSegList1)]), [length(BookASegList1), length(BookBSegList1)]),
Delta1 = lists:subtract(BookASegList1, BookBSegList1), FalseMatchFilter = DLs ++ [1, 100, 101, 1000, 1001],
io:format("Delta found of ~w~n", [Delta1]), SegFilterListF =
true = length(Delta1) == 1. leveled_tictac:generate_segmentfilter_list(FalseMatchFilter, TreeSize),
SuperHeadSegmentFolderF =
{foldheads_allkeys,
?RIAK_TAG,
{get_segment_folder(DLs, TreeSize), []},
false, true, SegFilterListF},
SW_SL1F = os:timestamp(),
{async, BookASegFolder1F} =
leveled_bookie:book_returnfolder(BookA, SuperHeadSegmentFolderF),
{async, BookBSegFolder1F} =
leveled_bookie:book_returnfolder(BookB, SuperHeadSegmentFolderF),
BookASegList1F = BookASegFolder1F(),
BookBSegList1F = BookBSegFolder1F(),
Time_SL1F = timer:now_diff(os:timestamp(), SW_SL1F)/1000,
io:format("Two filtered segment list folds " ++
" with false positives took ~w milliseconds ~n",
[Time_SL1F]),
io:format("Segment lists found of lengths ~w ~w~n",
[length(BookASegList1F), length(BookBSegList1F)]),
Delta1F = lists:subtract(BookASegList1F, BookBSegList1F),
io:format("Delta found of ~w~n", [Delta1F]),
true = length(Delta1F) == 1.
get_segment_folder(SegmentList, TreeSize) -> get_segment_folder(SegmentList, TreeSize) ->