diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 000ebf2..3f7bc4f 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -68,7 +68,8 @@ export_tree/1, import_tree/1, valid_size/1, - keyto_segment32/1 + keyto_segment32/1, + generate_segmentfilter_list/2 ]). @@ -76,13 +77,11 @@ -define(HASH_SIZE, 4). --define(XXSMALL, {6, 64, 64 * 64}). % DO NOT USE --define(XSMALL, {7, 128, 128 * 128}). % DO NOT USE -%% DO NOT USE warnings with smaller key sizes are there as accelerated queries -%% for finding segment lists will not work where the tree size is smaller than -%% 2 ^ 15 +%% UNSUPPORTED tree sizes for accelerated segment filtering +-define(XXSMALL, {6, 64, 64 * 64}). +-define(XSMALL, {7, 128, 128 * 128}). -%% SUPPORTED tree sizes +%% SUPPORTED tree sizes for accelerated segment filtering -define(SMALL, {8, 256, 256 * 256}). -define(MEDIUM, {9, 512, 512 * 512}). -define(LARGE, {10, 1024, 1024 * 1024}). @@ -343,6 +342,32 @@ keyto_segment32(BinKey) when is_binary(BinKey) -> keyto_segment32(Key) -> keyto_segment32(term_to_binary(Key)). +-spec generate_segmentfilter_list(list(integer()), atom()) + -> false|list(integer()). +%% @doc +%% Cannot accelerate segment listing for trees below certain sizes, so check +%% the creation of segment filter lists with this function +generate_segmentfilter_list(_SegmentList, xxsmall) -> + false; +generate_segmentfilter_list(SegmentList, xsmall) -> + case length(SegmentList) =< 4 of + true -> + A0 = 1 bsl 15, + A1 = 1 bsl 14, + ExpandSegFun = + fun(X, Acc) -> + Acc ++ [X, X + A0, X + A1, X + A0 + A1] + end, + lists:foldl(ExpandSegFun, [], SegmentList); + false -> + false + end; +generate_segmentfilter_list(SegmentList, Size) -> + case lists:member(Size, ?VALID_SIZES) of + true -> + SegmentList + end. 
+ %%%============================================================================ %%% Internal functions %%%============================================================================ diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index b15772f..196efee 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -63,7 +63,15 @@ perbucket_aae(_Config) -> lists:foreach(fun(ObjL) -> testutil:riakload(Bookie3, ObjL) end, CLs), test_singledelta_stores(Bookie2, Bookie3, small, {B1, K1}), test_singledelta_stores(Bookie2, Bookie3, medium, {B1, K1}), + test_singledelta_stores(Bookie2, Bookie3, xsmall, {B1, K1}), + test_singledelta_stores(Bookie2, Bookie3, xxsmall, {B1, K1}), + + % Test with a newly opened book (i.e. with no block indexes cached) ok = leveled_bookie:book_close(Bookie2), + {ok, Bookie2A} = leveled_bookie:book_start(StartOpts2), + test_singledelta_stores(Bookie2A, Bookie3, small, {B1, K1}), + + ok = leveled_bookie:book_close(Bookie2A), ok = leveled_bookie:book_close(Bookie3). 
@@ -120,18 +128,21 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> BookASegList = BookASegFolder(), BookBSegList = BookBSegFolder(), Time_SL0 = timer:now_diff(os:timestamp(), SW_SL0)/1000, - io:format("Two segment list folds took ~w milliseconds ~n", [Time_SL0]), + io:format("Two unfiltered segment list folds took ~w milliseconds ~n", + [Time_SL0]), io:format("Segment lists found of lengths ~w ~w~n", [length(BookASegList), length(BookBSegList)]), Delta = lists:subtract(BookASegList, BookBSegList), true = length(Delta) == 1, + + SegFilterList = leveled_tictac:generate_segmentfilter_list(DLs, TreeSize), SuperHeadSegmentFolder = {foldheads_allkeys, ?RIAK_TAG, {get_segment_folder(DLs, TreeSize), []}, - false, true, DLs}, + false, true, SegFilterList}, SW_SL1 = os:timestamp(), {async, BookASegFolder1} = @@ -141,13 +152,38 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> BookASegList1 = BookASegFolder1(), BookBSegList1 = BookBSegFolder1(), Time_SL1 = timer:now_diff(os:timestamp(), SW_SL1)/1000, - io:format("Two segment list folds took ~w milliseconds ~n", [Time_SL1]), + io:format("Two filtered segment list folds took ~w milliseconds ~n", + [Time_SL1]), io:format("Segment lists found of lengths ~w ~w~n", [length(BookASegList1), length(BookBSegList1)]), + - Delta1 = lists:subtract(BookASegList1, BookBSegList1), - io:format("Delta found of ~w~n", [Delta1]), - true = length(Delta1) == 1. 
+ FalseMatchFilter = DLs ++ [1, 100, 101, 1000, 1001], + SegFilterListF = + leveled_tictac:generate_segmentfilter_list(FalseMatchFilter, TreeSize), + SuperHeadSegmentFolderF = + {foldheads_allkeys, + ?RIAK_TAG, + {get_segment_folder(DLs, TreeSize), []}, + false, true, SegFilterListF}, + + SW_SL1F = os:timestamp(), + {async, BookASegFolder1F} = + leveled_bookie:book_returnfolder(BookA, SuperHeadSegmentFolderF), + {async, BookBSegFolder1F} = + leveled_bookie:book_returnfolder(BookB, SuperHeadSegmentFolderF), + BookASegList1F = BookASegFolder1F(), + BookBSegList1F = BookBSegFolder1F(), + Time_SL1F = timer:now_diff(os:timestamp(), SW_SL1F)/1000, + io:format("Two filtered segment list folds " ++ + " with false positives took ~w milliseconds ~n", + [Time_SL1F]), + io:format("Segment lists found of lengths ~w ~w~n", + [length(BookASegList1F), length(BookBSegList1F)]), + + Delta1F = lists:subtract(BookASegList1F, BookBSegList1F), + io:format("Delta found of ~w~n", [Delta1F]), + true = length(Delta1F) == 1. get_segment_folder(SegmentList, TreeSize) ->