From 4981cdfc07deec959d4b15cdaa57ae5c5d66bb9a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 24 Oct 2018 10:19:54 +0100 Subject: [PATCH 1/6] Expand segment lists when matches are produced from smaller trees Let us say a store has precalculated segments based on large tree size, but mismatched segmentIDs are found on a small tree size (for query efficiency). The list of segment IDs needs to be expanded out for matching. This may be more efficient than running match_segment for each check (expand out once and use lists:member/2). Also this is necessary for leveled_so AAE stores (segment-ordered stores) --- src/leveled_tictac.erl | 59 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 9b1a813..ff30625 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -73,6 +73,7 @@ keyto_segment32/1, keyto_segment48/1, generate_segmentfilter_list/2, + adjust_segmentmatch_list/3, merge_binaries/2, join_segment/2, match_segment/2 @@ -383,6 +384,31 @@ generate_segmentfilter_list(SegmentList, Size) -> SegmentList end. +-spec adjust_segmentmatch_list(list(integer()), tree_size(), tree_size()) + -> list(integer()). 
+%% @doc +%% If we have dirty segments discovered by comparing trees of size CompareSize, +%% and we want to see if it matches a segment for a key which was created for a +%% tree of size Store Size, then we need to alter the segment list +adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) -> + CompareSizeI = get_size(CompareSize), + StoreSizeI = get_size(StoreSize), + if CompareSizeI =< StoreSizeI -> + ExpItems = StoreSizeI div CompareSizeI - 1, + ShiftFactor = trunc(math:log2(CompareSizeI * ?L2_CHUNKSIZE)), + ExpList = + lists:map(fun(X) -> X bsl ShiftFactor end, lists:seq(1, ExpItems)), + UpdSegmentList = + lists:foldl(fun(S, Acc) -> + L = lists:map(fun(F) -> F + S end, ExpList), + L ++ Acc + end, + [], + SegmentList), + lists:usort(UpdSegmentList ++ SegmentList) + end. + + -spec match_segment({integer(), tree_size()}, {integer(), tree_size()}) -> boolean(). %% @doc @@ -696,15 +722,40 @@ compare_trees_maxonedelta(Tree0, Tree1) -> end. segment_match_test() -> - segment_match_tester(small, large), - segment_match_tester(xlarge, medium). + segment_match_tester(small, large, <<"K0">>), + segment_match_tester(xlarge, medium, <<"K1">>), + expand_membershiplist_tester(small, large, <<"K0">>), + expand_membershiplist_tester(xsmall, large, <<"K1">>), + expand_membershiplist_tester(large, xlarge, <<"K2">>). -segment_match_tester(Size1, Size2) -> - HashKey = keyto_segment32(<<"K0">>), +segment_match_tester(Size1, Size2, Key) -> + HashKey = keyto_segment32(Key), Segment1 = get_segment(HashKey, Size1), Segment2 = get_segment(HashKey, Size2), ?assertMatch(true, match_segment({Segment1, Size1}, {Segment2, Size2})). +expand_membershiplist_tester(SmallSize, LargeSize, Key) -> + HashKey = keyto_segment32(Key), + Segment1 = get_segment(HashKey, SmallSize), + Segment2 = get_segment(HashKey, LargeSize), + AdjList = adjust_segmentmatch_list([Segment1], SmallSize, LargeSize), + ?assertMatch(true, lists:member(Segment2, AdjList)). 
+ + +segment_expandsimple_test() -> + AdjList = adjust_segmentmatch_list([1, 100], small, medium), + io:format("List adjusted to ~w~n", [AdjList]), + ?assertMatch(true, lists:member(1, AdjList)), + ?assertMatch(true, lists:member(100, AdjList)), + ?assertMatch(true, lists:member(65537, AdjList)), + ?assertMatch(true, lists:member(131073, AdjList)), + ?assertMatch(true, lists:member(196609, AdjList)), + ?assertMatch(true, lists:member(65636, AdjList)), + ?assertMatch(true, lists:member(131172, AdjList)), + ?assertMatch(true, lists:member(196708, AdjList)), + ?assertMatch(8, length(AdjList)), + OrigList = adjust_segmentmatch_list([1, 100], medium, medium), + ?assertMatch([1, 100], OrigList). -endif. From d5d54cc13a1d9b0ebda8eb130ad5034e42ee2570 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 24 Oct 2018 11:50:36 +0100 Subject: [PATCH 2/6] Add timing test to guide selection of approach --- src/leveled_tictac.erl | 80 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index ff30625..b03ee86 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -390,6 +390,24 @@ generate_segmentfilter_list(SegmentList, Size) -> %% If we have dirty segments discovered by comparing trees of size CompareSize, %% and we want to see if it matches a segment for a key which was created for a %% tree of size Store Size, then we need to alter the segment list +%% +%% See timing_test/0 when considering using this or match_segment/2 +%% +%% Check with KeyCount=10000 SegCount=4 TreeSizes small large: +%% adjust_segmentmatch_list check took 1.256 ms match_segment took 5.229 ms + +%% Check with KeyCount=10000 SegCount=8 TreeSizes small large: +%% adjust_segmentmatch_list check took 2.065 ms match_segment took 8.637 ms + +%% Check with KeyCount=10000 SegCount=4 TreeSizes medium large: +%% adjust_segmentmatch_list check took 0.453 ms match_segment took 4.843 ms + +%% Check with KeyCount=10000 
SegCount=4 TreeSizes small medium: +%% adjust_segmentmatch_list check took 0.451 ms match_segment took 5.528 ms + +%% Check with KeyCount=100000 SegCount=4 TreeSizes small large: +%% adjust_segmentmatch_list check took 11.986 ms match_segment took 56.522 ms +%% adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) -> CompareSizeI = get_size(CompareSize), StoreSizeI = get_size(StoreSize), @@ -757,6 +775,68 @@ segment_expandsimple_test() -> OrigList = adjust_segmentmatch_list([1, 100], medium, medium), ?assertMatch([1, 100], OrigList). + +timing_test() -> + timing_tester(10000, 4, small, large), + timing_tester(10000, 8, small, large), + timing_tester(10000, 4, medium, large), + timing_tester(10000, 4, small, medium), + timing_tester(100000, 4, small, large). + + +timing_tester(KeyCount, SegCount, SmallSize, LargeSize) -> + SegList = + lists:map(fun(_C) -> + leveled_rand:uniform(get_size(SmallSize) * ?L2_CHUNKSIZE - 1) + end, + lists:seq(1, SegCount)), + KeyToSegFun = + fun(I) -> + HK = keyto_segment32(integer_to_binary(I)), + {I, get_segment(HK, LargeSize)} + end, + + MatchList = lists:map(KeyToSegFun, lists:seq(1, KeyCount)), + + {T0, Out0} = + adjustsegmentlist_check(SegList, MatchList, SmallSize, LargeSize), + {T1, Out1} = + matchbysegment_check(SegList, MatchList, SmallSize, LargeSize), + ?assertMatch(true, Out0 == Out1), + io:format(user, "~nCheck with KeyCount=~w SegCount=~w TreeSizes ~w ~w:~n", + [KeyCount, SegCount, SmallSize, LargeSize]), + io:format(user, + "adjust_segmentmatch_list check took ~w ms " ++ + "match_segment took ~w ms~n", + [T0, T1]). + + +adjustsegmentlist_check(SegList, MatchList, SmallSize, LargeSize) -> + SW = os:timestamp(), + AdjList = adjust_segmentmatch_list(SegList, SmallSize, LargeSize), + PredFun = + fun({_I, S}) -> + lists:member(S, AdjList) + end, + OL = lists:filter(PredFun, MatchList), + {timer:now_diff(os:timestamp(), SW)/1000, OL}. 
+ +matchbysegment_check(SegList, MatchList, SmallSize, LargeSize) -> + SW = os:timestamp(), + PredFun = + fun({_I, S}) -> + FoldFun = + fun(_SM, true) -> + true; + (SM, false) -> + match_segment({SM, SmallSize}, {S, LargeSize}) + end, + lists:foldl(FoldFun, false, SegList) + end, + OL = lists:filter(PredFun, MatchList), + {timer:now_diff(os:timestamp(), SW)/1000, OL}. + + -endif. From d171fda7051b3e004e99a1917e15bfcef9164dc9 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 24 Oct 2018 11:58:25 +0100 Subject: [PATCH 3/6] Format comment --- src/leveled_tictac.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index b03ee86..b1886ce 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -395,16 +395,16 @@ generate_segmentfilter_list(SegmentList, Size) -> %% %% Check with KeyCount=10000 SegCount=4 TreeSizes small large: %% adjust_segmentmatch_list check took 1.256 ms match_segment took 5.229 ms - +%% %% Check with KeyCount=10000 SegCount=8 TreeSizes small large: %% adjust_segmentmatch_list check took 2.065 ms match_segment took 8.637 ms - +%% %% Check with KeyCount=10000 SegCount=4 TreeSizes medium large: %% adjust_segmentmatch_list check took 0.453 ms match_segment took 4.843 ms - +%% %% Check with KeyCount=10000 SegCount=4 TreeSizes small medium: %% adjust_segmentmatch_list check took 0.451 ms match_segment took 5.528 ms - +%% %% Check with KeyCount=100000 SegCount=4 TreeSizes small large: %% adjust_segmentmatch_list check took 11.986 ms match_segment took 56.522 ms %% From 6d62803567c467a71c4dce302cf8924f3559b5dc Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 24 Oct 2018 12:06:42 +0100 Subject: [PATCH 4/6] Test at higher volume Increase probability of hit --- src/leveled_tictac.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index b1886ce..85ada1d 100644 --- a/src/leveled_tictac.erl 
+++ b/src/leveled_tictac.erl @@ -777,11 +777,11 @@ segment_expandsimple_test() -> timing_test() -> - timing_tester(10000, 4, small, large), - timing_tester(10000, 8, small, large), - timing_tester(10000, 4, medium, large), - timing_tester(10000, 4, small, medium), - timing_tester(100000, 4, small, large). + timing_tester(100000, 4, small, large), + timing_tester(100000, 8, small, large), + timing_tester(100000, 4, medium, large), + timing_tester(100000, 4, small, medium), + timing_tester(1000000, 4, small, large). timing_tester(KeyCount, SegCount, SmallSize, LargeSize) -> From f4c65597ea7b1c148b0b45ca3cf09c4ab76dd39a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 24 Oct 2018 15:42:21 +0100 Subject: [PATCH 5/6] log 2 compat Need to support math:log2 in early versions of OTP (for Riak) --- rebar.config | 2 ++ src/leveled_math.erl | 38 ++++++++++++++++++++++++++++++++++++++ src/leveled_tictac.erl | 2 +- 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 src/leveled_math.erl diff --git a/rebar.config b/rebar.config index 7a46b31..a6c6659 100644 --- a/rebar.config +++ b/rebar.config @@ -1,8 +1,10 @@ {erl_opts, [warnings_as_errors, {platform_define, "^2[0-1]{1}", fsm_deprecated}, {platform_define, "^1[7-8]{1}", old_rand}, + {platform_define, "^17", no_log2}, {platform_define, "^R", no_sync}, {platform_define, "^R", old_rand}, + {platform_define, "^R", no_log2}, {platform_define, "^R", slow_test}]}. {xref_checks, [undefined_function_calls,undefined_functions]}. diff --git a/src/leveled_math.erl b/src/leveled_math.erl new file mode 100644 index 0000000..f88ed4f --- /dev/null +++ b/src/leveled_math.erl @@ -0,0 +1,38 @@ +%% Handle missing log2 prior to OTP18 + +-module(leveled_math). + +%% API +-export([ + log2/1 + ]). + + +-include_lib("eunit/include/eunit.hrl"). + +%%%=================================================================== +%%% Use log2 +%%%=================================================================== +-ifndef(no_log2). 
+ +log2(X) -> + math:log2(X). + +-else. +%%%=================================================================== +%%% Fallback log2 for OTP releases without math:log2/1 +%%%=================================================================== + +log2(X) -> + math:log(X) / 0.6931471805599453. + +-endif. + + +-ifdef(TEST). + +log2_test() -> + ?assertMatch(8, round(log2(256))), + ?assertMatch(16, round(log2(65536))). + +-endif. diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 85ada1d..e9716c0 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -413,7 +413,7 @@ adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) -> StoreSizeI = get_size(StoreSize), if CompareSizeI =< StoreSizeI -> ExpItems = StoreSizeI div CompareSizeI - 1, - ShiftFactor = trunc(math:log2(CompareSizeI * ?L2_CHUNKSIZE)), + ShiftFactor = round(leveled_math:log2(CompareSizeI * ?L2_CHUNKSIZE)), ExpList = lists:map(fun(X) -> X bsl ShiftFactor end, lists:seq(1, ExpItems)), UpdSegmentList = From 3803a190af6c3a6294d9512c8a47f50cbec4f15b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 24 Oct 2018 16:01:41 +0100 Subject: [PATCH 6/6] No need to test at high volume every run --- src/leveled_tictac.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index e9716c0..76d77da 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -777,11 +777,11 @@ segment_expandsimple_test() -> timing_test() -> - timing_tester(100000, 4, small, large), - timing_tester(100000, 8, small, large), - timing_tester(100000, 4, medium, large), - timing_tester(100000, 4, small, medium), - timing_tester(1000000, 4, small, large). + timing_tester(10000, 4, small, large), + timing_tester(10000, 8, small, large), + timing_tester(10000, 4, medium, large), + timing_tester(10000, 4, small, medium), + timing_tester(100000, 4, small, large). timing_tester(KeyCount, SegCount, SmallSize, LargeSize) ->