From c722f3132d66a09b0b5fe9233b66f16909687f4e Mon Sep 17 00:00:00 2001 From: martinsumner Date: Thu, 19 Jan 2017 22:49:32 +0000 Subject: [PATCH 01/25] Initial version of potential new tree module with basic Unit Tests. Intended to replace skiplist --- src/leveled_tree.erl | 255 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 src/leveled_tree.erl diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl new file mode 100644 index 0000000..68fe385 --- /dev/null +++ b/src/leveled_tree.erl @@ -0,0 +1,255 @@ +%% -------- TREE --------- +%% +%% This module is intended to address two issues +%% - the lack of iterator_from support in OTP16 gb_trees +%% - the time to convert from/to list in gb_trees +%% +%% Leveled had had a skiplist implementation previously, and this is a +%% variation on that. The Tree in this case is a bunch of sublists of length +%% SKIP_WIDTH with the last key of each sublist held in a gb_tree. + +-module(leveled_tree). + +-include("include/leveled.hrl"). + +-export([ + from_orderedlist/1, + from_orderedset/1, + to_list/1, + match_range/3, + % search_range/3, + match/2, + search/2, + tsize/1 + ]). + +-include_lib("eunit/include/eunit.hrl"). + +-define(SKIP_WIDTH, 16). + + +%%%============================================================================ +%%% API +%%%============================================================================ + +from_orderedlist(OrderedList) -> + L = length(OrderedList), + {tree, L, from_orderedlist(OrderedList, gb_trees:empty(), L)}. + +from_orderedset(Table) -> + from_orderedlist(ets:tab2list(Table)). + +match(Key, {tree, _L, Tree}) -> + Iter = gb_trees:iterator_from(Key, Tree), + case gb_trees:next(Iter) of + none -> + none; + {_NK, SL, _Iter} -> + lookup_match(Key, SL) + end.
+ +match_range(StartKey, EndKey, {tree, _L, Tree}) -> + Iter0 = gb_trees:iterator_from(StartKey, Tree), + case gb_trees:next(Iter0) of + none -> + []; + {NK, SL, Iter1} -> + PredFun = + fun({K, _V}) -> + K < StartKey + end, + {_LHS, RHS} = lists:splitwith(PredFun, SL), + lookup_match_range(EndKey, {NK, RHS}, Iter1, []) + end. + +search(Key, {tree, _L, Tree}) -> + Iter = gb_trees:iterator_from(Key, Tree), + case gb_trees:next(Iter) of + none -> + none; + {_NK, SL, _Iter} -> + lookup_best(Key, SL) + end. + +to_list({tree, _L, Tree}) -> + FoldFun = + fun({_MK, SL}, Acc) -> + Acc ++ SL + end, + lists:foldl(FoldFun, [], gb_trees:to_list(Tree)). + +tsize({tree, L, _Tree}) -> + L. + +%%%============================================================================ +%%% Internal Functions +%%%============================================================================ + + +from_orderedlist([], Tree, _L) -> + Tree; +from_orderedlist(OrdList, Tree, L) -> + SubLL = min(?SKIP_WIDTH, L), + {Head, Tail} = lists:split(SubLL, OrdList), + {LastK, _LastV} = lists:last(Head), + from_orderedlist(Tail, gb_trees:insert(LastK, Head, Tree), L - SubLL). + +lookup_match(_Key, []) -> + none; +lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key -> + none; +lookup_match(Key, [{Key, EV}|_Tail]) -> + {value, EV}; +lookup_match(Key, [_Top|Tail]) -> + lookup_match(Key, Tail). + +lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key -> + {EK, EV}; +lookup_best(Key, [_Top|Tail]) -> + lookup_best(Key, Tail). 
+ +lookup_match_range(EndKey, {NK0, SL0}, Iter0, Output) -> + PredFun = + fun({K, _V}) -> + not leveled_codec:endkey_passed(EndKey, K) + end, + case leveled_codec:endkey_passed(EndKey, NK0) of + true -> + {LHS, RHS} = lists:splitwith(PredFun, SL0), + case RHS of + [{EndKey, FirstValue}|_Tail] -> + Output ++ LHS ++ [{EndKey, FirstValue}]; + _ -> + Output ++ LHS + end; + false -> + UpdOutput = Output ++ SL0, + case gb_trees:next(Iter0) of + none -> + UpdOutput; + {NK1, SL1, Iter1} -> + lookup_match_range(EndKey, {NK1, SL1}, Iter1, UpdOutput) + end + end. + + +%%%============================================================================ +%%% Test +%%%============================================================================ + +-ifdef(TEST). + +generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> + generate_randomkeys(Seqn, + Count, + [], + BucketRangeLow, + BucketRangeHigh). + +generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> + Acc; +generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> + BNumber = + case BRange of + 0 -> + string:right(integer_to_list(BucketLow), 4, $0); + _ -> + BRand = random:uniform(BRange), + string:right(integer_to_list(BucketLow + BRand), 4, $0) + end, + KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0), + {K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null}, + {Seqn, {active, infinity}, null}}, + generate_randomkeys(Seqn + 1, + Count - 1, + [{K, V}|Acc], + BucketLow, + BRange). 
+ + +tree_test() -> + N = 4000, + KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), + + OS = ets:new(test, [ordered_set, private]), + ets:insert(OS, KL), + SWaETS = os:timestamp(), + Tree0 = from_orderedset(OS), + io:format(user, "Generating tree from ETS in ~w microseconds" ++ + " of size ~w~n", + [timer:now_diff(os:timestamp(), SWaETS), + tsize(Tree0)]), + + SWaGSL = os:timestamp(), + Tree1 = from_orderedlist(KL), + io:format(user, "Generating tree from orddict in ~w microseconds" ++ + " of size ~w~n", + [timer:now_diff(os:timestamp(), SWaGSL), + tsize(Tree1)]), + SWaLUP = os:timestamp(), + lists:foreach(match_fun(Tree0), KL), + lists:foreach(match_fun(Tree1), KL), + io:format(user, "Looked up all keys twice in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWaLUP)]), + + ?assertMatch(Tree0, Tree1), + + SWaSRCH1 = os:timestamp(), + lists:foreach(search_exactmatch_fun(Tree0), KL), + lists:foreach(search_exactmatch_fun(Tree1), KL), + io:format(user, "Search all keys twice for exact match in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWaSRCH1)]), + + BitBiggerKeyFun = + fun(Idx) -> + {K, _V} = lists:nth(Idx, KL), + {o, B, FullKey, null} = K, + {{o, B, FullKey ++ "0", null}, lists:nth(Idx + 1, KL)} + end, + SrchKL = lists:map(BitBiggerKeyFun, lists:seq(1, length(KL) - 1)), + + SWaSRCH2 = os:timestamp(), + lists:foreach(search_nearmatch_fun(Tree0), SrchKL), + lists:foreach(search_nearmatch_fun(Tree1), SrchKL), + io:format(user, "Search all keys twice for near match in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWaSRCH2)]), + + FirstKey = element(1, lists:nth(1, KL)), + FinalKey = element(1, lists:last(KL)), + PenultimateKey = element(1, lists:nth(length(KL) - 1, KL)), + AfterFirstKey = setelement(3, FirstKey, element(3, FirstKey) ++ "0"), + AfterPenultimateKey = setelement(3, + PenultimateKey, + element(3, PenultimateKey) ++ "0"), + + LengthR = + fun(SK, EK, T) -> + length(match_range(SK, EK, T)) + end, + + KL_Length = 
length(KL), + ?assertMatch(KL_Length, LengthR(FirstKey, FinalKey, Tree0)), + ?assertMatch(KL_Length, LengthR(FirstKey, PenultimateKey, Tree0) + 1), + ?assertMatch(1, LengthR(all, FirstKey, Tree0)), + ?assertMatch(KL_Length, LengthR(all, PenultimateKey, Tree0) + 1), + ?assertMatch(KL_Length, LengthR(all, all, Tree0)), + ?assertMatch(2, LengthR(PenultimateKey, FinalKey, Tree0)), + ?assertMatch(KL_Length, LengthR(AfterFirstKey, PenultimateKey, Tree0) + 2), + ?assertMatch(1, LengthR(AfterPenultimateKey, FinalKey, Tree0)). + +match_fun(Tree) -> + fun({K, V}) -> + ?assertMatch({value, V}, match(K, Tree)) + end. + +search_exactmatch_fun(Tree) -> + fun({K, V}) -> + ?assertMatch({K, V}, search(K, Tree)) + end. + +search_nearmatch_fun(Tree) -> + fun({K, {NK, NV}}) -> + ?assertMatch({NK, NV}, search(K, Tree)) + end. + +-endif. \ No newline at end of file From 860a8a95e3f2e04abea19f9f14d8bbdee9693404 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 19 Jan 2017 22:59:53 +0000 Subject: [PATCH 02/25] Width of 32 may be better With 4000 keys - build time dropped significantly --- src/leveled_tree.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 68fe385..aa14a6c 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -25,7 +25,7 @@ -include_lib("eunit/include/eunit.hrl"). --define(SKIP_WIDTH, 16). +-define(SKIP_WIDTH, 32). 
%%%============================================================================ From 220d493b5a0d4c8e9c34f6078513acb66a9856d7 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 20 Jan 2017 16:15:00 +0000 Subject: [PATCH 03/25] Tree changes --- src/leveled_tree.erl | 153 ++++++++++++++++++++++++++++++++----------- 1 file changed, 115 insertions(+), 38 deletions(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index aa14a6c..3440ba7 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -17,15 +17,16 @@ from_orderedset/1, to_list/1, match_range/3, - % search_range/3, + search_range/4, match/2, - search/2, - tsize/1 + search/3, + tsize/1, + empty/0 ]). -include_lib("eunit/include/eunit.hrl"). --define(SKIP_WIDTH, 32). +-define(SKIP_WIDTH, 16). %%%============================================================================ @@ -34,53 +35,64 @@ from_orderedlist(OrderedList) -> L = length(OrderedList), - {tree, L, from_orderedlist(OrderedList, gb_trees:empty(), L)}. + {tree, L, from_orderedlist(OrderedList, empty_tree(), L)}. from_orderedset(Table) -> from_orderedlist(ets:tab2list(Table)). match(Key, {tree, _L, Tree}) -> - Iter = gb_trees:iterator_from(Key, Tree), - case gb_trees:next(Iter) of + Iter = tree_iterator_from(Key, Tree), + case tree_next(Iter) of none -> none; {_NK, SL, _Iter} -> lookup_match(Key, SL) end. -match_range(StartKey, EndKey, {tree, _L, Tree}) -> - Iter0 = gb_trees:iterator_from(StartKey, Tree), - case gb_trees:next(Iter0) of - none -> - []; - {NK, SL, Iter1} -> - PredFun = - fun({K, _V}) -> - K < StartKey - end, - {_LHS, RHS} = lists:splitwith(PredFun, SL), - lookup_match_range(EndKey, {NK, RHS}, Iter1, []) - end. 
- -search(Key, {tree, _L, Tree}) -> - Iter = gb_trees:iterator_from(Key, Tree), - case gb_trees:next(Iter) of +search(Key, {tree, _L, Tree}, StartKeyFun) -> + Iter = tree_iterator_from(Key, Tree), + case tree_next(Iter) of none -> none; {_NK, SL, _Iter} -> - lookup_best(Key, SL) + {K, V} = lookup_best(Key, SL), + case K < StartKeyFun(V) of + true -> + none; + false -> + {K, V} + end end. +match_range(StartRange, EndRange, {tree, _L, Tree}) -> + EndRangeFun = + fun(ER, FirstRHSKey, _FirstRHSValue) -> + ER == FirstRHSKey + end, + lookup_range_start(StartRange, EndRange, Tree, EndRangeFun). + + +search_range(StartRange, EndRange, {tree, _L, Tree}, StartKeyFun) -> + EndRangeFun = + fun(ER, _FirstRHSKey, FirstRHSValue) -> + StartRHSKey = StartKeyFun(FirstRHSValue), + ER >= StartRHSKey + end, + lookup_range_start(StartRange, EndRange, Tree, EndRangeFun). + to_list({tree, _L, Tree}) -> FoldFun = fun({_MK, SL}, Acc) -> Acc ++ SL end, - lists:foldl(FoldFun, [], gb_trees:to_list(Tree)). + lists:foldl(FoldFun, [], tree_to_list(Tree)). tsize({tree, L, _Tree}) -> L. +empty() -> + {tree, 0, empty_tree()}. + %%%============================================================================ %%% Internal Functions %%%============================================================================ @@ -92,7 +104,7 @@ from_orderedlist(OrdList, Tree, L) -> SubLL = min(?SKIP_WIDTH, L), {Head, Tail} = lists:split(SubLL, OrdList), {LastK, _LastV} = lists:last(Head), - from_orderedlist(Tail, gb_trees:insert(LastK, Head, Tree), L - SubLL). + from_orderedlist(Tail, tree_insert(LastK, Head, Tree), L - SubLL). lookup_match(_Key, []) -> none; @@ -108,30 +120,71 @@ lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key -> lookup_best(Key, [_Top|Tail]) -> lookup_best(Key, Tail). 
-lookup_match_range(EndKey, {NK0, SL0}, Iter0, Output) -> +lookup_range_start(StartRange, EndRange, Tree, EndRangeFun) -> + Iter0 = tree_iterator_from(StartRange, Tree), + case tree_next(Iter0) of + none -> + []; + {NK, SL, Iter1} -> + PredFun = + fun({K, _V}) -> + K < StartRange + end, + {_LHS, RHS} = lists:splitwith(PredFun, SL), + lookup_range_end(EndRange, {NK, RHS}, Iter1, [], EndRangeFun) + end. + +lookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) -> PredFun = fun({K, _V}) -> - not leveled_codec:endkey_passed(EndKey, K) + not leveled_codec:endkey_passed(EndRange, K) end, - case leveled_codec:endkey_passed(EndKey, NK0) of + case leveled_codec:endkey_passed(EndRange, NK0) of true -> {LHS, RHS} = lists:splitwith(PredFun, SL0), case RHS of - [{EndKey, FirstValue}|_Tail] -> - Output ++ LHS ++ [{EndKey, FirstValue}]; - _ -> - Output ++ LHS + [] -> + Output ++ LHS; + [{FirstRHSKey, FirstRHSValue}|_Rest] -> + case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of + true -> + Output ++ LHS ++ [{FirstRHSKey, FirstRHSValue}]; + false -> + Output ++ LHS + end end; false -> UpdOutput = Output ++ SL0, - case gb_trees:next(Iter0) of + case tree_next(Iter0) of none -> UpdOutput; {NK1, SL1, Iter1} -> - lookup_match_range(EndKey, {NK1, SL1}, Iter1, UpdOutput) + lookup_range_end(EndRange, + {NK1, SL1}, + Iter1, + UpdOutput, + EndRangeFun) end end. +%%%============================================================================ +%%% Balance tree implementation +%%%============================================================================ + +empty_tree() -> + gb_trees:empty(). + +tree_insert(K, V, T) -> + gb_trees:insert(K, V, T). + +tree_to_list(T) -> + gb_trees:to_list(T). + +tree_iterator_from(K, T) -> + gb_trees:iterator_from(K, T). + +tree_next(I) -> + gb_trees:next(I). 
%%%============================================================================ %%% Test @@ -166,6 +219,28 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> BucketLow, BRange). + +tree_search_test() -> + MapFun = + fun(N) -> + {N * 4, N * 4 - 2} + end, + KL = lists:map(MapFun, lists:seq(1, 50)), + T = from_orderedlist(KL), + + StartKeyFun = fun(V) -> V end, + + ?assertMatch([], search_range(0, 1, T, StartKeyFun)), + ?assertMatch([], search_range(201, 202, T, StartKeyFun)), + ?assertMatch([{4, 2}], search_range(2, 4, T, StartKeyFun)), + ?assertMatch([{4, 2}], search_range(2, 5, T, StartKeyFun)), + ?assertMatch([{4, 2}, {8, 6}], search_range(2, 6, T, StartKeyFun)), + ?assertMatch(50, length(search_range(2, 200, T, StartKeyFun))), + ?assertMatch(50, length(search_range(2, 198, T, StartKeyFun))), + ?assertMatch(49, length(search_range(2, 197, T, StartKeyFun))), + ?assertMatch(49, length(search_range(4, 197, T, StartKeyFun))), + ?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))). + tree_test() -> N = 4000, @@ -243,13 +318,15 @@ match_fun(Tree) -> end. search_exactmatch_fun(Tree) -> + StartKeyFun = fun(_V) -> all end, fun({K, V}) -> - ?assertMatch({K, V}, search(K, Tree)) + ?assertMatch({K, V}, search(K, Tree, StartKeyFun)) end. search_nearmatch_fun(Tree) -> + StartKeyFun = fun(_V) -> all end, fun({K, {NK, NV}}) -> - ?assertMatch({NK, NV}, search(K, Tree)) + ?assertMatch({NK, NV}, search(K, Tree, StartKeyFun)) end. -endif. 
\ No newline at end of file From 1745ba6863d5e10442b3f3042a9bc9b8603e9933 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 20 Jan 2017 16:35:38 +0000 Subject: [PATCH 04/25] Improve from_list speed --- src/leveled_tree.erl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 3440ba7..40022c9 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -35,7 +35,7 @@ from_orderedlist(OrderedList) -> L = length(OrderedList), - {tree, L, from_orderedlist(OrderedList, empty_tree(), L)}. + {tree, L, from_orderedlist(OrderedList, [], L)}. from_orderedset(Table) -> from_orderedlist(ets:tab2list(Table)). @@ -98,13 +98,13 @@ empty() -> %%%============================================================================ -from_orderedlist([], Tree, _L) -> - Tree; -from_orderedlist(OrdList, Tree, L) -> +from_orderedlist([], TmpList, _L) -> + gb_trees:from_orddict(TmpList); +from_orderedlist(OrdList, TmpList, L) -> SubLL = min(?SKIP_WIDTH, L), {Head, Tail} = lists:split(SubLL, OrdList), {LastK, _LastV} = lists:last(Head), - from_orderedlist(Tail, tree_insert(LastK, Head, Tree), L - SubLL). + from_orderedlist(Tail, TmpList ++ [{LastK, Head}], L - SubLL). lookup_match(_Key, []) -> none; @@ -174,9 +174,6 @@ lookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) -> empty_tree() -> gb_trees:empty(). -tree_insert(K, V, T) -> - gb_trees:insert(K, V, T). - tree_to_list(T) -> gb_trees:to_list(T). 
@@ -230,6 +227,7 @@ tree_search_test() -> StartKeyFun = fun(V) -> V end, + SW = os:timestamp(), ?assertMatch([], search_range(0, 1, T, StartKeyFun)), ?assertMatch([], search_range(201, 202, T, StartKeyFun)), ?assertMatch([{4, 2}], search_range(2, 4, T, StartKeyFun)), @@ -239,11 +237,13 @@ tree_search_test() -> ?assertMatch(50, length(search_range(2, 198, T, StartKeyFun))), ?assertMatch(49, length(search_range(2, 197, T, StartKeyFun))), ?assertMatch(49, length(search_range(4, 197, T, StartKeyFun))), - ?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))). + ?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))), + io:format(user, "10 range tests in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SW)]). tree_test() -> - N = 4000, + N = 2000, KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), OS = ets:new(test, [ordered_set, private]), From 3d9903609327a165b9e2774590b6c35b1e76ff7c Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 20 Jan 2017 16:36:20 +0000 Subject: [PATCH 05/25] Switch the LM1 cache to be a tree Use a tree of lists not a skiplist --- src/leveled_bookie.erl | 23 +++++---- src/leveled_inker.erl | 6 ++- src/leveled_penciller.erl | 99 ++++++++++++++++++++------------------- src/leveled_pmem.erl | 37 ++++++++------- 4 files changed, 88 insertions(+), 77 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 0960c68..53359c2 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -139,6 +139,7 @@ get_opt/3, load_snapshot/2, empty_ledgercache/0, + loadqueue_ledgercache/1, push_ledgercache/2]). -include_lib("eunit/include/eunit.hrl"). @@ -153,7 +154,8 @@ -define(LONG_RUNNING, 80000). -record(ledger_cache, {mem :: ets:tab(), - loader = leveled_skiplist:empty(false) :: tuple(), + loader = leveled_tree:empty() :: tuple(), + load_queue = [] :: list(), index = leveled_pmem:new_index(), % array min_sqn = infinity :: integer()|infinity, max_sqn = 0 :: integer()}). 
@@ -474,6 +476,11 @@ push_ledgercache(Penciller, Cache) -> Cache#ledger_cache.max_sqn}, leveled_penciller:pcl_pushmem(Penciller, CacheToLoad). +loadqueue_ledgercache(Cache) -> + SL = lists:ukeysort(1, Cache#ledger_cache.load_queue), + T = leveled_tree:from_orderedlist(SL), + Cache#ledger_cache{load_queue = [], loader = T}. + %%%============================================================================ %%% Internal functions %%%============================================================================ @@ -719,11 +726,11 @@ snapshot_store(State, SnapType) -> readycache_forsnapshot(LedgerCache) -> % Need to convert the Ledger Cache away from using the ETS table - SkipList = leveled_skiplist:from_orderedset(LedgerCache#ledger_cache.mem), + Tree = leveled_tree:from_orderedset(LedgerCache#ledger_cache.mem), Idx = LedgerCache#ledger_cache.index, MinSQN = LedgerCache#ledger_cache.min_sqn, MaxSQN = LedgerCache#ledger_cache.max_sqn, - #ledger_cache{loader=SkipList, index=Idx, min_sqn=MinSQN, max_sqn=MaxSQN}. + #ledger_cache{loader=Tree, index=Idx, min_sqn=MinSQN, max_sqn=MaxSQN}. set_options(Opts) -> MaxJournalSize0 = get_opt(max_journalsize, Opts, 10000000000), @@ -961,14 +968,10 @@ addto_ledgercache({H, SQN, KeyChanges}, Cache) -> max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}. addto_ledgercache({H, SQN, KeyChanges}, Cache, loader) -> - FoldChangesFun = - fun({K, V}, SL0) -> - leveled_skiplist:enter_nolookup(K, V, SL0) - end, - UpdSL = lists:foldl(FoldChangesFun, Cache#ledger_cache.loader, KeyChanges), + UpdQ = KeyChanges ++ Cache#ledger_cache.load_queue, UpdIndex = leveled_pmem:prepare_for_index(Cache#ledger_cache.index, H), Cache#ledger_cache{index = UpdIndex, - loader = UpdSL, + load_queue = UpdQ, min_sqn=min(SQN, Cache#ledger_cache.min_sqn), max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}. 
@@ -979,7 +982,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) -> TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize), if TimeToPush -> - CacheToLoad = {leveled_skiplist:from_orderedset(Tab), + CacheToLoad = {leveled_tree:from_orderedset(Tab), Cache#ledger_cache.index, Cache#ledger_cache.min_sqn, Cache#ledger_cache.max_sqn}, diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index f56ea20..6789302 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -669,10 +669,14 @@ load_between_sequence(MinSQN, MaxSQN, FilterFun, Penciller, push_to_penciller(Penciller, LedgerCache) -> % The push to penciller must start as a tree to correctly de-duplicate % the list by order before becoming a de-duplicated list for loading + LC0 = leveled_bookie:loadqueue_ledgercache(LedgerCache), + push_to_penciller_loop(Penciller, LC0). + +push_to_penciller_loop(Penciller, LedgerCache) -> case leveled_bookie:push_ledgercache(Penciller, LedgerCache) of returned -> timer:sleep(?LOADING_PAUSE), - push_to_penciller(Penciller, LedgerCache); + push_to_penciller_loop(Penciller, LedgerCache); ok -> ok end. 
diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 9a6daf3..d18cb09 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -9,7 +9,7 @@ %% the Penciller's Clerk %% - The Penciller can be cloned and maintains a register of clones who have %% requested snapshots of the Ledger -%% - The accepts new dumps (in the form of a leveled_skiplist accomponied by +%% - The accepts new dumps (in the form of a leveled_tree accomponied by %% an array of hash-listing binaries) from the Bookie, and responds either 'ok' %% to the bookie if the information is accepted nad the Bookie can refresh its %% memory, or 'returned' if the bookie must continue without refreshing as the @@ -224,7 +224,7 @@ levelzero_pending = false :: boolean(), levelzero_constructor :: pid(), - levelzero_cache = [] :: list(), % a list of skiplists + levelzero_cache = [] :: list(), % a list of trees levelzero_size = 0 :: integer(), levelzero_maxcachesize :: integer(), levelzero_cointoss = false :: boolean(), @@ -345,9 +345,9 @@ handle_call({push_mem, {PushedTree, PushedIdx, MinSQN, MaxSQN}}, State=#state{is_snapshot=Snap}) when Snap == false -> % The push_mem process is as follows: % - % 1 - Receive a cache. The cache has four parts: a skiplist of keys and + % 1 - Receive a cache. The cache has four parts: a tree of keys and % values, an array of 256 binaries listing the hashes present in the - % skiplist, a min SQN and a max SQN + % tree, a min SQN and a max SQN % % 2 - Check to see if there is a levelzero file pending. If so, the % update must be returned. 
If not the update can be accepted @@ -404,7 +404,7 @@ handle_call({fetch_keys, StartKey, EndKey, AccFun, InitAcc, MaxKeys}, leveled_pmem:merge_trees(StartKey, EndKey, State#state.levelzero_cache, - leveled_skiplist:empty()); + leveled_tree:empty()); List -> List end, @@ -1072,10 +1072,10 @@ clean_subdir(DirPath) -> maybe_pause_push(PCL, KL) -> - T0 = leveled_skiplist:empty(true), + T0 = [], I0 = leveled_pmem:new_index(), T1 = lists:foldl(fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) -> - UpdSL = leveled_skiplist:enter(K, V, AccSL), + UpdSL = [{K, V}|AccSL], SQN = leveled_codec:strip_to_seqonly({K, V}), H = leveled_codec:magic_hash(K), UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H), @@ -1083,7 +1083,10 @@ maybe_pause_push(PCL, KL) -> end, {T0, I0, infinity, 0}, KL), - case pcl_pushmem(PCL, T1) of + SL = element(1, T1), + Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, SL)), + T2 = setelement(1, T1, Tree), + case pcl_pushmem(PCL, T2) of returned -> timer:sleep(50), maybe_pause_push(PCL, KL); @@ -1315,63 +1318,63 @@ sqnoverlap_otherway_findnextkey_test() -> foldwithimm_simple_test() -> QueryArray = [ - {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, - {{o, "Bucket1", "Key5"}, {1, {active, infinity}, 0, null}}]}, - {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]}, - {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]} + {2, [{{o, "Bucket1", "Key1", null}, + {5, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key5", null}, + {1, {active, infinity}, 0, null}}]}, + {3, [{{o, "Bucket1", "Key3", null}, + {3, {active, infinity}, 0, null}}]}, + {5, [{{o, "Bucket1", "Key5", null}, + {2, {active, infinity}, 0, null}}]} ], - IMM0 = leveled_skiplist:enter({o, "Bucket1", "Key6"}, - {7, {active, infinity}, 0, null}, - leveled_skiplist:empty()), - IMM1 = leveled_skiplist:enter({o, "Bucket1", "Key1"}, - {8, {active, infinity}, 0, null}, - IMM0), - IMM2 = leveled_skiplist:enter({o, "Bucket1", "Key8"}, - {9, {active, infinity}, 
0, null}, - IMM1), - IMMiter = leveled_skiplist:to_range(IMM2, {o, "Bucket1", "Key1"}), + KL1A = [{{o, "Bucket1", "Key6", null}, {7, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key1", null}, {8, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key8", null}, {9, {active, infinity}, 0, null}}], + IMM2 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1A)), + IMMiter = leveled_tree:match_range({o, "Bucket1", "Key1", null}, + {o, null, null, null}, + IMM2), AccFun = fun(K, V, Acc) -> SQN = leveled_codec:strip_to_seqonly({K, V}), Acc ++ [{K, SQN}] end, Acc = keyfolder(IMMiter, QueryArray, - {o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"}, + {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null}, {AccFun, []}), - ?assertMatch([{{o, "Bucket1", "Key1"}, 8}, - {{o, "Bucket1", "Key3"}, 3}, - {{o, "Bucket1", "Key5"}, 2}, - {{o, "Bucket1", "Key6"}, 7}], Acc), + ?assertMatch([{{o, "Bucket1", "Key1", null}, 8}, + {{o, "Bucket1", "Key3", null}, 3}, + {{o, "Bucket1", "Key5", null}, 2}, + {{o, "Bucket1", "Key6", null}, 7}], Acc), - IMM1A = leveled_skiplist:enter({o, "Bucket1", "Key1"}, - {8, {active, infinity}, 0, null}, - leveled_skiplist:empty()), - IMMiterA = leveled_skiplist:to_range(IMM1A, {o, "Bucket1", "Key1"}), + IMMiterA = [{{o, "Bucket1", "Key1", null}, + {8, {active, infinity}, 0, null}}], AccA = keyfolder(IMMiterA, - QueryArray, - {o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"}, - {AccFun, []}), - ?assertMatch([{{o, "Bucket1", "Key1"}, 8}, - {{o, "Bucket1", "Key3"}, 3}, - {{o, "Bucket1", "Key5"}, 2}], AccA), + QueryArray, + {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null}, + {AccFun, []}), + ?assertMatch([{{o, "Bucket1", "Key1", null}, 8}, + {{o, "Bucket1", "Key3", null}, 3}, + {{o, "Bucket1", "Key5", null}, 2}], AccA), - IMM3 = leveled_skiplist:enter({o, "Bucket1", "Key4"}, - {10, {active, infinity}, 0, null}, - IMM2), - IMMiterB = leveled_skiplist:to_range(IMM3, {o, "Bucket1", "Key1"}), + KL1B = [{{o, "Bucket1", "Key4", null}, {10, {active, 
infinity}, 0, null}}|KL1A], + IMM3 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1B)), + IMMiterB = leveled_tree:match_range({o, "Bucket1", "Key1", null}, + {o, null, null, null}, + IMM3), AccB = keyfolder(IMMiterB, QueryArray, - {o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"}, + {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null}, {AccFun, []}), - ?assertMatch([{{o, "Bucket1", "Key1"}, 8}, - {{o, "Bucket1", "Key3"}, 3}, - {{o, "Bucket1", "Key4"}, 10}, - {{o, "Bucket1", "Key5"}, 2}, - {{o, "Bucket1", "Key6"}, 7}], AccB). + ?assertMatch([{{o, "Bucket1", "Key1", null}, 8}, + {{o, "Bucket1", "Key3", null}, 3}, + {{o, "Bucket1", "Key4", null}, 10}, + {{o, "Bucket1", "Key5", null}, 2}, + {{o, "Bucket1", "Key6", null}, 7}], AccB). create_file_test() -> Filename = "../test/new_file.sst", ok = file:write_file(Filename, term_to_binary("hello")), KVL = lists:usort(generate_randomkeys(10000)), - Tree = leveled_skiplist:from_list(KVL), + Tree = leveled_tree:from_orderedlist(KVL), FetchFun = fun(Slot) -> lists:nth(Slot, [Tree]) end, {ok, SP, diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index 9480abe..fff113d 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -57,7 +57,7 @@ prepare_for_index(IndexArray, Hash) -> add_to_cache(L0Size, {LevelMinus1, MinSQN, MaxSQN}, LedgerSQN, TreeList) -> - LM1Size = leveled_skiplist:size(LevelMinus1), + LM1Size = leveled_tree:tsize(LevelMinus1), case LM1Size of 0 -> {LedgerSQN, L0Size, TreeList}; @@ -99,7 +99,7 @@ to_list(Slots, FetchFun) -> SlotList = lists:reverse(lists:seq(1, Slots)), FullList = lists:foldl(fun(Slot, Acc) -> Tree = FetchFun(Slot), - L = leveled_skiplist:to_list(Tree), + L = leveled_tree:to_list(Tree), lists:ukeymerge(1, Acc, L) end, [], @@ -119,14 +119,14 @@ check_levelzero(Key, Hash, PosList, TreeList) -> check_slotlist(Key, Hash, PosList, TreeList). 
-merge_trees(StartKey, EndKey, SkipListList, LevelMinus1) -> - lists:foldl(fun(SkipList, Acc) -> - R = leveled_skiplist:to_range(SkipList, - StartKey, - EndKey), +merge_trees(StartKey, EndKey, TreeList, LevelMinus1) -> + lists:foldl(fun(Tree, Acc) -> + R = leveled_tree:match_range(StartKey, + EndKey, + Tree), lists:ukeymerge(1, Acc, R) end, [], - [LevelMinus1|lists:reverse(SkipListList)]). + [LevelMinus1|lists:reverse(TreeList)]). %%%============================================================================ %%% Internal Functions @@ -148,7 +148,7 @@ split_hash(Hash) -> H0 = (Hash bsr 8) band 8388607, {Slot, H0}. -check_slotlist(Key, Hash, CheckList, TreeList) -> +check_slotlist(Key, _Hash, CheckList, TreeList) -> SlotCheckFun = fun(SlotToCheck, {Found, KV}) -> case Found of @@ -156,7 +156,7 @@ check_slotlist(Key, Hash, CheckList, TreeList) -> {Found, KV}; false -> CheckTree = lists:nth(SlotToCheck, TreeList), - case leveled_skiplist:lookup(Key, Hash, CheckTree) of + case leveled_tree:match(Key, CheckTree) of none -> {Found, KV}; {value, Value} -> @@ -188,7 +188,7 @@ generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> [], BucketRangeLow, BucketRangeHigh), - leveled_skiplist:from_list(KVL). + leveled_tree:from_orderedlist(lists:ukeysort(1, KVL)). 
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> Acc; @@ -223,7 +223,7 @@ compare_method_test() -> ?assertMatch(32000, SQN), ?assertMatch(true, Size =< 32000), - TestList = leveled_skiplist:to_list(generate_randomkeys(1, 2000, 1, 800)), + TestList = leveled_tree:to_list(generate_randomkeys(1, 2000, 1, 800)), FindKeyFun = fun(Key) -> @@ -232,7 +232,7 @@ compare_method_test() -> true -> {true, KV}; false -> - L0 = leveled_skiplist:lookup(Key, Tree), + L0 = leveled_tree:match(Key, Tree), case L0 of none -> {false, not_found}; @@ -270,19 +270,20 @@ compare_method_test() -> P = leveled_codec:endkey_passed(EndKey, K), case {K, P} of {K, false} when K >= StartKey -> - leveled_skiplist:enter(K, V, Acc); + [{K, V}|Acc]; _ -> Acc end end, - leveled_skiplist:empty(), + [], DumpList), - Sz0 = leveled_skiplist:size(Q0), + Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, Q0)), + Sz0 = leveled_tree:tsize(Tree), io:format("Crude method took ~w microseconds resulting in tree of " ++ "size ~w~n", [timer:now_diff(os:timestamp(), SWa), Sz0]), SWb = os:timestamp(), - Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_skiplist:empty()), + Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_tree:empty()), Sz1 = length(Q1), io:format("Merge method took ~w microseconds resulting in tree of " ++ "size ~w~n", @@ -299,7 +300,7 @@ with_index_test() -> fun(_X, {{LedgerSQN, L0Size, L0TreeList}, L0Idx, SrcList}) -> LM1 = generate_randomkeys_aslist(LedgerSQN + 1, 2000, 1, 500), LM1Array = lists:foldl(IndexPrepareFun, new_index(), LM1), - LM1SL = leveled_skiplist:from_list(LM1), + LM1SL = leveled_tree:from_orderedlist(lists:ukeysort(1, LM1)), UpdL0Index = add_to_index(LM1Array, L0Idx, length(L0TreeList) + 1), R = add_to_cache(L0Size, {LM1SL, LedgerSQN + 1, LedgerSQN + 2000}, From 9e32e228bbb41466df74443a14fd8651136128c2 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 20 Jan 2017 17:34:57 +0000 Subject: [PATCH 06/25] Include gb_trees code MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gb_trees in OTP16 doesn’t have iterative from support. Included here so that we can continue to use it regardless in Riak. --- src/leveled_tree.erl | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 40022c9..c54fdb6 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -178,10 +178,43 @@ tree_to_list(T) -> gb_trees:to_list(T). tree_iterator_from(K, T) -> - gb_trees:iterator_from(K, T). + % For OTP 16 compatibility with gb_trees + iterator_from(K, T). tree_next(I) -> - gb_trees:next(I). + % For OTP 16 compatibility with gb_trees + next(I). + + +iterator_from(S, {_, T}) -> + iterator_1_from(S, T). + +iterator_1_from(S, T) -> + iterator_from(S, T, []). + +iterator_from(S, {K, _, _, T}, As) when K < S -> + iterator_from(S, T, As); +iterator_from(_, {_, _, nil, _} = T, As) -> + [T | As]; +iterator_from(S, {_, _, L, _} = T, As) -> + iterator_from(S, L, [T | As]); +iterator_from(_, nil, As) -> + As. + +next([{X, V, _, T} | As]) -> + {X, V, iterator(T, As)}; +next([]) -> + none. + +%% The iterator structure is really just a list corresponding to +%% the call stack of an in-order traversal. This is quite fast. + +iterator({_, _, nil, _} = T, As) -> + [T | As]; +iterator({_, _, L, _} = T, As) -> + iterator(L, [T | As]); +iterator(nil, As) -> + As. %%%============================================================================ %%% Test From 51f04022929a6d6e71d9f6861568a7bf4a56ec99 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 20 Jan 2017 21:31:17 +0000 Subject: [PATCH 07/25] Change Skip Width --- src/leveled_tree.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index c54fdb6..792c05d 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -26,7 +26,7 @@ -include_lib("eunit/include/eunit.hrl"). 
--define(SKIP_WIDTH, 16). +-define(SKIP_WIDTH, 32). %%%============================================================================ From 48462723932e2dfbdcc41c2006fd675d701e6da5 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sat, 21 Jan 2017 01:48:38 +0000 Subject: [PATCH 08/25] Build more efficiently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don’t use ++ to build the tree --- src/leveled_tree.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 792c05d..6f50c28 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -26,7 +26,7 @@ -include_lib("eunit/include/eunit.hrl"). --define(SKIP_WIDTH, 32). +-define(SKIP_WIDTH, 16). %%%============================================================================ @@ -99,12 +99,12 @@ empty() -> from_orderedlist([], TmpList, _L) -> - gb_trees:from_orddict(TmpList); + gb_trees:from_orddict(lists:reverse(TmpList)); from_orderedlist(OrdList, TmpList, L) -> SubLL = min(?SKIP_WIDTH, L), {Head, Tail} = lists:split(SubLL, OrdList), {LastK, _LastV} = lists:last(Head), - from_orderedlist(Tail, TmpList ++ [{LastK, Head}], L - SubLL). + from_orderedlist(Tail, [{LastK, Head}|TmpList], L - SubLL). lookup_match(_Key, []) -> none; @@ -276,7 +276,7 @@ tree_search_test() -> tree_test() -> - N = 2000, + N = 4000, KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), OS = ets:new(test, [ordered_set, private]), From ed96d0ca7ac5c63b6d27939feefa9823ddc6e8f2 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 21 Jan 2017 11:38:26 +0000 Subject: [PATCH 09/25] Change so that type of cache is parameterised The Tree doesn't seem to be better than the skiplist. 
Get ready to make this switchable --- include/leveled.hrl | 2 ++ src/leveled_bookie.erl | 9 +++---- src/leveled_penciller.erl | 10 ++++---- src/leveled_pmem.erl | 8 +++---- src/leveled_tree.erl | 49 +++++++++++++++++++++++++-------------- 5 files changed, 48 insertions(+), 30 deletions(-) diff --git a/include/leveled.hrl b/include/leveled.hrl index 13d862e..16fd39b 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -15,6 +15,8 @@ %% Inker key type used for tombstones -define(INKT_TOMB, tomb). +-define(CACHE_TYPE, tree). + -record(sft_options, {wait = true :: boolean(), expire_tombstones = false :: boolean(), diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 53359c2..245aa1c 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -154,7 +154,7 @@ -define(LONG_RUNNING, 80000). -record(ledger_cache, {mem :: ets:tab(), - loader = leveled_tree:empty() :: tuple(), + loader = leveled_tree:empty(?CACHE_TYPE) :: tuple(), load_queue = [] :: list(), index = leveled_pmem:new_index(), % array min_sqn = infinity :: integer()|infinity, @@ -478,7 +478,7 @@ push_ledgercache(Penciller, Cache) -> loadqueue_ledgercache(Cache) -> SL = lists:ukeysort(1, Cache#ledger_cache.load_queue), - T = leveled_tree:from_orderedlist(SL), + T = leveled_tree:from_orderedlist(SL, ?CACHE_TYPE), Cache#ledger_cache{load_queue = [], loader = T}. 
%%%============================================================================ @@ -726,7 +726,8 @@ snapshot_store(State, SnapType) -> readycache_forsnapshot(LedgerCache) -> % Need to convert the Ledger Cache away from using the ETS table - Tree = leveled_tree:from_orderedset(LedgerCache#ledger_cache.mem), + Tree = leveled_tree:from_orderedset(LedgerCache#ledger_cache.mem, + ?CACHE_TYPE), Idx = LedgerCache#ledger_cache.index, MinSQN = LedgerCache#ledger_cache.min_sqn, MaxSQN = LedgerCache#ledger_cache.max_sqn, @@ -982,7 +983,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) -> TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize), if TimeToPush -> - CacheToLoad = {leveled_tree:from_orderedset(Tab), + CacheToLoad = {leveled_tree:from_orderedset(Tab, ?CACHE_TYPE), Cache#ledger_cache.index, Cache#ledger_cache.min_sqn, Cache#ledger_cache.max_sqn}, diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index d18cb09..57b522c 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -404,7 +404,7 @@ handle_call({fetch_keys, StartKey, EndKey, AccFun, InitAcc, MaxKeys}, leveled_pmem:merge_trees(StartKey, EndKey, State#state.levelzero_cache, - leveled_tree:empty()); + leveled_tree:empty(?CACHE_TYPE)); List -> List end, @@ -1084,7 +1084,7 @@ maybe_pause_push(PCL, KL) -> {T0, I0, infinity, 0}, KL), SL = element(1, T1), - Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, SL)), + Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, SL), ?CACHE_TYPE), T2 = setelement(1, T1, Tree), case pcl_pushmem(PCL, T2) of returned -> @@ -1330,7 +1330,7 @@ foldwithimm_simple_test() -> KL1A = [{{o, "Bucket1", "Key6", null}, {7, {active, infinity}, 0, null}}, {{o, "Bucket1", "Key1", null}, {8, {active, infinity}, 0, null}}, {{o, "Bucket1", "Key8", null}, {9, {active, infinity}, 0, null}}], - IMM2 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1A)), + IMM2 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1A), ?CACHE_TYPE), IMMiter = 
leveled_tree:match_range({o, "Bucket1", "Key1", null}, {o, null, null, null}, IMM2), @@ -1356,7 +1356,7 @@ foldwithimm_simple_test() -> {{o, "Bucket1", "Key5", null}, 2}], AccA), KL1B = [{{o, "Bucket1", "Key4", null}, {10, {active, infinity}, 0, null}}|KL1A], - IMM3 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1B)), + IMM3 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1B), ?CACHE_TYPE), IMMiterB = leveled_tree:match_range({o, "Bucket1", "Key1", null}, {o, null, null, null}, IMM3), @@ -1374,7 +1374,7 @@ create_file_test() -> Filename = "../test/new_file.sst", ok = file:write_file(Filename, term_to_binary("hello")), KVL = lists:usort(generate_randomkeys(10000)), - Tree = leveled_tree:from_orderedlist(KVL), + Tree = leveled_tree:from_orderedlist(KVL, ?CACHE_TYPE), FetchFun = fun(Slot) -> lists:nth(Slot, [Tree]) end, {ok, SP, diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index fff113d..97e4d5c 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -188,7 +188,7 @@ generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> [], BucketRangeLow, BucketRangeHigh), - leveled_tree:from_orderedlist(lists:ukeysort(1, KVL)). + leveled_tree:from_orderedlist(lists:ukeysort(1, KVL), ?CACHE_TYPE). 
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> Acc; @@ -277,13 +277,13 @@ compare_method_test() -> end, [], DumpList), - Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, Q0)), + Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, Q0), ?CACHE_TYPE), Sz0 = leveled_tree:tsize(Tree), io:format("Crude method took ~w microseconds resulting in tree of " ++ "size ~w~n", [timer:now_diff(os:timestamp(), SWa), Sz0]), SWb = os:timestamp(), - Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_tree:empty()), + Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_tree:empty(?CACHE_TYPE)), Sz1 = length(Q1), io:format("Merge method took ~w microseconds resulting in tree of " ++ "size ~w~n", @@ -300,7 +300,7 @@ with_index_test() -> fun(_X, {{LedgerSQN, L0Size, L0TreeList}, L0Idx, SrcList}) -> LM1 = generate_randomkeys_aslist(LedgerSQN + 1, 2000, 1, 500), LM1Array = lists:foldl(IndexPrepareFun, new_index(), LM1), - LM1SL = leveled_tree:from_orderedlist(lists:ukeysort(1, LM1)), + LM1SL = leveled_tree:from_orderedlist(lists:ukeysort(1, LM1), ?CACHE_TYPE), UpdL0Index = add_to_index(LM1Array, L0Idx, length(L0TreeList) + 1), R = add_to_cache(L0Size, {LM1SL, LedgerSQN + 1, LedgerSQN + 2000}, diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 6f50c28..8ffde13 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -13,15 +13,17 @@ -include("include/leveled.hrl"). -export([ - from_orderedlist/1, - from_orderedset/1, + from_orderedlist/2, + from_orderedset/2, + from_orderedlist/3, + from_orderedset/3, to_list/1, match_range/3, search_range/4, match/2, search/3, tsize/1, - empty/0 + empty/1 ]). -include_lib("eunit/include/eunit.hrl"). @@ -33,12 +35,18 @@ %%% API %%%============================================================================ -from_orderedlist(OrderedList) -> - L = length(OrderedList), - {tree, L, from_orderedlist(OrderedList, [], L)}. +from_orderedset(Table, tree) -> + from_orderedlist(ets:tab2list(Table), tree, ?SKIP_WIDTH). 
-from_orderedset(Table) -> - from_orderedlist(ets:tab2list(Table)). +from_orderedset(Table, tree, SkipWidth) -> + from_orderedlist(ets:tab2list(Table), tree, SkipWidth). + +from_orderedlist(OrderedList, tree) -> + from_orderedlist(OrderedList, tree, ?SKIP_WIDTH). + +from_orderedlist(OrderedList, tree, SkipWidth) -> + L = length(OrderedList), + {tree, L, from_orderedlist(OrderedList, [], L, SkipWidth)}. match(Key, {tree, _L, Tree}) -> Iter = tree_iterator_from(Key, Tree), @@ -90,7 +98,7 @@ to_list({tree, _L, Tree}) -> tsize({tree, L, _Tree}) -> L. -empty() -> +empty(tree) -> {tree, 0, empty_tree()}. %%%============================================================================ @@ -98,13 +106,13 @@ empty() -> %%%============================================================================ -from_orderedlist([], TmpList, _L) -> +from_orderedlist([], TmpList, _L, _SkipWidth) -> gb_trees:from_orddict(lists:reverse(TmpList)); -from_orderedlist(OrdList, TmpList, L) -> - SubLL = min(?SKIP_WIDTH, L), +from_orderedlist(OrdList, TmpList, L, SkipWidth) -> + SubLL = min(SkipWidth, L), {Head, Tail} = lists:split(SubLL, OrdList), {LastK, _LastV} = lists:last(Head), - from_orderedlist(Tail, [{LastK, Head}|TmpList], L - SubLL). + from_orderedlist(Tail, [{LastK, Head}|TmpList], L - SubLL, SkipWidth). lookup_match(_Key, []) -> none; @@ -256,7 +264,7 @@ tree_search_test() -> {N * 4, N * 4 - 2} end, KL = lists:map(MapFun, lists:seq(1, 50)), - T = from_orderedlist(KL), + T = from_orderedlist(KL, tree), StartKeyFun = fun(V) -> V end, @@ -274,22 +282,29 @@ tree_search_test() -> io:format(user, "10 range tests in ~w microseconds~n", [timer:now_diff(os:timestamp(), SW)]). - + tree_test() -> + tree_test_by_width(8), + tree_test_by_width(16), + tree_test_by_width(32), + tree_test_by_width(4). 
+ +tree_test_by_width(Width) -> + io:format(user, "~nTree test for width: ~w~n", [Width]), N = 4000, KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), OS = ets:new(test, [ordered_set, private]), ets:insert(OS, KL), SWaETS = os:timestamp(), - Tree0 = from_orderedset(OS), + Tree0 = from_orderedset(OS, tree, Width), io:format(user, "Generating tree from ETS in ~w microseconds" ++ " of size ~w~n", [timer:now_diff(os:timestamp(), SWaETS), tsize(Tree0)]), SWaGSL = os:timestamp(), - Tree1 = from_orderedlist(KL), + Tree1 = from_orderedlist(KL, tree, Width), io:format(user, "Generating tree from orddict in ~w microseconds" ++ " of size ~w~n", [timer:now_diff(os:timestamp(), SWaGSL), From d6761259e1ab3eb4e2eea9035c2cdd49e1979923 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 21 Jan 2017 12:54:32 +0000 Subject: [PATCH 10/25] Experiment with separating index from elements --- src/leveled_tree.erl | 90 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 20 deletions(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 8ffde13..aed19a7 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -35,18 +35,22 @@ %%% API %%%============================================================================ -from_orderedset(Table, tree) -> - from_orderedlist(ets:tab2list(Table), tree, ?SKIP_WIDTH). +from_orderedset(Table, Type) -> + from_orderedlist(ets:tab2list(Table), Type, ?SKIP_WIDTH). -from_orderedset(Table, tree, SkipWidth) -> - from_orderedlist(ets:tab2list(Table), tree, SkipWidth). -from_orderedlist(OrderedList, tree) -> - from_orderedlist(OrderedList, tree, ?SKIP_WIDTH). +from_orderedset(Table, Type, SkipWidth) -> + from_orderedlist(ets:tab2list(Table), Type, SkipWidth). + +from_orderedlist(OrderedList, Type) -> + from_orderedlist(OrderedList, Type, ?SKIP_WIDTH). from_orderedlist(OrderedList, tree, SkipWidth) -> L = length(OrderedList), - {tree, L, from_orderedlist(OrderedList, [], L, SkipWidth)}. 
+ {tree, L, tree_fromorderedlist(OrderedList, [], L, SkipWidth)}; +from_orderedlist(OrderedList, idxt, SkipWidth) -> + L = length(OrderedList), + {idxt, L, idxt_fromorderedlist(OrderedList, {[], [], 1}, L, SkipWidth)}. match(Key, {tree, _L, Tree}) -> Iter = tree_iterator_from(Key, Tree), @@ -55,6 +59,14 @@ match(Key, {tree, _L, Tree}) -> none; {_NK, SL, _Iter} -> lookup_match(Key, SL) + end; +match(Key, {idxt, _L, {TLI, IDX}}) -> + Iter = tree_iterator_from(Key, IDX), + case tree_next(Iter) of + none -> + none; + {_NK, ListID, _Iter} -> + lookup_match(Key, element(ListID, TLI)) end. search(Key, {tree, _L, Tree}, StartKeyFun) -> @@ -70,8 +82,23 @@ search(Key, {tree, _L, Tree}, StartKeyFun) -> false -> {K, V} end + end; +search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) -> + Iter = tree_iterator_from(Key, IDX), + case tree_next(Iter) of + none -> + none; + {_NK, ListID, _Iter} -> + {K, V} = lookup_best(Key, element(ListID, TLI)), + case K < StartKeyFun(V) of + true -> + none; + false -> + {K, V} + end end. + match_range(StartRange, EndRange, {tree, _L, Tree}) -> EndRangeFun = fun(ER, FirstRHSKey, _FirstRHSValue) -> @@ -95,7 +122,7 @@ to_list({tree, _L, Tree}) -> end, lists:foldl(FoldFun, [], tree_to_list(Tree)). -tsize({tree, L, _Tree}) -> +tsize({_Type, L, _Tree}) -> L. empty(tree) -> @@ -106,14 +133,28 @@ empty(tree) -> %%%============================================================================ -from_orderedlist([], TmpList, _L, _SkipWidth) -> +tree_fromorderedlist([], TmpList, _L, _SkipWidth) -> gb_trees:from_orddict(lists:reverse(TmpList)); -from_orderedlist(OrdList, TmpList, L, SkipWidth) -> +tree_fromorderedlist(OrdList, TmpList, L, SkipWidth) -> SubLL = min(SkipWidth, L), {Head, Tail} = lists:split(SubLL, OrdList), {LastK, _LastV} = lists:last(Head), - from_orderedlist(Tail, [{LastK, Head}|TmpList], L - SubLL, SkipWidth). + tree_fromorderedlist(Tail, [{LastK, Head}|TmpList], L - SubLL, SkipWidth). 
+idxt_fromorderedlist([], {TmpListElements, TmpListIdx, _C}, _L, _SkipWidth) -> + {list_to_tuple(lists:reverse(TmpListElements)), + gb_trees:from_orddict(lists:reverse(TmpListIdx))}; +idxt_fromorderedlist(OrdList, {TmpListElements, TmpListIdx, C}, L, SkipWidth) -> + SubLL = min(SkipWidth, L), + {Head, Tail} = lists:split(SubLL, OrdList), + {LastK, _LastV} = lists:last(Head), + idxt_fromorderedlist(Tail, + {[Head|TmpListElements], + [{LastK, C}|TmpListIdx], + C + 1}, + L - SubLL, + SkipWidth). + lookup_match(_Key, []) -> none; lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key -> @@ -284,27 +325,29 @@ tree_search_test() -> tree_test() -> - tree_test_by_width(8), - tree_test_by_width(16), - tree_test_by_width(32), - tree_test_by_width(4). + tree_test_by_width(8, tree), + tree_test_by_width(16, tree), + tree_test_by_width(32, tree), + tree_test_by_width(4, tree), + + tree_test_by_width(16, idxt). -tree_test_by_width(Width) -> - io:format(user, "~nTree test for width: ~w~n", [Width]), +tree_test_by_width(Width, Type) -> + io:format(user, "~nTree test for type and width: ~w ~w~n", [Type, Width]), N = 4000, KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), OS = ets:new(test, [ordered_set, private]), ets:insert(OS, KL), SWaETS = os:timestamp(), - Tree0 = from_orderedset(OS, tree, Width), + Tree0 = from_orderedset(OS, Type, Width), io:format(user, "Generating tree from ETS in ~w microseconds" ++ " of size ~w~n", [timer:now_diff(os:timestamp(), SWaETS), tsize(Tree0)]), SWaGSL = os:timestamp(), - Tree1 = from_orderedlist(KL, tree, Width), + Tree1 = from_orderedlist(KL, Type, Width), io:format(user, "Generating tree from orddict in ~w microseconds" ++ " of size ~w~n", [timer:now_diff(os:timestamp(), SWaGSL), @@ -335,8 +378,15 @@ tree_test_by_width(Width) -> lists:foreach(search_nearmatch_fun(Tree0), SrchKL), lists:foreach(search_nearmatch_fun(Tree1), SrchKL), io:format(user, "Search all keys twice for near match in ~w microseconds~n", - 
[timer:now_diff(os:timestamp(), SWaSRCH2)]), + [timer:now_diff(os:timestamp(), SWaSRCH2)]). + + +tree_matchrange_test() -> + N = 4000, + KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), + Tree0 = from_orderedlist(KL, tree), + FirstKey = element(1, lists:nth(1, KL)), FinalKey = element(1, lists:last(KL)), PenultimateKey = element(1, lists:nth(length(KL) - 1, KL)), From 917473dfdc03547c1a6d42f96cafd6f4c7db6014 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 21 Jan 2017 13:41:41 +0000 Subject: [PATCH 11/25] Alternative tree As an alternative to a tree - try a tree where the list of elements and the tree to lookup the list of elements are kept in separate objects --- src/leveled_tree.erl | 120 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 25 deletions(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index aed19a7..4cf30ee 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -99,28 +99,39 @@ search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) -> end. -match_range(StartRange, EndRange, {tree, _L, Tree}) -> +match_range(StartRange, EndRange, Tree) -> EndRangeFun = fun(ER, FirstRHSKey, _FirstRHSValue) -> ER == FirstRHSKey end, - lookup_range_start(StartRange, EndRange, Tree, EndRangeFun). + match_range(StartRange, EndRange, Tree, EndRangeFun). +match_range(StartRange, EndRange, {tree, _L, Tree}, EndRangeFun) -> + treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun); +match_range(StartRange, EndRange, {idxt, _L, Tree}, EndRangeFun) -> + idxtlookup_range_start(StartRange, EndRange, Tree, EndRangeFun). -search_range(StartRange, EndRange, {tree, _L, Tree}, StartKeyFun) -> +search_range(StartRange, EndRange, Tree, StartKeyFun) -> EndRangeFun = fun(ER, _FirstRHSKey, FirstRHSValue) -> StartRHSKey = StartKeyFun(FirstRHSValue), ER >= StartRHSKey end, - lookup_range_start(StartRange, EndRange, Tree, EndRangeFun). 
+ case Tree of + {tree, _L, T} -> + treelookup_range_start(StartRange, EndRange, T, EndRangeFun); + {idxt, _L, T} -> + idxtlookup_range_start(StartRange, EndRange, T, EndRangeFun) + end. to_list({tree, _L, Tree}) -> FoldFun = fun({_MK, SL}, Acc) -> Acc ++ SL end, - lists:foldl(FoldFun, [], tree_to_list(Tree)). + lists:foldl(FoldFun, [], tree_to_list(Tree)); +to_list({idxt, _L, {TLI, _IDX}}) -> + lists:append(tuple_to_list(TLI)). tsize({_Type, L, _Tree}) -> L. @@ -169,7 +180,7 @@ lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key -> lookup_best(Key, [_Top|Tail]) -> lookup_best(Key, Tail). -lookup_range_start(StartRange, EndRange, Tree, EndRangeFun) -> +treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun) -> Iter0 = tree_iterator_from(StartRange, Tree), case tree_next(Iter0) of none -> @@ -180,10 +191,10 @@ lookup_range_start(StartRange, EndRange, Tree, EndRangeFun) -> K < StartRange end, {_LHS, RHS} = lists:splitwith(PredFun, SL), - lookup_range_end(EndRange, {NK, RHS}, Iter1, [], EndRangeFun) + treelookup_range_end(EndRange, {NK, RHS}, Iter1, [], EndRangeFun) end. -lookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) -> +treelookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) -> PredFun = fun({K, _V}) -> not leveled_codec:endkey_passed(EndRange, K) @@ -208,11 +219,58 @@ lookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) -> none -> UpdOutput; {NK1, SL1, Iter1} -> - lookup_range_end(EndRange, - {NK1, SL1}, - Iter1, - UpdOutput, - EndRangeFun) + treelookup_range_end(EndRange, + {NK1, SL1}, + Iter1, + UpdOutput, + EndRangeFun) + end + end. 
+ +idxtlookup_range_start(StartRange, EndRange, {TLI, IDX}, EndRangeFun) -> + Iter0 = tree_iterator_from(StartRange, IDX), + case tree_next(Iter0) of + none -> + []; + {NK, ListID, Iter1} -> + PredFun = + fun({K, _V}) -> + K < StartRange + end, + {_LHS, RHS} = lists:splitwith(PredFun, element(ListID, TLI)), + idxtlookup_range_end(EndRange, {TLI, NK, RHS}, Iter1, [], EndRangeFun) + end. + +idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) -> + PredFun = + fun({K, _V}) -> + not leveled_codec:endkey_passed(EndRange, K) + end, + case leveled_codec:endkey_passed(EndRange, NK0) of + true -> + {LHS, RHS} = lists:splitwith(PredFun, SL0), + case RHS of + [] -> + Output ++ LHS; + [{FirstRHSKey, FirstRHSValue}|_Rest] -> + case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of + true -> + Output ++ LHS ++ [{FirstRHSKey, FirstRHSValue}]; + false -> + Output ++ LHS + end + end; + false -> + UpdOutput = Output ++ SL0, + case tree_next(Iter0) of + none -> + UpdOutput; + {NK1, ListID, Iter1} -> + idxtlookup_range_end(EndRange, + {TLI, NK1, element(ListID, TLI)}, + Iter1, + UpdOutput, + EndRangeFun) end end. @@ -300,12 +358,18 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> tree_search_test() -> + search_test_by_type(tree). + +idxt_search_test() -> + search_test_by_type(idxt). + +search_test_by_type(Type) -> MapFun = fun(N) -> {N * 4, N * 4 - 2} end, KL = lists:map(MapFun, lists:seq(1, 50)), - T = from_orderedlist(KL, tree), + T = from_orderedlist(KL, Type), StartKeyFun = fun(V) -> V end, @@ -320,19 +384,19 @@ tree_search_test() -> ?assertMatch(49, length(search_range(2, 197, T, StartKeyFun))), ?assertMatch(49, length(search_range(4, 197, T, StartKeyFun))), ?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))), - io:format(user, "10 range tests in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SW)]). + io:format(user, "10 range tests with type ~w in ~w microseconds~n", + [Type, timer:now_diff(os:timestamp(), SW)]). 
-tree_test() -> - tree_test_by_width(8, tree), - tree_test_by_width(16, tree), - tree_test_by_width(32, tree), - tree_test_by_width(4, tree), - - tree_test_by_width(16, idxt). +tree_timing_test() -> + tree_test_by_(8, tree), + tree_test_by_(16, tree). -tree_test_by_width(Width, Type) -> +idxt_timing_test() -> + tree_test_by_(16, idxt), + tree_test_by_(8, idxt). + +tree_test_by_(Width, Type) -> io:format(user, "~nTree test for type and width: ~w ~w~n", [Type, Width]), N = 4000, KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), @@ -383,9 +447,15 @@ tree_test_by_width(Width, Type) -> tree_matchrange_test() -> + matchrange_test_by_type(tree). + +idxt_matchrange_test() -> + matchrange_test_by_type(idxt). + +matchrange_test_by_type(Type) -> N = 4000, KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), - Tree0 = from_orderedlist(KL, tree), + Tree0 = from_orderedlist(KL, Type), FirstKey = element(1, lists:nth(1, KL)), FinalKey = element(1, lists:last(KL)), From f031558b28372ca324122eb6402556a9b6b9f81a Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 21 Jan 2017 13:45:19 +0000 Subject: [PATCH 12/25] Make idxt active implementation Use idxt for ledger_cache - ready for volume test --- include/leveled.hrl | 2 +- src/leveled_tree.erl | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/leveled.hrl b/include/leveled.hrl index 16fd39b..e1c9646 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -15,7 +15,7 @@ %% Inker key type used for tombstones -define(INKT_TOMB, tomb). --define(CACHE_TYPE, tree). +-define(CACHE_TYPE, idxt). -record(sft_options, {wait = true :: boolean(), diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 4cf30ee..a052641 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -137,7 +137,9 @@ tsize({_Type, L, _Tree}) -> L. empty(tree) -> - {tree, 0, empty_tree()}. + {tree, 0, empty_tree()}; +empty(idxt) -> + {idxt, 0, {{}, empty_tree()}}. 
%%%============================================================================ %%% Internal Functions From 0fa22ab4b3176ef0bd5df6a659411e4681afd01d Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 21 Jan 2017 16:23:24 +0000 Subject: [PATCH 13/25] Add skiplist into leveled_tree Getting ready to remove separate skiplist module. Need to add the skiplist support in leveled_tree for relative testing. --- src/leveled_tree.erl | 88 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 9 deletions(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index a052641..e0de47a 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -29,6 +29,7 @@ -include_lib("eunit/include/eunit.hrl"). -define(SKIP_WIDTH, 16). +-define(WIDTH_MAP, [{64, 4}, {512, 8}, {4096, 16}, {infinity, 32}]). %%%============================================================================ @@ -38,10 +39,10 @@ from_orderedset(Table, Type) -> from_orderedlist(ets:tab2list(Table), Type, ?SKIP_WIDTH). - from_orderedset(Table, Type, SkipWidth) -> from_orderedlist(ets:tab2list(Table), Type, SkipWidth). + from_orderedlist(OrderedList, Type) -> from_orderedlist(OrderedList, Type, ?SKIP_WIDTH). @@ -50,7 +51,19 @@ from_orderedlist(OrderedList, tree, SkipWidth) -> {tree, L, tree_fromorderedlist(OrderedList, [], L, SkipWidth)}; from_orderedlist(OrderedList, idxt, SkipWidth) -> L = length(OrderedList), - {idxt, L, idxt_fromorderedlist(OrderedList, {[], [], 1}, L, SkipWidth)}. + {idxt, L, idxt_fromorderedlist(OrderedList, {[], [], 1}, L, SkipWidth)}; +from_orderedlist(OrderedList, skpl, _SkipWidth) -> + L = length(OrderedList), + SkipWidth = + % Autosize the skip width + case L of + L when L > 4096 -> 32; + L when L > 512 -> 16; + L when L > 64 -> 8; + _ -> 4 + end, + {skpl, L, skpl_fromorderedlist(OrderedList, L, SkipWidth, 2)}. 
+ match(Key, {tree, _L, Tree}) -> Iter = tree_iterator_from(Key, Tree), @@ -67,7 +80,20 @@ match(Key, {idxt, _L, {TLI, IDX}}) -> none; {_NK, ListID, _Iter} -> lookup_match(Key, element(ListID, TLI)) - end. + end; +match(Key, {skpl, _L, SkipList}) -> + FoldFun = + fun({Mark, SL}, Acc) -> + case {Acc, Mark} of + {[], Mark} when Mark >= Key -> + SL; + _ -> + Acc + end + end, + SL1 = lists:foldl(FoldFun, [], SkipList), + SL0 = lists:foldl(FoldFun, [], SL1), + lookup_match(Key, SL0). search(Key, {tree, _L, Tree}, StartKeyFun) -> Iter = tree_iterator_from(Key, Tree), @@ -96,6 +122,25 @@ search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) -> false -> {K, V} end + end; +search(Key, {skpl, _L, SkipList}, StartKeyFun) -> + FoldFun = + fun({Mark, SL}, Acc) -> + case {Acc, Mark} of + {[], Mark} when Mark >= Key -> + SL; + _ -> + Acc + end + end, + SL1 = lists:foldl(FoldFun, [], SkipList), + SL0 = lists:foldl(FoldFun, [], SL1), + {K, V} = lookup_best(Key, SL0), + case K < StartKeyFun(V) of + true -> + none; + false -> + {K, V} end. @@ -111,6 +156,7 @@ match_range(StartRange, EndRange, {tree, _L, Tree}, EndRangeFun) -> match_range(StartRange, EndRange, {idxt, _L, Tree}, EndRangeFun) -> idxtlookup_range_start(StartRange, EndRange, Tree, EndRangeFun). + search_range(StartRange, EndRange, Tree, StartKeyFun) -> EndRangeFun = fun(ER, _FirstRHSKey, FirstRHSValue) -> @@ -124,6 +170,7 @@ search_range(StartRange, EndRange, Tree, StartKeyFun) -> idxtlookup_range_start(StartRange, EndRange, T, EndRangeFun) end. + to_list({tree, _L, Tree}) -> FoldFun = fun({_MK, SL}, Acc) -> @@ -133,6 +180,7 @@ to_list({tree, _L, Tree}) -> to_list({idxt, _L, {TLI, _IDX}}) -> lists:append(tuple_to_list(TLI)). + tsize({_Type, L, _Tree}) -> L. @@ -168,6 +216,22 @@ idxt_fromorderedlist(OrdList, {TmpListElements, TmpListIdx, C}, L, SkipWidth) -> L - SubLL, SkipWidth). 
+skpl_fromorderedlist(SkipList, _L, _SkipWidth, 0) -> + SkipList; +skpl_fromorderedlist(SkipList, L, SkipWidth, Height) -> + SkipList0 = roll_list(SkipList, L, [], SkipWidth), + skpl_fromorderedlist(SkipList0, length(SkipList0), SkipWidth, Height - 1). + +roll_list([], 0, SkipList, _SkipWidth) -> + lists:reverse(SkipList); +roll_list(KVList, L, SkipList, SkipWidth) -> + SubLL = min(SkipWidth, L), + {Head, Tail} = lists:split(SubLL, KVList), + {LastK, _LastV} = lists:last(Head), + roll_list(Tail, L - SubLL, [{LastK, Head}|SkipList], SkipWidth). + + + lookup_match(_Key, []) -> none; lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key -> @@ -391,16 +455,22 @@ search_test_by_type(Type) -> tree_timing_test() -> - tree_test_by_(8, tree), - tree_test_by_(16, tree). + tree_test_by_(16, tree, 4000), + tree_test_by_(8, tree, 1000), + tree_test_by_(4, tree, 128). idxt_timing_test() -> - tree_test_by_(16, idxt), - tree_test_by_(8, idxt). + tree_test_by_(16, idxt, 4000), + tree_test_by_(8, idxt, 1000), + tree_test_by_(4, idxt, 128). -tree_test_by_(Width, Type) -> +skpl_timing_test() -> + tree_test_by_(auto, skpl, 4000), + tree_test_by_(auto, skpl, 1000), + tree_test_by_(auto, skpl, 128). 
+ +tree_test_by_(Width, Type, N) -> io:format(user, "~nTree test for type and width: ~w ~w~n", [Type, Width]), - N = 4000, KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), OS = ets:new(test, [ordered_set, private]), From efec232e7116eb3a249ecb559cd48d42d8e6a88b Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 21 Jan 2017 16:26:23 +0000 Subject: [PATCH 14/25] Adjust test size to match SST summary --- src/leveled_tree.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index e0de47a..8e09285 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -457,17 +457,17 @@ search_test_by_type(Type) -> tree_timing_test() -> tree_test_by_(16, tree, 4000), tree_test_by_(8, tree, 1000), - tree_test_by_(4, tree, 128). + tree_test_by_(4, tree, 256). idxt_timing_test() -> tree_test_by_(16, idxt, 4000), tree_test_by_(8, idxt, 1000), - tree_test_by_(4, idxt, 128). + tree_test_by_(4, idxt, 256). skpl_timing_test() -> tree_test_by_(auto, skpl, 4000), tree_test_by_(auto, skpl, 1000), - tree_test_by_(auto, skpl, 128). + tree_test_by_(auto, skpl, 256). 
tree_test_by_(Width, Type, N) -> io:format(user, "~nTree test for type and width: ~w ~w~n", [Type, Width]), From 6d2eb1d57cada6238084bd51e274862c416cb1e0 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 21 Jan 2017 21:51:35 +0000 Subject: [PATCH 15/25] Added skiplist to datatypes --- src/leveled_tree.erl | 123 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 98 insertions(+), 25 deletions(-) diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 8e09285..e72a887 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -82,17 +82,7 @@ match(Key, {idxt, _L, {TLI, IDX}}) -> lookup_match(Key, element(ListID, TLI)) end; match(Key, {skpl, _L, SkipList}) -> - FoldFun = - fun({Mark, SL}, Acc) -> - case {Acc, Mark} of - {[], Mark} when Mark >= Key -> - SL; - _ -> - Acc - end - end, - SL1 = lists:foldl(FoldFun, [], SkipList), - SL0 = lists:foldl(FoldFun, [], SL1), + SL0 = skpl_getsublist(Key, SkipList), lookup_match(Key, SL0). search(Key, {tree, _L, Tree}, StartKeyFun) -> @@ -124,17 +114,7 @@ search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) -> end end; search(Key, {skpl, _L, SkipList}, StartKeyFun) -> - FoldFun = - fun({Mark, SL}, Acc) -> - case {Acc, Mark} of - {[], Mark} when Mark >= Key -> - SL; - _ -> - Acc - end - end, - SL1 = lists:foldl(FoldFun, [], SkipList), - SL0 = lists:foldl(FoldFun, [], SL1), + SL0 = skpl_getsublist(Key, SkipList), {K, V} = lookup_best(Key, SL0), case K < StartKeyFun(V) of true -> @@ -154,7 +134,9 @@ match_range(StartRange, EndRange, Tree) -> match_range(StartRange, EndRange, {tree, _L, Tree}, EndRangeFun) -> treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun); match_range(StartRange, EndRange, {idxt, _L, Tree}, EndRangeFun) -> - idxtlookup_range_start(StartRange, EndRange, Tree, EndRangeFun). + idxtlookup_range_start(StartRange, EndRange, Tree, EndRangeFun); +match_range(StartRange, EndRange, {skpl, _L, SkipList}, EndRangeFun) -> + skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun). 
search_range(StartRange, EndRange, Tree, StartKeyFun) -> @@ -167,7 +149,9 @@ search_range(StartRange, EndRange, Tree, StartKeyFun) -> {tree, _L, T} -> treelookup_range_start(StartRange, EndRange, T, EndRangeFun); {idxt, _L, T} -> - idxtlookup_range_start(StartRange, EndRange, T, EndRangeFun) + idxtlookup_range_start(StartRange, EndRange, T, EndRangeFun); + {skpl, _L, SL} -> + skpllookup_to_range(StartRange, EndRange, SL, EndRangeFun) end. @@ -340,6 +324,88 @@ idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) -> end end. + +skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) -> + FoldFun = + fun({K, SL}, {PassedStart, PassedEnd, Acc}) -> + case {PassedStart, PassedEnd} of + {false, false} -> + case StartRange > K of + true -> + {PassedStart, PassedEnd, Acc}; + false -> + case leveled_codec:endkey_passed(EndRange, K) of + true -> + {true, true, [SL|Acc]}; + false -> + {true, false, [SL|Acc]} + end + end; + {true, false} -> + case leveled_codec:endkey_passed(EndRange, K) of + true -> + {true, true, [SL|Acc]}; + false -> + {true, false, [SL|Acc]} + end; + {true, true} -> + {PassedStart, PassedEnd, Acc} + end + end, + Lv1List = lists:reverse(element(3, + lists:foldl(FoldFun, + {false, false, []}, + SkipList))), + Lv0List = lists:reverse(element(3, + lists:foldl(FoldFun, + {false, false, []}, + lists:append(Lv1List)))), + BeforeFun = + fun({K, _V}) -> + K < StartRange + end, + AfterFun = + fun({K, V}) -> + case leveled_codec:endkey_passed(EndRange, K) of + false -> + true; + true -> + EndRangeFun(EndRange, K, V) + end + end, + + case length(Lv0List) of + 0 -> + []; + 1 -> + RHS = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)), + lists:takewhile(AfterFun, RHS); + 2 -> + RHSofLHL = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)), + LHSofRHL = lists:takewhile(AfterFun, lists:last(Lv0List)), + RHSofLHL ++ LHSofRHL; + L -> + RHSofLHL = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)), + LHSofRHL = lists:takewhile(AfterFun, 
lists:last(Lv0List)), + MidLists = lists:sublist(Lv0List, 2, L - 2), + lists:append([RHSofLHL] ++ MidLists ++ [LHSofRHL]) + end. + + +skpl_getsublist(Key, SkipList) -> + FoldFun = + fun({Mark, SL}, Acc) -> + case {Acc, Mark} of + {none, Mark} when Mark >= Key -> + SL; + _ -> + Acc + end + end, + SL1 = lists:foldl(FoldFun, none, SkipList), + lists:foldl(FoldFun, none, SL1). + + %%%============================================================================ %%% Balance tree implementation %%%============================================================================ @@ -429,6 +495,9 @@ tree_search_test() -> idxt_search_test() -> search_test_by_type(idxt). +skpl_search_test() -> + search_test_by_type(skpl). + search_test_by_type(Type) -> MapFun = fun(N) -> @@ -517,13 +586,16 @@ tree_test_by_(Width, Type, N) -> [timer:now_diff(os:timestamp(), SWaSRCH2)]). - tree_matchrange_test() -> matchrange_test_by_type(tree). idxt_matchrange_test() -> matchrange_test_by_type(idxt). +skpl_matchrange_test() -> + matchrange_test_by_type(skpl). 
+ + matchrange_test_by_type(Type) -> N = 4000, KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), @@ -543,6 +615,7 @@ matchrange_test_by_type(Type) -> end, KL_Length = length(KL), + io:format("KL_Length ~w~n", [KL_Length]), ?assertMatch(KL_Length, LengthR(FirstKey, FinalKey, Tree0)), ?assertMatch(KL_Length, LengthR(FirstKey, PenultimateKey, Tree0) + 1), ?assertMatch(1, LengthR(all, FirstKey, Tree0)), From 58cda7d1573e516d44e9f8c27da35e002b899214 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sat, 21 Jan 2017 22:34:56 +0000 Subject: [PATCH 16/25] Switch to using skip lists from leveled_tree Remove now unused leveled_skiplist and leveled_tinybloom --- include/leveled.hrl | 2 +- src/leveled_skiplist.erl | 661 -------------------------------------- src/leveled_tinybloom.erl | 159 --------- src/leveled_tree.erl | 47 ++- 4 files changed, 34 insertions(+), 835 deletions(-) delete mode 100644 src/leveled_skiplist.erl delete mode 100644 src/leveled_tinybloom.erl diff --git a/include/leveled.hrl b/include/leveled.hrl index e1c9646..fa4dd11 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -15,7 +15,7 @@ %% Inker key type used for tombstones -define(INKT_TOMB, tomb). --define(CACHE_TYPE, idxt). +-define(CACHE_TYPE, skpl). -record(sft_options, {wait = true :: boolean(), diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl deleted file mode 100644 index b79d050..0000000 --- a/src/leveled_skiplist.erl +++ /dev/null @@ -1,661 +0,0 @@ -%% -------- SKIPLIST --------- -%% -%% For storing small numbers of {K, V} pairs where reasonable insertion and -%% fetch times, but with fast support for flattening to a list or a sublist -%% within a certain key range -%% -%% Used instead of gb_trees to retain compatability of OTP16 (and Riak's -%% ongoing dependency on OTP16) -%% -%% Not a proper skip list. Only supports a fixed depth. Good enough for the -%% purposes of leveled. Also uses peculiar enkey_passed function within -%% leveled. 
Not tested beyond a depth of 2. - --module(leveled_skiplist). - --include("include/leveled.hrl"). - --export([ - from_list/1, - from_list/2, - from_sortedlist/1, - from_sortedlist/2, - from_orderedset/1, - from_orderedset/2, - to_list/1, - enter/3, - enter/4, - enter_nolookup/3, - to_range/2, - to_range/3, - lookup/2, - lookup/3, - empty/0, - empty/1, - size/1 - ]). - --include_lib("eunit/include/eunit.hrl"). - --define(SKIP_WIDTH, 16). --define(LIST_HEIGHT, 2). --define(INFINITY_KEY, {null, null, null, null, null}). --define(BITARRAY_SIZE, 2048). - -%%%============================================================================ -%%% SkipList API -%%%============================================================================ - -enter(Key, Value, SkipList) -> - Hash = leveled_codec:magic_hash(Key), - enter(Key, Hash, Value, SkipList). - -enter(Key, Hash, Value, SkipList) -> - Bloom0 = - case element(1, SkipList) of - list_only -> - list_only; - Bloom -> - leveled_tinybloom:enter({hash, Hash}, Bloom) - end, - {Bloom0, - enter(Key, Value, erlang:phash2(Key), - element(2, SkipList), - ?SKIP_WIDTH, ?LIST_HEIGHT)}. - -%% Can iterate over a key entered this way, but never lookup the key -%% used for index terms -%% The key may still be a marker key - and the much cheaper native hash -%% is used to dtermine this, avoiding the more expensive magic hash -enter_nolookup(Key, Value, SkipList) -> - {element(1, SkipList), - enter(Key, Value, erlang:phash2(Key), - element(2, SkipList), - ?SKIP_WIDTH, ?LIST_HEIGHT)}. - -from_orderedset(Table) -> - from_orderedset(Table, false). - -from_orderedset(Table, Bloom) -> - from_sortedlist(ets:tab2list(Table), Bloom). - -from_list(UnsortedKVL) -> - from_list(UnsortedKVL, false). - -from_list(UnsortedKVL, BloomProtect) -> - KVL = lists:ukeysort(1, UnsortedKVL), - from_sortedlist(KVL, BloomProtect). - -from_sortedlist(SortedKVL) -> - from_sortedlist(SortedKVL, false). 
- -from_sortedlist([], BloomProtect) -> - empty(BloomProtect); -from_sortedlist(SortedKVL, BloomProtect) -> - Bloom0 = - case BloomProtect of - true -> - lists:foldr(fun({K, _V}, Bloom) -> - leveled_tinybloom:enter(K, Bloom) end, - leveled_tinybloom:empty(?SKIP_WIDTH), - SortedKVL); - false -> - list_only - end, - {Bloom0, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}. - -lookup(Key, SkipList) -> - case element(1, SkipList) of - list_only -> - list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT); - _ -> - lookup(Key, leveled_codec:magic_hash(Key), SkipList) - end. - -lookup(Key, Hash, SkipList) -> - case element(1, SkipList) of - list_only -> - list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT); - _ -> - case leveled_tinybloom:check({hash, Hash}, element(1, SkipList)) of - false -> - none; - true -> - list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT) - end - end. - - -%% Rather than support iterator_from like gb_trees, will just an output a key -%% sorted list for the desired range, which can the be iterated over as normal -to_range(SkipList, Start) -> - to_range(element(2, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT). - -to_range(SkipList, Start, End) -> - to_range(element(2, SkipList), Start, End, ?LIST_HEIGHT). - -to_list(SkipList) -> - to_list(element(2, SkipList), ?LIST_HEIGHT). - -empty() -> - empty(false). - -empty(BloomProtect) -> - case BloomProtect of - true -> - {leveled_tinybloom:empty(?SKIP_WIDTH), - empty([], ?LIST_HEIGHT)}; - false -> - {list_only, empty([], ?LIST_HEIGHT)} - end. - -size(SkipList) -> - size(element(2, SkipList), ?LIST_HEIGHT). 
- - -%%%============================================================================ -%%% SkipList Base Functions -%%%============================================================================ - -enter(Key, Value, Hash, SkipList, Width, 1) -> - {MarkerKey, SubList} = find_mark(Key, SkipList), - case Hash rem Width of - 0 -> - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> - K =< Key end, - SubList), - SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), - SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1], - lists:ukeysort(1, SkpL2); - _ -> - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < Key end, SubList), - UpdSubList = - case RHS of - [] -> - LHS ++ [{Key, Value}]; - [{FirstKey, _V}|RHSTail] -> - case FirstKey of - Key -> - LHS ++ [{Key, Value}] ++ RHSTail; - _ -> - LHS ++ [{Key, Value}] ++ RHS - end - end, - lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList}) - end; -enter(Key, Value, Hash, SkipList, Width, Level) -> - HashMatch = width(Level, Width), - {MarkerKey, SubSkipList} = find_mark(Key, SkipList), - UpdSubSkipList = enter(Key, Value, Hash, SubSkipList, Width, Level - 1), - case Hash rem HashMatch of - 0 -> - % - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> - K =< Key end, - UpdSubSkipList), - SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), - lists:ukeysort(1, [{Key, LHS}|SkpL1]); - _ -> - % Need to replace Marker Key with sublist - lists:keyreplace(MarkerKey, - 1, - SkipList, - {MarkerKey, UpdSubSkipList}) - end. - -from_list(SkipList, _SkipWidth, 0) -> - SkipList; -from_list(KVList, SkipWidth, ListHeight) -> - L0 = length(KVList), - SL0 = - case L0 > SkipWidth of - true -> - from_list(KVList, L0, [], SkipWidth); - false -> - {LastK, _LastSL} = lists:last(KVList), - [{LastK, KVList}] - end, - from_list(SL0, SkipWidth, ListHeight - 1). 
- -from_list([], 0, SkipList, _SkipWidth) -> - SkipList; -from_list(KVList, L, SkipList, SkipWidth) -> - SubLL = min(SkipWidth, L), - {Head, Tail} = lists:split(SubLL, KVList), - {LastK, _LastV} = lists:last(Head), - from_list(Tail, L - SubLL, SkipList ++ [{LastK, Head}], SkipWidth). - - -list_lookup(Key, SkipList, 1) -> - SubList = get_sublist(Key, SkipList), - case lists:keyfind(Key, 1, SubList) of - false -> - none; - {Key, V} -> - {value, V} - end; -list_lookup(Key, SkipList, Level) -> - SubList = get_sublist(Key, SkipList), - case SubList of - null -> - none; - _ -> - list_lookup(Key, SubList, Level - 1) - end. - - -to_list(SkipList, 1) -> - lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList); -to_list(SkipList, Level) -> - lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list(SL, Level - 1) end, - [], - SkipList). - - -to_range(SkipList, StartKey, EndKey, ListHeight) -> - to_range(SkipList, StartKey, EndKey, ListHeight, [], true). - -to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) -> - SL = sublist_above(SkipList, StartKey, ListHeight, StartIncl), - case SL of - [] -> - Acc; - _ -> - {LK, _LV} = lists:last(SL), - case leveled_codec:endkey_passed(EndKey, LK) of - false -> - to_range(SkipList, - LK, - EndKey, - ListHeight, - Acc ++ SL, - false); - true -> - SplitFun = - fun({K, _V}) -> - not leveled_codec:endkey_passed(EndKey, K) end, - LHS = lists:takewhile(SplitFun, SL), - Acc ++ LHS - end - end. - -sublist_above(SkipList, StartKey, 0, StartIncl) -> - TestFun = - fun({K, _V}) -> - case StartIncl of - true -> - K < StartKey; - false -> - K =< StartKey - end end, - lists:dropwhile(TestFun, SkipList); -sublist_above(SkipList, StartKey, Level, StartIncl) -> - TestFun = - fun({K, _SL}) -> - case StartIncl of - true -> - K < StartKey; - false -> - K =< StartKey - end end, - RHS = lists:dropwhile(TestFun, SkipList), - case RHS of - [] -> - []; - [{_K, SL}|_Rest] -> - sublist_above(SL, StartKey, Level - 1, StartIncl) - end. 
- -empty(SkipList, 1) -> - [{?INFINITY_KEY, SkipList}]; -empty(SkipList, Level) -> - empty([{?INFINITY_KEY, SkipList}], Level - 1). - -size(SkipList, 1) -> - lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList); -size(SkipList, Level) -> - lists:foldl(fun({_Mark, SL}, Acc) -> size(SL, Level - 1) + Acc end, - 0, - SkipList). - - -%%%============================================================================ -%%% Internal Functions -%%%============================================================================ - -width(1, Width) -> - Width; -width(N, Width) -> - width(N - 1, Width * Width). - -find_mark(Key, SkipList) -> - lists:foldl(fun({Marker, SL}, Acc) -> - case Acc of - false -> - case Marker >= Key of - true -> - {Marker, SL}; - false -> - Acc - end; - _ -> - Acc - end end, - false, - SkipList). - -get_sublist(Key, SkipList) -> - lists:foldl(fun({SkipKey, SL}, Acc) -> - case {Acc, SkipKey} of - {null, SkipKey} when SkipKey >= Key -> - SL; - _ -> - Acc - end end, - null, - SkipList). - -%%%============================================================================ -%%% Test -%%%============================================================================ - --ifdef(TEST). - -generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> - generate_randomkeys(Seqn, - Count, - [], - BucketRangeLow, - BucketRangeHigh). - -generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> - Acc; -generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> - BNumber = - case BRange of - 0 -> - string:right(integer_to_list(BucketLow), 4, $0); - _ -> - BRand = random:uniform(BRange), - string:right(integer_to_list(BucketLow + BRand), 4, $0) - end, - KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0), - {K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null}, - {Seqn, {active, infinity}, null}}, - generate_randomkeys(Seqn + 1, - Count - 1, - [{K, V}|Acc], - BucketLow, - BRange). 
- -skiplist_small_test() -> - % Check nothing bad happens with very small lists - lists:foreach(fun(N) -> dotest_skiplist_small(N) end, lists:seq(1, 32)). - - -dotest_skiplist_small(N) -> - KL = generate_randomkeys(1, N, 1, 2), - SkipList1 = - lists:foldl(fun({K, V}, SL) -> - enter(K, V, SL) - end, - empty(), - KL), - SkipList2 = from_list(lists:reverse(KL)), - lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList1)) - end, - lists:ukeysort(1, lists:reverse(KL))), - lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList2)) - end, - lists:ukeysort(1, lists:reverse(KL))). - -skiplist_withbloom_test() -> - io:format(user, "~n~nBloom protected skiplist test:~n~n", []), - skiplist_tester(true). - -skiplist_nobloom_test() -> - io:format(user, "~n~nBloom free skiplist test:~n~n", []), - skiplist_tester(false). - -skiplist_tester(Bloom) -> - N = 4000, - KL = generate_randomkeys(1, N, 1, N div 5), - - OS = ets:new(test, [ordered_set, private]), - ets:insert(OS, KL), - SWaETS = os:timestamp(), - SkipList = from_orderedset(OS, Bloom), - io:format(user, "Generating skip list with ~w keys in ~w microseconds " ++ - "from ordered set~n", - [N, timer:now_diff(os:timestamp(), SWaETS)]), - - SWaGSL = os:timestamp(), - SkipList = from_list(lists:reverse(KL), Bloom), - io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++ - "Top level key count of ~w~n", - [N, - timer:now_diff(os:timestamp(), SWaGSL), - length(element(2, SkipList))]), - io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - element(2, SkipList))]), - KLSorted = lists:ukeysort(1, lists:reverse(KL)), - - SWaGSL2 = os:timestamp(), - SkipList = from_sortedlist(KLSorted, Bloom), - io:format(user, "Generating skip list with ~w sorted keys in ~w " ++ - "microseconds~n", - [N, timer:now_diff(os:timestamp(), SWaGSL2)]), - - SWaDSL = os:timestamp(), - SkipList1 = - lists:foldl(fun({K, V}, SL) -> - enter(K, V, SL) - end, - 
empty(Bloom), - KL), - io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++ - "microseconds~n" ++ - "Top level key count of ~w~n", - [N, - timer:now_diff(os:timestamp(), SWaDSL), - length(element(2, SkipList1))]), - io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - element(2, SkipList1))]), - - io:format(user, "~nRunning timing tests for generated skiplist:~n", []), - skiplist_timingtest(KLSorted, SkipList, N, Bloom), - - io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), - skiplist_timingtest(KLSorted, SkipList1, N, Bloom). - - -skiplist_timingtest(KL, SkipList, N, Bloom) -> - io:format(user, "Timing tests on skiplist of size ~w~n", - [leveled_skiplist:size(SkipList)]), - CheckList1 = lists:sublist(KL, N div 4, 200), - CheckList2 = lists:sublist(KL, N div 3, 200), - CheckList3 = lists:sublist(KL, N div 2, 200), - CheckList4 = lists:sublist(KL, N - 1000, 200), - CheckList5 = lists:sublist(KL, N - 500, 200), - CheckList6 = lists:sublist(KL, 1, 10), - CheckList7 = lists:nthtail(N - 200, KL), - CheckList8 = lists:sublist(KL, N div 2, 1), - CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++ - CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7, - - SWb = os:timestamp(), - lists:foreach(fun({K, V}) -> - ?assertMatch({value, V}, lookup(K, SkipList)) - end, - CheckAll), - io:format(user, "Finding 1020 keys took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWb)]), - - RangeFun = - fun(SkipListToQuery, CheckListForQ, Assert) -> - KR = - to_range(SkipListToQuery, - element(1, lists:nth(1, CheckListForQ)), - element(1, lists:last(CheckListForQ))), - case Assert of - true -> - CompareL = length(lists:usort(CheckListForQ)), - ?assertMatch(CompareL, length(KR)); - false -> - KR - end - end, - - SWc = os:timestamp(), - RangeFun(SkipList, CheckList1, true), - RangeFun(SkipList, CheckList2, true), - RangeFun(SkipList, CheckList3, true), - RangeFun(SkipList, CheckList4, true), - 
RangeFun(SkipList, CheckList5, true), - RangeFun(SkipList, CheckList6, true), - RangeFun(SkipList, CheckList7, true), - RangeFun(SkipList, CheckList8, true), - - KL_OOR1 = generate_randomkeys(1, 4, N div 5 + 1, N div 5 + 10), - KR9 = RangeFun(SkipList, KL_OOR1, false), - ?assertMatch([], KR9), - - KL_OOR2 = generate_randomkeys(1, 4, 0, 0), - KR10 = RangeFun(SkipList, KL_OOR2, false), - ?assertMatch([], KR10), - - io:format(user, "Finding 10 ranges took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWc)]), - - AltKL1 = generate_randomkeys(1, 2000, 1, 200), - SWd0 = os:timestamp(), - lists:foreach(fun({K, _V}) -> - lookup(K, SkipList) - end, - AltKL1), - io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWd0)]), - SWd1 = os:timestamp(), - lists:foreach(fun({K, _V}) -> - leveled_codec:magic_hash(K) - end, - AltKL1), - io:format(user, "Generating 2000 magic hashes took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWd1)]), - SWd2 = os:timestamp(), - lists:foreach(fun({K, _V}) -> - erlang:phash2(K) - end, - AltKL1), - io:format(user, "Generating 2000 not so magic hashes took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWd2)]), - - AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300), - SWe = os:timestamp(), - lists:foreach(fun({K, _V}) -> - none = lookup(K, SkipList) - end, - AltKL2), - io:format(user, "Getting 1000 missing keys above range took ~w " ++ - "microseconds~n", - [timer:now_diff(os:timestamp(), SWe)]), - AltKL3 = generate_randomkeys(1, 1000, 0, 0), - SWf = os:timestamp(), - lists:foreach(fun({K, _V}) -> - none = lookup(K, SkipList) - end, - AltKL3), - io:format(user, "Getting 1000 missing keys below range took ~w " ++ - "microseconds~n", - [timer:now_diff(os:timestamp(), SWf)]), - - SWg = os:timestamp(), - FlatList = to_list(SkipList), - io:format(user, "Flattening skiplist took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWg)]), - ?assertMatch(KL, 
FlatList), - - case Bloom of - true -> - HashList = lists:map(fun(_X) -> - random:uniform(4294967295) end, - lists:seq(1, 2000)), - SWh = os:timestamp(), - lists:foreach(fun(X) -> - lookup(X, X, SkipList) end, - HashList), - io:format(user, - "Getting 2000 missing keys when hash was known " ++ - "took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWh)]); - false -> - ok - end. - -define_kv(X) -> - {{o, "Bucket", "Key" ++ string:right(integer_to_list(X), 6), null}, - {X, {active, infinity}, null}}. - -skiplist_roundsize_test() -> - KVL = lists:map(fun(X) -> define_kv(X) end, lists:seq(1, 4096)), - SkipList = from_list(KVL), - lists:foreach(fun({K, V}) -> - ?assertMatch({value, V}, lookup(K, SkipList)) end, - KVL), - lists:foreach(fun(X) -> - {KS, _VS} = define_kv(X * 32 + 1), - {KE, _VE} = define_kv((X + 1) * 32), - R = to_range(SkipList, KS, KE), - L = lists:sublist(KVL, - X * 32 + 1, - 32), - ?assertMatch(L, R) end, - lists:seq(0, 24)). - -skiplist_nolookup_test() -> - N = 4000, - KL = generate_randomkeys(1, N, 1, N div 5), - SkipList = lists:foldl(fun({K, V}, Acc) -> - enter_nolookup(K, V, Acc) end, - empty(true), - KL), - KLSorted = lists:ukeysort(1, lists:reverse(KL)), - lists:foreach(fun({K, _V}) -> - ?assertMatch(none, lookup(K, SkipList)) end, - KL), - ?assertMatch(KLSorted, to_list(SkipList)). 
- -skiplist_range_test() -> - N = 150, - KL = generate_randomkeys(1, N, 1, N div 5), - - KLSL1 = lists:sublist(lists:ukeysort(1, KL), 128), - SkipList1 = from_list(KLSL1), - {LastK1, V1} = lists:last(KLSL1), - R1 = to_range(SkipList1, LastK1, LastK1), - ?assertMatch([{LastK1, V1}], R1), - - KLSL2 = lists:sublist(lists:ukeysort(1, KL), 127), - SkipList2 = from_list(KLSL2), - {LastK2, V2} = lists:last(KLSL2), - R2 = to_range(SkipList2, LastK2, LastK2), - ?assertMatch([{LastK2, V2}], R2), - - KLSL3 = lists:sublist(lists:ukeysort(1, KL), 129), - SkipList3 = from_list(KLSL3), - {LastK3, V3} = lists:last(KLSL3), - R3 = to_range(SkipList3, LastK3, LastK3), - ?assertMatch([{LastK3, V3}], R3), - - {FirstK4, V4} = lists:nth(1, KLSL3), - R4 = to_range(SkipList3, FirstK4, FirstK4), - ?assertMatch([{FirstK4, V4}], R4). - - -empty_skiplist_size_test() -> - ?assertMatch(0, leveled_skiplist:size(empty(false))), - ?assertMatch(0, leveled_skiplist:size(empty(true))). - --endif. \ No newline at end of file diff --git a/src/leveled_tinybloom.erl b/src/leveled_tinybloom.erl deleted file mode 100644 index 2278c2a..0000000 --- a/src/leveled_tinybloom.erl +++ /dev/null @@ -1,159 +0,0 @@ -%% -------- TINY BLOOM --------- -%% -%% For sheltering relatively expensive lookups with a probabilistic check -%% -%% Uses multiple 512 byte blooms. Can sensibly hold up to 1000 keys per array. -%% Even at 1000 keys should still offer only a 20% false positive -%% -%% Restricted to no more than 256 arrays - so can't handle more than 250K keys -%% in total -%% -%% Implemented this way to make it easy to control false positive (just by -%% setting the width). Also only requires binary manipulations of a single -%% hash - --module(leveled_tinybloom). - --include("include/leveled.hrl"). - --export([ - enter/2, - check/2, - empty/1 - ]). - - --include_lib("eunit/include/eunit.hrl"). 
- -%%%============================================================================ -%%% Bloom API -%%%============================================================================ - -empty(Width) when Width =< 256 -> - FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end, - lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)). - -enter({hash, no_lookup}, Bloom) -> - Bloom; -enter({hash, Hash}, Bloom) -> - {Slot0, Bit1, Bit2} = split_hash(Hash), - Slot = Slot0 rem dict:size(Bloom), - BitArray0 = dict:fetch(Slot, Bloom), - FoldFun = - fun(Bit, Arr) -> add_to_array(Bit, Arr, 4096) end, - BitArray1 = lists:foldl(FoldFun, - BitArray0, - lists:usort([Bit1, Bit2])), - dict:store(Slot, <>, Bloom); -enter(Key, Bloom) -> - Hash = leveled_codec:magic_hash(Key), - enter({hash, Hash}, Bloom). - - -check({hash, Hash}, Bloom) -> - {Slot0, Bit1, Bit2} = split_hash(Hash), - Slot = Slot0 rem dict:size(Bloom), - BitArray = dict:fetch(Slot, Bloom), - - case getbit(Bit1, BitArray, 4096) of - <<0:1>> -> - false; - <<1:1>> -> - case getbit(Bit2, BitArray, 4096) of - <<0:1>> -> - false; - <<1:1>> -> - true - end - end; -check(Key, Bloom) -> - Hash = leveled_codec:magic_hash(Key), - check({hash, Hash}, Bloom). - - -%%%============================================================================ -%%% Internal Functions -%%%============================================================================ - -split_hash(Hash) -> - H0 = Hash band 255, - H1 = (Hash bsr 8) band 4095, - H2 = Hash bsr 20, - {H0, H1, H2}. - -add_to_array(Bit, BitArray, ArrayLength) -> - RestLen = ArrayLength - Bit - 1, - <> = BitArray, - <>. - -getbit(Bit, BitArray, ArrayLength) -> - RestLen = ArrayLength - Bit - 1, - <<_Head:Bit/bitstring, - B:1/bitstring, - _Rest:RestLen/bitstring>> = BitArray, - B. - - -%%%============================================================================ -%%% Test -%%%============================================================================ - --ifdef(TEST). 
- -simple_test() -> - N = 4000, - W = 6, - KLin = lists:map(fun(X) -> "Key_" ++ - integer_to_list(X) ++ - integer_to_list(random:uniform(100)) ++ - binary_to_list(crypto:rand_bytes(2)) - end, - lists:seq(1, N)), - KLout = lists:map(fun(X) -> - "NotKey_" ++ - integer_to_list(X) ++ - integer_to_list(random:uniform(100)) ++ - binary_to_list(crypto:rand_bytes(2)) - end, - lists:seq(1, N)), - SW0_PH = os:timestamp(), - lists:foreach(fun(X) -> erlang:phash2(X) end, KLin), - io:format(user, - "~nNative hash function hashes ~w keys in ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW0_PH)]), - SW0_MH = os:timestamp(), - lists:foreach(fun(X) -> leveled_codec:magic_hash(X) end, KLin), - io:format(user, - "~nMagic hash function hashes ~w keys in ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW0_MH)]), - - SW1 = os:timestamp(), - Bloom = lists:foldr(fun enter/2, empty(W), KLin), - io:format(user, - "~nAdding ~w keys to bloom took ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW1)]), - - SW2 = os:timestamp(), - lists:foreach(fun(X) -> ?assertMatch(true, check(X, Bloom)) end, KLin), - io:format(user, - "~nChecking ~w keys in bloom took ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW2)]), - - SW3 = os:timestamp(), - FP = lists:foldr(fun(X, Acc) -> case check(X, Bloom) of - true -> Acc + 1; - false -> Acc - end end, - 0, - KLout), - io:format(user, - "~nChecking ~w keys out of bloom took ~w microseconds " ++ - "with ~w false positive rate~n", - [N, timer:now_diff(os:timestamp(), SW3), FP / N]), - ?assertMatch(true, FP < (N div 4)). - - - --endif. \ No newline at end of file diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index e72a887..f027f54 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -162,7 +162,17 @@ to_list({tree, _L, Tree}) -> end, lists:foldl(FoldFun, [], tree_to_list(Tree)); to_list({idxt, _L, {TLI, _IDX}}) -> - lists:append(tuple_to_list(TLI)). 
+ lists:append(tuple_to_list(TLI)); +to_list({skpl, _L, SkipList}) -> + FoldFun = + fun({_M, SL}, Acc) -> + [SL|Acc] + end, + + Lv1List = lists:reverse(lists:foldl(FoldFun, [], SkipList)), + Lv0List = lists:reverse(lists:foldl(FoldFun, [], lists:append(Lv1List))), + lists:append(Lv0List). + tsize({_Type, L, _Tree}) -> @@ -171,7 +181,9 @@ tsize({_Type, L, _Tree}) -> empty(tree) -> {tree, 0, empty_tree()}; empty(idxt) -> - {idxt, 0, {{}, empty_tree()}}. + {idxt, 0, {{}, empty_tree()}}; +empty(skpl) -> + {skpl, 0, []}. %%%============================================================================ %%% Internal Functions @@ -216,14 +228,22 @@ roll_list(KVList, L, SkipList, SkipWidth) -> -lookup_match(_Key, []) -> - none; -lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key -> - none; -lookup_match(Key, [{Key, EV}|_Tail]) -> - {value, EV}; -lookup_match(Key, [_Top|Tail]) -> - lookup_match(Key, Tail). +% lookup_match(_Key, []) -> +% none; +% lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key -> +% none; +% lookup_match(Key, [{Key, EV}|_Tail]) -> +% {value, EV}; +% lookup_match(Key, [_Top|Tail]) -> +% lookup_match(Key, Tail). + +lookup_match(Key, KVList) -> + case lists:keyfind(Key, 1, KVList) of + false -> + none; + {Key, Value} -> + {value, Value} + end. lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key -> {EK, EV}; @@ -396,15 +416,14 @@ skpl_getsublist(Key, SkipList) -> FoldFun = fun({Mark, SL}, Acc) -> case {Acc, Mark} of - {none, Mark} when Mark >= Key -> + {[], Mark} when Mark >= Key -> SL; _ -> Acc end end, - SL1 = lists:foldl(FoldFun, none, SkipList), - lists:foldl(FoldFun, none, SL1). - + SL1 = lists:foldl(FoldFun, [], SkipList), + lists:foldl(FoldFun, [], SL1). 
%%%============================================================================ %%% Balance tree implementation From cc9494735b883a0787e2b2f835262c3e1e30655c Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 22 Jan 2017 23:36:16 +0000 Subject: [PATCH 17/25] Use an index tree for the lower levels of penciller manifest --- include/leveled.hrl | 2 +- src/leveled_pmanifest.erl | 148 ++++++++++++++++++++++++++++++-------- src/leveled_tree.erl | 9 ++- 3 files changed, 129 insertions(+), 30 deletions(-) diff --git a/include/leveled.hrl b/include/leveled.hrl index fa4dd11..e1c9646 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -15,7 +15,7 @@ %% Inker key type used for tombstones -define(INKT_TOMB, tomb). --define(CACHE_TYPE, skpl). +-define(CACHE_TYPE, idxt). -record(sft_options, {wait = true :: boolean(), diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 9fa50ef..6e4db8a 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -51,6 +51,8 @@ -define(MANIFEST_FILEX, "man"). -define(MANIFEST_FP, "ledger_manifest"). -define(MAX_LEVELS, 8). +-define(TREE_TYPE, idxt). +-define(TREE_WIDTH, 8). 
-record(manifest, {levels, % an array of lists or trees representing the manifest @@ -73,8 +75,16 @@ %%%============================================================================ new_manifest() -> + LevelArray0 = array:new([{size, ?MAX_LEVELS + 1}, {default, []}]), + SetLowerLevelFun = + fun(IDX, Acc) -> + array:set(IDX, leveled_tree:empty(?TREE_TYPE), Acc) + end, + LevelArray1 = lists:foldl(SetLowerLevelFun, + LevelArray0, + lists:seq(2, ?MAX_LEVELS)), #manifest{ - levels = array:new([{size, ?MAX_LEVELS + 1}, {default, []}]), + levels = LevelArray1, manifest_sqn = 0, snapshots = [], pending_deletes = dict:new(), @@ -322,58 +332,115 @@ levelzero_present(Manifest) -> %%% Internal Functions %%%============================================================================ + %% All these internal functions that work on a level are also passed LeveIdx %% even if this is not presently relevant. Currnetly levels are lists, but %% future branches may make lower levels trees or skiplists to improve fetch %% efficiency -load_level(_LevelIdx, Level, PidFun, SQNFun) -> - LevelLoadFun = +load_level(LevelIdx, Level, PidFun, SQNFun) -> + HigherLevelLoadFun = fun(ME, {L_Out, L_MaxSQN}) -> FN = ME#manifest_entry.filename, P = PidFun(FN), SQN = SQNFun(P), {[ME#manifest_entry{owner=P}|L_Out], max(SQN, L_MaxSQN)} end, - lists:foldr(LevelLoadFun, {[], 0}, Level). + LowerLevelLoadFun = + fun({EK, ME}, {L_Out, L_MaxSQN}) -> + FN = ME#manifest_entry.filename, + P = PidFun(FN), + SQN = SQNFun(P), + {[{EK, ME#manifest_entry{owner=P}}|L_Out], max(SQN, L_MaxSQN)} + end, + case LevelIdx =< 1 of + true -> + lists:foldr(HigherLevelLoadFun, {[], 0}, Level); + false -> + {L0, MaxSQN} = lists:foldr(LowerLevelLoadFun, + {[], 0}, + leveled_tree:to_list(Level)), + {leveled_tree:from_orderedlist(L0), MaxSQN} + end. 
+close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 -> + lists:foreach(CloseEntryFun, Level); close_level(_LevelIdx, Level, CloseEntryFun) -> - lists:foreach(CloseEntryFun, Level). + lists:foreach(CloseEntryFun, leveled_tree:to_list(Level)). is_empty(_LevelIdx, []) -> true; -is_empty(_LevelIdx, _Level) -> - false. +is_empty(LevelIdx, _Level) when LevelIdx =< 1 -> + false; +is_empty(_LevelIdx, Level) -> + leveled_tree:tsize(Level) == 0. +size(LevelIdx, Level) when LevelIdx =< 1 -> + length(Level); size(_LevelIdx, Level) -> - length(Level). + leveled_tree:tsize(Level). -add_entry(_LevelIdx, Level, Entries) when is_list(Entries) -> - lists:sort(Level ++ Entries); -add_entry(_LevelIdx, Level, Entry) -> - lists:sort([Entry|Level]). +pred_fun(LevelIdx, StartKey, _EndKey) when LevelIdx =< 1 -> + fun(ME) -> + ME#manifest_entry.start_key < StartKey + end; +pred_fun(_LevelIdx, _StartKey, EndKey) -> + fun({EK, _ME}) -> + EK < EndKey + end. -remove_entry(_LevelIdx, Level, Entries) when is_list(Entries) -> +add_entry(LevelIdx, Level, Entries) when is_list(Entries) -> + FirstEntry = lists:nth(1, Entries), + PredFun = pred_fun(LevelIdx, + FirstEntry#manifest_entry.start_key, + FirstEntry#manifest_entry.end_key), + case LevelIdx =< 1 of + true -> + {LHS, RHS} = lists:splitwith(PredFun, Level), + lists:append([LHS, Entries, RHS]); + false -> + {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)), + MapFun = + fun(ME) -> + {ME#manifest_entry.end_key, ME} + end, + Entries0 = lists:map(MapFun, Entries), + leveled_tree:from_orderedlist(lists:append([LHS, Entries0, RHS]), + ?TREE_TYPE, + ?TREE_WIDTH) + end; +add_entry(LevelIdx, Level, Entry) -> + add_entry(LevelIdx, Level, [Entry]). 
+ +remove_entry(LevelIdx, Level, Entries) when is_list(Entries) -> % We're assuming we're removing a sorted sublist RemLength = length(Entries), [RemStart|_Tail] = Entries, - remove_section(Level, RemStart#manifest_entry.start_key, RemLength); -remove_entry(_LevelIdx, Level, Entry) -> - remove_section(Level, Entry#manifest_entry.start_key, 1). + remove_section(LevelIdx, Level, RemStart, RemLength); +remove_entry(LevelIdx, Level, Entry) -> + remove_section(LevelIdx, Level, Entry, 1). -remove_section(Level, SectionStartKey, SectionLength) -> - PredFun = - fun(E) -> - E#manifest_entry.start_key < SectionStartKey - end, - {Pre, Rest} = lists:splitwith(PredFun, Level), - Post = lists:nthtail(SectionLength, Rest), - Pre ++ Post. +remove_section(LevelIdx, Level, FirstEntry, SectionLength) -> + PredFun = pred_fun(LevelIdx, + FirstEntry#manifest_entry.start_key, + FirstEntry#manifest_entry.end_key), + case LevelIdx =< 1 of + true -> + {LHS, RHS} = lists:splitwith(PredFun, Level), + Post = lists:nthtail(SectionLength, RHS), + lists:append([LHS, Post]); + false -> + {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)), + Post = lists:nthtail(SectionLength, RHS), + leveled_tree:from_orderedlist(lists:append([LHS, Post]), + ?TREE_TYPE, + ?TREE_WIDTH) + end. -key_lookup_level(_LevelIdx, [], _Key) -> +key_lookup_level(LevelIdx, [], _Key) when LevelIdx =< 1 -> false; -key_lookup_level(LevelIdx, [Entry|Rest], Key) -> +key_lookup_level(LevelIdx, [Entry|Rest], Key) when LevelIdx =< 1 -> case Entry#manifest_entry.end_key >= Key of true -> case Key >= Entry#manifest_entry.start_key of @@ -384,8 +451,20 @@ key_lookup_level(LevelIdx, [Entry|Rest], Key) -> end; false -> key_lookup_level(LevelIdx, Rest, Key) + end; +key_lookup_level(_LevelIdx, Level, Key) -> + StartKeyFun = + fun(ME) -> + ME#manifest_entry.start_key + end, + case leveled_tree:search(Key, Level, StartKeyFun) of + none -> + false; + {_EK, ME} -> + ME#manifest_entry.owner end. 
+ range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) -> Range = case LevelIdx > Manifest#manifest.basement of @@ -400,7 +479,7 @@ range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) -> end, lists:map(MakePointerFun, Range). -range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) -> +range_lookup_level(LevelIdx, Level, QStartKey, QEndKey) when LevelIdx =< 1 -> BeforeFun = fun(M) -> QStartKey > M#manifest_entry.end_key @@ -412,7 +491,19 @@ range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) -> end, {_Before, MaybeIn} = lists:splitwith(BeforeFun, Level), {In, _After} = lists:splitwith(NotAfterFun, MaybeIn), - In. + In; +range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) -> + StartKeyFun = + fun(ME) -> + ME#manifest_entry.start_key + end, + Range = leveled_tree:search_range(QStartKey, QEndKey, Level, StartKeyFun), + MapFun = + fun({_EK, ME}) -> + ME + end, + lists:map(MapFun, Range). + get_basement(Levels) -> GetBaseFun = @@ -456,6 +547,7 @@ open_manifestfile(RootPath, [TopManSQN|Rest]) -> open_manifestfile(RootPath, Rest) end. + %%%============================================================================ %%% Test %%%============================================================================ diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index f027f54..23404f5 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -29,7 +29,6 @@ -include_lib("eunit/include/eunit.hrl"). -define(SKIP_WIDTH, 16). --define(WIDTH_MAP, [{64, 4}, {512, 8}, {4096, 16}, {infinity, 32}]). %%%============================================================================ @@ -661,4 +660,12 @@ search_nearmatch_fun(Tree) -> ?assertMatch({NK, NV}, search(K, Tree, StartKeyFun)) end. +empty_test() -> + T0 = empty(tree), + ?assertMatch(0, tsize(T0)), + T1 = empty(skpl), + ?assertMatch(0, tsize(T1)), + T2 = empty(idxt), + ?assertMatch(0, tsize(T2)). + -endif. 
\ No newline at end of file From 2c4c5c959739b543030a1ffc684557f0cf6c12b3 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 23 Jan 2017 00:22:53 +0000 Subject: [PATCH 18/25] Corrections Support an empty list of entries being added. Also specify a tree correctly in all from_orderedlist scenarios --- src/leveled_log.erl | 2 +- src/leveled_pmanifest.erl | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index d2c8a3b..f3dbb25 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -263,7 +263,7 @@ {"SST09", {warn, "Read request exposes slot with bad CRC"}}, {"SST10", - {info, "Expansion sought to support pointer to pid ~w status ~w"}}, + {debug, "Expansion sought to support pointer to pid ~w status ~w"}}, {"CDB01", {info, "Opening file for writing with filename ~s"}}, diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 6e4db8a..d65ec9c 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -360,7 +360,7 @@ load_level(LevelIdx, Level, PidFun, SQNFun) -> {L0, MaxSQN} = lists:foldr(LowerLevelLoadFun, {[], 0}, leveled_tree:to_list(Level)), - {leveled_tree:from_orderedlist(L0), MaxSQN} + {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), MaxSQN} end. close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 -> @@ -389,6 +389,8 @@ pred_fun(_LevelIdx, _StartKey, EndKey) -> EK < EndKey end. 
+add_entry(_LevelIdx, Level, []) -> + Level; add_entry(LevelIdx, Level, Entries) when is_list(Entries) -> FirstEntry = lists:nth(1, Entries), PredFun = pred_fun(LevelIdx, From 5105df1cd64205fd2217c48ffd1a1ce45a9f8bc1 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 23 Jan 2017 11:02:54 +0000 Subject: [PATCH 19/25] Add replace capability to manifest --- src/leveled_pclerk.erl | 10 +--- src/leveled_pmanifest.erl | 116 ++++++++++++++++++++++++++++++-------- 2 files changed, 97 insertions(+), 29 deletions(-) diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 66b7c74..5055a01 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -183,16 +183,12 @@ perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN) -> ME end, SinkManifestList = lists:map(RevertPointerFun, SinkList), - Man0 = leveled_pmanifest:remove_manifest_entry(Manifest, - NewSQN, - SinkLevel, - SinkManifestList), - Man1 = leveled_pmanifest:insert_manifest_entry(Man0, + Man0 = leveled_pmanifest:replace_manifest_entry(Manifest, NewSQN, SinkLevel, + SinkManifestList, Additions), - - Man2 = leveled_pmanifest:remove_manifest_entry(Man1, + Man2 = leveled_pmanifest:remove_manifest_entry(Man0, NewSQN, SrcLevel, Src), diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index d65ec9c..d8e2ba4 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -32,6 +32,7 @@ merge_lookup/4, insert_manifest_entry/4, remove_manifest_entry/4, + replace_manifest_entry/5, switch_manifest_entry/4, mergefile_selector/2, add_snapshot/3, @@ -149,6 +150,32 @@ save_manifest(Manifest, RootPath) -> CRC = erlang:crc32(ManBin), ok = file:write_file(FP, <>). 
+ +replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, []) -> + remove_manifest_entry(Manifest, ManSQN, LevelIdx, Removals); +replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) -> + Levels = Manifest#manifest.levels, + Level = array:get(LevelIdx, Levels), + UpdLevel = replace_entry(LevelIdx, Level, Removals, Additions), + leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]), + PendingDeletes = update_pendingdeletes(ManSQN, + Removals, + Manifest#manifest.pending_deletes), + UpdLevels = array:set(LevelIdx, UpdLevel, Levels), + case is_empty(LevelIdx, UpdLevel) of + true -> + Manifest#manifest{levels = UpdLevels, + basement = get_basement(UpdLevels), + manifest_sqn = ManSQN, + pending_deletes = PendingDeletes}; + false -> + Basement = max(LevelIdx, Manifest#manifest.basement), + Manifest#manifest{levels = UpdLevels, + basement = Basement, + manifest_sqn = ManSQN, + pending_deletes = PendingDeletes} + end. + insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> Levels = Manifest#manifest.levels, Level = array:get(LevelIdx, Levels), @@ -164,22 +191,9 @@ remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> Level = array:get(LevelIdx, Levels), UpdLevel = remove_entry(LevelIdx, Level, Entry), leveled_log:log("PC019", ["remove", LevelIdx, UpdLevel]), - DelFun = - fun(E, Acc) -> - dict:store(E#manifest_entry.filename, - {ManSQN, E}, - Acc) - end, - Entries = - case is_list(Entry) of - true -> - Entry; - false -> - [Entry] - end, - PendingDeletes = lists:foldl(DelFun, - Manifest#manifest.pending_deletes, - Entries), + PendingDeletes = update_pendingdeletes(ManSQN, + Entry, + Manifest#manifest.pending_deletes), UpdLevels = array:set(LevelIdx, UpdLevel, Levels), case is_empty(LevelIdx, UpdLevel) of true -> @@ -414,13 +428,18 @@ add_entry(LevelIdx, Level, Entries) when is_list(Entries) -> add_entry(LevelIdx, Level, Entry) -> add_entry(LevelIdx, Level, [Entry]). 
-remove_entry(LevelIdx, Level, Entries) when is_list(Entries) -> +remove_entry(LevelIdx, Level, Entries) -> % We're assuming we're removing a sorted sublist - RemLength = length(Entries), - [RemStart|_Tail] = Entries, - remove_section(LevelIdx, Level, RemStart, RemLength); -remove_entry(LevelIdx, Level, Entry) -> - remove_section(LevelIdx, Level, Entry, 1). + {RemLength, FirstRemoval} = measure_removals(Entries), + remove_section(LevelIdx, Level, FirstRemoval, RemLength). + +measure_removals(Removals) -> + case is_list(Removals) of + true -> + {length(Removals), lists:nth(1, Removals)}; + false -> + {1, Removals} + end. remove_section(LevelIdx, Level, FirstEntry, SectionLength) -> PredFun = pred_fun(LevelIdx, @@ -439,6 +458,59 @@ remove_section(LevelIdx, Level, FirstEntry, SectionLength) -> ?TREE_WIDTH) end. +replace_entry(LevelIdx, Level, Removals, Additions) when LevelIdx =< 1 -> + {SectionLength, FirstEntry} = measure_removals(Removals), + PredFun = pred_fun(LevelIdx, + FirstEntry#manifest_entry.start_key, + FirstEntry#manifest_entry.end_key), + {LHS, RHS} = lists:splitwith(PredFun, Level), + Post = lists:nthtail(SectionLength, RHS), + case is_list(Additions) of + true -> + lists:append([LHS, Additions, Post]); + false -> + lists:append([LHS, [Additions], Post]) + end; +replace_entry(LevelIdx, Level, Removals, Additions) -> + {SectionLength, FirstEntry} = measure_removals(Removals), + PredFun = pred_fun(LevelIdx, + FirstEntry#manifest_entry.start_key, + FirstEntry#manifest_entry.end_key), + {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)), + Post = lists:nthtail(SectionLength, RHS), + UpdList = + case is_list(Additions) of + true -> + MapFun = + fun(ME) -> + {ME#manifest_entry.end_key, ME} + end, + Additions0 = lists:map(MapFun, Additions), + lists:append([LHS, Additions0, Post]); + false -> + lists:append([LHS, + [{Additions#manifest_entry.end_key, + Additions}], + Post]) + end, + leveled_tree:from_orderedlist(UpdList, ?TREE_TYPE, 
?TREE_WIDTH). + + +update_pendingdeletes(ManSQN, Removals, PendingDeletes) -> + DelFun = + fun(E, Acc) -> + dict:store(E#manifest_entry.filename, + {ManSQN, E}, + Acc) + end, + Entries = + case is_list(Removals) of + true -> + Removals; + false -> + [Removals] + end, + lists:foldl(DelFun, PendingDeletes, Entries). key_lookup_level(LevelIdx, [], _Key) when LevelIdx =< 1 -> false; From 90c920fe86f2f4511bd838ab120717071fbd7767 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 23 Jan 2017 15:15:40 +0000 Subject: [PATCH 20/25] Additional unit test work Reverts a previous ct test fix --- src/leveled_codec.erl | 35 ---------------- src/leveled_log.erl | 4 +- src/leveled_pmanifest.erl | 70 ++++++++++++++++++++++++++++++- src/leveled_tree.erl | 88 ++++++++++++++++++++++++++++++--------- 4 files changed, 139 insertions(+), 58 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 6dbbff4..6360e2b 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -34,7 +34,6 @@ -export([ inker_reload_strategy/1, - strip_to_keyonly/1, strip_to_seqonly/1, strip_to_statusonly/1, strip_to_keyseqonly/1, @@ -44,7 +43,6 @@ endkey_passed/2, key_dominates/2, maybe_reap_expiredkey/2, - print_key/1, to_ledgerkey/3, to_ledgerkey/5, from_ledgerkey/1, @@ -108,8 +106,6 @@ inker_reload_strategy(AltList) -> ReloadStrategy0, AltList). -strip_to_keyonly({K, _V}) -> K. - strip_to_statusonly({_, {_, St, _, _}}) -> St. strip_to_seqonly({_, {SeqN, _, _, _}}) -> SeqN. @@ -252,33 +248,6 @@ create_value_for_journal(Value) -> hash(Obj) -> erlang:phash2(term_to_binary(Obj)). -% Return a tuple of strings to ease the printing of keys to logs -print_key(Key) -> - {A_STR, B_TERM, C_TERM} = case Key of - {?STD_TAG, B, K, _SK} -> - {"Object", B, K}; - {?RIAK_TAG, B, K, _SK} -> - {"RiakObject", B, K}; - {?IDX_TAG, B, {F, _V}, _K} -> - {"Index", B, F} - end, - B_STR = turn_to_string(B_TERM), - C_STR = turn_to_string(C_TERM), - {A_STR, B_STR, C_STR}. 
- -turn_to_string(Item) -> - if - is_binary(Item) == true -> - binary_to_list(Item); - is_integer(Item) == true -> - integer_to_list(Item); - is_list(Item) == true -> - Item; - true -> - [Output] = io_lib:format("~w", [Item]), - Output - end. - % Compare a key against a query key, only comparing elements that are non-null % in the Query key. This is used for comparing against end keys in queries. @@ -461,10 +430,6 @@ endkey_passed_test() -> ?assertMatch(false, endkey_passed(TestKey, K1)), ?assertMatch(true, endkey_passed(TestKey, K2)). -stringcheck_test() -> - ?assertMatch("Bucket", turn_to_string("Bucket")), - ?assertMatch("Bucket", turn_to_string(<<"Bucket">>)), - ?assertMatch("bucket", turn_to_string(bucket)). %% Test below proved that the overhead of performing hashes was trivial %% Maybe 5 microseconds per hash diff --git a/src/leveled_log.erl b/src/leveled_log.erl index f3dbb25..618c871 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -15,8 +15,8 @@ sst_timing/3]). -define(PUT_LOGPOINT, 20000). --define(HEAD_LOGPOINT, 160000). --define(GET_LOGPOINT, 160000). +-define(HEAD_LOGPOINT, 50000). +-define(GET_LOGPOINT, 50000). -define(SST_LOGPOINT, 20000). -define(LOG_LEVEL, [info, warn, error, critical]). -define(SAMPLE_RATE, 16). diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index d8e2ba4..18e80c4 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -151,8 +151,6 @@ save_manifest(Manifest, RootPath) -> ok = file:write_file(FP, <>). -replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, []) -> - remove_manifest_entry(Manifest, ManSQN, LevelIdx, Removals); replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) -> Levels = Manifest#manifest.levels, Level = array:get(LevelIdx, Levels), @@ -753,6 +751,74 @@ keylookup_manifest_test() -> ?assertMatch("pid_y3", key_lookup(Man13, 1, LK1_4)), ?assertMatch("pid_z5", key_lookup(Man13, 2, LK1_4)). 
+ext_keylookup_manifest_test() -> + RP = "../test", + {_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(), + save_manifest(Man6, RP), + + E7 = #manifest_entry{start_key={o, "Bucket1", "K997", null}, + end_key={o, "Bucket1", "K999", null}, + filename="Z7", + owner="pid_z7"}, + Man7 = insert_manifest_entry(Man6, 2, 2, E7), + save_manifest(Man7, RP), + ManOpen1 = open_manifest(RP), + ?assertMatch(2, get_manifest_sqn(ManOpen1)), + + Man7FN = filepath(RP, 2, current_manifest), + {ok, Bin} = file:read_file(Man7FN), + RandPos = random:uniform(bit_size(Bin) - 1), + <> = Bin, + Flipped = BitToFlip bxor 1, + ok = file:write_file(Man7FN, + <>), + + ?assertMatch(2, get_manifest_sqn(Man7)), + + ManOpen2 = open_manifest(RP), + ?assertMatch(1, get_manifest_sqn(ManOpen2)), + + E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"}, + end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"}, + filename="Z1", + owner="pid_z1"}, + E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"}, + end_key={o, "Bucket1", "K71", null}, + filename="Z2", + owner="pid_z2"}, + E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null}, + end_key={o, "Bucket1", "K993", null}, + filename="Z3", + owner="pid_z3"}, + + E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"}, + end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"}, + owner="pid_y1", + filename="Y1"}, + E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"}, + end_key={o, "Bucket1", "K45", null}, + owner="pid_y2", + filename="Y2"}, + E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null}, + end_key={o, "Bucket1", "K812", null}, + owner="pid_y3", + filename="Y3"}, + E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null}, + end_key={o, "Bucket1", "K998", null}, + owner="pid_y4", + filename="Y4"}, + + Man8 = replace_manifest_entry(ManOpen2, 2, 1, E1, E1_2), + Man9 = remove_manifest_entry(Man8, 2, 1, [E2, E3]), + Man10 = insert_manifest_entry(Man9, 2, 1, [E2_2, E3_2, 
E4_2]), + ?assertMatch(2, get_manifest_sqn(Man10)), + + LK1_4 = {o, "Bucket1", "K75", null}, + ?assertMatch("pid_y3", key_lookup(Man10, 1, LK1_4)), + ?assertMatch("pid_z5", key_lookup(Man10, 2, LK1_4)). + rangequery_manifest_test() -> {_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(), diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 23404f5..8804847 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -91,7 +91,7 @@ search(Key, {tree, _L, Tree}, StartKeyFun) -> none; {_NK, SL, _Iter} -> {K, V} = lookup_best(Key, SL), - case K < StartKeyFun(V) of + case Key < StartKeyFun(V) of true -> none; false -> @@ -105,7 +105,7 @@ search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) -> none; {_NK, ListID, _Iter} -> {K, V} = lookup_best(Key, element(ListID, TLI)), - case K < StartKeyFun(V) of + case Key < StartKeyFun(V) of true -> none; false -> @@ -114,15 +114,18 @@ search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) -> end; search(Key, {skpl, _L, SkipList}, StartKeyFun) -> SL0 = skpl_getsublist(Key, SkipList), - {K, V} = lookup_best(Key, SL0), - case K < StartKeyFun(V) of - true -> - none; - false -> - {K, V} + case lookup_best(Key, SL0) of + {K, V} -> + case Key < StartKeyFun(V) of + true -> + none; + false -> + {K, V} + end; + none -> + none end. - match_range(StartRange, EndRange, Tree) -> EndRangeFun = fun(ER, FirstRHSKey, _FirstRHSValue) -> @@ -244,6 +247,8 @@ lookup_match(Key, KVList) -> {value, Value} end. 
+lookup_best(_Key, []) -> + none; lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key -> {EK, EV}; lookup_best(Key, [_Top|Tail]) -> @@ -489,14 +494,8 @@ generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> Acc; generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> - BNumber = - case BRange of - 0 -> - string:right(integer_to_list(BucketLow), 4, $0); - _ -> - BRand = random:uniform(BRange), - string:right(integer_to_list(BucketLow + BRand), 4, $0) - end, + BRand = random:uniform(BRange), + BNumber = string:right(integer_to_list(BucketLow + BRand), 4, $0), KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0), {K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null}, {Seqn, {active, infinity}, null}}, @@ -525,7 +524,6 @@ search_test_by_type(Type) -> T = from_orderedlist(KL, Type), StartKeyFun = fun(V) -> V end, - SW = os:timestamp(), ?assertMatch([], search_range(0, 1, T, StartKeyFun)), ?assertMatch([], search_range(201, 202, T, StartKeyFun)), @@ -539,8 +537,59 @@ search_test_by_type(Type) -> ?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))), io:format(user, "10 range tests with type ~w in ~w microseconds~n", [Type, timer:now_diff(os:timestamp(), SW)]). - + +tree_oor_test() -> + outofrange_test_by_type(tree). + +idxt_oor_test() -> + outofrange_test_by_type(idxt). + +skpl_oor_test() -> + outofrange_test_by_type(skpl). 
+ +outofrange_test_by_type(Type) -> + MapFun = + fun(N) -> + {N * 4, N * 4 - 2} + end, + KL = lists:map(MapFun, lists:seq(1, 50)), + T = from_orderedlist(KL, Type), + + io:format("Out of range searches~n"), + ?assertMatch(none, match(0, T)), + ?assertMatch(none, match(5, T)), + ?assertMatch(none, match(97, T)), + ?assertMatch(none, match(197, T)), + ?assertMatch(none, match(201, T)), + + StartKeyFun = fun(V) -> V end, + + ?assertMatch(none, search(0, T, StartKeyFun)), + ?assertMatch(none, search(5, T, StartKeyFun)), + ?assertMatch(none, search(97, T, StartKeyFun)), + ?assertMatch(none, search(197, T, StartKeyFun)), + ?assertMatch(none, search(201, T, StartKeyFun)). + +tree_tolist_test() -> + tolist_test_by_type(tree). + +idxt_tolist_test() -> + tolist_test_by_type(idxt). + +skpl_tolist_test() -> + tolist_test_by_type(skpl). + +tolist_test_by_type(Type) -> + MapFun = + fun(N) -> + {N * 4, N * 4 - 2} + end, + KL = lists:map(MapFun, lists:seq(1, 50)), + T = from_orderedlist(KL, Type), + T_Reverse = to_list(T), + ?assertMatch(KL, T_Reverse). + tree_timing_test() -> tree_test_by_(16, tree, 4000), tree_test_by_(8, tree, 1000), @@ -552,6 +601,7 @@ idxt_timing_test() -> tree_test_by_(4, idxt, 256). skpl_timing_test() -> + tree_test_by_(auto, skpl, 6000), tree_test_by_(auto, skpl, 4000), tree_test_by_(auto, skpl, 1000), tree_test_by_(auto, skpl, 256). 
From fb896f13b1065830529951c1f8fee3157e5c8129 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 23 Jan 2017 18:56:01 +0000 Subject: [PATCH 21/25] Improve logging - add timestamp to logs --- src/leveled_log.erl | 23 +++++++++++++++++++---- src/leveled_tree.erl | 20 +++++++++++++++----- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 618c871..f60ca76 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -309,10 +309,12 @@ log(LogReference, Subs) -> - {ok, {LogLevel, LogText}} = dict:find(LogReference, ?LOGBASE), + {LogLevel, LogText} = dict:fetch(LogReference, ?LOGBASE), case lists:member(LogLevel, ?LOG_LEVEL) of true -> - io:format(LogReference ++ " ~w " ++ LogText ++ "~n", + io:format(format_time() + ++ " " ++ LogReference ++ " ~w " + ++ LogText ++ "~n", [self()|Subs]); false -> ok @@ -320,7 +322,7 @@ log(LogReference, Subs) -> log_timer(LogReference, Subs, StartTime) -> - {ok, {LogLevel, LogText}} = dict:find(LogReference, ?LOGBASE), + {LogLevel, LogText} = dict:fetch(LogReference, ?LOGBASE), case lists:member(LogLevel, ?LOG_LEVEL) of true -> MicroS = timer:now_diff(os:timestamp(), StartTime), @@ -330,7 +332,9 @@ log_timer(LogReference, Subs, StartTime) -> MicroS -> {"ms", MicroS div 1000} end, - io:format(LogReference ++ " ~w " ++ LogText + io:format(format_time() + ++ LogReference ++ " ~w " + ++ LogText ++ " with time taken ~w " ++ Unit ++ "~n", [self()|Subs] ++ [Time]); false -> @@ -510,6 +514,17 @@ gen_timing_int({N, TimerD}, T0, TimerType, _KeyListFun, _LogPoint, _LogRef) -> TimerD)}. +format_time() -> + format_time(localtime_ms()). + +localtime_ms() -> + {_, _, Micro} = Now = os:timestamp(), + {Date, {Hours, Minutes, Seconds}} = calendar:now_to_local_time(Now), + {Date, {Hours, Minutes, Seconds, Micro div 1000 rem 1000}}. 
+ +format_time({{Y, M, D}, {H, Mi, S, Ms}}) -> + io_lib:format("~b-~2..0b-~2..0b", [Y, M, D]) ++ "T" ++ + io_lib:format("~2..0b:~2..0b:~2..0b.~3..0b", [H, Mi, S, Ms]). %%%============================================================================ diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 8804847..ba07a3f 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -524,7 +524,7 @@ search_test_by_type(Type) -> T = from_orderedlist(KL, Type), StartKeyFun = fun(V) -> V end, - SW = os:timestamp(), + statistics(runtime), ?assertMatch([], search_range(0, 1, T, StartKeyFun)), ?assertMatch([], search_range(201, 202, T, StartKeyFun)), ?assertMatch([{4, 2}], search_range(2, 4, T, StartKeyFun)), @@ -535,8 +535,9 @@ search_test_by_type(Type) -> ?assertMatch(49, length(search_range(2, 197, T, StartKeyFun))), ?assertMatch(49, length(search_range(4, 197, T, StartKeyFun))), ?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))), + {_, T1} = statistics(runtime), io:format(user, "10 range tests with type ~w in ~w microseconds~n", - [Type, timer:now_diff(os:timestamp(), SW)]). + [Type, T1]). tree_oor_test() -> @@ -591,21 +592,30 @@ tolist_test_by_type(Type) -> ?assertMatch(KL, T_Reverse). tree_timing_test() -> - tree_test_by_(16, tree, 4000), + log_tree_test_by_(16, tree, 4000), tree_test_by_(8, tree, 1000), tree_test_by_(4, tree, 256). idxt_timing_test() -> - tree_test_by_(16, idxt, 4000), + log_tree_test_by_(16, idxt, 4000), tree_test_by_(8, idxt, 1000), tree_test_by_(4, idxt, 256). skpl_timing_test() -> tree_test_by_(auto, skpl, 6000), - tree_test_by_(auto, skpl, 4000), + log_tree_test_by_(auto, skpl, 4000), tree_test_by_(auto, skpl, 1000), tree_test_by_(auto, skpl, 256). 
+log_tree_test_by_(Width, Type, N) -> + erlang:statistics(runtime), + G0 = erlang:statistics(garbage_collection), + tree_test_by_(Width, Type, N), + {_, T1} = erlang:statistics(runtime), + G1 = erlang:statistics(garbage_collection), + io:format(user, "Test took ~w ms and GC transitioned from ~w to ~w~n", + [T1, G0, G1]). + tree_test_by_(Width, Type, N) -> io:format(user, "~nTree test for type and width: ~w ~w~n", [Type, Width]), KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), From 861cedf45e5f4b74877511f367297bfa324ed25c Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 23 Jan 2017 19:16:17 +0000 Subject: [PATCH 22/25] Add back missed space in logs --- src/leveled_log.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index f60ca76..c736fe9 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -333,7 +333,7 @@ log_timer(LogReference, Subs, StartTime) -> {"ms", MicroS div 1000} end, io:format(format_time() - ++ LogReference ++ " ~w " + ++ " " ++ LogReference ++ " ~w " ++ LogText ++ " with time taken ~w " ++ Unit ++ "~n", [self()|Subs] ++ [Time]); From 94762c79caf03f5ed0ae64a6d299d4f6c5dd2351 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 23 Jan 2017 21:43:33 +0000 Subject: [PATCH 23/25] Improve test coverage Add tests for other scenarios --- src/leveled_pmanifest.erl | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 18e80c4..47e7120 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -475,7 +475,13 @@ replace_entry(LevelIdx, Level, Removals, Additions) -> FirstEntry#manifest_entry.start_key, FirstEntry#manifest_entry.end_key), {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)), - Post = lists:nthtail(SectionLength, RHS), + Post = + case RHS of + [] -> + []; + _ -> + lists:nthtail(SectionLength, RHS) + end, UpdList = case 
is_list(Additions) of true -> @@ -766,7 +772,10 @@ ext_keylookup_manifest_test() -> ?assertMatch(2, get_manifest_sqn(ManOpen1)), Man7FN = filepath(RP, 2, current_manifest), + Man7FNAlt = filename:rootname(Man7FN) ++ ".pnd", + {ok, BytesCopied} = file:copy(Man7FN, Man7FNAlt), {ok, Bin} = file:read_file(Man7FN), + ?assertMatch(BytesCopied, byte_size(Bin)), RandPos = random:uniform(bit_size(Bin) - 1), <> = Bin, Flipped = BitToFlip bxor 1, @@ -817,8 +826,29 @@ ext_keylookup_manifest_test() -> LK1_4 = {o, "Bucket1", "K75", null}, ?assertMatch("pid_y3", key_lookup(Man10, 1, LK1_4)), - ?assertMatch("pid_z5", key_lookup(Man10, 2, LK1_4)). - + ?assertMatch("pid_z5", key_lookup(Man10, 2, LK1_4)), + + E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"}, + end_key={o, "Bucket1", "K78", null}, + filename="Z5", + owner="pid_z5"}, + E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null}, + end_key={o, "Bucket1", "K996", null}, + filename="Z6", + owner="pid_z6"}, + + Man11 = remove_manifest_entry(Man10, 3, 2, [E5, E6]), + ?assertMatch(3, get_manifest_sqn(Man11)), + ?assertMatch(false, key_lookup(Man11, 2, LK1_4)), + + E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"}, + end_key={o, "Bucket1", "K45", null}, + owner="pid_y2", + filename="Y2"}, + + Man12 = replace_manifest_entry(Man11, 4, 2, E2_2, E5), + ?assertMatch(4, get_manifest_sqn(Man12)), + ?assertMatch("pid_z5", key_lookup(Man12, 2, LK1_4)). rangequery_manifest_test() -> {_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(), From 1102133723c64f434987964dc5e411abcf75af8d Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 23 Jan 2017 21:45:34 +0000 Subject: [PATCH 24/25] Use skpl as ledger_cache Under assumption that it generates less GC noise (based on micro-benchmark in leveled_tree eunit testing). 
Note: to confirm this, the test order needed to be swapped around, and skpl showed fewer collections in each position - with a < 10% performance hit --- include/leveled.hrl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/leveled.hrl b/include/leveled.hrl index e1c9646..fa4dd11 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -15,7 +15,7 @@ %% Inker key type used for tombstones -define(INKT_TOMB, tomb). --define(CACHE_TYPE, idxt). +-define(CACHE_TYPE, skpl). -record(sft_options, {wait = true :: boolean(), From d225f4d7f5b7d3b281e57eeb28117733faf65567 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 23 Jan 2017 22:58:51 +0000 Subject: [PATCH 25/25] Add use of leveled_tree to sst summary --- src/leveled_sst.erl | 110 +++++++++++--------------------------------- 1 file changed, 28 insertions(+), 82 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index c9102d1..58573a8 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -77,6 +77,8 @@ -define(INDEX_MARKER_WIDTH, 16). -define(DISCARD_EXT, ".discarded"). -define(DELETE_TIMEOUT, 10000). +-define(TREE_TYPE, idxt). +-define(TREE_SIZE, 4). -include_lib("eunit/include/eunit.hrl"). @@ -676,93 +678,37 @@ generate_filenames(RootFilename) -> %% The Slot Index is stored as a flat (sorted) list of {Key, Slot} where Key %% is the last key within the slot. %% -%% This implementation of the SlotIndex stores it as a tuple with the original -%% list as the second element and a list of mark points as the first element -%% containing every 16th key. The Mark points are stored as {Mark, Index}, -%% where the Index correspnds with the nth point in the original list that the -%% Mark occurs. +%% This implementation of the SlotIndex uses leveled_tree from_list(SlotList) -> - L = length(SlotList), - MarkerList = set_marks(lists:reverse(SlotList), - {?INDEX_MARKER_WIDTH, L rem ?INDEX_MARKER_WIDTH}, - L, - []), - {MarkerList, SlotList}.
+ leveled_tree:from_orderedlist(SlotList, ?TREE_TYPE, ?TREE_SIZE). -set_marks([], _MarkInfo, 0, MarkerList) -> - MarkerList; -set_marks([{Key, _Slot}|Rest], {MarkerWidth, MarkPoint}, Count, MarkerList) -> - case Count rem MarkerWidth of - MarkPoint -> - set_marks(Rest, - {MarkerWidth, MarkPoint}, - Count - 1, - [{Key, Count}|MarkerList]); - _ -> - set_marks(Rest, - {MarkerWidth, MarkPoint}, - Count - 1, - MarkerList) - end. - -find_mark(Key, [{Mark, Pos}|_Rest]) when Mark >= Key -> - Pos; -find_mark(Key, [_H|T]) -> - find_mark(Key, T). - -lookup_slot(Key, {MarkerList, SlotList}) -> - Pos = find_mark(Key, MarkerList), - SubList = lists:sublist(SlotList, max(1, Pos - ?INDEX_MARKER_WIDTH), Pos), - Slot = find_mark(Key, SubList), +lookup_slot(Key, Tree) -> + StartKeyFun = + fun(_V) -> + all + end, + % The penciller should never ask for presence out of range - so will + % always return a slot (As we don't compare to StartKey) + {_LK, Slot} = leveled_tree:search(Key, Tree, StartKeyFun), Slot. -%% Returns a section from the summary index and two booleans to indicate if -%% the first slot needs trimming, or the last slot -lookup_slots(StartKey, EndKey, {_MarkerList, SlotList}) -> - SlotsOnlyFun = fun({_K, V}) -> V end, - {KSL, LTrim, RTrim} = lookup_slots_int(StartKey, EndKey, SlotList), - {lists:map(SlotsOnlyFun, KSL), LTrim, RTrim}. 
- -lookup_slots_int(all, all, SlotList) -> - {SlotList, false, false}; -lookup_slots_int(StartKey, all, SlotList) -> - LTrimFun = fun({K, _V}) -> K < StartKey end, - {_LDrop, RKeep0} = lists:splitwith(LTrimFun, SlotList), - {RKeep0, true, false}; -lookup_slots_int(StartKey, EndKey, SlotList) -> - {RKeep, true, false} = lookup_slots_int(StartKey, all, SlotList), - [LeftMost|RKeep0] = RKeep, - {LeftMostK, LeftMostV} = LeftMost, - RTrimFun = fun({K, _V}) -> not leveled_codec:endkey_passed(EndKey, K) end, - case leveled_codec:endkey_passed(EndKey, LeftMostK) of - true -> - {[{LeftMostK, LeftMostV}], - true, - true}; - false -> - case LeftMostK of - EndKey -> - {[{LeftMostK, LeftMostV}], - true, - false}; - _ -> - {LKeep, RDisc} = lists:splitwith(RTrimFun, RKeep0), - case RDisc of - [] -> - {[LeftMost|LKeep], - true, - true}; - [{RDiscK1, RDiscV1}|_Rest] when RDiscK1 == EndKey -> - {[LeftMost|LKeep] ++ [{RDiscK1, RDiscV1}], - true, - false}; - [{RDiscK1, RDiscV1}|_Rest] -> - {[LeftMost|LKeep] ++ [{RDiscK1, RDiscV1}], - true, - true} - end - end +lookup_slots(StartKey, EndKey, Tree) -> + StartKeyFun = + fun(_V) -> + all + end, + MapFun = + fun({_LK, Slot}) -> + Slot + end, + SlotList = leveled_tree:search_range(StartKey, EndKey, Tree, StartKeyFun), + {EK, _EndSlot} = lists:last(SlotList), + case EK of + EndKey -> + {lists:map(MapFun, SlotList), true, false}; + _ -> + {lists:map(MapFun, SlotList), true, true} end.