From 743d59c71befb376efdb600f84dcd545e06791ec Mon Sep 17 00:00:00 2001 From: martinsumner Date: Wed, 30 Nov 2016 15:36:27 +0000 Subject: [PATCH] Make SkipList Two-Level Read and write times were increasing as the siz eof skiplist got larger (e.g. over 2000). Tried instead with a smaller skip width and a two-tier skiplist and this gave much more regular eprformance as the size of the skiplist went up. --- src/leveled_skiplist.erl | 232 +++++++++++++++++++++++++++++---------- 1 file changed, 171 insertions(+), 61 deletions(-) diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl index 5a68c0a..5513eab 100644 --- a/src/leveled_skiplist.erl +++ b/src/leveled_skiplist.erl @@ -28,9 +28,10 @@ -include_lib("eunit/include/eunit.hrl"). --define(SKIP_WIDTH, 64). +-define(SKIP_WIDTH, 16). -define(INFINITY_KEY, {null, null, null, null, null}). -define(EMPTY_SKIPLIST, [{?INFINITY_KEY, []}]). +-define(EMPTY_SKIPLIST_TWOLEVEL, [{?INFINITY_KEY, ?EMPTY_SKIPLIST}]). %%%============================================================================ @@ -39,6 +40,35 @@ enter(Key, Value, SkipList) -> + Hash = erlang:phash2(Key), + {MarkerKey, SubSkipList} = + lists:foldl(fun({Marker, SL}, Acc) -> + case Acc of + false -> + case Marker >= Key of + true -> + {Marker, SL}; + false -> + Acc + end; + _ -> + Acc + end end, + false, + SkipList), + UpdSubSkipList = enter_ground(Key, Value, SubSkipList), + case Hash rem (?SKIP_WIDTH * ?SKIP_WIDTH) of + 0 -> + % + {LHS, RHS} = lists:splitwith(fun({K, _V}) -> K =< Key end, UpdSubSkipList), + SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), + lists:ukeysort(1, [{Key, LHS}|SkpL1]); + _ -> + % Need to replace Marker Key with sublist + lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubSkipList}) + end. + +enter_ground(Key, Value, SkipList) -> Hash = erlang:phash2(Key), {MarkerKey, SubList} = lists:foldl(fun({Marker, SL}, Acc) -> case Acc of @@ -80,6 +110,34 @@ enter(Key, Value, SkipList) -> from_list(UnsortedKVL) -> KVL = lists:ukeysort(1, UnsortedKVL), + SkipWidth = ?SKIP_WIDTH * ?SKIP_WIDTH, + LoftSlots = length(KVL) div SkipWidth, + case LoftSlots of + 0 -> + {K, _V} = lists:last(KVL), + [{K, from_list_ground(KVL, true)}]; + _ -> + SkipList0 = + lists:map(fun(X) -> + N = X * SkipWidth, + {K, _V} = lists:nth(N, KVL), + SL = lists:sublist(KVL, + N - SkipWidth + 1, + SkipWidth), + {K, from_list_ground(SL, true)} + end, + lists:seq(1, LoftSlots)), + case LoftSlots * SkipWidth < length(KVL) of + true -> + {LastK, _V} = lists:last(KVL), + TailList = lists:nthtail(LoftSlots * SkipWidth, KVL), + SkipList0 ++ [{LastK, from_list_ground(TailList, true)}]; + false -> + SkipList0 + end + end. + +from_list_ground(KVL, true) -> Slots = length(KVL) div ?SKIP_WIDTH, SkipList0 = lists:map(fun(X) -> N = X * ?SKIP_WIDTH, @@ -96,6 +154,7 @@ from_list(UnsortedKVL) -> false -> SkipList0 end. + lookup(Key, SkipList) -> SubList = lists:foldl(fun({SkipKey, SL}, Acc) -> @@ -107,6 +166,23 @@ lookup(Key, SkipList) -> end end, null, SkipList), + case SubList of + null -> + none; + _ -> + lookup_ground(Key, SubList) + end. + +lookup_ground(Key, SkipList) -> + SubList = lists:foldl(fun({SkipKey, SL}, Acc) -> + case {Acc, SkipKey} of + {null, SkipKey} when SkipKey >= Key -> + SL; + _ -> + Acc + end end, + null, + SkipList), case SubList of null -> none; @@ -119,7 +195,13 @@ lookup(Key, SkipList) -> end end. + to_list(SkipList) -> + lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list_ground(SL) end, + [], + SkipList). + +to_list_ground(SkipList) -> lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList). %% Rather than support iterator_from like gb_trees, will just an output a key @@ -128,6 +210,46 @@ to_range(SkipList, Start) -> to_range(SkipList, Start, ?INFINITY_KEY). to_range(SkipList, Start, End) -> + R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) -> + + case {PassedStart, PassedEnd} of + {true, true} -> + {true, true, Acc, null}; + {false, false} -> + case Start > Mark of + true -> + {false, false, Acc, SL}; + false -> + SkipLRange = to_range_ground(PrevList, + Start, + End) ++ + to_range_ground(SL, + Start, + End), + case leveled_codec:endkey_passed(End, Mark) of + true -> + {true, true, SkipLRange, null}; + false -> + {true, false, SkipLRange, null} + end + end; + {true, false} -> + SkipLRange = to_range_ground(SL, Start, End), + case leveled_codec:endkey_passed(End, Mark) of + true -> + {true, true, Acc ++ SkipLRange, null}; + false -> + {true, false, Acc ++ SkipLRange, null} + end + end end, + + {false, false, [], []}, + SkipList), + {_Bool1, _Bool2, SubList, _PrevList} = R, + SubList. + + +to_range_ground(SkipList, Start, End) -> R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) -> case {PassedStart, PassedEnd} of @@ -163,11 +285,18 @@ to_range(SkipList, Start, End) -> SubList. empty() -> - ?EMPTY_SKIPLIST. + ?EMPTY_SKIPLIST_TWOLEVEL. size(SkipList) -> + lists:foldl(fun({_Mark, SL}, Acc) -> size_ground(SL) + Acc end, + 0, + SkipList). + +size_ground(SkipList) -> lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList). + + %%%============================================================================ %%% Internal Functions %%%============================================================================ @@ -230,35 +359,37 @@ skiplist_test() -> SkipList = from_list(KL), io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n", [timer:now_diff(os:timestamp(), SWaGSL)]), + + SWaDSL = os:timestamp(), SkipList1 = lists:foldl(fun({K, V}, SL) -> enter(K, V, SL) end, - ?EMPTY_SKIPLIST, + empty(), KL), io:format(user, "Dynamic load of skiplist took ~w microseconds~n~n", [timer:now_diff(os:timestamp(), SWaDSL)]), - + + io:format(user, "~nRunning timing tests for generated skiplist:~n", []), skiplist_timingtest(KL, SkipList), io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), - skiplist_timingtest(KL, SkipList1), - io:format(user, "~n", []). + skiplist_timingtest(KL, SkipList1). skiplist_timingtest(KL, SkipList) -> io:format(user, "Timing tests on skiplist of size ~w~n", [leveled_skiplist:size(SkipList)]), - CheckList1 = lists:sublist(KL, 1200, 100), - CheckList2 = lists:sublist(KL, 1600, 100), - CheckList3 = lists:sublist(KL, 2000, 100), - CheckList4 = lists:sublist(KL, 2400, 100), - CheckList5 = lists:sublist(KL, 2800, 100), + CheckList1 = lists:sublist(KL, 1200, 200), + CheckList2 = lists:sublist(KL, 1600, 200), + CheckList3 = lists:sublist(KL, 2000, 200), + CheckList4 = lists:sublist(KL, 2400, 200), + CheckList5 = lists:sublist(KL, 2800, 200), CheckList6 = lists:sublist(KL, 1, 10), CheckList7 = lists:nthtail(3800, KL), - CheckList8 = lists:sublist(KL, 3000, 1), + CheckList8 = lists:sublist(KL, 2000, 1), CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++ CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7, @@ -267,66 +398,45 @@ skiplist_timingtest(KL, SkipList) -> ?assertMatch({value, V}, lookup(K, SkipList)) end, CheckAll), - io:format(user, "Finding 520 keys took ~w microseconds~n", + io:format(user, "Finding 1020 keys took ~w microseconds~n", [timer:now_diff(os:timestamp(), SWb)]), + RangeFun = + fun(SkipListToQuery, CheckListForQ, Assert) -> + KR = + to_range(SkipListToQuery, + element(1, lists:nth(1, CheckListForQ)), + element(1, lists:last(CheckListForQ))), + case Assert of + true -> + CompareL = length(lists:usort(CheckListForQ)), + ?assertMatch(CompareL, length(KR)); + false -> + KR + end + end, + SWc = os:timestamp(), - KR1 = to_range(SkipList, - element(1, lists:nth(1, CheckList1)), - element(1, lists:last(CheckList1))), - io:format("Result length ~w ~n", [length(KR1)]), - CompareL1 = length(lists:usort(CheckList1)), - ?assertMatch(CompareL1, length(KR1)), - KR2 = to_range(SkipList, - element(1, lists:nth(1, CheckList2)), - element(1, lists:last(CheckList2))), - CompareL2 = length(lists:usort(CheckList2)), - ?assertMatch(CompareL2, length(KR2)), - KR3 = to_range(SkipList, - element(1, lists:nth(1, CheckList3)), - element(1, lists:last(CheckList3))), - CompareL3 = length(lists:usort(CheckList3)), - ?assertMatch(CompareL3, length(KR3)), - KR4 = to_range(SkipList, - element(1, lists:nth(1, CheckList4)), - element(1, lists:last(CheckList4))), - CompareL4 = length(lists:usort(CheckList4)), - ?assertMatch(CompareL4, length(KR4)), - KR5 = to_range(SkipList, - element(1, lists:nth(1, CheckList5)), - element(1, lists:last(CheckList5))), - CompareL5 = length(lists:usort(CheckList5)), - ?assertMatch(CompareL5, length(KR5)), - KR6 = to_range(SkipList, - element(1, lists:nth(1, CheckList6)), - element(1, lists:last(CheckList6))), - CompareL6 = length(lists:usort(CheckList6)), - ?assertMatch(CompareL6, length(KR6)), - KR7 = to_range(SkipList, - element(1, lists:nth(1, CheckList7)), - element(1, lists:last(CheckList7))), - CompareL7 = length(lists:usort(CheckList7)), - ?assertMatch(CompareL7, length(KR7)), - KR8 = to_range(SkipList, - element(1, lists:nth(1, CheckList8)), - element(1, lists:last(CheckList8))), - CompareL8 = length(lists:usort(CheckList8)), - ?assertMatch(CompareL8, length(KR8)), + RangeFun(SkipList, CheckList1, true), + RangeFun(SkipList, CheckList2, true), + RangeFun(SkipList, CheckList3, true), + RangeFun(SkipList, CheckList4, true), + RangeFun(SkipList, CheckList5, true), + RangeFun(SkipList, CheckList6, true), + RangeFun(SkipList, CheckList7, true), + RangeFun(SkipList, CheckList8, true), KL_OOR1 = gb_trees:to_list(generate_randomkeys(1, 4, 201, 202)), - KR9 = to_range(SkipList, - element(1, lists:nth(1, KL_OOR1)), - element(1, lists:last(KL_OOR1))), + KR9 = RangeFun(SkipList, KL_OOR1, false), ?assertMatch([], KR9), + KL_OOR2 = gb_trees:to_list(generate_randomkeys(1, 4, 0, 0)), - KR10 = to_range(SkipList, - element(1, lists:nth(1, KL_OOR2)), - element(1, lists:last(KL_OOR2))), + KR10 = RangeFun(SkipList, KL_OOR2, false), ?assertMatch([], KR10), io:format(user, "Finding 10 ranges took ~w microseconds~n", [timer:now_diff(os:timestamp(), SWc)]), - + AltKL1 = gb_trees:to_list(generate_randomkeys(1, 1000, 1, 200)), SWd = os:timestamp(), lists:foreach(fun({K, _V}) ->