diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl index 5513eab..9fe6cb2 100644 --- a/src/leveled_skiplist.erl +++ b/src/leveled_skiplist.erl @@ -17,6 +17,7 @@ -export([ from_list/1, + from_sortedlist/1, to_list/1, enter/3, to_range/2, @@ -29,64 +30,58 @@ -include_lib("eunit/include/eunit.hrl"). -define(SKIP_WIDTH, 16). +-define(LIST_HEIGHT, 2). -define(INFINITY_KEY, {null, null, null, null, null}). --define(EMPTY_SKIPLIST, [{?INFINITY_KEY, []}]). --define(EMPTY_SKIPLIST_TWOLEVEL, [{?INFINITY_KEY, ?EMPTY_SKIPLIST}]). %%%============================================================================ %%% SkipList API %%%============================================================================ - enter(Key, Value, SkipList) -> - Hash = erlang:phash2(Key), - {MarkerKey, SubSkipList} = - lists:foldl(fun({Marker, SL}, Acc) -> - case Acc of - false -> - case Marker >= Key of - true -> - {Marker, SL}; - false -> - Acc - end; - _ -> - Acc - end end, - false, - SkipList), - UpdSubSkipList = enter_ground(Key, Value, SubSkipList), - case Hash rem (?SKIP_WIDTH * ?SKIP_WIDTH) of - 0 -> - % - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> K =< Key end, UpdSubSkipList), - SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), - lists:ukeysort(1, [{Key, LHS}|SkpL1]); - _ -> - % Need to replace Marker Key with sublist - lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubSkipList}) - end. + enter(Key, Value, SkipList, ?SKIP_WIDTH, ?LIST_HEIGHT). -enter_ground(Key, Value, SkipList) -> +from_list(UnsortedKVL) -> + KVL = lists:ukeysort(1, UnsortedKVL), + from_list(KVL, ?SKIP_WIDTH, ?LIST_HEIGHT). + +from_sortedlist(SortedKVL) -> + from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT). + +lookup(Key, SkipList) -> + lookup(Key, SkipList, ?LIST_HEIGHT). + + +%% Rather than support iterator_from like gb_trees, will just an output a key +%% sorted list for the desired range, which can the be iterated over as normal +to_range(SkipList, Start) -> + to_range(SkipList, Start, ?INFINITY_KEY, ?LIST_HEIGHT). + +to_range(SkipList, Start, End) -> + to_range(SkipList, Start, End, ?LIST_HEIGHT). + +to_list(SkipList) -> + to_list(SkipList, ?LIST_HEIGHT). + +empty() -> + empty([], ?LIST_HEIGHT). + +size(SkipList) -> + size(SkipList, ?LIST_HEIGHT). + + +%%%============================================================================ +%%% SkipList Base Functions +%%%============================================================================ + +enter(Key, Value, SkipList, Width, 1) -> Hash = erlang:phash2(Key), - {MarkerKey, SubList} = lists:foldl(fun({Marker, SL}, Acc) -> - case Acc of - false -> - case Marker >= Key of - true -> - {Marker, SL}; - false -> - Acc - end; - _ -> - Acc - end end, - false, - SkipList), - case Hash rem ?SKIP_WIDTH of + {MarkerKey, SubList} = find_mark(Key, SkipList), + case Hash rem Width of 0 -> - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> K =< Key end, SubList), + {LHS, RHS} = lists:splitwith(fun({K, _V}) -> + K =< Key end, + SubList), SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1], lists:ukeysort(1, SkpL2); @@ -105,17 +100,53 @@ enter_ground(Key, Value, SkipList) -> end end, lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList}) + end; +enter(Key, Value, SkipList, Width, Level) -> + Hash = erlang:phash2(Key), + HashMatch = width(Level, Width), + {MarkerKey, SubSkipList} = find_mark(Key, SkipList), + UpdSubSkipList = enter(Key, Value, SubSkipList, Width, Level - 1), + case Hash rem HashMatch of + 0 -> + % + {LHS, RHS} = lists:splitwith(fun({K, _V}) -> + K =< Key end, + UpdSubSkipList), + SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), + lists:ukeysort(1, [{Key, LHS}|SkpL1]); + _ -> + % Need to replace Marker Key with sublist + lists:keyreplace(MarkerKey, + 1, + SkipList, + {MarkerKey, UpdSubSkipList}) end. - -from_list(UnsortedKVL) -> - KVL = lists:ukeysort(1, UnsortedKVL), - SkipWidth = ?SKIP_WIDTH * ?SKIP_WIDTH, + +from_list(KVL, Width, 1) -> + Slots = length(KVL) div Width, + SkipList0 = lists:map(fun(X) -> + N = X * Width, + {K, _V} = lists:nth(N, KVL), + {K, lists:sublist(KVL, + N - Width + 1, + Width)} + end, + lists:seq(1, length(KVL) div Width)), + case Slots * Width < length(KVL) of + true -> + {LastK, _V} = lists:last(KVL), + SkipList0 ++ [{LastK, lists:nthtail(Slots * Width, KVL)}]; + false -> + SkipList0 + end; +from_list(KVL, Width, Level) -> + SkipWidth = width(Level, Width), LoftSlots = length(KVL) div SkipWidth, case LoftSlots of 0 -> {K, _V} = lists:last(KVL), - [{K, from_list_ground(KVL, true)}]; + [{K, from_list(KVL, Width, Level - 1)}]; _ -> SkipList0 = lists:map(fun(X) -> @@ -124,65 +155,24 @@ from_list(UnsortedKVL) -> SL = lists:sublist(KVL, N - SkipWidth + 1, SkipWidth), - {K, from_list_ground(SL, true)} + {K, from_list(SL, Width, Level - 1)} end, lists:seq(1, LoftSlots)), case LoftSlots * SkipWidth < length(KVL) of true -> {LastK, _V} = lists:last(KVL), TailList = lists:nthtail(LoftSlots * SkipWidth, KVL), - SkipList0 ++ [{LastK, from_list_ground(TailList, true)}]; + SkipList0 ++ [{LastK, from_list(TailList, + Width, + Level - 1)}]; false -> SkipList0 end end. - -from_list_ground(KVL, true) -> - Slots = length(KVL) div ?SKIP_WIDTH, - SkipList0 = lists:map(fun(X) -> - N = X * ?SKIP_WIDTH, - {K, _V} = lists:nth(N, KVL), - {K, lists:sublist(KVL, - N - ?SKIP_WIDTH + 1, - ?SKIP_WIDTH)} - end, - lists:seq(1, length(KVL) div ?SKIP_WIDTH)), - case Slots * ?SKIP_WIDTH < length(KVL) of - true -> - {LastK, _V} = lists:last(KVL), - SkipList0 ++ [{LastK, lists:nthtail(Slots * ?SKIP_WIDTH, KVL)}]; - false -> - SkipList0 - end. - -lookup(Key, SkipList) -> - SubList = lists:foldl(fun({SkipKey, SL}, Acc) -> - case {Acc, SkipKey} of - {null, SkipKey} when SkipKey >= Key -> - SL; - _ -> - Acc - end end, - null, - SkipList), - case SubList of - null -> - none; - _ -> - lookup_ground(Key, SubList) - end. -lookup_ground(Key, SkipList) -> - SubList = lists:foldl(fun({SkipKey, SL}, Acc) -> - case {Acc, SkipKey} of - {null, SkipKey} when SkipKey >= Key -> - SL; - _ -> - Acc - end end, - null, - SkipList), +lookup(Key, SkipList, 1) -> + SubList = get_sublist(Key, SkipList), case SubList of null -> none; @@ -193,63 +183,25 @@ lookup_ground(Key, SkipList) -> {Key, V} -> {value, V} end + end; +lookup(Key, SkipList, Level) -> + SubList = get_sublist(Key, SkipList), + case SubList of + null -> + none; + _ -> + lookup(Key, SubList, Level - 1) end. -to_list(SkipList) -> - lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list_ground(SL) end, +to_list(SkipList, 1) -> + lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList); +to_list(SkipList, Level) -> + lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list(SL, Level - 1) end, [], SkipList). -to_list_ground(SkipList) -> - lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList). - -%% Rather than support iterator_from like gb_trees, will just an output a key -%% sorted list for the desired range, which can the be iterated over as normal -to_range(SkipList, Start) -> - to_range(SkipList, Start, ?INFINITY_KEY). - -to_range(SkipList, Start, End) -> - R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) -> - - case {PassedStart, PassedEnd} of - {true, true} -> - {true, true, Acc, null}; - {false, false} -> - case Start > Mark of - true -> - {false, false, Acc, SL}; - false -> - SkipLRange = to_range_ground(PrevList, - Start, - End) ++ - to_range_ground(SL, - Start, - End), - case leveled_codec:endkey_passed(End, Mark) of - true -> - {true, true, SkipLRange, null}; - false -> - {true, false, SkipLRange, null} - end - end; - {true, false} -> - SkipLRange = to_range_ground(SL, Start, End), - case leveled_codec:endkey_passed(End, Mark) of - true -> - {true, true, Acc ++ SkipLRange, null}; - false -> - {true, false, Acc ++ SkipLRange, null} - end - end end, - - {false, false, [], []}, - SkipList), - {_Bool1, _Bool2, SubList, _PrevList} = R, - SubList. - - -to_range_ground(SkipList, Start, End) -> +to_range(SkipList, Start, End, 1) -> R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) -> case {PassedStart, PassedEnd} of @@ -282,25 +234,95 @@ to_range_ground(SkipList, Start, End) -> {false, false, [], []}, SkipList), {_Bool1, _Bool2, SubList, _PrevList} = R, + SubList; +to_range(SkipList, Start, End, Level) -> + R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) -> + + case {PassedStart, PassedEnd} of + {true, true} -> + {true, true, Acc, null}; + {false, false} -> + case Start > Mark of + true -> + {false, false, Acc, SL}; + false -> + SkipLRange = to_range(PrevList, + Start, End, + Level - 1) ++ + to_range(SL, + Start, End, + Level - 1), + case leveled_codec:endkey_passed(End, Mark) of + true -> + {true, true, SkipLRange, null}; + false -> + {true, false, SkipLRange, null} + end + end; + {true, false} -> + SkipLRange = to_range(SL, Start, End, Level - 1), + case leveled_codec:endkey_passed(End, Mark) of + true -> + {true, true, Acc ++ SkipLRange, null}; + false -> + {true, false, Acc ++ SkipLRange, null} + end + end end, + + {false, false, [], []}, + SkipList), + {_Bool1, _Bool2, SubList, _PrevList} = R, SubList. -empty() -> - ?EMPTY_SKIPLIST_TWOLEVEL. -size(SkipList) -> - lists:foldl(fun({_Mark, SL}, Acc) -> size_ground(SL) + Acc end, +empty(SkipList, 1) -> + [{?INFINITY_KEY, SkipList}]; +empty(SkipList, Level) -> + empty([{?INFINITY_KEY, SkipList}], Level - 1). + +size(SkipList, 1) -> + lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList); +size(SkipList, Level) -> + lists:foldl(fun({_Mark, SL}, Acc) -> size(SL, Level - 1) + Acc end, 0, SkipList). -size_ground(SkipList) -> - lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList). - - %%%============================================================================ %%% Internal Functions %%%============================================================================ +width(1, Width) -> + Width; +width(N, Width) -> + width(N - 1, Width * Width). + +find_mark(Key, SkipList) -> + lists:foldl(fun({Marker, SL}, Acc) -> + case Acc of + false -> + case Marker >= Key of + true -> + {Marker, SL}; + false -> + Acc + end; + _ -> + Acc + end end, + false, + SkipList). + +get_sublist(Key, SkipList) -> + lists:foldl(fun({SkipKey, SL}, Acc) -> + case {Acc, SkipKey} of + {null, SkipKey} when SkipKey >= Key -> + SL; + _ -> + Acc + end end, + null, + SkipList). splitlist_start(StartKey, SL) -> {_LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < StartKey end, SL), @@ -347,20 +369,24 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> BRange). skiplist_test() -> - KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)), - SWaD = os:timestamp(), - _D = lists:foldl(fun({K, V}, AccD) -> dict:store(K, V, AccD) end, - dict:new(), - KL), - io:format(user, "Loading dict with 4000 keys in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWaD)]), + N = 8000, + KL = gb_trees:to_list(generate_randomkeys(1, N, 1, N div 5)), SWaGSL = os:timestamp(), SkipList = from_list(KL), - io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWaGSL)]), - + io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++ + "Top level key count of ~w~n", + [N, timer:now_diff(os:timestamp(), SWaGSL), length(SkipList)]), + io:format(user, "Second tier key counts of ~w~n", + [lists:map(fun({_L, SL}) -> length(SL) end, SkipList)]), + KLSorted = lists:ukeysort(1, KL), + SWaGSL2 = os:timestamp(), + SkipList = from_sortedlist(KLSorted), + io:format(user, "Generating skip list with ~w sorted keys in ~w " ++ + "microseconds~n", + [N, timer:now_diff(os:timestamp(), SWaGSL2)]), + SWaDSL = os:timestamp(), SkipList1 = lists:foldl(fun({K, V}, SL) -> @@ -368,28 +394,31 @@ skiplist_test() -> end, empty(), KL), - io:format(user, "Dynamic load of skiplist took ~w microseconds~n~n", - [timer:now_diff(os:timestamp(), SWaDSL)]), - - + io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++ + "microseconds~n" ++ + "Top level key count of ~w~n", + [N, timer:now_diff(os:timestamp(), SWaDSL), length(SkipList1)]), + io:format(user, "Second tier key counts of ~w~n", + [lists:map(fun({_L, SL}) -> length(SL) end, SkipList1)]), + io:format(user, "~nRunning timing tests for generated skiplist:~n", []), - skiplist_timingtest(KL, SkipList), + skiplist_timingtest(KL, SkipList, N), io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), - skiplist_timingtest(KL, SkipList1). + skiplist_timingtest(KL, SkipList1, N). -skiplist_timingtest(KL, SkipList) -> +skiplist_timingtest(KL, SkipList, N) -> io:format(user, "Timing tests on skiplist of size ~w~n", [leveled_skiplist:size(SkipList)]), - CheckList1 = lists:sublist(KL, 1200, 200), - CheckList2 = lists:sublist(KL, 1600, 200), - CheckList3 = lists:sublist(KL, 2000, 200), - CheckList4 = lists:sublist(KL, 2400, 200), - CheckList5 = lists:sublist(KL, 2800, 200), + CheckList1 = lists:sublist(KL, N div 4, 200), + CheckList2 = lists:sublist(KL, N div 3, 200), + CheckList3 = lists:sublist(KL, N div 2, 200), + CheckList4 = lists:sublist(KL, N - 1000, 200), + CheckList5 = lists:sublist(KL, N - 500, 200), CheckList6 = lists:sublist(KL, 1, 10), - CheckList7 = lists:nthtail(3800, KL), - CheckList8 = lists:sublist(KL, 2000, 1), + CheckList7 = lists:nthtail(N - 200, KL), + CheckList8 = lists:sublist(KL, N div 2, 1), CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++ CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7, @@ -426,7 +455,10 @@ skiplist_timingtest(KL, SkipList) -> RangeFun(SkipList, CheckList7, true), RangeFun(SkipList, CheckList8, true), - KL_OOR1 = gb_trees:to_list(generate_randomkeys(1, 4, 201, 202)), + KL_OOR1 = gb_trees:to_list(generate_randomkeys(1, + 4, + N div 5 + 1, + N div 5 + 10)), KR9 = RangeFun(SkipList, KL_OOR1, false), ?assertMatch([], KR9), @@ -445,7 +477,10 @@ skiplist_timingtest(KL, SkipList) -> AltKL1), io:format(user, "Getting 1000 mainly missing keys took ~w microseconds~n", [timer:now_diff(os:timestamp(), SWd)]), - AltKL2 = gb_trees:to_list(generate_randomkeys(1, 1000, 201, 300)), + AltKL2 = gb_trees:to_list(generate_randomkeys(1, + 1000, + N div 5 + 1, + N div 5 + 300)), SWe = os:timestamp(), lists:foreach(fun({K, _V}) -> none = lookup(K, SkipList)