Add potential support for deeper layers

Unproven attempt to make skiplist code more generic
This commit is contained in:
martinsumner 2016-11-30 22:35:23 +00:00
parent 743d59c71b
commit 364527f3b8

View file

@ -17,6 +17,7 @@
-export([ -export([
from_list/1, from_list/1,
from_sortedlist/1,
to_list/1, to_list/1,
enter/3, enter/3,
to_range/2, to_range/2,
@ -29,64 +30,58 @@
-include_lib("eunit/include/eunit.hrl"). -include_lib("eunit/include/eunit.hrl").
-define(SKIP_WIDTH, 16). -define(SKIP_WIDTH, 16).
-define(LIST_HEIGHT, 2).
-define(INFINITY_KEY, {null, null, null, null, null}). -define(INFINITY_KEY, {null, null, null, null, null}).
-define(EMPTY_SKIPLIST, [{?INFINITY_KEY, []}]).
-define(EMPTY_SKIPLIST_TWOLEVEL, [{?INFINITY_KEY, ?EMPTY_SKIPLIST}]).
%%%============================================================================ %%%============================================================================
%%% SkipList API %%% SkipList API
%%%============================================================================ %%%============================================================================
enter(Key, Value, SkipList) -> enter(Key, Value, SkipList) ->
Hash = erlang:phash2(Key), enter(Key, Value, SkipList, ?SKIP_WIDTH, ?LIST_HEIGHT).
{MarkerKey, SubSkipList} =
lists:foldl(fun({Marker, SL}, Acc) ->
case Acc of
false ->
case Marker >= Key of
true ->
{Marker, SL};
false ->
Acc
end;
_ ->
Acc
end end,
false,
SkipList),
UpdSubSkipList = enter_ground(Key, Value, SubSkipList),
case Hash rem (?SKIP_WIDTH * ?SKIP_WIDTH) of
0 ->
%
{LHS, RHS} = lists:splitwith(fun({K, _V}) -> K =< Key end, UpdSubSkipList),
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
lists:ukeysort(1, [{Key, LHS}|SkpL1]);
_ ->
% Need to replace Marker Key with sublist
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubSkipList})
end.
enter_ground(Key, Value, SkipList) -> from_list(UnsortedKVL) ->
KVL = lists:ukeysort(1, UnsortedKVL),
from_list(KVL, ?SKIP_WIDTH, ?LIST_HEIGHT).
%% Build a skiplist from a key/value list using the default ?SKIP_WIDTH and
%% ?LIST_HEIGHT. No sorting or de-duplication is done in this function.
%% NOTE(review): SortedKVL is presumably expected to be key-sorted with unique
%% keys already (i.e. what lists:ukeysort/2 would produce) - confirm against
%% callers.
from_sortedlist(SortedKVL) ->
from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT).
%% Look up Key in a skiplist of the default height, starting the descent at
%% the top layer (?LIST_HEIGHT) via lookup/3.
%% The lookup/3 clauses return none when no marker covers Key, and
%% {value, V} when the key is found.
lookup(Key, SkipList) ->
lookup(Key, SkipList, ?LIST_HEIGHT).
%% Rather than support iterator_from like gb_trees, this will just output a
%% key-sorted list for the desired range, which can then be iterated over as
%% normal.
%% to_range/2 leaves the range open-ended by passing ?INFINITY_KEY as the End.
to_range(SkipList, Start) ->
to_range(SkipList, Start, ?INFINITY_KEY, ?LIST_HEIGHT).
%% Return the entries between Start and End as a list, descending from the
%% top layer (?LIST_HEIGHT) via to_range/4.
to_range(SkipList, Start, End) ->
to_range(SkipList, Start, End, ?LIST_HEIGHT).
%% Return every entry of a default-height skiplist as one flat list; to_list/2
%% concatenates the ground-level sublists in marker order.
to_list(SkipList) ->
to_list(SkipList, ?LIST_HEIGHT).
%% Create an empty skiplist of the default height: empty/2 wraps the empty
%% ground list in one ?INFINITY_KEY marker per layer, ?LIST_HEIGHT times.
empty() ->
empty([], ?LIST_HEIGHT).
%% Count the entries held in the ground-level lists of a default-height
%% skiplist (markers themselves are not counted by size/2).
size(SkipList) ->
size(SkipList, ?LIST_HEIGHT).
%%%============================================================================
%%% SkipList Base Functions
%%%============================================================================
enter(Key, Value, SkipList, Width, 1) ->
Hash = erlang:phash2(Key), Hash = erlang:phash2(Key),
{MarkerKey, SubList} = lists:foldl(fun({Marker, SL}, Acc) -> {MarkerKey, SubList} = find_mark(Key, SkipList),
case Acc of case Hash rem Width of
false ->
case Marker >= Key of
true ->
{Marker, SL};
false ->
Acc
end;
_ ->
Acc
end end,
false,
SkipList),
case Hash rem ?SKIP_WIDTH of
0 -> 0 ->
{LHS, RHS} = lists:splitwith(fun({K, _V}) -> K =< Key end, SubList), {LHS, RHS} = lists:splitwith(fun({K, _V}) ->
K =< Key end,
SubList),
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1], SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1],
lists:ukeysort(1, SkpL2); lists:ukeysort(1, SkpL2);
@ -105,17 +100,53 @@ enter_ground(Key, Value, SkipList) ->
end end
end, end,
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList}) lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
end;
enter(Key, Value, SkipList, Width, Level) ->
Hash = erlang:phash2(Key),
HashMatch = width(Level, Width),
{MarkerKey, SubSkipList} = find_mark(Key, SkipList),
UpdSubSkipList = enter(Key, Value, SubSkipList, Width, Level - 1),
case Hash rem HashMatch of
0 ->
%
{LHS, RHS} = lists:splitwith(fun({K, _V}) ->
K =< Key end,
UpdSubSkipList),
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
lists:ukeysort(1, [{Key, LHS}|SkpL1]);
_ ->
% Need to replace Marker Key with sublist
lists:keyreplace(MarkerKey,
1,
SkipList,
{MarkerKey, UpdSubSkipList})
end. end.
from_list(UnsortedKVL) ->
KVL = lists:ukeysort(1, UnsortedKVL), from_list(KVL, Width, 1) ->
SkipWidth = ?SKIP_WIDTH * ?SKIP_WIDTH, Slots = length(KVL) div Width,
SkipList0 = lists:map(fun(X) ->
N = X * Width,
{K, _V} = lists:nth(N, KVL),
{K, lists:sublist(KVL,
N - Width + 1,
Width)}
end,
lists:seq(1, length(KVL) div Width)),
case Slots * Width < length(KVL) of
true ->
{LastK, _V} = lists:last(KVL),
SkipList0 ++ [{LastK, lists:nthtail(Slots * Width, KVL)}];
false ->
SkipList0
end;
from_list(KVL, Width, Level) ->
SkipWidth = width(Level, Width),
LoftSlots = length(KVL) div SkipWidth, LoftSlots = length(KVL) div SkipWidth,
case LoftSlots of case LoftSlots of
0 -> 0 ->
{K, _V} = lists:last(KVL), {K, _V} = lists:last(KVL),
[{K, from_list_ground(KVL, true)}]; [{K, from_list(KVL, Width, Level - 1)}];
_ -> _ ->
SkipList0 = SkipList0 =
lists:map(fun(X) -> lists:map(fun(X) ->
@ -124,65 +155,24 @@ from_list(UnsortedKVL) ->
SL = lists:sublist(KVL, SL = lists:sublist(KVL,
N - SkipWidth + 1, N - SkipWidth + 1,
SkipWidth), SkipWidth),
{K, from_list_ground(SL, true)} {K, from_list(SL, Width, Level - 1)}
end, end,
lists:seq(1, LoftSlots)), lists:seq(1, LoftSlots)),
case LoftSlots * SkipWidth < length(KVL) of case LoftSlots * SkipWidth < length(KVL) of
true -> true ->
{LastK, _V} = lists:last(KVL), {LastK, _V} = lists:last(KVL),
TailList = lists:nthtail(LoftSlots * SkipWidth, KVL), TailList = lists:nthtail(LoftSlots * SkipWidth, KVL),
SkipList0 ++ [{LastK, from_list_ground(TailList, true)}]; SkipList0 ++ [{LastK, from_list(TailList,
Width,
Level - 1)}];
false -> false ->
SkipList0 SkipList0
end end
end. end.
from_list_ground(KVL, true) ->
Slots = length(KVL) div ?SKIP_WIDTH,
SkipList0 = lists:map(fun(X) ->
N = X * ?SKIP_WIDTH,
{K, _V} = lists:nth(N, KVL),
{K, lists:sublist(KVL,
N - ?SKIP_WIDTH + 1,
?SKIP_WIDTH)}
end,
lists:seq(1, length(KVL) div ?SKIP_WIDTH)),
case Slots * ?SKIP_WIDTH < length(KVL) of
true ->
{LastK, _V} = lists:last(KVL),
SkipList0 ++ [{LastK, lists:nthtail(Slots * ?SKIP_WIDTH, KVL)}];
false ->
SkipList0
end.
lookup(Key, SkipList) ->
SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
case {Acc, SkipKey} of
{null, SkipKey} when SkipKey >= Key ->
SL;
_ ->
Acc
end end,
null,
SkipList),
case SubList of
null ->
none;
_ ->
lookup_ground(Key, SubList)
end.
lookup_ground(Key, SkipList) -> lookup(Key, SkipList, 1) ->
SubList = lists:foldl(fun({SkipKey, SL}, Acc) -> SubList = get_sublist(Key, SkipList),
case {Acc, SkipKey} of
{null, SkipKey} when SkipKey >= Key ->
SL;
_ ->
Acc
end end,
null,
SkipList),
case SubList of case SubList of
null -> null ->
none; none;
@ -193,63 +183,25 @@ lookup_ground(Key, SkipList) ->
{Key, V} -> {Key, V} ->
{value, V} {value, V}
end end
end;
lookup(Key, SkipList, Level) ->
SubList = get_sublist(Key, SkipList),
case SubList of
null ->
none;
_ ->
lookup(Key, SubList, Level - 1)
end. end.
%% Flatten a skiplist of the given Level into a single list of entries.
%% At Level 1 each marker holds a plain list of entries; at higher levels each
%% marker holds a skiplist one level down, which is flattened recursively.
%% Sublists are concatenated in marker order, so the output keeps the order
%% in which entries are stored.
%% lists:flatmap/2 is used instead of folding with Acc ++ SL, which re-copied
%% the growing accumulator once per marker.
to_list(SkipList, 1) ->
    lists:flatmap(fun({_Mark, SL}) -> SL end, SkipList);
to_list(SkipList, Level) ->
    lists:flatmap(fun({_Mark, SL}) -> to_list(SL, Level - 1) end, SkipList).
to_list_ground(SkipList) -> to_range(SkipList, Start, End, 1) ->
lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList).
%% Rather than support iterator_from like gb_trees, this will just output a
%% key-sorted list for the desired range, which can then be iterated over as normal
to_range(SkipList, Start) ->
to_range(SkipList, Start, ?INFINITY_KEY).
to_range(SkipList, Start, End) ->
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
case {PassedStart, PassedEnd} of
{true, true} ->
{true, true, Acc, null};
{false, false} ->
case Start > Mark of
true ->
{false, false, Acc, SL};
false ->
SkipLRange = to_range_ground(PrevList,
Start,
End) ++
to_range_ground(SL,
Start,
End),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, SkipLRange, null};
false ->
{true, false, SkipLRange, null}
end
end;
{true, false} ->
SkipLRange = to_range_ground(SL, Start, End),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, Acc ++ SkipLRange, null};
false ->
{true, false, Acc ++ SkipLRange, null}
end
end end,
{false, false, [], []},
SkipList),
{_Bool1, _Bool2, SubList, _PrevList} = R,
SubList.
to_range_ground(SkipList, Start, End) ->
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) -> R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
case {PassedStart, PassedEnd} of case {PassedStart, PassedEnd} of
@ -282,25 +234,95 @@ to_range_ground(SkipList, Start, End) ->
{false, false, [], []}, {false, false, [], []},
SkipList), SkipList),
{_Bool1, _Bool2, SubList, _PrevList} = R, {_Bool1, _Bool2, SubList, _PrevList} = R,
SubList;
to_range(SkipList, Start, End, Level) ->
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
case {PassedStart, PassedEnd} of
{true, true} ->
{true, true, Acc, null};
{false, false} ->
case Start > Mark of
true ->
{false, false, Acc, SL};
false ->
SkipLRange = to_range(PrevList,
Start, End,
Level - 1) ++
to_range(SL,
Start, End,
Level - 1),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, SkipLRange, null};
false ->
{true, false, SkipLRange, null}
end
end;
{true, false} ->
SkipLRange = to_range(SL, Start, End, Level - 1),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, Acc ++ SkipLRange, null};
false ->
{true, false, Acc ++ SkipLRange, null}
end
end end,
{false, false, [], []},
SkipList),
{_Bool1, _Bool2, SubList, _PrevList} = R,
SubList. SubList.
empty() ->
?EMPTY_SKIPLIST_TWOLEVEL.
%% Wrap SkipList in Level nested layers, each consisting of a single
%% ?INFINITY_KEY marker pointing at the layer below.
%% Called with [] as the initial SkipList this yields an empty skiplist of
%% height Level.
empty(Inner, Level) ->
    Wrapped = [{?INFINITY_KEY, Inner}],
    case Level of
        1 ->
            Wrapped;
        _ ->
            empty(Wrapped, Level - 1)
    end.
%% Count the entries stored beneath a skiplist of the given Level.
%% At Level 1 each marker's sublist is a plain list and its length is taken;
%% above that each marker's sublist is itself a skiplist one level down and
%% is counted recursively. Marker keys are not counted.
size(SkipList, Level) ->
    CountFun =
        case Level of
            1 ->
                fun erlang:length/1;
            _ ->
                fun(SubSkipList) -> size(SubSkipList, Level - 1) end
        end,
    lists:sum(lists:map(fun({_Mark, SL}) -> CountFun(SL) end, SkipList)).
size_ground(SkipList) ->
lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList).
%%%============================================================================ %%%============================================================================
%%% Internal Functions %%% Internal Functions
%%%============================================================================ %%%============================================================================
%% Number of keys expected beneath one marker at layer N for a base fan-out
%% of Width: Width at layer 1, Width * Width at layer 2 and, in general,
%% Width to the power N.
%% The width is multiplied by Width once per layer (rather than squaring the
%% running total) so that consecutive layers always differ by a factor of
%% Width; squaring agrees for N =< 2 but would give Width^4 at layer 3.
%% N must be a positive integer; anything else fails with function_clause.
width(1, Width) ->
    Width;
width(N, Width) when is_integer(N), N > 1 ->
    Width * width(N - 1, Width).
%% Find the first marker in SkipList that is greater than or equal to Key.
%% Returns {Marker, SubList} for that marker, or false when SkipList is empty
%% or every marker is smaller than Key.
%% The list is walked directly and the search stops at the first hit, instead
%% of folding over all remaining markers once the answer is already known.
find_mark(_Key, []) ->
    false;
find_mark(Key, [{Marker, SL}|_Rest]) when Marker >= Key ->
    {Marker, SL};
find_mark(Key, [{_Marker, _SL}|Rest]) ->
    find_mark(Key, Rest).
%% Return the sublist held by the first marker in SkipList that is greater
%% than or equal to Key, or null when SkipList is empty or every marker is
%% smaller than Key.
%% The list is walked directly and the search stops at the first hit, instead
%% of folding over all remaining markers once the answer is already known.
get_sublist(_Key, []) ->
    null;
get_sublist(Key, [{SkipKey, SL}|_Rest]) when SkipKey >= Key ->
    SL;
get_sublist(Key, [{_SkipKey, _SL}|Rest]) ->
    get_sublist(Key, Rest).
splitlist_start(StartKey, SL) -> splitlist_start(StartKey, SL) ->
{_LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < StartKey end, SL), {_LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < StartKey end, SL),
@ -347,20 +369,24 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BRange). BRange).
skiplist_test() -> skiplist_test() ->
KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)), N = 8000,
SWaD = os:timestamp(), KL = gb_trees:to_list(generate_randomkeys(1, N, 1, N div 5)),
_D = lists:foldl(fun({K, V}, AccD) -> dict:store(K, V, AccD) end,
dict:new(),
KL),
io:format(user, "Loading dict with 4000 keys in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaD)]),
SWaGSL = os:timestamp(), SWaGSL = os:timestamp(),
SkipList = from_list(KL), SkipList = from_list(KL),
io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n", io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++
[timer:now_diff(os:timestamp(), SWaGSL)]), "Top level key count of ~w~n",
[N, timer:now_diff(os:timestamp(), SWaGSL), length(SkipList)]),
io:format(user, "Second tier key counts of ~w~n",
[lists:map(fun({_L, SL}) -> length(SL) end, SkipList)]),
KLSorted = lists:ukeysort(1, KL),
SWaGSL2 = os:timestamp(),
SkipList = from_sortedlist(KLSorted),
io:format(user, "Generating skip list with ~w sorted keys in ~w " ++
"microseconds~n",
[N, timer:now_diff(os:timestamp(), SWaGSL2)]),
SWaDSL = os:timestamp(), SWaDSL = os:timestamp(),
SkipList1 = SkipList1 =
lists:foldl(fun({K, V}, SL) -> lists:foldl(fun({K, V}, SL) ->
@ -368,28 +394,31 @@ skiplist_test() ->
end, end,
empty(), empty(),
KL), KL),
io:format(user, "Dynamic load of skiplist took ~w microseconds~n~n", io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++
[timer:now_diff(os:timestamp(), SWaDSL)]), "microseconds~n" ++
"Top level key count of ~w~n",
[N, timer:now_diff(os:timestamp(), SWaDSL), length(SkipList1)]),
io:format(user, "Second tier key counts of ~w~n",
[lists:map(fun({_L, SL}) -> length(SL) end, SkipList1)]),
io:format(user, "~nRunning timing tests for generated skiplist:~n", []), io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
skiplist_timingtest(KL, SkipList), skiplist_timingtest(KL, SkipList, N),
io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
skiplist_timingtest(KL, SkipList1). skiplist_timingtest(KL, SkipList1, N).
skiplist_timingtest(KL, SkipList) -> skiplist_timingtest(KL, SkipList, N) ->
io:format(user, "Timing tests on skiplist of size ~w~n", io:format(user, "Timing tests on skiplist of size ~w~n",
[leveled_skiplist:size(SkipList)]), [leveled_skiplist:size(SkipList)]),
CheckList1 = lists:sublist(KL, 1200, 200), CheckList1 = lists:sublist(KL, N div 4, 200),
CheckList2 = lists:sublist(KL, 1600, 200), CheckList2 = lists:sublist(KL, N div 3, 200),
CheckList3 = lists:sublist(KL, 2000, 200), CheckList3 = lists:sublist(KL, N div 2, 200),
CheckList4 = lists:sublist(KL, 2400, 200), CheckList4 = lists:sublist(KL, N - 1000, 200),
CheckList5 = lists:sublist(KL, 2800, 200), CheckList5 = lists:sublist(KL, N - 500, 200),
CheckList6 = lists:sublist(KL, 1, 10), CheckList6 = lists:sublist(KL, 1, 10),
CheckList7 = lists:nthtail(3800, KL), CheckList7 = lists:nthtail(N - 200, KL),
CheckList8 = lists:sublist(KL, 2000, 1), CheckList8 = lists:sublist(KL, N div 2, 1),
CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++ CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7, CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,
@ -426,7 +455,10 @@ skiplist_timingtest(KL, SkipList) ->
RangeFun(SkipList, CheckList7, true), RangeFun(SkipList, CheckList7, true),
RangeFun(SkipList, CheckList8, true), RangeFun(SkipList, CheckList8, true),
KL_OOR1 = gb_trees:to_list(generate_randomkeys(1, 4, 201, 202)), KL_OOR1 = gb_trees:to_list(generate_randomkeys(1,
4,
N div 5 + 1,
N div 5 + 10)),
KR9 = RangeFun(SkipList, KL_OOR1, false), KR9 = RangeFun(SkipList, KL_OOR1, false),
?assertMatch([], KR9), ?assertMatch([], KR9),
@ -445,7 +477,10 @@ skiplist_timingtest(KL, SkipList) ->
AltKL1), AltKL1),
io:format(user, "Getting 1000 mainly missing keys took ~w microseconds~n", io:format(user, "Getting 1000 mainly missing keys took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWd)]), [timer:now_diff(os:timestamp(), SWd)]),
AltKL2 = gb_trees:to_list(generate_randomkeys(1, 1000, 201, 300)), AltKL2 = gb_trees:to_list(generate_randomkeys(1,
1000,
N div 5 + 1,
N div 5 + 300)),
SWe = os:timestamp(), SWe = os:timestamp(),
lists:foreach(fun({K, _V}) -> lists:foreach(fun({K, _V}) ->
none = lookup(K, SkipList) none = lookup(K, SkipList)