Add potential support for deeper layers

Unproven attempt to make skiplist code more generic
This commit is contained in:
martinsumner 2016-11-30 22:35:23 +00:00
parent 743d59c71b
commit 364527f3b8

View file

@ -17,6 +17,7 @@
-export([
from_list/1,
from_sortedlist/1,
to_list/1,
enter/3,
to_range/2,
@ -29,64 +30,58 @@
-include_lib("eunit/include/eunit.hrl").
-define(SKIP_WIDTH, 16).
-define(LIST_HEIGHT, 2).
-define(INFINITY_KEY, {null, null, null, null, null}).
-define(EMPTY_SKIPLIST, [{?INFINITY_KEY, []}]).
-define(EMPTY_SKIPLIST_TWOLEVEL, [{?INFINITY_KEY, ?EMPTY_SKIPLIST}]).
%%%============================================================================
%%% SkipList API
%%%============================================================================
enter(Key, Value, SkipList) ->
Hash = erlang:phash2(Key),
{MarkerKey, SubSkipList} =
lists:foldl(fun({Marker, SL}, Acc) ->
case Acc of
false ->
case Marker >= Key of
true ->
{Marker, SL};
false ->
Acc
end;
_ ->
Acc
end end,
false,
SkipList),
UpdSubSkipList = enter_ground(Key, Value, SubSkipList),
case Hash rem (?SKIP_WIDTH * ?SKIP_WIDTH) of
0 ->
%
{LHS, RHS} = lists:splitwith(fun({K, _V}) -> K =< Key end, UpdSubSkipList),
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
lists:ukeysort(1, [{Key, LHS}|SkpL1]);
_ ->
% Need to replace Marker Key with sublist
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubSkipList})
end.
enter(Key, Value, SkipList, ?SKIP_WIDTH, ?LIST_HEIGHT).
enter_ground(Key, Value, SkipList) ->
%% @doc Build a skiplist of the default width and height from an unsorted
%% key/value list.
%% The input is sorted and de-duplicated on key with lists:ukeysort/2 (the
%% first tuple of any run of equal keys is kept) before being loaded.
%% An empty input yields empty(); previously it crashed inside from_list/3
%% on lists:last([]).
from_list(UnsortedKVL) ->
    case lists:ukeysort(1, UnsortedKVL) of
        [] ->
            empty();
        KVL ->
            from_list(KVL, ?SKIP_WIDTH, ?LIST_HEIGHT)
    end.
%% @doc Build a skiplist of the default width and height from a key/value
%% list the caller has already sorted and de-duplicated on key (no sorting is
%% performed here).
%% An empty input yields empty(); previously it crashed inside from_list/3
%% on lists:last([]).
from_sortedlist([]) ->
    empty();
from_sortedlist(SortedKVL) ->
    from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT).
%% @doc Look up Key in a skiplist of the default height.
%% Delegates to lookup/3, which returns {value, V} on a hit and none when no
%% marker covers Key.
lookup(Key, SkipList) ->
    Height = ?LIST_HEIGHT,
    lookup(Key, SkipList, Height).
%% Rather than support iterator_from like gb_trees, will just output a key
%% sorted list for the desired range, which can then be iterated over as normal
%% @doc Return the key-sorted entries from Start onwards, for a skiplist of
%% the default height.  The range is left open-ended by using ?INFINITY_KEY
%% as the end key.
to_range(SkipList, Start) ->
    OpenEnd = ?INFINITY_KEY,
    to_range(SkipList, Start, OpenEnd, ?LIST_HEIGHT).
%% @doc Return the key-sorted entries between Start and End, for a skiplist
%% of the default height.  Delegates to to_range/4.
to_range(SkipList, Start, End) ->
    Height = ?LIST_HEIGHT,
    to_range(SkipList, Start, End, Height).
%% @doc Flatten a skiplist of the default height back into a single
%% key/value list, in the order the entries are stored.
to_list(SkipList) ->
    Height = ?LIST_HEIGHT,
    to_list(SkipList, Height).
%% @doc Create an empty skiplist of the default height.  empty/2 wraps an
%% empty ground list in one ?INFINITY_KEY marker per level.
empty() ->
    Ground = [],
    empty(Ground, ?LIST_HEIGHT).
%% @doc Count the key/value entries held in a skiplist of the default height.
size(SkipList) ->
    Height = ?LIST_HEIGHT,
    size(SkipList, Height).
%%%============================================================================
%%% SkipList Base Functions
%%%============================================================================
enter(Key, Value, SkipList, Width, 1) ->
Hash = erlang:phash2(Key),
{MarkerKey, SubList} = lists:foldl(fun({Marker, SL}, Acc) ->
case Acc of
false ->
case Marker >= Key of
true ->
{Marker, SL};
false ->
Acc
end;
_ ->
Acc
end end,
false,
SkipList),
case Hash rem ?SKIP_WIDTH of
{MarkerKey, SubList} = find_mark(Key, SkipList),
case Hash rem Width of
0 ->
{LHS, RHS} = lists:splitwith(fun({K, _V}) -> K =< Key end, SubList),
{LHS, RHS} = lists:splitwith(fun({K, _V}) ->
K =< Key end,
SubList),
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1],
lists:ukeysort(1, SkpL2);
@ -105,17 +100,53 @@ enter_ground(Key, Value, SkipList) ->
end
end,
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
end;
%% Upper-level insert (any Level not matched by the ground clause).
%% The entry is first inserted into the sub-skiplist under the first marker
%% that is >= Key.  If the key's hash is a multiple of width(Level, Width),
%% the key is also promoted to a marker at this level, splitting the updated
%% sub-skiplist around it.
%% Crashes with badmatch when no marker is >= Key, because find_mark/2
%% returns false in that case.
enter(Key, Value, SkipList, Width, Level) ->
    Hash = erlang:phash2(Key),
    Divisor = width(Level, Width),
    {MarkerKey, LowerList} = find_mark(Key, SkipList),
    UpdLowerList = enter(Key, Value, LowerList, Width, Level - 1),
    case Hash rem Divisor of
        0 ->
            % Promote Key: entries up to and including Key move under the
            % new marker, the remainder stay under the existing marker.
            AtOrBeforeKey = fun({K, _V}) -> K =< Key end,
            {LHS, RHS} = lists:splitwith(AtOrBeforeKey, UpdLowerList),
            Shrunk = lists:keyreplace(MarkerKey,
                                      1,
                                      SkipList,
                                      {MarkerKey, RHS}),
            lists:ukeysort(1, [{Key, LHS} | Shrunk]);
        _ ->
            % No promotion: swap the updated sub-skiplist in under the
            % existing marker.
            Replacement = {MarkerKey, UpdLowerList},
            lists:keyreplace(MarkerKey, 1, SkipList, Replacement)
    end.
from_list(UnsortedKVL) ->
KVL = lists:ukeysort(1, UnsortedKVL),
SkipWidth = ?SKIP_WIDTH * ?SKIP_WIDTH,
%% Ground level (Level 1): cut the sorted key/value list into consecutive
%% chunks of Width entries, each stored as {Marker, Chunk} where Marker is the
%% key of the chunk's last entry.  Any shorter remainder becomes a final chunk
%% marked with the last key of the list.  An empty list gives [].
%% Done in a single pass over KVL, rather than calling lists:nth/2 and
%% lists:sublist/3 once per chunk.
from_list(KVL, Width, 1) ->
    AddKV =
        fun({K, _V} = KV, {Chunks, Pending, Count}) ->
            case Count + 1 =:= Width of
                true ->
                    % Chunk is full: close it, marked by its last key
                    Chunk = lists:reverse([KV | Pending]),
                    {[{K, Chunk} | Chunks], [], 0};
                false ->
                    {Chunks, [KV | Pending], Count + 1}
            end
        end,
    case lists:foldl(AddKV, {[], [], 0}, KVL) of
        {Chunks, [], _Count} ->
            lists:reverse(Chunks);
        {Chunks, [{LastK, _LastV} | _] = Pending, _Count} ->
            % Pending is held in reverse, so its head is the last entry
            lists:reverse([{LastK, lists:reverse(Pending)} | Chunks])
    end;
from_list(KVL, Width, Level) ->
SkipWidth = width(Level, Width),
LoftSlots = length(KVL) div SkipWidth,
case LoftSlots of
0 ->
{K, _V} = lists:last(KVL),
[{K, from_list_ground(KVL, true)}];
[{K, from_list(KVL, Width, Level - 1)}];
_ ->
SkipList0 =
lists:map(fun(X) ->
@ -124,65 +155,24 @@ from_list(UnsortedKVL) ->
SL = lists:sublist(KVL,
N - SkipWidth + 1,
SkipWidth),
{K, from_list_ground(SL, true)}
{K, from_list(SL, Width, Level - 1)}
end,
lists:seq(1, LoftSlots)),
case LoftSlots * SkipWidth < length(KVL) of
true ->
{LastK, _V} = lists:last(KVL),
TailList = lists:nthtail(LoftSlots * SkipWidth, KVL),
SkipList0 ++ [{LastK, from_list_ground(TailList, true)}];
SkipList0 ++ [{LastK, from_list(TailList,
Width,
Level - 1)}];
false ->
SkipList0
end
end.
from_list_ground(KVL, true) ->
Slots = length(KVL) div ?SKIP_WIDTH,
SkipList0 = lists:map(fun(X) ->
N = X * ?SKIP_WIDTH,
{K, _V} = lists:nth(N, KVL),
{K, lists:sublist(KVL,
N - ?SKIP_WIDTH + 1,
?SKIP_WIDTH)}
end,
lists:seq(1, length(KVL) div ?SKIP_WIDTH)),
case Slots * ?SKIP_WIDTH < length(KVL) of
true ->
{LastK, _V} = lists:last(KVL),
SkipList0 ++ [{LastK, lists:nthtail(Slots * ?SKIP_WIDTH, KVL)}];
false ->
SkipList0
end.
lookup(Key, SkipList) ->
SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
case {Acc, SkipKey} of
{null, SkipKey} when SkipKey >= Key ->
SL;
_ ->
Acc
end end,
null,
SkipList),
case SubList of
null ->
none;
_ ->
lookup_ground(Key, SubList)
end.
lookup_ground(Key, SkipList) ->
SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
case {Acc, SkipKey} of
{null, SkipKey} when SkipKey >= Key ->
SL;
_ ->
Acc
end end,
null,
SkipList),
lookup(Key, SkipList, 1) ->
SubList = get_sublist(Key, SkipList),
case SubList of
null ->
none;
@ -193,63 +183,25 @@ lookup_ground(Key, SkipList) ->
{Key, V} ->
{value, V}
end
end;
%% Upper-level lookup: descend into the sub-skiplist under the first marker
%% that is >= Key and continue one level down.  Returns none when no marker
%% covers Key.
lookup(Key, SkipList, Level) ->
    case get_sublist(Key, SkipList) of
        null ->
            none;
        Lower ->
            lookup(Key, Lower, Level - 1)
    end.
to_list(SkipList) ->
lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list_ground(SL) end,
%% Flatten a skiplist of the given height into one key/value list, keeping
%% the stored order.  Level 1 entries hold the key/value lists directly;
%% higher levels hold sub-skiplists that are flattened recursively.
%% Uses lists:flatmap/2 rather than appending to a growing accumulator, which
%% re-copied everything gathered so far at every marker.
to_list(SkipList, 1) ->
    lists:flatmap(fun({_Mark, SL}) -> SL end, SkipList);
to_list(SkipList, Level) ->
    lists:flatmap(fun({_Mark, SL}) -> to_list(SL, Level - 1) end, SkipList).
to_list_ground(SkipList) ->
lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList).
%% Rather than support iterator_from like gb_trees, will just output a key
%% sorted list for the desired range, which can then be iterated over as normal
to_range(SkipList, Start) ->
to_range(SkipList, Start, ?INFINITY_KEY).
to_range(SkipList, Start, End) ->
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
case {PassedStart, PassedEnd} of
{true, true} ->
{true, true, Acc, null};
{false, false} ->
case Start > Mark of
true ->
{false, false, Acc, SL};
false ->
SkipLRange = to_range_ground(PrevList,
Start,
End) ++
to_range_ground(SL,
Start,
End),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, SkipLRange, null};
false ->
{true, false, SkipLRange, null}
end
end;
{true, false} ->
SkipLRange = to_range_ground(SL, Start, End),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, Acc ++ SkipLRange, null};
false ->
{true, false, Acc ++ SkipLRange, null}
end
end end,
{false, false, [], []},
SkipList),
{_Bool1, _Bool2, SubList, _PrevList} = R,
SubList.
to_range_ground(SkipList, Start, End) ->
to_range(SkipList, Start, End, 1) ->
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
case {PassedStart, PassedEnd} of
@ -282,25 +234,95 @@ to_range_ground(SkipList, Start, End) ->
{false, false, [], []},
SkipList),
{_Bool1, _Bool2, SubList, _PrevList} = R,
SubList;
%% Upper-level range query: collect the entries between Start and End by
%% folding over this level's {Mark, SubList} pairs and recursing one level
%% down into the sub-skiplists that may overlap the range.
%%
%% The fold state is {PassedStart, PassedEnd, Acc, PrevList}:
%%   PassedStart - true once a marker that is not below Start has been seen
%%   PassedEnd   - true once leveled_codec:endkey_passed(End, Mark) has
%%                 returned true
%%   Acc         - the results gathered so far
%%   PrevList    - the sub-skiplist of the previous marker while still before
%%                 Start, null once the start has been passed
%% NOTE(review): endkey_passed/2 is defined in leveled_codec; presumably it is
%% true when Mark lies beyond End - confirm there.
to_range(SkipList, Start, End, Level) ->
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
case {PassedStart, PassedEnd} of
{true, true} ->
% Range already closed: later markers leave the result untouched
{true, true, Acc, null};
{false, false} ->
case Start > Mark of
true ->
% Marker is still below Start: skip it, remembering its sub-skiplist
{false, false, Acc, SL};
false ->
% First marker at or beyond Start: query both the remembered
% previous sub-skiplist and this one
SkipLRange = to_range(PrevList,
Start, End,
Level - 1) ++
to_range(SL,
Start, End,
Level - 1),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, SkipLRange, null};
false ->
{true, false, SkipLRange, null}
end
end;
{true, false} ->
% Inside the range: append this sub-skiplist's matches and check
% whether the end has now been passed
SkipLRange = to_range(SL, Start, End, Level - 1),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, Acc ++ SkipLRange, null};
false ->
{true, false, Acc ++ SkipLRange, null}
end
end end,
{false, false, [], []},
SkipList),
% Only the accumulated result list is returned to the caller
{_Bool1, _Bool2, SubList, _PrevList} = R,
SubList.
empty() ->
?EMPTY_SKIPLIST_TWOLEVEL.
size(SkipList) ->
lists:foldl(fun({_Mark, SL}, Acc) -> size_ground(SL) + Acc end,
%% Wrap SkipList in one ?INFINITY_KEY marker per level, Level times in total,
%% so that every level has a marker for find_mark/2 and get_sublist/2 to land
%% on.
empty(SkipList, Level) ->
    Wrapped = [{?INFINITY_KEY, SkipList}],
    case Level of
        1 ->
            Wrapped;
        _ ->
            empty(Wrapped, Level - 1)
    end.
%% Count the key/value entries in a skiplist of the given height.  Level 1
%% entries hold the key/value lists directly, so their lengths are summed;
%% higher levels sum the sizes of their sub-skiplists.
size(SkipList, 1) ->
    lists:sum(lists:map(fun({_Mark, SL}) -> length(SL) end, SkipList));
size(SkipList, Level) ->
    SubSizes = lists:map(fun({_Mark, SL}) -> size(SL, Level - 1) end,
                         SkipList),
    lists:sum(SubSizes).
size_ground(SkipList) ->
lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList).
%%%============================================================================
%%% Internal Functions
%%%============================================================================
%% Number of ground-level entries one marker spans at level N: Width at
%% level 1, multiplied by Width for each level above that (Width^N).
%% The previous version squared the running value on every step, which
%% matches for levels 1 and 2 but gave Width^4 at level 3, Width^8 at
%% level 4, and so on.
width(1, Width) ->
    Width;
width(N, Width) ->
    Width * width(N - 1, Width).
%% Find the first {Marker, SubList} entry whose Marker is >= Key.
%% Returns that entry, or false when every marker is below Key.
%% Stops at the first match instead of folding over the rest of the list.
find_mark(Key, SkipList) ->
    BelowKey = fun({Marker, _SL}) -> Marker < Key end,
    case lists:dropwhile(BelowKey, SkipList) of
        [Found | _Rest] ->
            Found;
        [] ->
            false
    end.
%% Return the sublist held under the first marker that is >= Key, or null
%% when every marker is below Key.
%% Stops at the first match instead of folding over the rest of the list.
get_sublist(Key, SkipList) ->
    BelowKey = fun({SkipKey, _SL}) -> SkipKey < Key end,
    case lists:dropwhile(BelowKey, SkipList) of
        [{_SkipKey, SL} | _Rest] ->
            SL;
        [] ->
            null
    end.
splitlist_start(StartKey, SL) ->
{_LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < StartKey end, SL),
@ -347,19 +369,23 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BRange).
skiplist_test() ->
KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)),
SWaD = os:timestamp(),
_D = lists:foldl(fun({K, V}, AccD) -> dict:store(K, V, AccD) end,
dict:new(),
KL),
io:format(user, "Loading dict with 4000 keys in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaD)]),
N = 8000,
KL = gb_trees:to_list(generate_randomkeys(1, N, 1, N div 5)),
SWaGSL = os:timestamp(),
SkipList = from_list(KL),
io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaGSL)]),
io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++
"Top level key count of ~w~n",
[N, timer:now_diff(os:timestamp(), SWaGSL), length(SkipList)]),
io:format(user, "Second tier key counts of ~w~n",
[lists:map(fun({_L, SL}) -> length(SL) end, SkipList)]),
KLSorted = lists:ukeysort(1, KL),
SWaGSL2 = os:timestamp(),
SkipList = from_sortedlist(KLSorted),
io:format(user, "Generating skip list with ~w sorted keys in ~w " ++
"microseconds~n",
[N, timer:now_diff(os:timestamp(), SWaGSL2)]),
SWaDSL = os:timestamp(),
SkipList1 =
@ -368,28 +394,31 @@ skiplist_test() ->
end,
empty(),
KL),
io:format(user, "Dynamic load of skiplist took ~w microseconds~n~n",
[timer:now_diff(os:timestamp(), SWaDSL)]),
io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++
"microseconds~n" ++
"Top level key count of ~w~n",
[N, timer:now_diff(os:timestamp(), SWaDSL), length(SkipList1)]),
io:format(user, "Second tier key counts of ~w~n",
[lists:map(fun({_L, SL}) -> length(SL) end, SkipList1)]),
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
skiplist_timingtest(KL, SkipList),
skiplist_timingtest(KL, SkipList, N),
io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
skiplist_timingtest(KL, SkipList1).
skiplist_timingtest(KL, SkipList1, N).
skiplist_timingtest(KL, SkipList) ->
skiplist_timingtest(KL, SkipList, N) ->
io:format(user, "Timing tests on skiplist of size ~w~n",
[leveled_skiplist:size(SkipList)]),
CheckList1 = lists:sublist(KL, 1200, 200),
CheckList2 = lists:sublist(KL, 1600, 200),
CheckList3 = lists:sublist(KL, 2000, 200),
CheckList4 = lists:sublist(KL, 2400, 200),
CheckList5 = lists:sublist(KL, 2800, 200),
CheckList1 = lists:sublist(KL, N div 4, 200),
CheckList2 = lists:sublist(KL, N div 3, 200),
CheckList3 = lists:sublist(KL, N div 2, 200),
CheckList4 = lists:sublist(KL, N - 1000, 200),
CheckList5 = lists:sublist(KL, N - 500, 200),
CheckList6 = lists:sublist(KL, 1, 10),
CheckList7 = lists:nthtail(3800, KL),
CheckList8 = lists:sublist(KL, 2000, 1),
CheckList7 = lists:nthtail(N - 200, KL),
CheckList8 = lists:sublist(KL, N div 2, 1),
CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,
@ -426,7 +455,10 @@ skiplist_timingtest(KL, SkipList) ->
RangeFun(SkipList, CheckList7, true),
RangeFun(SkipList, CheckList8, true),
KL_OOR1 = gb_trees:to_list(generate_randomkeys(1, 4, 201, 202)),
KL_OOR1 = gb_trees:to_list(generate_randomkeys(1,
4,
N div 5 + 1,
N div 5 + 10)),
KR9 = RangeFun(SkipList, KL_OOR1, false),
?assertMatch([], KR9),
@ -445,7 +477,10 @@ skiplist_timingtest(KL, SkipList) ->
AltKL1),
io:format(user, "Getting 1000 mainly missing keys took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWd)]),
AltKL2 = gb_trees:to_list(generate_randomkeys(1, 1000, 201, 300)),
AltKL2 = gb_trees:to_list(generate_randomkeys(1,
1000,
N div 5 + 1,
N div 5 + 300)),
SWe = os:timestamp(),
lists:foreach(fun({K, _V}) ->
none = lookup(K, SkipList)