Make SkipList Two-Level
Read and write times were increasing as the siz eof skiplist got larger (e.g. over 2000). Tried instead with a smaller skip width and a two-tier skiplist and this gave much more regular eprformance as the size of the skiplist went up.
This commit is contained in:
parent
e3783485de
commit
743d59c71b
1 changed files with 171 additions and 61 deletions
|
@ -28,9 +28,10 @@
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
|
||||||
-define(SKIP_WIDTH, 64).
|
-define(SKIP_WIDTH, 16).
|
||||||
-define(INFINITY_KEY, {null, null, null, null, null}).
|
-define(INFINITY_KEY, {null, null, null, null, null}).
|
||||||
-define(EMPTY_SKIPLIST, [{?INFINITY_KEY, []}]).
|
-define(EMPTY_SKIPLIST, [{?INFINITY_KEY, []}]).
|
||||||
|
-define(EMPTY_SKIPLIST_TWOLEVEL, [{?INFINITY_KEY, ?EMPTY_SKIPLIST}]).
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
@ -39,6 +40,35 @@
|
||||||
|
|
||||||
|
|
||||||
enter(Key, Value, SkipList) ->
|
enter(Key, Value, SkipList) ->
|
||||||
|
Hash = erlang:phash2(Key),
|
||||||
|
{MarkerKey, SubSkipList} =
|
||||||
|
lists:foldl(fun({Marker, SL}, Acc) ->
|
||||||
|
case Acc of
|
||||||
|
false ->
|
||||||
|
case Marker >= Key of
|
||||||
|
true ->
|
||||||
|
{Marker, SL};
|
||||||
|
false ->
|
||||||
|
Acc
|
||||||
|
end;
|
||||||
|
_ ->
|
||||||
|
Acc
|
||||||
|
end end,
|
||||||
|
false,
|
||||||
|
SkipList),
|
||||||
|
UpdSubSkipList = enter_ground(Key, Value, SubSkipList),
|
||||||
|
case Hash rem (?SKIP_WIDTH * ?SKIP_WIDTH) of
|
||||||
|
0 ->
|
||||||
|
%
|
||||||
|
{LHS, RHS} = lists:splitwith(fun({K, _V}) -> K =< Key end, UpdSubSkipList),
|
||||||
|
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
|
||||||
|
lists:ukeysort(1, [{Key, LHS}|SkpL1]);
|
||||||
|
_ ->
|
||||||
|
% Need to replace Marker Key with sublist
|
||||||
|
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubSkipList})
|
||||||
|
end.
|
||||||
|
|
||||||
|
enter_ground(Key, Value, SkipList) ->
|
||||||
Hash = erlang:phash2(Key),
|
Hash = erlang:phash2(Key),
|
||||||
{MarkerKey, SubList} = lists:foldl(fun({Marker, SL}, Acc) ->
|
{MarkerKey, SubList} = lists:foldl(fun({Marker, SL}, Acc) ->
|
||||||
case Acc of
|
case Acc of
|
||||||
|
@ -80,6 +110,34 @@ enter(Key, Value, SkipList) ->
|
||||||
|
|
||||||
from_list(UnsortedKVL) ->
|
from_list(UnsortedKVL) ->
|
||||||
KVL = lists:ukeysort(1, UnsortedKVL),
|
KVL = lists:ukeysort(1, UnsortedKVL),
|
||||||
|
SkipWidth = ?SKIP_WIDTH * ?SKIP_WIDTH,
|
||||||
|
LoftSlots = length(KVL) div SkipWidth,
|
||||||
|
case LoftSlots of
|
||||||
|
0 ->
|
||||||
|
{K, _V} = lists:last(KVL),
|
||||||
|
[{K, from_list_ground(KVL, true)}];
|
||||||
|
_ ->
|
||||||
|
SkipList0 =
|
||||||
|
lists:map(fun(X) ->
|
||||||
|
N = X * SkipWidth,
|
||||||
|
{K, _V} = lists:nth(N, KVL),
|
||||||
|
SL = lists:sublist(KVL,
|
||||||
|
N - SkipWidth + 1,
|
||||||
|
SkipWidth),
|
||||||
|
{K, from_list_ground(SL, true)}
|
||||||
|
end,
|
||||||
|
lists:seq(1, LoftSlots)),
|
||||||
|
case LoftSlots * SkipWidth < length(KVL) of
|
||||||
|
true ->
|
||||||
|
{LastK, _V} = lists:last(KVL),
|
||||||
|
TailList = lists:nthtail(LoftSlots * SkipWidth, KVL),
|
||||||
|
SkipList0 ++ [{LastK, from_list_ground(TailList, true)}];
|
||||||
|
false ->
|
||||||
|
SkipList0
|
||||||
|
end
|
||||||
|
end.
|
||||||
|
|
||||||
|
from_list_ground(KVL, true) ->
|
||||||
Slots = length(KVL) div ?SKIP_WIDTH,
|
Slots = length(KVL) div ?SKIP_WIDTH,
|
||||||
SkipList0 = lists:map(fun(X) ->
|
SkipList0 = lists:map(fun(X) ->
|
||||||
N = X * ?SKIP_WIDTH,
|
N = X * ?SKIP_WIDTH,
|
||||||
|
@ -97,7 +155,25 @@ from_list(UnsortedKVL) ->
|
||||||
SkipList0
|
SkipList0
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
lookup(Key, SkipList) ->
|
lookup(Key, SkipList) ->
|
||||||
|
SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
|
||||||
|
case {Acc, SkipKey} of
|
||||||
|
{null, SkipKey} when SkipKey >= Key ->
|
||||||
|
SL;
|
||||||
|
_ ->
|
||||||
|
Acc
|
||||||
|
end end,
|
||||||
|
null,
|
||||||
|
SkipList),
|
||||||
|
case SubList of
|
||||||
|
null ->
|
||||||
|
none;
|
||||||
|
_ ->
|
||||||
|
lookup_ground(Key, SubList)
|
||||||
|
end.
|
||||||
|
|
||||||
|
lookup_ground(Key, SkipList) ->
|
||||||
SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
|
SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
|
||||||
case {Acc, SkipKey} of
|
case {Acc, SkipKey} of
|
||||||
{null, SkipKey} when SkipKey >= Key ->
|
{null, SkipKey} when SkipKey >= Key ->
|
||||||
|
@ -119,7 +195,13 @@ lookup(Key, SkipList) ->
|
||||||
end
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
to_list(SkipList) ->
|
to_list(SkipList) ->
|
||||||
|
lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list_ground(SL) end,
|
||||||
|
[],
|
||||||
|
SkipList).
|
||||||
|
|
||||||
|
to_list_ground(SkipList) ->
|
||||||
lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList).
|
lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList).
|
||||||
|
|
||||||
%% Rather than support iterator_from like gb_trees, will just an output a key
|
%% Rather than support iterator_from like gb_trees, will just an output a key
|
||||||
|
@ -128,6 +210,46 @@ to_range(SkipList, Start) ->
|
||||||
to_range(SkipList, Start, ?INFINITY_KEY).
|
to_range(SkipList, Start, ?INFINITY_KEY).
|
||||||
|
|
||||||
to_range(SkipList, Start, End) ->
|
to_range(SkipList, Start, End) ->
|
||||||
|
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
|
||||||
|
|
||||||
|
case {PassedStart, PassedEnd} of
|
||||||
|
{true, true} ->
|
||||||
|
{true, true, Acc, null};
|
||||||
|
{false, false} ->
|
||||||
|
case Start > Mark of
|
||||||
|
true ->
|
||||||
|
{false, false, Acc, SL};
|
||||||
|
false ->
|
||||||
|
SkipLRange = to_range_ground(PrevList,
|
||||||
|
Start,
|
||||||
|
End) ++
|
||||||
|
to_range_ground(SL,
|
||||||
|
Start,
|
||||||
|
End),
|
||||||
|
case leveled_codec:endkey_passed(End, Mark) of
|
||||||
|
true ->
|
||||||
|
{true, true, SkipLRange, null};
|
||||||
|
false ->
|
||||||
|
{true, false, SkipLRange, null}
|
||||||
|
end
|
||||||
|
end;
|
||||||
|
{true, false} ->
|
||||||
|
SkipLRange = to_range_ground(SL, Start, End),
|
||||||
|
case leveled_codec:endkey_passed(End, Mark) of
|
||||||
|
true ->
|
||||||
|
{true, true, Acc ++ SkipLRange, null};
|
||||||
|
false ->
|
||||||
|
{true, false, Acc ++ SkipLRange, null}
|
||||||
|
end
|
||||||
|
end end,
|
||||||
|
|
||||||
|
{false, false, [], []},
|
||||||
|
SkipList),
|
||||||
|
{_Bool1, _Bool2, SubList, _PrevList} = R,
|
||||||
|
SubList.
|
||||||
|
|
||||||
|
|
||||||
|
to_range_ground(SkipList, Start, End) ->
|
||||||
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
|
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
|
||||||
|
|
||||||
case {PassedStart, PassedEnd} of
|
case {PassedStart, PassedEnd} of
|
||||||
|
@ -163,11 +285,18 @@ to_range(SkipList, Start, End) ->
|
||||||
SubList.
|
SubList.
|
||||||
|
|
||||||
empty() ->
|
empty() ->
|
||||||
?EMPTY_SKIPLIST.
|
?EMPTY_SKIPLIST_TWOLEVEL.
|
||||||
|
|
||||||
size(SkipList) ->
|
size(SkipList) ->
|
||||||
|
lists:foldl(fun({_Mark, SL}, Acc) -> size_ground(SL) + Acc end,
|
||||||
|
0,
|
||||||
|
SkipList).
|
||||||
|
|
||||||
|
size_ground(SkipList) ->
|
||||||
lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList).
|
lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% Internal Functions
|
%%% Internal Functions
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
@ -230,35 +359,37 @@ skiplist_test() ->
|
||||||
SkipList = from_list(KL),
|
SkipList = from_list(KL),
|
||||||
io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n",
|
io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n",
|
||||||
[timer:now_diff(os:timestamp(), SWaGSL)]),
|
[timer:now_diff(os:timestamp(), SWaGSL)]),
|
||||||
|
|
||||||
|
|
||||||
SWaDSL = os:timestamp(),
|
SWaDSL = os:timestamp(),
|
||||||
SkipList1 =
|
SkipList1 =
|
||||||
lists:foldl(fun({K, V}, SL) ->
|
lists:foldl(fun({K, V}, SL) ->
|
||||||
enter(K, V, SL)
|
enter(K, V, SL)
|
||||||
end,
|
end,
|
||||||
?EMPTY_SKIPLIST,
|
empty(),
|
||||||
KL),
|
KL),
|
||||||
io:format(user, "Dynamic load of skiplist took ~w microseconds~n~n",
|
io:format(user, "Dynamic load of skiplist took ~w microseconds~n~n",
|
||||||
[timer:now_diff(os:timestamp(), SWaDSL)]),
|
[timer:now_diff(os:timestamp(), SWaDSL)]),
|
||||||
|
|
||||||
|
|
||||||
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
|
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
|
||||||
skiplist_timingtest(KL, SkipList),
|
skiplist_timingtest(KL, SkipList),
|
||||||
|
|
||||||
io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
|
io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
|
||||||
skiplist_timingtest(KL, SkipList1),
|
skiplist_timingtest(KL, SkipList1).
|
||||||
io:format(user, "~n", []).
|
|
||||||
|
|
||||||
|
|
||||||
skiplist_timingtest(KL, SkipList) ->
|
skiplist_timingtest(KL, SkipList) ->
|
||||||
io:format(user, "Timing tests on skiplist of size ~w~n",
|
io:format(user, "Timing tests on skiplist of size ~w~n",
|
||||||
[leveled_skiplist:size(SkipList)]),
|
[leveled_skiplist:size(SkipList)]),
|
||||||
CheckList1 = lists:sublist(KL, 1200, 100),
|
CheckList1 = lists:sublist(KL, 1200, 200),
|
||||||
CheckList2 = lists:sublist(KL, 1600, 100),
|
CheckList2 = lists:sublist(KL, 1600, 200),
|
||||||
CheckList3 = lists:sublist(KL, 2000, 100),
|
CheckList3 = lists:sublist(KL, 2000, 200),
|
||||||
CheckList4 = lists:sublist(KL, 2400, 100),
|
CheckList4 = lists:sublist(KL, 2400, 200),
|
||||||
CheckList5 = lists:sublist(KL, 2800, 100),
|
CheckList5 = lists:sublist(KL, 2800, 200),
|
||||||
CheckList6 = lists:sublist(KL, 1, 10),
|
CheckList6 = lists:sublist(KL, 1, 10),
|
||||||
CheckList7 = lists:nthtail(3800, KL),
|
CheckList7 = lists:nthtail(3800, KL),
|
||||||
CheckList8 = lists:sublist(KL, 3000, 1),
|
CheckList8 = lists:sublist(KL, 2000, 1),
|
||||||
CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
|
CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
|
||||||
CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,
|
CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,
|
||||||
|
|
||||||
|
@ -267,61 +398,40 @@ skiplist_timingtest(KL, SkipList) ->
|
||||||
?assertMatch({value, V}, lookup(K, SkipList))
|
?assertMatch({value, V}, lookup(K, SkipList))
|
||||||
end,
|
end,
|
||||||
CheckAll),
|
CheckAll),
|
||||||
io:format(user, "Finding 520 keys took ~w microseconds~n",
|
io:format(user, "Finding 1020 keys took ~w microseconds~n",
|
||||||
[timer:now_diff(os:timestamp(), SWb)]),
|
[timer:now_diff(os:timestamp(), SWb)]),
|
||||||
|
|
||||||
|
RangeFun =
|
||||||
|
fun(SkipListToQuery, CheckListForQ, Assert) ->
|
||||||
|
KR =
|
||||||
|
to_range(SkipListToQuery,
|
||||||
|
element(1, lists:nth(1, CheckListForQ)),
|
||||||
|
element(1, lists:last(CheckListForQ))),
|
||||||
|
case Assert of
|
||||||
|
true ->
|
||||||
|
CompareL = length(lists:usort(CheckListForQ)),
|
||||||
|
?assertMatch(CompareL, length(KR));
|
||||||
|
false ->
|
||||||
|
KR
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
|
||||||
SWc = os:timestamp(),
|
SWc = os:timestamp(),
|
||||||
KR1 = to_range(SkipList,
|
RangeFun(SkipList, CheckList1, true),
|
||||||
element(1, lists:nth(1, CheckList1)),
|
RangeFun(SkipList, CheckList2, true),
|
||||||
element(1, lists:last(CheckList1))),
|
RangeFun(SkipList, CheckList3, true),
|
||||||
io:format("Result length ~w ~n", [length(KR1)]),
|
RangeFun(SkipList, CheckList4, true),
|
||||||
CompareL1 = length(lists:usort(CheckList1)),
|
RangeFun(SkipList, CheckList5, true),
|
||||||
?assertMatch(CompareL1, length(KR1)),
|
RangeFun(SkipList, CheckList6, true),
|
||||||
KR2 = to_range(SkipList,
|
RangeFun(SkipList, CheckList7, true),
|
||||||
element(1, lists:nth(1, CheckList2)),
|
RangeFun(SkipList, CheckList8, true),
|
||||||
element(1, lists:last(CheckList2))),
|
|
||||||
CompareL2 = length(lists:usort(CheckList2)),
|
|
||||||
?assertMatch(CompareL2, length(KR2)),
|
|
||||||
KR3 = to_range(SkipList,
|
|
||||||
element(1, lists:nth(1, CheckList3)),
|
|
||||||
element(1, lists:last(CheckList3))),
|
|
||||||
CompareL3 = length(lists:usort(CheckList3)),
|
|
||||||
?assertMatch(CompareL3, length(KR3)),
|
|
||||||
KR4 = to_range(SkipList,
|
|
||||||
element(1, lists:nth(1, CheckList4)),
|
|
||||||
element(1, lists:last(CheckList4))),
|
|
||||||
CompareL4 = length(lists:usort(CheckList4)),
|
|
||||||
?assertMatch(CompareL4, length(KR4)),
|
|
||||||
KR5 = to_range(SkipList,
|
|
||||||
element(1, lists:nth(1, CheckList5)),
|
|
||||||
element(1, lists:last(CheckList5))),
|
|
||||||
CompareL5 = length(lists:usort(CheckList5)),
|
|
||||||
?assertMatch(CompareL5, length(KR5)),
|
|
||||||
KR6 = to_range(SkipList,
|
|
||||||
element(1, lists:nth(1, CheckList6)),
|
|
||||||
element(1, lists:last(CheckList6))),
|
|
||||||
CompareL6 = length(lists:usort(CheckList6)),
|
|
||||||
?assertMatch(CompareL6, length(KR6)),
|
|
||||||
KR7 = to_range(SkipList,
|
|
||||||
element(1, lists:nth(1, CheckList7)),
|
|
||||||
element(1, lists:last(CheckList7))),
|
|
||||||
CompareL7 = length(lists:usort(CheckList7)),
|
|
||||||
?assertMatch(CompareL7, length(KR7)),
|
|
||||||
KR8 = to_range(SkipList,
|
|
||||||
element(1, lists:nth(1, CheckList8)),
|
|
||||||
element(1, lists:last(CheckList8))),
|
|
||||||
CompareL8 = length(lists:usort(CheckList8)),
|
|
||||||
?assertMatch(CompareL8, length(KR8)),
|
|
||||||
|
|
||||||
KL_OOR1 = gb_trees:to_list(generate_randomkeys(1, 4, 201, 202)),
|
KL_OOR1 = gb_trees:to_list(generate_randomkeys(1, 4, 201, 202)),
|
||||||
KR9 = to_range(SkipList,
|
KR9 = RangeFun(SkipList, KL_OOR1, false),
|
||||||
element(1, lists:nth(1, KL_OOR1)),
|
|
||||||
element(1, lists:last(KL_OOR1))),
|
|
||||||
?assertMatch([], KR9),
|
?assertMatch([], KR9),
|
||||||
|
|
||||||
KL_OOR2 = gb_trees:to_list(generate_randomkeys(1, 4, 0, 0)),
|
KL_OOR2 = gb_trees:to_list(generate_randomkeys(1, 4, 0, 0)),
|
||||||
KR10 = to_range(SkipList,
|
KR10 = RangeFun(SkipList, KL_OOR2, false),
|
||||||
element(1, lists:nth(1, KL_OOR2)),
|
|
||||||
element(1, lists:last(KL_OOR2))),
|
|
||||||
?assertMatch([], KR10),
|
?assertMatch([], KR10),
|
||||||
|
|
||||||
io:format(user, "Finding 10 ranges took ~w microseconds~n",
|
io:format(user, "Finding 10 ranges took ~w microseconds~n",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue