Ongoing work on skip lists

Initial rough implementation with some timing tests
This commit is contained in:
martinsumner 2016-11-24 20:16:41 +00:00
parent 96b9e1faa3
commit 2d3b1bbf2c

View file

@ -51,14 +51,20 @@
-include_lib("eunit/include/eunit.hrl"). -include_lib("eunit/include/eunit.hrl").
-define(SLOT_WIDTH, {4096, 12}). -define(SLOT_WIDTH, {2048, 11}).
-define(SKIP_WIDTH, 128). -define(SKIP_WIDTH, 32).
-define(INFINITE_KEY, {null, null, null, null, null}).
-define(EMPTY_SKIPLIST, [{?INFINITE_KEY, []}]).
%%%============================================================================ %%%============================================================================
%%% API %%% API
%%%============================================================================ %%%============================================================================
add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) -> add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) ->
SW = os:timestamp(), SW = os:timestamp(),
SlotInTreeList = length(TreeList) + 1, SlotInTreeList = length(TreeList) + 1,
@ -152,8 +158,73 @@ merge_trees(StartKey, EndKey, TreeList, LevelMinus1) ->
%%%============================================================================ %%%============================================================================
%% Record the hash of Key in the slot of HashIndex chosen by hash_to_slot/1.
%%
%% Returns {UpdatedIndex, UpdatedCount}. When the hash is already present in
%% the slot's list the index and Count come back untouched; otherwise the hash
%% is prepended to the slot's list and Count is incremented by one.
addkey_to_index(HashIndex, Key, Count) ->
    {Hash, Slot} = hash_to_slot(Key),
    SlotHashes = array:get(Slot, HashIndex),
    AlreadyIndexed = lists:member(Hash, SlotHashes),
    if
        AlreadyIndexed ->
            {HashIndex, Count};
        true ->
            {array:set(Slot, [Hash | SlotHashes], HashIndex), Count + 1}
    end.
%% Merge every non-empty slot of HashIndex into MergedIndex.
%%
%% Slots 0 .. element(1, ?SLOT_WIDTH) - 1 are visited in order. Each slot
%% holding at least one hash is handed to merge_indexes_singleslot/5 together
%% with L0Slot; empty slots leave the accumulator untouched.
%%
%% Returns {UpdatedMergedIndex, UpdatedCount}, threading Count through every
%% per-slot merge.
merge_indexes(HashIndex, MergedIndex, Count, L0Slot) ->
    MergeSlot =
        fun(Slot, {MHI, AccCount} = Acc) ->
            %% Match on the list shape rather than calling length/1, which
            %% walks the whole list just to learn whether it is empty.
            case array:get(Slot, HashIndex) of
                [] ->
                    Acc;
                [_ | _] = HashList ->
                    merge_indexes_singleslot(HashList,
                                             Slot,
                                             MHI,
                                             L0Slot,
                                             AccCount)
            end
        end,
    lists:foldl(MergeSlot,
                {MergedIndex, Count},
                lists:seq(0, element(1, ?SLOT_WIDTH) - 1)).
%% Merge the hashes of one slot into the same slot of the merged index.
%%
%% Every hash in HashList is prepended to the existing entries of IndexSlot as
%% a {Hash, L0Slot} pair. Count is incremented once for each hash that was
%% already present (as the first tuple element) in the slot before this merge
%% started.
%% NOTE(review): the counter moves on hashes that already exist, not on new
%% ones - confirm this is the intended meaning of Count.
%%
%% Returns {UpdatedMergedIndex, UpdatedCount}.
merge_indexes_singleslot(HashList, IndexSlot, MergedIndex, L0Slot, Count) ->
    Existing = array:get(IndexSlot, MergedIndex),
    TagHash =
        fun(Hash, {Entries, Seen}) ->
            NewSeen =
                case lists:keymember(Hash, 1, Existing) of
                    true -> Seen + 1;
                    false -> Seen
                end,
            {[{Hash, L0Slot} | Entries], NewSeen}
        end,
    {SlotEntries, NewCount} = lists:foldl(TagHash, {Existing, Count}, HashList),
    {array:set(IndexSlot, SlotEntries, MergedIndex), NewCount}.
%% Insert {Key, Value} into a skip list of {MarkerKey, SubList} pairs.
%%
%% The target sublist is the one belonging to the first marker in SkipList
%% that is >= Key (the match fails if no such marker exists).
%%
%% When Hash rem ?SKIP_WIDTH is 0 the target sublist is split at Key: entries
%% below Key, plus the new entry, go under a new marker named Key, and the
%% remaining entries stay under the original marker. For any other remainder
%% the new entry is simply merged into the target sublist.
%%
%% lists:ukeysort/2 keeps the first of any tuples with equal keys, and the new
%% entry is always placed at the head, so it takes precedence over an existing
%% entry (or marker) with the same key.
load_dynamic_skiplist(SkipList, Key, Value, Hash) ->
    %% Keep the first marker that is not below Key; later markers are ignored.
    PickMarker =
        fun({Marker, SL}, false) when Marker >= Key -> {Marker, SL};
           ({_Marker, _SL}, Found) -> Found
        end,
    {MarkerKey, SubList} = lists:foldl(PickMarker, false, SkipList),
    NewEntry = {Key, Value},
    case Hash rem ?SKIP_WIDTH of
        0 ->
            IsBelowKey = fun({K, _V}) -> K < Key end,
            {Below, AtOrAbove} = lists:splitwith(IsBelowKey, SubList),
            Trimmed = lists:keyreplace(MarkerKey,
                                       1,
                                       SkipList,
                                       {MarkerKey, AtOrAbove}),
            NewSlot = {Key, lists:ukeysort(1, [NewEntry | Below])},
            lists:ukeysort(1, [NewSlot | Trimmed]);
        _ ->
            Merged = lists:ukeysort(1, [NewEntry | SubList]),
            lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, Merged})
    end.
generate_balanced_skiplist(UnsortedKVL) ->
KVL = lists:ukeysort(1, UnsortedKVL),
Slots = length(KVL) div ?SKIP_WIDTH, Slots = length(KVL) div ?SKIP_WIDTH,
SkipList0 = lists:map(fun(X) -> SkipList0 = lists:map(fun(X) ->
N = X * ?SKIP_WIDTH, N = X * ?SKIP_WIDTH,
@ -171,7 +242,6 @@ generate_skiplist(Dict) ->
SkipList0 SkipList0
end. end.
fetchkey_from_skiplist(SkipList, Key) -> fetchkey_from_skiplist(SkipList, Key) ->
SubList = lists:foldl(fun({SkipKey, SL}, Acc) -> SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
case {Acc, SkipKey} of case {Acc, SkipKey} of
@ -196,13 +266,22 @@ fetchkey_from_skiplist(SkipList, Key) ->
fetchrange_from_skiplist(SkipList, StartKey, EndKey) -> fetchrange_from_skiplist(SkipList, StartKey, EndKey) ->
R = lists:foldl(fun({SkipKey, SL}, {Continue, Acc}) -> R = lists:foldl(fun({SkipKey, SL}, {Continue, Acc}) ->
% io:format("SkipKey ~w StartKey ~w EndKey ~w~n", [SkipKey, StartKey, EndKey]),
case Continue of case Continue of
true -> true ->
case SkipKey of case StartKey > SkipKey of
SkipKey when StartKey >= SkipKey -> true ->
% io:format("StartKey after SkipKey~n"),
{true, Acc}; {true, Acc};
SkipKey when EndKey < SkipKey -> false ->
{false, Acc ++ SL} case leveled_codec:endkey_passed(EndKey, SkipKey) of
true ->
% io:format("EndKey after SkipKey~n"),
{false, Acc ++ SL};
false ->
% io:format("EndKey before SkipKey~n"),
{true, Acc ++ SL}
end
end; end;
false -> false ->
{false, Acc} {false, Acc}
@ -333,25 +412,29 @@ compare_method_test() ->
gb_trees:empty(), gb_trees:empty(),
DumpList), DumpList),
Sz0 = gb_trees:size(Q0), Sz0 = gb_trees:size(Q0),
io:format("Crude method took ~w microseconds resulting in tree of " ++ io:format(user, "Crude method took ~w microseconds resulting in tree of "
"size ~w~n", ++ "size ~w~n",
[timer:now_diff(os:timestamp(), SWa), Sz0]), [timer:now_diff(os:timestamp(), SWa), Sz0]),
SWb = os:timestamp(), SWb = os:timestamp(),
Q1 = merge_trees(StartKey, EndKey, TreeList, gb_trees:empty()), Q1 = merge_trees(StartKey, EndKey, TreeList, gb_trees:empty()),
Sz1 = gb_trees:size(Q1), Sz1 = gb_trees:size(Q1),
io:format("Merge method took ~w microseconds resulting in tree of " ++ io:format(user, "Merge method took ~w microseconds resulting in tree of "
"size ~w~n", ++ "size ~w~n",
[timer:now_diff(os:timestamp(), SWb), Sz1]), [timer:now_diff(os:timestamp(), SWb), Sz1]),
?assertMatch(Sz0, Sz1). ?assertMatch(Sz0, Sz1).
skiplist_test() -> skiplist_test() ->
KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)), KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)),
D = lists:foldl(fun({K, V}, Acc) -> dict:store(K, V, Acc) end, SWaD = os:timestamp(),
_D = lists:foldl(fun({K, V}, AccD) -> dict:store(K, V, AccD) end,
dict:new(), dict:new(),
KL), KL),
io:format(user, "Loading dict with 4000 keys in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaD)]),
SWa = os:timestamp(), SWa = os:timestamp(),
SkipList = generate_skiplist(D), SkipList = generate_balanced_skiplist(KL),
io:format("Generating skip list with 4000 keys in ~w microseconds~n", io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWa)]), [timer:now_diff(os:timestamp(), SWa)]),
CheckList1 = lists:sublist(KL, 1200, 100), CheckList1 = lists:sublist(KL, 1200, 100),
@ -370,48 +453,141 @@ skiplist_test() ->
fetchkey_from_skiplist(SkipList, K)) fetchkey_from_skiplist(SkipList, K))
end, end,
CheckAll), CheckAll),
io:format("Finding 520 keys took ~w microseconds~n", io:format(user, "Finding 520 keys took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWb)]), [timer:now_diff(os:timestamp(), SWb)]),
SWc = os:timestamp(), SWc = os:timestamp(),
KR1 = fetchrange_from_skiplist(SkipList, KR1 = fetchrange_from_skiplist(SkipList,
lists:nth(1, CheckList1), element(1, lists:nth(1, CheckList1)),
lists:last(CheckList1)), element(1, lists:last(CheckList1))),
io:format("Result length ~w ~n", [length(KR1)]),
?assertMatch(true, length(KR1) >= 100), ?assertMatch(true, length(KR1) >= 100),
?assertMatch(true, length(KR1) < 400), ?assertMatch(true, length(KR1) < 400),
KR2 = fetchrange_from_skiplist(SkipList, KR2 = fetchrange_from_skiplist(SkipList,
lists:nth(1, CheckList2), element(1, lists:nth(1, CheckList2)),
lists:last(CheckList2)), element(1, lists:last(CheckList2))),
?assertMatch(true, length(KR2) >= 100), ?assertMatch(true, length(KR2) >= 100),
?assertMatch(true, length(KR2) < 400), ?assertMatch(true, length(KR2) < 400),
KR3 = fetchrange_from_skiplist(SkipList, KR3 = fetchrange_from_skiplist(SkipList,
lists:nth(1, CheckList3), element(1, lists:nth(1, CheckList3)),
lists:last(CheckList3)), element(1, lists:last(CheckList3))),
?assertMatch(true, length(KR3) >= 100), ?assertMatch(true, length(KR3) >= 100),
?assertMatch(true, length(KR3) < 400), ?assertMatch(true, length(KR3) < 400),
KR4 = fetchrange_from_skiplist(SkipList, KR4 = fetchrange_from_skiplist(SkipList,
lists:nth(1, CheckList4), element(1, lists:nth(1, CheckList4)),
lists:last(CheckList4)), element(1, lists:last(CheckList4))),
?assertMatch(true, length(KR4) >= 100), ?assertMatch(true, length(KR4) >= 100),
?assertMatch(true, length(KR4) < 400), ?assertMatch(true, length(KR4) < 400),
KR5 = fetchrange_from_skiplist(SkipList, KR5 = fetchrange_from_skiplist(SkipList,
lists:nth(1, CheckList5), element(1, lists:nth(1, CheckList5)),
lists:last(CheckList5)), element(1, lists:last(CheckList5))),
?assertMatch(true, length(KR5) >= 100), ?assertMatch(true, length(KR5) >= 100),
?assertMatch(true, length(KR5) < 400), ?assertMatch(true, length(KR5) < 400),
KR6 = fetchrange_from_skiplist(SkipList, KR6 = fetchrange_from_skiplist(SkipList,
lists:nth(1, CheckList6), element(1, lists:nth(1, CheckList6)),
lists:last(CheckList6)), element(1, lists:last(CheckList6))),
?assertMatch(true, length(KR6) >= 10), ?assertMatch(true, length(KR6) >= 10),
?assertMatch(true, length(KR6) < 200), ?assertMatch(true, length(KR6) < 200),
KR7 = fetchrange_from_skiplist(SkipList, KR7 = fetchrange_from_skiplist(SkipList,
lists:nth(1, CheckList7), element(1, lists:nth(1, CheckList7)),
lists:last(CheckList7)), element(1, lists:last(CheckList7))),
?assertMatch(true, length(KR7) >= 10), ?assertMatch(true, length(KR7) >= 10),
?assertMatch(true, length(KR7) < 200), ?assertMatch(true, length(KR7) < 200),
io:format("Finding 7 ranges took ~w microseconds~n", io:format(user, "Finding 7 ranges took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWc)]), [timer:now_diff(os:timestamp(), SWc)]).
%% Timing and sanity test for the dynamically loaded skip list and the hash
%% index.
%%
%% Steps:
%%  1. Load KeyCount random keys into a skip list with load_dynamic_skiplist/4
%%     and check the mean sublist size lies between half and double
%%     ?SKIP_WIDTH.
%%  2. Fetch a sample of keys back out of the skip list.
%%  3. Build three hash indexes and merge them one after another into a merged
%%     index, checking after each merge that the array still has SlotWidth
%%     slots.
%% Timings for each step are printed to the user device.
hash_index_test() ->
    KeyCount = 4000,
    SlotWidth = element(1, ?SLOT_WIDTH),
    HI0 = new_index(),
    MHI0 = new_index(),
    KL0 = gb_trees:to_list(generate_randomkeys(1, KeyCount, 1, 200)),
    CheckList1 = lists:sublist(KL0, 1200, 100),
    CheckList2 = lists:sublist(KL0, 1600, 100),
    CheckList3 = lists:sublist(KL0, 2000, 100),
    CheckList4 = lists:sublist(KL0, 2400, 100),
    CheckList5 = lists:sublist(KL0, 2800, 100),
    CheckList6 = lists:sublist(KL0, 1, 10),
    CheckList7 = lists:nthtail(3800, KL0),
    CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
                    CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,

    SWa = os:timestamp(),
    SkipList1 =
        lists:foldl(fun({K, V}, Acc) ->
                        {H, _Slot} = hash_to_slot(K),
                        load_dynamic_skiplist(Acc, K, V, H) end,
                    ?EMPTY_SKIPLIST,
                    KL0),
    io:format(user, "Dynamic load of skiplist took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWa)]),

    %% LL is the total number of entries, LN the number of markers.
    {LL, LN} = lists:foldl(fun({_K, SL}, {Count, Number}) ->
                                {Count + length(SL), Number + 1} end,
                            {0, 0},
                            SkipList1),
    io:format(user,
                "Skip list has ~w markers with total members of ~w~n",
                [LN, LL]),
    ?assertMatch(true, LL / LN > ?SKIP_WIDTH / 2 ),
    ?assertMatch(true, LL / LN < ?SKIP_WIDTH * 2 ),

    SWb = os:timestamp(),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({K, V},
                                        fetchkey_from_skiplist(SkipList1, K))
                        end,
                    CheckAll),
    %% Report the number of keys actually fetched, not the number loaded.
    io:format(user, "Fetching ~w keys from skiplist took ~w microseconds~n",
                [length(CheckAll), timer:now_diff(os:timestamp(), SWb)]),

    SWc = os:timestamp(),
    {HI1, _C1} = lists:foldl(fun({K, _V}, {HI, C}) ->
                                addkey_to_index(HI, K, C) end,
                            {HI0, 0},
                            KL0),
    io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n",
                [KeyCount, timer:now_diff(os:timestamp(), SWc)]),
    ?assertMatch(SlotWidth, array:size(HI1)),

    SWd = os:timestamp(),
    {MHI1, TC1} = merge_indexes(HI1, MHI0, 0, 0),
    io:format(user, "First merge to hashindex took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWd)]),
    ?assertMatch(SlotWidth, array:size(MHI1)),

    KL1 = gb_trees:to_list(generate_randomkeys(1, KeyCount, 1, 200)),

    SWe = os:timestamp(),
    HI2 = new_index(),
    {HI3, _C2} = lists:foldl(fun({K, _V}, {HI, C}) ->
                                addkey_to_index(HI, K, C) end,
                            {HI2, 0},
                            KL1),
    io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n",
                [KeyCount, timer:now_diff(os:timestamp(), SWe)]),

    SWf = os:timestamp(),
    {MHI2, TC2} = merge_indexes(HI3, MHI1, TC1, 1),
    io:format(user, "Second merge to hashindex took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWf)]),
    ?assertMatch(SlotWidth, array:size(MHI2)),

    %% NOTE(review): the third index is built from KL1 again, the same keys as
    %% the second one - confirm whether a fresh key list was intended.
    SWg = os:timestamp(),
    HI4 = new_index(),
    {HI5, _C3} = lists:foldl(fun({K, _V}, {HI, C}) ->
                                addkey_to_index(HI, K, C) end,
                            {HI4, 0},
                            KL1),
    io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n",
                [KeyCount, timer:now_diff(os:timestamp(), SWg)]),

    SWh = os:timestamp(),
    {MHI3, _TC3} = merge_indexes(HI5, MHI2, TC2, 2),
    io:format(user, "Third merge to hashindex took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWh)]),
    %% Check the result of the third merge (previously MHI2 was re-checked).
    ?assertMatch(SlotWidth, array:size(MHI3)).
-endif. -endif.