Ongoing work on skip lists
Initial rough implementation with some timing tests
This commit is contained in:
parent
96b9e1faa3
commit
2d3b1bbf2c
1 changed file with 211 additions and 35 deletions
|
@ -51,14 +51,20 @@
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
|
||||||
-define(SLOT_WIDTH, {4096, 12}).
|
-define(SLOT_WIDTH, {2048, 11}).
|
||||||
-define(SKIP_WIDTH, 128).
|
-define(SKIP_WIDTH, 32).
|
||||||
|
-define(INFINITE_KEY, {null, null, null, null, null}).
|
||||||
|
-define(EMPTY_SKIPLIST, [{?INFINITE_KEY, []}]).
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% API
|
%%% API
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) ->
|
add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) ->
|
||||||
SW = os:timestamp(),
|
SW = os:timestamp(),
|
||||||
SlotInTreeList = length(TreeList) + 1,
|
SlotInTreeList = length(TreeList) + 1,
|
||||||
|
@ -152,8 +158,73 @@ merge_trees(StartKey, EndKey, TreeList, LevelMinus1) ->
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
||||||
|
|
||||||
generate_skiplist(Dict) ->
|
%% Register the hash of Key in HashIndex, an array holding one list of hashes
%% per slot. Returns {Index, Count}: when the hash is already held in its slot
%% both are returned untouched; otherwise the hash is prepended to the slot
%% list and Count is incremented by one.
addkey_to_index(HashIndex, Key, Count) ->
    {Hash, Slot} = hash_to_slot(Key),
    SlotHashes = array:get(Slot, HashIndex),
    AlreadyIndexed = lists:member(Hash, SlotHashes),
    if
        AlreadyIndexed ->
            {HashIndex, Count};
        true ->
            {array:set(Slot, [Hash|SlotHashes], HashIndex), Count + 1}
    end.
|
||||||
|
|
||||||
|
%% Merge every non-empty slot of HashIndex into MergedIndex, tagging each
%% merged hash with L0Slot. Slots 0 .. (slot width - 1) are visited in order.
%% Returns {UpdatedMergedIndex, UpdatedCount}, where the count is threaded
%% through merge_indexes_singleslot/5 starting from Count.
merge_indexes(HashIndex, MergedIndex, Count, L0Slot) ->
    MergeSlot =
        fun(Slot, {MHI, AccCount} = Acc) ->
            %% Match on the empty list rather than calling length/1, which
            %% walks the whole list just to test for emptiness.
            case array:get(Slot, HashIndex) of
                [] ->
                    Acc;
                HashList ->
                    merge_indexes_singleslot(HashList,
                                                Slot,
                                                MHI,
                                                L0Slot,
                                                AccCount)
            end
        end,
    lists:foldl(MergeSlot,
                {MergedIndex, Count},
                lists:seq(0, element(1, ?SLOT_WIDTH) - 1)).
|
||||||
|
|
||||||
|
%% Merge the hashes of one slot into the same slot of MergedIndex.
%% Every hash in HashList is prepended to the slot as {Hash, L0Slot}; the
%% entries already in the slot are kept behind the new ones.
%% Returns {UpdatedMergedIndex, UpdatedCount}.
%% NOTE(review): the counter is raised when the hash was ALREADY present in
%% the merged slot (before this call), whereas addkey_to_index/3 counts hashes
%% that are new - confirm which meaning callers expect.
merge_indexes_singleslot(HashList, IndexSlot, MergedIndex, L0Slot, Count) ->
    Existing = array:get(IndexSlot, MergedIndex),
    Accumulate =
        fun(Hash, {Entries, Total}) ->
            Bump = case lists:keymember(Hash, 1, Existing) of
                       true -> 1;
                       false -> 0
                   end,
            {[{Hash, L0Slot}|Entries], Total + Bump}
        end,
    {SlotEntries, NewCount} =
        lists:foldl(Accumulate, {Existing, Count}, HashList),
    {array:set(IndexSlot, SlotEntries, MergedIndex), NewCount}.
|
||||||
|
|
||||||
|
%% Insert {Key, Value} into SkipList, a list of {MarkerKey, SubList} pairs.
%% The target is the first marker whose key is >= Key; the call fails with a
%% badmatch if no such marker exists.
%%
%% When Hash is a multiple of ?SKIP_WIDTH, Key is promoted to a marker of its
%% own: the target sublist is split at Key, the entries below Key (plus the
%% new entry) move under the new marker, and the remainder stays under the
%% old marker. Otherwise the entry is simply merged into the target sublist.
%% Returns the updated skiplist.
load_dynamic_skiplist(SkipList, Key, Value, Hash) ->
    %% Stop at the first qualifying marker instead of folding over the whole
    %% list once the target has already been found.
    [{MarkerKey, SubList}|_] =
        lists:dropwhile(fun({Marker, _SL}) -> Marker < Key end, SkipList),
    case Hash rem ?SKIP_WIDTH of
        0 ->
            {LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < Key end, SubList),
            SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
            SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1],
            %% ukeysort keeps the first of equal marker keys, so the freshly
            %% prepended marker wins if Key was already a marker.
            lists:ukeysort(1, SkpL2);
        _ ->
            UpdSubList = lists:ukeysort(1, [{Key, Value}|SubList]),
            lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
    end.
|
||||||
|
|
||||||
|
generate_balanced_skiplist(UnsortedKVL) ->
|
||||||
|
KVL = lists:ukeysort(1, UnsortedKVL),
|
||||||
Slots = length(KVL) div ?SKIP_WIDTH,
|
Slots = length(KVL) div ?SKIP_WIDTH,
|
||||||
SkipList0 = lists:map(fun(X) ->
|
SkipList0 = lists:map(fun(X) ->
|
||||||
N = X * ?SKIP_WIDTH,
|
N = X * ?SKIP_WIDTH,
|
||||||
|
@ -171,7 +242,6 @@ generate_skiplist(Dict) ->
|
||||||
SkipList0
|
SkipList0
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
fetchkey_from_skiplist(SkipList, Key) ->
|
fetchkey_from_skiplist(SkipList, Key) ->
|
||||||
SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
|
SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
|
||||||
case {Acc, SkipKey} of
|
case {Acc, SkipKey} of
|
||||||
|
@ -196,13 +266,22 @@ fetchkey_from_skiplist(SkipList, Key) ->
|
||||||
|
|
||||||
fetchrange_from_skiplist(SkipList, StartKey, EndKey) ->
|
fetchrange_from_skiplist(SkipList, StartKey, EndKey) ->
|
||||||
R = lists:foldl(fun({SkipKey, SL}, {Continue, Acc}) ->
|
R = lists:foldl(fun({SkipKey, SL}, {Continue, Acc}) ->
|
||||||
|
% io:format("SkipKey ~w StartKey ~w EndKey ~w~n", [SkipKey, StartKey, EndKey]),
|
||||||
case Continue of
|
case Continue of
|
||||||
true ->
|
true ->
|
||||||
case SkipKey of
|
case StartKey > SkipKey of
|
||||||
SkipKey when StartKey >= SkipKey ->
|
true ->
|
||||||
|
% io:format("StartKey after SkipKey~n"),
|
||||||
{true, Acc};
|
{true, Acc};
|
||||||
SkipKey when EndKey < SkipKey ->
|
false ->
|
||||||
{false, Acc ++ SL}
|
case leveled_codec:endkey_passed(EndKey, SkipKey) of
|
||||||
|
true ->
|
||||||
|
% io:format("EndKey after SkipKey~n"),
|
||||||
|
{false, Acc ++ SL};
|
||||||
|
false ->
|
||||||
|
% io:format("EndKey before SkipKey~n"),
|
||||||
|
{true, Acc ++ SL}
|
||||||
|
end
|
||||||
end;
|
end;
|
||||||
false ->
|
false ->
|
||||||
{false, Acc}
|
{false, Acc}
|
||||||
|
@ -333,25 +412,29 @@ compare_method_test() ->
|
||||||
gb_trees:empty(),
|
gb_trees:empty(),
|
||||||
DumpList),
|
DumpList),
|
||||||
Sz0 = gb_trees:size(Q0),
|
Sz0 = gb_trees:size(Q0),
|
||||||
io:format("Crude method took ~w microseconds resulting in tree of " ++
|
io:format(user, "Crude method took ~w microseconds resulting in tree of "
|
||||||
"size ~w~n",
|
++ "size ~w~n",
|
||||||
[timer:now_diff(os:timestamp(), SWa), Sz0]),
|
[timer:now_diff(os:timestamp(), SWa), Sz0]),
|
||||||
SWb = os:timestamp(),
|
SWb = os:timestamp(),
|
||||||
Q1 = merge_trees(StartKey, EndKey, TreeList, gb_trees:empty()),
|
Q1 = merge_trees(StartKey, EndKey, TreeList, gb_trees:empty()),
|
||||||
Sz1 = gb_trees:size(Q1),
|
Sz1 = gb_trees:size(Q1),
|
||||||
io:format("Merge method took ~w microseconds resulting in tree of " ++
|
io:format(user, "Merge method took ~w microseconds resulting in tree of "
|
||||||
"size ~w~n",
|
++ "size ~w~n",
|
||||||
[timer:now_diff(os:timestamp(), SWb), Sz1]),
|
[timer:now_diff(os:timestamp(), SWb), Sz1]),
|
||||||
?assertMatch(Sz0, Sz1).
|
?assertMatch(Sz0, Sz1).
|
||||||
|
|
||||||
skiplist_test() ->
|
skiplist_test() ->
|
||||||
KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)),
|
KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)),
|
||||||
D = lists:foldl(fun({K, V}, Acc) -> dict:store(K, V, Acc) end,
|
SWaD = os:timestamp(),
|
||||||
|
_D = lists:foldl(fun({K, V}, AccD) -> dict:store(K, V, AccD) end,
|
||||||
dict:new(),
|
dict:new(),
|
||||||
KL),
|
KL),
|
||||||
|
io:format(user, "Loading dict with 4000 keys in ~w microseconds~n",
|
||||||
|
[timer:now_diff(os:timestamp(), SWaD)]),
|
||||||
|
|
||||||
SWa = os:timestamp(),
|
SWa = os:timestamp(),
|
||||||
SkipList = generate_skiplist(D),
|
SkipList = generate_balanced_skiplist(KL),
|
||||||
io:format("Generating skip list with 4000 keys in ~w microseconds~n",
|
io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n",
|
||||||
[timer:now_diff(os:timestamp(), SWa)]),
|
[timer:now_diff(os:timestamp(), SWa)]),
|
||||||
|
|
||||||
CheckList1 = lists:sublist(KL, 1200, 100),
|
CheckList1 = lists:sublist(KL, 1200, 100),
|
||||||
|
@ -370,48 +453,141 @@ skiplist_test() ->
|
||||||
fetchkey_from_skiplist(SkipList, K))
|
fetchkey_from_skiplist(SkipList, K))
|
||||||
end,
|
end,
|
||||||
CheckAll),
|
CheckAll),
|
||||||
io:format("Finding 520 keys took ~w microseconds~n",
|
io:format(user, "Finding 520 keys took ~w microseconds~n",
|
||||||
[timer:now_diff(os:timestamp(), SWb)]),
|
[timer:now_diff(os:timestamp(), SWb)]),
|
||||||
|
|
||||||
SWc = os:timestamp(),
|
SWc = os:timestamp(),
|
||||||
KR1 = fetchrange_from_skiplist(SkipList,
|
KR1 = fetchrange_from_skiplist(SkipList,
|
||||||
lists:nth(1, CheckList1),
|
element(1, lists:nth(1, CheckList1)),
|
||||||
lists:last(CheckList1)),
|
element(1, lists:last(CheckList1))),
|
||||||
|
io:format("Result length ~w ~n", [length(KR1)]),
|
||||||
?assertMatch(true, length(KR1) >= 100),
|
?assertMatch(true, length(KR1) >= 100),
|
||||||
?assertMatch(true, length(KR1) < 400),
|
?assertMatch(true, length(KR1) < 400),
|
||||||
KR2 = fetchrange_from_skiplist(SkipList,
|
KR2 = fetchrange_from_skiplist(SkipList,
|
||||||
lists:nth(1, CheckList2),
|
element(1, lists:nth(1, CheckList2)),
|
||||||
lists:last(CheckList2)),
|
element(1, lists:last(CheckList2))),
|
||||||
?assertMatch(true, length(KR2) >= 100),
|
?assertMatch(true, length(KR2) >= 100),
|
||||||
?assertMatch(true, length(KR2) < 400),
|
?assertMatch(true, length(KR2) < 400),
|
||||||
KR3 = fetchrange_from_skiplist(SkipList,
|
KR3 = fetchrange_from_skiplist(SkipList,
|
||||||
lists:nth(1, CheckList3),
|
element(1, lists:nth(1, CheckList3)),
|
||||||
lists:last(CheckList3)),
|
element(1, lists:last(CheckList3))),
|
||||||
?assertMatch(true, length(KR3) >= 100),
|
?assertMatch(true, length(KR3) >= 100),
|
||||||
?assertMatch(true, length(KR3) < 400),
|
?assertMatch(true, length(KR3) < 400),
|
||||||
KR4 = fetchrange_from_skiplist(SkipList,
|
KR4 = fetchrange_from_skiplist(SkipList,
|
||||||
lists:nth(1, CheckList4),
|
element(1, lists:nth(1, CheckList4)),
|
||||||
lists:last(CheckList4)),
|
element(1, lists:last(CheckList4))),
|
||||||
?assertMatch(true, length(KR4) >= 100),
|
?assertMatch(true, length(KR4) >= 100),
|
||||||
?assertMatch(true, length(KR4) < 400),
|
?assertMatch(true, length(KR4) < 400),
|
||||||
KR5 = fetchrange_from_skiplist(SkipList,
|
KR5 = fetchrange_from_skiplist(SkipList,
|
||||||
lists:nth(1, CheckList5),
|
element(1, lists:nth(1, CheckList5)),
|
||||||
lists:last(CheckList5)),
|
element(1, lists:last(CheckList5))),
|
||||||
?assertMatch(true, length(KR5) >= 100),
|
?assertMatch(true, length(KR5) >= 100),
|
||||||
?assertMatch(true, length(KR5) < 400),
|
?assertMatch(true, length(KR5) < 400),
|
||||||
KR6 = fetchrange_from_skiplist(SkipList,
|
KR6 = fetchrange_from_skiplist(SkipList,
|
||||||
lists:nth(1, CheckList6),
|
element(1, lists:nth(1, CheckList6)),
|
||||||
lists:last(CheckList6)),
|
element(1, lists:last(CheckList6))),
|
||||||
?assertMatch(true, length(KR6) >= 10),
|
?assertMatch(true, length(KR6) >= 10),
|
||||||
?assertMatch(true, length(KR6) < 200),
|
?assertMatch(true, length(KR6) < 200),
|
||||||
KR7 = fetchrange_from_skiplist(SkipList,
|
KR7 = fetchrange_from_skiplist(SkipList,
|
||||||
lists:nth(1, CheckList7),
|
element(1, lists:nth(1, CheckList7)),
|
||||||
lists:last(CheckList7)),
|
element(1, lists:last(CheckList7))),
|
||||||
?assertMatch(true, length(KR7) >= 10),
|
?assertMatch(true, length(KR7) >= 10),
|
||||||
?assertMatch(true, length(KR7) < 200),
|
?assertMatch(true, length(KR7) < 200),
|
||||||
io:format("Finding 7 ranges took ~w microseconds~n",
|
io:format(user, "Finding 7 ranges took ~w microseconds~n",
|
||||||
[timer:now_diff(os:timestamp(), SWc)]),
|
[timer:now_diff(os:timestamp(), SWc)]).
|
||||||
|
|
||||||
|
%% Timing/sanity test covering the dynamically loaded skiplist and the
%% slot-based hash index:
%%  - loads KeyCount random keys into a skiplist and checks the average
%%    sublist length stays within a factor of two of ?SKIP_WIDTH;
%%  - fetches a sample of keys back from the skiplist;
%%  - builds three hash indexes and merges each into a merged index,
%%    checking the merged index keeps the configured slot width.
%% Timings are printed to the user device for information only.
hash_index_test() ->
    KeyCount = 4000,
    SlotWidth = element(1, ?SLOT_WIDTH),
    HI0 = new_index(),
    MHI0 = new_index(),
    KL0 = gb_trees:to_list(generate_randomkeys(1, KeyCount, 1, 200)),
    CheckList1 = lists:sublist(KL0, 1200, 100),
    CheckList2 = lists:sublist(KL0, 1600, 100),
    CheckList3 = lists:sublist(KL0, 2000, 100),
    CheckList4 = lists:sublist(KL0, 2400, 100),
    CheckList5 = lists:sublist(KL0, 2800, 100),
    CheckList6 = lists:sublist(KL0, 1, 10),
    CheckList7 = lists:nthtail(3800, KL0),
    CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
                    CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,

    SWa = os:timestamp(),
    SkipList1 =
        lists:foldl(fun({K, V}, Acc) ->
                        {H, _Slot} = hash_to_slot(K),
                        load_dynamic_skiplist(Acc, K, V, H) end,
                    ?EMPTY_SKIPLIST,
                    KL0),
    io:format(user, "Dynamic load of skiplist took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWa)]),

    %% LL is the total number of entries, LN the number of markers.
    {LL, LN} = lists:foldl(fun({_K, SL}, {Count, Number}) ->
                                {Count + length(SL), Number + 1} end,
                            {0, 0},
                            SkipList1),
    io:format(user,
                "Skip list has ~w markers with total members of ~w~n",
                [LN, LL]),
    ?assertMatch(true, LL / LN > ?SKIP_WIDTH / 2 ),
    ?assertMatch(true, LL / LN < ?SKIP_WIDTH * 2 ),

    SWb = os:timestamp(),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({K, V},
                                        fetchkey_from_skiplist(SkipList1, K))
                        end,
                    CheckAll),
    %% Report the number of keys actually fetched, not the number loaded.
    io:format(user, "Fetching ~w keys from skiplist took ~w microseconds~n",
                [length(CheckAll), timer:now_diff(os:timestamp(), SWb)]),

    SWc = os:timestamp(),
    {HI1, _C1} = lists:foldl(fun({K, _V}, {HI, C}) ->
                                    addkey_to_index(HI, K, C) end,
                                {HI0, 0},
                                KL0),
    io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n",
                [KeyCount, timer:now_diff(os:timestamp(), SWc)]),
    ?assertMatch(SlotWidth, array:size(HI1)),

    SWd = os:timestamp(),
    {MHI1, TC1} = merge_indexes(HI1, MHI0, 0, 0),
    io:format(user, "First merge to hashindex took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWd)]),
    ?assertMatch(SlotWidth, array:size(MHI1)),

    KL1 = gb_trees:to_list(generate_randomkeys(1, KeyCount, 1, 200)),

    SWe = os:timestamp(),
    HI2 = new_index(),
    {HI3, _C2} = lists:foldl(fun({K, _V}, {HI, C}) ->
                                    addkey_to_index(HI, K, C) end,
                                {HI2, 0},
                                KL1),
    io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n",
                [KeyCount, timer:now_diff(os:timestamp(), SWe)]),

    SWf = os:timestamp(),
    {MHI2, TC2} = merge_indexes(HI3, MHI1, TC1, 1),
    io:format(user, "Second merge to hashindex took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWf)]),
    ?assertMatch(SlotWidth, array:size(MHI2)),

    SWg = os:timestamp(),
    HI4 = new_index(),
    {HI5, _C3} = lists:foldl(fun({K, _V}, {HI, C}) ->
                                    addkey_to_index(HI, K, C) end,
                                {HI4, 0},
                                KL1),
    io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n",
                [KeyCount, timer:now_diff(os:timestamp(), SWg)]),

    SWh = os:timestamp(),
    {MHI3, _TC3} = merge_indexes(HI5, MHI2, TC2, 2),
    io:format(user, "Third merge to hashindex took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWh)]),
    %% Check the result of the third merge (previously MHI2 was re-checked
    %% and MHI3 was never inspected).
    ?assertMatch(SlotWidth, array:size(MHI3)).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-endif.
|
-endif.
|
Loading…
Add table
Add a link
Reference in a new issue