Switch to using skip lists from leveled_tree
Remove now unused leveled_skiplist and leveled_tinybloom
This commit is contained in:
parent
6d2eb1d57c
commit
58cda7d157
4 changed files with 34 additions and 835 deletions
|
@ -15,7 +15,7 @@
|
||||||
%% Inker key type used for tombstones
|
%% Inker key type used for tombstones
|
||||||
-define(INKT_TOMB, tomb).
|
-define(INKT_TOMB, tomb).
|
||||||
|
|
||||||
-define(CACHE_TYPE, idxt).
|
-define(CACHE_TYPE, skpl).
|
||||||
|
|
||||||
-record(sft_options,
|
-record(sft_options,
|
||||||
{wait = true :: boolean(),
|
{wait = true :: boolean(),
|
||||||
|
|
|
@ -1,661 +0,0 @@
|
||||||
%% -------- SKIPLIST ---------
|
|
||||||
%%
|
|
||||||
%% For storing small numbers of {K, V} pairs where reasonable insertion and
|
|
||||||
%% fetch times, but with fast support for flattening to a list or a sublist
|
|
||||||
%% within a certain key range
|
|
||||||
%%
|
|
||||||
%% Used instead of gb_trees to retain compatibility with OTP16 (and Riak's
|
|
||||||
%% ongoing dependency on OTP16)
|
|
||||||
%%
|
|
||||||
%% Not a proper skip list. Only supports a fixed depth. Good enough for the
|
|
||||||
%% purposes of leveled. Also uses the peculiar endkey_passed function within
|
|
||||||
%% leveled. Not tested beyond a depth of 2.
|
|
||||||
|
|
||||||
-module(leveled_skiplist).
|
|
||||||
|
|
||||||
-include("include/leveled.hrl").
|
|
||||||
|
|
||||||
-export([
|
|
||||||
from_list/1,
|
|
||||||
from_list/2,
|
|
||||||
from_sortedlist/1,
|
|
||||||
from_sortedlist/2,
|
|
||||||
from_orderedset/1,
|
|
||||||
from_orderedset/2,
|
|
||||||
to_list/1,
|
|
||||||
enter/3,
|
|
||||||
enter/4,
|
|
||||||
enter_nolookup/3,
|
|
||||||
to_range/2,
|
|
||||||
to_range/3,
|
|
||||||
lookup/2,
|
|
||||||
lookup/3,
|
|
||||||
empty/0,
|
|
||||||
empty/1,
|
|
||||||
size/1
|
|
||||||
]).
|
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
|
|
||||||
-define(SKIP_WIDTH, 16).
|
|
||||||
-define(LIST_HEIGHT, 2).
|
|
||||||
-define(INFINITY_KEY, {null, null, null, null, null}).
|
|
||||||
-define(BITARRAY_SIZE, 2048).
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% SkipList API
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
%% Add a Key/Value pair to the skiplist (replacing the value of an existing
%% Key).  The hash used for the bloom is leveled_codec:magic_hash/1 of the Key.
enter(Key, Value, SkipList) ->
    enter(Key, leveled_codec:magic_hash(Key), Value, SkipList).

%% As enter/3, for callers that already hold the hash of the Key.
%% SkipList is a two-element structure: element 1 is either the atom
%% list_only (no bloom kept) or a bloom to which Hash is added; element 2 is
%% the nested list.  Marker selection inside the list uses erlang:phash2/1 of
%% the Key, not Hash.
enter(Key, Hash, Value, SkipList) ->
    UpdBloom =
        case element(1, SkipList) of
            list_only ->
                list_only;
            CurrentBloom ->
                leveled_tinybloom:enter({hash, Hash}, CurrentBloom)
        end,
    MarkerHash = erlang:phash2(Key),
    UpdList =
        enter(Key, Value, MarkerHash,
                element(2, SkipList),
                ?SKIP_WIDTH, ?LIST_HEIGHT),
    {UpdBloom, UpdList}.
|
|
||||||
|
|
||||||
%% Can iterate over a key entered this way, but never lookup the key
|
|
||||||
%% used for index terms
|
|
||||||
%% The key may still be a marker key - and the much cheaper native hash
|
|
||||||
%% is used to determine this, avoiding the more expensive magic hash
|
|
||||||
%% Add a Key/Value pair to the list part only.  Element 1 of the skiplist
%% (the bloom, or list_only) is passed through unchanged, so the Key is never
%% added to any bloom.
enter_nolookup(Key, Value, SkipList) ->
    Unchanged = element(1, SkipList),
    UpdList =
        enter(Key, Value, erlang:phash2(Key),
                element(2, SkipList),
                ?SKIP_WIDTH, ?LIST_HEIGHT),
    {Unchanged, UpdList}.
|
|
||||||
|
|
||||||
%% Build a skiplist (without bloom protection) from the contents of an ETS
%% table.
from_orderedset(Table) ->
    from_orderedset(Table, false).

%% Build a skiplist from the contents of an ETS table; Bloom (true | false)
%% selects whether a bloom is built.  The table contents are handed to
%% from_sortedlist/2 as-is - presumably Table is an ordered_set so that
%% ets:tab2list/1 yields key order (TODO confirm against callers).
from_orderedset(Table, Bloom) ->
    SortedKVL = ets:tab2list(Table),
    from_sortedlist(SortedKVL, Bloom).
|
|
||||||
|
|
||||||
%% Build a skiplist (without bloom protection) from an unsorted list of
%% {Key, Value} tuples.
from_list(UnsortedKVL) ->
    from_list(UnsortedKVL, false).

%% Build a skiplist from an unsorted list of {Key, Value} tuples.  The list
%% is sorted on the key with lists:ukeysort/2, so where a key is repeated only
%% its first occurrence in the input is kept.
from_list(UnsortedKVL, BloomProtect) ->
    from_sortedlist(lists:ukeysort(1, UnsortedKVL), BloomProtect).
|
|
||||||
|
|
||||||
%% Build a skiplist (without bloom protection) from an already sorted list
%% of {Key, Value} tuples.
from_sortedlist(SortedKVL) ->
    from_sortedlist(SortedKVL, false).

%% Build a skiplist from an already sorted list of {Key, Value} tuples.
%% An empty input gives the empty skiplist.  When BloomProtect is true every
%% key is entered into a fresh bloom; when false the bloom slot holds the
%% atom list_only.
from_sortedlist([], BloomProtect) ->
    empty(BloomProtect);
from_sortedlist(SortedKVL, BloomProtect) ->
    BloomPart =
        case BloomProtect of
            true ->
                AddKey =
                    fun({K, _V}, BloomAcc) ->
                        leveled_tinybloom:enter(K, BloomAcc)
                    end,
                lists:foldr(AddKey,
                            leveled_tinybloom:empty(?SKIP_WIDTH),
                            SortedKVL);
            false ->
                list_only
        end,
    ListPart = from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT),
    {BloomPart, ListPart}.
|
|
||||||
|
|
||||||
%% Look up Key, returning {value, V} or none.  When the skiplist carries a
%% bloom the magic hash of the Key is calculated and lookup/3 is used; for a
%% list_only skiplist the list is searched directly without hashing.
lookup(Key, SkipList) ->
    case element(1, SkipList) of
        list_only ->
            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
        _Bloom ->
            KeyHash = leveled_codec:magic_hash(Key),
            lookup(Key, KeyHash, SkipList)
    end.

%% Look up Key when its hash is already known.  If the skiplist carries a
%% bloom and the bloom check on Hash fails, none is returned without touching
%% the list.
lookup(Key, Hash, SkipList) ->
    case element(1, SkipList) of
        list_only ->
            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
        Bloom ->
            case leveled_tinybloom:check({hash, Hash}, Bloom) of
                true ->
                    list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
                false ->
                    none
            end
    end.
|
|
||||||
|
|
||||||
|
|
||||||
%% Rather than support iterator_from like gb_trees, this will just output a key
|
|
||||||
%% sorted list for the desired range, which can then be iterated over as normal
|
|
||||||
%% Return the {Key, Value} pairs from Start (inclusive) onwards as a list,
%% using ?INFINITY_KEY as the end of the range.
to_range(SkipList, Start) ->
    ListPart = element(2, SkipList),
    to_range(ListPart, Start, ?INFINITY_KEY, ?LIST_HEIGHT).

%% Return the {Key, Value} pairs from Start (inclusive) up to End as a list.
%% Whether a key is beyond End is decided by leveled_codec:endkey_passed/2.
to_range(SkipList, Start, End) ->
    ListPart = element(2, SkipList),
    to_range(ListPart, Start, End, ?LIST_HEIGHT).
|
|
||||||
|
|
||||||
%% Flatten the list part of the skiplist into a single list of {Key, Value}
%% tuples.
to_list(SkipList) ->
    ListPart = element(2, SkipList),
    to_list(ListPart, ?LIST_HEIGHT).
|
|
||||||
|
|
||||||
%% An empty skiplist without a bloom.
empty() ->
    empty(false).

%% An empty skiplist.  With BloomProtect true the first element is an empty
%% bloom of ?SKIP_WIDTH slots, with false it is the atom list_only.
empty(BloomProtect) ->
    BloomPart =
        case BloomProtect of
            true ->
                leveled_tinybloom:empty(?SKIP_WIDTH);
            false ->
                list_only
        end,
    {BloomPart, empty([], ?LIST_HEIGHT)}.
|
|
||||||
|
|
||||||
%% Count of {Key, Value} pairs held in the list part of the skiplist.
size(SkipList) ->
    ListPart = element(2, SkipList),
    size(ListPart, ?LIST_HEIGHT).
|
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% SkipList Base Functions
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
%% Insert into the list at the given level.
%%
%% Level 1: SkipList is a list of {Marker, KVList}.  The KVList under the
%% first marker >= Key receives the new pair.  If Hash is a multiple of Width
%% the Key also becomes a new marker, taking with it every pair whose key is
%% =< Key; the old marker keeps the remainder.
%%
%% Higher levels: the insert is first made in the sub-skiplist under the
%% first marker >= Key, and the Key is promoted to a marker at this level when
%% Hash is a multiple of width(Level, Width).
%%
%% NOTE(review): find_mark/2 returns false when no marker is >= Key, which
%% would fail the tuple match below - this appears to rely on a top marker
%% (such as ?INFINITY_KEY) always being present; confirm against callers.
enter(Key, Value, Hash, SkipList, Width, 1) ->
    {Mark, KVs} = find_mark(Key, SkipList),
    case Hash rem Width of
        0 ->
            {AtOrBelow, Above} =
                lists:splitwith(fun({K, _V}) -> K =< Key end, KVs),
            Trimmed = lists:keyreplace(Mark, 1, SkipList, {Mark, Above}),
            % The new pair is placed first so that ukeysort keeps it in
            % preference to any existing pair with the same key
            NewKVs = lists:ukeysort(1, [{Key, Value}|AtOrBelow]),
            lists:ukeysort(1, [{Key, NewKVs}|Trimmed]);
        _ ->
            {Below, AtOrAbove} =
                lists:splitwith(fun({K, _V}) -> K < Key end, KVs),
            UpdKVs =
                case AtOrAbove of
                    [{Key, _OldValue}|Rest] ->
                        % Key already present - replace its value
                        Below ++ [{Key, Value}|Rest];
                    _ ->
                        Below ++ [{Key, Value}|AtOrAbove]
                end,
            lists:keyreplace(Mark, 1, SkipList, {Mark, UpdKVs})
    end;
enter(Key, Value, Hash, SkipList, Width, Level) ->
    Modulus = width(Level, Width),
    {Mark, SubSkipList} = find_mark(Key, SkipList),
    UpdSub = enter(Key, Value, Hash, SubSkipList, Width, Level - 1),
    case Hash rem Modulus =:= 0 of
        true ->
            % Key is promoted to a marker at this level
            {AtOrBelow, Above} =
                lists:splitwith(fun({K, _SL}) -> K =< Key end, UpdSub),
            Trimmed = lists:keyreplace(Mark, 1, SkipList, {Mark, Above}),
            lists:ukeysort(1, [{Key, AtOrBelow}|Trimmed]);
        false ->
            % Only the sub-skiplist under the existing marker changes
            lists:keyreplace(Mark, 1, SkipList, {Mark, UpdSub})
    end.
|
|
||||||
|
|
||||||
%% Build the nested list structure from a sorted, non-empty list.
%% Each pass groups the current list into chunks of at most SkipWidth
%% elements, each chunk stored as {LastKeyInChunk, Chunk}; the pass is
%% repeated ListHeight times so that the result is nested ListHeight deep.
from_list(SkipList, _SkipWidth, 0) ->
    SkipList;
from_list(KVList, SkipWidth, ListHeight) ->
    L0 = length(KVList),
    SL0 =
        case L0 > SkipWidth of
            true ->
                from_list(KVList, L0, [], SkipWidth);
            false ->
                % Everything fits under a single marker
                {LastK, _LastSL} = lists:last(KVList),
                [{LastK, KVList}]
        end,
    from_list(SL0, SkipWidth, ListHeight - 1).

%% Split KVList (of length L) into chunks of at most SkipWidth elements and
%% append the resulting {LastKey, Chunk} entries to SkipList.
from_list([], 0, SkipList, _SkipWidth) ->
    SkipList;
from_list(KVList, L, SkipList, SkipWidth) ->
    SkipList ++ from_list_chunks(KVList, L, SkipWidth, []).

%% Chunks are accumulated in reverse and reversed once at the end, rather
%% than appending to the tail of the result on every iteration.
from_list_chunks([], 0, _SkipWidth, Acc) ->
    lists:reverse(Acc);
from_list_chunks(KVList, L, SkipWidth, Acc) ->
    SubLL = min(SkipWidth, L),
    {Head, Tail} = lists:split(SubLL, KVList),
    {LastK, _LastV} = lists:last(Head),
    from_list_chunks(Tail, L - SubLL, SkipWidth, [{LastK, Head}|Acc]).
|
|
||||||
|
|
||||||
|
|
||||||
%% Descend through the levels following the first marker >= Key at each
%% level.  At level 1 the selected list holds {Key, Value} pairs and is
%% searched for the Key.  Returns {value, V} or none.
list_lookup(Key, SkipList, 1) ->
    KVs = get_sublist(Key, SkipList),
    case lists:keyfind(Key, 1, KVs) of
        {Key, Value} ->
            {value, Value};
        false ->
            none
    end;
list_lookup(Key, SkipList, Level) ->
    case get_sublist(Key, SkipList) of
        null ->
            % No marker at or above the Key at this level
            none;
        Lower ->
            list_lookup(Key, Lower, Level - 1)
    end.
|
|
||||||
|
|
||||||
|
|
||||||
%% Flatten a nested list of the given height into a single list, keeping
%% the existing order.  At level 1 each entry is {Marker, KVList}; at higher
%% levels each entry is {Marker, SubSkipList}.
%% lists:flatmap/2 is used so each sublist is appended once, rather than
%% re-copying the growing accumulator for every marker.
to_list(SkipList, 1) ->
    lists:flatmap(fun({_Mark, SL}) -> SL end, SkipList);
to_list(SkipList, Level) ->
    lists:flatmap(fun({_Mark, SL}) -> to_list(SL, Level - 1) end, SkipList).
|
|
||||||
|
|
||||||
|
|
||||||
%% Collect the {Key, Value} pairs between StartKey (inclusive) and EndKey
%% from a nested list of the given height.
to_range(SkipList, StartKey, EndKey, ListHeight) ->
    to_range(SkipList, StartKey, EndKey, ListHeight, [], true).

%% Works through one bottom-level sublist at a time.  The part of the sublist
%% at or after StartKey (after it, when StartIncl is false) is fetched; if its
%% last key has not passed EndKey the whole part is accumulated and the search
%% continues from that last key (exclusive).  Otherwise only the leading pairs
%% which have not passed EndKey are added and the result is returned.
to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) ->
    case sublist_above(SkipList, StartKey, ListHeight, StartIncl) of
        [] ->
            Acc;
        Candidates ->
            {LastKey, _LastValue} = lists:last(Candidates),
            case leveled_codec:endkey_passed(EndKey, LastKey) of
                true ->
                    InRange =
                        fun({K, _V}) ->
                            not leveled_codec:endkey_passed(EndKey, K)
                        end,
                    Acc ++ lists:takewhile(InRange, Candidates);
                false ->
                    to_range(SkipList,
                                LastKey,
                                EndKey,
                                ListHeight,
                                Acc ++ Candidates,
                                false)
            end
    end.
|
|
||||||
|
|
||||||
%% Find the part of the bottom-level list at or above StartKey.
%% At each level entries with a key below StartKey (or at-or-below, when
%% StartIncl is false) are dropped.  At level 0 what remains is returned; at
%% higher levels the search continues in the sublist of the first remaining
%% entry, or [] is returned when nothing remains.
sublist_above(SkipList, StartKey, Level, StartIncl) ->
    BeforeStart =
        fun({K, _Item}) ->
            case StartIncl of
                true ->
                    K < StartKey;
                false ->
                    K =< StartKey
            end
        end,
    case {Level, lists:dropwhile(BeforeStart, SkipList)} of
        {0, Remaining} ->
            Remaining;
        {_, []} ->
            [];
        {_, [{_Mark, Lower}|_Rest]} ->
            sublist_above(Lower, StartKey, Level - 1, StartIncl)
    end.
|
|
||||||
|
|
||||||
%% Wrap SkipList under a ?INFINITY_KEY marker once per level, giving an
%% empty structure nested Level deep when started from [].
empty(SkipList, Level) ->
    Wrapped = [{?INFINITY_KEY, SkipList}],
    case Level of
        1 ->
            Wrapped;
        _ ->
            empty(Wrapped, Level - 1)
    end.
|
|
||||||
|
|
||||||
%% Total number of entries held in the bottom-level lists of a nested list
%% of the given height.
size(SkipList, 1) ->
    lists:sum(lists:map(fun({_Mark, SL}) -> length(SL) end, SkipList));
size(SkipList, Level) ->
    SubSizes =
        lists:map(fun({_Mark, SL}) -> size(SL, Level - 1) end, SkipList),
    lists:sum(SubSizes).
|
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Internal Functions
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
%% The hash modulus used to decide whether a key becomes a marker at level
%% N: Width raised to the power N (so 16 and 256 for levels 1 and 2 with a
%% Width of 16).
%% The previous implementation squared the running value at each step, which
%% agrees for N = 1 and N = 2 but gave Width^4 rather than Width^3 at N = 3.
%% N must be a positive integer; the guard makes any other level fail fast
%% rather than recurse without end.
width(1, Width) ->
    Width;
width(N, Width) when is_integer(N), N > 1 ->
    Width * width(N - 1, Width).
|
|
||||||
|
|
||||||
%% Return the first {Marker, SubList} entry whose Marker is >= Key, or false
%% if there is no such entry.  The scan stops at the first match rather than
%% folding over the remainder of the list.
find_mark(_Key, []) ->
    false;
find_mark(Key, [{Marker, SL}|_Rest]) when Marker >= Key ->
    {Marker, SL};
find_mark(Key, [{_Marker, _SL}|Rest]) ->
    find_mark(Key, Rest).
|
|
||||||
|
|
||||||
%% Return the sublist held under the first marker which is >= Key, or the
%% atom null if no marker is >= Key.  The scan stops at the first match rather
%% than folding over the remainder of the list.
get_sublist(_Key, []) ->
    null;
get_sublist(Key, [{SkipKey, SL}|_Rest]) when SkipKey >= Key ->
    SL;
get_sublist(Key, [{_SkipKey, _SL}|Rest]) ->
    get_sublist(Key, Rest).
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Test
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
|
|
||||||
%% Generate Count random {Key, Value} pairs for tests, numbering the values
%% from Seqn upwards.  Note that the final argument is used as the width of
%% the random range added to BucketRangeLow, not as an absolute upper bound.
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    generate_randomkeys(Seqn,
                        Count,
                        [],
                        BucketRangeLow,
                        BucketRangeHigh).

%% Each new pair is added to the front of Acc, so the result is in reverse
%% order of generation.  A BRange of 0 fixes the bucket at BucketLow.
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    BucketInt =
        case BRange of
            0 ->
                BucketLow;
            _ ->
                BucketLow + random:uniform(BRange)
        end,
    BNumber = string:right(integer_to_list(BucketInt), 4, $0),
    KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
    Key = {o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
    Value = {Seqn, {active, infinity}, null},
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        [{Key, Value}|Acc],
                        BucketLow,
                        BRange).
|
|
||||||
|
|
||||||
%% Check nothing bad happens with very small lists (1 to 32 keys)
skiplist_small_test() ->
    Sizes = lists:seq(1, 32),
    lists:foreach(fun dotest_skiplist_small/1, Sizes).
|
|
||||||
|
|
||||||
|
|
||||||
%% Build a skiplist of N random keys both dynamically (enter/3 onto an
%% empty skiplist) and in bulk (from_list/1), and check that every key can be
%% looked up with the expected value in each.
dotest_skiplist_small(N) ->
    KL = generate_randomkeys(1, N, 1, 2),
    Dynamic =
        lists:foldl(fun({K, V}, Acc) -> enter(K, V, Acc) end, empty(), KL),
    Bulk = from_list(lists:reverse(KL)),
    Expected = lists:ukeysort(1, lists:reverse(KL)),
    CheckAll =
        fun(SkipList) ->
            lists:foreach(fun({K, V}) ->
                                ?assertMatch({value, V}, lookup(K, SkipList))
                            end,
                            Expected)
        end,
    CheckAll(Dynamic),
    CheckAll(Bulk).
|
|
||||||
|
|
||||||
%% Run the full skiplist test with bloom protection switched on
skiplist_withbloom_test() ->
    Banner = "~n~nBloom protected skiplist test:~n~n",
    io:format(user, Banner, []),
    skiplist_tester(true).
|
|
||||||
|
|
||||||
%% Run the full skiplist test with bloom protection switched off
skiplist_nobloom_test() ->
    Banner = "~n~nBloom free skiplist test:~n~n",
    io:format(user, Banner, []),
    skiplist_tester(false).
|
|
||||||
|
|
||||||
%% Build the same 4000 random keys into a skiplist by four routes - from an
%% ordered_set ETS table, from an unsorted list, from a sorted list, and by
%% entering the keys one at a time - reporting the time taken for each.  The
%% three bulk routes must give an identical skiplist (enforced by re-matching
%% SkipList).  The timing tests are then run against both the bulk-built and
%% the dynamically built skiplist.
skiplist_tester(Bloom) ->
    N = 4000,
    KL = generate_randomkeys(1, N, 1, N div 5),
    Elapsed = fun(Start) -> timer:now_diff(os:timestamp(), Start) end,
    TopCount = fun(SkpL) -> length(element(2, SkpL)) end,
    TierCounts =
        fun(SkpL) ->
            lists:map(fun({_L, SL}) -> length(SL) end, element(2, SkpL))
        end,

    Table = ets:new(test, [ordered_set, private]),
    ets:insert(Table, KL),
    StartETS = os:timestamp(),
    SkipList = from_orderedset(Table, Bloom),
    io:format(user,
                "Generating skip list with ~w keys in ~w microseconds " ++
                    "from ordered set~n",
                [N, Elapsed(StartETS)]),

    StartList = os:timestamp(),
    SkipList = from_list(lists:reverse(KL), Bloom),
    io:format(user,
                "Generating skip list with ~w keys in ~w microseconds~n" ++
                    "Top level key count of ~w~n",
                [N, Elapsed(StartList), TopCount(SkipList)]),
    io:format(user,
                "Second tier key counts of ~w~n",
                [TierCounts(SkipList)]),
    KLSorted = lists:ukeysort(1, lists:reverse(KL)),

    StartSorted = os:timestamp(),
    SkipList = from_sortedlist(KLSorted, Bloom),
    io:format(user,
                "Generating skip list with ~w sorted keys in ~w " ++
                    "microseconds~n",
                [N, Elapsed(StartSorted)]),

    StartDynamic = os:timestamp(),
    SkipList1 =
        lists:foldl(fun({K, V}, Acc) -> enter(K, V, Acc) end,
                    empty(Bloom),
                    KL),
    io:format(user,
                "Dynamic load of skiplist with ~w keys took ~w " ++
                    "microseconds~n" ++
                    "Top level key count of ~w~n",
                [N, Elapsed(StartDynamic), TopCount(SkipList1)]),
    io:format(user,
                "Second tier key counts of ~w~n",
                [TierCounts(SkipList1)]),

    io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
    skiplist_timingtest(KLSorted, SkipList, N, Bloom),

    io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
    skiplist_timingtest(KLSorted, SkipList1, N, Bloom).
|
|
||||||
|
|
||||||
|
|
||||||
%% Timing and correctness checks against a populated skiplist.
%%  KL       - the sorted, de-duplicated list of {Key, Value} pairs which
%%             the skiplist is expected to hold
%%  SkipList - the skiplist under test
%%  N        - the number of keys originally generated (used to pick the
%%             positions of the samples taken from KL)
%%  Bloom    - true if the skiplist is bloom protected, which enables the
%%             final known-hash lookup timing
%% Covers lookups of present keys, range queries (including ranges outside
%% the populated buckets), lookups of missing keys, and flattening.
skiplist_timingtest(KL, SkipList, N, Bloom) ->
    io:format(user, "Timing tests on skiplist of size ~w~n",
                [leveled_skiplist:size(SkipList)]),
    CheckList1 = lists:sublist(KL, N div 4, 200),
    CheckList2 = lists:sublist(KL, N div 3, 200),
    CheckList3 = lists:sublist(KL, N div 2, 200),
    CheckList4 = lists:sublist(KL, N - 1000, 200),
    CheckList5 = lists:sublist(KL, N - 500, 200),
    CheckList6 = lists:sublist(KL, 1, 10),
    CheckList7 = lists:nthtail(N - 200, KL),
    CheckList8 = lists:sublist(KL, N div 2, 1),
    CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
                    CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,

    SWb = os:timestamp(),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({value, V}, lookup(K, SkipList))
                        end,
                    CheckAll),
    % Report the number of keys actually checked - the size of CheckAll
    % depends on how many duplicates were removed from KL
    io:format(user, "Finding ~w keys took ~w microseconds~n",
                [length(CheckAll), timer:now_diff(os:timestamp(), SWb)]),

    % Query the range from the first to the last key of the check list.  When
    % Assert is true the number of results must equal the number of distinct
    % entries in the check list, otherwise the results are returned.
    RangeFun =
        fun(SkipListToQuery, CheckListForQ, Assert) ->
            KR =
                to_range(SkipListToQuery,
                            element(1, lists:nth(1, CheckListForQ)),
                            element(1, lists:last(CheckListForQ))),
            case Assert of
                true ->
                    CompareL = length(lists:usort(CheckListForQ)),
                    ?assertMatch(CompareL, length(KR));
                false ->
                    KR
            end
        end,

    SWc = os:timestamp(),
    RangeFun(SkipList, CheckList1, true),
    RangeFun(SkipList, CheckList2, true),
    RangeFun(SkipList, CheckList3, true),
    RangeFun(SkipList, CheckList4, true),
    RangeFun(SkipList, CheckList5, true),
    RangeFun(SkipList, CheckList6, true),
    RangeFun(SkipList, CheckList7, true),
    RangeFun(SkipList, CheckList8, true),

    % Ranges in buckets above those populated
    KL_OOR1 = generate_randomkeys(1, 4, N div 5 + 1, N div 5 + 10),
    KR9 = RangeFun(SkipList, KL_OOR1, false),
    ?assertMatch([], KR9),

    % Ranges in a bucket below those populated
    KL_OOR2 = generate_randomkeys(1, 4, 0, 0),
    KR10 = RangeFun(SkipList, KL_OOR2, false),
    ?assertMatch([], KR10),

    io:format(user, "Finding 10 ranges took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWc)]),

    AltKL1 = generate_randomkeys(1, 2000, 1, 200),
    SWd0 = os:timestamp(),
    lists:foreach(fun({K, _V}) ->
                        lookup(K, SkipList)
                        end,
                    AltKL1),
    io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWd0)]),
    SWd1 = os:timestamp(),
    lists:foreach(fun({K, _V}) ->
                        leveled_codec:magic_hash(K)
                        end,
                    AltKL1),
    io:format(user, "Generating 2000 magic hashes took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWd1)]),
    SWd2 = os:timestamp(),
    lists:foreach(fun({K, _V}) ->
                        erlang:phash2(K)
                        end,
                    AltKL1),
    io:format(user, "Generating 2000 not so magic hashes took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWd2)]),

    AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300),
    SWe = os:timestamp(),
    lists:foreach(fun({K, _V}) ->
                        none = lookup(K, SkipList)
                        end,
                    AltKL2),
    io:format(user, "Getting 1000 missing keys above range took ~w " ++
                        "microseconds~n",
                [timer:now_diff(os:timestamp(), SWe)]),
    AltKL3 = generate_randomkeys(1, 1000, 0, 0),
    SWf = os:timestamp(),
    lists:foreach(fun({K, _V}) ->
                        none = lookup(K, SkipList)
                        end,
                    AltKL3),
    io:format(user, "Getting 1000 missing keys below range took ~w " ++
                        "microseconds~n",
                [timer:now_diff(os:timestamp(), SWf)]),

    SWg = os:timestamp(),
    FlatList = to_list(SkipList),
    io:format(user, "Flattening skiplist took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWg)]),
    ?assertMatch(KL, FlatList),

    case Bloom of
        true ->
            % Random integers are used as both key and hash
            HashList = lists:map(fun(_X) ->
                                        random:uniform(4294967295) end,
                                    lists:seq(1, 2000)),
            SWh = os:timestamp(),
            lists:foreach(fun(X) ->
                                lookup(X, X, SkipList) end,
                            HashList),
            io:format(user,
                        "Getting 2000 missing keys when hash was known " ++
                            "took ~w microseconds~n",
                        [timer:now_diff(os:timestamp(), SWh)]);
        false ->
            ok
    end.
|
|
||||||
|
|
||||||
%% A deterministic test {Key, Value} pair for the integer X.  The key number
%% is right-aligned in a field of six characters (space padded).
define_kv(X) ->
    KeyName = "Key" ++ string:right(integer_to_list(X), 6),
    Key = {o, "Bucket", KeyName, null},
    Value = {X, {active, infinity}, null},
    {Key, Value}.
|
|
||||||
|
|
||||||
%% Test a skiplist of 4096 keys, checking each key can be found and that
%% range queries over aligned runs of 32 keys return exactly those 32 pairs.
skiplist_roundsize_test() ->
    KVL = lists:map(fun define_kv/1, lists:seq(1, 4096)),
    SkipList = from_list(KVL),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({value, V}, lookup(K, SkipList))
                    end,
                    KVL),
    CheckRange =
        fun(X) ->
            First = X * 32 + 1,
            {KS, _VS} = define_kv(First),
            {KE, _VE} = define_kv((X + 1) * 32),
            Found = to_range(SkipList, KS, KE),
            Expected = lists:sublist(KVL, First, 32),
            ?assertMatch(Expected, Found)
        end,
    lists:foreach(CheckRange, lists:seq(0, 24)).
|
|
||||||
|
|
||||||
%% Keys added with enter_nolookup/3 to a bloom protected skiplist must not
%% be found by lookup/2, but must all be present when the skiplist is
%% flattened.
skiplist_nolookup_test() ->
    N = 4000,
    KL = generate_randomkeys(1, N, 1, N div 5),
    AddFun = fun({K, V}, Acc) -> enter_nolookup(K, V, Acc) end,
    SkipList = lists:foldl(AddFun, empty(true), KL),
    lists:foreach(fun({K, _V}) ->
                        ?assertMatch(none, lookup(K, SkipList))
                    end,
                    KL),
    KLSorted = lists:ukeysort(1, lists:reverse(KL)),
    ?assertMatch(KLSorted, to_list(SkipList)).
|
|
||||||
|
|
||||||
%% Single-key range queries at the edges of a skiplist, for list sizes of
%% 128, 127 and 129 keys: the range from the last key to itself must return
%% just that pair, as must the range from the first key to itself (checked on
%% the 129 key skiplist).
skiplist_range_test() ->
    N = 150,
    KL = generate_randomkeys(1, N, 1, N div 5),

    CheckLast =
        fun(Len) ->
            SubKL = lists:sublist(lists:ukeysort(1, KL), Len),
            SkipList = from_list(SubKL),
            {LastK, LastV} = lists:last(SubKL),
            Range = to_range(SkipList, LastK, LastK),
            ?assertMatch([{LastK, LastV}], Range),
            {SubKL, SkipList}
        end,

    _ = CheckLast(128),
    _ = CheckLast(127),
    {KLSL3, SkipList3} = CheckLast(129),

    {FirstK, FirstV} = lists:nth(1, KLSL3),
    FirstRange = to_range(SkipList3, FirstK, FirstK),
    ?assertMatch([{FirstK, FirstV}], FirstRange).
|
|
||||||
|
|
||||||
|
|
||||||
%% An empty skiplist has a size of 0, with or without a bloom
empty_skiplist_size_test() ->
    lists:foreach(fun(BloomProtect) ->
                        Empty = empty(BloomProtect),
                        ?assertMatch(0, leveled_skiplist:size(Empty))
                    end,
                    [false, true]).
|
|
||||||
|
|
||||||
-endif.
|
|
|
@ -1,159 +0,0 @@
|
||||||
%% -------- TINY BLOOM ---------
|
|
||||||
%%
|
|
||||||
%% For sheltering relatively expensive lookups with a probabilistic check
|
|
||||||
%%
|
|
||||||
%% Uses multiple 512 byte blooms. Can sensibly hold up to 1000 keys per array.
|
|
||||||
%% Even at 1000 keys it should still offer only a 20% false positive rate
|
|
||||||
%%
|
|
||||||
%% Restricted to no more than 256 arrays - so can't handle more than 250K keys
|
|
||||||
%% in total
|
|
||||||
%%
|
|
||||||
%% Implemented this way to make it easy to control false positive (just by
|
|
||||||
%% setting the width). Also only requires binary manipulations of a single
|
|
||||||
%% hash
|
|
||||||
|
|
||||||
-module(leveled_tinybloom).
|
|
||||||
|
|
||||||
-include("include/leveled.hrl").
|
|
||||||
|
|
||||||
-export([
|
|
||||||
enter/2,
|
|
||||||
check/2,
|
|
||||||
empty/1
|
|
||||||
]).
|
|
||||||
|
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Bloom API
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
%% Create an empty bloom: a dict of Width slots, keyed 0 to Width - 1, each
%% slot holding an all-zero bit array of 4096 bits.  Width may not exceed 256.
empty(Width) when Width =< 256 ->
    Slots = lists:seq(0, Width - 1),
    dict:from_list(lists:map(fun(Slot) -> {Slot, <<0:4096>>} end, Slots)).
|
|
||||||
|
|
||||||
%% Add an entry to the bloom.
%%  {hash, no_lookup} - nothing is added, the bloom is returned unchanged
%%  {hash, Hash}      - the hash is split into a slot and two bit positions;
%%                      both bits are set in the bit array of that slot (the
%%                      slot number being taken modulo the number of slots)
%%  Key               - any other term is hashed with
%%                      leveled_codec:magic_hash/1 and added as {hash, Hash}
enter({hash, no_lookup}, Bloom) ->
    Bloom;
enter({hash, Hash}, Bloom) ->
    {Slot0, Bit1, Bit2} = split_hash(Hash),
    Slot = Slot0 rem dict:size(Bloom),
    Original = dict:fetch(Slot, Bloom),
    % Setting a bit is idempotent, so there is no need to special-case
    % Bit1 and Bit2 being the same position
    WithBit1 = add_to_array(Bit1, Original, 4096),
    WithBoth = add_to_array(Bit2, WithBit1, 4096),
    dict:store(Slot, <<WithBoth/binary>>, Bloom);
enter(Key, Bloom) ->
    enter({hash, leveled_codec:magic_hash(Key)}, Bloom).
|
|
||||||
|
|
||||||
|
|
||||||
%% Check the bloom for an entry, given either {hash, Hash} or a Key (which
%% is hashed with leveled_codec:magic_hash/1).  Returns true only if both bit
%% positions derived from the hash are set in the relevant slot; false means
%% the entry has not been added.
check({hash, Hash}, Bloom) ->
    {Slot0, Bit1, Bit2} = split_hash(Hash),
    Slot = Slot0 rem dict:size(Bloom),
    BitArray = dict:fetch(Slot, Bloom),
    % The second bit is only inspected when the first is set
    getbit(Bit1, BitArray, 4096) =:= <<1:1>>
        andalso getbit(Bit2, BitArray, 4096) =:= <<1:1>>;
check(Key, Bloom) ->
    check({hash, leveled_codec:magic_hash(Key)}, Bloom).
|
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Internal Functions
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
%% Split an integer hash into {Slot, Bit1, Bit2}:
%%  Slot - the lowest 8 bits (0 to 255)
%%  Bit1 - the next 12 bits (0 to 4095)
%%  Bit2 - the 12 bits above those (0 to 4095)
%% Bit2 is masked so that it is always a valid position in a 4096 bit array.
%% For a hash of up to 32 bits the mask changes nothing; without it a wider
%% hash gave a position beyond the end of the array.
split_hash(Hash) ->
    H0 = Hash band 255,
    H1 = (Hash bsr 8) band 4095,
    H2 = (Hash bsr 20) band 4095,
    {H0, H1, H2}.
|
|
||||||
|
|
||||||
%% Return BitArray (a bitstring of ArrayLength bits) with the bit at
%% position Bit set to 1.  Positions are counted from 0 at the start of the
%% bitstring.
add_to_array(Bit, BitArray, ArrayLength) ->
    TailLen = ArrayLength - Bit - 1,
    <<Before:Bit/bitstring,
        _Current:1/integer,
        After:TailLen/bitstring>> = BitArray,
    <<Before/bitstring, 1:1, After/bitstring>>.
|
|
||||||
|
|
||||||
%% Return the bit at position Bit of BitArray (a bitstring of ArrayLength
%% bits) as a one-bit bitstring, i.e. <<0:1>> or <<1:1>>.  Positions are
%% counted from 0 at the start of the bitstring.
getbit(Bit, BitArray, ArrayLength) ->
    TailLen = ArrayLength - Bit - 1,
    <<_Before:Bit/bitstring,
        Found:1/bitstring,
        _After:TailLen/bitstring>> = BitArray,
    Found.
|
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Test
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
|
|
||||||
%% Add 4000 random keys to a bloom of 6 slots, check that all of them are
%% found, and check that fewer than a quarter of 4000 keys which were not
%% added are reported as present.  Timings for hashing, adding and checking
%% are written to the user console.
simple_test() ->
    N = 4000,
    W = 6,
    Elapsed = fun(Start) -> timer:now_diff(os:timestamp(), Start) end,
    KLin = lists:map(fun(X) -> "Key_" ++
                                integer_to_list(X) ++
                                integer_to_list(random:uniform(100)) ++
                                binary_to_list(crypto:rand_bytes(2))
                                end,
                        lists:seq(1, N)),
    KLout = lists:map(fun(X) ->
                            "NotKey_" ++
                            integer_to_list(X) ++
                            integer_to_list(random:uniform(100)) ++
                            binary_to_list(crypto:rand_bytes(2))
                            end,
                        lists:seq(1, N)),

    StartNative = os:timestamp(),
    lists:foreach(fun(X) -> erlang:phash2(X) end, KLin),
    io:format(user,
                "~nNative hash function hashes ~w keys in ~w microseconds~n",
                [N, Elapsed(StartNative)]),
    StartMagic = os:timestamp(),
    lists:foreach(fun(X) -> leveled_codec:magic_hash(X) end, KLin),
    io:format(user,
                "~nMagic hash function hashes ~w keys in ~w microseconds~n",
                [N, Elapsed(StartMagic)]),

    StartAdd = os:timestamp(),
    Bloom = lists:foldr(fun enter/2, empty(W), KLin),
    io:format(user,
                "~nAdding ~w keys to bloom took ~w microseconds~n",
                [N, Elapsed(StartAdd)]),

    StartCheckIn = os:timestamp(),
    lists:foreach(fun(X) -> ?assertMatch(true, check(X, Bloom)) end, KLin),
    io:format(user,
                "~nChecking ~w keys in bloom took ~w microseconds~n",
                [N, Elapsed(StartCheckIn)]),

    StartCheckOut = os:timestamp(),
    CountFalsePositive =
        fun(X, Acc) ->
            case check(X, Bloom) of
                true ->
                    Acc + 1;
                false ->
                    Acc
            end
        end,
    FP = lists:foldr(CountFalsePositive, 0, KLout),
    io:format(user,
                "~nChecking ~w keys out of bloom took ~w microseconds " ++
                    "with ~w false positive rate~n",
                [N, Elapsed(StartCheckOut), FP / N]),
    ?assertMatch(true, FP < (N div 4)).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-endif.
|
|
|
@ -162,7 +162,17 @@ to_list({tree, _L, Tree}) ->
|
||||||
end,
|
end,
|
||||||
lists:foldl(FoldFun, [], tree_to_list(Tree));
|
lists:foldl(FoldFun, [], tree_to_list(Tree));
|
||||||
to_list({idxt, _L, {TLI, _IDX}}) ->
|
to_list({idxt, _L, {TLI, _IDX}}) ->
|
||||||
lists:append(tuple_to_list(TLI)).
|
lists:append(tuple_to_list(TLI));
|
||||||
|
to_list({skpl, _L, SkipList}) ->
|
||||||
|
FoldFun =
|
||||||
|
fun({_M, SL}, Acc) ->
|
||||||
|
[SL|Acc]
|
||||||
|
end,
|
||||||
|
|
||||||
|
Lv1List = lists:reverse(lists:foldl(FoldFun, [], SkipList)),
|
||||||
|
Lv0List = lists:reverse(lists:foldl(FoldFun, [], lists:append(Lv1List))),
|
||||||
|
lists:append(Lv0List).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
tsize({_Type, L, _Tree}) ->
|
tsize({_Type, L, _Tree}) ->
|
||||||
|
@ -171,7 +181,9 @@ tsize({_Type, L, _Tree}) ->
|
||||||
empty(tree) ->
|
empty(tree) ->
|
||||||
{tree, 0, empty_tree()};
|
{tree, 0, empty_tree()};
|
||||||
empty(idxt) ->
|
empty(idxt) ->
|
||||||
{idxt, 0, {{}, empty_tree()}}.
|
{idxt, 0, {{}, empty_tree()}};
|
||||||
|
empty(skpl) ->
|
||||||
|
{skpl, 0, []}.
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% Internal Functions
|
%%% Internal Functions
|
||||||
|
@ -216,14 +228,22 @@ roll_list(KVList, L, SkipList, SkipWidth) ->
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
lookup_match(_Key, []) ->
|
% lookup_match(_Key, []) ->
|
||||||
|
% none;
|
||||||
|
% lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key ->
|
||||||
|
% none;
|
||||||
|
% lookup_match(Key, [{Key, EV}|_Tail]) ->
|
||||||
|
% {value, EV};
|
||||||
|
% lookup_match(Key, [_Top|Tail]) ->
|
||||||
|
% lookup_match(Key, Tail).
|
||||||
|
|
||||||
|
lookup_match(Key, KVList) ->
|
||||||
|
case lists:keyfind(Key, 1, KVList) of
|
||||||
|
false ->
|
||||||
none;
|
none;
|
||||||
lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key ->
|
{Key, Value} ->
|
||||||
none;
|
{value, Value}
|
||||||
lookup_match(Key, [{Key, EV}|_Tail]) ->
|
end.
|
||||||
{value, EV};
|
|
||||||
lookup_match(Key, [_Top|Tail]) ->
|
|
||||||
lookup_match(Key, Tail).
|
|
||||||
|
|
||||||
lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key ->
|
lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key ->
|
||||||
{EK, EV};
|
{EK, EV};
|
||||||
|
@ -396,15 +416,14 @@ skpl_getsublist(Key, SkipList) ->
|
||||||
FoldFun =
|
FoldFun =
|
||||||
fun({Mark, SL}, Acc) ->
|
fun({Mark, SL}, Acc) ->
|
||||||
case {Acc, Mark} of
|
case {Acc, Mark} of
|
||||||
{none, Mark} when Mark >= Key ->
|
{[], Mark} when Mark >= Key ->
|
||||||
SL;
|
SL;
|
||||||
_ ->
|
_ ->
|
||||||
Acc
|
Acc
|
||||||
end
|
end
|
||||||
end,
|
end,
|
||||||
SL1 = lists:foldl(FoldFun, none, SkipList),
|
SL1 = lists:foldl(FoldFun, [], SkipList),
|
||||||
lists:foldl(FoldFun, none, SL1).
|
lists:foldl(FoldFun, [], SL1).
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% Balance tree implementation
|
%%% Balance tree implementation
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue