Switch to using skip lists from leveled_tree
Remove now unused leveled_skiplist and leveled_tinybloom
This commit is contained in:
parent
6d2eb1d57c
commit
58cda7d157
4 changed files with 34 additions and 835 deletions
|
@ -15,7 +15,7 @@
|
|||
%% Inker key type used for tombstones
|
||||
-define(INKT_TOMB, tomb).
|
||||
|
||||
-define(CACHE_TYPE, idxt).
|
||||
-define(CACHE_TYPE, skpl).
|
||||
|
||||
-record(sft_options,
|
||||
{wait = true :: boolean(),
|
||||
|
|
|
@ -1,661 +0,0 @@
|
|||
%% -------- SKIPLIST ---------
|
||||
%%
|
||||
%% For storing small numbers of {K, V} pairs where reasonable insertion and
|
||||
%% fetch times, but with fast support for flattening to a list or a sublist
|
||||
%% within a certain key range
|
||||
%%
|
||||
%% Used instead of gb_trees to retain compatibility with OTP16 (and Riak's
|
||||
%% ongoing dependency on OTP16)
|
||||
%%
|
||||
%% Not a proper skip list. Only supports a fixed depth. Good enough for the
|
||||
%% purposes of leveled. Also uses the peculiar endkey_passed function within
|
||||
%% leveled. Not tested beyond a depth of 2.
|
||||
|
||||
-module(leveled_skiplist).
|
||||
|
||||
-include("include/leveled.hrl").
|
||||
|
||||
-export([
|
||||
from_list/1,
|
||||
from_list/2,
|
||||
from_sortedlist/1,
|
||||
from_sortedlist/2,
|
||||
from_orderedset/1,
|
||||
from_orderedset/2,
|
||||
to_list/1,
|
||||
enter/3,
|
||||
enter/4,
|
||||
enter_nolookup/3,
|
||||
to_range/2,
|
||||
to_range/3,
|
||||
lookup/2,
|
||||
lookup/3,
|
||||
empty/0,
|
||||
empty/1,
|
||||
size/1
|
||||
]).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
-define(SKIP_WIDTH, 16).
|
||||
-define(LIST_HEIGHT, 2).
|
||||
-define(INFINITY_KEY, {null, null, null, null, null}).
|
||||
-define(BITARRAY_SIZE, 2048).
|
||||
|
||||
%%%============================================================================
|
||||
%%% SkipList API
|
||||
%%%============================================================================
|
||||
|
||||
%% Add Key/Value to the SkipList. The magic hash of the Key is calculated
%% here and handed on to enter/4, which uses it for the bloom (if present).
enter(Key, Value, SkipList) ->
    enter(Key, leveled_codec:magic_hash(Key), Value, SkipList).
|
||||
|
||||
%% Add Key/Value to the SkipList where the magic hash of the Key is already
%% known. The SkipList is a two-element tuple: the first element is either
%% the atom list_only or a bloom, the second is the nested marker list.
%% The bloom (when present) is updated with the supplied Hash; the position
%% within the nested list is decided using erlang:phash2/1 of the Key.
enter(Key, Hash, Value, SkipList) ->
    UpdatedBloom =
        case element(1, SkipList) of
            list_only ->
                list_only;
            CurrentBloom ->
                leveled_tinybloom:enter({hash, Hash}, CurrentBloom)
        end,
    UpdatedList =
        enter(Key,
                Value,
                erlang:phash2(Key),
                element(2, SkipList),
                ?SKIP_WIDTH,
                ?LIST_HEIGHT),
    {UpdatedBloom, UpdatedList}.
|
||||
|
||||
%% Can iterate over a key entered this way, but never lookup the key
|
||||
%% used for index terms
|
||||
%% The key may still be a marker key - and the much cheaper native hash
|
||||
%% is used to determine this, avoiding the more expensive magic hash
|
||||
%% Add Key/Value to the nested list only. The first element of the SkipList
%% tuple (list_only or a bloom) is carried over untouched, so no hash for the
%% Key is added to a bloom here.
enter_nolookup(Key, Value, SkipList) ->
    Unchanged = element(1, SkipList),
    UpdatedList =
        enter(Key,
                Value,
                erlang:phash2(Key),
                element(2, SkipList),
                ?SKIP_WIDTH,
                ?LIST_HEIGHT),
    {Unchanged, UpdatedList}.
|
||||
|
||||
%% Build a skiplist from an ETS table without bloom protection.
from_orderedset(Table) ->
    from_orderedset(Table, false).

%% Build a skiplist from an ETS table. The table is dumped with
%% ets:tab2list/1 and handed to from_sortedlist/2 without further sorting.
%% NOTE(review): presumably the table must be an ordered_set so that the dump
%% is already key-sorted - confirm against callers.
from_orderedset(Table, Bloom) ->
    TableKVL = ets:tab2list(Table),
    from_sortedlist(TableKVL, Bloom).
|
||||
|
||||
%% Build a skiplist from an unsorted list of {Key, Value} tuples, without
%% bloom protection.
from_list(UnsortedKVL) ->
    from_list(UnsortedKVL, false).

%% Build a skiplist from an unsorted list of {Key, Value} tuples. The list is
%% sorted on the key (dropping later duplicates of a key) before being passed
%% to from_sortedlist/2.
from_list(UnsortedKVL, BloomProtect) ->
    from_sortedlist(lists:ukeysort(1, UnsortedKVL), BloomProtect).
|
||||
|
||||
%% Build a skiplist from an already key-sorted list, without bloom
%% protection.
from_sortedlist(SortedKVL) ->
    from_sortedlist(SortedKVL, false).

%% Build a skiplist from an already key-sorted list of {Key, Value} tuples.
%% An empty input gives the same result as empty/1. When BloomProtect is
%% true every key is added to a fresh bloom of ?SKIP_WIDTH slots; otherwise
%% the first element of the result is the atom list_only.
from_sortedlist([], BloomProtect) ->
    empty(BloomProtect);
from_sortedlist(SortedKVL, BloomProtect) ->
    Guard =
        case BloomProtect of
            true ->
                AddKey =
                    fun({K, _V}, Bloom) ->
                        leveled_tinybloom:enter(K, Bloom)
                    end,
                lists:foldr(AddKey,
                            leveled_tinybloom:empty(?SKIP_WIDTH),
                            SortedKVL);
            false ->
                list_only
        end,
    {Guard, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}.
|
||||
|
||||
%% Look up Key, returning {value, V} or none. For a list_only skiplist the
%% nested list is searched directly; otherwise the magic hash is calculated
%% and lookup/3 is used so the bloom can be consulted first.
lookup(Key, SkipList) ->
    case element(1, SkipList) of
        list_only ->
            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
        _Bloom ->
            Hash = leveled_codec:magic_hash(Key),
            lookup(Key, Hash, SkipList)
    end.
|
||||
|
||||
%% Look up Key where its magic hash is already known. Returns {value, V} or
%% none. When the skiplist carries a bloom, a negative bloom check returns
%% none without touching the nested list.
lookup(Key, Hash, SkipList) ->
    Guard = element(1, SkipList),
    MaybePresent =
        Guard =:= list_only
            orelse leveled_tinybloom:check({hash, Hash}, Guard),
    case MaybePresent of
        true ->
            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
        false ->
            none
    end.
|
||||
|
||||
|
||||
%% Rather than support iterator_from like gb_trees, this will just output a key
|
||||
%% sorted list for the desired range, which can then be iterated over as normal
|
||||
%% Return the key-sorted list of {Key, Value} tuples from Start onwards, with
%% no upper bound (?INFINITY_KEY is used as the end key).
to_range(SkipList, Start) ->
    to_range(SkipList, Start, ?INFINITY_KEY).

%% Return the key-sorted list of {Key, Value} tuples between Start and End.
%% Only the nested list (second element of the SkipList tuple) is consulted.
to_range(SkipList, Start, End) ->
    NestedList = element(2, SkipList),
    to_range(NestedList, Start, End, ?LIST_HEIGHT).
|
||||
|
||||
%% Flatten the skiplist to a single list of {Key, Value} tuples, in the order
%% they are held in the nested list.
to_list(SkipList) ->
    NestedList = element(2, SkipList),
    to_list(NestedList, ?LIST_HEIGHT).
|
||||
|
||||
%% An empty skiplist without bloom protection.
empty() ->
    empty(false).

%% An empty skiplist. With BloomProtect true the first element is an empty
%% bloom of ?SKIP_WIDTH slots, with false it is the atom list_only. The
%% second element is the empty nested list of height ?LIST_HEIGHT.
empty(BloomProtect) ->
    Guard =
        case BloomProtect of
            true ->
                leveled_tinybloom:empty(?SKIP_WIDTH);
            false ->
                list_only
        end,
    {Guard, empty([], ?LIST_HEIGHT)}.
|
||||
|
||||
%% Count the {Key, Value} tuples held in the skiplist.
size(SkipList) ->
    NestedList = element(2, SkipList),
    size(NestedList, ?LIST_HEIGHT).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% SkipList Base Functions
|
||||
%%%============================================================================
|
||||
|
||||
%% Insert Key/Value into a nested marker list of the given Level.
%%
%% At level 1 the SkipList is a list of {Marker, KVList} pairs; at higher
%% levels each marker holds a skiplist of the level below. The entry used is
%% the first whose Marker is >= Key (see find_mark/2).
%% NOTE(review): find_mark/2 returns false when no marker is >= Key, which
%% would fail the tuple match below - lists built via empty/2 always end with
%% an ?INFINITY_KEY marker, confirm the same holds for other callers.
%%
%% When Hash is a multiple of the width for the level, Key becomes a new
%% marker: the chosen entry is split so that everything up to and including
%% Key moves under the new marker.
enter(Key, Value, Hash, SkipList, Width, 1) ->
    {Marker, Items} = find_mark(Key, SkipList),
    case Hash rem Width of
        0 ->
            {UpToKey, AfterKey} =
                lists:splitwith(fun({K, _V}) -> K =< Key end, Items),
            Remainder =
                lists:keyreplace(Marker, 1, SkipList, {Marker, AfterKey}),
            % The new pair goes first so that ukeysort keeps it in preference
            % to any existing pair for the same key
            OwnItems = lists:ukeysort(1, [{Key, Value}|UpToKey]),
            lists:ukeysort(1, [{Key, OwnItems}|Remainder]);
        _ ->
            {Before, FromKey} =
                lists:splitwith(fun({K, _V}) -> K < Key end, Items),
            Updated =
                case FromKey of
                    [{Key, _Replaced}|After] ->
                        % Key already present - swap in the new value
                        Before ++ [{Key, Value}|After];
                    _ ->
                        Before ++ [{Key, Value}|FromKey]
                end,
            lists:keyreplace(Marker, 1, SkipList, {Marker, Updated})
    end;
enter(Key, Value, Hash, SkipList, Width, Level) ->
    LevelWidth = width(Level, Width),
    {Marker, LowerList} = find_mark(Key, SkipList),
    UpdatedLower = enter(Key, Value, Hash, LowerList, Width, Level - 1),
    case Hash rem LevelWidth of
        0 ->
            % Key is a marker at this level too - split the lower list
            {UpToKey, AfterKey} =
                lists:splitwith(fun({K, _V}) -> K =< Key end, UpdatedLower),
            Remainder =
                lists:keyreplace(Marker, 1, SkipList, {Marker, AfterKey}),
            lists:ukeysort(1, [{Key, UpToKey}|Remainder]);
        _ ->
            % Only the list under the existing marker has changed
            lists:keyreplace(Marker, 1, SkipList, {Marker, UpdatedLower})
    end.
|
||||
|
||||
%% Wrap a sorted list in ListHeight layers of markers. Each pass groups the
%% current list into chunks of at most SkipWidth elements (via from_list/4),
%% or into a single chunk when it already fits, using the key of the last
%% element of each chunk as the marker.
%% NOTE(review): an empty list with a non-zero height fails in lists:last/1;
%% from_sortedlist/2 handles the empty case before calling this.
from_list(SkipList, _SkipWidth, 0) ->
    SkipList;
from_list(KVList, SkipWidth, ListHeight) ->
    Layer =
        case length(KVList) of
            Len when Len > SkipWidth ->
                from_list(KVList, Len, [], SkipWidth);
            _ ->
                {Marker, _Last} = lists:last(KVList),
                [{Marker, KVList}]
        end,
    from_list(Layer, SkipWidth, ListHeight - 1).
|
||||
|
||||
%% Split KVList (of length L) into consecutive chunks of at most SkipWidth
%% elements, appending a {Marker, Chunk} pair to SkipList for each chunk. The
%% marker is the key of the last element in the chunk.
from_list([], 0, SkipList, _SkipWidth) ->
    SkipList;
from_list(KVList, L, SkipList, SkipWidth) ->
    ChunkLen = min(SkipWidth, L),
    {Chunk, Remainder} = lists:split(ChunkLen, KVList),
    {Marker, _Last} = lists:last(Chunk),
    from_list(Remainder,
                L - ChunkLen,
                SkipList ++ [{Marker, Chunk}],
                SkipWidth).
|
||||
|
||||
|
||||
%% Find Key within a nested marker list of the given height, returning
%% {value, V} or none.
%%
%% At each level get_sublist/2 picks the list under the first marker that is
%% >= Key, and returns null when there is no such marker. A null result means
%% the Key is beyond every marker so it cannot be present: this is reported
%% as none at every level (previously the bottom level passed null on to
%% lists:keyfind/3, which fails with badarg).
list_lookup(Key, SkipList, 1) ->
    case get_sublist(Key, SkipList) of
        null ->
            none;
        SubList ->
            case lists:keyfind(Key, 1, SubList) of
                false ->
                    none;
                {Key, V} ->
                    {value, V}
            end
    end;
list_lookup(Key, SkipList, Level) ->
    case get_sublist(Key, SkipList) of
        null ->
            none;
        SubList ->
            list_lookup(Key, SubList, Level - 1)
    end.
|
||||
|
||||
|
||||
%% Flatten a nested marker list of the given height into a single list of
%% {Key, Value} tuples, preserving the order in which they are held.
%%
%% lists:flatmap/2 is used rather than folding with Acc ++ SL, as repeatedly
%% appending to the accumulator copies it once per marker.
to_list(SkipList, 1) ->
    lists:flatmap(fun({_Mark, SL}) -> SL end, SkipList);
to_list(SkipList, Level) ->
    lists:flatmap(fun({_Mark, SL}) -> to_list(SL, Level - 1) end, SkipList).
|
||||
|
||||
|
||||
%% Collect the {Key, Value} tuples between StartKey and EndKey from a nested
%% marker list of height ListHeight. The start key itself is included.
to_range(SkipList, StartKey, EndKey, ListHeight) ->
    to_range(SkipList, StartKey, EndKey, ListHeight, [], true).

%% Work through the bottom-level lists one at a time. sublist_above/4 gives
%% the remainder of the bottom-level list in which StartKey falls; if the
%% last key of that chunk has not passed EndKey the whole chunk is taken and
%% the search restarts from that last key (exclusive this time). Otherwise
%% only the part of the chunk before EndKey is passed is taken, and the
%% accumulated result is returned.
to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) ->
    case sublist_above(SkipList, StartKey, ListHeight, StartIncl) of
        [] ->
            Acc;
        Chunk ->
            {LastKey, _LastValue} = lists:last(Chunk),
            case leveled_codec:endkey_passed(EndKey, LastKey) of
                true ->
                    InRange =
                        fun({K, _V}) ->
                            not leveled_codec:endkey_passed(EndKey, K)
                        end,
                    Acc ++ lists:takewhile(InRange, Chunk);
                false ->
                    to_range(SkipList,
                                LastKey,
                                EndKey,
                                ListHeight,
                                Acc ++ Chunk,
                                false)
            end
    end.
|
||||
|
||||
%% Descend Level layers of markers to the bottom-level list in which
%% StartKey falls, and return what is left of that list from StartKey on.
%%
%% At every layer (including the bottom list of {Key, Value} tuples, which is
%% Level 0) leading elements below the start are dropped: keys < StartKey
%% when StartIncl is true, keys =< StartKey when it is false. Above the
%% bottom layer the search continues in the list under the first remaining
%% marker, or returns [] when no marker remains.
sublist_above(SkipList, StartKey, Level, StartIncl) ->
    BelowStart =
        fun({K, _Item}) ->
            case StartIncl of
                true ->
                    K < StartKey;
                false ->
                    K =< StartKey
            end
        end,
    Remaining = lists:dropwhile(BelowStart, SkipList),
    case {Level, Remaining} of
        {0, _} ->
            Remaining;
        {_, []} ->
            [];
        {_, [{_Marker, SL}|_Rest]} ->
            sublist_above(SL, StartKey, Level - 1, StartIncl)
    end.
|
||||
|
||||
%% Wrap SkipList in Level layers, each a single-entry list whose marker is
%% ?INFINITY_KEY.
empty(SkipList, Level) ->
    Wrapped = [{?INFINITY_KEY, SkipList}],
    case Level of
        1 ->
            Wrapped;
        _ ->
            empty(Wrapped, Level - 1)
    end.
|
||||
|
||||
%% Count the elements held at the bottom of a nested marker list of the
%% given height, by summing the sizes found under each marker.
size(SkipList, 1) ->
    lists:sum(lists:map(fun({_Mark, SL}) -> length(SL) end, SkipList));
size(SkipList, Level) ->
    lists:sum(lists:map(fun({_Mark, SL}) -> size(SL, Level - 1) end,
                        SkipList)).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
%%%============================================================================
|
||||
|
||||
%% The hash divisor used to decide whether a key is a marker at a level.
%% Level 1 uses the base width; each level above squares the value from the
%% level below (so 16 gives 16, 256, 65536, ...).
width(1, BaseWidth) ->
    BaseWidth;
width(Level, BaseWidth) ->
    Squared = BaseWidth * BaseWidth,
    width(Level - 1, Squared).
|
||||
|
||||
%% Return the first {Marker, SubList} entry whose Marker is >= Key, or false
%% when every marker is below Key. The scan stops at the first match.
find_mark(Key, SkipList) ->
    BelowKey = fun({Marker, _SL}) -> Marker < Key end,
    case lists:dropwhile(BelowKey, SkipList) of
        [Found|_Rest] ->
            Found;
        [] ->
            false
    end.
|
||||
|
||||
%% Return the list held under the first marker that is >= Key, or the atom
%% null when every marker is below Key. The scan stops at the first match.
get_sublist(Key, SkipList) ->
    BelowKey = fun({SkipKey, _SL}) -> SkipKey < Key end,
    case lists:dropwhile(BelowKey, SkipList) of
        [{_SkipKey, SL}|_Rest] ->
            SL;
        [] ->
            null
    end.
|
||||
|
||||
%%%============================================================================
|
||||
%%% Test
|
||||
%%%============================================================================
|
||||
|
||||
-ifdef(TEST).
|
||||
|
||||
%% Generate Count random {Key, Value} pairs, with sequence numbers counting
%% up from Seqn.
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    generate_randomkeys(Seqn, Count, [], BucketRangeLow, BucketRangeHigh).

%% Keys are {o, "BucketNNNN", "KeyNNNN", null} with a random key number in
%% 1..1000; the bucket number is BucketLow plus a random offset in 1..BRange
%% (or exactly BucketLow when BRange is 0). Values carry the sequence number.
%% The most recently generated pair is at the head of the result.
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    BucketOffset =
        case BRange of
            0 ->
                0;
            _ ->
                random:uniform(BRange)
        end,
    BNumber = string:right(integer_to_list(BucketLow + BucketOffset), 4, $0),
    KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
    Key = {o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
    Value = {Seqn, {active, infinity}, null},
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        [{Key, Value}|Acc],
                        BucketLow,
                        BRange).
|
||||
|
||||
%% Make sure very small lists (1 to 32 keys) behave
skiplist_small_test() ->
    Sizes = lists:seq(1, 32),
    lists:foreach(fun dotest_skiplist_small/1, Sizes).
|
||||
|
||||
|
||||
%% Build a skiplist of N random keys both dynamically (enter/3 from empty)
%% and in one go (from_list/1), then confirm every key can be found in both
%% with the expected value.
dotest_skiplist_small(N) ->
    KL = generate_randomkeys(1, N, 1, 2),
    Dynamic = lists:foldl(fun({K, V}, SL) -> enter(K, V, SL) end, empty(), KL),
    Static = from_list(lists:reverse(KL)),
    Expected = lists:ukeysort(1, lists:reverse(KL)),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({value, V}, lookup(K, Dynamic))
                    end,
                    Expected),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({value, V}, lookup(K, Static))
                    end,
                    Expected).
|
||||
|
||||
%% Run the main test sequence against a bloom protected skiplist
skiplist_withbloom_test() ->
    Banner = "~n~nBloom protected skiplist test:~n~n",
    io:format(user, Banner, []),
    skiplist_tester(true).

%% Run the main test sequence against a skiplist with no bloom
skiplist_nobloom_test() ->
    Banner = "~n~nBloom free skiplist test:~n~n",
    io:format(user, Banner, []),
    skiplist_tester(false).
|
||||
|
||||
%% Build the same 4000 random keys into a skiplist four ways - from an ETS
%% ordered set, from an unsorted list, from a sorted list, and dynamically by
%% entering each key - reporting timings for each. The first three must give
%% an identical result (asserted by matching on SkipList). The timing tests
%% are then run on both the generated and the dynamic skiplist.
skiplist_tester(Bloom) ->
    N = 4000,
    KL = generate_randomkeys(1, N, 1, N div 5),
    TierCounts =
        fun(SkpL) ->
            lists:map(fun({_L, SL}) -> length(SL) end, element(2, SkpL))
        end,

    OS = ets:new(test, [ordered_set, private]),
    ets:insert(OS, KL),
    SWaETS = os:timestamp(),
    SkipList = from_orderedset(OS, Bloom),
    io:format(user,
                "Generating skip list with ~w keys in ~w microseconds " ++
                    "from ordered set~n",
                [N, timer:now_diff(os:timestamp(), SWaETS)]),

    SWaGSL = os:timestamp(),
    SkipList = from_list(lists:reverse(KL), Bloom),
    io:format(user,
                "Generating skip list with ~w keys in ~w microseconds~n" ++
                    "Top level key count of ~w~n",
                [N,
                    timer:now_diff(os:timestamp(), SWaGSL),
                    length(element(2, SkipList))]),
    io:format(user,
                "Second tier key counts of ~w~n",
                [TierCounts(SkipList)]),
    KLSorted = lists:ukeysort(1, lists:reverse(KL)),

    SWaGSL2 = os:timestamp(),
    SkipList = from_sortedlist(KLSorted, Bloom),
    io:format(user,
                "Generating skip list with ~w sorted keys in ~w " ++
                    "microseconds~n",
                [N, timer:now_diff(os:timestamp(), SWaGSL2)]),

    SWaDSL = os:timestamp(),
    SkipList1 =
        lists:foldl(fun({K, V}, SL) -> enter(K, V, SL) end,
                    empty(Bloom),
                    KL),
    io:format(user,
                "Dynamic load of skiplist with ~w keys took ~w " ++
                    "microseconds~n" ++
                    "Top level key count of ~w~n",
                [N,
                    timer:now_diff(os:timestamp(), SWaDSL),
                    length(element(2, SkipList1))]),
    io:format(user,
                "Second tier key counts of ~w~n",
                [TierCounts(SkipList1)]),

    io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
    skiplist_timingtest(KLSorted, SkipList, N, Bloom),

    io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
    skiplist_timingtest(KLSorted, SkipList1, N, Bloom).
|
||||
|
||||
|
||||
%% Time (and check) lookups, range queries and flattening on a skiplist.
%%
%% KL is the sorted, de-duplicated list of pairs the skiplist was built from,
%% N is the number of keys originally generated, and Bloom states whether the
%% skiplist is bloom protected (which enables the known-hash lookup timing).
%% The number of keys reported for the lookup timing is the actual number of
%% keys checked, rather than a hard-coded figure.
skiplist_timingtest(KL, SkipList, N, Bloom) ->
    io:format(user, "Timing tests on skiplist of size ~w~n",
                [leveled_skiplist:size(SkipList)]),
    CheckList1 = lists:sublist(KL, N div 4, 200),
    CheckList2 = lists:sublist(KL, N div 3, 200),
    CheckList3 = lists:sublist(KL, N div 2, 200),
    CheckList4 = lists:sublist(KL, N - 1000, 200),
    CheckList5 = lists:sublist(KL, N - 500, 200),
    CheckList6 = lists:sublist(KL, 1, 10),
    CheckList7 = lists:nthtail(N - 200, KL),
    CheckList8 = lists:sublist(KL, N div 2, 1),
    CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
                CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,
    % Counted before the clock starts so the count is not part of the timing
    CheckCount = length(CheckAll),

    SWb = os:timestamp(),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({value, V}, lookup(K, SkipList))
                    end,
                    CheckAll),
    io:format(user, "Finding ~w keys took ~w microseconds~n",
                [CheckCount, timer:now_diff(os:timestamp(), SWb)]),

    % Query the range from the first to the last key of a check list, and
    % optionally assert the number of results matches the check list
    RangeFun =
        fun(SkipListToQuery, CheckListForQ, Assert) ->
            KR =
                to_range(SkipListToQuery,
                            element(1, lists:nth(1, CheckListForQ)),
                            element(1, lists:last(CheckListForQ))),
            case Assert of
                true ->
                    CompareL = length(lists:usort(CheckListForQ)),
                    ?assertMatch(CompareL, length(KR));
                false ->
                    KR
            end
        end,

    SWc = os:timestamp(),
    RangeFun(SkipList, CheckList1, true),
    RangeFun(SkipList, CheckList2, true),
    RangeFun(SkipList, CheckList3, true),
    RangeFun(SkipList, CheckList4, true),
    RangeFun(SkipList, CheckList5, true),
    RangeFun(SkipList, CheckList6, true),
    RangeFun(SkipList, CheckList7, true),
    RangeFun(SkipList, CheckList8, true),

    % Ranges entirely above, and entirely below, the buckets in the skiplist
    KL_OOR1 = generate_randomkeys(1, 4, N div 5 + 1, N div 5 + 10),
    KR9 = RangeFun(SkipList, KL_OOR1, false),
    ?assertMatch([], KR9),

    KL_OOR2 = generate_randomkeys(1, 4, 0, 0),
    KR10 = RangeFun(SkipList, KL_OOR2, false),
    ?assertMatch([], KR10),

    io:format(user, "Finding 10 ranges took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWc)]),

    AltKL1 = generate_randomkeys(1, 2000, 1, 200),
    SWd0 = os:timestamp(),
    lists:foreach(fun({K, _V}) -> lookup(K, SkipList) end, AltKL1),
    io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWd0)]),
    SWd1 = os:timestamp(),
    lists:foreach(fun({K, _V}) -> leveled_codec:magic_hash(K) end, AltKL1),
    io:format(user, "Generating 2000 magic hashes took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWd1)]),
    SWd2 = os:timestamp(),
    lists:foreach(fun({K, _V}) -> erlang:phash2(K) end, AltKL1),
    io:format(user, "Generating 2000 not so magic hashes took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWd2)]),

    AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300),
    SWe = os:timestamp(),
    lists:foreach(fun({K, _V}) -> none = lookup(K, SkipList) end, AltKL2),
    io:format(user, "Getting 1000 missing keys above range took ~w " ++
                        "microseconds~n",
                [timer:now_diff(os:timestamp(), SWe)]),
    AltKL3 = generate_randomkeys(1, 1000, 0, 0),
    SWf = os:timestamp(),
    lists:foreach(fun({K, _V}) -> none = lookup(K, SkipList) end, AltKL3),
    io:format(user, "Getting 1000 missing keys below range took ~w " ++
                        "microseconds~n",
                [timer:now_diff(os:timestamp(), SWf)]),

    SWg = os:timestamp(),
    FlatList = to_list(SkipList),
    io:format(user, "Flattening skiplist took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWg)]),
    ?assertMatch(KL, FlatList),

    case Bloom of
        true ->
            % Random integers are used as both the key and the hash
            HashList = lists:map(fun(_X) ->
                                        random:uniform(4294967295) end,
                                    lists:seq(1, 2000)),
            SWh = os:timestamp(),
            lists:foreach(fun(X) ->
                                lookup(X, X, SkipList) end,
                            HashList),
            io:format(user,
                        "Getting 2000 missing keys when hash was known " ++
                            "took ~w microseconds~n",
                        [timer:now_diff(os:timestamp(), SWh)]);
        false ->
            ok
    end.
|
||||
|
||||
%% A deterministic {Key, Value} pair for the integer X. The key number is
%% right-justified to six characters (string:right/2 pads with spaces).
define_kv(X) ->
    PaddedNumber = string:right(integer_to_list(X), 6),
    Key = {o, "Bucket", "Key" ++ PaddedNumber, null},
    Value = {X, {active, infinity}, null},
    {Key, Value}.
|
||||
|
||||
%% Test with 4096 keys, so that the key count is an exact multiple of the
%% skip width at each level. Every key must be found, and each of the first
%% 25 windows of 32 consecutive keys must come back exactly from to_range/3.
skiplist_roundsize_test() ->
    KVL = [define_kv(X) || X <- lists:seq(1, 4096)],
    SkipList = from_list(KVL),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({value, V}, lookup(K, SkipList))
                    end,
                    KVL),
    CheckWindow =
        fun(X) ->
            First = X * 32 + 1,
            {KS, _VS} = define_kv(First),
            {KE, _VE} = define_kv((X + 1) * 32),
            R = to_range(SkipList, KS, KE),
            L = lists:sublist(KVL, First, 32),
            ?assertMatch(L, R)
        end,
    lists:foreach(CheckWindow, lists:seq(0, 24)).
|
||||
|
||||
%% Keys added with enter_nolookup/3 to a bloom protected skiplist must not be
%% found by lookup/2, but must all be present when the list is flattened.
skiplist_nolookup_test() ->
    N = 4000,
    KL = generate_randomkeys(1, N, 1, N div 5),
    AddNoLookup = fun({K, V}, Acc) -> enter_nolookup(K, V, Acc) end,
    SkipList = lists:foldl(AddNoLookup, empty(true), KL),
    KLSorted = lists:ukeysort(1, lists:reverse(KL)),
    lists:foreach(fun({K, _V}) ->
                        ?assertMatch(none, lookup(K, SkipList))
                    end,
                    KL),
    ?assertMatch(KLSorted, to_list(SkipList)).
|
||||
|
||||
%% Single-key ranges at the edges of the list, for list sizes either side of
%% 128 keys: a range from the last key to the last key must return just that
%% key, and likewise for the first key.
skiplist_range_test() ->
    N = 150,
    KL = generate_randomkeys(1, N, 1, N div 5),
    Build =
        fun(Size) ->
            KVL = lists:sublist(lists:ukeysort(1, KL), Size),
            {KVL, from_list(KVL)}
        end,
    CheckLast =
        fun({KVL, SkipList}) ->
            {LastK, LastV} = lists:last(KVL),
            R = to_range(SkipList, LastK, LastK),
            ?assertMatch([{LastK, LastV}], R)
        end,

    CheckLast(Build(128)),
    CheckLast(Build(127)),

    {KLSL3, SkipList3} = Build(129),
    CheckLast({KLSL3, SkipList3}),

    {FirstK4, V4} = lists:nth(1, KLSL3),
    R4 = to_range(SkipList3, FirstK4, FirstK4),
    ?assertMatch([{FirstK4, V4}], R4).
|
||||
|
||||
|
||||
%% An empty skiplist has a size of zero, with or without a bloom
empty_skiplist_size_test() ->
    lists:foreach(fun(BloomProtect) ->
                        Empty = empty(BloomProtect),
                        ?assertMatch(0, leveled_skiplist:size(Empty))
                    end,
                    [false, true]).
|
||||
|
||||
-endif.
|
|
@ -1,159 +0,0 @@
|
|||
%% -------- TINY BLOOM ---------
|
||||
%%
|
||||
%% For sheltering relatively expensive lookups with a probabilistic check
|
||||
%%
|
||||
%% Uses multiple 512 byte blooms. Can sensibly hold up to 1000 keys per array.
|
||||
%% Even at 1000 keys should still offer only a 20% false positive
|
||||
%%
|
||||
%% Restricted to no more than 256 arrays - so can't handle more than 250K keys
|
||||
%% in total
|
||||
%%
|
||||
%% Implemented this way to make it easy to control false positive (just by
|
||||
%% setting the width). Also only requires binary manipulations of a single
|
||||
%% hash
|
||||
|
||||
-module(leveled_tinybloom).
|
||||
|
||||
-include("include/leveled.hrl").
|
||||
|
||||
-export([
|
||||
enter/2,
|
||||
check/2,
|
||||
empty/1
|
||||
]).
|
||||
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
%%%============================================================================
|
||||
%%% Bloom API
|
||||
%%%============================================================================
|
||||
|
||||
%% Create an empty bloom: a dict with one zeroed 4096-bit array for each slot
%% 0..Width-1. Width may not exceed 256.
empty(Width) when Width =< 256 ->
    Slots = lists:seq(0, Width - 1),
    dict:from_list(lists:map(fun(Slot) -> {Slot, <<0:4096>>} end, Slots)).
|
||||
|
||||
%% Add an entry to the bloom, given either {hash, Hash} or a Key (in which
%% case the magic hash of the Key is used). A hash of no_lookup leaves the
%% bloom unchanged.
%%
%% The hash is split into a slot number and two bit positions. The slot is
%% taken modulo the number of slots in the bloom, and both bits are set in
%% that slot's 4096-bit array.
enter({hash, no_lookup}, Bloom) ->
    Bloom;
enter({hash, Hash}, Bloom) ->
    {SlotSeed, BitA, BitB} = split_hash(Hash),
    Slot = SlotSeed rem dict:size(Bloom),
    SetBit = fun(Bit, Arr) -> add_to_array(Bit, Arr, 4096) end,
    Updated =
        lists:foldl(SetBit,
                    dict:fetch(Slot, Bloom),
                    lists:usort([BitA, BitB])),
    dict:store(Slot, <<Updated/binary>>, Bloom);
enter(Key, Bloom) ->
    enter({hash, leveled_codec:magic_hash(Key)}, Bloom).
|
||||
|
||||
|
||||
%% Check the bloom for an entry, given either {hash, Hash} or a Key (in which
%% case the magic hash of the Key is used). Returns true only when both bit
%% positions derived from the hash are set in the relevant slot; the second
%% bit is not examined if the first is clear.
check({hash, Hash}, Bloom) ->
    {SlotSeed, BitA, BitB} = split_hash(Hash),
    Slot = SlotSeed rem dict:size(Bloom),
    BitArray = dict:fetch(Slot, Bloom),
    IsSet = fun(Bit) -> getbit(Bit, BitArray, 4096) =:= <<1:1>> end,
    IsSet(BitA) andalso IsSet(BitB);
check(Key, Bloom) ->
    check({hash, leveled_codec:magic_hash(Key)}, Bloom).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
%%%============================================================================
|
||||
|
||||
%% Split a hash into {Slot, Bit1, Bit2}. The low 8 bits give the slot seed
%% (0..255), the next 12 bits the first bit position and the 12 bits after
%% that the second bit position (both 0..4095).
%%
%% The second position is masked as well as shifted, so that a hash wider
%% than 32 bits cannot yield a position beyond the end of the 4096-bit
%% arrays. For hashes of up to 32 bits the result is unchanged by the mask.
split_hash(Hash) ->
    Slot = Hash band 255,
    Bit1 = (Hash bsr 8) band 4095,
    Bit2 = (Hash bsr 20) band 4095,
    {Slot, Bit1, Bit2}.
|
||||
|
||||
%% Return BitArray with the bit at position Bit (counting from 0 at the
%% start of the array) set to 1. ArrayLength is the size of BitArray in bits;
%% the match fails if it does not agree with the actual size.
add_to_array(Bit, BitArray, ArrayLength) ->
    TailLen = ArrayLength - (Bit + 1),
    <<Before:Bit/bitstring,
        _Old:1/integer,
        After:TailLen/bitstring>> = BitArray,
    <<Before/bitstring, 1:1, After/bitstring>>.
|
||||
|
||||
%% Return the bit at position Bit (counting from 0 at the start of the
%% array) as a one-bit bitstring, i.e. <<0:1>> or <<1:1>>. ArrayLength is the
%% size of BitArray in bits; the match fails if it does not agree with the
%% actual size.
getbit(Bit, BitArray, ArrayLength) ->
    TailLen = ArrayLength - (Bit + 1),
    <<_Before:Bit/bitstring,
        Wanted:1/bitstring,
        _After:TailLen/bitstring>> = BitArray,
    Wanted.
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% Test
|
||||
%%%============================================================================
|
||||
|
||||
-ifdef(TEST).
|
||||
|
||||
%% Add 4000 random keys to a bloom of width 6, confirm every one of them is
%% found, and confirm that fewer than a quarter of 4000 keys which were not
%% added are reported as present. Timings are output along the way.
%%
%% crypto:strong_rand_bytes/1 is used for the random suffix of each key, as
%% crypto:rand_bytes/1 is no longer available in current OTP releases.
simple_test() ->
    N = 4000,
    W = 6,
    MakeKeys =
        fun(Prefix) ->
            lists:map(fun(X) ->
                            Prefix ++
                                integer_to_list(X) ++
                                integer_to_list(random:uniform(100)) ++
                                binary_to_list(crypto:strong_rand_bytes(2))
                        end,
                        lists:seq(1, N))
        end,
    KLin = MakeKeys("Key_"),
    KLout = MakeKeys("NotKey_"),

    SW0_PH = os:timestamp(),
    lists:foreach(fun(X) -> erlang:phash2(X) end, KLin),
    io:format(user,
                "~nNative hash function hashes ~w keys in ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW0_PH)]),
    SW0_MH = os:timestamp(),
    lists:foreach(fun(X) -> leveled_codec:magic_hash(X) end, KLin),
    io:format(user,
                "~nMagic hash function hashes ~w keys in ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW0_MH)]),

    SW1 = os:timestamp(),
    Bloom = lists:foldr(fun enter/2, empty(W), KLin),
    io:format(user,
                "~nAdding ~w keys to bloom took ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW1)]),

    SW2 = os:timestamp(),
    lists:foreach(fun(X) -> ?assertMatch(true, check(X, Bloom)) end, KLin),
    io:format(user,
                "~nChecking ~w keys in bloom took ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW2)]),

    SW3 = os:timestamp(),
    CountFalsePositive =
        fun(X, Acc) ->
            case check(X, Bloom) of
                true ->
                    Acc + 1;
                false ->
                    Acc
            end
        end,
    FP = lists:foldr(CountFalsePositive, 0, KLout),
    io:format(user,
                "~nChecking ~w keys out of bloom took ~w microseconds " ++
                    "with ~w false positive rate~n",
                [N, timer:now_diff(os:timestamp(), SW3), FP / N]),
    ?assertMatch(true, FP < (N div 4)).
|
||||
|
||||
|
||||
|
||||
-endif.
|
|
@ -162,7 +162,17 @@ to_list({tree, _L, Tree}) ->
|
|||
end,
|
||||
lists:foldl(FoldFun, [], tree_to_list(Tree));
|
||||
to_list({idxt, _L, {TLI, _IDX}}) ->
|
||||
lists:append(tuple_to_list(TLI)).
|
||||
lists:append(tuple_to_list(TLI));
|
||||
to_list({skpl, _L, SkipList}) ->
|
||||
FoldFun =
|
||||
fun({_M, SL}, Acc) ->
|
||||
[SL|Acc]
|
||||
end,
|
||||
|
||||
Lv1List = lists:reverse(lists:foldl(FoldFun, [], SkipList)),
|
||||
Lv0List = lists:reverse(lists:foldl(FoldFun, [], lists:append(Lv1List))),
|
||||
lists:append(Lv0List).
|
||||
|
||||
|
||||
|
||||
tsize({_Type, L, _Tree}) ->
|
||||
|
@ -171,7 +181,9 @@ tsize({_Type, L, _Tree}) ->
|
|||
empty(tree) ->
|
||||
{tree, 0, empty_tree()};
|
||||
empty(idxt) ->
|
||||
{idxt, 0, {{}, empty_tree()}}.
|
||||
{idxt, 0, {{}, empty_tree()}};
|
||||
empty(skpl) ->
|
||||
{skpl, 0, []}.
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
|
@ -216,14 +228,22 @@ roll_list(KVList, L, SkipList, SkipWidth) ->
|
|||
|
||||
|
||||
|
||||
lookup_match(_Key, []) ->
|
||||
% lookup_match(_Key, []) ->
|
||||
% none;
|
||||
% lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key ->
|
||||
% none;
|
||||
% lookup_match(Key, [{Key, EV}|_Tail]) ->
|
||||
% {value, EV};
|
||||
% lookup_match(Key, [_Top|Tail]) ->
|
||||
% lookup_match(Key, Tail).
|
||||
|
||||
lookup_match(Key, KVList) ->
|
||||
case lists:keyfind(Key, 1, KVList) of
|
||||
false ->
|
||||
none;
|
||||
lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key ->
|
||||
none;
|
||||
lookup_match(Key, [{Key, EV}|_Tail]) ->
|
||||
{value, EV};
|
||||
lookup_match(Key, [_Top|Tail]) ->
|
||||
lookup_match(Key, Tail).
|
||||
{Key, Value} ->
|
||||
{value, Value}
|
||||
end.
|
||||
|
||||
lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key ->
|
||||
{EK, EV};
|
||||
|
@ -396,15 +416,14 @@ skpl_getsublist(Key, SkipList) ->
|
|||
FoldFun =
|
||||
fun({Mark, SL}, Acc) ->
|
||||
case {Acc, Mark} of
|
||||
{none, Mark} when Mark >= Key ->
|
||||
{[], Mark} when Mark >= Key ->
|
||||
SL;
|
||||
_ ->
|
||||
Acc
|
||||
end
|
||||
end,
|
||||
SL1 = lists:foldl(FoldFun, none, SkipList),
|
||||
lists:foldl(FoldFun, none, SL1).
|
||||
|
||||
SL1 = lists:foldl(FoldFun, [], SkipList),
|
||||
lists:foldl(FoldFun, [], SL1).
|
||||
|
||||
%%%============================================================================
|
||||
%%% Balance tree implementation
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue