Switch to using skip lists from leveled_tree

Remove now unused leveled_skiplist and leveled_tinybloom
This commit is contained in:
Martin Sumner 2017-01-21 22:34:56 +00:00
parent 6d2eb1d57c
commit 58cda7d157
4 changed files with 34 additions and 835 deletions

View file

@ -15,7 +15,7 @@
%% Inker key type used for tombstones %% Inker key type used for tombstones
-define(INKT_TOMB, tomb). -define(INKT_TOMB, tomb).
-define(CACHE_TYPE, idxt). -define(CACHE_TYPE, skpl).
-record(sft_options, -record(sft_options,
{wait = true :: boolean(), {wait = true :: boolean(),

View file

@ -1,661 +0,0 @@
%% -------- SKIPLIST ---------
%%
%% For storing small numbers of {K, V} pairs where reasonable insertion and
%% fetch times are needed, together with fast support for flattening to a
%% list or to a sublist within a certain key range
%%
%% Used instead of gb_trees to retain compatibility with OTP16 (and Riak's
%% ongoing dependency on OTP16)
%%
%% Not a proper skip list. Only supports a fixed depth. Good enough for the
%% purposes of leveled. Also uses the peculiar endkey_passed function within
%% leveled. Not tested beyond a depth of 2.
-module(leveled_skiplist).
-include("include/leveled.hrl").
-export([
from_list/1,
from_list/2,
from_sortedlist/1,
from_sortedlist/2,
from_orderedset/1,
from_orderedset/2,
to_list/1,
enter/3,
enter/4,
enter_nolookup/3,
to_range/2,
to_range/3,
lookup/2,
lookup/3,
empty/0,
empty/1,
size/1
]).
-include_lib("eunit/include/eunit.hrl").
-define(SKIP_WIDTH, 16).
-define(LIST_HEIGHT, 2).
-define(INFINITY_KEY, {null, null, null, null, null}).
-define(BITARRAY_SIZE, 2048).
%%%============================================================================
%%% SkipList API
%%%============================================================================
%% Add Key/Value to the skip list, deriving the bloom hash for the key with
%% leveled_codec:magic_hash/1.
enter(Key, Value, SkipList) ->
    enter(Key, leveled_codec:magic_hash(Key), Value, SkipList).

%% Add Key/Value to the skip list when the bloom hash is already known.
%% The first element of the skip list is either the atom list_only (no bloom
%% is kept) or a bloom which is updated with Hash. The native erlang:phash2/1
%% hash, not Hash, is what is passed down to decide marker promotion.
enter(Key, Hash, Value, SkipList) ->
    NewBloom =
        case element(1, SkipList) of
            list_only ->
                list_only;
            OldBloom ->
                leveled_tinybloom:enter({hash, Hash}, OldBloom)
        end,
    NewLevels =
        enter(Key,
                Value,
                erlang:phash2(Key),
                element(2, SkipList),
                ?SKIP_WIDTH,
                ?LIST_HEIGHT),
    {NewBloom, NewLevels}.
%% Add a key which can be iterated over, but which is not added to the bloom
%% (the first element of the skip list is passed through untouched) - used
%% for index terms.
%% The key may still become a marker key - the much cheaper native hash
%% (erlang:phash2/1) is used to determine this, avoiding the more expensive
%% magic hash.
enter_nolookup(Key, Value, SkipList) ->
    Untouched = element(1, SkipList),
    NewLevels =
        enter(Key,
                Value,
                erlang:phash2(Key),
                element(2, SkipList),
                ?SKIP_WIDTH,
                ?LIST_HEIGHT),
    {Untouched, NewLevels}.
%% Build a skip list (without a bloom) from the contents of an ets table.
from_orderedset(Table) ->
    from_orderedset(Table, false).

%% Build a skip list from the contents of an ets table, with a bloom when
%% Bloom is true. The dumped list is handed straight to from_sortedlist/2
%% without sorting - presumably the table is an ordered_set (as the name
%% suggests); verify against callers.
from_orderedset(Table, Bloom) ->
    TableKVL = ets:tab2list(Table),
    from_sortedlist(TableKVL, Bloom).
%% Build a skip list (without a bloom) from an unsorted {K, V} list.
from_list(UnsortedKVL) ->
    from_list(UnsortedKVL, false).

%% Build a skip list from an unsorted {K, V} list. The list is sorted on the
%% key and de-duplicated (lists:ukeysort/2) before being loaded.
from_list(UnsortedKVL, BloomProtect) ->
    from_sortedlist(lists:ukeysort(1, UnsortedKVL), BloomProtect).
%% Build a skip list (without a bloom) from an already key-sorted list.
from_sortedlist(SortedKVL) ->
    from_sortedlist(SortedKVL, false).

%% Build a skip list from an already key-sorted list. An empty input gives
%% the empty skip list. When BloomProtect is true every key is also entered
%% into a fresh bloom, otherwise the bloom slot holds the atom list_only.
from_sortedlist([], BloomProtect) ->
    empty(BloomProtect);
from_sortedlist(SortedKVL, BloomProtect) ->
    Filter =
        case BloomProtect of
            false ->
                list_only;
            true ->
                AddKey =
                    fun({K, _V}, BloomAcc) ->
                        leveled_tinybloom:enter(K, BloomAcc)
                    end,
                lists:foldr(AddKey,
                            leveled_tinybloom:empty(?SKIP_WIDTH),
                            SortedKVL)
        end,
    {Filter, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}.
%% Look up Key, returning {value, V} or none. The magic hash is only
%% calculated when the skip list carries a bloom.
lookup(Key, SkipList) ->
    case element(1, SkipList) of
        list_only ->
            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
        _Bloom ->
            lookup(Key, leveled_codec:magic_hash(Key), SkipList)
    end.

%% Look up Key when its hash is already known. If a bloom is present and it
%% rejects the hash, none is returned without touching the lists.
lookup(Key, Hash, SkipList) ->
    Bloom = element(1, SkipList),
    MayBePresent =
        (Bloom =:= list_only)
            orelse leveled_tinybloom:check({hash, Hash}, Bloom),
    case MayBePresent of
        true ->
            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
        false ->
            none
    end.
%% Rather than supporting iterator_from like gb_trees, this just outputs a
%% key-sorted list for the desired range, which can then be iterated over as
%% normal.
%% With no End given, the range is bounded by ?INFINITY_KEY.
to_range(SkipList, Start) ->
    to_range(SkipList, Start, ?INFINITY_KEY).

to_range(SkipList, Start, End) ->
    Levels = element(2, SkipList),
    to_range(Levels, Start, End, ?LIST_HEIGHT).
%% Flatten the whole skip list into a single {K, V} list.
to_list(SkipList) ->
    Levels = element(2, SkipList),
    to_list(Levels, ?LIST_HEIGHT).
%% An empty skip list with no bloom.
empty() ->
    empty(false).

%% An empty skip list; when BloomProtect is true it carries an empty bloom,
%% otherwise the bloom slot holds the atom list_only.
empty(BloomProtect) ->
    Filter =
        case BloomProtect of
            false ->
                list_only;
            true ->
                leveled_tinybloom:empty(?SKIP_WIDTH)
        end,
    {Filter, empty([], ?LIST_HEIGHT)}.
%% Number of {K, V} pairs held in the skip list.
size(SkipList) ->
    Levels = element(2, SkipList),
    size(Levels, ?LIST_HEIGHT).
%%%============================================================================
%%% SkipList Base Functions
%%%============================================================================
%% Insert {Key, Value} into a list of {Marker, SubList} pairs at the given
%% level, returning the updated list.
%% Hash decides whether Key is promoted to be a marker: at level 1 when
%% Hash rem Width is 0, at higher levels when Hash rem width(Level, Width)
%% is 0.
%% The match on the result of find_mark/2 fails (badmatch) if there is no
%% marker >= Key in SkipList.
enter(Key, Value, Hash, SkipList, Width, 1) ->
% Level 1: the sublist under each marker is a plain {K, V} list
{MarkerKey, SubList} = find_mark(Key, SkipList),
case Hash rem Width of
0 ->
% Key becomes a new marker. It takes the entries =< Key (plus itself)
% and the previous marker keeps only the entries above Key
{LHS, RHS} = lists:splitwith(fun({K, _V}) ->
K =< Key end,
SubList),
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
% The new {Key, Value} is placed first so that ukeysort retains it ahead
% of any existing entry for Key
SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1],
lists:ukeysort(1, SkpL2);
_ ->
% Not a marker: insert into (or replace within) the existing sublist,
% keeping it in key order
{LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < Key end, SubList),
UpdSubList =
case RHS of
[] ->
LHS ++ [{Key, Value}];
[{FirstKey, _V}|RHSTail] ->
case FirstKey of
Key ->
% Key already present - drop the old entry
LHS ++ [{Key, Value}] ++ RHSTail;
_ ->
LHS ++ [{Key, Value}] ++ RHS
end
end,
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
end;
enter(Key, Value, Hash, SkipList, Width, Level) ->
% Higher level: the sublist under each marker is itself a skip list of
% height Level - 1, which is updated first
HashMatch = width(Level, Width),
{MarkerKey, SubSkipList} = find_mark(Key, SkipList),
UpdSubSkipList = enter(Key, Value, Hash, SubSkipList, Width, Level - 1),
case Hash rem HashMatch of
0 ->
% Key is also a marker at this level: split the updated sub skip list
% so the new marker takes the entries =< Key and the previous marker
% keeps the remainder
{LHS, RHS} = lists:splitwith(fun({K, _V}) ->
K =< Key end,
UpdSubSkipList),
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
lists:ukeysort(1, [{Key, LHS}|SkpL1]);
_ ->
% Need to replace Marker Key with sublist
lists:keyreplace(MarkerKey,
1,
SkipList,
{MarkerKey, UpdSubSkipList})
end.
%% Build the levels of a skip list bottom-up from a sorted list. Each pass
%% wraps the current list in one more layer of {Marker, SubList} pairs,
%% where the marker is the last key of the sublist; ListHeight counts the
%% layers still to add.
from_list(SkipList, _SkipWidth, 0) ->
    SkipList;
from_list(KVList, SkipWidth, ListHeight) ->
    Count = length(KVList),
    Layer =
        case Count =< SkipWidth of
            true ->
                % Small enough to sit under a single marker
                {LastK, _Last} = lists:last(KVList),
                [{LastK, KVList}];
            false ->
                from_list(KVList, Count, [], SkipWidth)
        end,
    from_list(Layer, SkipWidth, ListHeight - 1).
%% Split a sorted list of length L into consecutive chunks of at most
%% SkipWidth elements, appending one {LastKeyOfChunk, Chunk} pair per chunk
%% to SkipList (which holds the pairs built so far, in order).
%% The chunks are accumulated in reverse and reversed once at the end, to
%% avoid re-copying the accumulator with ++ for every chunk.
from_list(KVList, L, SkipList, SkipWidth) ->
    from_list_chunks(KVList, L, lists:reverse(SkipList), SkipWidth).

%% Worker for from_list/4 - RevAcc is the result so far in reverse order.
from_list_chunks([], 0, RevAcc, _SkipWidth) ->
    lists:reverse(RevAcc);
from_list_chunks(KVList, L, RevAcc, SkipWidth) ->
    SubLL = min(SkipWidth, L),
    {Head, Tail} = lists:split(SubLL, KVList),
    {LastK, _LastV} = lists:last(Head),
    from_list_chunks(Tail, L - SubLL, [{LastK, Head}|RevAcc], SkipWidth).
%% Find Key by descending one layer per level, following at each layer the
%% sublist returned by get_sublist/2. Returns {value, V} or none.
list_lookup(Key, SkipList, Level) ->
    case {Level, get_sublist(Key, SkipList)} of
        {1, KVList} ->
            % Bottom layer - the sublist is the {K, V} list itself
            case lists:keyfind(Key, 1, KVList) of
                {Key, Value} ->
                    {value, Value};
                false ->
                    none
            end;
        {_, null} ->
            % No marker at or above Key, so Key cannot be present
            none;
        {_, SubSkipList} ->
            list_lookup(Key, SubSkipList, Level - 1)
    end.
%% Flatten a skip list of the given height into a single {K, V} list,
%% preserving order.
%% lists:flatmap/2 is used so that each sublist is appended once, rather
%% than re-copying a growing accumulator with ++ for every marker.
to_list(SkipList, 1) ->
    lists:flatmap(fun({_Mark, SL}) -> SL end, SkipList);
to_list(SkipList, Level) ->
    lists:flatmap(fun({_Mark, SL}) -> to_list(SL, Level - 1) end, SkipList).
%% Collect the {K, V} pairs from StartKey (inclusive) up to the point at
%% which leveled_codec:endkey_passed/2 reports EndKey as passed.
to_range(SkipList, StartKey, EndKey, ListHeight) ->
    to_range(SkipList, StartKey, EndKey, ListHeight, [], true).

%% Worker for to_range/4. Each pass fetches one bottom-level sublist via
%% sublist_above/4 and appends it to Acc. If the last key of that sublist
%% has not passed EndKey the search restarts from that key, exclusively
%% (StartIncl = false); otherwise the sublist is trimmed at EndKey and the
%% result returned.
to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) ->
    case sublist_above(SkipList, StartKey, ListHeight, StartIncl) of
        [] ->
            Acc;
        Chunk ->
            {LastKey, _LastValue} = lists:last(Chunk),
            case leveled_codec:endkey_passed(EndKey, LastKey) of
                true ->
                    WithinRange =
                        fun({K, _V}) ->
                            not leveled_codec:endkey_passed(EndKey, K)
                        end,
                    Acc ++ lists:takewhile(WithinRange, Chunk);
                false ->
                    to_range(SkipList,
                                LastKey,
                                EndKey,
                                ListHeight,
                                Acc ++ Chunk,
                                false)
            end
    end.
%% Descend Level layers to the first bottom-level sublist that may hold keys
%% at or after StartKey, and return the part of it that is not before the
%% start. Entries are skipped while K < StartKey when StartIncl is true, or
%% while K =< StartKey when it is false. Returns [] if nothing qualifies.
sublist_above(SkipList, StartKey, Level, StartIncl) ->
    BeforeStart =
        fun({K, _Payload}) ->
            case StartIncl of
                true ->
                    K < StartKey;
                false ->
                    K =< StartKey
            end
        end,
    case {Level, lists:dropwhile(BeforeStart, SkipList)} of
        {0, Remainder} ->
            % Bottom layer - what is left is the answer
            Remainder;
        {_, []} ->
            [];
        {_, [{_Mark, SubList}|_Rest]} ->
            sublist_above(SubList, StartKey, Level - 1, StartIncl)
    end.
%% Wrap SkipList in Level layers, each consisting of a single
%% ?INFINITY_KEY marker.
empty(SkipList, Level) ->
    Wrapped = [{?INFINITY_KEY, SkipList}],
    case Level of
        1 ->
            Wrapped;
        _ ->
            empty(Wrapped, Level - 1)
    end.
%% Count the {K, V} pairs beneath a skip list of the given height by summing
%% the sizes found under each marker.
size(SkipList, 1) ->
    lists:sum(lists:map(fun({_Mark, SL}) -> length(SL) end, SkipList));
size(SkipList, Level) ->
    SizeBelow = fun({_Mark, SL}) -> size(SL, Level - 1) end,
    lists:sum(lists:map(SizeBelow, SkipList)).
%%%============================================================================
%%% Internal Functions
%%%============================================================================
%% The hash divisor used to decide marker promotion at a given level.
%% The width is squared once for each level above 1, so level 2 gives
%% Width * Width and level 3 gives that value squared again.
width(Level, Width) ->
    case Level of
        1 ->
            Width;
        _ ->
            width(Level - 1, Width * Width)
    end.
%% Return the first {Marker, SubList} pair whose marker is >= Key, or false
%% if there is no such marker.
%% The scan stops at the first match rather than folding over every
%% remaining marker.
find_mark(_Key, []) ->
    false;
find_mark(Key, [{Marker, SL}|_Rest]) when Marker >= Key ->
    {Marker, SL};
find_mark(Key, [{_Marker, _SL}|Rest]) ->
    find_mark(Key, Rest).
%% Return the sublist held under the first marker which is >= Key, or null
%% if there is no such marker.
%% The scan stops at the first match rather than folding over every
%% remaining marker.
get_sublist(_Key, []) ->
    null;
get_sublist(Key, [{SkipKey, SL}|_Rest]) when SkipKey >= Key ->
    SL;
get_sublist(Key, [{_SkipKey, _SL}|Rest]) ->
    get_sublist(Key, Rest).
%%%============================================================================
%%% Test
%%%============================================================================
-ifdef(TEST).
%% Generate Count random object keys with sequence numbers starting at Seqn.
%% The result is in reverse order of generation (the last key generated is
%% at the head of the list).
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    generate_randomkeys(Seqn, Count, [], BucketRangeLow, BucketRangeHigh).

%% Worker for generate_randomkeys/4. A BRange of 0 pins every key to the
%% bucket numbered BucketLow; otherwise a random offset between 1 and BRange
%% is added to BucketLow. The key number is random between 1 and 1000.
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    Pad4 = fun(Int) -> string:right(integer_to_list(Int), 4, $0) end,
    BucketOffset =
        case BRange of
            0 ->
                0;
            _ ->
                random:uniform(BRange)
        end,
    BNumber = Pad4(BucketLow + BucketOffset),
    KNumber = Pad4(random:uniform(1000)),
    Entry = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
                {Seqn, {active, infinity}, null}},
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        [Entry|Acc],
                        BucketLow,
                        BRange).
%% Run the small skip list check for every list size from 1 to 32.
skiplist_small_test() ->
    % Check nothing bad happens with very small lists
    lists:foreach(fun dotest_skiplist_small/1, lists:seq(1, 32)).
%% Build a skip list of N random keys both incrementally (enter/3) and in
%% bulk (from_list/1), and check that every key can be found in each.
dotest_skiplist_small(N) ->
    KL = generate_randomkeys(1, N, 1, 2),
    Incremental =
        lists:foldl(fun({K, V}, SL) -> enter(K, V, SL) end, empty(), KL),
    Bulk = from_list(lists:reverse(KL)),
    Expected = lists:ukeysort(1, lists:reverse(KL)),
    AssertAllFound =
        fun(SkipList) ->
            lists:foreach(fun({K, V}) ->
                                ?assertMatch({value, V}, lookup(K, SkipList))
                            end,
                            Expected)
        end,
    AssertAllFound(Incremental),
    AssertAllFound(Bulk).
%% Run the main skip list test with bloom protection switched on.
skiplist_withbloom_test() ->
    Banner = "~n~nBloom protected skiplist test:~n~n",
    io:format(user, Banner, []),
    skiplist_tester(true).
%% Run the main skip list test with bloom protection switched off.
skiplist_nobloom_test() ->
    Banner = "~n~nBloom free skiplist test:~n~n",
    io:format(user, Banner, []),
    skiplist_tester(false).
%% Build a skip list from the same 4000 random keys by four routes (from an
%% ets ordered_set, from an unsorted list, from a sorted list and by
%% entering each key into an empty skip list), printing the time taken for
%% each, then run the timing tests against both the generated and the
%% dynamically loaded skip lists. Bloom switches bloom protection on/off.
skiplist_tester(Bloom) ->
N = 4000,
KL = generate_randomkeys(1, N, 1, N div 5),
OS = ets:new(test, [ordered_set, private]),
ets:insert(OS, KL),
SWaETS = os:timestamp(),
SkipList = from_orderedset(OS, Bloom),
io:format(user, "Generating skip list with ~w keys in ~w microseconds " ++
"from ordered set~n",
[N, timer:now_diff(os:timestamp(), SWaETS)]),
SWaGSL = os:timestamp(),
% SkipList is already bound, so this match also asserts that loading from
% the unsorted list gives exactly the same structure as the ordered set
SkipList = from_list(lists:reverse(KL), Bloom),
io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++
"Top level key count of ~w~n",
[N,
timer:now_diff(os:timestamp(), SWaGSL),
length(element(2, SkipList))]),
io:format(user, "Second tier key counts of ~w~n",
[lists:map(fun({_L, SL}) -> length(SL) end,
element(2, SkipList))]),
KLSorted = lists:ukeysort(1, lists:reverse(KL)),
SWaGSL2 = os:timestamp(),
% Again a match against the bound SkipList - asserts the same structure
SkipList = from_sortedlist(KLSorted, Bloom),
io:format(user, "Generating skip list with ~w sorted keys in ~w " ++
"microseconds~n",
[N, timer:now_diff(os:timestamp(), SWaGSL2)]),
SWaDSL = os:timestamp(),
% Dynamic load - bound to a new variable, so no structural equality with
% the generated skip list is asserted
SkipList1 =
lists:foldl(fun({K, V}, SL) ->
enter(K, V, SL)
end,
empty(Bloom),
KL),
io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++
"microseconds~n" ++
"Top level key count of ~w~n",
[N,
timer:now_diff(os:timestamp(), SWaDSL),
length(element(2, SkipList1))]),
io:format(user, "Second tier key counts of ~w~n",
[lists:map(fun({_L, SL}) -> length(SL) end,
element(2, SkipList1))]),
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
skiplist_timingtest(KLSorted, SkipList, N, Bloom),
io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
skiplist_timingtest(KLSorted, SkipList1, N, Bloom).
%% Check and time lookups, range queries and flattening against SkipList.
%% KL is the sorted, de-duplicated list of {K, V} pairs the skip list was
%% built from, and N the number of keys originally generated. When Bloom is
%% true, lookups with a pre-computed hash are also timed.
skiplist_timingtest(KL, SkipList, N, Bloom) ->
io:format(user, "Timing tests on skiplist of size ~w~n",
[leveled_skiplist:size(SkipList)]),
% Samples taken from different regions of the key space
CheckList1 = lists:sublist(KL, N div 4, 200),
CheckList2 = lists:sublist(KL, N div 3, 200),
CheckList3 = lists:sublist(KL, N div 2, 200),
CheckList4 = lists:sublist(KL, N - 1000, 200),
CheckList5 = lists:sublist(KL, N - 500, 200),
CheckList6 = lists:sublist(KL, 1, 10),
CheckList7 = lists:nthtail(N - 200, KL),
CheckList8 = lists:sublist(KL, N div 2, 1),
% CheckList8 is only used for the range checks, not the lookups
CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,
SWb = os:timestamp(),
lists:foreach(fun({K, V}) ->
?assertMatch({value, V}, lookup(K, SkipList))
end,
CheckAll),
% NOTE(review): the message below reports 1020 keys, but CheckAll is built
% from five sublists of up to 200 keys, one of up to 10 and the tail after
% N - 200 - confirm the intended count
io:format(user, "Finding 1020 keys took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWb)]),
% Query the range from the first to the last key of a check list. With
% Assert true the number of results must equal the number of distinct
% entries in the check list, otherwise the results are returned
RangeFun =
fun(SkipListToQuery, CheckListForQ, Assert) ->
KR =
to_range(SkipListToQuery,
element(1, lists:nth(1, CheckListForQ)),
element(1, lists:last(CheckListForQ))),
case Assert of
true ->
CompareL = length(lists:usort(CheckListForQ)),
?assertMatch(CompareL, length(KR));
false ->
KR
end
end,
SWc = os:timestamp(),
RangeFun(SkipList, CheckList1, true),
RangeFun(SkipList, CheckList2, true),
RangeFun(SkipList, CheckList3, true),
RangeFun(SkipList, CheckList4, true),
RangeFun(SkipList, CheckList5, true),
RangeFun(SkipList, CheckList6, true),
RangeFun(SkipList, CheckList7, true),
RangeFun(SkipList, CheckList8, true),
% Ranges using bucket numbers above those used to build the skip list
KL_OOR1 = generate_randomkeys(1, 4, N div 5 + 1, N div 5 + 10),
KR9 = RangeFun(SkipList, KL_OOR1, false),
?assertMatch([], KR9),
% Ranges using bucket number 0, below those used to build the skip list
KL_OOR2 = generate_randomkeys(1, 4, 0, 0),
KR10 = RangeFun(SkipList, KL_OOR2, false),
?assertMatch([], KR10),
io:format(user, "Finding 10 ranges took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWc)]),
AltKL1 = generate_randomkeys(1, 2000, 1, 200),
SWd0 = os:timestamp(),
lists:foreach(fun({K, _V}) ->
lookup(K, SkipList)
end,
AltKL1),
io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWd0)]),
% Time the two hash functions on their own, for comparison
SWd1 = os:timestamp(),
lists:foreach(fun({K, _V}) ->
leveled_codec:magic_hash(K)
end,
AltKL1),
io:format(user, "Generating 2000 magic hashes took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWd1)]),
SWd2 = os:timestamp(),
lists:foreach(fun({K, _V}) ->
erlang:phash2(K)
end,
AltKL1),
io:format(user, "Generating 2000 not so magic hashes took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWd2)]),
AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300),
SWe = os:timestamp(),
lists:foreach(fun({K, _V}) ->
none = lookup(K, SkipList)
end,
AltKL2),
io:format(user, "Getting 1000 missing keys above range took ~w " ++
"microseconds~n",
[timer:now_diff(os:timestamp(), SWe)]),
AltKL3 = generate_randomkeys(1, 1000, 0, 0),
SWf = os:timestamp(),
lists:foreach(fun({K, _V}) ->
none = lookup(K, SkipList)
end,
AltKL3),
io:format(user, "Getting 1000 missing keys below range took ~w " ++
"microseconds~n",
[timer:now_diff(os:timestamp(), SWf)]),
SWg = os:timestamp(),
FlatList = to_list(SkipList),
io:format(user, "Flattening skiplist took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWg)]),
% KL is already bound, so this asserts the flattened skip list is exactly
% the list it was built from
?assertMatch(KL, FlatList),
case Bloom of
true ->
% Random integers are used as both the key and the hash, so only the
% cost of the lookup with a known hash is timed; the results are not
% checked
HashList = lists:map(fun(_X) ->
random:uniform(4294967295) end,
lists:seq(1, 2000)),
SWh = os:timestamp(),
lists:foreach(fun(X) ->
lookup(X, X, SkipList) end,
HashList),
io:format(user,
"Getting 2000 missing keys when hash was known " ++
"took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWh)]);
false ->
ok
end.
%% A deterministic test {K, V} pair for integer X. The key name is X
%% right-aligned in a 6 character field, so keys sort in the same order as
%% X for values of up to 6 digits.
define_kv(X) ->
    KeyName = "Key" ++ string:right(integer_to_list(X), 6),
    Key = {o, "Bucket", KeyName, null},
    Value = {X, {active, infinity}, null},
    {Key, Value}.
%% Check lookups and 32-key aligned range queries on a skip list holding
%% exactly 4096 keys.
skiplist_roundsize_test() ->
    KVL = [define_kv(X) || X <- lists:seq(1, 4096)],
    SkipList = from_list(KVL),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({value, V}, lookup(K, SkipList))
                    end,
                    KVL),
    CheckRange =
        fun(X) ->
            First = X * 32 + 1,
            {KS, _VS} = define_kv(First),
            {KE, _VE} = define_kv(First + 31),
            Expected = lists:sublist(KVL, First, 32),
            ?assertMatch(Expected, to_range(SkipList, KS, KE))
        end,
    lists:foreach(CheckRange, lists:seq(0, 24)).
%% Keys entered with enter_nolookup/3 into a bloom-protected skip list must
%% not be found by lookup/2, but must still appear when flattened.
skiplist_nolookup_test() ->
    N = 4000,
    KL = generate_randomkeys(1, N, 1, N div 5),
    AddNoLookup = fun({K, V}, Acc) -> enter_nolookup(K, V, Acc) end,
    SkipList = lists:foldl(AddNoLookup, empty(true), KL),
    KLSorted = lists:ukeysort(1, lists:reverse(KL)),
    AssertMissing =
        fun({K, _V}) ->
            ?assertMatch(none, lookup(K, SkipList))
        end,
    lists:foreach(AssertMissing, KL),
    ?assertMatch(KLSorted, to_list(SkipList)).
%% Single-key range queries at the edges of skip lists holding 128, 127 and
%% 129 keys: the last key in each case, and also the first key of the
%% 129-key list.
skiplist_range_test() ->
    N = 150,
    KL = generate_randomkeys(1, N, 1, N div 5),
    Sorted = lists:ukeysort(1, KL),
    BuildPrefix =
        fun(Len) ->
            Prefix = lists:sublist(Sorted, Len),
            {Prefix, from_list(Prefix)}
        end,
    AssertLastKeyRange =
        fun({Prefix, SkipList}) ->
            {LastK, LastV} = lists:last(Prefix),
            ?assertMatch([{LastK, LastV}], to_range(SkipList, LastK, LastK))
        end,
    AssertLastKeyRange(BuildPrefix(128)),
    AssertLastKeyRange(BuildPrefix(127)),
    {KLSL3, SkipList3} = BuildPrefix(129),
    AssertLastKeyRange({KLSL3, SkipList3}),
    {FirstK4, V4} = lists:nth(1, KLSL3),
    ?assertMatch([{FirstK4, V4}], to_range(SkipList3, FirstK4, FirstK4)).
%% An empty skip list has a size of 0, with or without a bloom.
empty_skiplist_size_test() ->
    lists:foreach(fun(BloomProtect) ->
                        Empty = empty(BloomProtect),
                        ?assertMatch(0, leveled_skiplist:size(Empty))
                    end,
                    [false, true]).
-endif.

View file

@ -1,159 +0,0 @@
%% -------- TINY BLOOM ---------
%%
%% For sheltering relatively expensive lookups with a probabilistic check
%%
%% Uses multiple 512 byte blooms. Can sensibly hold up to 1000 keys per array.
%% Even at 1000 keys should still offer only a 20% false positive rate
%%
%% Restricted to no more than 256 arrays - so can't handle more than 250K keys
%% in total
%%
%% Implemented this way to make it easy to control false positive (just by
%% setting the width). Also only requires binary manipulations of a single
%% hash
-module(leveled_tinybloom).
-include("include/leveled.hrl").
-export([
enter/2,
check/2,
empty/1
]).
-include_lib("eunit/include/eunit.hrl").
%%%============================================================================
%%% Bloom API
%%%============================================================================
%% Create an empty bloom: a dict of Width zeroed 4096-bit arrays, keyed by
%% slot number from 0 to Width - 1. Width may not exceed 256.
empty(Width) when Width =< 256 ->
    dict:from_list([{Slot, <<0:4096>>} || Slot <- lists:seq(0, Width - 1)]).
%% Add a key, or a pre-computed {hash, Hash}, to the bloom. A hash of
%% no_lookup leaves the bloom unchanged.
%% The hash is split into a slot number and two bit positions; the slot is
%% taken modulo the number of arrays in the bloom and both bits are set in
%% that slot's array.
enter({hash, no_lookup}, Bloom) ->
    Bloom;
enter({hash, Hash}, Bloom) ->
    {Slot0, Bit1, Bit2} = split_hash(Hash),
    Slot = Slot0 rem dict:size(Bloom),
    SetBit = fun(Bit, Arr) -> add_to_array(Bit, Arr, 4096) end,
    Updated =
        lists:foldl(SetBit,
                    dict:fetch(Slot, Bloom),
                    lists:usort([Bit1, Bit2])),
    dict:store(Slot, <<Updated/binary>>, Bloom);
enter(Key, Bloom) ->
    enter({hash, leveled_codec:magic_hash(Key)}, Bloom).
%% Check a key, or a pre-computed {hash, Hash}, against the bloom. Returns
%% true only if both bit positions derived from the hash are set in the
%% slot's array (so the key may be present), otherwise false.
check({hash, Hash}, Bloom) ->
    {Slot0, Bit1, Bit2} = split_hash(Hash),
    BitArray = dict:fetch(Slot0 rem dict:size(Bloom), Bloom),
    IsSet = fun(Bit) -> getbit(Bit, BitArray, 4096) =:= <<1:1>> end,
    % The second bit is only examined if the first is set
    IsSet(Bit1) andalso IsSet(Bit2);
check(Key, Bloom) ->
    check({hash, leveled_codec:magic_hash(Key)}, Bloom).
%%%============================================================================
%%% Internal Functions
%%%============================================================================
%% Split an integer hash into {Slot, Bit1, Bit2}: the low 8 bits select the
%% slot, the next 12 bits the first bit position and the 12 bits above that
%% the second bit position.
%% The second bit position is masked to 12 bits so that it always falls
%% within a 4096-bit array, even if the hash is wider than 32 bits. For
%% hashes of up to 32 bits the mask changes nothing.
split_hash(Hash) ->
    H0 = Hash band 255,
    H1 = (Hash bsr 8) band 4095,
    H2 = (Hash bsr 20) band 4095,
    {H0, H1, H2}.
%% Return BitArray (a bitstring of ArrayLength bits) with the bit at
%% position Bit set to 1, where position 0 is the first bit.
add_to_array(Bit, BitArray, ArrayLength) ->
    TailLen = ArrayLength - (Bit + 1),
    <<Before:Bit/bitstring, _Old:1, After:TailLen/bitstring>> = BitArray,
    <<Before/bitstring, 1:1, After/bitstring>>.
%% Return the bit at position Bit of BitArray (a bitstring of ArrayLength
%% bits) as a 1-bit bitstring, either <<0:1>> or <<1:1>>.
getbit(Bit, BitArray, ArrayLength) ->
    TailLen = ArrayLength - (Bit + 1),
    <<_Before:Bit/bitstring,
        Wanted:1/bitstring,
        _After:TailLen/bitstring>> = BitArray,
    Wanted.
%%%============================================================================
%%% Test
%%%============================================================================
-ifdef(TEST).
%% Load 4000 random keys into a bloom of width 6, check that every key
%% loaded is reported as present, and check that fewer than a quarter of
%% 4000 other keys are false positives. Timings for hashing, loading and
%% checking are printed along the way.
%% crypto:strong_rand_bytes/1 is used for the random key suffix, as
%% crypto:rand_bytes/1 is deprecated.
simple_test() ->
    N = 4000,
    W = 6,
    KLin = lists:map(fun(X) -> "Key_" ++
                                integer_to_list(X) ++
                                integer_to_list(random:uniform(100)) ++
                                binary_to_list(crypto:strong_rand_bytes(2))
                                end,
                        lists:seq(1, N)),
    KLout = lists:map(fun(X) ->
                            "NotKey_" ++
                            integer_to_list(X) ++
                            integer_to_list(random:uniform(100)) ++
                            binary_to_list(crypto:strong_rand_bytes(2))
                            end,
                        lists:seq(1, N)),
    % Time the two hash functions on their own, for comparison
    SW0_PH = os:timestamp(),
    lists:foreach(fun(X) -> erlang:phash2(X) end, KLin),
    io:format(user,
                "~nNative hash function hashes ~w keys in ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW0_PH)]),
    SW0_MH = os:timestamp(),
    lists:foreach(fun(X) -> leveled_codec:magic_hash(X) end, KLin),
    io:format(user,
                "~nMagic hash function hashes ~w keys in ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW0_MH)]),
    SW1 = os:timestamp(),
    Bloom = lists:foldr(fun enter/2, empty(W), KLin),
    io:format(user,
                "~nAdding ~w keys to bloom took ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW1)]),
    % Every key which was entered must be found
    SW2 = os:timestamp(),
    lists:foreach(fun(X) -> ?assertMatch(true, check(X, Bloom)) end, KLin),
    io:format(user,
                "~nChecking ~w keys in bloom took ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW2)]),
    % Count the false positives among keys which were never entered
    SW3 = os:timestamp(),
    FP = lists:foldr(fun(X, Acc) -> case check(X, Bloom) of
                                        true -> Acc + 1;
                                        false -> Acc
                                    end end,
                        0,
                        KLout),
    io:format(user,
                "~nChecking ~w keys out of bloom took ~w microseconds " ++
                    "with ~w false positive rate~n",
                [N, timer:now_diff(os:timestamp(), SW3), FP / N]),
    ?assertMatch(true, FP < (N div 4)).
-endif.

View file

@ -162,7 +162,17 @@ to_list({tree, _L, Tree}) ->
end, end,
lists:foldl(FoldFun, [], tree_to_list(Tree)); lists:foldl(FoldFun, [], tree_to_list(Tree));
to_list({idxt, _L, {TLI, _IDX}}) -> to_list({idxt, _L, {TLI, _IDX}}) ->
lists:append(tuple_to_list(TLI)). lists:append(tuple_to_list(TLI));
to_list({skpl, _L, SkipList}) ->
FoldFun =
fun({_M, SL}, Acc) ->
[SL|Acc]
end,
Lv1List = lists:reverse(lists:foldl(FoldFun, [], SkipList)),
Lv0List = lists:reverse(lists:foldl(FoldFun, [], lists:append(Lv1List))),
lists:append(Lv0List).
tsize({_Type, L, _Tree}) -> tsize({_Type, L, _Tree}) ->
@ -171,7 +181,9 @@ tsize({_Type, L, _Tree}) ->
empty(tree) -> empty(tree) ->
{tree, 0, empty_tree()}; {tree, 0, empty_tree()};
empty(idxt) -> empty(idxt) ->
{idxt, 0, {{}, empty_tree()}}. {idxt, 0, {{}, empty_tree()}};
empty(skpl) ->
{skpl, 0, []}.
%%%============================================================================ %%%============================================================================
%%% Internal Functions %%% Internal Functions
@ -216,14 +228,22 @@ roll_list(KVList, L, SkipList, SkipWidth) ->
lookup_match(_Key, []) -> % lookup_match(_Key, []) ->
none; % none;
lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key -> % lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key ->
none; % none;
lookup_match(Key, [{Key, EV}|_Tail]) -> % lookup_match(Key, [{Key, EV}|_Tail]) ->
{value, EV}; % {value, EV};
lookup_match(Key, [_Top|Tail]) -> % lookup_match(Key, [_Top|Tail]) ->
lookup_match(Key, Tail). % lookup_match(Key, Tail).
%% Find Key in a list of {K, V} tuples, returning {value, V} if there is an
%% entry for the key and none otherwise.
lookup_match(Key, KVList) ->
    Found = lists:keyfind(Key, 1, KVList),
    case Found of
        {Key, Value} ->
            {value, Value};
        false ->
            none
    end.
lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key -> lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key ->
{EK, EV}; {EK, EV};
@ -396,15 +416,14 @@ skpl_getsublist(Key, SkipList) ->
FoldFun = FoldFun =
fun({Mark, SL}, Acc) -> fun({Mark, SL}, Acc) ->
case {Acc, Mark} of case {Acc, Mark} of
{none, Mark} when Mark >= Key -> {[], Mark} when Mark >= Key ->
SL; SL;
_ -> _ ->
Acc Acc
end end
end, end,
SL1 = lists:foldl(FoldFun, none, SkipList), SL1 = lists:foldl(FoldFun, [], SkipList),
lists:foldl(FoldFun, none, SL1). lists:foldl(FoldFun, [], SL1).
%%%============================================================================ %%%============================================================================
%%% Balance tree implementation %%% Balance tree implementation