From 58cda7d1573e516d44e9f8c27da35e002b899214 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sat, 21 Jan 2017 22:34:56 +0000 Subject: [PATCH] Switch to using skip lists from leveled_tree Remove now unused leveled_skiplist and leveled_tinybloom --- include/leveled.hrl | 2 +- src/leveled_skiplist.erl | 661 -------------------------------------- src/leveled_tinybloom.erl | 159 --------- src/leveled_tree.erl | 47 ++- 4 files changed, 34 insertions(+), 835 deletions(-) delete mode 100644 src/leveled_skiplist.erl delete mode 100644 src/leveled_tinybloom.erl diff --git a/include/leveled.hrl b/include/leveled.hrl index e1c9646..fa4dd11 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -15,7 +15,7 @@ %% Inker key type used for tombstones -define(INKT_TOMB, tomb). --define(CACHE_TYPE, idxt). +-define(CACHE_TYPE, skpl). -record(sft_options, {wait = true :: boolean(), diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl deleted file mode 100644 index b79d050..0000000 --- a/src/leveled_skiplist.erl +++ /dev/null @@ -1,661 +0,0 @@ -%% -------- SKIPLIST --------- -%% -%% For storing small numbers of {K, V} pairs where reasonable insertion and -%% fetch times, but with fast support for flattening to a list or a sublist -%% within a certain key range -%% -%% Used instead of gb_trees to retain compatability of OTP16 (and Riak's -%% ongoing dependency on OTP16) -%% -%% Not a proper skip list. Only supports a fixed depth. Good enough for the -%% purposes of leveled. Also uses peculiar enkey_passed function within -%% leveled. Not tested beyond a depth of 2. - --module(leveled_skiplist). - --include("include/leveled.hrl"). - --export([ - from_list/1, - from_list/2, - from_sortedlist/1, - from_sortedlist/2, - from_orderedset/1, - from_orderedset/2, - to_list/1, - enter/3, - enter/4, - enter_nolookup/3, - to_range/2, - to_range/3, - lookup/2, - lookup/3, - empty/0, - empty/1, - size/1 - ]). - --include_lib("eunit/include/eunit.hrl"). - --define(SKIP_WIDTH, 16). --define(LIST_HEIGHT, 2). --define(INFINITY_KEY, {null, null, null, null, null}). --define(BITARRAY_SIZE, 2048). - -%%%============================================================================ -%%% SkipList API -%%%============================================================================ - -enter(Key, Value, SkipList) -> - Hash = leveled_codec:magic_hash(Key), - enter(Key, Hash, Value, SkipList). - -enter(Key, Hash, Value, SkipList) -> - Bloom0 = - case element(1, SkipList) of - list_only -> - list_only; - Bloom -> - leveled_tinybloom:enter({hash, Hash}, Bloom) - end, - {Bloom0, - enter(Key, Value, erlang:phash2(Key), - element(2, SkipList), - ?SKIP_WIDTH, ?LIST_HEIGHT)}. - -%% Can iterate over a key entered this way, but never lookup the key -%% used for index terms -%% The key may still be a marker key - and the much cheaper native hash -%% is used to dtermine this, avoiding the more expensive magic hash -enter_nolookup(Key, Value, SkipList) -> - {element(1, SkipList), - enter(Key, Value, erlang:phash2(Key), - element(2, SkipList), - ?SKIP_WIDTH, ?LIST_HEIGHT)}. - -from_orderedset(Table) -> - from_orderedset(Table, false). - -from_orderedset(Table, Bloom) -> - from_sortedlist(ets:tab2list(Table), Bloom). - -from_list(UnsortedKVL) -> - from_list(UnsortedKVL, false). - -from_list(UnsortedKVL, BloomProtect) -> - KVL = lists:ukeysort(1, UnsortedKVL), - from_sortedlist(KVL, BloomProtect). - -from_sortedlist(SortedKVL) -> - from_sortedlist(SortedKVL, false). - -from_sortedlist([], BloomProtect) -> - empty(BloomProtect); -from_sortedlist(SortedKVL, BloomProtect) -> - Bloom0 = - case BloomProtect of - true -> - lists:foldr(fun({K, _V}, Bloom) -> - leveled_tinybloom:enter(K, Bloom) end, - leveled_tinybloom:empty(?SKIP_WIDTH), - SortedKVL); - false -> - list_only - end, - {Bloom0, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}. - -lookup(Key, SkipList) -> - case element(1, SkipList) of - list_only -> - list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT); - _ -> - lookup(Key, leveled_codec:magic_hash(Key), SkipList) - end. - -lookup(Key, Hash, SkipList) -> - case element(1, SkipList) of - list_only -> - list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT); - _ -> - case leveled_tinybloom:check({hash, Hash}, element(1, SkipList)) of - false -> - none; - true -> - list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT) - end - end. - - -%% Rather than support iterator_from like gb_trees, will just an output a key -%% sorted list for the desired range, which can the be iterated over as normal -to_range(SkipList, Start) -> - to_range(element(2, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT). - -to_range(SkipList, Start, End) -> - to_range(element(2, SkipList), Start, End, ?LIST_HEIGHT). - -to_list(SkipList) -> - to_list(element(2, SkipList), ?LIST_HEIGHT). - -empty() -> - empty(false). - -empty(BloomProtect) -> - case BloomProtect of - true -> - {leveled_tinybloom:empty(?SKIP_WIDTH), - empty([], ?LIST_HEIGHT)}; - false -> - {list_only, empty([], ?LIST_HEIGHT)} - end. - -size(SkipList) -> - size(element(2, SkipList), ?LIST_HEIGHT). - - -%%%============================================================================ -%%% SkipList Base Functions -%%%============================================================================ - -enter(Key, Value, Hash, SkipList, Width, 1) -> - {MarkerKey, SubList} = find_mark(Key, SkipList), - case Hash rem Width of - 0 -> - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> - K =< Key end, - SubList), - SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), - SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1], - lists:ukeysort(1, SkpL2); - _ -> - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < Key end, SubList), - UpdSubList = - case RHS of - [] -> - LHS ++ [{Key, Value}]; - [{FirstKey, _V}|RHSTail] -> - case FirstKey of - Key -> - LHS ++ [{Key, Value}] ++ RHSTail; - _ -> - LHS ++ [{Key, Value}] ++ RHS - end - end, - lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList}) - end; -enter(Key, Value, Hash, SkipList, Width, Level) -> - HashMatch = width(Level, Width), - {MarkerKey, SubSkipList} = find_mark(Key, SkipList), - UpdSubSkipList = enter(Key, Value, Hash, SubSkipList, Width, Level - 1), - case Hash rem HashMatch of - 0 -> - % - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> - K =< Key end, - UpdSubSkipList), - SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), - lists:ukeysort(1, [{Key, LHS}|SkpL1]); - _ -> - % Need to replace Marker Key with sublist - lists:keyreplace(MarkerKey, - 1, - SkipList, - {MarkerKey, UpdSubSkipList}) - end. - -from_list(SkipList, _SkipWidth, 0) -> - SkipList; -from_list(KVList, SkipWidth, ListHeight) -> - L0 = length(KVList), - SL0 = - case L0 > SkipWidth of - true -> - from_list(KVList, L0, [], SkipWidth); - false -> - {LastK, _LastSL} = lists:last(KVList), - [{LastK, KVList}] - end, - from_list(SL0, SkipWidth, ListHeight - 1). - -from_list([], 0, SkipList, _SkipWidth) -> - SkipList; -from_list(KVList, L, SkipList, SkipWidth) -> - SubLL = min(SkipWidth, L), - {Head, Tail} = lists:split(SubLL, KVList), - {LastK, _LastV} = lists:last(Head), - from_list(Tail, L - SubLL, SkipList ++ [{LastK, Head}], SkipWidth). - - -list_lookup(Key, SkipList, 1) -> - SubList = get_sublist(Key, SkipList), - case lists:keyfind(Key, 1, SubList) of - false -> - none; - {Key, V} -> - {value, V} - end; -list_lookup(Key, SkipList, Level) -> - SubList = get_sublist(Key, SkipList), - case SubList of - null -> - none; - _ -> - list_lookup(Key, SubList, Level - 1) - end. - - -to_list(SkipList, 1) -> - lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList); -to_list(SkipList, Level) -> - lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list(SL, Level - 1) end, - [], - SkipList). - - -to_range(SkipList, StartKey, EndKey, ListHeight) -> - to_range(SkipList, StartKey, EndKey, ListHeight, [], true). - -to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) -> - SL = sublist_above(SkipList, StartKey, ListHeight, StartIncl), - case SL of - [] -> - Acc; - _ -> - {LK, _LV} = lists:last(SL), - case leveled_codec:endkey_passed(EndKey, LK) of - false -> - to_range(SkipList, - LK, - EndKey, - ListHeight, - Acc ++ SL, - false); - true -> - SplitFun = - fun({K, _V}) -> - not leveled_codec:endkey_passed(EndKey, K) end, - LHS = lists:takewhile(SplitFun, SL), - Acc ++ LHS - end - end. - -sublist_above(SkipList, StartKey, 0, StartIncl) -> - TestFun = - fun({K, _V}) -> - case StartIncl of - true -> - K < StartKey; - false -> - K =< StartKey - end end, - lists:dropwhile(TestFun, SkipList); -sublist_above(SkipList, StartKey, Level, StartIncl) -> - TestFun = - fun({K, _SL}) -> - case StartIncl of - true -> - K < StartKey; - false -> - K =< StartKey - end end, - RHS = lists:dropwhile(TestFun, SkipList), - case RHS of - [] -> - []; - [{_K, SL}|_Rest] -> - sublist_above(SL, StartKey, Level - 1, StartIncl) - end. - -empty(SkipList, 1) -> - [{?INFINITY_KEY, SkipList}]; -empty(SkipList, Level) -> - empty([{?INFINITY_KEY, SkipList}], Level - 1). - -size(SkipList, 1) -> - lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList); -size(SkipList, Level) -> - lists:foldl(fun({_Mark, SL}, Acc) -> size(SL, Level - 1) + Acc end, - 0, - SkipList). - - -%%%============================================================================ -%%% Internal Functions -%%%============================================================================ - -width(1, Width) -> - Width; -width(N, Width) -> - width(N - 1, Width * Width). - -find_mark(Key, SkipList) -> - lists:foldl(fun({Marker, SL}, Acc) -> - case Acc of - false -> - case Marker >= Key of - true -> - {Marker, SL}; - false -> - Acc - end; - _ -> - Acc - end end, - false, - SkipList). - -get_sublist(Key, SkipList) -> - lists:foldl(fun({SkipKey, SL}, Acc) -> - case {Acc, SkipKey} of - {null, SkipKey} when SkipKey >= Key -> - SL; - _ -> - Acc - end end, - null, - SkipList). - -%%%============================================================================ -%%% Test -%%%============================================================================ - --ifdef(TEST). - -generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> - generate_randomkeys(Seqn, - Count, - [], - BucketRangeLow, - BucketRangeHigh). - -generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> - Acc; -generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> - BNumber = - case BRange of - 0 -> - string:right(integer_to_list(BucketLow), 4, $0); - _ -> - BRand = random:uniform(BRange), - string:right(integer_to_list(BucketLow + BRand), 4, $0) - end, - KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0), - {K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null}, - {Seqn, {active, infinity}, null}}, - generate_randomkeys(Seqn + 1, - Count - 1, - [{K, V}|Acc], - BucketLow, - BRange). - -skiplist_small_test() -> - % Check nothing bad happens with very small lists - lists:foreach(fun(N) -> dotest_skiplist_small(N) end, lists:seq(1, 32)). - - -dotest_skiplist_small(N) -> - KL = generate_randomkeys(1, N, 1, 2), - SkipList1 = - lists:foldl(fun({K, V}, SL) -> - enter(K, V, SL) - end, - empty(), - KL), - SkipList2 = from_list(lists:reverse(KL)), - lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList1)) - end, - lists:ukeysort(1, lists:reverse(KL))), - lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList2)) - end, - lists:ukeysort(1, lists:reverse(KL))). - -skiplist_withbloom_test() -> - io:format(user, "~n~nBloom protected skiplist test:~n~n", []), - skiplist_tester(true). - -skiplist_nobloom_test() -> - io:format(user, "~n~nBloom free skiplist test:~n~n", []), - skiplist_tester(false). - -skiplist_tester(Bloom) -> - N = 4000, - KL = generate_randomkeys(1, N, 1, N div 5), - - OS = ets:new(test, [ordered_set, private]), - ets:insert(OS, KL), - SWaETS = os:timestamp(), - SkipList = from_orderedset(OS, Bloom), - io:format(user, "Generating skip list with ~w keys in ~w microseconds " ++ - "from ordered set~n", - [N, timer:now_diff(os:timestamp(), SWaETS)]), - - SWaGSL = os:timestamp(), - SkipList = from_list(lists:reverse(KL), Bloom), - io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++ - "Top level key count of ~w~n", - [N, - timer:now_diff(os:timestamp(), SWaGSL), - length(element(2, SkipList))]), - io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - element(2, SkipList))]), - KLSorted = lists:ukeysort(1, lists:reverse(KL)), - - SWaGSL2 = os:timestamp(), - SkipList = from_sortedlist(KLSorted, Bloom), - io:format(user, "Generating skip list with ~w sorted keys in ~w " ++ - "microseconds~n", - [N, timer:now_diff(os:timestamp(), SWaGSL2)]), - - SWaDSL = os:timestamp(), - SkipList1 = - lists:foldl(fun({K, V}, SL) -> - enter(K, V, SL) - end, - empty(Bloom), - KL), - io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++ - "microseconds~n" ++ - "Top level key count of ~w~n", - [N, - timer:now_diff(os:timestamp(), SWaDSL), - length(element(2, SkipList1))]), - io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - element(2, SkipList1))]), - - io:format(user, "~nRunning timing tests for generated skiplist:~n", []), - skiplist_timingtest(KLSorted, SkipList, N, Bloom), - - io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), - skiplist_timingtest(KLSorted, SkipList1, N, Bloom). - - -skiplist_timingtest(KL, SkipList, N, Bloom) -> - io:format(user, "Timing tests on skiplist of size ~w~n", - [leveled_skiplist:size(SkipList)]), - CheckList1 = lists:sublist(KL, N div 4, 200), - CheckList2 = lists:sublist(KL, N div 3, 200), - CheckList3 = lists:sublist(KL, N div 2, 200), - CheckList4 = lists:sublist(KL, N - 1000, 200), - CheckList5 = lists:sublist(KL, N - 500, 200), - CheckList6 = lists:sublist(KL, 1, 10), - CheckList7 = lists:nthtail(N - 200, KL), - CheckList8 = lists:sublist(KL, N div 2, 1), - CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++ - CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7, - - SWb = os:timestamp(), - lists:foreach(fun({K, V}) -> - ?assertMatch({value, V}, lookup(K, SkipList)) - end, - CheckAll), - io:format(user, "Finding 1020 keys took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWb)]), - - RangeFun = - fun(SkipListToQuery, CheckListForQ, Assert) -> - KR = - to_range(SkipListToQuery, - element(1, lists:nth(1, CheckListForQ)), - element(1, lists:last(CheckListForQ))), - case Assert of - true -> - CompareL = length(lists:usort(CheckListForQ)), - ?assertMatch(CompareL, length(KR)); - false -> - KR - end - end, - - SWc = os:timestamp(), - RangeFun(SkipList, CheckList1, true), - RangeFun(SkipList, CheckList2, true), - RangeFun(SkipList, CheckList3, true), - RangeFun(SkipList, CheckList4, true), - RangeFun(SkipList, CheckList5, true), - RangeFun(SkipList, CheckList6, true), - RangeFun(SkipList, CheckList7, true), - RangeFun(SkipList, CheckList8, true), - - KL_OOR1 = generate_randomkeys(1, 4, N div 5 + 1, N div 5 + 10), - KR9 = RangeFun(SkipList, KL_OOR1, false), - ?assertMatch([], KR9), - - KL_OOR2 = generate_randomkeys(1, 4, 0, 0), - KR10 = RangeFun(SkipList, KL_OOR2, false), - ?assertMatch([], KR10), - - io:format(user, "Finding 10 ranges took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWc)]), - - AltKL1 = generate_randomkeys(1, 2000, 1, 200), - SWd0 = os:timestamp(), - lists:foreach(fun({K, _V}) -> - lookup(K, SkipList) - end, - AltKL1), - io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWd0)]), - SWd1 = os:timestamp(), - lists:foreach(fun({K, _V}) -> - leveled_codec:magic_hash(K) - end, - AltKL1), - io:format(user, "Generating 2000 magic hashes took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWd1)]), - SWd2 = os:timestamp(), - lists:foreach(fun({K, _V}) -> - erlang:phash2(K) - end, - AltKL1), - io:format(user, "Generating 2000 not so magic hashes took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWd2)]), - - AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300), - SWe = os:timestamp(), - lists:foreach(fun({K, _V}) -> - none = lookup(K, SkipList) - end, - AltKL2), - io:format(user, "Getting 1000 missing keys above range took ~w " ++ - "microseconds~n", - [timer:now_diff(os:timestamp(), SWe)]), - AltKL3 = generate_randomkeys(1, 1000, 0, 0), - SWf = os:timestamp(), - lists:foreach(fun({K, _V}) -> - none = lookup(K, SkipList) - end, - AltKL3), - io:format(user, "Getting 1000 missing keys below range took ~w " ++ - "microseconds~n", - [timer:now_diff(os:timestamp(), SWf)]), - - SWg = os:timestamp(), - FlatList = to_list(SkipList), - io:format(user, "Flattening skiplist took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWg)]), - ?assertMatch(KL, FlatList), - - case Bloom of - true -> - HashList = lists:map(fun(_X) -> - random:uniform(4294967295) end, - lists:seq(1, 2000)), - SWh = os:timestamp(), - lists:foreach(fun(X) -> - lookup(X, X, SkipList) end, - HashList), - io:format(user, - "Getting 2000 missing keys when hash was known " ++ - "took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWh)]); - false -> - ok - end. - -define_kv(X) -> - {{o, "Bucket", "Key" ++ string:right(integer_to_list(X), 6), null}, - {X, {active, infinity}, null}}. - -skiplist_roundsize_test() -> - KVL = lists:map(fun(X) -> define_kv(X) end, lists:seq(1, 4096)), - SkipList = from_list(KVL), - lists:foreach(fun({K, V}) -> - ?assertMatch({value, V}, lookup(K, SkipList)) end, - KVL), - lists:foreach(fun(X) -> - {KS, _VS} = define_kv(X * 32 + 1), - {KE, _VE} = define_kv((X + 1) * 32), - R = to_range(SkipList, KS, KE), - L = lists:sublist(KVL, - X * 32 + 1, - 32), - ?assertMatch(L, R) end, - lists:seq(0, 24)). - -skiplist_nolookup_test() -> - N = 4000, - KL = generate_randomkeys(1, N, 1, N div 5), - SkipList = lists:foldl(fun({K, V}, Acc) -> - enter_nolookup(K, V, Acc) end, - empty(true), - KL), - KLSorted = lists:ukeysort(1, lists:reverse(KL)), - lists:foreach(fun({K, _V}) -> - ?assertMatch(none, lookup(K, SkipList)) end, - KL), - ?assertMatch(KLSorted, to_list(SkipList)). - -skiplist_range_test() -> - N = 150, - KL = generate_randomkeys(1, N, 1, N div 5), - - KLSL1 = lists:sublist(lists:ukeysort(1, KL), 128), - SkipList1 = from_list(KLSL1), - {LastK1, V1} = lists:last(KLSL1), - R1 = to_range(SkipList1, LastK1, LastK1), - ?assertMatch([{LastK1, V1}], R1), - - KLSL2 = lists:sublist(lists:ukeysort(1, KL), 127), - SkipList2 = from_list(KLSL2), - {LastK2, V2} = lists:last(KLSL2), - R2 = to_range(SkipList2, LastK2, LastK2), - ?assertMatch([{LastK2, V2}], R2), - - KLSL3 = lists:sublist(lists:ukeysort(1, KL), 129), - SkipList3 = from_list(KLSL3), - {LastK3, V3} = lists:last(KLSL3), - R3 = to_range(SkipList3, LastK3, LastK3), - ?assertMatch([{LastK3, V3}], R3), - - {FirstK4, V4} = lists:nth(1, KLSL3), - R4 = to_range(SkipList3, FirstK4, FirstK4), - ?assertMatch([{FirstK4, V4}], R4). - - -empty_skiplist_size_test() -> - ?assertMatch(0, leveled_skiplist:size(empty(false))), - ?assertMatch(0, leveled_skiplist:size(empty(true))). - --endif. \ No newline at end of file diff --git a/src/leveled_tinybloom.erl b/src/leveled_tinybloom.erl deleted file mode 100644 index 2278c2a..0000000 --- a/src/leveled_tinybloom.erl +++ /dev/null @@ -1,159 +0,0 @@ -%% -------- TINY BLOOM --------- -%% -%% For sheltering relatively expensive lookups with a probabilistic check -%% -%% Uses multiple 512 byte blooms. Can sensibly hold up to 1000 keys per array. -%% Even at 1000 keys should still offer only a 20% false positive -%% -%% Restricted to no more than 256 arrays - so can't handle more than 250K keys -%% in total -%% -%% Implemented this way to make it easy to control false positive (just by -%% setting the width). Also only requires binary manipulations of a single -%% hash - --module(leveled_tinybloom). - --include("include/leveled.hrl"). - --export([ - enter/2, - check/2, - empty/1 - ]). - - --include_lib("eunit/include/eunit.hrl"). - -%%%============================================================================ -%%% Bloom API -%%%============================================================================ - -empty(Width) when Width =< 256 -> - FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end, - lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)). - -enter({hash, no_lookup}, Bloom) -> - Bloom; -enter({hash, Hash}, Bloom) -> - {Slot0, Bit1, Bit2} = split_hash(Hash), - Slot = Slot0 rem dict:size(Bloom), - BitArray0 = dict:fetch(Slot, Bloom), - FoldFun = - fun(Bit, Arr) -> add_to_array(Bit, Arr, 4096) end, - BitArray1 = lists:foldl(FoldFun, - BitArray0, - lists:usort([Bit1, Bit2])), - dict:store(Slot, <>, Bloom); -enter(Key, Bloom) -> - Hash = leveled_codec:magic_hash(Key), - enter({hash, Hash}, Bloom). - - -check({hash, Hash}, Bloom) -> - {Slot0, Bit1, Bit2} = split_hash(Hash), - Slot = Slot0 rem dict:size(Bloom), - BitArray = dict:fetch(Slot, Bloom), - - case getbit(Bit1, BitArray, 4096) of - <<0:1>> -> - false; - <<1:1>> -> - case getbit(Bit2, BitArray, 4096) of - <<0:1>> -> - false; - <<1:1>> -> - true - end - end; -check(Key, Bloom) -> - Hash = leveled_codec:magic_hash(Key), - check({hash, Hash}, Bloom). - - -%%%============================================================================ -%%% Internal Functions -%%%============================================================================ - -split_hash(Hash) -> - H0 = Hash band 255, - H1 = (Hash bsr 8) band 4095, - H2 = Hash bsr 20, - {H0, H1, H2}. - -add_to_array(Bit, BitArray, ArrayLength) -> - RestLen = ArrayLength - Bit - 1, - <> = BitArray, - <>. - -getbit(Bit, BitArray, ArrayLength) -> - RestLen = ArrayLength - Bit - 1, - <<_Head:Bit/bitstring, - B:1/bitstring, - _Rest:RestLen/bitstring>> = BitArray, - B. - - -%%%============================================================================ -%%% Test -%%%============================================================================ - --ifdef(TEST). - -simple_test() -> - N = 4000, - W = 6, - KLin = lists:map(fun(X) -> "Key_" ++ - integer_to_list(X) ++ - integer_to_list(random:uniform(100)) ++ - binary_to_list(crypto:rand_bytes(2)) - end, - lists:seq(1, N)), - KLout = lists:map(fun(X) -> - "NotKey_" ++ - integer_to_list(X) ++ - integer_to_list(random:uniform(100)) ++ - binary_to_list(crypto:rand_bytes(2)) - end, - lists:seq(1, N)), - SW0_PH = os:timestamp(), - lists:foreach(fun(X) -> erlang:phash2(X) end, KLin), - io:format(user, - "~nNative hash function hashes ~w keys in ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW0_PH)]), - SW0_MH = os:timestamp(), - lists:foreach(fun(X) -> leveled_codec:magic_hash(X) end, KLin), - io:format(user, - "~nMagic hash function hashes ~w keys in ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW0_MH)]), - - SW1 = os:timestamp(), - Bloom = lists:foldr(fun enter/2, empty(W), KLin), - io:format(user, - "~nAdding ~w keys to bloom took ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW1)]), - - SW2 = os:timestamp(), - lists:foreach(fun(X) -> ?assertMatch(true, check(X, Bloom)) end, KLin), - io:format(user, - "~nChecking ~w keys in bloom took ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW2)]), - - SW3 = os:timestamp(), - FP = lists:foldr(fun(X, Acc) -> case check(X, Bloom) of - true -> Acc + 1; - false -> Acc - end end, - 0, - KLout), - io:format(user, - "~nChecking ~w keys out of bloom took ~w microseconds " ++ - "with ~w false positive rate~n", - [N, timer:now_diff(os:timestamp(), SW3), FP / N]), - ?assertMatch(true, FP < (N div 4)). - - - --endif. \ No newline at end of file diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index e72a887..f027f54 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -162,7 +162,17 @@ to_list({tree, _L, Tree}) -> end, lists:foldl(FoldFun, [], tree_to_list(Tree)); to_list({idxt, _L, {TLI, _IDX}}) -> - lists:append(tuple_to_list(TLI)). + lists:append(tuple_to_list(TLI)); +to_list({skpl, _L, SkipList}) -> + FoldFun = + fun({_M, SL}, Acc) -> + [SL|Acc] + end, + + Lv1List = lists:reverse(lists:foldl(FoldFun, [], SkipList)), + Lv0List = lists:reverse(lists:foldl(FoldFun, [], lists:append(Lv1List))), + lists:append(Lv0List). + tsize({_Type, L, _Tree}) -> @@ -171,7 +181,9 @@ tsize({_Type, L, _Tree}) -> empty(tree) -> {tree, 0, empty_tree()}; empty(idxt) -> - {idxt, 0, {{}, empty_tree()}}. + {idxt, 0, {{}, empty_tree()}}; +empty(skpl) -> + {skpl, 0, []}. %%%============================================================================ %%% Internal Functions @@ -216,14 +228,22 @@ roll_list(KVList, L, SkipList, SkipWidth) -> -lookup_match(_Key, []) -> - none; -lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key -> - none; -lookup_match(Key, [{Key, EV}|_Tail]) -> - {value, EV}; -lookup_match(Key, [_Top|Tail]) -> - lookup_match(Key, Tail). +% lookup_match(_Key, []) -> +% none; +% lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key -> +% none; +% lookup_match(Key, [{Key, EV}|_Tail]) -> +% {value, EV}; +% lookup_match(Key, [_Top|Tail]) -> +% lookup_match(Key, Tail). + +lookup_match(Key, KVList) -> + case lists:keyfind(Key, 1, KVList) of + false -> + none; + {Key, Value} -> + {value, Value} + end. lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key -> {EK, EV}; @@ -396,15 +416,14 @@ skpl_getsublist(Key, SkipList) -> FoldFun = fun({Mark, SL}, Acc) -> case {Acc, Mark} of - {none, Mark} when Mark >= Key -> + {[], Mark} when Mark >= Key -> SL; _ -> Acc end end, - SL1 = lists:foldl(FoldFun, none, SkipList), - lists:foldl(FoldFun, none, SL1). - + SL1 = lists:foldl(FoldFun, [], SkipList), + lists:foldl(FoldFun, [], SL1). %%%============================================================================ %%% Balance tree implementation