Initial functions and unit tests
Try to replace SFT files with a file format that more natively supports features already in use (e.g. skiplist, tinybloom and magic_hash)
This commit is contained in:
parent
05ddcadbf9
commit
90e587dcee
3 changed files with 240 additions and 9 deletions
|
@ -18,6 +18,7 @@
|
|||
-export([
|
||||
from_list/1,
|
||||
from_list/2,
|
||||
to_sstlist/1,
|
||||
from_sortedlist/1,
|
||||
from_sortedlist/2,
|
||||
to_list/1,
|
||||
|
@ -37,6 +38,7 @@
|
|||
|
||||
-define(SKIP_WIDTH, 16).
|
||||
-define(LIST_HEIGHT, 2).
|
||||
-define(SST_WIDTH, 8).
|
||||
-define(INFINITY_KEY, {null, null, null, null, null}).
|
||||
-define(BITARRAY_SIZE, 2048).
|
||||
|
||||
|
@ -94,6 +96,9 @@ from_sortedlist(SortedKVL, BloomProtect) ->
|
|||
end,
|
||||
{Bloom0, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}.
|
||||
|
||||
%% Build a skiplist for use inside an SST slot from an already sorted
%% list of key/values.  The list is built with the narrower ?SST_WIDTH
%% and is tagged with list_only in the position where from_sortedlist/2
%% places its bloom.
to_sstlist(SortedKVL) ->
    SkipList = from_list(SortedKVL, ?SST_WIDTH, ?LIST_HEIGHT),
    {list_only, SkipList}.
|
||||
|
||||
lookup(Key, SkipList) ->
|
||||
case element(1, SkipList) of
|
||||
list_only ->
|
||||
|
|
133
src/leveled_sst.erl
Normal file
133
src/leveled_sst.erl
Normal file
|
@ -0,0 +1,133 @@
|
|||
%% -------- SST (Variant) ---------
|
||||
%%
|
||||
%% A FSM module intended to wrap a persisted, ordered view of Keys and Values
|
||||
%%
|
||||
%% The persisted view is built from a list (which may be created by merging
|
||||
%% multiple lists)
|
||||
|
||||
-module(leveled_sst).
|
||||
|
||||
-include("include/leveled.hrl").
|
||||
|
||||
-define(SLOT_SIZE, 128).
|
||||
-define(COMPRESSION_LEVEL, 1).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
%%%============================================================================
|
||||
%%% API
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
%% Build a single slot from up to ?SLOT_SIZE key/values.
%%
%% KVList is converted to an SST skiplist and serialised (compressed at
%% ?COMPRESSION_LEVEL); every entry of HashList is folded into a fresh
%% tiny bloom.  Returns {SlotBinary, Bloom}.  A KVList longer than
%% ?SLOT_SIZE fails with function_clause.
build_slot(KVList, HashList) when length(KVList) =< ?SLOT_SIZE ->
    SlotTerm = leveled_skiplist:to_sstlist(KVList),
    AddHash = fun leveled_tinybloom:tiny_enter/2,
    TinyBloom = lists:foldr(AddHash,
                            leveled_tinybloom:tiny_empty(),
                            HashList),
    CompressOpts = [{compressed, ?COMPRESSION_LEVEL}],
    {term_to_binary(SlotTerm, CompressOpts), TinyBloom}.
|
||||
|
||||
%% Decide whether a slot has to be read to look for a hash.
%% With no bloom available (none) the slot must always be checked;
%% otherwise the answer is whatever the tiny bloom reports for the hash.
is_check_slot_required(_Hash, none) -> true;
is_check_slot_required(Hash, TinyBloom) ->
    leveled_tinybloom:tiny_check(Hash, TinyBloom).
|
||||
|
||||
%% Look up Key in a slot.
%% The slot is given either as {pointer, Handle, Pos, Length}, in which
%% case it is first fetched with read_slot/3, or as the serialised slot
%% binary itself, which is decoded back into a skiplist and searched.
lookup_in_slot(Key, {pointer, Handle, Pos, Length}) ->
    FetchedSlot = read_slot(Handle, Pos, Length),
    lookup_in_slot(Key, FetchedSlot);
lookup_in_slot(Key, SlotBin) ->
    leveled_skiplist:lookup(Key, binary_to_term(SlotBin)).
|
||||
|
||||
%% Return the part of a slot lying between StartKey and EndKey, as
%% produced by leveled_skiplist:to_range/3.
%% The slot is given either as {pointer, Handle, Pos, Length}, which is
%% resolved through read_slot/3, or as the serialised slot binary.
range_from_slot(StartKey, EndKey, {pointer, Handle, Pos, Length}) ->
    FetchedSlot = read_slot(Handle, Pos, Length),
    range_from_slot(StartKey, EndKey, FetchedSlot);
range_from_slot(StartKey, EndKey, SlotBin) ->
    leveled_skiplist:to_range(binary_to_term(SlotBin), StartKey, EndKey).
|
||||
|
||||
%% Return the full contents of a slot as a list, as produced by
%% leveled_skiplist:to_list/1.
%% The slot is given either as {pointer, Handle, Pos, Length}, which is
%% resolved through read_slot/3, or as the serialised slot binary.
all_from_slot({pointer, Handle, Pos, Length}) ->
    FetchedSlot = read_slot(Handle, Pos, Length),
    all_from_slot(FetchedSlot);
all_from_slot(SlotBin) ->
    leveled_skiplist:to_list(binary_to_term(SlotBin)).
|
||||
|
||||
|
||||
%% Placeholder for reading a slot binary from a file handle at a given
%% position and length.  Nothing is read yet: all three arguments are
%% ignored and the atom not_yet_implemented is returned.
read_slot(_Handle, _Pos, _Length) -> not_yet_implemented.
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% Test
|
||||
%%%============================================================================
|
||||
|
||||
-ifdef(TEST).
|
||||
|
||||
%% Test helper: generate Count random ledger key/values, with sequence
%% numbers starting at Seqn.
%% Delegates to generate_randomkeys/5 with an empty accumulator.  The
%% last argument is handed on as the width of the random bucket range
%% added to BucketRangeLow, not as an absolute upper bound.
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    EmptyAcc = [],
    generate_randomkeys(Seqn, Count, EmptyAcc, BucketRangeLow, BucketRangeHigh).
|
||||
|
||||
%% Test helper: accumulate Count randomly generated ledger key/values.
%%
%% Seqn      - sequence number for the next generated entry; incremented
%%             by one per entry
%% Count     - number of entries still to generate
%% Acc       - entries generated so far (newest first)
%% BucketLow - base bucket number
%% BRange    - width of the random bucket range; 0 pins every key to
%%             BucketLow, otherwise the bucket is BucketLow plus a
%%             random value in 1..BRange
%%
%% Bucket and key numbers are left-padded with zeros to four characters.
%% Returns the accumulated list once Count reaches 0.
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BRange) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    %% NOTE(review): random is deprecated in favour of rand (OTP 18+);
    %% left unchanged here so the minimum supported OTP release is not
    %% raised - confirm before switching.
    BNumber =
        case BRange of
            0 ->
                string:right(integer_to_list(BucketLow), 4, $0);
            _ ->
                BRand = random:uniform(BRange),
                string:right(integer_to_list(BucketLow + BRand), 4, $0)
        end,
    KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
    LedgerKey = leveled_codec:to_ledgerkey("Bucket" ++ BNumber,
                                            "Key" ++ KNumber,
                                            o),
    %% crypto:rand_bytes/1 was removed in OTP 20; strong_rand_bytes/1 is
    %% available in both older and newer releases.
    {_B, _K, KV} = leveled_codec:generate_ledgerkv(LedgerKey,
                                                    Seqn,
                                                    crypto:strong_rand_bytes(64),
                                                    64,
                                                    infinity),
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        [KV|Acc],
                        BucketLow,
                        BRange).
|
||||
|
||||
|
||||
%% Build one slot from 128 unique random key/values and confirm that
%% every hash passes the bloom check and that every key can be looked
%% up in the slot binary with its original value.  Timings for the build
%% and for the checks are printed to the user.
simple_slotbin_test() ->
    RandomKVs = generate_randomkeys(1, 256, 1, 4),
    %% de-duplicate on key and keep the first 128 entries for the slot
    SlotKVs = lists:sublist(lists:ukeysort(1, RandomKVs), 1, 128),
    ToHash =
        fun({_K, _V} = KV) ->
            {_SQN, Hash} = leveled_codec:strip_to_seqnhashonly(KV),
            {hash, Hash}
        end,
    HashList = [ToHash(KV) || KV <- SlotKVs],

    BuildStart = os:timestamp(),
    {SlotBin0, Bloom0} = build_slot(SlotKVs, HashList),
    io:format(user, "~nSlot built in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), BuildStart)]),

    CheckStart = os:timestamp(),
    AssertInBloom =
        fun(H) ->
            ?assertMatch(true, is_check_slot_required(H, Bloom0))
        end,
    lists:foreach(AssertInBloom, HashList),
    AssertInSlot =
        fun({K, V}) ->
            ?assertMatch({value, V}, lookup_in_slot(K, SlotBin0))
        end,
    lists:foreach(AssertInSlot, SlotKVs),
    io:format(user, "~nSlot checked for all keys in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), CheckStart)]).
|
||||
|
||||
|
||||
-endif.
|
|
@ -2,7 +2,7 @@
|
|||
%%
|
||||
%% For sheltering relatively expensive lookups with a probabilistic check
|
||||
%%
|
||||
%% Uses multiple 256 byte blooms. Can sensibly hold up to 1000 keys per array.
|
||||
%% Uses multiple 512 byte blooms. Can sensibly hold up to 1000 keys per array.
|
||||
%% Even at 1000 keys should still offer only a 20% false positive rate
|
||||
%%
|
||||
%% Restricted to no more than 256 arrays - so can't handle more than 250K keys
|
||||
|
@ -19,9 +19,13 @@
|
|||
-export([
|
||||
enter/2,
|
||||
check/2,
|
||||
empty/1
|
||||
empty/1,
|
||||
tiny_enter/2,
|
||||
tiny_check/2,
|
||||
tiny_empty/0
|
||||
]).
|
||||
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
%%%============================================================================
|
||||
|
@ -39,7 +43,9 @@ enter({hash, Hash}, Bloom) ->
|
|||
{H0, Bit1, Bit2} = split_hash(Hash),
|
||||
Slot = H0 rem dict:size(Bloom),
|
||||
BitArray0 = dict:fetch(Slot, Bloom),
|
||||
BitArray1 = lists:foldl(fun add_to_array/2,
|
||||
FoldFun =
|
||||
fun(K, Arr) -> add_to_array(K, Arr, 4096) end,
|
||||
BitArray1 = lists:foldl(FoldFun,
|
||||
BitArray0,
|
||||
lists:usort([Bit1, Bit2])),
|
||||
dict:store(Slot, BitArray1, Bloom);
|
||||
|
@ -51,11 +57,11 @@ check({hash, Hash}, Bloom) ->
|
|||
{H0, Bit1, Bit2} = split_hash(Hash),
|
||||
Slot = H0 rem dict:size(Bloom),
|
||||
BitArray = dict:fetch(Slot, Bloom),
|
||||
case getbit(Bit1, BitArray) of
|
||||
case getbit(Bit1, BitArray, 4096) of
|
||||
<<0:1>> ->
|
||||
false;
|
||||
<<1:1>> ->
|
||||
case getbit(Bit2, BitArray) of
|
||||
case getbit(Bit2, BitArray, 4096) of
|
||||
<<0:1>> ->
|
||||
false;
|
||||
<<1:1>> ->
|
||||
|
@ -66,6 +72,37 @@ check(Key, Bloom) ->
|
|||
Hash = leveled_codec:magic_hash(Key),
|
||||
check({hash, Hash}, Bloom).
|
||||
|
||||
%% Return an empty tiny bloom: a 1024-bit array with every bit clear.
tiny_empty() -> <<0:1024>>.
|
||||
|
||||
%% Add a hash to a tiny bloom and return the updated bloom.
%% A hash of no_lookup is not recorded and the bloom is returned
%% unchanged.  Any other hash is split into three bit positions and each
%% distinct position is set in the 1024-bit array.
tiny_enter({hash, no_lookup}, Bloom) ->
    Bloom;
tiny_enter({hash, Hash}, Bloom) ->
    {Pos0, Pos1, Pos2} = split_hash_for_tinybloom(Hash),
    SetBit = fun(Pos, Acc) -> add_to_array(Pos, Acc, 1024) end,
    %% usort drops duplicate positions so no bit is written twice
    DistinctPositions = lists:usort([Pos0, Pos1, Pos2]),
    lists:foldl(SetBit, Bloom, DistinctPositions).
|
||||
|
||||
%% Check a hash against a tiny bloom.
%% The hash is split into three bit positions; the result is true only
%% when all three bits are set in the 1024-bit array.  Bits are tested
%% in order and testing stops at the first clear bit.
tiny_check({hash, Hash}, Bloom) ->
    {Pos0, Pos1, Pos2} = split_hash_for_tinybloom(Hash),
    IsSet =
        fun(Pos) ->
            case getbit(Pos, Bloom, 1024) of
                <<1:1>> -> true;
                <<0:1>> -> false
            end
        end,
    IsSet(Pos0) andalso IsSet(Pos1) andalso IsSet(Pos2).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
%%%============================================================================
|
||||
|
@ -76,15 +113,21 @@ split_hash(Hash) ->
|
|||
H2 = Hash bsr 20,
|
||||
{H0, H1, H2}.
|
||||
|
||||
add_to_array(Bit, BitArray) ->
|
||||
RestLen = 4096 - Bit - 1,
|
||||
%% Split an integer hash into three bit positions for a 1024-bit bloom.
%% Each position is a 10-bit field (0..1023): bits 0-9, bits 10-19 and
%% bits 20-29 of the hash respectively.  Higher bits are ignored.
split_hash_for_tinybloom(Hash) ->
    Low10 = fun(Value) -> Value band 1023 end,
    {Low10(Hash), Low10(Hash bsr 10), Low10(Hash bsr 20)}.
|
||||
|
||||
%% Set one bit in a bit array and return the new array.
%%
%% Bit         - zero-based position counted from the start of the array
%% BitArray    - bitstring of exactly ArrayLength bits
%% ArrayLength - total number of bits in BitArray
%%
%% The array is split around the target bit and rebuilt with that bit
%% set to 1.  Fails with badmatch if BitArray is not ArrayLength bits
%% long or Bit lies outside it.
add_to_array(Bit, BitArray, ArrayLength) ->
    TailLen = ArrayLength - Bit - 1,
    <<Before:Bit/bitstring, _:1, After:TailLen/bitstring>> = BitArray,
    <<Before/bitstring, 1:1, After/bitstring>>.
|
||||
|
||||
getbit(Bit, BitArray) ->
|
||||
RestLen = 4096 - Bit - 1,
|
||||
getbit(Bit, BitArray, ArrayLength) ->
|
||||
RestLen = ArrayLength - Bit - 1,
|
||||
<<_Head:Bit/bitstring,
|
||||
B:1/bitstring,
|
||||
_Rest:RestLen/bitstring>> = BitArray,
|
||||
|
@ -148,6 +191,56 @@ simple_test() ->
|
|||
"with ~w false positive rate~n",
|
||||
[N, timer:now_diff(os:timestamp(), SW3), FP / N]),
|
||||
?assertMatch(true, FP < (N div 4)).
|
||||
|
||||
%% Exercise the tiny bloom: enter N hashes, confirm all N are reported
%% as present, then check N * K hashes that were never entered and
%% assert that fewer than one in eight are false positives.  Timings for
%% each phase are printed to the user.
tiny_test() ->
    N = 128,
    K = 32, % K times as many absent-key checks as present-key checks
    %% crypto:rand_bytes/1 was removed in OTP 20; strong_rand_bytes/1 is
    %% available in both older and newer releases.
    KLin = lists:map(fun(X) -> "Key_" ++
                                integer_to_list(X) ++
                                integer_to_list(random:uniform(100)) ++
                                binary_to_list(crypto:strong_rand_bytes(2))
                                end,
                        lists:seq(1, N)),
    KLout = lists:map(fun(X) ->
                            "NotKey_" ++
                            integer_to_list(X) ++
                            integer_to_list(random:uniform(100)) ++
                            binary_to_list(crypto:strong_rand_bytes(2))
                            end,
                        lists:seq(1, N * K)),

    HashIn = lists:map(fun(X) ->
                            {hash, leveled_codec:magic_hash(X)} end,
                        KLin),
    HashOut = lists:map(fun(X) ->
                            {hash, leveled_codec:magic_hash(X)} end,
                        KLout),

    SW1 = os:timestamp(),
    Bloom = lists:foldr(fun tiny_enter/2, tiny_empty(), HashIn),
    io:format(user,
                "~nAdding ~w hashes to tiny bloom took ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW1)]),

    SW2 = os:timestamp(),
    lists:foreach(fun(X) ->
                    ?assertMatch(true, tiny_check(X, Bloom)) end, HashIn),
    io:format(user,
                "~nChecking ~w hashes in tiny bloom took ~w microseconds~n",
                [N, timer:now_diff(os:timestamp(), SW2)]),

    SW3 = os:timestamp(),
    %% count how many never-entered hashes the bloom reports as present
    FP = lists:foldr(fun(X, Acc) -> case tiny_check(X, Bloom) of
                                        true -> Acc + 1;
                                        false -> Acc
                                    end end,
                        0,
                        HashOut),
    io:format(user,
                "~nChecking ~w hashes out of tiny bloom took ~w microseconds "
                    ++ "with ~w false positive rate~n",
                [N * K, timer:now_diff(os:timestamp(), SW3), FP / (N * K)]),
    ?assertMatch(true, FP < ((N * K) div 8)).
|
||||
|
||||
|
||||
-endif.
|
Loading…
Add table
Add a link
Reference in a new issue