commit
67d372e051
3 changed files with 410 additions and 91 deletions
|
@ -451,7 +451,7 @@ sst_timing({N, SSTTimerD}, SW, TimerType) ->
|
|||
end.
|
||||
|
||||
sst_keylist() ->
|
||||
[slot_bloom, slot_fetch].
|
||||
[tiny_bloom, slot_bloom, slot_fetch].
|
||||
|
||||
|
||||
get_timing(undefined, SW, TimerType) ->
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
%% -------- Slots ---------
|
||||
%%
|
||||
%% The view is built from sublists referred to as slot. Each slot is up to 128
|
||||
%% keys and values in size. Three strategis have been benchmarked for the
|
||||
%% keys and values in size. Three strategies have been benchmarked for the
|
||||
%% slot: a skiplist, a gb-tree, four blocks of flat lists with an index.
|
||||
%%
|
||||
%% Skiplist:
|
||||
|
@ -23,7 +23,7 @@
|
|||
%%
|
||||
%% Indexed Blocks:
|
||||
%% build and serialise slot 342 microseconds
|
||||
%% de-deriaise and check * 128 - 6746 microseconds
|
||||
%% de-serialise and check * 128 - 6746 microseconds
|
||||
%% flatten back to list - 187 microseconds
|
||||
%%
|
||||
%% The negative side of using Indexed Blocks is the storage of the index. In
|
||||
|
@ -34,23 +34,19 @@
|
|||
%%
|
||||
%% -------- Blooms ---------
|
||||
%%
|
||||
%% There is a summary bloom for the table. the summary bloom is split by the
|
||||
%% first byte of the hash, and consists of two hashes (derived from the
|
||||
%% remainder of the hash). This is the top bloom, and the size varies by
|
||||
%% level.
|
||||
%% Level 0 has 8 bits per key - 0.05 fpr
|
||||
%% Level 1 has 6 bits per key - 0.08 fpr
|
||||
%% Other Levels have 4 bits per key - 0.15 fpr
|
||||
%% There is a bloom for each slot - based on two hashes and 8 bits per key.
|
||||
%%
|
||||
%% With the indexed block implementation of the slot a second slot-level bloom
|
||||
%% is unnecessary (as the index itself yields a 0.003 % fpr).
|
||||
%% Hashing for blooms is a challenge, as the slot is a slice of an ordered
|
||||
%% list of keys with a fixed format. It is likely that the keys may vary by
|
||||
%% only one or two ascii characters, and there is a desire to avoid the
|
||||
%% overhead of cryptographic hash functions that may be able to handle this.
|
||||
%%
|
||||
%% -------- Summary ---------
|
||||
%%
|
||||
%% Each file has a summary - which is the 128 keys at the top of each slot in
|
||||
%% a skiplist, with some basic metadata about the slot stored as the value.
|
||||
%%
|
||||
%% The summary is stored seperately to the slots (wihtin the same file).
|
||||
%% The summary is stored separately to the slots (within the same file).
|
||||
%%
|
||||
%% -------- CRC Checks ---------
|
||||
%%
|
||||
|
@ -114,7 +110,8 @@
|
|||
|
||||
-record(slot_index_value, {slot_id :: integer(),
|
||||
start_position :: integer(),
|
||||
length :: integer()}).
|
||||
length :: integer(),
|
||||
bloom :: binary()}).
|
||||
|
||||
-record(summary, {first_key :: tuple(),
|
||||
last_key :: tuple(),
|
||||
|
@ -398,47 +395,51 @@ fetch(LedgerKey, Hash, State) ->
|
|||
Summary = State#state.summary,
|
||||
Slot = lookup_slot(LedgerKey, Summary#summary.index),
|
||||
SlotID = Slot#slot_index_value.slot_id,
|
||||
CachedBlockIdx = array:get(SlotID - 1,
|
||||
State#state.blockindex_cache),
|
||||
case CachedBlockIdx of
|
||||
none ->
|
||||
SlotBin = read_slot(State#state.handle, Slot),
|
||||
{Result, BlockIdx} = binaryslot_get(SlotBin,
|
||||
LedgerKey,
|
||||
Hash,
|
||||
none),
|
||||
BlockIndexCache = array:set(SlotID - 1,
|
||||
BlockIdx,
|
||||
Bloom = Slot#slot_index_value.bloom,
|
||||
case leveled_tinybloom:check_hash(Hash, Bloom) of
|
||||
false ->
|
||||
{not_present, tiny_bloom, SlotID, State};
|
||||
true ->
|
||||
CachedBlockIdx = array:get(SlotID - 1,
|
||||
State#state.blockindex_cache),
|
||||
{Result,
|
||||
slot_fetch,
|
||||
Slot#slot_index_value.slot_id,
|
||||
State#state{blockindex_cache = BlockIndexCache}};
|
||||
_ ->
|
||||
PosList = find_pos(CachedBlockIdx,
|
||||
double_hash(Hash, LedgerKey),
|
||||
[],
|
||||
0),
|
||||
case PosList of
|
||||
[] ->
|
||||
{not_present, slot_bloom, SlotID, State};
|
||||
_ ->
|
||||
case CachedBlockIdx of
|
||||
none ->
|
||||
SlotBin = read_slot(State#state.handle, Slot),
|
||||
Result = binaryslot_get(SlotBin,
|
||||
LedgerKey,
|
||||
Hash,
|
||||
{true, PosList}),
|
||||
{element(1, Result), slot_fetch, SlotID, State}
|
||||
end
|
||||
{Result, BlockIdx} = binaryslot_get(SlotBin,
|
||||
LedgerKey,
|
||||
Hash,
|
||||
none),
|
||||
BlockIndexCache = array:set(SlotID - 1,
|
||||
BlockIdx,
|
||||
State#state.blockindex_cache),
|
||||
{Result,
|
||||
slot_fetch,
|
||||
Slot#slot_index_value.slot_id,
|
||||
State#state{blockindex_cache = BlockIndexCache}};
|
||||
_ ->
|
||||
PosList = find_pos(CachedBlockIdx,
|
||||
double_hash(Hash, LedgerKey),
|
||||
[],
|
||||
0),
|
||||
case PosList of
|
||||
[] ->
|
||||
{not_present, slot_bloom, SlotID, State};
|
||||
_ ->
|
||||
SlotBin = read_slot(State#state.handle, Slot),
|
||||
Result = binaryslot_get(SlotBin,
|
||||
LedgerKey,
|
||||
Hash,
|
||||
{true, PosList}),
|
||||
{element(1, Result), slot_fetch, SlotID, State}
|
||||
end
|
||||
end
|
||||
end.
|
||||
|
||||
|
||||
fetch_range(StartKey, EndKey, ScanWidth, State) ->
|
||||
Summary = State#state.summary,
|
||||
Handle = State#state.handle,
|
||||
{Slots, LTrim, RTrim} = lookup_slots(StartKey,
|
||||
EndKey,
|
||||
Summary#summary.index),
|
||||
{Slots, RTrim} = lookup_slots(StartKey, EndKey, Summary#summary.index),
|
||||
Self = self(),
|
||||
SL = length(Slots),
|
||||
ExpandedSlots =
|
||||
|
@ -447,15 +448,11 @@ fetch_range(StartKey, EndKey, ScanWidth, State) ->
|
|||
[];
|
||||
1 ->
|
||||
[Slot] = Slots,
|
||||
case {LTrim, RTrim} of
|
||||
{true, true} ->
|
||||
case RTrim of
|
||||
true ->
|
||||
[{pointer, Self, Slot, StartKey, EndKey}];
|
||||
{true, false} ->
|
||||
[{pointer, Self, Slot, StartKey, all}];
|
||||
{false, true} ->
|
||||
[{pointer, Self, Slot, all, EndKey}];
|
||||
{false, false} ->
|
||||
[{pointer, Self, Slot, all, all}]
|
||||
false ->
|
||||
[{pointer, Self, Slot, StartKey, all}]
|
||||
end;
|
||||
N ->
|
||||
{LSlot, MidSlots, RSlot} =
|
||||
|
@ -472,21 +469,13 @@ fetch_range(StartKey, EndKey, ScanWidth, State) ->
|
|||
{pointer, Self, S, all, all}
|
||||
end,
|
||||
MidSlots),
|
||||
case {LTrim, RTrim} of
|
||||
{true, true} ->
|
||||
case RTrim of
|
||||
true ->
|
||||
[{pointer, Self, LSlot, StartKey, all}] ++
|
||||
MidSlotPointers ++
|
||||
[{pointer, Self, RSlot, all, EndKey}];
|
||||
{true, false} ->
|
||||
false ->
|
||||
[{pointer, Self, LSlot, StartKey, all}] ++
|
||||
MidSlotPointers ++
|
||||
[{pointer, Self, RSlot, all, all}];
|
||||
{false, true} ->
|
||||
[{pointer, Self, LSlot, all, all}] ++
|
||||
MidSlotPointers ++
|
||||
[{pointer, Self, RSlot, all, EndKey}];
|
||||
{false, false} ->
|
||||
[{pointer, Self, LSlot, all, all}] ++
|
||||
MidSlotPointers ++
|
||||
[{pointer, Self, RSlot, all, all}]
|
||||
end
|
||||
|
@ -603,11 +592,13 @@ build_all_slots(KVL, SC, Pos, SlotID, SlotIdx, BlockIdxA, SlotsBin) ->
|
|||
lists:split(?SLOT_SIZE, KVL)
|
||||
end,
|
||||
{LastKey, _V} = lists:last(SlotList),
|
||||
{BlockIndex, SlotBin} = generate_binary_slot(SlotList),
|
||||
{BlockIndex, SlotBin, HashList} = generate_binary_slot(SlotList),
|
||||
Length = byte_size(SlotBin),
|
||||
Bloom = leveled_tinybloom:create_bloom(HashList),
|
||||
SlotIndexV = #slot_index_value{slot_id = SlotID,
|
||||
start_position = Pos,
|
||||
length = Length},
|
||||
length = Length,
|
||||
bloom = Bloom},
|
||||
build_all_slots(KVRem,
|
||||
SC - 1,
|
||||
Pos + Length,
|
||||
|
@ -706,9 +697,9 @@ lookup_slots(StartKey, EndKey, Tree) ->
|
|||
{EK, _EndSlot} = lists:last(SlotList),
|
||||
case EK of
|
||||
EndKey ->
|
||||
{lists:map(MapFun, SlotList), true, false};
|
||||
{lists:map(MapFun, SlotList), false};
|
||||
_ ->
|
||||
{lists:map(MapFun, SlotList), true, true}
|
||||
{lists:map(MapFun, SlotList), true}
|
||||
end.
|
||||
|
||||
|
||||
|
@ -739,7 +730,7 @@ lookup_slots(StartKey, EndKey, Tree) ->
|
|||
generate_binary_slot(KVL) ->
|
||||
|
||||
HashFoldFun =
|
||||
fun({K, V}, {PosBinAcc, NoHashCount}) ->
|
||||
fun({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->
|
||||
|
||||
{_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}),
|
||||
case is_integer(H1) of
|
||||
|
@ -750,7 +741,8 @@ generate_binary_slot(KVL) ->
|
|||
{<<1:1/integer,
|
||||
PosH1:15/integer,
|
||||
PosBinAcc/binary>>,
|
||||
0};
|
||||
0,
|
||||
[H1|HashAcc]};
|
||||
N ->
|
||||
% The No Hash Count is an integer between 0 and 127
|
||||
% and so at read time should count NHC + 1
|
||||
|
@ -760,15 +752,16 @@ generate_binary_slot(KVL) ->
|
|||
0:1/integer,
|
||||
NHC:7/integer,
|
||||
PosBinAcc/binary>>,
|
||||
0}
|
||||
0,
|
||||
HashAcc}
|
||||
end;
|
||||
false ->
|
||||
{PosBinAcc, NoHashCount + 1}
|
||||
{PosBinAcc, NoHashCount + 1, HashAcc}
|
||||
end
|
||||
|
||||
end,
|
||||
|
||||
{PosBinIndex0, NHC} = lists:foldr(HashFoldFun, {<<>>, 0}, KVL),
|
||||
{PosBinIndex0, NHC, HashL} = lists:foldr(HashFoldFun, {<<>>, 0, []}, KVL),
|
||||
PosBinIndex1 =
|
||||
case NHC of
|
||||
0 ->
|
||||
|
@ -825,7 +818,7 @@ generate_binary_slot(KVL) ->
|
|||
CRC32 = erlang:crc32(SlotBin),
|
||||
FullBin = <<CRC32:32/integer, SlotBin/binary>>,
|
||||
|
||||
{PosBinIndex1, FullBin}.
|
||||
{PosBinIndex1, FullBin, HashL}.
|
||||
|
||||
|
||||
binaryslot_get(FullBin, Key, Hash, CachedPosLookup) ->
|
||||
|
@ -1212,18 +1205,9 @@ indexed_list_test() ->
|
|||
KVL0 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 4)),
|
||||
KVL1 = lists:sublist(KVL0, 128),
|
||||
|
||||
% BloomAddFun =
|
||||
% fun({H, K}, {Bloom, Total, Max}) ->
|
||||
% SW = os:timestamp(),
|
||||
% Bloom0 = leveled_tinybloom:tiny_enter(H, K, Bloom),
|
||||
% T0 = timer:now_diff(os:timestamp(), SW),
|
||||
% {Bloom0, Total + T0, max(T0, Max)}
|
||||
|
||||
% end,
|
||||
|
||||
SW0 = os:timestamp(),
|
||||
|
||||
{_PosBinIndex1, FullBin} = generate_binary_slot(KVL1),
|
||||
{_PosBinIndex1, FullBin, _HL} = generate_binary_slot(KVL1),
|
||||
io:format(user,
|
||||
"Indexed list created slot in ~w microseconds of size ~w~n",
|
||||
[timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]),
|
||||
|
@ -1251,7 +1235,7 @@ indexed_list_mixedkeys_test() ->
|
|||
KVL1 = lists:sublist(KVL0, 33),
|
||||
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
|
||||
|
||||
{_PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
||||
{_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||
|
||||
{TestK1, TestV1} = lists:nth(4, KVL1),
|
||||
MH1 = leveled_codec:magic_hash(TestK1),
|
||||
|
@ -1277,7 +1261,7 @@ indexed_list_mixedkeys2_test() ->
|
|||
IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)),
|
||||
% this isn't actually ordered correctly
|
||||
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
|
||||
{_PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
||||
{_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||
lists:foreach(fun({K, V}) ->
|
||||
MH = leveled_codec:magic_hash(K),
|
||||
test_binary_slot(FullBin, K, MH, {K, V})
|
||||
|
@ -1286,7 +1270,7 @@ indexed_list_mixedkeys2_test() ->
|
|||
|
||||
indexed_list_allindexkeys_test() ->
|
||||
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
|
||||
{PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
||||
{PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||
?assertMatch(<<127:8/integer>>, PosBinIndex1),
|
||||
% SW = os:timestamp(),
|
||||
BinToList = binaryslot_tolist(FullBin),
|
||||
|
@ -1299,7 +1283,7 @@ indexed_list_allindexkeys_test() ->
|
|||
|
||||
indexed_list_allindexkeys_trimmed_test() ->
|
||||
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
|
||||
{PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
||||
{PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||
?assertMatch(<<127:8/integer>>, PosBinIndex1),
|
||||
?assertMatch(Keys, binaryslot_trimmedlist(FullBin,
|
||||
{i,
|
||||
|
@ -1337,7 +1321,7 @@ indexed_list_mixedkeys_bitflip_test() ->
|
|||
KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)),
|
||||
KVL1 = lists:sublist(KVL0, 33),
|
||||
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
|
||||
{_PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
||||
{_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||
L = byte_size(FullBin),
|
||||
Byte1 = random:uniform(L),
|
||||
<<PreB1:Byte1/binary, A:8/integer, PostByte1/binary>> = FullBin,
|
||||
|
|
335
src/leveled_tinybloom.erl
Normal file
335
src/leveled_tinybloom.erl
Normal file
|
@ -0,0 +1,335 @@
|
|||
%% -------- TinyBloom ---------
|
||||
%%
|
||||
%% A fixed size bloom that supports 128 keys only, made to try and minimise
|
||||
%% the cost of producing the bloom
|
||||
%%
|
||||
|
||||
|
||||
-module(leveled_tinybloom).
|
||||
|
||||
-include("include/leveled.hrl").
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
-export([
|
||||
create_bloom/1,
|
||||
check_hash/2
|
||||
]).
|
||||
|
||||
-define(BITS_PER_KEY, 8). % Must be 8 or 4
|
||||
-define(INTEGER_SIZE, ?BITS_PER_KEY * 8).
|
||||
-define(BAND_MASK, ?INTEGER_SIZE - 1).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% API
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
%% Build a bloom binary from a list of integer hashes.
%%
%% The number of slots depends on the length of the list:
%%   - empty list        -> the empty binary (no bloom)
%%   - up to 16 hashes   -> 2 slots
%%   - 17 to 32 hashes   -> 4 slots
%%   - more than 32      -> 16 slots
%% Each slot is an integer of ?INTEGER_SIZE bits.  The second argument given
%% to add_hashlist is the slot count minus one, which is used as a bit mask
%% when picking the slot for each hash.
create_bloom([]) ->
    <<>>;
create_bloom(HashList) ->
    HashCount = length(HashList),
    if
        HashCount > 32 ->
            add_hashlist(HashList, 15,
                            0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0);
        HashCount > 16 ->
            add_hashlist(HashList, 3, 0, 0, 0, 0);
        true ->
            add_hashlist(HashList, 1, 0, 0)
    end.
|
||||
|
||||
%% Check whether a hash may be present in a bloom built by create_bloom/1.
%%
%% Returns false when the bloom is the empty binary, or when at least one of
%% the bits derived from the hash is not set in the slot selected for that
%% hash.  Returns true when every derived bit is set (which may be a false
%% positive).
check_hash(_Hash, <<>>) ->
    false;
check_hash(Hash, BloomBin) ->
    % The slot count is recovered from the size of the binary; the mask used
    % to select a slot is the slot count minus one.
    SlotCount = byte_size(BloomBin) div ?BITS_PER_KEY,
    {Slot, H0, H1} = split_hash(Hash, SlotCount - 1),
    Mask = get_mask(H0, H1),
    Skip = Slot * ?BITS_PER_KEY,
    Width = ?INTEGER_SIZE,
    <<_Before:Skip/binary, SlotBits:Width/integer, _After/binary>> = BloomBin,
    (SlotBits band Mask) =:= Mask.
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
%%%============================================================================
|
||||
|
||||
%% Split an integer hash into {Slot, BitPos0, BitPos1}.
%%
%% The slot is the low bits of the hash XORed with the bits starting at bit
%% 28, both masked by SlotSplit.  Each bit position is the XOR of two fields
%% of the hash masked by ?BAND_MASK: the fields at bit offsets 4 and 16 for
%% the first position, and at offsets 10 and 22 for the second.
split_hash(Hash, SlotSplit) ->
    SlotLow = Hash band SlotSplit,
    SlotHigh = (Hash bsr 28) band SlotSplit,
    FieldA = (Hash bsr 4) band (?BAND_MASK),
    FieldB = (Hash bsr 10) band (?BAND_MASK),
    FieldC = (Hash bsr 16) band (?BAND_MASK),
    FieldD = (Hash bsr 22) band (?BAND_MASK),
    {SlotLow bxor SlotHigh, FieldA bxor FieldC, FieldB bxor FieldD}.
|
||||
|
||||
%% Build the integer mask with the bits at positions H0 and H1 set.
%% When both positions are the same the mask has a single bit set.
get_mask(H0, H1) ->
    (1 bsl H0) bor (1 bsl H1).
|
||||
|
||||
|
||||
%% This looks ugly and clunky, but in tests it was quicker than modifying an
|
||||
%% Erlang term like an array as it is passed around the loop
|
||||
|
||||
%% Two-slot variant: fold every hash into one of two slot integers and, once
%% the list is exhausted, serialise both slots as ?INTEGER_SIZE-bit integers.
%% The slot returned by split_hash must be 0 or 1.
add_hashlist([], _SlotSplit, B0, B1) ->
    Width = ?INTEGER_SIZE,
    <<B0:Width/integer, B1:Width/integer>>;
add_hashlist([Hash | Rest], SlotSplit, B0, B1) ->
    {Slot, H0, H1} = split_hash(Hash, SlotSplit),
    M = get_mask(H0, H1),
    case Slot of
        0 -> add_hashlist(Rest, SlotSplit, B0 bor M, B1);
        1 -> add_hashlist(Rest, SlotSplit, B0, B1 bor M)
    end.
|
||||
|
||||
%% Four-slot variant: fold every hash into one of four slot integers and,
%% once the list is exhausted, serialise the slots in order as
%% ?INTEGER_SIZE-bit integers.  The slot returned by split_hash must be in
%% the range 0 to 3.
add_hashlist([], _SlotSplit, B0, B1, B2, B3) ->
    Width = ?INTEGER_SIZE,
    <<B0:Width/integer, B1:Width/integer,
        B2:Width/integer, B3:Width/integer>>;
add_hashlist([Hash | Rest], SlotSplit, B0, B1, B2, B3) ->
    {Slot, H0, H1} = split_hash(Hash, SlotSplit),
    M = get_mask(H0, H1),
    case Slot of
        0 -> add_hashlist(Rest, SlotSplit, B0 bor M, B1, B2, B3);
        1 -> add_hashlist(Rest, SlotSplit, B0, B1 bor M, B2, B3);
        2 -> add_hashlist(Rest, SlotSplit, B0, B1, B2 bor M, B3);
        3 -> add_hashlist(Rest, SlotSplit, B0, B1, B2, B3 bor M)
    end.
|
||||
|
||||
%% Sixteen-slot variant: fold every hash into one of sixteen slot integers
%% and, once the list is exhausted, serialise the slots in order as
%% ?INTEGER_SIZE-bit integers.  The slot returned by split_hash must be in
%% the range 0 to 15.  The slots are deliberately carried as individual
%% arguments rather than in a container term.
add_hashlist([], _SlotSplit,
                B0, B1, B2, B3, B4, B5, B6, B7,
                B8, B9, BA, BB, BC, BD, BE, BF) ->
    Width = ?INTEGER_SIZE,
    <<B0:Width/integer, B1:Width/integer, B2:Width/integer, B3:Width/integer,
        B4:Width/integer, B5:Width/integer, B6:Width/integer, B7:Width/integer,
        B8:Width/integer, B9:Width/integer, BA:Width/integer, BB:Width/integer,
        BC:Width/integer, BD:Width/integer, BE:Width/integer, BF:Width/integer>>;
add_hashlist([Hash | Rest], SlotSplit,
                B0, B1, B2, B3, B4, B5, B6, B7,
                B8, B9, BA, BB, BC, BD, BE, BF) ->
    {Slot, H0, H1} = split_hash(Hash, SlotSplit),
    M = get_mask(H0, H1),
    case Slot of
        0 ->
            add_hashlist(Rest, SlotSplit,
                            B0 bor M, B1, B2, B3, B4, B5, B6, B7,
                            B8, B9, BA, BB, BC, BD, BE, BF);
        1 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1 bor M, B2, B3, B4, B5, B6, B7,
                            B8, B9, BA, BB, BC, BD, BE, BF);
        2 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2 bor M, B3, B4, B5, B6, B7,
                            B8, B9, BA, BB, BC, BD, BE, BF);
        3 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3 bor M, B4, B5, B6, B7,
                            B8, B9, BA, BB, BC, BD, BE, BF);
        4 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4 bor M, B5, B6, B7,
                            B8, B9, BA, BB, BC, BD, BE, BF);
        5 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5 bor M, B6, B7,
                            B8, B9, BA, BB, BC, BD, BE, BF);
        6 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6 bor M, B7,
                            B8, B9, BA, BB, BC, BD, BE, BF);
        7 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6, B7 bor M,
                            B8, B9, BA, BB, BC, BD, BE, BF);
        8 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6, B7,
                            B8 bor M, B9, BA, BB, BC, BD, BE, BF);
        9 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6, B7,
                            B8, B9 bor M, BA, BB, BC, BD, BE, BF);
        10 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6, B7,
                            B8, B9, BA bor M, BB, BC, BD, BE, BF);
        11 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6, B7,
                            B8, B9, BA, BB bor M, BC, BD, BE, BF);
        12 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6, B7,
                            B8, B9, BA, BB, BC bor M, BD, BE, BF);
        13 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6, B7,
                            B8, B9, BA, BB, BC, BD bor M, BE, BF);
        14 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6, B7,
                            B8, B9, BA, BB, BC, BD, BE bor M, BF);
        15 ->
            add_hashlist(Rest, SlotSplit,
                            B0, B1, B2, B3, B4, B5, B6, B7,
                            B8, B9, BA, BB, BC, BD, BE, BF bor M)
    end.
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% Test
|
||||
%%%============================================================================
|
||||
|
||||
-ifdef(TEST).
|
||||
|
||||
%% Test helper: generate Count random ledger key/value pairs, numbering them
%% from Seqn upwards.  Delegates to generate_randomkeys/5 with an empty
%% accumulator.
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    EmptyAcc = [],
    generate_randomkeys(Seqn, Count, EmptyAcc, BucketRangeLow, BucketRangeHigh).
|
||||
|
||||
%% Test helper: accumulate Count random ledger key/value pairs.
%%
%% Each pair has a bucket name "Bucket" followed by a zero-padded 4 character
%% number (BucketLow plus a random value in 1..BRange), a key name "Key"
%% followed by a zero-padded 6 character random number in 1..10000, and a 64
%% byte random value.  The sequence number starts at Seqn and is incremented
%% for every generated pair.  The newest pair is prepended to Acc, so the
%% result is in reverse order of generation.
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    BRand = random:uniform(BRange),
    BNumber = string:right(integer_to_list(BucketLow + BRand), 4, $0),
    KNumber = string:right(integer_to_list(random:uniform(10000)), 6, $0),
    LedgerKey = leveled_codec:to_ledgerkey("Bucket" ++ BNumber,
                                            "Key" ++ KNumber,
                                            o),
    % crypto:rand_bytes/1 was deprecated and then removed from OTP;
    % crypto:strong_rand_bytes/1 is the supported replacement.
    {_B, _K, KV, _H} = leveled_codec:generate_ledgerkv(LedgerKey,
                                                        Seqn,
                                                        crypto:strong_rand_bytes(64),
                                                        64,
                                                        infinity),
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        [KV|Acc],
                        BucketLow,
                        BRange).
|
||||
|
||||
|
||||
%% Test helper: return the magic hashes of (up to) N distinct random keys.
%% Twice as many keys as required are generated so that enough remain after
%% duplicates are removed by the unique key sort.
get_hashlist(N) ->
    Candidates = generate_randomkeys(1, N * 2, 1, 20),
    Selected = lists:sublist(lists:ukeysort(1, Candidates), N),
    lists:map(fun({Key, _Value}) -> leveled_codec:magic_hash(Key) end,
                Selected).
|
||||
|
||||
%% Test helper: assert that every hash in HashList is reported as present by
%% the bloom.
check_all_hashes(BloomBin, HashList) ->
    lists:foreach(fun(Hash) ->
                        ?assertMatch(true, check_hash(Hash, BloomBin))
                    end,
                    HashList).
|
||||
|
||||
%% Test helper: check every hash in HashList against the bloom and tally the
%% outcomes.  Counters is a {Hits, Misses} tuple; a check that returns true
%% increments Hits and a check that returns false increments Misses.  The
%% updated tuple is returned.
check_neg_hashes(BloomBin, HashList, Counters) ->
    Tally =
        fun(Hash, {Hits, Misses}) ->
            case check_hash(Hash, BloomBin) of
                true -> {Hits + 1, Misses};
                false -> {Hits, Misses + 1}
            end
        end,
    lists:foldl(Tally, Counters, HashList).
|
||||
|
||||
|
||||
%% A bloom created from no hashes must report every checked hash as absent.
empty_bloom_test() ->
    EmptyBloom = create_bloom([]),
    Counts = check_neg_hashes(EmptyBloom, [0, 10, 100, 100000], {0, 0}),
    ?assertMatch({0, 4}, Counts).
|
||||
|
||||
%% Exercise the bloom at a range of sizes, from a full slot of 128 keys down
%% to 8 keys, so that each add_hashlist variant is used.
bloom_test() ->
    lists:foreach(fun test_bloom/1, [128, 64, 32, 16, 8]).
|
||||
|
||||
%% Build four blooms of N hashes each, confirm every member hash is found,
%% then measure the false positive rate using hashes that were not added.
%% Timings for the build, the positive checks and the negative checks are
%% written to the user device along with the false positive rate.
test_bloom(N) ->
    HashLists = [get_hashlist(N) || _ <- lists:seq(1, 4)],

    BuildStart = os:timestamp(),
    Blooms = [create_bloom(HashList) || HashList <- HashLists],
    TSa = timer:now_diff(os:timestamp(), BuildStart),

    Pairs = lists:zip(Blooms, HashLists),

    CheckStart = os:timestamp(),
    lists:foreach(fun({Bloom, HashList}) ->
                        check_all_hashes(Bloom, HashList)
                    end,
                    Pairs),
    TSb = timer:now_diff(os:timestamp(), CheckStart),

    % For each bloom take up to N hashes from a fresh pool that were not
    % among the hashes added to that bloom.
    HashPool = get_hashlist(N * 2),
    AbsentPairs =
        [{Bloom, lists:sublist(lists:subtract(HashPool, HashList), N)}
            || {Bloom, HashList} <- Pairs],

    NegStart = os:timestamp(),
    {Pos, Neg} =
        lists:foldl(fun({Bloom, Absent}, Counters) ->
                        check_neg_hashes(Bloom, Absent, Counters)
                    end,
                    {0, 0},
                    AbsentPairs),
    FPR = Pos / (Pos + Neg),
    TSc = timer:now_diff(os:timestamp(), NegStart),

    io:format(user,
                "Test with size ~w has microsecond timings: -"
                    ++ " build ~w check ~w neg_check ~w and fpr ~w~n",
                [N, TSa, TSb, TSc, FPR]).
|
||||
|
||||
|
||||
|
||||
-endif.
|
Loading…
Add table
Add a link
Reference in a new issue