2016-12-23 12:30:58 +00:00
|
|
|
%% -------- SST (Variant) ---------
|
|
|
|
%%
|
|
|
|
%% A FSM module intended to wrap a persisted, ordered view of Keys and Values
|
|
|
|
%%
|
|
|
|
%% The persisted view is built from a list (which may be created by merging
|
2016-12-24 00:41:50 +00:00
|
|
|
%% multiple lists). The list is built first, then the view is created in bulk.
|
2016-12-24 00:02:06 +00:00
|
|
|
%%
|
|
|
|
%% -------- Slots ---------
|
|
|
|
%%
|
|
|
|
%% The view is built from sublists referred to as slots. Each slot is up to 128
|
|
|
|
%% keys and values in size. The slots are each themselves a gb_tree. The
|
|
|
|
%% gb_tree is slightly slower than the skiplist at fetch time, and doesn't
|
|
|
|
%% support directly the useful to_range function. However the from_orddict
|
|
|
|
%% capability is much faster than from_sortedlist in skiplist, saving on CPU
|
|
|
|
%% at sst build time:
|
|
|
|
%%
|
|
|
|
%% Skiplist:
|
|
|
|
%% build and serialise slot 3233 microseconds
|
|
|
|
%% de-serialise and check * 128 - 14669 microseconds
|
|
|
|
%% flatten back to list - 164 microseconds
|
|
|
|
%%
|
|
|
|
%% GBTree:
|
2016-12-24 00:41:50 +00:00
|
|
|
%% build and serialise tree 1433 microseconds
|
2016-12-24 00:02:06 +00:00
|
|
|
%% de-serialise and check * 128 - 15263 microseconds
|
|
|
|
%% flatten back to list - 175 microseconds
|
|
|
|
%%
|
|
|
|
%% The performance advantage at lookup time is now negligible as the time to
|
|
|
|
%% de-serialise for each check is dominant. This time grows linearly with
|
|
|
|
%% the size of the slot, whereas the serialisation time is relatively constant
|
|
|
|
%% with growth. So bigger slots would be quicker to build, but the penalty for
|
|
|
|
%% that speed is too high at lookup time.
|
2016-12-24 00:41:50 +00:00
|
|
|
%%
|
|
|
|
%% -------- Blooms ---------
|
|
|
|
%%
|
|
|
|
%% There are two different tiny blooms for each table. One is split by the
|
|
|
|
%% first byte of the hash, and consists of two hashes (derived from the
|
|
|
|
%% remainder of the hash). This is the top bloom, and the size varies by
|
|
|
|
%% level.
|
|
|
|
%% Level 0 has 8 bits per key - 0.05 fpr
|
|
|
|
%% Level 1 has 6 bits per key - 0.08 fpr
|
|
|
|
%% Other Levels have 4 bits per key - 0.15 fpr
|
|
|
|
%%
|
|
|
|
%% If this level is passed, then each slot has its own bloom based on the
|
|
|
|
%% same hash, but now split into three hashes and having a fixed 8 bit per
|
|
|
|
%% key size at all levels.
|
|
|
|
%% Slot Bloom has 8 bits per key - 0.03 fpr
|
|
|
|
%%
|
|
|
|
%% All blooms are based on the DJ Bernstein magic hash which proved to give
|
|
|
|
%% the predicted fpr in tests (unlike phash2 which has significantly higher
|
|
|
|
%% fpr). Due to the cost of producing the magic hash, it is read from the
|
|
|
|
%% value not reproduced each time. If the value is set to no_lookup no bloom
|
|
|
|
%% entry is added, and if all hashes are no_lookup in the slot then no bloom
|
|
|
|
%% is produced.
|
2016-12-24 00:02:06 +00:00
|
|
|
|
2016-12-23 12:30:58 +00:00
|
|
|
|
|
|
|
-module(leveled_sst).
|
|
|
|
|
|
|
|
-include("include/leveled.hrl").
|
|
|
|
|
2016-12-23 18:08:22 +00:00
|
|
|
%% Number of keys and values taken from the input list for each slot.
-define(SLOT_SIZE, 128).
%% Level passed to the {compressed, Level} option of term_to_binary/2.
-define(COMPRESSION_LEVEL, 1).
%% Argument given to the table-level bloom constructor, keyed by level. The
%% default entry is used for any level that has no entry of its own.
-define(LEVEL_BLOOM_SLOTS, [{0, 64},
                            {1, 48},
                            {default, 32}]).
|
2016-12-23 12:30:58 +00:00
|
|
|
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
2016-12-23 18:08:22 +00:00
|
|
|
%% Entry held in the slot index against the last key of each slot.
-record(slot_index_value,
        {slot_id :: integer(),         % sequence number of the slot
         bloom :: dict:dict(),         % bloom returned by build_slot/2
         start_position :: integer(),  % position at which the slot starts
         length :: integer()}).        % byte size of the serialised slot
|
|
|
|
|
2016-12-23 12:30:58 +00:00
|
|
|
%%%============================================================================
|
|
|
|
%%% API
|
|
|
|
%%%============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%%%============================================================================
|
|
|
|
%%% Internal Functions
|
|
|
|
%%%============================================================================
|
|
|
|
|
2016-12-24 00:41:50 +00:00
|
|
|
%% Build the serialised summary for a table.
%%
%% SlotIndex - list of {LastKey, #slot_index_value{}} in the order produced
%%             by build_all_slots/2 (it is reversed here before being loaded
%%             into a skiplist).
%% AllHashes - every hash to be entered into the table-level bloom.
%% Level     - the level of the table, used to pick the bloom size from
%%             ?LEVEL_BLOOM_SLOTS (falling back to the default entry).
%%
%% Returns a compressed binary holding {SkipSlot, Bloom}.
build_table_summary(SlotIndex, AllHashes, Level) ->
    BloomSlots =
        case lists:keyfind(Level, 1, ?LEVEL_BLOOM_SLOTS) of
            {Level, N} ->
                N;
            false ->
                %% No level-specific entry, so the default must be present
                {default, Default} =
                    lists:keyfind(default, 1, ?LEVEL_BLOOM_SLOTS),
                Default
        end,
    %% The accumulator is created by the same module that folds into it.
    %% NOTE(review): the original called leveled_bloom:empty/1, a module not
    %% used anywhere else in this file - confirm leveled_tinybloom:empty/1.
    Bloom = lists:foldr(fun leveled_tinybloom:enter/2,
                        leveled_tinybloom:empty(BloomSlots),
                        AllHashes),
    SkipSlot = leveled_skiplist:from_sortedlist(lists:reverse(SlotIndex)),
    term_to_binary({SkipSlot, Bloom}, [{compressed, ?COMPRESSION_LEVEL}]).
|
|
|
|
|
2016-12-23 18:08:22 +00:00
|
|
|
%% Split a list of keys and values into slots, starting at BasePosition.
%% Starts the recursion with no hashes collected, a slot id of 1 and an
%% empty slot index. Returns {SlotIndex, AllHashes}.
build_all_slots(KeyValues, BasePosition) ->
    NoHashes = [],
    EmptyIndex = [],
    build_all_slots(KeyValues, BasePosition, NoHashes, 1, EmptyIndex).
|
|
|
|
|
|
|
|
%% Consume the list of keys and values one slot at a time.
%%
%% For each slot the hashes that are not no_lookup are collected, the slot is
%% built with build_slot/2, and an index entry keyed on the last key of the
%% slot is prepended to SlotIndex. The start position of the next slot is
%% advanced by the byte size of the slot just built.
%%
%% Returns {SlotIndex, AllHashes} once the list is exhausted. SlotIndex is
%% in reverse slot order, as each entry is prepended.
build_all_slots([], _Start, AllHashes, _SlotID, SlotIndex) ->
    {SlotIndex, AllHashes};
build_all_slots(KVList, StartPosition, AllHashes, SlotID, SlotIndex) ->
    %% The final slot may hold fewer than ?SLOT_SIZE entries; lists:split/2
    %% would raise badarg in that case, so take what is available
    {SlotList, KVRem} = take_slot(KVList, ?SLOT_SIZE, []),
    %% The index is keyed on the highest key in the slot
    {LastKey, _V} = lists:last(SlotList),
    ExtractHashFun =
        fun({K, V}, Acc) ->
            case leveled_codec:strip_to_seqnhashonly({K, V}) of
                {_SQN, no_lookup} ->
                    Acc;
                {_SQN, H} ->
                    [{hash, H}|Acc]
            end
        end,
    HashList = lists:foldr(ExtractHashFun, [], SlotList),
    {SlotBin, Bloom} = build_slot(SlotList, HashList),
    Length = byte_size(SlotBin),
    SlotIndexV = #slot_index_value{slot_id = SlotID,
                                   bloom = Bloom,
                                   start_position = StartPosition,
                                   length = Length},
    build_all_slots(KVRem,
                    StartPosition + Length,
                    HashList ++ AllHashes,
                    SlotID + 1,
                    [{LastKey, SlotIndexV}|SlotIndex]).

%% Take up to N entries from the front of a list in a single pass, returning
%% {Taken, Remainder} with Taken in the original order.
take_slot(Rest, 0, Acc) ->
    {lists:reverse(Acc), Rest};
take_slot([], _N, Acc) ->
    {lists:reverse(Acc), []};
take_slot([KV|Rest], N, Acc) ->
    take_slot(Rest, N - 1, [KV|Acc]).
|
|
|
|
|
|
|
|
|
2016-12-24 00:41:50 +00:00
|
|
|
%% Build a single slot from an ordered list of keys and values.
%% The list is loaded into a gb_tree which is serialised with compression,
%% and every entry of HashList is entered into a fresh tiny bloom.
%% Returns {SlotBin, Bloom}.
build_slot(KVList, HashList) ->
    SlotTree = gb_trees:from_orddict(KVList),
    AddToBloom =
        fun(Hash, BloomAcc) ->
            leveled_tinybloom:tiny_enter(Hash, BloomAcc)
        end,
    SlotBloom =
        lists:foldr(AddToBloom, leveled_tinybloom:tiny_empty(), HashList),
    {term_to_binary(SlotTree, [{compressed, ?COMPRESSION_LEVEL}]), SlotBloom}.
|
|
|
|
|
|
|
|
%% Decide whether a slot has to be read for a given hash.
%% When there is no bloom (none) the slot must always be checked; otherwise
%% the answer is whatever the tiny bloom check returns for the hash.
is_check_slot_required(Hash, Bloom) ->
    case Bloom of
        none ->
            true;
        _ ->
            leveled_tinybloom:tiny_check(Hash, Bloom)
    end.
|
|
|
|
|
|
|
|
%% Look up a key in a slot.
%% The slot is given either as a serialised binary or as a
%% {pointer, Handle, Pos, Length} tuple, in which case it is first read via
%% read_slot/3. Returns the result of gb_trees:lookup/2, i.e. {value, V}
%% or none.
lookup_in_slot(Key, {pointer, Handle, Pos, Length}) ->
    SlotBin = read_slot(Handle, Pos, Length),
    lookup_in_slot(Key, SlotBin);
lookup_in_slot(Key, SlotBin) ->
    gb_trees:lookup(Key, binary_to_term(SlotBin)).
|
2016-12-23 12:30:58 +00:00
|
|
|
|
|
|
|
%% Return every key and value held in a slot as an ordered list.
%% The slot is given either as a serialised binary or as a
%% {pointer, Handle, Pos, Length} tuple, in which case it is first read via
%% read_slot/3.
all_from_slot({pointer, Handle, Pos, Length}) ->
    SlotBin = read_slot(Handle, Pos, Length),
    all_from_slot(SlotBin);
all_from_slot(SlotBin) ->
    gb_trees:to_list(binary_to_term(SlotBin)).
|
2016-12-23 12:30:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
%% Placeholder for reading a slot binary through a file handle.
%% All three arguments are ignored and the atom not_yet_implemented is
%% returned.
read_slot(_FileHandle, _Position, _ByteLength) ->
    not_yet_implemented.
|
|
|
|
|
|
|
|
|
|
|
|
%%%============================================================================
|
|
|
|
%%% Test
|
|
|
|
%%%============================================================================
|
|
|
|
|
|
|
|
-ifdef(TEST).
|
|
|
|
|
|
|
|
%% Generate Count random ledger keys and values for testing, with sequence
%% numbers starting at Seqn. Starts the recursion with an empty accumulator.
generate_randomkeys(Seqn, Count, BucketLow, BucketHigh) ->
    EmptyAcc = [],
    generate_randomkeys(Seqn, Count, EmptyAcc, BucketLow, BucketHigh).
|
|
|
|
|
|
|
|
%% Accumulate Count random ledger keys and values.
%%
%% Each key is built from "Bucket" and "Key" followed by a zero-padded
%% 4-character number. The bucket number is BucketLow when BRange is 0,
%% otherwise BucketLow plus a random number in 1..BRange. The key number is
%% random in 1..1000. The sequence number is incremented for each entry.
%%
%% Entries are prepended to Acc, so the result is neither sorted nor free of
%% duplicate keys.
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    %% random:uniform/1 is kept deliberately: swapping to rand would change
    %% the sequence of keys generated by an unseeded test process
    BNumber =
        case BRange of
            0 ->
                string:right(integer_to_list(BucketLow), 4, $0);
            _ ->
                BRand = random:uniform(BRange),
                string:right(integer_to_list(BucketLow + BRand), 4, $0)
        end,
    KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
    LedgerKey = leveled_codec:to_ledgerkey("Bucket" ++ BNumber,
                                           "Key" ++ KNumber,
                                           o),
    %% crypto:rand_bytes/1 has been removed from OTP; strong_rand_bytes/1 is
    %% the supported way to get a random binary of the same size
    {_B, _K, KV} = leveled_codec:generate_ledgerkv(LedgerKey,
                                                   Seqn,
                                                   crypto:strong_rand_bytes(64),
                                                   64,
                                                   infinity),
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        [KV|Acc],
                        BucketLow,
                        BRange).
|
|
|
|
|
|
|
|
|
|
|
|
%% Build one full slot from random keys and then confirm that:
%% - every hash passes the slot bloom check;
%% - every key can be found in the slot with its value;
%% - flattening the slot gives back the original ordered list.
%% The time taken by each stage is printed to the user.
simple_slotbin_test() ->
    RandomKVs = generate_randomkeys(1, ?SLOT_SIZE * 2, 1, 4),
    %% Sort and de-duplicate on key, then keep exactly one slot's worth
    SlotKVs = lists:sublist(lists:ukeysort(1, RandomKVs), 1, ?SLOT_SIZE),
    ToHash =
        fun({_K, _V} = KV) ->
            {_SQN, Hash} = leveled_codec:strip_to_seqnhashonly(KV),
            {hash, Hash}
        end,
    HashList = [ToHash(KV) || KV <- SlotKVs],

    BuildStart = os:timestamp(),
    {SlotBin0, Bloom0} = build_slot(SlotKVs, HashList),
    io:format(user, "Slot built in ~w microseconds with size ~w~n",
                [timer:now_diff(os:timestamp(), BuildStart),
                    byte_size(SlotBin0)]),

    CheckStart = os:timestamp(),
    CheckBloom =
        fun(H) ->
            ?assertMatch(true, is_check_slot_required(H, Bloom0))
        end,
    lists:foreach(CheckBloom, HashList),
    CheckLookup =
        fun({K, V}) ->
            ?assertMatch({value, V}, lookup_in_slot(K, SlotBin0))
        end,
    lists:foreach(CheckLookup, SlotKVs),
    io:format(user, "Slot checked for all keys in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), CheckStart)]),

    FlattenStart = os:timestamp(),
    ?assertMatch(SlotKVs, all_from_slot(SlotBin0)),
    io:format(user, "Slot flattened in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), FlattenStart)]).
|
2016-12-23 23:30:15 +00:00
|
|
|
|
2016-12-23 18:08:22 +00:00
|
|
|
|
2016-12-23 12:30:58 +00:00
|
|
|
-endif.
|