leveled/src/leveled_sst.erl

260 lines
9.9 KiB
Erlang
Raw Normal View History

%% -------- SST (Variant) ---------
%%
%% A FSM module intended to wrap a persisted, ordered view of Keys and Values
%%
%% The persisted view is built from a list (which may be created by merging
%% multiple lists). The list is built first, then the view is created in bulk.
%%
%% -------- Slots ---------
%%
%% The view is built from sublists referred to as slots. Each slot is up to 128
%% keys and values in size. The slots are each themselves a gb_tree. The
%% gb_tree is slightly slower than the skiplist at fetch time, and doesn't
%% support directly the useful to_range function. However the from_orddict
%% capability is much faster than from_sortedlist in skiplist, saving on CPU
%% at sst build time:
%%
%% Skiplist:
%% build and serialise slot 3233 microseconds
%% de-serialise and check * 128 - 14669 microseconds
%% flatten back to list - 164 microseconds
%%
%% GBTree:
%% build and serialise tree 1433 microseconds
%% de-serialise and check * 128 - 15263 microseconds
%% flatten back to list - 175 microseconds
%%
%% The performance advantage at lookup time is now negligible as the time to
%% de-serialise for each check is dominant. This time grows linearly with
%% the size of the slot, whereas the serialisation time is relatively constant
%% with growth. So bigger slots would be quicker to build, but the penalty for
%% that speed is too high at lookup time.
%%
%% -------- Blooms ---------
%%
%% There are two different tiny blooms for each table. One is split by the
%% first byte of the hash, and consists of two hashes (derived from the
%% remainder of the hash). This is the top bloom, and the size varies by
%% level.
%% Level 0 has 8 bits per key - 0.05 fpr
%% Level 1 has 6 bits per key - 0.08 fpr
%% Other Levels have 4 bits per key - 0.15 fpr
%%
%% If this level is passed, then each slot has its own bloom based on the
%% same hash, but now split into three hashes and having a fixed 8 bit per
%% key size at all levels.
%% Slot Bloom has 8 bits per key - 0.03 fpr
%%
%% All blooms are based on the DJ Bernstein magic hash which proved to give
%% the predicted fpr in tests (unlike phash2 which has significantly higher
%% fpr). Due to the cost of producing the magic hash, it is read from the
%% value not reproduced each time. If the value is set to no_lookup no bloom
%% entry is added, and if all hashes are no_lookup in the slot then no bloom
%% is produced.
-module(leveled_sst).
-include("include/leveled.hrl").
2016-12-23 18:08:22 +00:00
%% Maximum number of key/value pairs placed in a single slot.
-define(SLOT_SIZE, 128).
%% Compression level handed to term_to_binary/2 ({compressed, Level}) when
%% slots and the table summary are serialised.
-define(COMPRESSION_LEVEL, 1).
%% Argument passed to leveled_tinybloom:empty/1 for the table-level bloom,
%% keyed by level; the `default` entry is used for any level not listed.
-define(LEVEL_BLOOM_SLOTS, [{0, 64}, {1, 48}, {default, 32}]).
-include_lib("eunit/include/eunit.hrl").
2016-12-23 18:08:22 +00:00
%% Index entry describing one slot within the serialised slots binary.
%%   slot_id        - sequence number of the slot (build_all_slots/2 numbers
%%                    slots from 1)
%%   bloom          - the per-slot bloom returned by build_slot/2
%%                    NOTE(review): typed as dict:dict(); confirm this matches
%%                    the term leveled_tinybloom actually returns
%%   start_position - position at which the slot's CRC-prefixed binary starts
%%   length         - size in bytes of the slot binary plus its 4-byte CRC
-record(slot_index_value, {slot_id :: integer(),
bloom :: dict:dict(),
start_position :: integer(),
length :: integer()}).
%%%============================================================================
%%% API
%%%============================================================================
%%%============================================================================
%%% Internal Functions
%%%============================================================================
%% Build the checksummed summary binary for a table at the given Level.
%%
%% SlotIndex is the slot index as accumulated by build_all_slots/2 and
%% AllHashes the hashes to be entered into the table-level bloom.  The number
%% of bloom slots is taken from ?LEVEL_BLOOM_SLOTS for the Level, falling back
%% to the `default` entry.  The result is a 32-bit CRC followed by the
%% compressed external term format of {SkipList, Bloom}.
build_table_summary(SlotIndex, AllHashes, Level) ->
    SlotsForLevel =
        case lists:keyfind(Level, 1, ?LEVEL_BLOOM_SLOTS) of
            false ->
                % Nothing configured for this level - use the default entry
                element(2, lists:keyfind(default, 1, ?LEVEL_BLOOM_SLOTS));
            {Level, Configured} ->
                Configured
        end,
    EmptyBloom = leveled_tinybloom:empty(SlotsForLevel),
    TableBloom =
        lists:foldr(fun leveled_tinybloom:enter/2, EmptyBloom, AllHashes),
    % The index is reversed before being loaded into the skiplist
    OrderedIndex = lists:reverse(SlotIndex),
    IndexSkipList = leveled_skiplist:from_sortedlist(OrderedIndex),
    Payload = term_to_binary({IndexSkipList, TableBloom},
                                [{compressed, ?COMPRESSION_LEVEL}]),
    Checksum = erlang:crc32(Payload),
    <<Checksum:32/integer, Payload/binary>>.
%% Verify and decode a summary binary of the form produced by
%% build_table_summary/3: a 32-bit CRC followed by a term_to_binary payload.
%%
%% Returns the decoded term when the stored CRC matches the CRC of the payload.
%% Raises error({summary_crc_mismatch, StoredCRC, ActualCRC}) when it does not,
%% so that a corrupted summary is reported with both checksums rather than as
%% an anonymous if_clause failure.
read_table_summary(BinWithCheck) ->
    <<SummCRC:32/integer, SummBin/binary>> = BinWithCheck,
    case erlang:crc32(SummBin) of
        SummCRC ->
            binary_to_term(SummBin);
        ActualCRC ->
            % It may be possible to rebuild the summary from the slots, but
            % that is not implemented - fail loudly with the evidence.
            error({summary_crc_mismatch, SummCRC, ActualCRC})
    end.
2016-12-23 18:08:22 +00:00
%% Break KVList into slots of up to ?SLOT_SIZE entries and serialise them.
%%
%% BasePosition is the position recorded as the start of the first slot.
%% Returns {SlotIndex, AllHashes, SlotsBin} as accumulated by
%% build_all_slots/7.
build_all_slots(KVList, BasePosition) ->
    % length/1 has to walk the whole (possibly large) list, but measuring once
    % up front avoids re-measuring on every iteration just to stop
    % lists:split/2 from raising on a short final slot.
    FullSlots = length(KVList) div ?SLOT_SIZE,
    build_all_slots(KVList, FullSlots, BasePosition, [], 1, [], <<>>).
2016-12-23 18:08:22 +00:00
%% Worker loop for build_all_slots/2.
%%
%% Each pass takes the next slot's worth of entries from KVL, serialises it
%% with build_slot/2 and appends it (prefixed by its CRC32) to SlotsBin.  An
%% index entry keyed on the slot's last key is pushed onto SlotIndex, so the
%% index is returned with the final slot first.  Count is the number of full
%% slots still expected; once it reaches 0 whatever remains is taken as the
%% final, possibly short, slot.
build_all_slots([], _Count, _Start, AllHashes, _SlotID, SlotIndex, SlotsBin) ->
    {SlotIndex, AllHashes, SlotsBin};
build_all_slots(KVL, Count, Start, AllHashes, SlotID, SlotIndex, SlotsBin) ->
    {SlotList, KVRem} = take_next_slot(Count, KVL),
    {LastKey, _V} = lists:last(SlotList),
    HashOf =
        fun({_K, _Val} = KV) ->
            {_SQN, H} = leveled_codec:strip_to_seqnhashonly(KV),
            H
        end,
    % Entries flagged no_lookup contribute nothing to the blooms
    HashList =
        [{hash, H} || H <- lists:map(HashOf, SlotList), H =/= no_lookup],
    {SlotBin, Bloom} = build_slot(SlotList, HashList),
    SlotCRC = erlang:crc32(SlotBin),
    % The stored length includes the 4-byte CRC that precedes the slot
    Length = byte_size(SlotBin) + 4,
    IndexEntry = {LastKey,
                    #slot_index_value{slot_id = SlotID,
                                        bloom = Bloom,
                                        start_position = Start,
                                        length = Length}},
    build_all_slots(KVRem,
                    Count - 1,
                    Start + Length,
                    HashList ++ AllHashes,
                    SlotID + 1,
                    [IndexEntry|SlotIndex],
                    <<SlotsBin/binary, SlotCRC:32/integer, SlotBin/binary>>).

%% Take the entries for the next slot: a full ?SLOT_SIZE split while full slots
%% remain, otherwise up to ?SLOT_SIZE of what is left with nothing remaining.
take_next_slot(0, KVL) ->
    {lists:sublist(KVL, ?SLOT_SIZE), []};
take_next_slot(_Count, KVL) ->
    lists:split(?SLOT_SIZE, KVL).
2016-12-23 18:08:22 +00:00
%% Serialise a single slot.
%%
%% KVList is loaded into a gb_tree with gb_trees:from_orddict/1 and every entry
%% of HashList is entered into a fresh tiny bloom.  Returns {SlotBin, Bloom}
%% where SlotBin is the compressed external term format of the tree.
build_slot(KVList, HashList) ->
    SlotTree = gb_trees:from_orddict(KVList),
    EmptyBloom = leveled_tinybloom:tiny_empty(),
    SlotBloom =
        lists:foldr(fun leveled_tinybloom:tiny_enter/2, EmptyBloom, HashList),
    {term_to_binary(SlotTree, [{compressed, ?COMPRESSION_LEVEL}]), SlotBloom}.
%% Decide whether a slot must be read to look for Hash.
%%
%% When there is no bloom (`none`) the slot always has to be checked; otherwise
%% the answer is whatever leveled_tinybloom:tiny_check/2 returns.
is_check_slot_required(Hash, Bloom) ->
    case Bloom of
        none ->
            true;
        _ ->
            leveled_tinybloom:tiny_check(Hash, Bloom)
    end.
%% Look Key up in a slot.
%%
%% The slot is either a serialised slot binary or a
%% {pointer, Handle, Pos, Length} tuple, which is first resolved through
%% read_slot/3.  Returns the result of gb_trees:lookup/2, i.e. {value, V} or
%% none.
lookup_in_slot(Key, {pointer, Handle, Pos, Length}) ->
    SlotBin = read_slot(Handle, Pos, Length),
    lookup_in_slot(Key, SlotBin);
lookup_in_slot(Key, SlotBin) ->
    gb_trees:lookup(Key, binary_to_term(SlotBin)).
%% Return every key/value pair held in a slot as an ordered list.
%%
%% Accepts either a serialised slot binary or a
%% {pointer, Handle, Pos, Length} tuple, which is first resolved through
%% read_slot/3.
all_from_slot({pointer, Handle, Pos, Length}) ->
    SlotBin = read_slot(Handle, Pos, Length),
    all_from_slot(SlotBin);
all_from_slot(SlotBin) ->
    % The deserialised term is the gb_tree written by build_slot/2
    Tree = binary_to_term(SlotBin),
    gb_trees:to_list(Tree).
%% Placeholder for reading a slot binary via a handle, position and length.
%% Not yet implemented: every argument is ignored and the atom
%% not_implemented is returned.
read_slot(_FileHandle, _Position, _ByteLength) ->
    not_implemented.
%%%============================================================================
%%% Test
%%%============================================================================
-ifdef(TEST).
%% Test helper: generate Count random ledger key/values, numbering them
%% upwards from sequence number Seqn.
%%
%% The last two arguments are handed unchanged to generate_randomkeys/5, which
%% treats them as a base bucket number and the width of the random offset added
%% to it (not as a low/high pair).
generate_randomkeys(Seqn, Count, BucketLow, BucketRange) ->
    InitialAcc = [],
    generate_randomkeys(Seqn, Count, InitialAcc, BucketLow, BucketRange).
%% Test helper: accumulate Count random ledger key/values onto Acc.
%%
%% Each key is built from "Bucket" ++ a zero-padded 4-digit bucket number and
%% "Key" ++ a zero-padded random number in 1..1000.  The bucket number is
%% BucketLow when BRange is 0, otherwise BucketLow plus a random offset in
%% 1..BRange.  Every value carries 64 random bytes and the sequence number
%% increases by one per generated key.  Results are prepended, so the returned
%% list is in reverse order of generation.
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BRange) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    % NOTE(review): random is deprecated in favour of rand (OTP 18+); it is
    % kept here so the module still builds on older releases.
    BNumber =
        case BRange of
            0 ->
                string:right(integer_to_list(BucketLow), 4, $0);
            _ ->
                BRand = random:uniform(BRange),
                string:right(integer_to_list(BucketLow + BRand), 4, $0)
        end,
    KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
    LedgerKey = leveled_codec:to_ledgerkey("Bucket" ++ BNumber,
                                            "Key" ++ KNumber,
                                            o),
    % crypto:rand_bytes/1 has been removed from OTP; strong_rand_bytes/1 is
    % the supported replacement and is available on old and new releases.
    {_B, _K, KV} = leveled_codec:generate_ledgerkv(LedgerKey,
                                                    Seqn,
                                                    crypto:strong_rand_bytes(64),
                                                    64,
                                                    infinity),
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        [KV|Acc],
                        BucketLow,
                        BRange).
%% Build one full slot from random keys, then check that every hash passes the
%% slot bloom, every key can be looked up in the slot binary, and the slot
%% flattens back to the original list.  Timings for each phase are printed.
simple_slotbin_test() ->
    RandomKVs = generate_randomkeys(1, ?SLOT_SIZE * 2, 1, 4),
    % De-duplicate on key, then keep exactly one slot's worth of entries
    KVList1 = lists:sublist(lists:ukeysort(1, RandomKVs), 1, ?SLOT_SIZE),
    ToHash =
        fun({_K, _V} = KV) ->
            {_SQN, H} = leveled_codec:strip_to_seqnhashonly(KV),
            {hash, H}
        end,
    HashList = lists:map(ToHash, KVList1),

    BuildStart = os:timestamp(),
    {SlotBin0, Bloom0} = build_slot(KVList1, HashList),
    io:format(user, "Slot built in ~w microseconds with size ~w~n",
                [timer:now_diff(os:timestamp(), BuildStart),
                    byte_size(SlotBin0)]),

    CheckStart = os:timestamp(),
    BloomCheck =
        fun(H) ->
            ?assertMatch(true, is_check_slot_required(H, Bloom0))
        end,
    lists:foreach(BloomCheck, HashList),
    LookupCheck =
        fun({K, V}) ->
            ?assertMatch({value, V}, lookup_in_slot(K, SlotBin0))
        end,
    lists:foreach(LookupCheck, KVList1),
    io:format(user, "Slot checked for all keys in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), CheckStart)]),

    FlattenStart = os:timestamp(),
    ?assertMatch(KVList1, all_from_slot(SlotBin0)),
    io:format(user, "Slot flattened in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), FlattenStart)]).
2016-12-23 23:30:15 +00:00
%% Build all slots and a table summary for a little over eight slots' worth of
%% random keys, and check the results are self-consistent:
%%  - one slot is produced per ?SLOT_SIZE keys, rounding up for the remainder
%%  - the final slot's start position plus length equals the size of the
%%    slots binary (the base position used here is 0)
%%  - the summary binary passes its CRC check and decodes to a 2-tuple
simple_slotbinsummary_test() ->
    KVList0 = generate_randomkeys(1, ?SLOT_SIZE * 8 + 100, 1, 4),
    KVList1 = lists:ukeysort(1, KVList0),
    {SlotIndex, AllHashes, SlotsBin} = build_all_slots(KVList1, 0),
    ExpectedSlots = (length(KVList1) + ?SLOT_SIZE - 1) div ?SLOT_SIZE,
    ?assertEqual(ExpectedSlots, length(SlotIndex)),
    % The index is accumulated by prepending, so its head is the final slot
    [{_LastKey, FinalSlot}|_] = SlotIndex,
    ?assertEqual(byte_size(SlotsBin),
                    FinalSlot#slot_index_value.start_position
                        + FinalSlot#slot_index_value.length),
    SummaryBin = build_table_summary(SlotIndex, AllHashes, 2),
    ?assertMatch({_SkipList, _Bloom}, read_table_summary(SummaryBin)).
-endif.