
* Switch to logger Use logger rather than io:format when logging. The ct tests have been switched to log to file; testutil/init_per_suite/1 may offer useful guidance on configuring logger with leveled. As all logs are produced by the leveled_log module, the MFA metadata is uninteresting for log outputs, but can be used for explicit filter controls for leveled logs. * iolist_to_binary not unicode_binary() logger filters will error and be removed if the format line is a binary(). The format line must be either a charlist() or a unicode_binary() - so iolist_to_binary() can't be used * Add metadata for filter * Update test/end_to_end/tictac_SUITE.erl Co-authored-by: Thomas Arts <thomas.arts@quviq.com> --------- Co-authored-by: Thomas Arts <thomas.arts@quviq.com>
451 lines
16 KiB
Erlang
451 lines
16 KiB
Erlang
%% -------- PENCILLER MEMORY ---------
|
|
%%
|
|
%% Module that provides functions for maintaining the L0 memory of the
|
|
%% Penciller.
|
|
%%
|
|
%% It is desirable that the L0Mem can efficiently handle the push of new trees
|
|
%% whilst maintaining the capability to quickly snapshot the memory for clones
|
|
%% of the Penciller.
|
|
%%
|
|
%% ETS tables are not used due to complications with managing their mutability,
|
|
%% as the database is snapshotted.
|
|
%%
|
|
%% An attempt was made to merge all trees into a single tree on push (in a
|
|
%% spawned process), but this proved to have an expensive impact as the tree
|
|
%% got larger.
|
|
%%
|
|
%% This approach is to keep a list of trees which have been received in the
|
|
%% order which they were received. There is then a fixed-size array of hashes
|
|
%% used to either point lookups at the right tree in the list, or inform the
|
|
%% requestor it is not present avoiding any lookups.
|
|
%%
|
|
%% The trade-off taken with the approach is that the size of the L0Cache is
|
|
%% uncertain. The Size count is incremented based on the inbound size and so
|
|
%% does not necessarily reflect the size once the lists are merged (reflecting
|
|
%% rotating objects)
|
|
|
|
-module(leveled_pmem).
|
|
|
|
-include("leveled.hrl").
|
|
|
|
-export([
|
|
prepare_for_index/2,
|
|
add_to_cache/5,
|
|
to_list/2,
|
|
check_levelzero/3,
|
|
check_levelzero/4,
|
|
merge_trees/4,
|
|
add_to_index/3,
|
|
new_index/0,
|
|
check_index/2,
|
|
cache_full/1
|
|
]).
|
|
|
|
-define(MAX_CACHE_LINES, 31). % Must be less than 128
|
|
|
|
-type index_array() :: list(array:array())|[]|none.
|
|
|
|
-export_type([index_array/0]).
|
|
|
|
%%%============================================================================
|
|
%%% API
|
|
%%%============================================================================
|
|
|
|
-spec cache_full(list()) -> boolean().
%% @doc
%% Report whether the L0 cache is full.  The cache is full when the number of
%% entries it holds is equal to ?MAX_CACHE_LINES (31).
cache_full(L0Cache) ->
    CacheLines = length(L0Cache),
    ?MAX_CACHE_LINES == CacheLines.
|
|
|
|
-spec prepare_for_index(
    array:array(), leveled_codec:segment_hash()) -> array:array().
%% @doc
%% Add the hash of a key to an index array.  The array is 'prepared' in the
%% sense that it is not used until it has been loaded into the main index.
%%
%% prepare_for_index is called from the Bookie when a key is being added to
%% the ledger cache, but the index is not used until that ledger cache is in
%% the penciller L0 memory.
%%
%% A hash of no_lookup leaves the array unchanged.  Otherwise the hash is split
%% into a slot and a 24-bit remainder, and the remainder is appended to the
%% binary held in that slot of the array.
prepare_for_index(IndexArray, no_lookup) ->
    IndexArray;
prepare_for_index(IndexArray, Hash) ->
    {Slot, H0} = split_hash(Hash),
    UpdatedBin = <<(array:get(Slot, IndexArray))/binary, H0:24/integer>>,
    array:set(Slot, UpdatedBin, IndexArray).
|
|
|
|
-spec add_to_index(array:array(), index_array(), integer()) -> index_array().
%% @doc
%% Expand the penciller's current index array with the details from a new
%% ledger cache tree sent from the Bookie.  The new array is added to the
%% front of the existing list of arrays.
%%
%% CacheSlot is the index of this ledger_cache in the list of ledger_caches.
%% It is only used as a guard: there is no clause for a slot of 128 or more.
add_to_index(LM1Array, L0Index, CacheSlot) when 128 > CacheSlot ->
    [LM1Array | L0Index].
|
|
|
|
-spec new_index() -> array:array().
%% @doc
%% Create a new, empty index array: 256 slots, each of which defaults to an
%% empty binary.
new_index() ->
    SlotCount = 256,
    array:new(SlotCount, [{default, <<>>}]).
|
|
|
|
-spec check_index(leveled_codec:segment_hash(), index_array())
    -> list(non_neg_integer()).
%% @doc
%% Return the list of positions in the list of cache arrays that may contain
%% the key associated with the hash being checked.  Positions are counted from
%% 1 (the head of the list of arrays) and are returned in ascending order.
check_index(Hash, L0Index) ->
    {Slot, H0} = split_hash(Hash),
    positions_with_hash(L0Index, Slot, H0, 1).

%% Walk the list of index arrays, collecting the position of every array in
%% which the binary held at Slot contains H0.
positions_with_hash([], _Slot, _H0, _Pos) ->
    [];
positions_with_hash([IdxArray | Rest], Slot, H0, Pos) ->
    case find_pos(array:get(Slot, IdxArray), H0) of
        true ->
            [Pos | positions_with_hash(Rest, Slot, H0, Pos + 1)];
        false ->
            positions_with_hash(Rest, Slot, H0, Pos + 1)
    end.
|
|
|
|
-spec add_to_cache(
    integer(),
    {tuple(), integer(), integer()},
    integer(),
    list(),
    boolean()) -> {integer(), integer(), list()}|empty_push.
%% @doc
%% The penciller's cache is a list of leveled_trees.  This adds a new tree
%% (LM1) to the front of that list, returning the MaxSQN of the pushed tree as
%% the Ledger's SQN, the approximate size of the cache (the previous size plus
%% the size of the pushed tree) and the extended list of trees.
%%
%% Updates to the cache must set Writeable to true if the update could
%% generate a Level 0 file: an empty tree is then answered with empty_push, to
%% guard against empty entries (which may lead to an attempt to write an empty
%% L0 file).
%%
%% The `if` has no alternative branch, so a push whose MinSQN is below the
%% LedgerSQN will crash with if_clause.
add_to_cache(L0Size, {LM1, MinSQN, MaxSQN}, LedgerSQN, TreeList, Writeable) ->
    case leveled_tree:tsize(LM1) of
        0 when Writeable =:= true ->
            empty_push;
        LM1Size ->
            if
                MinSQN >= LedgerSQN ->
                    {MaxSQN, L0Size + LM1Size, [LM1 | TreeList]}
            end
    end.
|
|
|
|
-spec to_list(
    integer(), fun((pos_integer()) -> leveled_tree:leveled_tree())) -> list().
%% @doc
%% The cache is a list of leveled_trees of length Slots.  Each tree is fetched
%% in turn by slot ID (from 1 up to Slots) using FetchFun, and the trees are
%% merged into a single sorted list of Keys and Values (to load into a SST
%% file).  Where a key is present in more than one slot, lists:ukeymerge/3
%% keeps the entry already merged, i.e. the one from the lowest slot ID.
%%
%% Each slot is requested in turn to avoid halting the penciller whilst it
%% does a large object copy of the whole cache.
%%
%% The time taken, and the length of the merged list, are logged as pm002.
to_list(Slots, FetchFun) ->
    SW = os:timestamp(),
    MergeSlotFun =
        fun(Slot, MergedKVL) ->
            SlotKVL = leveled_tree:to_list(FetchFun(Slot)),
            lists:ukeymerge(1, MergedKVL, SlotKVL)
        end,
    FullList = lists:foldl(MergeSlotFun, [], lists:seq(1, Slots)),
    leveled_log:log_timer(pm002, [length(FullList)], SW),
    FullList.
|
|
|
|
-spec check_levelzero(tuple(), list(integer()), list())
    -> {boolean(), tuple()|not_found}.
%% @doc
%% Check for the presence of a given Key in the Level Zero cache, with the
%% index array having been checked first for a list of potential positions
%% in the list of ledger caches - and then each potential ledger_cache being
%% checked in the order given by PosList until a match is found.
%%
%% The hash of the Key is calculated here (using
%% leveled_codec:segment_hash/1) before handing over to check_levelzero/4.
%%
%% Returns {true, {Key, Value}} when the key is found, otherwise
%% {false, not_found}.
check_levelzero(Key, PosList, TreeList) ->
    Hash = leveled_codec:segment_hash(Key),
    check_levelzero(Key, Hash, PosList, TreeList).
|
|
|
|
-spec check_levelzero(tuple(), {integer(), integer()}, list(integer()), list())
    -> {boolean(), tuple()|not_found}.
%% @doc
%% Check for the presence of a given Key in the Level Zero cache, with the
%% index array having been checked first for a list of potential positions
%% in the list of ledger caches - and then each potential ledger_cache being
%% checked in the order given by PosList until a match is found.
%%
%% If either the list of trees or the list of positions is empty, then the
%% answer is {false, not_found} without any tree being checked.
%%
%% Returns {true, {Key, Value}} when the key is found, otherwise
%% {false, not_found}.
check_levelzero(_Key, _Hash, _PosList, []) ->
    {false, not_found};
check_levelzero(_Key, _Hash, [], _TreeList) ->
    {false, not_found};
check_levelzero(Key, Hash, PosList, TreeList) ->
    check_slotlist(Key, Hash, PosList, TreeList).
|
|
|
|
-spec merge_trees(tuple(), tuple(), list(tuple()), tuple()) -> list().
%% @doc
%% Return a list of keys and values across the level zero cache (and the
%% currently unmerged bookie's ledger cache) that are between StartKey
%% and EndKey (inclusive).
%%
%% The range is matched in LevelMinus1 first, and then in each tree of the
%% TreeList in turn.  Where a key is found in more than one tree,
%% lists:ukeymerge/3 keeps the entry that was merged first.
merge_trees(StartKey, EndKey, TreeList, LevelMinus1) ->
    MergeRangeFun =
        fun(Tree, MergedKVL) ->
            InRangeKVL = leveled_tree:match_range(StartKey, EndKey, Tree),
            lists:ukeymerge(1, MergedKVL, InRangeKVL)
        end,
    lists:foldl(MergeRangeFun, [], [LevelMinus1 | TreeList]).
|
|
|
|
%%%============================================================================
|
|
%%% Internal Functions
|
|
%%%============================================================================
|
|
|
|
|
|
%% Scan a binary of concatenated 24-bit integers, returning true if any one of
%% them is equal to Hash and false once the binary has been exhausted.
find_pos(<<Candidate:24/integer, Rest/binary>>, Hash) ->
    Candidate =:= Hash orelse find_pos(Rest, Hash);
find_pos(<<>>, _Hash) ->
    false.
|
|
|
|
|
|
%% Split a {SegmentID, ExtraHash} pair into a slot number (the low 8 bits of
%% the SegmentID) and a 24-bit hash, built from the remaining bits of the
%% SegmentID combined with the ExtraHash shifted left by 8 bits.
split_hash({SegmentID, ExtraHash}) ->
    Combined = (ExtraHash bsl 8) bor (SegmentID bsr 8),
    {SegmentID band 16#FF, Combined band 16#FFFFFF}.
|
|
|
|
%% Look for Key in the trees found at the given positions of the TreeList,
%% checking the positions in the order in which they are listed.  The first
%% match gives {true, {Key, Value}}, and no further trees are examined.  If
%% none of the trees hold the Key, then the result is {false, not_found}.
%% The Hash is not used in the lookup.
check_slotlist(_Key, _Hash, [], _TreeList) ->
    {false, not_found};
check_slotlist(Key, Hash, [SlotToCheck | RestToCheck], TreeList) ->
    CheckTree = lists:nth(SlotToCheck, TreeList),
    case leveled_tree:match(Key, CheckTree) of
        {value, Value} ->
            {true, {Key, Value}};
        none ->
            check_slotlist(Key, Hash, RestToCheck, TreeList)
    end.
|
|
|
|
%%%============================================================================
|
|
%%% Test
|
|
%%%============================================================================
|
|
|
|
-ifdef(TEST).
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
% Generate Count random keys and values, returned as a list sorted by key
% with any duplicate keys removed.
generate_randomkeys_aslist(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    KVL =
        generate_randomkeys(Seqn, Count, [], BucketRangeLow, BucketRangeHigh),
    lists:ukeysort(1, KVL).
|
|
|
|
% Generate Count random keys and values, returned as a leveled_tree of the
% ?CACHE_TYPE built from the sorted list (with duplicate keys removed).
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    SortedKVL =
        lists:ukeysort(
            1,
            generate_randomkeys(
                Seqn, Count, [], BucketRangeLow, BucketRangeHigh)),
    leveled_tree:from_orderedlist(SortedKVL, ?CACHE_TYPE).
|
|
|
|
% Accumulate Count random {Key, Value} pairs.  Each key has a random bucket
% number (a random offset of up to BRange added to BucketLow) and a random key
% number of up to 1000, both formatted as zero-padded 4-digit strings.  The
% values carry a sequence number which is incremented for each pair.
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    Pad4Fun = fun(N) -> lists:flatten(io_lib:format("~4..0B", [N])) end,
    Bucket = "Bucket" ++ Pad4Fun(BucketLow + leveled_rand:uniform(BRange)),
    Key = "Key" ++ Pad4Fun(leveled_rand:uniform(1000)),
    RandomK = {o, Bucket, Key, null},
    RandomV = {Seqn, {active, infinity}, null},
    generate_randomkeys(
        Seqn + 1, Count - 1, [{RandomK, RandomV} | Acc], BucketLow, BRange).
|
|
|
|
|
|
compare_method_test() ->
    % Push 16 trees, each built from 2000 random keys, into an empty cache
    PushFun =
        fun(_X, {LedgerSQN, L0Size, L0TreeList}) ->
            LM1 = generate_randomkeys(LedgerSQN + 1, 2000, 1, 500),
            add_to_cache(
                L0Size,
                {LM1, LedgerSQN + 1, LedgerSQN + 2000},
                LedgerSQN,
                L0TreeList,
                true)
        end,
    {SQN, Size, TreeList} = lists:foldl(PushFun, {0, 0, []}, lists:seq(1, 16)),
    ?assertMatch(32000, SQN),
    ?assertMatch(true, Size =< 32000),

    TestList = leveled_tree:to_list(generate_randomkeys(1, 2000, 1, 800)),

    % Reference lookup: try every tree in turn until the key is found
    FindKeyFun =
        fun(Key) ->
            fun
                (_Tree, {true, KV}) ->
                    {true, KV};
                (Tree, {false, _KV}) ->
                    case leveled_tree:match(Key, Tree) of
                        {value, Value} ->
                            {true, {Key, Value}};
                        none ->
                            {false, not_found}
                    end
            end
        end,
    S0 =
        [lists:foldl(FindKeyFun(Key), {false, not_found}, TreeList)
            || {Key, _V} <- TestList],

    % The same lookups via check_levelzero/3 must give the same answers
    PosList = lists:seq(1, length(TreeList)),
    S1 = [check_levelzero(Key, PosList, TreeList) || {Key, _V} <- TestList],

    ?assertMatch(S0, S1),

    % Compare a range query made by dumping and filtering the whole cache
    % with one made using merge_trees/4
    StartKey = {o, "Bucket0100", null, null},
    EndKey = {o, "Bucket0200", null, null},
    SWa = os:timestamp(),
    FetchFun = fun(Slot) -> lists:nth(Slot, TreeList) end,
    DumpList = to_list(length(TreeList), FetchFun),
    InRangeFun =
        fun({K, _V}) ->
            leveled_codec:endkey_passed(EndKey, K) == false
                andalso K >= StartKey
        end,
    Q0 = lists:filter(InRangeFun, DumpList),
    Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, Q0), ?CACHE_TYPE),
    Sz0 = leveled_tree:tsize(Tree),
    io:format(
        "Crude method took ~w microseconds resulting in tree of " ++
            "size ~w~n",
        [timer:now_diff(os:timestamp(), SWa), Sz0]),
    SWb = os:timestamp(),
    Q1 =
        merge_trees(
            StartKey, EndKey, TreeList, leveled_tree:empty(?CACHE_TYPE)),
    Sz1 = length(Q1),
    io:format(
        "Merge method took ~w microseconds resulting in tree of " ++
            "size ~w~n",
        [timer:now_diff(os:timestamp(), SWb), Sz1]),
    ?assertMatch(Sz0, Sz1).
|
|
|
|
with_index_test_() ->
    % A timeout of 60 seconds is set, as otherwise this test may timeout when
    % run with coverage enabled
    TimeoutSeconds = 60,
    {timeout, TimeoutSeconds, fun with_index_test2/0}.
|
|
|
|
with_index_test2() ->
    IndexPrepareFun =
        fun({K, _V}, IdxAcc) ->
            prepare_for_index(IdxAcc, leveled_codec:segment_hash(K))
        end,
    % Each load adds a tree of random keys to the cache, the matching index
    % array to the index, and the keys and values to the source list
    LoadFun =
        fun(_X, {{LedgerSQN, L0Size, L0TreeList}, L0Idx, SrcList}) ->
            LM1 = generate_randomkeys_aslist(LedgerSQN + 1, 2000, 1, 500),
            LM1Array = lists:foldl(IndexPrepareFun, new_index(), LM1),
            LM1SL =
                leveled_tree:from_orderedlist(
                    lists:ukeysort(1, LM1), ?CACHE_TYPE),
            UpdL0Index =
                add_to_index(LM1Array, L0Idx, length(L0TreeList) + 1),
            UpdCache =
                add_to_cache(
                    L0Size,
                    {LM1SL, LedgerSQN + 1, LedgerSQN + 2000},
                    LedgerSQN,
                    L0TreeList,
                    true),
            {UpdCache, UpdL0Index, lists:ukeymerge(1, LM1, SrcList)}
        end,

    {{SQN, Size, TreeList}, L0Index, SrcKVL} =
        lists:foldl(LoadFun, {{0, 0, []}, [], []}, lists:seq(1, 16)),
    ?assertMatch(32000, SQN),
    ?assertMatch(true, Size =< 32000),

    % Every key and value in the source list must be found using only the
    % positions returned from the index
    lists:foreach(
        fun({K, V}) ->
            H = leveled_codec:segment_hash(K),
            PosList = check_index(H, L0Index),
            ?assertMatch(
                {true, {K, V}},
                check_slotlist(K, H, PosList, TreeList))
        end,
        SrcKVL).
|
|
|
|
|
|
index_performance_test() ->
    HashFun = fun({K, _V}) -> leveled_codec:segment_hash(K) end,
    LM1 = generate_randomkeys_aslist(1, 2000, 1, 500),
    LM2 = generate_randomkeys_aslist(2001, 2000, 1, 500),
    HL1 = lists:map(HashFun, LM1),
    HL2 = lists:map(HashFun, LM2),

    % Time the preparation of a single index array from the first hash list
    SWP = os:timestamp(),
    A1 =
        lists:foldl(
            fun(H, A) -> prepare_for_index(A, H) end,
            new_index(),
            HL1),
    io:format(
        user,
        "~nPrepare single index takes ~w microsec~n",
        [timer:now_diff(os:timestamp(), SWP)]),

    % Time the addition of that same array to the index, eight times over
    SWL = os:timestamp(),
    PMI1 =
        lists:foldl(
            fun(I, Idx) -> add_to_index(A1, Idx, I) end,
            [],
            lists:seq(1, 8)),
    io:format(
        user,
        "Appending to array takes ~w microsec~n",
        [timer:now_diff(os:timestamp(), SWL)]),

    % Every hash from the first list should be matched at all eight positions
    SWC1 = os:timestamp(),
    R0 = lists:seq(1, 8),
    lists:foreach(
        fun(H) -> ?assertMatch(R0, check_index(H, PMI1)) end,
        HL1),
    io:format(
        user,
        "Checking 2000 matches in array at each level takes ~w microsec~n",
        [timer:now_diff(os:timestamp(), SWC1)]),

    % Count the positions returned for the hashes from the second list
    SWC2 = os:timestamp(),
    FPT = lists:sum([length(check_index(H, PMI1)) || H <- HL2]),
    io:format(
        user,
        "Checking 2000 misses in array at each level takes ~w microsec " ++
            "with ~w false positives~n",
        [timer:now_diff(os:timestamp(), SWC2), FPT]).
|
|
|
|
|
|
|
|
-endif.
|