
Move legacy CDB code used only in unit tests into test area. Fix column width in pmem and comment out the unused case statement (in healthy tests) from the penciller test code
282 lines
No EOL
11 KiB
Erlang
282 lines
No EOL
11 KiB
Erlang
%% -------- PENCILLER MEMORY ---------
|
|
%%
|
|
%% Module that provides functions for maintaining the L0 memory of the
|
|
%% Penciller.
|
|
%%
|
|
%% It is desirable that the L0Mem can efficiently handle the push of new trees
|
|
%% whilst maintaining the capability to quickly snapshot the memory for clones
|
|
%% of the Penciller.
|
|
%%
|
|
%% ETS tables are not used due to complications with managing their mutability,
|
|
%% as the database is snapshotted.
|
|
%%
|
|
%% An attempt was made to merge all trees into a single tree on push (in a
|
|
%% spawned process), but this proved to have an expensive impact as the tree
|
|
%% got larger.
|
|
%%
|
|
%% This approach is to keep a list of trees which have been received in the
|
|
%% order in which they were received. There is then a fixed-size array of hashes
|
|
%% used to either point lookups at the right tree in the list, or inform the
|
|
%% requester it is not present, avoiding any lookups.
|
|
%%
|
|
%% Tests show this takes one third of the time at push (when compared to
|
|
%% merging to a single tree), and is an order of magnitude more efficient as
|
|
%% the tree reaches peak size. It is also an order of magnitude more
|
|
%% efficient to use the hash index when compared to looking through all the
|
|
%% trees.
|
|
%%
|
|
%% Total time for single_tree 217000 microseconds
|
|
%% Total time for array_tree 209000 microseconds
|
|
%% Total time for array_list 142000 microseconds
|
|
%% Total time for array_filter 69000 microseconds
|
|
%% List of 2000 checked without array - success count of 90 in 36000 microsecs
|
|
%% List of 2000 checked with array - success count of 90 in 1000 microsecs
|
|
%%
|
|
%% The trade-off taken with the approach is that the size of the L0Cache is
|
|
%% uncertain. The Size count is incremented if the hash is not already
|
|
%% present, so the size may be lower than the actual size due to hash
|
|
%% collisions
|
|
|
|
-module(leveled_pmem).
|
|
|
|
-include("include/leveled.hrl").
|
|
|
|
-export([
|
|
add_to_index/5,
|
|
to_list/2,
|
|
new_index/0,
|
|
check_levelzero/3,
|
|
merge_trees/4
|
|
]).
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
%% Shape of the L0 hash index as {SlotCount, SlotBits}. Element 1 is the size
%% of the index array, and (SlotCount - 1) is the mask applied to a key's hash
%% to choose its slot. Element 2 is the number of bits shifted off the hash to
%% produce the value that is stored within the slot.
-define(SLOT_WIDTH, {4096, 12}).
|
|
|
|
|
|
%%%============================================================================
|
|
%%% API
|
|
%%%============================================================================
|
|
|
|
%% Index every key of the LevelMinus1 tree into the L0 hash index and append
%% the tree to the end of TreeList.
%%
%% For each {Key, Value} in the tree an entry {Hash, TreePosition} is
%% prepended to the index slot chosen by hash_to_slot/1, where TreePosition is
%% the position the new tree will take in the list. The size counter is only
%% incremented when the slot does not already hold an entry with the same
%% hash, so it may undercount the number of keys.
%%
%% Returns {MaxSQN, NewL0Size, UpdatedL0Index, UpdatedTreeList}. The lowest
%% sequence number found in the tree must be greater than LedgerSQN; if it is
%% not, the `if` below has no matching clause and the call fails with
%% if_clause.
add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) ->
    StartTime = os:timestamp(),
    TreePosition = length(TreeList) + 1,
    %% The atom infinity sorts above every number in Erlang term order, so it
    %% is a neutral starting point for min/2.
    InitialAcc = {infinity, 0, L0Size, L0Index},
    IndexEntry =
        fun({Key, _Value} = KV, {LowSQN, HighSQN, Size, Index}) ->
            SQN = leveled_codec:strip_to_seqonly(KV),
            {Hash, Slot} = hash_to_slot(Key),
            SlotEntries = array:get(Slot, Index),
            UpdSize =
                case lists:keymember(Hash, 1, SlotEntries) of
                    false ->
                        Size + 1;
                    true ->
                        Size
                end,
            UpdIndex =
                array:set(Slot, [{Hash, TreePosition} | SlotEntries], Index),
            {min(SQN, LowSQN), max(SQN, HighSQN), UpdSize, UpdIndex}
        end,
    {MinSQN, MaxSQN, NewL0Size, UpdL0Index} =
        lists:foldl(IndexEntry, InitialAcc, gb_trees:to_list(LevelMinus1)),
    leveled_log:log_timer("PM001", [NewL0Size], StartTime),
    if
        MinSQN > LedgerSQN ->
            {MaxSQN, NewL0Size, UpdL0Index, TreeList ++ [LevelMinus1]}
    end.
|
|
|
|
|
|
%% Flatten the cached trees into one key-ordered list of {Key, Value} pairs.
%%
%% Slots is the number of trees and FetchFun(Slot) must return the gb_tree
%% held at that position. Trees are fetched from the highest slot down to
%% slot 1, and each one is merged in with lists:ukeymerge/3 as the second
%% list, so when a key occurs in several trees the entry from the
%% highest-numbered slot is the one kept.
%%
%% The start time and the length of the result are passed to
%% leveled_log:log_timer/3 under the reference "PM002".
to_list(Slots, FetchFun) ->
    StartTime = os:timestamp(),
    MergeSlot =
        fun(Slot, Merged) ->
            SlotKVs = gb_trees:to_list(FetchFun(Slot)),
            lists:ukeymerge(1, Merged, SlotKVs)
        end,
    %% foldr walks the sequence right-to-left, i.e. from Slots down to 1.
    FullList = lists:foldr(MergeSlot, [], lists:seq(1, Slots)),
    leveled_log:log_timer("PM002", [length(FullList)], StartTime),
    FullList.
|
|
|
|
|
|
%% Create an empty L0 hash index: a fixed-size array whose size is the first
%% element of ?SLOT_WIDTH, with every slot defaulting to the empty list.
new_index() ->
    {SlotCount, _SlotBits} = ?SLOT_WIDTH,
    array:new(SlotCount, [{default, []}, fixed]).
|
|
|
|
|
|
%% Look up Key in the level-zero cache using the hash index.
%%
%% The index slot for the key is read and the tree positions of all entries
%% whose hash equals the key's hash are collected. Those positions are
%% de-duplicated and tried from the highest position to the lowest, and the
%% first tree that contains the key supplies the result.
%%
%% Returns {true, {Key, Value}} on a hit, or {false, not_found} when no
%% candidate tree holds the key (including when the slot has no entry with a
%% matching hash, in which case no tree is consulted at all).
check_levelzero(Key, L0Index, TreeList) ->
    {Hash, Slot} = hash_to_slot(Key),
    SameHash =
        fun({EntryHash, TreePosition}) ->
            case EntryHash of
                Hash ->
                    {true, TreePosition};
                _ ->
                    false
            end
        end,
    Candidates = lists:filtermap(SameHash, array:get(Slot, L0Index)),
    find_in_trees(Key, lists:reverse(lists:usort(Candidates)), TreeList).

%% Try each tree position in turn, stopping at the first tree holding Key.
find_in_trees(_Key, [], _TreeList) ->
    {false, not_found};
find_in_trees(Key, [TreePosition | Remaining], TreeList) ->
    case gb_trees:lookup(Key, lists:nth(TreePosition, TreeList)) of
        {value, Value} ->
            {true, {Key, Value}};
        none ->
            find_in_trees(Key, Remaining, TreeList)
    end.
|
|
|
|
|
|
%% Build a single gb_tree from the part of every cached tree that lies in the
%% requested key range.
%%
%% The trees in TreeList are processed in list order, followed by LevelMinus1,
%% each being folded into the accumulator by merge_nexttree/4. As that
%% function inserts with gb_trees:enter/3, a key present in more than one tree
%% ends up with the value from the tree processed last.
merge_trees(StartKey, EndKey, TreeList, LevelMinus1) ->
    MergeOne =
        fun(Tree, Merged) ->
            merge_nexttree(Tree, Merged, StartKey, EndKey)
        end,
    lists:foldl(MergeOne, gb_trees:empty(), TreeList ++ [LevelMinus1]).
|
|
|
|
%%%============================================================================
|
|
%%% Internal Functions
|
|
%%%============================================================================
|
|
|
|
|
|
%% Split the phash2 hash of Key into {Hash, Slot}.
%%
%% Slot is the hash masked with (SlotCount - 1) and selects the position in
%% the index array; Hash is what remains after shifting the hash right by
%% SlotBits, and is the value stored and compared within the slot.
hash_to_slot(Key) ->
    {SlotCount, SlotBits} = ?SLOT_WIDTH,
    FullHash = erlang:phash2(Key),
    {FullHash bsr SlotBits, FullHash band (SlotCount - 1)}.
|
|
|
|
%% Merge the entries of Tree into TreeAcc, starting from the first key that
%% is greater than or equal to StartKey and continuing until
%% merge_nexttree/3 decides that EndKey has been passed.
merge_nexttree(Tree, TreeAcc, StartKey, EndKey) ->
    merge_nexttree(gb_trees:iterator_from(StartKey, Tree), TreeAcc, EndKey).
|
|
|
|
%% Walk a gb_trees iterator, entering each {Key, Value} into TreeAcc.
%%
%% The walk ends, returning the accumulator, either when the iterator is
%% exhausted or at the first key for which
%% leveled_codec:endkey_passed(EndKey, Key) returns true; that key is not
%% added. Existing keys in TreeAcc are overwritten by gb_trees:enter/3.
merge_nexttree(Iter, TreeAcc, EndKey) ->
    case gb_trees:next(Iter) of
        {Key, Value, NextIter} ->
            case leveled_codec:endkey_passed(EndKey, Key) of
                false ->
                    Merged = gb_trees:enter(Key, Value, TreeAcc),
                    merge_nexttree(NextIter, Merged, EndKey);
                true ->
                    TreeAcc
            end;
        none ->
            TreeAcc
    end.
|
|
|
|
%%%============================================================================
|
|
%%% Test
|
|
%%%============================================================================
|
|
|
|
-ifdef(TEST).
|
|
|
|
%% Test helper: build a gb_tree of Count randomly keyed entries, numbering
%% the sequence numbers upwards from Seqn, starting from an empty tree.
%%
%% Note that the final argument is handed to generate_randomkeys/5 as the
%% width of the random bucket range (bucket numbers are drawn from
%% BucketRangeLow + 1 up to BucketRangeLow + BucketRangeHigh), not as an
%% absolute upper bound.
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    EmptyTree = gb_trees:empty(),
    generate_randomkeys(
        Seqn, Count, EmptyTree, BucketRangeLow, BucketRangeHigh
    ).
|
|
|
|
%% Test helper: add Count randomly keyed entries to the gb_tree Acc.
%%
%% Each entry has the key {o, "Bucket" ++ BNumber, "Key" ++ KNumber, null}
%% and the value {Seqn, {active, infinity}, null}, where BNumber is a
%% zero-padded four character number drawn from BucketLow + 1 to
%% BucketLow + BRange, and KNumber one drawn from 1 to 1000. Seqn increases by
%% one for every entry generated. Entries are added with gb_trees:enter/3, so
%% a repeated key replaces the earlier entry and the resulting tree may hold
%% fewer than Count new keys.
%%
%% Random numbers come from the rand module rather than the deprecated random
%% module, so the keys generated differ from run to run.
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BRange) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    BucketInt = BucketLow + rand:uniform(BRange),
    BNumber = string:right(integer_to_list(BucketInt), 4, $0),
    KNumber = string:right(integer_to_list(rand:uniform(1000)), 4, $0),
    K = {o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
    V = {Seqn, {active, infinity}, null},
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        gb_trees:enter(K, V, Acc),
                        BucketLow,
                        BRange).
|
|
|
|
|
|
%% Checks that lookups through the hash index agree with a brute-force scan
%% of every tree, and that merge_trees/4 yields a tree of the same size as
%% filtering the full to_list/2 dump for the same key range. The time taken by
%% each of the two range methods is printed.
compare_method_test() ->
    %% Push 16 trees, each built from 2000 generated entries, into a fresh
    %% cache; the sequence numbers of each tree follow on from the last.
    PushTree =
        fun(_N, {LedgerSQN, L0Size, L0Index, L0TreeList}) ->
            LM1 = generate_randomkeys(LedgerSQN + 1, 2000, 1, 500),
            add_to_index(L0Index, L0Size, LM1, LedgerSQN, L0TreeList)
        end,
    {SQN, Size, Index, TreeList} =
        lists:foldl(PushTree, {0, 0, new_index(), []}, lists:seq(1, 16)),
    ?assertMatch(32000, SQN),
    ?assertMatch(true, Size =< 32000),

    TestList = gb_trees:to_list(generate_randomkeys(1, 2000, 1, 800)),

    %% Reference lookup: scan the trees from the last pushed to the first and
    %% keep the first hit.
    ScanTrees =
        fun(Key) ->
            lists:foldr(
                fun(_Tree, {true, KV}) ->
                        {true, KV};
                   (Tree, {false, _KV}) ->
                        case gb_trees:lookup(Key, Tree) of
                            {value, Value} ->
                                {true, {Key, Value}};
                            none ->
                                {false, not_found}
                        end
                end,
                {false, not_found},
                TreeList)
        end,
    S0 = lists:reverse([ScanTrees(Key) || {Key, _V} <- TestList]),
    S1 = lists:reverse(
            [check_levelzero(Key, Index, TreeList) || {Key, _V} <- TestList]),
    ?assertMatch(S0, S1),

    StartKey = {o, "Bucket0100", null, null},
    EndKey = {o, "Bucket0200", null, null},

    %% Crude range query: dump everything, then keep the keys in range.
    SWa = os:timestamp(),
    FetchFun = fun(Slot) -> lists:nth(Slot, TreeList) end,
    DumpList = to_list(length(TreeList), FetchFun),
    KeepInRange =
        fun({K, V}, Acc) ->
            case leveled_codec:endkey_passed(EndKey, K) of
                false when K >= StartKey ->
                    gb_trees:enter(K, V, Acc);
                _ ->
                    Acc
            end
        end,
    Q0 = lists:foldl(KeepInRange, gb_trees:empty(), DumpList),
    Sz0 = gb_trees:size(Q0),
    io:format("Crude method took ~w microseconds resulting in tree of " ++
                    "size ~w~n",
                [timer:now_diff(os:timestamp(), SWa), Sz0]),

    %% Range query through merge_trees/4 with an empty LevelMinus1 tree.
    SWb = os:timestamp(),
    Q1 = merge_trees(StartKey, EndKey, TreeList, gb_trees:empty()),
    Sz1 = gb_trees:size(Q1),
    io:format("Merge method took ~w microseconds resulting in tree of " ++
                    "size ~w~n",
                [timer:now_diff(os:timestamp(), SWb), Sz1]),
    ?assertMatch(Sz0, Sz1).
|
|
|
|
|
|
|
|
-endif. |