Revert omnishambles performance refactoring

To try to improve performance, index entries had been removed from the
Ledger Cache, and a shadow list of the LedgerCache (in SQN order) was
kept to avoid gb_trees:to_list on push_mem.

This did not go well.  The issue was that ets does not handle
duplicate keys in the list when inserting (only one of the duplicates
is inserted, and it is not clear which one).

This has been reverted back out.

The ETS parameters have been changed to [set, private].  It is not used
as an iterator, and is no longer passed out of the process (the
memtable_copy is sent instead).  This also avoids the tab2list function
being called.
This commit is contained in:
martinsumner 2016-10-19 00:10:48 +01:00
parent 8f29a6c40f
commit f16f71ae81
6 changed files with 89 additions and 62 deletions

View file

@ -160,7 +160,7 @@
penciller :: pid(), penciller :: pid(),
cache_size :: integer(), cache_size :: integer(),
back_pressure :: boolean(), back_pressure :: boolean(),
ledger_cache :: {gb_trees:tree(), list()}, ledger_cache :: gb_trees:tree(),
is_snapshot :: boolean()}). is_snapshot :: boolean()}).
@ -242,7 +242,7 @@ init([Opts]) ->
{ok, #state{inker=Inker, {ok, #state{inker=Inker,
penciller=Penciller, penciller=Penciller,
cache_size=CacheSize, cache_size=CacheSize,
ledger_cache={gb_trees:empty(), []}, ledger_cache=gb_trees:empty(),
is_snapshot=false}}; is_snapshot=false}};
Bookie -> Bookie ->
{ok, {ok,
@ -397,15 +397,16 @@ code_change(_OldVsn, State, _Extra) ->
%%% Internal functions %%% Internal functions
%%%============================================================================ %%%============================================================================
bucket_stats(Penciller, {_ObjTree, ChangeList}, Bucket, Tag) -> bucket_stats(Penciller, LedgerCache, Bucket, Tag) ->
PCLopts = #penciller_options{start_snapshot=true, PCLopts = #penciller_options{start_snapshot=true,
source_penciller=Penciller}, source_penciller=Penciller},
{ok, LedgerSnapshot} = leveled_penciller:pcl_start(PCLopts), {ok, LedgerSnapshot} = leveled_penciller:pcl_start(PCLopts),
Folder = fun() -> Folder = fun() ->
Increment = gb_trees:to_list(LedgerCache),
io:format("Length of increment in snapshot is ~w~n", io:format("Length of increment in snapshot is ~w~n",
[length(ChangeList)]), [length(Increment)]),
ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot, ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot,
{infinity, ChangeList}), {infinity, Increment}),
StartKey = leveled_codec:to_ledgerkey(Bucket, null, Tag), StartKey = leveled_codec:to_ledgerkey(Bucket, null, Tag),
EndKey = leveled_codec:to_ledgerkey(Bucket, null, Tag), EndKey = leveled_codec:to_ledgerkey(Bucket, null, Tag),
Acc = leveled_penciller:pcl_fetchkeys(LedgerSnapshot, Acc = leveled_penciller:pcl_fetchkeys(LedgerSnapshot,
@ -418,7 +419,7 @@ bucket_stats(Penciller, {_ObjTree, ChangeList}, Bucket, Tag) ->
end, end,
{async, Folder}. {async, Folder}.
index_query(Penciller, {_ObjTree, ChangeList}, index_query(Penciller, LedgerCache,
Bucket, Bucket,
{IdxField, StartValue, EndValue}, {IdxField, StartValue, EndValue},
{ReturnTerms, TermRegex}) -> {ReturnTerms, TermRegex}) ->
@ -426,10 +427,11 @@ index_query(Penciller, {_ObjTree, ChangeList},
source_penciller=Penciller}, source_penciller=Penciller},
{ok, LedgerSnapshot} = leveled_penciller:pcl_start(PCLopts), {ok, LedgerSnapshot} = leveled_penciller:pcl_start(PCLopts),
Folder = fun() -> Folder = fun() ->
Increment = gb_trees:to_list(LedgerCache),
io:format("Length of increment in snapshot is ~w~n", io:format("Length of increment in snapshot is ~w~n",
[length(ChangeList)]), [length(Increment)]),
ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot, ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot,
{infinity, ChangeList}), {infinity, Increment}),
StartKey = leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, StartKey = leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG,
IdxField, StartValue), IdxField, StartValue),
EndKey = leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, EndKey = leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG,
@ -491,9 +493,8 @@ startup(InkerOpts, PencillerOpts) ->
{Inker, Penciller}. {Inker, Penciller}.
fetch_head(Key, Penciller, {ObjTree, _ChangeList}) -> fetch_head(Key, Penciller, LedgerCache) ->
case gb_trees:lookup(Key, LedgerCache) of
case gb_trees:lookup(Key, ObjTree) of
{value, Head} -> {value, Head} ->
Head; Head;
none -> none ->
@ -569,30 +570,20 @@ preparefor_ledgercache(PK, SQN, Obj, Size, IndexSpecs) ->
[PrimaryChange] ++ ConvSpecs. [PrimaryChange] ++ ConvSpecs.
addto_ledgercache(Changes, Cache) -> addto_ledgercache(Changes, Cache) ->
{ObjectTree, ChangeList} = Cache, lists:foldl(fun({K, V}, Acc) -> gb_trees:enter(K, V, Acc) end,
{lists:foldl(fun({K, V}, Acc) -> Cache,
case leveled_codec:is_indexkey(K) of Changes).
false ->
gb_trees:enter(K, V, Acc);
true ->
Acc
end
end,
ObjectTree,
Changes),
ChangeList ++ Changes}.
maybepush_ledgercache(MaxCacheSize, Cache, Penciller) -> maybepush_ledgercache(MaxCacheSize, Cache, Penciller) ->
{_ObjectTree, ChangeList} = Cache, CacheSize = gb_trees:size(Cache),
CacheSize = length(ChangeList),
if if
CacheSize > MaxCacheSize -> CacheSize > MaxCacheSize ->
case leveled_penciller:pcl_pushmem(Penciller, case leveled_penciller:pcl_pushmem(Penciller,
ChangeList) of gb_trees:to_list(Cache)) of
ok -> ok ->
{ok, {gb_trees:empty(), []}}; {ok, gb_trees:empty()};
pause -> pause ->
{pause, {gb_trees:empty(), []}}; {pause, gb_trees:empty()};
refused -> refused ->
{ok, Cache} {ok, Cache}
end; end;

View file

@ -724,9 +724,10 @@ manifest_printer(Manifest) ->
initiate_penciller_snapshot(Bookie) -> initiate_penciller_snapshot(Bookie) ->
{ok, {ok,
{LedgerSnap, {_ObjTree, ChangeList}}, {LedgerSnap, LedgerCache},
_} = leveled_bookie:book_snapshotledger(Bookie, self(), undefined), _} = leveled_bookie:book_snapshotledger(Bookie, self(), undefined),
ok = leveled_penciller:pcl_loadsnapshot(LedgerSnap, ChangeList), ok = leveled_penciller:pcl_loadsnapshot(LedgerSnap,
gb_trees:to_list(LedgerCache)),
MaxSQN = leveled_penciller:pcl_getstartupsequencenumber(LedgerSnap), MaxSQN = leveled_penciller:pcl_getstartupsequencenumber(LedgerSnap),
{LedgerSnap, MaxSQN}. {LedgerSnap, MaxSQN}.

View file

@ -246,6 +246,7 @@
pcl_loadsnapshot/2, pcl_loadsnapshot/2,
pcl_getstartupsequencenumber/1, pcl_getstartupsequencenumber/1,
roll_new_tree/3, roll_new_tree/3,
roll_into_list/1,
clean_testdir/1]). clean_testdir/1]).
-include_lib("eunit/include/eunit.hrl"). -include_lib("eunit/include/eunit.hrl").
@ -377,7 +378,7 @@ handle_call({push_mem, DumpList}, From, State=#state{is_snapshot=Snap})
% then add to memory in the background before updating the loop state % then add to memory in the background before updating the loop state
% - Push the update into memory (do_pushtomem/3) % - Push the update into memory (do_pushtomem/3)
% - If we haven't got through quickcheck now need to check if there is a % - If we haven't got through quickcheck now need to check if there is a
% definite need to write a new L0 file (roll_memory/2). If all clear this % definite need to write a new L0 file (roll_memory/3). If all clear this
% will write the file in the background and allow a response to the user. % will write the file in the background and allow a response to the user.
% If not the change has still been made but the the L0 file will not have % If not the change has still been made but the the L0 file will not have
% been prompted - so the reply does not indicate failure but returns the % been prompted - so the reply does not indicate failure but returns the
@ -414,7 +415,7 @@ handle_call({push_mem, DumpList}, From, State=#state{is_snapshot=Snap})
State1#state.memtable_copy, State1#state.memtable_copy,
MaxSQN), MaxSQN),
case roll_memory(State1, MaxTableSize) of case roll_memory(State1, MaxTableSize, L0Snap) of
{ok, L0Pend, ManSN, TableSize2} -> {ok, L0Pend, ManSN, TableSize2} ->
io:format("Push completed in ~w microseconds~n", io:format("Push completed in ~w microseconds~n",
[timer:now_diff(os:timestamp(), StartWatch)]), [timer:now_diff(os:timestamp(), StartWatch)]),
@ -587,9 +588,16 @@ terminate(Reason, State) ->
no_change -> no_change ->
State State
end, end,
Dump = ets:tab2list(UpdState#state.memtable), % TODO:
% This next section (to the end of the case clause), appears to be
% pointless. It will persist the in-memory state to a SFT file, but on
% startup that file will be ignored as the manifest has not bene updated
%
% Should we update the manifest, or stop trying to persist on closure?
Dump = roll_into_list(State#state.memtable_copy),
case {UpdState#state.levelzero_pending, case {UpdState#state.levelzero_pending,
get_item(0, UpdState#state.manifest, []), length(Dump)} of get_item(0, UpdState#state.manifest, []),
length(Dump)} of
{?L0PEND_RESET, [], L} when L > 0 -> {?L0PEND_RESET, [], L} when L > 0 ->
MSN = UpdState#state.manifest_sqn + 1, MSN = UpdState#state.manifest_sqn + 1,
FileName = UpdState#state.root_path FileName = UpdState#state.root_path
@ -616,6 +624,8 @@ terminate(Reason, State) ->
++ " with ~w keys discarded~n", ++ " with ~w keys discarded~n",
[length(Dump)]) [length(Dump)])
end, end,
% Tidy shutdown of individual files
ok = close_files(0, UpdState#state.manifest), ok = close_files(0, UpdState#state.manifest),
lists:foreach(fun({_FN, Pid, _SN}) -> lists:foreach(fun({_FN, Pid, _SN}) ->
leveled_sft:sft_close(Pid) end, leveled_sft:sft_close(Pid) end,
@ -639,15 +649,9 @@ start_from_file(PCLopts) ->
M -> M ->
M M
end, end,
% Options (not) chosen here: % There is no need to export this ets table (hence private) or iterate
% - As we pass the ETS table to the sft file when the L0 file is created % over it (hence set not ordered_set)
% then this cannot be private. TID = ets:new(?MEMTABLE, [set, private]),
% - There is no use of iterator, so a set could be used, but then the
% output of tab2list would need to be sorted
% TODO:
% - Test switching to [set, private] and sending the L0 snapshots to the
% sft_new cast
TID = ets:new(?MEMTABLE, [ordered_set]),
{ok, Clerk} = leveled_pclerk:clerk_new(self()), {ok, Clerk} = leveled_pclerk:clerk_new(self()),
InitState = #state{memtable=TID, InitState = #state{memtable=TID,
clerk=Clerk, clerk=Clerk,
@ -767,12 +771,17 @@ quickcheck_pushtomem(DumpList, TableSize, MaxSize) ->
do_pushtomem(DumpList, MemTable, Snapshot, MaxSQN) -> do_pushtomem(DumpList, MemTable, Snapshot, MaxSQN) ->
SW = os:timestamp(), SW = os:timestamp(),
UpdSnapshot = add_increment_to_memcopy(Snapshot, MaxSQN, DumpList), UpdSnapshot = add_increment_to_memcopy(Snapshot, MaxSQN, DumpList),
% Note that the DumpList must have been taken from a source which
% naturally de-duplicates the keys. It is not possible just to cache
% changes in a list (in the Bookie for example), as the insert method does
% not apply the list in order, and so it is not clear which of a duplicate
% key will be applied
ets:insert(MemTable, DumpList), ets:insert(MemTable, DumpList),
io:format("Push into memory timed at ~w microseconds~n", io:format("Push into memory timed at ~w microseconds~n",
[timer:now_diff(os:timestamp(), SW)]), [timer:now_diff(os:timestamp(), SW)]),
UpdSnapshot. UpdSnapshot.
roll_memory(State, MaxSize) -> roll_memory(State, MaxSize, MemTableCopy) ->
case ets:info(State#state.memtable, size) of case ets:info(State#state.memtable, size) of
Size when Size > MaxSize -> Size when Size > MaxSize ->
L0 = get_item(0, State#state.manifest, []), L0 = get_item(0, State#state.manifest, []),
@ -784,7 +793,7 @@ roll_memory(State, MaxSize) ->
++ integer_to_list(MSN) ++ "_0_0", ++ integer_to_list(MSN) ++ "_0_0",
Opts = #sft_options{wait=false}, Opts = #sft_options{wait=false},
{ok, L0Pid} = leveled_sft:sft_new(FileName, {ok, L0Pid} = leveled_sft:sft_new(FileName,
State#state.memtable, MemTableCopy,
[], [],
0, 0,
Opts), Opts),
@ -938,7 +947,6 @@ return_work(State, From) ->
%% This takes the three parts of a memtable copy - the increments, the tree %% This takes the three parts of a memtable copy - the increments, the tree
%% and the SQN at which the tree was formed, and outputs a new tree %% and the SQN at which the tree was formed, and outputs a new tree
roll_new_tree(Tree, [], HighSQN) -> roll_new_tree(Tree, [], HighSQN) ->
{Tree, HighSQN}; {Tree, HighSQN};
roll_new_tree(Tree, [{SQN, KVList}|TailIncs], HighSQN) when SQN >= HighSQN -> roll_new_tree(Tree, [{SQN, KVList}|TailIncs], HighSQN) when SQN >= HighSQN ->
@ -954,6 +962,14 @@ roll_new_tree(Tree, [{SQN, KVList}|TailIncs], HighSQN) when SQN >= HighSQN ->
roll_new_tree(Tree, [_H|TailIncs], HighSQN) -> roll_new_tree(Tree, [_H|TailIncs], HighSQN) ->
roll_new_tree(Tree, TailIncs, HighSQN). roll_new_tree(Tree, TailIncs, HighSQN).
%% This takes the three parts of a memtable copy - the increments, the tree
%% and the SQN at which the tree was formed, and outputs a sorted list
roll_into_list(MemTableCopy) ->
{Tree, _SQN} = roll_new_tree(MemTableCopy#l0snapshot.tree,
MemTableCopy#l0snapshot.increments,
MemTableCopy#l0snapshot.ledger_sqn),
gb_trees:to_list(Tree).
%% Update the memtable copy if the tree created advances the SQN %% Update the memtable copy if the tree created advances the SQN
cache_tree_in_memcopy(MemCopy, Tree, SQN) -> cache_tree_in_memcopy(MemCopy, Tree, SQN) ->
case MemCopy#l0snapshot.ledger_sqn of case MemCopy#l0snapshot.ledger_sqn of
@ -1331,7 +1347,7 @@ rename_manifest_files(RootPath, NewMSN) ->
filelib:is_file(OldFN), filelib:is_file(OldFN),
NewFN, NewFN,
filelib:is_file(NewFN)]), filelib:is_file(NewFN)]),
file:rename(OldFN,NewFN). ok = file:rename(OldFN,NewFN).
filepath(RootPath, manifest) -> filepath(RootPath, manifest) ->
RootPath ++ "/" ++ ?MANIFEST_FP; RootPath ++ "/" ++ ?MANIFEST_FP;
@ -1379,13 +1395,14 @@ confirm_delete(Filename, UnreferencedFiles, RegisteredSnapshots) ->
assess_sqn([]) -> assess_sqn([]) ->
empty; empty;
assess_sqn([HeadKV|[]]) -> assess_sqn(DumpList) ->
{leveled_codec:strip_to_seqonly(HeadKV), assess_sqn(DumpList, infinity, 0).
leveled_codec:strip_to_seqonly(HeadKV)};
assess_sqn([HeadKV|DumpList]) ->
{leveled_codec:strip_to_seqonly(HeadKV),
leveled_codec:strip_to_seqonly(lists:last(DumpList))}.
assess_sqn([], MinSQN, MaxSQN) ->
{MinSQN, MaxSQN};
assess_sqn([HeadKey|Tail], MinSQN, MaxSQN) ->
SQN = leveled_codec:strip_to_seqonly(HeadKey),
assess_sqn(Tail, min(MinSQN, SQN), max(MaxSQN, SQN)).
%%%============================================================================ %%%============================================================================
%%% Test %%% Test
@ -1499,11 +1516,6 @@ simple_server_test() ->
max_inmemory_tablesize=1000}), max_inmemory_tablesize=1000}),
TopSQN = pcl_getstartupsequencenumber(PCLr), TopSQN = pcl_getstartupsequencenumber(PCLr),
Check = case TopSQN of Check = case TopSQN of
2001 ->
%% Last push not persisted
S3a = pcl_pushmem(PCL, [Key3]),
if S3a == pause -> timer:sleep(1000); true -> ok end,
ok;
2002 -> 2002 ->
%% everything got persisted %% everything got persisted
ok; ok;

View file

@ -183,7 +183,7 @@
-define(MERGE_SCANWIDTH, 8). -define(MERGE_SCANWIDTH, 8).
-define(DELETE_TIMEOUT, 60000). -define(DELETE_TIMEOUT, 60000).
-define(MAX_KEYS, ?SLOT_COUNT * ?BLOCK_COUNT * ?BLOCK_SIZE). -define(MAX_KEYS, ?SLOT_COUNT * ?BLOCK_COUNT * ?BLOCK_SIZE).
-define(DISCARD_EXT, ".discarded").
-record(state, {version = ?CURRENT_VERSION :: tuple(), -record(state, {version = ?CURRENT_VERSION :: tuple(),
slot_index :: list(), slot_index :: list(),
@ -387,7 +387,7 @@ create_levelzero(Inp1, Filename) ->
true -> true ->
Inp1; Inp1;
false -> false ->
ets:tab2list(Inp1) leveled_penciller:roll_into_list(Inp1)
end, end,
{TmpFilename, PrmFilename} = generate_filenames(Filename), {TmpFilename, PrmFilename} = generate_filenames(Filename),
case create_file(TmpFilename) of case create_file(TmpFilename) of
@ -510,6 +510,19 @@ complete_file(Handle, FileMD, KL1, KL2, Level, Rename) ->
open_file(FileMD); open_file(FileMD);
{true, OldName, NewName} -> {true, OldName, NewName} ->
io:format("Renaming file from ~s to ~s~n", [OldName, NewName]), io:format("Renaming file from ~s to ~s~n", [OldName, NewName]),
case filelib:is_file(NewName) of
true ->
io:format("Filename ~s already exists~n",
[NewName]),
AltName = filename:join(filename:dirname(NewName),
filename:basename(NewName))
++ ?DISCARD_EXT,
io:format("Rename rogue filename ~s to ~s~n",
[NewName, AltName]),
ok = file:rename(NewName, AltName);
false ->
ok
end,
ok = file:rename(OldName, NewName), ok = file:rename(OldName, NewName),
open_file(FileMD#state{filename=NewName}) open_file(FileMD#state{filename=NewName})
end, end,

View file

@ -192,6 +192,7 @@ fetchput_snapshot(_Config) ->
io:format("Checked for replacement objects in active bookie" ++ io:format("Checked for replacement objects in active bookie" ++
", old objects in snapshot~n"), ", old objects in snapshot~n"),
ok = filelib:ensure_dir(RootPath ++ "/ledger/ledger_files"),
{ok, FNsA} = file:list_dir(RootPath ++ "/ledger/ledger_files"), {ok, FNsA} = file:list_dir(RootPath ++ "/ledger/ledger_files"),
ObjList3 = testutil:generate_objects(15000, 5002), ObjList3 = testutil:generate_objects(15000, 5002),
lists:foreach(fun({_RN, Obj, Spc}) -> lists:foreach(fun({_RN, Obj, Spc}) ->
@ -212,6 +213,7 @@ fetchput_snapshot(_Config) ->
testutil:check_forlist(SnapBookie3, lists:nth(length(CLs2), CLs2)), testutil:check_forlist(SnapBookie3, lists:nth(length(CLs2), CLs2)),
testutil:check_formissinglist(SnapBookie2, ChkList3), testutil:check_formissinglist(SnapBookie2, ChkList3),
testutil:check_formissinglist(SnapBookie2, lists:nth(length(CLs2), CLs2)), testutil:check_formissinglist(SnapBookie2, lists:nth(length(CLs2), CLs2)),
testutil:check_forlist(Bookie2, ChkList2),
testutil:check_forlist(SnapBookie3, ChkList2), testutil:check_forlist(SnapBookie3, ChkList2),
testutil:check_forlist(SnapBookie2, ChkList1), testutil:check_forlist(SnapBookie2, ChkList1),
io:format("Started new snapshot and check for new objects~n"), io:format("Started new snapshot and check for new objects~n"),

View file

@ -53,14 +53,22 @@ check_forlist(Bookie, ChkList, Log) ->
lists:foreach(fun({_RN, Obj, _Spc}) -> lists:foreach(fun({_RN, Obj, _Spc}) ->
if if
Log == true -> Log == true ->
io:format("Fetching Key ~w~n", [Obj#r_object.key]); io:format("Fetching Key ~s~n", [Obj#r_object.key]);
true -> true ->
ok ok
end, end,
R = leveled_bookie:book_riakget(Bookie, R = leveled_bookie:book_riakget(Bookie,
Obj#r_object.bucket, Obj#r_object.bucket,
Obj#r_object.key), Obj#r_object.key),
R = {ok, Obj} end, ok = case R of
{ok, Obj} ->
ok;
not_found ->
io:format("Object not found for key ~s~n",
[Obj#r_object.key]),
error
end
end,
ChkList), ChkList),
io:format("Fetch check took ~w microseconds checking list of length ~w~n", io:format("Fetch check took ~w microseconds checking list of length ~w~n",
[timer:now_diff(os:timestamp(), SW), length(ChkList)]). [timer:now_diff(os:timestamp(), SW), length(ChkList)]).