Mas i370 d31 sstmemory (#373)
* Don't use fetch_cache below the page_cache level * Don't time fetches due to SQN checks SQN checks are all background processes * Hibernate on SQN check SQN check in the penciller is used for journal (all object) folds, but mainly for journal compaction. Use this to trigger hibernation where SST files stay quiet after the compaction check. * Add catch for hibernate timeout * Scale cache_size with level Based on volume testing. Relatively speaking, far higher value to be gained from caches at higher levels (lower numbered levels). The caches at lower levels are proportionally much less efficient, so cache more at higher levels, where there is value, and less at lower levels where there is more cost relative to value. * OTP 24 fix to cherry-pick * Make minimal change to previous setup Making significant change appears to not have had the expected positive improvement - so a more minimal change is proposed. The assumption is that the cache only really gets used for double reads in the write path (e.g. where the application reads before a write) - and so a large cache makes minimal difference, but no cache still has a downside. * Introduce new types * Mas i370 d30 sstmemory (#374) * Don't time fetches due to SQN checks SQN checks are all background processes * Hibernate on SQN check SQN check in the penciller is used for journal (all object) folds, but mainly for journal compaction. Use this to trigger hibernation where SST files stay quiet after the compaction check. * Add catch for hibernate timeout * Scale cache_size with level Based on volume testing. Relatively speaking, far higher value to be gained from caches at higher levels (lower numbered levels). The caches at lower levels are proportionally much less efficient, so cache more at higher levels, where there is value, and less at lower levels where there is more cost relative to value. 
* Make minimal change to previous setup Making significant change appears to not have had the expected positive improvement - so a more minimal change is proposed. The assumption is that the cache only really gets used for double reads in the write path (e.g. where the application reads before a write) - and so a large cache makes minimal difference, but no cache still has a downside. * Introduce new types * More memory management Clear blockindex_cache on timeout, and manually GC on pclerk after work. * Add further garbage collection prompt After fetching level zero, there is a significant change in references in the penciller memory, so prompt a garbage_collect() at this point.
This commit is contained in:
parent
75edb7293d
commit
f8485210ed
4 changed files with 293 additions and 74 deletions
|
@ -396,7 +396,7 @@ pcl_fetchlevelzero(Pid, Slot, ReturnFun) ->
|
|||
%% The Key needs to be hashable (i.e. have a tag which indicates that the key
|
||||
%% can be looked up) - index entries are not hashable for example.
|
||||
%%
|
||||
%% If the hash is already knonw, call pcl_fetch/3 as segment_hash is a
|
||||
%% If the hash is already known, call pcl_fetch/3 as segment_hash is a
|
||||
%% relatively expensive hash function
|
||||
pcl_fetch(Pid, Key) ->
|
||||
Hash = leveled_codec:segment_hash(Key),
|
||||
|
@ -749,12 +749,14 @@ handle_call({fetch, Key, Hash, UseL0Index}, _From, State) ->
|
|||
{reply, R, State#state{timings=UpdTimings0, timings_countdown=CountDown}};
|
||||
handle_call({check_sqn, Key, Hash, SQN}, _From, State) ->
|
||||
{reply,
|
||||
compare_to_sqn(plain_fetch_mem(Key,
|
||||
Hash,
|
||||
State#state.manifest,
|
||||
State#state.levelzero_cache,
|
||||
State#state.levelzero_index),
|
||||
SQN),
|
||||
compare_to_sqn(
|
||||
fetch_sqn(
|
||||
Key,
|
||||
Hash,
|
||||
State#state.manifest,
|
||||
State#state.levelzero_cache,
|
||||
State#state.levelzero_index),
|
||||
SQN),
|
||||
State};
|
||||
handle_call({fetch_keys,
|
||||
StartKey, EndKey,
|
||||
|
@ -1066,8 +1068,19 @@ handle_cast(work_for_clerk, State) ->
|
|||
Backlog = N > ?WORKQUEUE_BACKLOG_TOLERANCE,
|
||||
leveled_log:log("P0024", [N, Backlog]),
|
||||
[TL|_Tail] = WL,
|
||||
ok = leveled_pclerk:clerk_push(State#state.clerk,
|
||||
{TL, State#state.manifest}),
|
||||
ok =
|
||||
leveled_pclerk:clerk_push(
|
||||
State#state.clerk, {TL, State#state.manifest}),
|
||||
case TL of
|
||||
0 ->
|
||||
% Just written a L0 so as LoopState now rewritten,
|
||||
% garbage collect to free as much as possible as
|
||||
% soon as possible
|
||||
garbage_collect();
|
||||
_ ->
|
||||
ok
|
||||
end,
|
||||
|
||||
{noreply,
|
||||
State#state{work_backlog=Backlog, work_ongoing=true}}
|
||||
end;
|
||||
|
@ -1450,22 +1463,27 @@ roll_memory(State, true) ->
|
|||
%% the result tuple includes the level at which the result was found.
|
||||
timed_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, Timings) ->
|
||||
SW = os:timestamp(),
|
||||
{R, Level} = fetch_mem(Key, Hash, Manifest, L0Cache, L0Index),
|
||||
{R, Level} =
|
||||
fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, fun timed_sst_get/4),
|
||||
UpdTimings = update_timings(SW, Timings, R, Level),
|
||||
{R, UpdTimings}.
|
||||
|
||||
|
||||
-spec plain_fetch_mem(tuple(), {integer(), integer()},
|
||||
leveled_pmanifest:manifest(), list(),
|
||||
leveled_pmem:index_array()) -> not_present|tuple().
|
||||
-spec fetch_sqn(
|
||||
leveled_codec:ledger_key(),
|
||||
leveled_codec:segment_hash(),
|
||||
leveled_pmanifest:manifest(),
|
||||
list(),
|
||||
leveled_pmem:index_array()) ->
|
||||
not_present|leveled_codec:ledger_kv()|leveled_codec:ledger_sqn().
|
||||
%% @doc
|
||||
%% Fetch the result from the penciller, starting by looking in the memory,
|
||||
%% and if it is not found looking down level by level through the LSM tree.
|
||||
plain_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index) ->
|
||||
R = fetch_mem(Key, Hash, Manifest, L0Cache, L0Index),
|
||||
fetch_sqn(Key, Hash, Manifest, L0Cache, L0Index) ->
|
||||
R = fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, fun sst_getsqn/4),
|
||||
element(1, R).
|
||||
|
||||
fetch_mem(Key, Hash, Manifest, L0Cache, L0Index) ->
|
||||
fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, FetchFun) ->
|
||||
PosList =
|
||||
case L0Index of
|
||||
none ->
|
||||
|
@ -1476,7 +1494,7 @@ fetch_mem(Key, Hash, Manifest, L0Cache, L0Index) ->
|
|||
L0Check = leveled_pmem:check_levelzero(Key, Hash, PosList, L0Cache),
|
||||
case L0Check of
|
||||
{false, not_found} ->
|
||||
fetch(Key, Hash, Manifest, 0, fun timed_sst_get/4);
|
||||
fetch(Key, Hash, Manifest, 0, FetchFun);
|
||||
{true, KV} ->
|
||||
{KV, memory}
|
||||
end.
|
||||
|
@ -1515,6 +1533,9 @@ timed_sst_get(PID, Key, Hash, Level) ->
|
|||
T0 = timer:now_diff(os:timestamp(), SW),
|
||||
log_slowfetch(T0, R, PID, Level, ?SLOW_FETCH).
|
||||
|
||||
sst_getsqn(PID, Key, Hash, _Level) ->
|
||||
leveled_sst:sst_getsqn(PID, Key, Hash).
|
||||
|
||||
log_slowfetch(T0, R, PID, Level, FetchTolerance) ->
|
||||
case {T0, R} of
|
||||
{T, R} when T < FetchTolerance ->
|
||||
|
@ -1528,29 +1549,26 @@ log_slowfetch(T0, R, PID, Level, FetchTolerance) ->
|
|||
end.
|
||||
|
||||
|
||||
-spec compare_to_sqn(tuple()|not_present, integer()) -> sqn_check().
|
||||
-spec compare_to_sqn(
|
||||
leveled_codec:ledger_kv()|leveled_codec:sqn()|not_present,
|
||||
integer()) -> sqn_check().
|
||||
%% @doc
|
||||
%% Check to see if the SQN in the penciller is after the SQN expected for an
|
||||
%% object (used to allow the journal to check compaction status from a cache
|
||||
%% of the ledger - objects with a more recent sequence number can be compacted).
|
||||
compare_to_sqn(not_present, _SQN) ->
|
||||
missing;
|
||||
compare_to_sqn(ObjSQN, SQN) when is_integer(ObjSQN), ObjSQN > SQN ->
|
||||
replaced;
|
||||
compare_to_sqn(ObjSQN, _SQN) when is_integer(ObjSQN) ->
|
||||
% Normally we would expect the SQN to be equal here, but
|
||||
% this also allows for the Journal to have a more advanced
|
||||
% value. We return current here as we wouldn't want to
|
||||
% compact that more advanced value, but this may cause
|
||||
% confusion in snapshots.
|
||||
current;
|
||||
compare_to_sqn(Obj, SQN) ->
|
||||
case Obj of
|
||||
not_present ->
|
||||
missing;
|
||||
Obj ->
|
||||
SQNToCompare = leveled_codec:strip_to_seqonly(Obj),
|
||||
if
|
||||
SQNToCompare > SQN ->
|
||||
replaced;
|
||||
true ->
|
||||
% Normally we would expect the SQN to be equal here, but
|
||||
% this also allows for the Journal to have a more advanced
|
||||
% value. We return true here as we wouldn't want to
|
||||
% compact that more advanced value, but this may cause
|
||||
% confusion in snapshots.
|
||||
current
|
||||
end
|
||||
end.
|
||||
compare_to_sqn(leveled_codec:strip_to_seqonly(Obj), SQN).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue