(slightly) less random reads on journal compaction

This commit is contained in:
Martin Sumner 2020-03-06 11:29:25 +00:00
parent 4d550ef2a1
commit 60e29f2ff0
3 changed files with 40 additions and 24 deletions

View file

@ -130,6 +130,7 @@
-define(DELETE_TIMEOUT, 10000). -define(DELETE_TIMEOUT, 10000).
-define(TIMING_SAMPLECOUNTDOWN, 5000). -define(TIMING_SAMPLECOUNTDOWN, 5000).
-define(TIMING_SAMPLESIZE, 100). -define(TIMING_SAMPLESIZE, 100).
-define(GETPOS_FACTOR, 8).
-define(MAX_OBJECT_SIZE, 1000000000). -define(MAX_OBJECT_SIZE, 1000000000).
% 1GB but really should be much smaller than this % 1GB but really should be much smaller than this
@ -266,29 +267,36 @@ cdb_getpositions(Pid, SampleSize) ->
% requests waiting for this to complete, loop over each of the 256 indexes % requests waiting for this to complete, loop over each of the 256 indexes
% outside of the FSM processing loop - to allow for other messages to be % outside of the FSM processing loop - to allow for other messages to be
% interleaved % interleaved
case SampleSize of SW = os:timestamp(),
all -> PosList =
FoldFun = case SampleSize of
fun(Index, Acc) -> all ->
cdb_getpositions_fromidx(Pid, all, Index, Acc) FoldFun =
end, fun(Index, Acc) ->
IdxList = lists:seq(0, 255), cdb_getpositions_fromidx(Pid, all, Index, Acc)
lists:foldl(FoldFun, [], IdxList); end,
S0 -> IdxList = lists:seq(0, 255),
FoldFun = lists:foldl(FoldFun, [], IdxList);
fun({_R, Index}, Acc) -> S0 ->
case length(Acc) of FC = ?GETPOS_FACTOR * S0,
S0 -> FoldFun =
Acc; fun({_R, Index}, Acc) ->
L when L < S0 -> case length(Acc) of
cdb_getpositions_fromidx(Pid, S0, Index, Acc) FC ->
end Acc;
end, L when L < FC ->
RandFun = fun(X) -> {leveled_rand:uniform(), X} end, cdb_getpositions_fromidx(Pid, FC, Index, Acc)
SeededL = lists:map(RandFun, lists:seq(0, 255)), end
SortedL = lists:keysort(1, SeededL), end,
lists:foldl(FoldFun, [], SortedL) RandFun = fun(X) -> {leveled_rand:uniform(), X} end,
end. SeededL = lists:map(RandFun, lists:seq(0, 255)),
SortedL = lists:keysort(1, SeededL),
PosList0 = lists:foldl(FoldFun, [], SortedL),
P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)),
lists:sublist(lists:sort(PosList0), P1, S0)
end,
leveled_log:log_timer("CDB22", [length(PosList)], SW),
PosList.
cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) -> cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) ->
gen_fsm:sync_send_event(Pid, gen_fsm:sync_send_event(Pid,

View file

@ -508,6 +508,10 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) ->
check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) -> check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) ->
FN = leveled_cdb:cdb_filename(CDB), FN = leveled_cdb:cdb_filename(CDB),
PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize), PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize),
AvgJump =
(lists:last(PositionList) - lists:nth(1, PositionList))
div length(PositionList),
leveled_log:log("IC014", [AvgJump]),
KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []), KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []),
Score = Score =
size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN), size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN),

View file

@ -356,6 +356,8 @@
{"IC013", {"IC013",
{warn, "File with name ~s to be ignored in manifest as scanning for " {warn, "File with name ~s to be ignored in manifest as scanning for "
++ "first key returned empty - maybe corrupted"}}, ++ "first key returned empty - maybe corrupted"}},
{"IC014",
{info, "Fetching position list with average byte jump ~p"}},
{"CDB01", {"CDB01",
{info, "Opening file for writing with filename ~s"}}, {info, "Opening file for writing with filename ~s"}},
@ -404,7 +406,9 @@
{"CDB20", {"CDB20",
{warn, "Error ~w caught when safe reading a file to length ~w"}}, {warn, "Error ~w caught when safe reading a file to length ~w"}},
{"CDB21", {"CDB21",
{warn, "File ~s to be deleted but already gone"}} {warn, "File ~s to be deleted but already gone"}},
{"CDB22",
{info, "Positions ~w fetch"}}
]). ]).