From 60e29f2ff084e35e5e72164ced44857389fbd805 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 6 Mar 2020 11:29:25 +0000 Subject: [PATCH] (slightly) less random reads on journal compaction --- src/leveled_cdb.erl | 54 ++++++++++++++++++++++++------------------ src/leveled_iclerk.erl | 4 ++++ src/leveled_log.erl | 6 ++++- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 49418ff..f40a17b 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -130,6 +130,7 @@ -define(DELETE_TIMEOUT, 10000). -define(TIMING_SAMPLECOUNTDOWN, 5000). -define(TIMING_SAMPLESIZE, 100). +-define(GETPOS_FACTOR, 8). -define(MAX_OBJECT_SIZE, 1000000000). % 1GB but really should be much smaller than this @@ -266,29 +267,36 @@ cdb_getpositions(Pid, SampleSize) -> % requests waiting for this to complete, loop over each of the 256 indexes % outside of the FSM processing loop - to allow for other messages to be % interleaved - case SampleSize of - all -> - FoldFun = - fun(Index, Acc) -> - cdb_getpositions_fromidx(Pid, all, Index, Acc) - end, - IdxList = lists:seq(0, 255), - lists:foldl(FoldFun, [], IdxList); - S0 -> - FoldFun = - fun({_R, Index}, Acc) -> - case length(Acc) of - S0 -> - Acc; - L when L < S0 -> - cdb_getpositions_fromidx(Pid, S0, Index, Acc) - end - end, - RandFun = fun(X) -> {leveled_rand:uniform(), X} end, - SeededL = lists:map(RandFun, lists:seq(0, 255)), - SortedL = lists:keysort(1, SeededL), - lists:foldl(FoldFun, [], SortedL) - end. + SW = os:timestamp(), + PosList = + case SampleSize of + all -> + FoldFun = + fun(Index, Acc) -> + cdb_getpositions_fromidx(Pid, all, Index, Acc) + end, + IdxList = lists:seq(0, 255), + lists:foldl(FoldFun, [], IdxList); + S0 -> + FC = ?GETPOS_FACTOR * S0, + FoldFun = + fun({_R, Index}, Acc) -> + case length(Acc) of + FC -> + Acc; + L when L < FC -> + cdb_getpositions_fromidx(Pid, FC, Index, Acc) + end + end, + RandFun = fun(X) -> {leveled_rand:uniform(), X} end, + SeededL = lists:map(RandFun, lists:seq(0, 255)), + SortedL = lists:keysort(1, SeededL), + PosList0 = lists:foldl(FoldFun, [], SortedL), + P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)), + lists:sublist(lists:sort(PosList0), P1, S0) + end, + leveled_log:log_timer("CDB22", [length(PosList)], SW), + PosList. cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) -> gen_fsm:sync_send_event(Pid, diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index fb8a088..21ed1ba 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -508,6 +508,10 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) -> FN = leveled_cdb:cdb_filename(CDB), PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize), + AvgJump = + (lists:last(PositionList) - lists:nth(1, PositionList)) + div length(PositionList), + leveled_log:log("IC014", [AvgJump]), KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []), Score = size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN), diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 27344dc..cfc2791 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -356,6 +356,8 @@ {"IC013", {warn, "File with name ~s to be ignored in manifest as scanning for " ++ "first key returned empty - maybe corrupted"}}, + {"IC014", + {info, "Fetching position list with average byte jump ~p"}}, {"CDB01", {info, "Opening file for writing with filename ~s"}}, @@ -404,7 +406,9 @@ {"CDB20", {warn, "Error ~w caught when safe reading a file to length ~w"}}, {"CDB21", - {warn, "File ~s to be deleted but already gone"}} + {warn, "File ~s to be deleted but already gone"}}, + {"CDB22", + {info, "Positions ~w fetch"}} ]).