Merge pull request #307 from martinsumner/mas-i306-lessrandomreads

Mas i306 lessrandomreads
This commit is contained in:
Martin Sumner 2020-03-13 19:55:16 +00:00 committed by GitHub
commit aaf58dd343
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 15 deletions

View file

@ -130,6 +130,7 @@
-define(DELETE_TIMEOUT, 10000). -define(DELETE_TIMEOUT, 10000).
-define(TIMING_SAMPLECOUNTDOWN, 5000). -define(TIMING_SAMPLECOUNTDOWN, 5000).
-define(TIMING_SAMPLESIZE, 100). -define(TIMING_SAMPLESIZE, 100).
-define(GETPOS_FACTOR, 8).
-define(MAX_OBJECT_SIZE, 1000000000). -define(MAX_OBJECT_SIZE, 1000000000).
% 1GB but really should be much smaller than this % 1GB but really should be much smaller than this
@ -270,24 +271,28 @@ cdb_getpositions(Pid, SampleSize) ->
all -> all ->
FoldFun = FoldFun =
fun(Index, Acc) -> fun(Index, Acc) ->
cdb_getpositions_fromidx(Pid, all, Index, Acc) PosList = cdb_getpositions_fromidx(Pid, all, Index, []),
lists:merge(Acc, lists:sort(PosList))
end, end,
IdxList = lists:seq(0, 255), IdxList = lists:seq(0, 255),
lists:foldl(FoldFun, [], IdxList); lists:foldl(FoldFun, [], IdxList);
S0 -> S0 ->
FC = ?GETPOS_FACTOR * S0,
FoldFun = FoldFun =
fun({_R, Index}, Acc) -> fun({_R, Index}, Acc) ->
case length(Acc) of case length(Acc) of
S0 -> FC ->
Acc; Acc;
L when L < S0 -> L when L < FC ->
cdb_getpositions_fromidx(Pid, S0, Index, Acc) cdb_getpositions_fromidx(Pid, FC, Index, Acc)
end end
end, end,
RandFun = fun(X) -> {leveled_rand:uniform(), X} end, RandFun = fun(X) -> {leveled_rand:uniform(), X} end,
SeededL = lists:map(RandFun, lists:seq(0, 255)), SeededL = lists:map(RandFun, lists:seq(0, 255)),
SortedL = lists:keysort(1, SeededL), SortedL = lists:keysort(1, SeededL),
lists:foldl(FoldFun, [], SortedL) PosList0 = lists:foldl(FoldFun, [], SortedL),
P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)),
lists:sublist(lists:sort(PosList0), P1, S0)
end. end.
cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) -> cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) ->
@ -1226,10 +1231,9 @@ scan_index_returnpositions(Handle, Position, Count, PosList0) ->
[HPosition|PosList] [HPosition|PosList]
end end
end, end,
PosList = lists:foldl(AddPosFun, lists:foldl(AddPosFun,
PosList0, PosList0,
read_next_n_integerpairs(Handle, Count)), read_next_n_integerpairs(Handle, Count)).
lists:reverse(PosList).
%% Take an active file and write the hash details necessary to close that %% Take an active file and write the hash details necessary to close that

View file

@ -507,13 +507,22 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) ->
%% calls. %% calls.
check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) -> check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) ->
FN = leveled_cdb:cdb_filename(CDB), FN = leveled_cdb:cdb_filename(CDB),
SW = os:timestamp(),
PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize), PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize),
KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []), KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []),
Score = Score =
size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN), size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN),
leveled_log:log("IC004", [FN, Score]), safely_log_filescore(PositionList, FN, Score, SW),
Score. Score.
%% Emit the IC004 log line for a scored file, reporting the score, a mean
%% jump between the sampled positions, and the filename, with SW handed on
%% to leveled_log:log_timer/3 as the timer start.
%% When no positions were sampled the atom 'empty' is logged in place of
%% the jump, which avoids calling lists:last/1 and hd/1 on an empty list.
%% NOTE(review): using the first and last elements as the span presumes
%% PositionList arrives sorted in ascending order - confirm against caller.
safely_log_filescore(PositionList, FN, Score, SW) ->
    MeanJump =
        case PositionList of
            [] ->
                empty;
            _ ->
                Last = lists:last(PositionList),
                First = hd(PositionList),
                (Last - First) div length(PositionList)
        end,
    leveled_log:log_timer("IC004", [Score, MeanJump, FN], SW).
size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) ->
FoldFunForSizeCompare = FoldFunForSizeCompare =
fun(KS, {ActSize, RplSize}) -> fun(KS, {ActSize, RplSize}) ->
@ -548,7 +557,7 @@ size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) ->
{ActiveSize, ReplacedSize} = R0, {ActiveSize, ReplacedSize} = R0,
case ActiveSize + ReplacedSize of case ActiveSize + ReplacedSize of
0 -> 0 ->
100.0; 0.0;
_ -> _ ->
100 * ActiveSize / (ActiveSize + ReplacedSize) 100 * ActiveSize / (ActiveSize + ReplacedSize)
end. end.
@ -1113,7 +1122,6 @@ compact_empty_file_test() ->
FN1 = leveled_inker:filepath(RP, 1, new_journal), FN1 = leveled_inker:filepath(RP, 1, new_journal),
CDBopts = #cdb_options{binary_mode=true}, CDBopts = #cdb_options{binary_mode=true},
{ok, CDB1} = leveled_cdb:cdb_open_writer(FN1, CDBopts), {ok, CDB1} = leveled_cdb:cdb_open_writer(FN1, CDBopts),
ok = leveled_cdb:cdb_put(CDB1, {1, stnd, test_ledgerkey("Key1")}, <<>>),
{ok, FN2} = leveled_cdb:cdb_complete(CDB1), {ok, FN2} = leveled_cdb:cdb_complete(CDB1),
{ok, CDB2} = leveled_cdb:cdb_open_reader(FN2), {ok, CDB2} = leveled_cdb:cdb_open_reader(FN2),
LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}}, LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}},
@ -1121,7 +1129,7 @@ compact_empty_file_test() ->
{3, {o, "Bucket", "Key3", null}}], {3, {o, "Bucket", "Key3", null}}],
LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> false end, LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> false end,
Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4), Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4),
?assertMatch(100.0, Score1), ?assertMatch(0.0, Score1),
ok = leveled_cdb:cdb_deletepending(CDB2), ok = leveled_cdb:cdb_deletepending(CDB2),
ok = leveled_cdb:cdb_destroy(CDB2). ok = leveled_cdb:cdb_destroy(CDB2).

View file

@ -336,7 +336,7 @@
{info, "Scoring of compaction runs complete with highest score=~w " {info, "Scoring of compaction runs complete with highest score=~w "
++ "with run of run_length=~w"}}, ++ "with run of run_length=~w"}},
{"IC004", {"IC004",
{info, "Score for filename ~s is ~w"}}, {info, "Score=~w with mean_byte_jump=~w for filename ~s"}},
{"IC005", {"IC005",
{info, "Compaction to be performed on ~w files with score of ~w"}}, {info, "Compaction to be performed on ~w files with score of ~w"}},
{"IC006", {"IC006",