From 6b3328f4a38a4b1b56c2ad82dcd722be3fc7cb8a Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Mon, 9 Mar 2020 17:45:06 +0000
Subject: [PATCH] Rationalise logging in commit

Also:

Sort the output from an 'all' fetch one loop at a time

Make sure the test of scoring an empty file is scoring an empty file

If it is an empty file we want to compact the fragment away - in which case it should score 0.0 not 100.0
---
 src/leveled_cdb.erl    | 64 ++++++++++++++++++++----------------------
 src/leveled_iclerk.erl | 22 +++++++++------
 src/leveled_log.erl    |  6 ++--
 3 files changed, 45 insertions(+), 47 deletions(-)

diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl
index f40a17b..3de322b 100644
--- a/src/leveled_cdb.erl
+++ b/src/leveled_cdb.erl
@@ -267,36 +267,33 @@ cdb_getpositions(Pid, SampleSize) ->
     % requests waiting for this to complete, loop over each of the 256 indexes
     % outside of the FSM processing loop - to allow for other messages to be
     % interleaved
-    SW = os:timestamp(),
-    PosList =
-        case SampleSize of
-            all ->
-                FoldFun =
-                    fun(Index, Acc) ->
-                        cdb_getpositions_fromidx(Pid, all, Index, Acc)
-                    end,
-                IdxList = lists:seq(0, 255),
-                lists:foldl(FoldFun, [], IdxList);
-            S0 ->
-                FC = ?GETPOS_FACTOR * S0,
-                FoldFun =
-                    fun({_R, Index}, Acc) ->
-                        case length(Acc) of
-                            FC ->
-                                Acc;
-                            L when L < FC ->
-                                cdb_getpositions_fromidx(Pid, FC, Index, Acc)
-                        end
-                    end,
-                RandFun = fun(X) -> {leveled_rand:uniform(), X} end,
-                SeededL = lists:map(RandFun, lists:seq(0, 255)),
-                SortedL = lists:keysort(1, SeededL),
-                PosList0 = lists:foldl(FoldFun, [], SortedL),
-                P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)),
-                lists:sublist(lists:sort(PosList0), P1, S0)
-        end,
-    leveled_log:log_timer("CDB22", [length(PosList)], SW),
-    PosList.
+ case SampleSize of + all -> + FoldFun = + fun(Index, Acc) -> + PosList = cdb_getpositions_fromidx(Pid, all, Index, []), + lists:merge(Acc, lists:sort(PosList)) + end, + IdxList = lists:seq(0, 255), + lists:foldl(FoldFun, [], IdxList); + S0 -> + FC = ?GETPOS_FACTOR * S0, + FoldFun = + fun({_R, Index}, Acc) -> + case length(Acc) of + FC -> + Acc; + L when L < FC -> + cdb_getpositions_fromidx(Pid, FC, Index, Acc) + end + end, + RandFun = fun(X) -> {leveled_rand:uniform(), X} end, + SeededL = lists:map(RandFun, lists:seq(0, 255)), + SortedL = lists:keysort(1, SeededL), + PosList0 = lists:foldl(FoldFun, [], SortedL), + P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)), + lists:sublist(lists:sort(PosList0), P1, S0) + end. cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) -> gen_fsm:sync_send_event(Pid, @@ -1234,10 +1231,9 @@ scan_index_returnpositions(Handle, Position, Count, PosList0) -> [HPosition|PosList] end end, - PosList = lists:foldl(AddPosFun, - PosList0, - read_next_n_integerpairs(Handle, Count)), - lists:reverse(PosList). + lists:foldl(AddPosFun, + PosList0, + read_next_n_integerpairs(Handle, Count)). %% Take an active file and write the hash details necessary to close that diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 21ed1ba..1c969f6 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -507,17 +507,22 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> %% calls. 
check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) -> FN = leveled_cdb:cdb_filename(CDB), + SW = os:timestamp(), PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize), - AvgJump = - (lists:last(PositionList) - lists:nth(1, PositionList)) - div length(PositionList), - leveled_log:log("IC014", [AvgJump]), KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []), Score = size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN), - leveled_log:log("IC004", [FN, Score]), + safely_log_filescore(PositionList, FN, Score, SW), Score. +safely_log_filescore([], FN, Score, SW) -> + leveled_log:log_timer("IC004", [Score, empty, FN], SW); +safely_log_filescore(PositionList, FN, Score, SW) -> + AvgJump = + (lists:last(PositionList) - lists:nth(1, PositionList)) + div length(PositionList), + leveled_log:log_timer("IC004", [Score, AvgJump, FN], SW). + size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> FoldFunForSizeCompare = fun(KS, {ActSize, RplSize}) -> @@ -546,13 +551,13 @@ size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> % expected format of the key {ActSize, RplSize} end - end, + end, R0 = lists:foldl(FoldFunForSizeCompare, {0, 0}, KeySizeList), {ActiveSize, ReplacedSize} = R0, case ActiveSize + ReplacedSize of 0 -> - 100.0; + 0.0; _ -> 100 * ActiveSize / (ActiveSize + ReplacedSize) end. 
@@ -1117,7 +1122,6 @@ compact_empty_file_test() -> FN1 = leveled_inker:filepath(RP, 1, new_journal), CDBopts = #cdb_options{binary_mode=true}, {ok, CDB1} = leveled_cdb:cdb_open_writer(FN1, CDBopts), - ok = leveled_cdb:cdb_put(CDB1, {1, stnd, test_ledgerkey("Key1")}, <<>>), {ok, FN2} = leveled_cdb:cdb_complete(CDB1), {ok, CDB2} = leveled_cdb:cdb_open_reader(FN2), LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}}, @@ -1125,7 +1129,7 @@ compact_empty_file_test() -> {3, {o, "Bucket", "Key3", null}}], LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> false end, Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4), - ?assertMatch(100.0, Score1), + ?assertMatch(0.0, Score1), ok = leveled_cdb:cdb_deletepending(CDB2), ok = leveled_cdb:cdb_destroy(CDB2). diff --git a/src/leveled_log.erl b/src/leveled_log.erl index cfc2791..ddc624d 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -336,7 +336,7 @@ {info, "Scoring of compaction runs complete with highest score=~w " ++ "with run of run_length=~w"}}, {"IC004", - {info, "Score for filename ~s is ~w"}}, + {info, "Score=~w with mean_byte_jump=~w for filename ~s"}}, {"IC005", {info, "Compaction to be performed on ~w files with score of ~w"}}, {"IC006", @@ -406,9 +406,7 @@ {"CDB20", {warn, "Error ~w caught when safe reading a file to length ~w"}}, {"CDB21", - {warn, "File ~s to be deleted but already gone"}}, - {"CDB22", - {info, "Positions ~w fetch"}} + {warn, "File ~s to be deleted but already gone"}} ]).