diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 49418ff..3de322b 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -130,6 +130,7 @@ -define(DELETE_TIMEOUT, 10000). -define(TIMING_SAMPLECOUNTDOWN, 5000). -define(TIMING_SAMPLESIZE, 100). +-define(GETPOS_FACTOR, 8). -define(MAX_OBJECT_SIZE, 1000000000). % 1GB but really should be much smaller than this @@ -270,24 +271,28 @@ cdb_getpositions(Pid, SampleSize) -> all -> FoldFun = fun(Index, Acc) -> - cdb_getpositions_fromidx(Pid, all, Index, Acc) + PosList = cdb_getpositions_fromidx(Pid, all, Index, []), + lists:merge(Acc, lists:sort(PosList)) end, IdxList = lists:seq(0, 255), lists:foldl(FoldFun, [], IdxList); S0 -> + FC = ?GETPOS_FACTOR * S0, FoldFun = fun({_R, Index}, Acc) -> case length(Acc) of - S0 -> + FC -> Acc; - L when L < S0 -> - cdb_getpositions_fromidx(Pid, S0, Index, Acc) + L when L < FC -> + cdb_getpositions_fromidx(Pid, FC, Index, Acc) end end, RandFun = fun(X) -> {leveled_rand:uniform(), X} end, SeededL = lists:map(RandFun, lists:seq(0, 255)), SortedL = lists:keysort(1, SeededL), - lists:foldl(FoldFun, [], SortedL) + PosList0 = lists:foldl(FoldFun, [], SortedL), + P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)), + lists:sublist(lists:sort(PosList0), P1, S0) end. cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) -> @@ -1226,10 +1231,9 @@ scan_index_returnpositions(Handle, Position, Count, PosList0) -> [HPosition|PosList] end end, - PosList = lists:foldl(AddPosFun, - PosList0, - read_next_n_integerpairs(Handle, Count)), - lists:reverse(PosList). + lists:foldl(AddPosFun, + PosList0, + read_next_n_integerpairs(Handle, Count)). %% Take an active file and write the hash details necessary to close that diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index 95ee191..e996c15 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -507,13 +507,22 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> %% calls. check_single_file(CDB, FilterFun, FilterServer, MaxSQN, SampleSize, BatchSize) -> FN = leveled_cdb:cdb_filename(CDB), + SW = os:timestamp(), PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize), KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []), Score = size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN), - leveled_log:log("IC004", [FN, Score]), + safely_log_filescore(PositionList, FN, Score, SW), Score. +safely_log_filescore([], FN, Score, SW) -> + leveled_log:log_timer("IC004", [Score, empty, FN], SW); +safely_log_filescore(PositionList, FN, Score, SW) -> + AvgJump = + (lists:last(PositionList) - lists:nth(1, PositionList)) + div length(PositionList), + leveled_log:log_timer("IC004", [Score, AvgJump, FN], SW). + size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> FoldFunForSizeCompare = fun(KS, {ActSize, RplSize}) -> @@ -548,7 +557,7 @@ size_comparison_score(KeySizeList, FilterFun, FilterServer, MaxSQN) -> {ActiveSize, ReplacedSize} = R0, case ActiveSize + ReplacedSize of 0 -> - 100.0; + 0.0; _ -> 100 * ActiveSize / (ActiveSize + ReplacedSize) end. @@ -1125,7 +1134,6 @@ compact_empty_file_test() -> FN1 = leveled_inker:filepath(RP, 1, new_journal), CDBopts = #cdb_options{binary_mode=true}, {ok, CDB1} = leveled_cdb:cdb_open_writer(FN1, CDBopts), - ok = leveled_cdb:cdb_put(CDB1, {1, stnd, test_ledgerkey("Key1")}, <<>>), {ok, FN2} = leveled_cdb:cdb_complete(CDB1), {ok, CDB2} = leveled_cdb:cdb_open_reader(FN2), LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}}, @@ -1133,7 +1141,7 @@ compact_empty_file_test() -> {3, {o, "Bucket", "Key3", null}}], LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> replaced end, Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4), - ?assertMatch(100.0, Score1), + ?assertMatch(0.0, Score1), ok = leveled_cdb:cdb_deletepending(CDB2), ok = leveled_cdb:cdb_destroy(CDB2). diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 27344dc..51072fa 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -336,7 +336,7 @@ {info, "Scoring of compaction runs complete with highest score=~w " ++ "with run of run_length=~w"}}, {"IC004", - {info, "Score for filename ~s is ~w"}}, + {info, "Score=~w with mean_byte_jump=~w for filename ~s"}}, {"IC005", {info, "Compaction to be performed on ~w files with score of ~w"}}, {"IC006",