Boost docs
Try and make this easier to follow with docs and specs
This commit is contained in:
parent
979c65f0af
commit
7885f93ba6
1 changed files with 47 additions and 6 deletions
|
@ -118,7 +118,19 @@
|
||||||
compaction_perc :: float() | undefined}).
|
compaction_perc :: float() | undefined}).
|
||||||
|
|
||||||
-type iclerk_options() :: #iclerk_options{}.
|
-type iclerk_options() :: #iclerk_options{}.
|
||||||
-type candidate():: #candidate{}.
|
-type candidate() :: #candidate{}.
|
||||||
|
-type score_parameters() :: {integer(), float(), float()}.
|
||||||
|
% Score parameters are a tuple
|
||||||
|
% - of maximum run length; how long a run of consecutive files can be for
|
||||||
|
% one compaction run
|
||||||
|
% - maximum run compaction target; percentage space which should be
|
||||||
|
% released from a compaction run of the maximum length to make it a run
|
||||||
|
% worthwhile of compaction (released space is 100.0 - target e.g. 70.0
|
||||||
|
% means that 30.0% should be released)
|
||||||
|
% - single_file compaction target; percentage space which should be
|
||||||
|
% released from a compaction run of a single file to make it a run
|
||||||
|
% worthwhile of compaction (released space is 100.0 - target e.g. 70.0
|
||||||
|
% means that 30.0% should be released)
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% API
|
%%% API
|
||||||
|
@ -462,15 +474,31 @@ fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) ->
|
||||||
fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List).
|
fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List).
|
||||||
|
|
||||||
|
|
||||||
-spec assess_candidates(list(candidate()), tuple())
|
-spec assess_candidates(list(candidate()), score_parameters())
|
||||||
-> {list(candidate()), float()}.
|
-> {list(candidate()), float()}.
|
||||||
%% @doc
|
%% @doc
|
||||||
%% For each run length we need to assess all the runs of candidates
|
%% For each run length we need to assess all the possible runs of candidates,
|
||||||
%% The best scoring run should emerge as the winner
|
%% to determine which is the best score - to be put forward as the best
|
||||||
|
%% candidate run for compaction.
|
||||||
|
%%
|
||||||
%% Although this requires many loops over the list of the candidate, as the
|
%% Although this requires many loops over the list of the candidate, as the
|
||||||
%% file scores have already been calculated the cost per loop should not be
|
%% file scores have already been calculated the cost per loop should not be
|
||||||
%% a high burden. Reducing the maximum run length, will reduce the cost of
|
%% a high burden. Reducing the maximum run length, will reduce the cost of
|
||||||
%% this exercise should be a problem
|
%% this exercise should be a problem.
|
||||||
|
%%
|
||||||
|
%% The score parameters are used to produce the score of the compaction run,
|
||||||
|
%% with a higher score being better. The parameters are the maximum run
|
||||||
|
%% length and the compaction targets (for max run length and single file).
|
||||||
|
%% The score of an individual file is the approximate percentage of the space
|
||||||
|
%% that would be retained after compaction (e.g. 100 less the percentage of
|
||||||
|
%% space wasted by historic objects).
|
||||||
|
%%
|
||||||
|
%% So a file score of 60% indicates that 40% of the space would be
|
||||||
|
%% reclaimed following compaction. A single file target of 50% would not be
|
||||||
|
%% met for this file. However, if there are 4 consecutive files scoring 60%,
|
||||||
|
%% and the maximum run length is 4, and the maximum run length compaction
|
||||||
|
%% target is 70% - then this run of four files would be a viable candidate
|
||||||
|
%% for compaction.
|
||||||
assess_candidates(AllCandidates, Params) ->
|
assess_candidates(AllCandidates, Params) ->
|
||||||
MaxRunLength = min(element(1, Params), length(AllCandidates)),
|
MaxRunLength = min(element(1, Params), length(AllCandidates)),
|
||||||
NaiveBestRun = lists:sublist(AllCandidates, MaxRunLength),
|
NaiveBestRun = lists:sublist(AllCandidates, MaxRunLength),
|
||||||
|
@ -478,7 +506,6 @@ assess_candidates(AllCandidates, Params) ->
|
||||||
fun(RunLength, Best) ->
|
fun(RunLength, Best) ->
|
||||||
assess_for_runlength(RunLength, AllCandidates, Params, Best)
|
assess_for_runlength(RunLength, AllCandidates, Params, Best)
|
||||||
end,
|
end,
|
||||||
|
|
||||||
% Check all run lengths to find the best candidate. Reverse the list of
|
% Check all run lengths to find the best candidate. Reverse the list of
|
||||||
% run lengths, so that longer runs win on equality of score
|
% run lengths, so that longer runs win on equality of score
|
||||||
lists:foldl(FoldFun,
|
lists:foldl(FoldFun,
|
||||||
|
@ -486,6 +513,14 @@ assess_candidates(AllCandidates, Params) ->
|
||||||
lists:reverse(lists:seq(1, MaxRunLength))).
|
lists:reverse(lists:seq(1, MaxRunLength))).
|
||||||
|
|
||||||
|
|
||||||
|
-spec assess_for_runlength(integer(), list(candidate()), score_parameters(),
|
||||||
|
{list(candidate()), float()})
|
||||||
|
-> {list(candidate()), float()}.
|
||||||
|
%% @doc
|
||||||
|
%% For a given run length, calculate the scores for all consecutive runs of
|
||||||
|
%% files, comparing the score with the best run which has beens een so far.
|
||||||
|
%% The best is a tuple of the actual run of candidates, along with the score
|
||||||
|
%% achieved for that run
|
||||||
assess_for_runlength(RunLength, AllCandidates, Params, Best) ->
|
assess_for_runlength(RunLength, AllCandidates, Params, Best) ->
|
||||||
NumberOfRuns = 1 + length(AllCandidates) - RunLength,
|
NumberOfRuns = 1 + length(AllCandidates) - RunLength,
|
||||||
FoldFun =
|
FoldFun =
|
||||||
|
@ -500,6 +535,12 @@ assess_for_runlength(RunLength, AllCandidates, Params, Best) ->
|
||||||
lists:foldl(FoldFun, Best, lists:seq(1, NumberOfRuns)).
|
lists:foldl(FoldFun, Best, lists:seq(1, NumberOfRuns)).
|
||||||
|
|
||||||
|
|
||||||
|
-spec score_run(list(candidate()), score_parameters()) -> float().
|
||||||
|
%% @doc
|
||||||
|
%% Score a run. Caluclate the avergae score across all the files in the run,
|
||||||
|
%% and deduct that from a target score. Good candidate runs for comapction
|
||||||
|
%% have larger (positive) scores. Bad candidate runs for compaction have
|
||||||
|
%% negative scores.
|
||||||
score_run([], _Params) ->
|
score_run([], _Params) ->
|
||||||
0.0;
|
0.0;
|
||||||
score_run(Run, {MaxRunLength, MR_CT, SF_CT}) ->
|
score_run(Run, {MaxRunLength, MR_CT, SF_CT}) ->
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue