Refactor - STILL BROKEN
Will at least compile, but it still needs a massive eunit rewrite and the associated debugging to get back to a potentially verifiable state.
parent 08641e05cf
commit 0204a23a58
5 changed files with 527 additions and 766 deletions
@@ -25,26 +25,28 @@
 
 -include("include/leveled.hrl").
 
--export([init/1,
+-export([
+        init/1,
         handle_call/3,
         handle_cast/2,
         handle_info/2,
         terminate/2,
-        clerk_new/1,
+        code_change/3
+        ]).
+
+-export([
+        clerk_new/2,
         clerk_prompt/1,
-        clerk_manifestchange/3,
-        code_change/3]).
+        clerk_close/1
+        ]).
 
 -include_lib("eunit/include/eunit.hrl").
 
--define(MAX_TIMEOUT, 2000).
--define(MIN_TIMEOUT, 50).
--define(END_KEY, {null, null, null, null}).
+-define(MAX_TIMEOUT, 1000).
+-define(MIN_TIMEOUT, 200).
 
 -record(state, {owner :: pid(),
-                manifest, % ets table reference
-                change_pending=false :: boolean(),
-                work_item :: #penciller_work{}|null}).
+                root_path :: string()}).
 
 %%%============================================================================
 %%% API
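The first export group above is the gen_server behaviour callbacks; the second is the clerk's client API. The bodies of clerk_new/2, clerk_prompt/1 and clerk_close/1 sit outside the hunks shown on this page, so the following is only a rough usage sketch: it assumes clerk_new/2 takes the owning penciller pid and the ledger root path (mirroring the {load, Owner, RootPath} call in the next hunk) and that the calls return {ok, Pid} / ok in the usual gen_server style. The module name and return shapes are hypothetical.

    -module(clerk_usage_sketch).
    -export([run/2]).

    %% Hypothetical usage only: argument order and return values for the clerk
    %% API are assumptions inferred from this diff, not confirmed signatures.
    run(PencillerPid, RootPath) ->
        {ok, Clerk} = leveled_pclerk:clerk_new(PencillerPid, RootPath),
        ok = leveled_pclerk:clerk_prompt(Clerk),
        ok = leveled_pclerk:clerk_close(Clerk).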
@@ -69,26 +71,22 @@ clerk_close(Pid) ->
 init([]) ->
     {ok, #state{}}.
 
-handle_call({load, Owner, Manifest}, _From, State) ->
-    {reply,
-        ok,
-        State#state{owner=Owner, manifest=Manifest},
-        ?MIN_TIMEOUT}.
+handle_call({load, Owner, RootPath}, _From, State) ->
+    {reply, ok, State#state{owner=Owner, root_path=RootPath}, ?MIN_TIMEOUT}.
 
 handle_cast(prompt, State) ->
-    {noreply, State, ?MIN_TIMEOUT};
+    handle_info(timeout, State);
 handle_cast(close, State) ->
-    (stop, normal, State).
+    {stop, normal, State}.
 
-handle_info(timeout, State=#state{change_pending=Pnd}) when Pnd == false ->
+handle_info(timeout, State) ->
     case requestandhandle_work(State) of
-        {false, Timeout} ->
-            {noreply, State, Timeout};
-        {true, WI} ->
+        false ->
+            {noreply, State, ?MAX_TIMEOUT};
+        true ->
             % No timeout now as will wait for call to return manifest
             % change
-            {noreply,
-                State#state{change_pending=true, work_item=WI}}
+            {noreply, State, ?MIN_TIMEOUT}
     end.
 
 
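The callbacks above lean on the standard OTP timeout idiom: returning {noreply, State, Timeout} (or {reply, _, State, Timeout}) arms a timer, and if no other message arrives the runtime delivers timeout to handle_info/2, which polls for work and re-arms with either ?MIN_TIMEOUT or ?MAX_TIMEOUT. A minimal, self-contained sketch of that loop follows; the module name and the check_for_work/0 stand-in are invented for illustration and are not leveled code.

    -module(poll_loop_sketch).
    -behaviour(gen_server).
    -export([start_link/0, init/1, handle_call/3, handle_cast/2,
             handle_info/2, terminate/2, code_change/3]).

    -define(MIN_TIMEOUT, 200).
    -define(MAX_TIMEOUT, 1000).

    start_link() ->
        gen_server:start_link(?MODULE, [], []).

    init([]) ->
        %% Arm the first timeout; if nothing is received, timeout fires.
        {ok, #{}, ?MIN_TIMEOUT}.

    handle_call(_Msg, _From, State) ->
        {reply, ok, State, ?MIN_TIMEOUT}.

    handle_cast(_Msg, State) ->
        {noreply, State, ?MIN_TIMEOUT}.

    handle_info(timeout, State) ->
        case check_for_work() of
            false ->
                %% Nothing to do - back off to the longer poll interval.
                {noreply, State, ?MAX_TIMEOUT};
            true ->
                %% Work completed synchronously - poll again soon.
                {noreply, State, ?MIN_TIMEOUT}
        end.

    terminate(_Reason, _State) -> ok.
    code_change(_OldVsn, State, _Extra) -> {ok, State}.

    %% Stand-in for a real work check such as pcl_workforclerk/1.
    check_for_work() -> false.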
@@ -105,182 +103,116 @@ code_change(_OldVsn, State, _Extra) ->
 
 requestandhandle_work(State) ->
     case leveled_penciller:pcl_workforclerk(State#state.owner) of
-        false ->
+        none ->
             leveled_log:log("PC006", []),
             false;
-        {SrcLevel, ManifestSQN} ->
-            {Additions, Removals} = merge(Level,
-                                            State#state.manifest,
-                                            ManifestSQN),
+        {SrcLevel, Manifest} ->
+            {UpdManifest, EntriesToDelete} = merge(SrcLevel,
+                                                    Manifest,
+                                                    State#state.root_path),
             leveled_log:log("PC007", []),
             ok = leveled_penciller:pcl_commitmanifestchange(State#state.owner,
-                                                            SrcLevel,
-                                                            Additions,
-                                                            Removals,
-                                                            ManifestSQN),
+                                                            UpdManifest),
+            ok = leveled_manifest:save_manifest(UpdManifest,
+                                                State#state.root_path),
+            ok = notify_deletions(EntriesToDelete, State#state.owner),
+            true
     end.
 
 
-merge(SrcLevel, Manifest, ManifestSQN) ->
-    SrcF = select_filetomerge(SrcLevel, Manifest),
-
-
-    Candidates = check_for_merge_candidates(SrcF, SinkFiles),
-    %% TODO:
-    %% Need to work out if this is the top level
-    %% And then tell merge process to create files at the top level
-    %% Which will include the reaping of expired tombstones
-    leveled_log:log("PC008", [SrcLevel, length(Candidates)]),
-
-    MergedFiles = case length(Candidates) of
+merge(SrcLevel, Manifest, RootPath) ->
+    Src = leveled_manifest:mergefile_selector(Manifest, SrcLevel),
+    NewSQN = leveled_manifest:get_manifest_sqn(Manifest) + 1,
+    SinkList = leveled_manifest:merge_lookup(Manifest,
+                                                SrcLevel + 1,
+                                                Src#manifest_entry.start_key,
+                                                Src#manifest_entry.end_key),
+    Candidates = length(SinkList),
+    leveled_log:log("PC008", [SrcLevel, Candidates]),
+    case Candidates of
         0 ->
             %% If no overlapping candiates, manifest change only required
             %%
             %% TODO: need to think still about simply renaming when at
             %% lower level
             leveled_log:log("PC009",
-                            [SrcF#manifest_entry.filename, SrcLevel + 1]),
-            [SrcF];
+                            [Src#manifest_entry.filename, SrcLevel + 1]),
+            Man0 = leveled_manifest:remove_manifest_entry(Manifest,
+                                                            NewSQN,
+                                                            SrcLevel,
+                                                            Src),
+            Man1 = leveled_manifest:insert_manifest_entry(Man0,
+                                                            NewSQN,
+                                                            SrcLevel + 1,
+                                                            Src),
+            {Man1, []};
         _ ->
-            perform_merge({SrcF#manifest_entry.owner,
-                            SrcF#manifest_entry.filename},
-                            Candidates,
-                            {SrcLevel, WI#penciller_work.target_is_basement},
-                            {WI#penciller_work.ledger_filepath,
-                                WI#penciller_work.next_sqn})
-    end,
-    NewLevel = lists:sort(lists:append(MergedFiles, Others)),
-    UpdMFest2 = lists:keystore(SrcLevel + 1,
-                                1,
-                                UpdMFest1,
-                                {SrcLevel + 1, NewLevel}),
-
-    ok = filelib:ensure_dir(WI#penciller_work.manifest_file),
-    {ok, Handle} = file:open(WI#penciller_work.manifest_file,
-                                [binary, raw, write]),
-    ok = file:write(Handle, term_to_binary(UpdMFest2)),
-    ok = file:close(Handle),
-    case lists:member(SrcF, MergedFiles) of
-        true ->
-            {UpdMFest2, Candidates};
-        false ->
-            %% Can rub out src file as it is not part of output
-            {UpdMFest2, Candidates ++ [SrcF]}
+            FilePath = leveled_penciller:filepath(RootPath,
+                                                    NewSQN,
+                                                    new_merge_files),
+            perform_merge(Manifest, Src, SinkList, SrcLevel, FilePath, NewSQN)
     end.
 
 
-mark_for_delete([], _Penciller) ->
+notify_deletions([], _Penciller) ->
     ok;
-mark_for_delete([Head|Tail], Penciller) ->
+notify_deletions([Head|Tail], Penciller) ->
     ok = leveled_sst:sst_setfordelete(Head#manifest_entry.owner, Penciller),
-    mark_for_delete(Tail, Penciller).
-
-
-check_for_merge_candidates(SrcF, SinkFiles) ->
-    lists:partition(fun(Ref) ->
-                        case {Ref#manifest_entry.start_key,
-                                Ref#manifest_entry.end_key} of
-                            {_, EK} when SrcF#manifest_entry.start_key > EK ->
-                                false;
-                            {SK, _} when SrcF#manifest_entry.end_key < SK ->
-                                false;
-                            _ ->
-                                true
-                        end end,
-                    SinkFiles).
-
-
-%% An algorithm for discovering which files to merge ....
-%% We can find the most optimal file:
-%% - The one with the most overlapping data below?
-%% - The one that overlaps with the fewest files below?
-%% - The smallest file?
-%% We could try and be fair in some way (merge oldest first)
-%% Ultimately, there is a lack of certainty that being fair or optimal is
-%% genuinely better - eventually every file has to be compacted.
-%%
-%% Hence, the initial implementation is to select files to merge at random
-
-select_filetomerge(SrcLevel, Manifest, ManifestSQN) ->
-    Level = leveled_manifest:range_lookup(Manifest,
-                                            1,
-                                            all,
-                                            ?END_KEY,
-                                            ManifestSQN),
-
-    FN = lists:nth(random:uniform(length(Level)), Level).
-
-
+    notify_deletions(Tail, Penciller).
 
 
 %% Assumption is that there is a single SST from a higher level that needs
-%% to be merged into multiple SSTs at a lower level. This should create an
-%% entirely new set of SSTs, and the calling process can then update the
-%% manifest.
+%% to be merged into multiple SSTs at a lower level.
 %%
-%% Once the FileToMerge has been emptied, the remainder of the candidate list
-%% needs to be placed in a remainder SST that may be of a sub-optimal (small)
-%% size. This stops the need to perpetually roll over the whole level if the
-%% level consists of already full files. Some smartness may be required when
-%% selecting the candidate list so that small files just outside the candidate
-%% list be included to avoid a proliferation of small files.
-%%
-%% FileToMerge should be a tuple of {FileName, Pid} where the Pid is the Pid of
-%% the gen_server leveled_sft process representing the file.
-%%
-%% CandidateList should be a list of {StartKey, EndKey, Pid} tuples
-%% representing different gen_server leveled_sft processes, sorted by StartKey.
-%%
-%% The level is the level which the new files should be created at.
+%% SrcLevel is the level of the src sst file, the sink should be srcLevel + 1
 
-perform_merge({SrcPid, SrcFN}, CandidateList, LevelInfo, {Filepath, MSN}) ->
-    leveled_log:log("PC010", [SrcFN, MSN]),
-    PointerList = lists:map(fun(P) ->
-                                {next, P#manifest_entry.owner, all} end,
-                            CandidateList),
-    MaxSQN = leveled_sst:sst_getmaxsequencenumber(SrcPid),
-    do_merge([{next, SrcPid, all}],
-                PointerList,
-                LevelInfo,
-                {Filepath, MSN},
-                MaxSQN,
-                0,
-                []).
+perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN) ->
+    leveled_log:log("PC010", [Src#manifest_entry.filename, NewSQN]),
+    SrcList = [{next, Src#manifest_entry.owner, all}],
+    SinkPointerList = leveled_manifest:pointer_convert(Manifest, SinkList),
+    MaxSQN = leveled_sst:sst_getmaxsequencenumber(Src#manifest_entry.owner),
+    SinkLevel = SrcLevel + 1,
+    SinkBasement = leveled_basement:is_basement(Manifest, SinkLevel),
+    Man0 = do_merge(SrcList, SinkPointerList,
+                    SinkLevel, SinkBasement,
+                    RootPath, NewSQN, MaxSQN,
+                    0, Manifest),
+    RemoveFun =
+        fun(Entry, AccMan) ->
+            leveled_manifest:remove_manifest_entry(AccMan,
+                                                    NewSQN,
+                                                    SinkLevel,
+                                                    Entry)
+        end,
+    Man1 = lists:foldl(RemoveFun, Man0, SinkList),
+    leveled_manifest:remove_manifest_entry(Man1, NewSQN, SrcLevel, Src).
 
-do_merge([], [], {SrcLevel, _IsB}, {_Filepath, MSN}, _MaxSQN,
-                                                    FileCounter, OutList) ->
-    leveled_log:log("PC011", [MSN, SrcLevel, FileCounter]),
-    OutList;
-do_merge(KL1, KL2, {SrcLevel, IsB}, {Filepath, MSN}, MaxSQN,
-                                                    FileCounter, OutList) ->
-    FileName = lists:flatten(io_lib:format(Filepath ++ "_~w_~w.sst",
-                                            [SrcLevel + 1, FileCounter])),
-    leveled_log:log("PC012", [MSN, FileName]),
+do_merge([], [], SinkLevel, _SinkB, _RP, NewSQN, _MaxSQN, Counter, Man0) ->
+    leveled_log:log("PC011", [NewSQN, SinkLevel, Counter]),
+    Man0;
+do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, Counter, Man0) ->
+    FileName = lists:flatten(io_lib:format(RP ++ "_~w_~w.sst",
+                                            [SinkLevel, Counter])),
+    leveled_log:log("PC012", [NewSQN, FileName]),
     TS1 = os:timestamp(),
-    case leveled_sst:sst_new(FileName, KL1, KL2, IsB, SrcLevel + 1, MaxSQN) of
+    case leveled_sst:sst_new(FileName, KL1, KL2, SinkB, SinkLevel, MaxSQN) of
         empty ->
             leveled_log:log("PC013", [FileName]),
-            OutList;
+            Man0;
         {ok, Pid, Reply} ->
             {{KL1Rem, KL2Rem}, SmallestKey, HighestKey} = Reply,
-            ExtMan = lists:append(OutList,
-                                    [#manifest_entry{start_key=SmallestKey,
-                                                        end_key=HighestKey,
-                                                        owner=Pid,
-                                                        filename=FileName}]),
+            Entry = #manifest_entry{start_key=SmallestKey,
+                                        end_key=HighestKey,
+                                        owner=Pid,
+                                        filename=FileName},
+            Man1 = leveled_manifest:insert_manifest_entry(Man0,
+                                                            NewSQN,
+                                                            SinkLevel,
+                                                            Entry),
             leveled_log:log_timer("PC015", [], TS1),
             do_merge(KL1Rem, KL2Rem,
-                        {SrcLevel, IsB}, {Filepath, MSN}, MaxSQN,
-                        FileCounter + 1, ExtMan)
-    end.
-
-
-get_item(Index, List, Default) ->
-    case lists:keysearch(Index, 1, List) of
-        {value, {Index, Value}} ->
-            Value;
-        false ->
-            Default
+                        SinkLevel, SinkB,
+                        RP, NewSQN, MaxSQN,
+                        Counter + 1, Man1)
     end.
 
 
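Both the removed check_for_merge_candidates/2 and the new leveled_manifest:merge_lookup/4 call answer the same question: which files at the sink level have key ranges that overlap the source file chosen for merging. The sketch below is a self-contained restatement of that interval test, reusing the key values from the removed select_merge_candidates_test/0 but with maps standing in for the #manifest_entry{} record from leveled.hrl; the module and function names are illustrative, not the leveled API.

    -module(overlap_sketch).
    -export([candidates/2]).
    -include_lib("eunit/include/eunit.hrl").

    %% A sink file overlaps the source range unless it ends before the source
    %% starts or starts after the source ends - the same test as the removed
    %% lists:partition/2 fun, expressed over maps.
    candidates(#{start_key := SrcSK, end_key := SrcEK}, SinkFiles) ->
        lists:partition(
            fun(#{start_key := SK, end_key := EK}) ->
                not (EK < SrcSK orelse SK > SrcEK)
            end,
            SinkFiles).

    candidates_test() ->
        Src = #{start_key => {o, "Bucket", "Key40001"},
                end_key => {o, "Bucket", "Key60000"}},
        Sink1 = #{start_key => {o, "Bucket", "Key1"},
                  end_key => {o, "Bucket", "Key20000"}},
        Sink2 = #{start_key => {o, "Bucket", "Key20001"},
                  end_key => {o, "Bucket1", "Key1"}},
        {Overlapping, Others} = candidates(Src, [Sink1, Sink2]),
        ?assertMatch([Sink2], Overlapping),
        ?assertMatch([Sink1], Others).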
@@ -306,26 +238,6 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) ->
                                         null}},
     generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange).
 
-choose_pid_toquery([ManEntry|_T], Key) when
-        Key >= ManEntry#manifest_entry.start_key,
-        ManEntry#manifest_entry.end_key >= Key ->
-    ManEntry#manifest_entry.owner;
-choose_pid_toquery([_H|T], Key) ->
-    choose_pid_toquery(T, Key).
-
-
-find_randomkeys(_FList, 0, _Source) ->
-    ok;
-find_randomkeys(FList, Count, Source) ->
-    KV1 = lists:nth(random:uniform(length(Source)), Source),
-    K1 = leveled_codec:strip_to_keyonly(KV1),
-    P1 = choose_pid_toquery(FList, K1),
-    FoundKV = leveled_sst:sst_get(P1, K1),
-    Found = leveled_codec:strip_to_keyonly(FoundKV),
-    io:format("success finding ~w in ~w~n", [K1, P1]),
-    ?assertMatch(K1, Found),
-    find_randomkeys(FList, Count - 1, Source).
-
 
 merge_file_test() ->
     KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)),
@@ -353,57 +265,22 @@ merge_file_test() ->
                                 2,
                                 KL4_L2,
                                 undefined),
-    Result = perform_merge({PidL1_1, "../test/KL1_L1.sst"},
-                            [#manifest_entry{owner=PidL2_1},
-                                #manifest_entry{owner=PidL2_2},
-                                #manifest_entry{owner=PidL2_3},
-                                #manifest_entry{owner=PidL2_4}],
-                            {2, false}, {"../test/", 99}),
-    lists:foreach(fun(ManEntry) ->
-                        {o, B1, K1} = ManEntry#manifest_entry.start_key,
-                        {o, B2, K2} = ManEntry#manifest_entry.end_key,
-                        io:format("Result of ~s ~s and ~s ~s with Pid ~w~n",
-                            [B1, K1, B2, K2, ManEntry#manifest_entry.owner]) end,
-                    Result),
-    io:format("Finding keys in KL1_L1~n"),
-    ok = find_randomkeys(Result, 50, KL1_L1),
-    io:format("Finding keys in KL1_L2~n"),
-    ok = find_randomkeys(Result, 50, KL1_L2),
-    io:format("Finding keys in KL2_L2~n"),
-    ok = find_randomkeys(Result, 50, KL2_L2),
-    io:format("Finding keys in KL3_L2~n"),
-    ok = find_randomkeys(Result, 50, KL3_L2),
-    io:format("Finding keys in KL4_L2~n"),
-    ok = find_randomkeys(Result, 50, KL4_L2),
-    leveled_sst:sst_clear(PidL1_1),
-    leveled_sst:sst_clear(PidL2_1),
-    leveled_sst:sst_clear(PidL2_2),
-    leveled_sst:sst_clear(PidL2_3),
-    leveled_sst:sst_clear(PidL2_4),
-    lists:foreach(fun(ManEntry) ->
-                        leveled_sst:sst_clear(ManEntry#manifest_entry.owner) end,
-                    Result).
-
-select_merge_candidates_test() ->
-    Sink1 = #manifest_entry{start_key = {o, "Bucket", "Key1"},
-                            end_key = {o, "Bucket", "Key20000"}},
-    Sink2 = #manifest_entry{start_key = {o, "Bucket", "Key20001"},
-                            end_key = {o, "Bucket1", "Key1"}},
-    Src1 = #manifest_entry{start_key = {o, "Bucket", "Key40001"},
-                            end_key = {o, "Bucket", "Key60000"}},
-    {Candidates, Others} = check_for_merge_candidates(Src1, [Sink1, Sink2]),
-    ?assertMatch([Sink2], Candidates),
-    ?assertMatch([Sink1], Others).
-
-
-select_merge_file_test() ->
-    L0 = [{{o, "B1", "K1"}, {o, "B3", "K3"}, dummy_pid}],
-    L1 = [{{o, "B1", "K1"}, {o, "B2", "K2"}, dummy_pid},
-            {{o, "B2", "K3"}, {o, "B4", "K4"}, dummy_pid}],
-    Manifest = [{0, L0}, {1, L1}],
-    {FileRef, NewManifest} = select_filetomerge(0, Manifest),
-    ?assertMatch(FileRef, {{o, "B1", "K1"}, {o, "B3", "K3"}, dummy_pid}),
-    ?assertMatch(NewManifest, [{0, []}, {1, L1}]).
+    E1 = #manifest_entry{owner = PidL1_1, filename = "../test/KL1_L1.sst"},
+    E2 = #manifest_entry{owner = PidL2_1, filename = "../test/KL1_L2.sst"},
+    E3 = #manifest_entry{owner = PidL2_2, filename = "../test/KL2_L2.sst"},
+    E4 = #manifest_entry{owner = PidL2_3, filename = "../test/KL3_L2.sst"},
+    E5 = #manifest_entry{owner = PidL2_4, filename = "../test/KL4_L2.sst"},
+
+    Man0 = leveled_manifest:new_manifest(),
+    Man1 = leveled_manifest:insert_manifest_entry(Man0, 1, 2, E1),
+    Man2 = leveled_manifest:insert_manifest_entry(Man1, 1, 2, E1),
+    Man3 = leveled_manifest:insert_manifest_entry(Man2, 1, 2, E1),
+    Man4 = leveled_manifest:insert_manifest_entry(Man3, 1, 2, E1),
+    Man5 = leveled_manifest:insert_manifest_entry(Man4, 2, 1, E1),
+
+    Man6 = perform_merge(Man5, E1, [E2, E3, E4, E5], 1, "../test", 3),
+
+    ?assertMatch(3, leveled_manifest:get_manifest_sqn(Man6)).
 
 coverage_cheat_test() ->
     {ok, _State1} = code_change(null, #state{}, null).
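The commit message flags a large outstanding eunit rewrite. The removed helpers (choose_pid_toquery/2 and find_randomkeys/3) captured the essential check: sample random keys from the generated input and assert each one can still be read back from whatever now covers it. A self-contained sketch of that style of assertion follows, with a plain map standing in for the SST processes; it is illustrative only, not the leveled test code.

    -module(random_readback_sketch).
    -include_lib("eunit/include/eunit.hrl").
    -export([find_randomkeys/3]).

    %% Sample Count random {K, V} pairs from Source and assert each key reads
    %% back from Store - here a map standing in for the merged SST files.
    find_randomkeys(_Store, 0, _Source) ->
        ok;
    find_randomkeys(Store, Count, Source) ->
        {K, V} = lists:nth(rand:uniform(length(Source)), Source),
        ?assertEqual(V, maps:get(K, Store)),
        find_randomkeys(Store, Count - 1, Source).

    random_readback_test() ->
        Source = [{{o, "Bucket", "Key" ++ integer_to_list(I)}, I}
                    || I <- lists:seq(1, 100)],
        Store = maps:from_list(Source),
        ok = find_randomkeys(Store, 50, Source).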