diff --git a/src/leveled_imanifest.erl b/src/leveled_imanifest.erl index 1f431fe..e278ad7 100644 --- a/src/leveled_imanifest.erl +++ b/src/leveled_imanifest.erl @@ -151,6 +151,10 @@ to_list(Manifest) -> -spec reader(integer(), string()) -> manifest(). %% @doc %% Given a file path and a manifest SQN return the inker manifest +%% If the reader crashes on startup the database cannot be started. However, +%% previous versions of the manifest are kept - and so a rollback can be +%% performed. If waste is not retained though, there may be unresolvable data +%% loss on rollback. reader(SQN, RootPath) -> ManifestPath = leveled_inker:filepath(RootPath, manifest_dir), leveled_log:log("I0015", [ManifestPath, SQN]), @@ -171,10 +175,12 @@ writer(Manifest, ManSQN, RootPath) -> integer_to_list(ManSQN) ++ "." ++ ?MANIFEST_FILEX), TmpFN = filename:join(ManPath, integer_to_list(ManSQN) ++ "." ++ ?PENDING_FILEX), + %% TODO: This should support a CRC check, but issues with making the CRC + %% check backwards compatible (so that the reader can read manifests both + %% with and without a CRC check) MBin = term_to_binary(to_list(Manifest), [compressed]), leveled_log:log("I0016", [ManSQN]), - ok = file:write_file(TmpFN, MBin), - ok = file:rename(TmpFN, NewFN), + ok = leveled_util:safe_rename(TmpFN, NewFN, MBin, true), GC_SQN = ManSQN - ?MANIFESTS_TO_RETAIN, GC_Man = filename:join(ManPath, integer_to_list(GC_SQN) ++ "." ++ ?MANIFEST_FILEX), diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index e19ceda..32ee376 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -323,7 +323,7 @@ ink_fold(Pid, MinSQN, FoldFuns, Acc) -> -spec ink_loadpcl(pid(), integer(), fun(), pid()) -> ok. %% -%% Function to prompt load of the Ledger at startup. the Penciller should +%% Function to prompt load of the Ledger at startup. The Penciller should %% have determined the lowest SQN not present in the Ledger, and the inker %% should fold over the Journal from that point, using the function to load %% penciller with the results. @@ -988,7 +988,7 @@ key_check(LedgerKey, SQN, Manifest) -> -spec build_manifest(list(), list(), #cdb_options{}) -> {leveled_imanifest:manifest(), integer(), integer(), pid()}. %% @doc -%% Selectes the correct manifets to open, and the starts a process for each +%% Selects the correct manifest to open, and then starts a process for each %% file in the manifest, storing the PID for that process within the manifest. %% Opens an active journal if one is not present. build_manifest(ManifestFilenames, @@ -1026,7 +1026,7 @@ build_manifest(ManifestFilenames, JSQN end, - % Update the manifest if it has been changed by the process of laoding + % Update the manifest if it has been changed by the process of loading % the manifest (must also increment the manifest SQN). UpdManifestSQN = if diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index a1cce11..91bacaa 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -53,6 +53,7 @@ -include_lib("eunit/include/eunit.hrl"). -define(MANIFEST_FILEX, "man"). +-define(PENDING_FILEX, "pnd"). -define(MANIFEST_FP, "ledger_manifest"). -define(MAX_LEVELS, 8). -define(TREE_TYPE, idxt). @@ -201,14 +202,15 @@ close_manifest(Manifest, CloseEntryFun) -> %% @doc %% Save the manifest to file (with a checksum) save_manifest(Manifest, RootPath) -> - FP = filepath(RootPath, Manifest#manifest.manifest_sqn, current_manifest), + TFP = filepath(RootPath, Manifest#manifest.manifest_sqn, pending_manifest), + AFP = filepath(RootPath, Manifest#manifest.manifest_sqn, current_manifest), ManBin = term_to_binary(Manifest#manifest{snapshots = [], pending_deletes = dict:new(), min_snapshot_sqn = 0, blooms = dict:new()}), CRC = erlang:crc32(ManBin), - ok = file:write_file(FP, <>), - {ok, <>} = file:read_file(FP), + ToPersist = <>, + ok = leveled_util:safe_rename(TFP, AFP, ToPersist, true), GC_SQN = Manifest#manifest.manifest_sqn - ?MANIFESTS_TO_RETAIN, % If a manifest is corrupted the previous one will be tried, so don't % delete the previous one straight away. Retain until enough have been @@ -906,7 +908,12 @@ filepath(RootPath, manifest) -> filepath(RootPath, NewMSN, current_manifest) -> filepath(RootPath, manifest) ++ "nonzero_" - ++ integer_to_list(NewMSN) ++ "." ++ ?MANIFEST_FILEX. + ++ integer_to_list(NewMSN) ++ "." ++ ?MANIFEST_FILEX; +filepath(RootPath, NewMSN, pending_manifest) -> + filepath(RootPath, manifest) ++ "nonzero_" + ++ integer_to_list(NewMSN) ++ "." ++ ?PENDING_FILEX. + + open_manifestfile(_RootPath, L) when L == [] orelse L == [0] -> diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 92eac7c..2eddd34 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1201,13 +1201,6 @@ write_file(RootPath, Filename, SummaryBin, SlotsBin, SlotsLength = byte_size(SlotsBin), {PendingName, FinalName} = generate_filenames(Filename), FileVersion = gen_fileversion(PressMethod, IdxModDate), - ok = file:write_file(filename:join(RootPath, PendingName), - <>, - [raw]), case filelib:is_file(filename:join(RootPath, FinalName)) of true -> AltName = filename:join(RootPath, filename:basename(FinalName)) @@ -1217,8 +1210,14 @@ write_file(RootPath, Filename, SummaryBin, SlotsBin, false -> ok end, - file:rename(filename:join(RootPath, PendingName), - filename:join(RootPath, FinalName)), + ok = leveled_util:safe_rename(filename:join(RootPath, PendingName), + filename:join(RootPath, FinalName), + <>, + false), FinalName. read_file(Filename, State, LoadPageCache) -> diff --git a/src/leveled_util.erl b/src/leveled_util.erl index 03c2083..c81d814 100644 --- a/src/leveled_util.erl +++ b/src/leveled_util.erl @@ -14,7 +14,10 @@ -export([generate_uuid/0, integer_now/0, integer_time/1, - magic_hash/1]). + magic_hash/1, + safe_rename/4]). + +-define(WRITE_OPS, [binary, raw, read, write]). -spec generate_uuid() -> list(). @@ -65,12 +68,33 @@ hash1(H, <>) -> hash1(H2, Rest). +-spec safe_rename(string(), string(), binary(), boolean()) -> ok. +%% @doc +%% Write a file, sync it and rename it (and for super-safe mode read it back) +%% An attempt to prevent crashes leaving files with empty or partially written +%% values +safe_rename(TempFN, RealFN, BinData, ReadCheck) -> + {ok, TempFH} = file:open(TempFN, ?WRITE_OPS), + ok = file:write(TempFH, BinData), + ok = file:sync(TempFH), + ok = file:close(TempFH), + ok = file:rename(TempFN, RealFN), + case ReadCheck of + true -> + {ok, ReadBack} = file:read_file(RealFN), + true = (ReadBack == BinData), + ok; + false -> + ok + end. + %%%============================================================================ %%% Test %%%============================================================================ -ifdef(TEST). +-define(TEST_AREA, "test/test_area/util/"). magichashperf_test() -> KeyFun = @@ -86,4 +110,17 @@ magichashperf_test() -> {TimeMH2, _HL1} = timer:tc(lists, map, [fun(K) -> magic_hash(K) end, KL]), io:format(user, "1000 keys magic hashed in ~w microseconds~n", [TimeMH2]). + +safe_rename_test() -> + ok = filelib:ensure_dir(?TEST_AREA), + TempFN = filename:join(?TEST_AREA, "test_manifest0.pnd"), + RealFN = filename:join(?TEST_AREA, "test_manifest0.man"), + ok = safe_rename(TempFN, RealFN, <<1:128/integer>>, false), + ?assertMatch({ok, <<1:128/integer>>}, file:read_file(RealFN)), + TempFN1 = filename:join(?TEST_AREA, "test_manifest1.pnd"), + RealFN1 = filename:join(?TEST_AREA, "test_manifest1.man"), + ok = safe_rename(TempFN1, RealFN1, <<2:128/integer>>, true), + ?assertMatch({ok, <<2:128/integer>>}, file:read_file(RealFN1)). + + -endif.