From d56ff6efbccc171c03b508b00603023e3825bae6 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 18 May 2018 11:17:04 +0100 Subject: [PATCH 1/7] Make CRC check cover key and value So that a corrupted key is detected through a CRC check without crashing leveled (e.g. because it binary_to_term/1 failes somewhere or it is a tuple of the wrong length). Previously the CRC covered the value only. Note if you only wish to extract the value, the key cannot be independently validated. Perhaps a process extrating key only could hit issues. --- src/leveled_cdb.erl | 162 ++++++++++++++++++++++++-------------------- 1 file changed, 87 insertions(+), 75 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 524de3a..c019fd6 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -1062,7 +1062,8 @@ find_lastkey(Handle, IndexCache) -> _ -> {ok, _} = file:position(Handle, LastPosition), {KeyLength, _ValueLength} = read_next_2_integers(Handle), - safe_read_next_key(Handle, KeyLength) + {K, _KB} = safe_read_next_key(Handle, KeyLength), + K end. @@ -1124,8 +1125,8 @@ extract_kvpair(Handle, [Position|Rest], Key, BinaryMode) -> {ok, _} = file:position(Handle, Position), {KeyLength, ValueLength} = read_next_2_integers(Handle), case safe_read_next_key(Handle, KeyLength) of - Key -> % If same key as passed in, then found! - case read_next_value(Handle, ValueLength, crc) of + {Key, KeyBin} -> % If same key as passed in, then found! + case checkread_next_value(Handle, ValueLength, KeyBin) of {false, _} -> crc_wonky; {_, Value} -> @@ -1143,18 +1144,20 @@ extract_kvpair(Handle, [Position|Rest], Key, BinaryMode) -> extract_key(Handle, Position) -> {ok, _} = file:position(Handle, Position), {KeyLength, _ValueLength} = read_next_2_integers(Handle), - {safe_read_next_key(Handle, KeyLength)}. + {K, _KB} = safe_read_next_key(Handle, KeyLength), + {K}. extract_key_size(Handle, Position) -> {ok, _} = file:position(Handle, Position), {KeyLength, ValueLength} = read_next_2_integers(Handle), - {safe_read_next_key(Handle, KeyLength), ValueLength}. + {K, _KB} = safe_read_next_key(Handle, KeyLength), + {K, ValueLength}. extract_key_value_check(Handle, Position, BinaryMode) -> {ok, _} = file:position(Handle, Position), {KeyLength, ValueLength} = read_next_2_integers(Handle), - K = safe_read_next_key(Handle, KeyLength), - {Check, V} = read_next_value(Handle, ValueLength, crc), + {K, KB} = safe_read_next_key(Handle, KeyLength), + {Check, V} = checkread_next_value(Handle, ValueLength, KB), case BinaryMode of true -> {K, V, Check}; @@ -1174,18 +1177,13 @@ startup_scan_over_file(Handle, Position) -> {ok, FinalPos} = file:position(Handle, cur), {FinalPos, Output}. + +%% @doc %% Specific filter to be used at startup to build a hashtree for an incomplete %% cdb file, and returns at the end the hashtree and the final Key seen in the %% journal - -startup_filter(Key, ValueAsBin, Position, {Hashtree, _LastKey}, _ExtractFun) -> - case crccheck_value(ValueAsBin) of - true -> - % This function is preceeded by a "safe read" of the key and value - % and so the crccheck should always be true, as a failed check - % should not reach this stage - {loop, {put_hashtree(Key, Position, Hashtree), Key}} - end. +startup_filter(Key, _ValueAsBin, Position, {Hashtree, _LastKey}, _ExtractFun) -> + {loop, {put_hashtree(Key, Position, Hashtree), Key}}. %% Scan for key changes - scan over file returning applying FilterFun @@ -1228,14 +1226,18 @@ scan_over_file(Handle, Position, FilterFun, Output, LastKey) -> end end. + +%% @doc %% Confirm that the last key has been defined and set to a non-default value +check_last_key(empty) -> + empty; +check_last_key(_) + ok. -check_last_key(LastKey) -> - case LastKey of - empty -> empty; - _ -> ok - end. +-spec saferead_keyvalue(file:io_device()) + -> false|{any(), any(), integer(), integer()}. +%% @doc %% Read the Key/Value at this point, returning {ok, Key, Value} %% catch expected exceptions associated with file corruption (or end) and %% return eof @@ -1247,10 +1249,10 @@ saferead_keyvalue(Handle) -> case safe_read_next_key(Handle, KeyL) of false -> false; - Key -> + {Key, KeyBin} -> case file:read(Handle, ValueL) of {ok, Value} -> - case crccheck_value(Value) of + case crccheck(Value, KeyBin) of true -> {Key, Value, KeyL, ValueL}; false -> @@ -1263,57 +1265,67 @@ saferead_keyvalue(Handle) -> end. +-spec safe_read_next_key(file:io_device(), integer()) + -> false|{any(), binary()}. +%% @doc +%% Return a key masking nay failure in a fixed return of false safe_read_next_key(Handle, Length) -> - try read_next_key(Handle, Length) of + try read_next_item(Handle, Length) of eof -> false; - Term -> - Term + SafeResult -> + SafeResult catch error:badarg -> false end. -%% The first four bytes of the value are the crc check -crccheck_value(Value) when byte_size(Value) >4 -> - << Hash:32/integer, Tail/bitstring>> = Value, - case calc_crc(Tail) of - Hash -> +-spec read_next_item(file:io_device(), integer()) -> eof|{any(), binary()}. +%% @doc +%% Read the next item which is length L, returning both the term and the +%% original binary +read_next_item(Handle, Length) -> + case file:read(Handle, Length) of + {ok, Bin} -> + {binary_to_term(Bin), Bin}; + eof -> + eof + end. + +-spec crccheck(binary()|bitstring(), binary()) -> boolean(). +%% @doc +%% CRC chaeck the value which should be a binary, where the first four bytes +%% are a CRC check. If the binary is truncated, it could be a bitstring or +%% less than 4 bytes - in which case return false to recognise the corruption. +crccheck(<>, KeyBin) when is_binary(KeyBin) -> + case calc_crc(KeyBin, Value) of + CRC -> true; _ -> leveled_log:log("CDB10", []), false end; -crccheck_value(_) -> +crccheck(_V, _KB) -> leveled_log:log("CDB11", []), false. -%% Run a crc check filling out any values which don't fit on byte boundary -calc_crc(Value) -> - case bit_size(Value) rem 8 of - 0 -> - erlang:crc32(Value); - N -> - M = 8 - N, - erlang:crc32(<>) - end. -read_next_key(Handle, Length) -> - case file:read(Handle, Length) of - {ok, Bin} -> - binary_to_term(Bin); - eof -> - eof - end. +-spec calc_crc(binary(), binary()) -> integer(). +%% @doc +%% Do a vaanilla CRC calculation on the binary +calc_crc(KeyBin, Value) -> erlang:crc32(<>). +-spec checkread_next_value(file:io_device(), integer(), binary()) + -> {boolean(), binary()|crc_wonky}. +%% @doc %% Read next string where the string has a CRC prepended - stripping the crc %% and checking if requested -read_next_value(Handle, Length, crc) -> - {ok, <>} = file:read(Handle, Length), - case calc_crc(Bin) of +checkread_next_value(Handle, Length, KeyBin) -> + {ok, <>} = file:read(Handle, Length), + case calc_crc(KeyBin, Value) of CRC -> - {true, Bin}; + {true, Value}; _ -> {false, crc_wonky} end. @@ -1573,12 +1585,12 @@ key_value_to_record({Key, Value}, BinaryMode) -> false -> term_to_binary(Value) end, - LK = byte_size(BK), - LV = byte_size(BV), - LK_FL = endian_flip(LK), - LV_FL = endian_flip(LV + 4), - CRC = calc_crc(BV), - <>. + KS = byte_size(BK), + VS = byte_size(BV), + KS_FL = endian_flip(KS), + VS_FL = endian_flip(VS + 4), + CRC = calc_crc(BK, BV), + <>. multi_key_value_to_record(KVList, BinaryMode, LastPosition) -> @@ -1777,9 +1789,9 @@ dump(FileName) -> {ok, _} = file:position(Handle, {bof, 2048}), Fn1 = fun(_I, Acc) -> {KL, VL} = read_next_2_integers(Handle), - Key = read_next_key(Handle, KL), + {Key, KB} = safe_read_next_key(Handle, KL), Value = - case read_next_value(Handle, VL, crc) of + case checkread_next_value(Handle, VL, KB) of {true, V0} -> binary_to_term(V0) end, @@ -1943,35 +1955,35 @@ to_dict_test() -> ok = file:delete("../test/from_dict_test1.cdb"). crccheck_emptyvalue_test() -> - ?assertMatch(false, crccheck_value(<<>>)). + ?assertMatch(false, crccheck(<<>>, <<"Key">>)). crccheck_shortvalue_test() -> Value = <<128,128,32>>, - ?assertMatch(false, crccheck_value(Value)). + ?assertMatch(false, crccheck(Value, <<"Key">>)). crccheck_justshortvalue_test() -> Value = <<128,128,32,64>>, - ?assertMatch(false, crccheck_value(Value)). - -crccheck_correctvalue_test() -> - Value = term_to_binary("some text as value"), - Hash = erlang:crc32(Value), - ValueOnDisk = <>, - ?assertMatch(true, crccheck_value(ValueOnDisk)). + ?assertMatch(false, crccheck(Value, <<"Key">>)). crccheck_wronghash_test() -> Value = term_to_binary("some text as value"), - Hash = erlang:crc32(Value) + 1, - ValueOnDisk = <>, - ?assertMatch(false, crccheck_value(ValueOnDisk)). + Key = <<"K">>, + BadHash = erlang:crc32(<>), + GoodHash = erlang:crc32(<>), + GValueOnDisk = <>, + BValueOnDisk = <>, + ?assertMatch(false, crccheck(BValueOnDisk, Key)), + ?assertMatch(true, crccheck(GValueOnDisk, Key)). crccheck_truncatedvalue_test() -> Value = term_to_binary("some text as value"), - Hash = erlang:crc32(Value), + Key = <<"K">>, + Hash = erlang:crc32(<>), ValueOnDisk = <>, Size = bit_size(ValueOnDisk) - 1, <> = ValueOnDisk, - ?assertMatch(false, crccheck_value(TruncatedValue)). + ?assertMatch(false, crccheck(TruncatedValue, Key)), + ?assertMatch(true, crccheck(ValueOnDisk, Key)). activewrite_singlewrite_test() -> Key = "0002", @@ -2330,7 +2342,7 @@ safe_read_test() -> % only if we understand why Key = term_to_binary(<<"Key">>), Value = <<"Value">>, - CRC = calc_crc(Value), + CRC = calc_crc(Key, Value), ValToWrite = <>, KeyL = byte_size(Key), FlippedKeyL = endian_flip(KeyL), From 7cc5512a0afc14499870cc2f52e109cb74f9abf3 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 18 May 2018 11:49:19 +0100 Subject: [PATCH 2/7] safe_read There are now two cases of safe_read_next_key/2 the first case using the original function name will return just the Key when positive (not the Key and KeyBinary). If the Key and KeyBinary is required (e.g. for CRC checking purposes), then use safe_read_next_keyint/2. --- src/leveled_cdb.erl | 53 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index c019fd6..2acabff 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -135,6 +135,7 @@ -type cdb_options() :: #cdb_options{}. -type cdb_timings() :: no_timing|#cdb_timings{}. +-type hashtable_index() :: tuple(). @@ -1016,6 +1017,7 @@ hashtable_calc(HashTree, StartPos) -> %% Internal functions %%%%%%%%%%%%%%%%%%%% + determine_new_filename(Filename) -> filename:rootname(Filename, ".pnd") ++ ".cdb". @@ -1024,6 +1026,12 @@ rename_for_read(Filename, NewName) -> leveled_log:log("CDB08", [Filename, NewName, filelib:is_file(NewName)]), file:rename(Filename, NewName). + +-spec open_for_readonly(string(), term()) + -> {file:io_device(), hashtable_index(), term()}. +%% @doc +%% Open a CDB file to accept read requests (e.g. key/value lookups) but no +%% additions or changes open_for_readonly(Filename, LastKeyKnown) -> {ok, Handle} = file:open(Filename, [binary, raw, read]), Index = load_index(Handle), @@ -1036,6 +1044,11 @@ open_for_readonly(Filename, LastKeyKnown) -> end, {Handle, Index, LastKey}. + +-spec load_index(file:io_device()) -> hashtable_index(). +%% @doc +%% The CDB file has at the beginning an index of how many keys are present in +%% each of 256 slices of the hashtable. This loads that index load_index(Handle) -> Index = lists:seq(0, 255), LoadIndexFun = @@ -1046,6 +1059,9 @@ load_index(Handle) -> end, list_to_tuple(lists:map(LoadIndexFun, Index)). + +-spec find_lastkey(file:io_device(), hashtable_index()) -> empty|term(). +%% @doc %% Function to find the LastKey in the file find_lastkey(Handle, IndexCache) -> ScanIndexFun = @@ -1062,8 +1078,7 @@ find_lastkey(Handle, IndexCache) -> _ -> {ok, _} = file:position(Handle, LastPosition), {KeyLength, _ValueLength} = read_next_2_integers(Handle), - {K, _KB} = safe_read_next_key(Handle, KeyLength), - K + safe_read_next_key(Handle, KeyLength) end. @@ -1124,7 +1139,7 @@ extract_kvpair(_H, [], _K, _BinaryMode) -> extract_kvpair(Handle, [Position|Rest], Key, BinaryMode) -> {ok, _} = file:position(Handle, Position), {KeyLength, ValueLength} = read_next_2_integers(Handle), - case safe_read_next_key(Handle, KeyLength) of + case safe_read_next_keyint(Handle, KeyLength) of {Key, KeyBin} -> % If same key as passed in, then found! case checkread_next_value(Handle, ValueLength, KeyBin) of {false, _} -> @@ -1144,19 +1159,18 @@ extract_kvpair(Handle, [Position|Rest], Key, BinaryMode) -> extract_key(Handle, Position) -> {ok, _} = file:position(Handle, Position), {KeyLength, _ValueLength} = read_next_2_integers(Handle), - {K, _KB} = safe_read_next_key(Handle, KeyLength), - {K}. + {safe_read_next_key(Handle, KeyLength)}. extract_key_size(Handle, Position) -> {ok, _} = file:position(Handle, Position), {KeyLength, ValueLength} = read_next_2_integers(Handle), - {K, _KB} = safe_read_next_key(Handle, KeyLength), + K = safe_read_next_key(Handle, KeyLength), {K, ValueLength}. extract_key_value_check(Handle, Position, BinaryMode) -> {ok, _} = file:position(Handle, Position), {KeyLength, ValueLength} = read_next_2_integers(Handle), - {K, KB} = safe_read_next_key(Handle, KeyLength), + {K, KB} = safe_read_next_keyint(Handle, KeyLength), {Check, V} = checkread_next_value(Handle, ValueLength, KB), case BinaryMode of true -> @@ -1231,7 +1245,7 @@ scan_over_file(Handle, Position, FilterFun, Output, LastKey) -> %% Confirm that the last key has been defined and set to a non-default value check_last_key(empty) -> empty; -check_last_key(_) +check_last_key(_LK) -> ok. @@ -1246,7 +1260,7 @@ saferead_keyvalue(Handle) -> eof -> false; {KeyL, ValueL} -> - case safe_read_next_key(Handle, KeyL) of + case safe_read_next_keyint(Handle, KeyL) of false -> false; {Key, KeyBin} -> @@ -1265,11 +1279,26 @@ saferead_keyvalue(Handle) -> end. --spec safe_read_next_key(file:io_device(), integer()) +-spec safe_read_next_key(file:io_device(), integer()) -> false|term(). +%% @doc +%% Return the next key or have false returned if there is some sort of +%% potentially expected error (e.g. due to file truncation). Note that no +%% CRC check has been performed, if CRC check is required then use +%% safe_read_next_keyint/2 so that the binary key can also be returned to +%% be CRC checked along with the value +safe_read_next_key(Handle, Length) -> + case safe_read_next_keyint(Handle, Length) of + {K, _KB} -> + K; + false -> + false + end. + +-spec safe_read_next_keyint(file:io_device(), integer()) -> false|{any(), binary()}. %% @doc %% Return a key masking nay failure in a fixed return of false -safe_read_next_key(Handle, Length) -> +safe_read_next_keyint(Handle, Length) -> try read_next_item(Handle, Length) of eof -> false; @@ -1789,7 +1818,7 @@ dump(FileName) -> {ok, _} = file:position(Handle, {bof, 2048}), Fn1 = fun(_I, Acc) -> {KL, VL} = read_next_2_integers(Handle), - {Key, KB} = safe_read_next_key(Handle, KL), + {Key, KB} = safe_read_next_keyint(Handle, KL), Value = case checkread_next_value(Handle, VL, KB) of {true, V0} -> From d468dd32a80a1d3baba8cfacd1a28b0a174e8b72 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 18 May 2018 13:29:19 +0100 Subject: [PATCH 3/7] More consistency in extract value functions Try to make this a little bit cleaner and more obvious. Make sure the value passed is always a value and not a crc/value. --- src/leveled_cdb.erl | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 2acabff..6e62164 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -15,6 +15,10 @@ %% - The ability to scan a database in blocks of sequence numbers %% - The applictaion of a CRC check by default to all values %% +%% Because of the final delta - this is incompatible with standard CDB files +%% (in that you won't be able to fetch values if the file was written by +%% another CDB writer as the CRC check is missing) +%% %% This module provides functions to create and query a CDB (constant database). %% A CDB implements a two-level hashtable which provides fast {key,value} %% lookups that remain fairly constant in speed regardless of the CDBs size. @@ -1169,16 +1173,16 @@ extract_key_size(Handle, Position) -> extract_key_value_check(Handle, Position, BinaryMode) -> {ok, _} = file:position(Handle, Position), - {KeyLength, ValueLength} = read_next_2_integers(Handle), - {K, KB} = safe_read_next_keyint(Handle, KeyLength), - {Check, V} = checkread_next_value(Handle, ValueLength, KB), - case BinaryMode of - true -> - {K, V, Check}; - false -> - {K, binary_to_term(V), Check} + case {BinaryMode, saferead_keyvalue(Handle)} of + {_, false} -> + {null, null, false}; + {true, {Key, Value, _KeyL, _ValueL}} -> + {Key, Value, true}; + {false, {Key, Value, _KeyL, _ValueL}} -> + {Key, binary_to_term(Value), true} end. + %% Scan through the file until there is a failure to crc check an input, and %% at that point return the position and the key dictionary scanned so far startup_scan_over_file(Handle, Position) -> @@ -1267,10 +1271,11 @@ saferead_keyvalue(Handle) -> case file:read(Handle, ValueL) of {ok, Value} -> case crccheck(Value, KeyBin) of - true -> - {Key, Value, KeyL, ValueL}; false -> - false + false; + TrueValue -> + % i.e. value with no CRC + {Key, TrueValue, KeyL, ValueL} end; eof -> false @@ -1321,7 +1326,7 @@ read_next_item(Handle, Length) -> eof end. --spec crccheck(binary()|bitstring(), binary()) -> boolean(). +-spec crccheck(binary()|bitstring(), binary()) -> any(). %% @doc %% CRC chaeck the value which should be a binary, where the first four bytes %% are a CRC check. If the binary is truncated, it could be a bitstring or @@ -1329,7 +1334,7 @@ read_next_item(Handle, Length) -> crccheck(<>, KeyBin) when is_binary(KeyBin) -> case calc_crc(KeyBin, Value) of CRC -> - true; + Value; _ -> leveled_log:log("CDB10", []), false @@ -1361,8 +1366,7 @@ checkread_next_value(Handle, Length, KeyBin) -> %% Extract value and size from binary containing CRC extract_valueandsize(ValueAsBin) -> - <<_CRC:32/integer, Bin/binary>> = ValueAsBin, - {Bin, byte_size(Bin)}. + {ValueAsBin, byte_size(ValueAsBin)}. %% Used for reading lengths @@ -2002,7 +2006,7 @@ crccheck_wronghash_test() -> GValueOnDisk = <>, BValueOnDisk = <>, ?assertMatch(false, crccheck(BValueOnDisk, Key)), - ?assertMatch(true, crccheck(GValueOnDisk, Key)). + ?assertMatch(Value, crccheck(GValueOnDisk, Key)). crccheck_truncatedvalue_test() -> Value = term_to_binary("some text as value"), @@ -2012,7 +2016,7 @@ crccheck_truncatedvalue_test() -> Size = bit_size(ValueOnDisk) - 1, <> = ValueOnDisk, ?assertMatch(false, crccheck(TruncatedValue, Key)), - ?assertMatch(true, crccheck(ValueOnDisk, Key)). + ?assertMatch(Value, crccheck(ValueOnDisk, Key)). activewrite_singlewrite_test() -> Key = "0002", @@ -2402,7 +2406,7 @@ safe_read_test() -> false -> % Result OK to be false - should get that on error ok; - {<<"Key">>, ValToWrite, KeyL, BadValueL} -> + {<<"Key">>, Value, KeyL, BadValueL} -> % Sometimes corruption may yield a correct answer % for example if Value Length is too big % @@ -2410,7 +2414,7 @@ safe_read_test() -> % the end of the file - which is just a peculiarity of % the test ?assertMatch(true, BadValueL > ValueL); - {_BadKey, ValToWrite, KeyL, ValueL} -> + {_BadKey, Value, KeyL, ValueL} -> % Key is not CRC checked - so may be bit flipped to % something which is still passes through binary_to_term % Assumption is that the application should always @@ -2450,7 +2454,7 @@ safe_read_test() -> {ok, HandleHappy} = file:open(TestFN, ?WRITE_OPS), ok = file:pwrite(HandleHappy, 0, BinToWrite), {ok, _} = file:position(HandleHappy, bof), - ?assertMatch({<<"Key">>, ValToWrite, KeyL, ValueL}, + ?assertMatch({<<"Key">>, Value, KeyL, ValueL}, saferead_keyvalue(HandleHappy)), file:delete(TestFN). From 5e95e6a6b35ddba521bc5a2bb54129ce3615e261 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 18 May 2018 13:44:51 +0100 Subject: [PATCH 4/7] correct incorrect return value required by leveled_codec:compact_inkerkvc/2 --- src/leveled_cdb.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 6e62164..13a2a8c 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -1175,7 +1175,7 @@ extract_key_value_check(Handle, Position, BinaryMode) -> {ok, _} = file:position(Handle, Position), case {BinaryMode, saferead_keyvalue(Handle)} of {_, false} -> - {null, null, false}; + {null, crc_wonky, false}; {true, {Key, Value, _KeyL, _ValueL}} -> {Key, Value, true}; {false, {Key, Value, _KeyL, _ValueL}} -> From 039b135f5f9f44e5beeb99a313b87fb48dd9eb41 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 18 May 2018 14:36:47 +0100 Subject: [PATCH 5/7] Ease timeout pressure in unit test --- src/leveled_penciller.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index f007084..a409d8e 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -2086,7 +2086,7 @@ create_file_test() -> _ -> ok end end, - [50, 50, 50, 50, 50]), + [50, 100, 200, 400, 800]), {ok, SrcFN, StartKey, EndKey} = checkready(SP), io:format("StartKey ~w EndKey ~w~n", [StartKey, EndKey]), ?assertMatch({o, _, _, _}, StartKey), From 8011a9cde19c3e662b9db376df06f0504ef7c109 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 18 May 2018 15:23:43 +0100 Subject: [PATCH 6/7] Unit test should not pass with bad key Unit test needed to capture a bad key scenario, which did not get returned false. However, bad keys are now CRC checked and so willalways return false - and so the case clause for bad key can be dropped. --- src/leveled_cdb.erl | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 13a2a8c..1c1e2ec 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -2410,17 +2410,10 @@ safe_read_test() -> % Sometimes corruption may yield a correct answer % for example if Value Length is too big % - % This cna only happen with a corrupted value length at + % This can only happen with a corrupted value length at % the end of the file - which is just a peculiarity of % the test - ?assertMatch(true, BadValueL > ValueL); - {_BadKey, Value, KeyL, ValueL} -> - % Key is not CRC checked - so may be bit flipped to - % something which is still passes through binary_to_term - % Assumption is that the application should always - % ultimately know the key - and so will be able to check - % against the Key it is trying for. - ok + ?assertMatch(true, BadValueL > ValueL) end, ok = file:close(Handle) end, From b8a642fd3540b71509021a6e54a500f29f2f5154 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 18 May 2018 15:48:36 +0100 Subject: [PATCH 7/7] Spec update --- src/leveled_cdb.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 1c1e2ec..6ec3b55 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -1183,6 +1183,9 @@ extract_key_value_check(Handle, Position, BinaryMode) -> end. +-spec startup_scan_over_file(fle:io_device(), file:location()) + -> {file:location(), any()}. +%% @doc %% Scan through the file until there is a failure to crc check an input, and %% at that point return the position and the key dictionary scanned so far startup_scan_over_file(Handle, Position) -> @@ -1204,12 +1207,13 @@ startup_filter(Key, _ValueAsBin, Position, {Hashtree, _LastKey}, _ExtractFun) -> {loop, {put_hashtree(Key, Position, Hashtree), Key}}. +-spec scan_over_file(file:io_device(), file:location(), fun(), any(), any()) + -> {file:location(), any()}. %% Scan for key changes - scan over file returning applying FilterFun %% The FilterFun should accept as input: %% - Key, ValueBin, Position, Accumulator, Fun (to extract values from Binary) %% -> outputting a new Accumulator and a loop|stop instruction as a tuple %% i.e. {loop, Acc} or {stop, Acc} - scan_over_file(Handle, Position, FilterFun, Output, LastKey) -> case saferead_keyvalue(Handle) of false ->