From e02c6df3ed7df6e2b2b7d00bb97deeca0c382631 Mon Sep 17 00:00:00 2001
From: Martin Sumner <martin.sumner@adaptip.co.uk>
Date: Wed, 19 Sep 2018 13:03:00 +0100
Subject: [PATCH] Refactor safe_read

Make safe_read a bit more flexible,and allow it to catch a wider number of errors.
---
 src/leveled_cdb.erl | 99 +++++++++++++++++++++++++--------------------
 src/leveled_log.erl |  4 +-
 2 files changed, 59 insertions(+), 44 deletions(-)

diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl
index d10b0a7..4d60c6a 100644
--- a/src/leveled_cdb.erl
+++ b/src/leveled_cdb.erl
@@ -1177,7 +1177,7 @@ extract_kvpair(_H, [], _K, _BinaryMode) ->
 extract_kvpair(Handle, [Position|Rest], Key, BinaryMode) ->
     {ok, _} = file:position(Handle, Position),
     {KeyLength, ValueLength} = read_next_2_integers(Handle),
-    case safe_read_next_keyint(Handle, KeyLength) of
+    case safe_read_next_keybin(Handle, KeyLength) of
         {Key, KeyBin} ->  % If same key as passed in, then found!
             case checkread_next_value(Handle, ValueLength, KeyBin) of
                 {false, _} -> 
@@ -1302,21 +1302,16 @@ saferead_keyvalue(Handle) ->
         eof ->
             false;
         {KeyL, ValueL} ->
-            case safe_read_next_keyint(Handle, KeyL) of 
+            case safe_read_next_keybin(Handle, KeyL) of 
                 false ->
                     false;
                 {Key, KeyBin} ->
-                    case file:read(Handle, ValueL) of 
-                        {ok, Value} -> 
-                            case crccheck(Value, KeyBin) of
-                                false ->
-                                    false;
-                                TrueValue ->
-                                    % i.e. value with no CRC
-                                    {Key, TrueValue, KeyL, ValueL}
-                            end;
-                        eof ->
-                            false 
+                    case safe_read_next_value(Handle, ValueL, KeyBin) of 
+                        false ->
+                            false;
+                        TrueValue ->
+                            % i.e. value with no CRC
+                            {Key, TrueValue, KeyL, ValueL}
                     end
             end
     end.
@@ -1326,43 +1321,57 @@ saferead_keyvalue(Handle) ->
 %% @doc
 %% Return the next key or have false returned if there is some sort of 
 %% potentially expected error (e.g. due to file truncation).  Note that no
-%% CRC check has been performed, if CRC check is required then use 
-%% safe_read_next_keyint/2 so that the binary key can also be returned to 
-%% be CRC checked along with the value
+%% CRC check has been performed
 safe_read_next_key(Handle, Length) ->
-    case safe_read_next_keyint(Handle, Length) of
-        {K, _KB} ->
-            K;
-        false ->
+    ReadFun = fun(Bin) -> binary_to_term(Bin) end,
+    safe_read_next(Handle, Length, ReadFun).
+
+-spec safe_read_next_keybin(file:io_device(), integer()) 
+                                            -> false|{term(), binary()}.
+%% @doc
+%% Return the next key or have false returned if there is some sort of 
+%% potentially expected error (e.g. due to file truncation).  Note that no
+%% CRC check has been performed
+%% Returns both the Key and the Binary version, the binary version being
+%% required for the CRC checking after the value fetch (see 
+%% safe_read_next_value/3)
+safe_read_next_keybin(Handle, Length) ->
+    ReadFun = fun(Bin) -> {binary_to_term(Bin), Bin} end,
+    safe_read_next(Handle, Length, ReadFun).
+
+-spec safe_read_next_value(file:io_device(), integer(), binary()) 
+                                                        -> binary()|false.
+safe_read_next_value(Handle, Length, KeyBin) ->
+    ReadFun =  fun(VBin) -> crccheck(VBin, KeyBin) end,
+    safe_read_next(Handle, Length, ReadFun).
+
+
+-spec safe_read_next(file:io_device(), integer(), fun()) -> any().
+%% @doc
+%% Read the next item of length Length
+%% Previously catching error:badarg was sufficient to capture errors of
+%% corruption, but on some OS versions may need to catch error:einval as well
+safe_read_next(Handle, Length, ReadFun) ->
+    try 
+        loose_read(Handle, Length, ReadFun)
+    catch
+        error:ReadError ->
+            leveled_log:log("CDB20", [ReadError, Length]),
             false
     end.
 
--spec safe_read_next_keyint(file:io_device(), integer())
-                                                -> false|{any(), binary()}.
+-spec loose_read(file:io_device(), integer(), fun()) -> any().
 %% @doc
-%% Return a key masking nay failure in a fixed return of false
-safe_read_next_keyint(Handle, Length) ->
-    try read_next_item(Handle, Length) of
+%% Read with minimal error handling (only eof) - to be wrapped in 
+%% safe_read_next/3 to catch exceptions.
+loose_read(Handle, Length, ReadFun) ->
+    case file:read(Handle, Length) of
         eof ->
             false;
-        SafeResult ->
-            SafeResult
-    catch
-        error:badarg ->
-            false
+        {ok, Result} ->
+            ReadFun(Result)
     end.
 
--spec read_next_item(file:io_device(), integer()) -> eof|{any(), binary()}.
-%% @doc
-%% Read the next item which is length L, returning both the term and the 
-%% original binary
-read_next_item(Handle, Length) ->
-    case file:read(Handle, Length) of
-        {ok, Bin} -> 
-            {binary_to_term(Bin), Bin};
-        eof ->
-            eof
-    end.
 
 -spec crccheck(binary()|bitstring(), binary()) -> any().
 %% @doc
@@ -1860,7 +1869,7 @@ dump(FileName) ->
     {ok, _} = file:position(Handle, {bof, 2048}),
     Fn1 = fun(_I, Acc) ->
         {KL, VL} = read_next_2_integers(Handle),
-        {Key, KB} = safe_read_next_keyint(Handle, KL),
+        {Key, KB} = safe_read_next_keybin(Handle, KL),
         Value =
             case checkread_next_value(Handle, VL, KB) of
                 {true, V0} ->
@@ -2229,13 +2238,17 @@ get_keys_byposition_manykeys_test_() ->
     {timeout, 120, fun get_keys_byposition_manykeys_test_to/0}.
 
 get_keys_byposition_manykeys_test_to() ->
-    KeyCount = 2048,
+    KeyCount = 16384,
     {ok, P1} = cdb_open_writer("../test/poskeymany.pnd",
                                 #cdb_options{binary_mode=false}),
     KVList = generate_sequentialkeys(KeyCount, []),
     lists:foreach(fun({K, V}) -> cdb_put(P1, K, V) end, KVList),
     ok = cdb_roll(P1),
     % Should not return positions when rolling
+    % There is an implicit race here - if cdb_roll is too fast, then the test
+    % will fail.  It appears to be safe that if KeyCount is set to a high value
+    % (e.g. > 10K) it is implausible that cdb_roll will ever finish before the 
+    % call to cdb_getpositions is executed.  So the race is tolerated
     ?assertMatch([], cdb_getpositions(P1, 10)), 
     lists:foldl(fun(X, Complete) ->
                         case Complete of
diff --git a/src/leveled_log.erl b/src/leveled_log.erl
index b00c3a5..0df0aeb 100644
--- a/src/leveled_log.erl
+++ b/src/leveled_log.erl
@@ -361,7 +361,9 @@
     {"CDB19",
         {info, "Sample timings in microseconds for sample_count=~w " 
                     ++ "with totals of cycle_count=~w "
-                    ++ "fetch_time=~w index_time=~w"}}
+                    ++ "fetch_time=~w index_time=~w"}},
+    {"CDB20",
+        {error, "Error ~w caught when safe reading a file to length ~w"}}
         ]).