From c060c0e41df43475db9c3c69f2ed9b96fefece85 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 14 Jan 2019 12:27:51 +0000 Subject: [PATCH 1/2] Handle L0 cache being full A test that will cause leveled to crash due to a low cache size being set - but protect against this (as well as the general scenario of the cache being full). There could be a potential case where an L0 file is present (post pending) without work backlog being set. In this case we want to roll the level zero to memory, but don't accept the cache update if the L0 cache is already full. --- src/leveled_log.erl | 4 +- src/leveled_penciller.erl | 74 ++++++++++++++++++++++++--------- src/leveled_pmem.erl | 9 +++- test/end_to_end/basic_SUITE.erl | 9 +++- 4 files changed, 74 insertions(+), 22 deletions(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 3bf6593..7f06ff9 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -139,7 +139,7 @@ {warn, "We're doomed - intention recorded to destroy all files"}}, {"P0031", {info, "Completion of update to levelzero" - ++ " with cache size status ~w ~w"}}, + ++ " with cache_size=~w status=~w and update_success=~w"}}, {"P0032", {info, "Fetch head timing with sample_count=~w and level timings of" ++ " foundmem_time=~w found0_time=~w found1_time=~w" @@ -171,6 +171,8 @@ {info, "Archiving filename ~s as unused at startup"}}, {"P0041", {info, "Penciller manifest switched from SQN ~w to ~w"}}, + {"P0042", + {warn, "Cache full so attempting roll memory with l0_size=~w"}}, {"PC001", {info, "Penciller's clerk ~w started with owner ~w"}}, diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 74e3756..2163e80 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -672,13 +672,27 @@ handle_call({push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}}, % % Check the approximate size of the cache. If it is over the maximum size, % trigger a background L0 file write and update state of levelzero_pending. 
- case State#state.levelzero_pending or State#state.work_backlog of - true -> + CacheUpdateBlockedByPendingWork + = State#state.levelzero_pending or State#state.work_backlog, + CacheFull = leveled_pmem:cache_full(State#state.levelzero_cache), + case {CacheUpdateBlockedByPendingWork, CacheFull} of + {true, _} -> leveled_log:log("P0018", [returned, State#state.levelzero_pending, State#state.work_backlog]), {reply, returned, State}; - false -> + {false, true} -> + leveled_log:log("P0042", [State#state.levelzero_size]), + % The cache is full (there are 127 items already in it), so + % can't accept any more. However, we need to try and roll + % memory otherwise cache may be permanently full. + gen_server:reply(From, returned), + {L0Pend, L0Constructor, none} = + maybe_roll_memory(State, false), + {noreply, + State#state{levelzero_pending=L0Pend, + levelzero_constructor=L0Constructor}}; + {false, false} -> leveled_log:log("P0018", [ok, false, false]), PushedTree = case is_tuple(LedgerTable) of @@ -689,7 +703,9 @@ handle_call({push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}}, ?CACHE_TYPE) end, % Reply must happen after the table has been converted - gen_server:reply(From, ok), + gen_server:reply(From, ok), + % Update LevelZero will add to the cache and maybe roll the + % cache from memory to L0 disk if the cache is too big {noreply, update_levelzero(State#state.levelzero_size, {PushedTree, PushedIdx, MinSQN, MaxSQN}, @@ -893,13 +909,16 @@ handle_call(close, _From, State) -> % on the clerk. 
ok = leveled_pclerk:clerk_close(State#state.clerk), leveled_log:log("P0008", [close]), - - L0_Present = leveled_pmanifest:key_lookup(State#state.manifest, 0, all), L0_Left = State#state.levelzero_size > 0, - case {State#state.levelzero_pending, L0_Present, L0_Left} of - {false, false, true} -> - {L0Pid, _L0Bloom} = roll_memory(State, true), - ok = leveled_sst:sst_close(L0Pid); + case {State#state.levelzero_pending, L0_Left} of + {false, true} -> + {_L0Pend, L0Pid, _L0Bloom} = maybe_roll_memory(State, true), + case is_pid(L0Pid) of + true -> + ok = leveled_sst:sst_close(L0Pid); + false -> + ok + end; StatusTuple -> leveled_log:log("P0010", [StatusTuple]) end, @@ -1249,8 +1268,6 @@ update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN}, ledger_sqn=UpdMaxSQN}, CacheTooBig = NewL0Size > State#state.levelzero_maxcachesize, CacheMuchTooBig = NewL0Size > ?SUPER_MAX_TABLE_SIZE, - L0Free = - not leveled_pmanifest:levelzero_present(State#state.manifest), RandomFactor = case State#state.levelzero_cointoss of true -> @@ -1265,20 +1282,36 @@ update_levelzero(L0Size, {PushedTree, PushedIdx, MinSQN, MaxSQN}, end, NoPendingManifestChange = not State#state.work_ongoing, JitterCheck = RandomFactor or CacheMuchTooBig, - case {CacheTooBig, L0Free, JitterCheck, NoPendingManifestChange} of - {true, true, true, true} -> - {L0Constructor, none} = roll_memory(UpdState, false), - leveled_log:log_timer("P0031", [true, true], SW), - UpdState#state{levelzero_pending=true, + case {CacheTooBig, JitterCheck, NoPendingManifestChange} of + {true, true, true} -> + {L0Pend, L0Constructor, none} = + maybe_roll_memory(UpdState, false), + leveled_log:log_timer("P0031", [true, true, L0Pend], SW), + UpdState#state{levelzero_pending=L0Pend, levelzero_constructor=L0Constructor}; _ -> leveled_log:log_timer("P0031", - [CacheTooBig, JitterCheck], + [CacheTooBig, JitterCheck, false], SW), UpdState end end. 
+ +-spec maybe_roll_memory(pcl_state(), boolean()) + -> {boolean(), pid()|undefined, leveled_ebloom:bloom()|none}. +%% @doc +%% Check that no L0 file is present before rolling memory +maybe_roll_memory(State, SyncRoll) -> + BlockedByL0 = leveled_pmanifest:levelzero_present(State#state.manifest), + case BlockedByL0 of + true -> + {false, undefined, none}; + false -> + {L0Constructor, Bloom} = roll_memory(State, SyncRoll), + {true, L0Constructor, Bloom} + end. + -spec roll_memory(pcl_state(), boolean()) -> {pid(), leveled_ebloom:bloom()|none}. %% @doc @@ -1916,7 +1949,10 @@ add_missing_hash({K, {SQN, ST, MD}}) -> clean_dir_test() -> % Pointless gesture to test coverage RootPath = "../test/ledger", - ok = clean_subdir(RootPath ++ "/test.bob"). + ok = filelib:ensure_dir(RootPath), + ?assertMatch(ok, file:write_file(RootPath ++ "/test.bob", "hello")), + ok = clean_subdir(RootPath ++ "/test.bob"), + ok = file:delete(RootPath ++ "/test.bob"). archive_files_test() -> diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index f19103a..e2de638 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -38,7 +38,8 @@ add_to_index/3, new_index/0, clear_index/1, - check_index/2 + check_index/2, + cache_full/1 ]). -include_lib("eunit/include/eunit.hrl"). @@ -52,6 +53,12 @@ %%% API %%%============================================================================ +-spec cache_full(list()) -> boolean(). +%% @doc +%% If there are already 127 entries in the cache then the cache is full +cache_full(L0Cache) -> + length(L0Cache) == 127. + -spec prepare_for_index(index_array(), leveled_codec:segment_hash()) -> index_array(). 
%% @doc diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index b610440..5aef2e8 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -474,9 +474,16 @@ fetchput_snapshot(_Config) -> load_and_count(_Config) -> % Use artificially small files, and the load keys, counting they're all % present + load_and_count(50000000, 2500, 28000), + load_and_count(200000000, 100, 300000). + + +load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> RootPath = testutil:reset_filestructure(), StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 50000000}, + {max_journalsize, JournalSize}, + {cache_size, BookiesMemSize}, + {max_pencillercachesize, PencillerMemSize}, {sync_strategy, testutil:sync_strategy()}], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, TestSpec} = testutil:generate_testobject(), From a4d89ad6d16f9d23c49d50736d120b9c4995099e Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 14 Jan 2019 16:11:04 +0000 Subject: [PATCH 2/2] Add log of higher than expected ratio of cache sizes Warn at startup if this ratio is high. Not sure how snapshots will perform if there are a lot of ledger caches in the list. However, it should still work. 
basic_SUITE/load_count test intended to demonstrate that a large ratio is still functional --- src/leveled_bookie.erl | 14 ++++++++++++++ src/leveled_log.erl | 3 +++ 2 files changed, 17 insertions(+) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index c6027a0..7b5c7b7 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -1125,6 +1125,20 @@ init([Opts]) -> ConfiguredCacheSize div (100 div ?CACHE_SIZE_JITTER), CacheSize = ConfiguredCacheSize + erlang:phash2(self()) rem CacheJitter, + PCLMaxSize = + PencillerOpts#penciller_options.max_inmemory_tablesize, + CacheRatio = PCLMaxSize div ConfiguredCacheSize, + % It is expected that the maximum size of the penciller + % in-memory store should not be more than about 10 x the size + % of the ledger cache. In this case there will be a larger + % than tested list of ledger_caches in the penciller memory, + % and performance may be unpredictable + case CacheRatio > 32 of + true -> + leveled_log:log("B0020", [PCLMaxSize, ConfiguredCacheSize]); + false -> + ok + end, {HeadOnly, HeadLookup} = case proplists:get_value(head_only, Opts) of diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 7f06ff9..954e3c0 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -75,6 +75,9 @@ {"B0019", {warn, "Use of book_indexfold with constraint of Bucket ~w with " ++ "no StartKey is deprecated"}}, + {"B0020", + {warn, "Ratio of penciller cache size ~w to bookie's memory " + ++ "cache size ~w is larger than expected"}}, {"R0001", {debug, "Object fold to process batch of ~w objects"}},