From 7e4c3db9153dd9392b94b7f7e550b5f1815b285b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 8 Feb 2018 10:29:27 +0000 Subject: [PATCH 1/5] Alternate scale factor Also had failed unit test - there was an issue with bit-flipping the position not being safely caught --- src/leveled_penciller.erl | 11 ++++++++--- src/leveled_sst.erl | 9 ++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 0f214d1..e02109a 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -200,9 +200,14 @@ -include_lib("eunit/include/eunit.hrl"). --define(LEVEL_SCALEFACTOR, [{0, 0}, {1, 8}, {2, 64}, {3, 512}, - {4, 4096}, {5, 32768}, {6, 262144}, - {7, infinity}]). +-define(LEVEL_SCALEFACTOR, + [{0, 0}, {1, 4}, {2, 16}, {3, 64}, {4, 512}, + {5, 4096}, {6, 32768}, {7, infinity}]). + % As an alternative to going up by a factor of 8 at each level, + % increase by a factor of 4 at young levels - to make early + % compaction jobs shorter. A trillion keys is still supported + % before hitting the infinite level. At > 10 trillion keys + % behaviour may become increasingly difficult to predict. -define(MAX_LEVELS, 8). -define(MAX_WORK_WAIT, 300). -define(MANIFEST_FP, "ledger_manifest"). 
diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index b13d07f..e232372 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1517,9 +1517,12 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod) -> crc_check_slot(FullBin) -> <> = FullBin, + CRC32H:32/integer, + Rest/binary>> = FullBin, + PosBL0 = min(PosBL, byte_size(FullBin) - 4), + % If the position has been bit-flipped to beyond the maximum possible + % length, use the maximum possible length + <> = Rest, case {hmac(Header), hmac(PosBL)} of {CRC32H, CRC32PBL} -> {Header, Blocks}; From c7cea04abae21cb8a9c574d6801735811cd52108 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 8 Feb 2018 15:31:35 +0000 Subject: [PATCH 2/5] Correct maximum length --- src/leveled_sst.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index e232372..d0ac95a 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1519,11 +1519,11 @@ crc_check_slot(FullBin) -> PosBL:32/integer, CRC32H:32/integer, Rest/binary>> = FullBin, - PosBL0 = min(PosBL, byte_size(FullBin) - 4), + PosBL0 = min(PosBL, byte_size(FullBin) - 3), % If the position has been bit-flipped to beyond the maximum possible % length, use the maximum possible length <> = Rest, - case {hmac(Header), hmac(PosBL)} of + case {hmac(Header), hmac(PosBL0)} of {CRC32H, CRC32PBL} -> {Header, Blocks}; _ -> From 8113aebdcf7818f4ce7b1cc5bdc228d7fe46b2f7 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 9 Feb 2018 08:59:21 +0000 Subject: [PATCH 3/5] Add timings for Level 3 Level 3 readings now relatively common - so time them separately --- src/leveled_log.erl | 6 ++++-- src/leveled_penciller.erl | 11 ++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 0e475fc..e815bce 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -125,10 +125,12 @@ {"P0032", {info, "Fetch head timing with sample_count=~w and 
level timings of" ++ " foundmem_time=~w found0_time=~w found1_time=~w" - ++ " found2_time=~w foundlower_time=~w missed_time=~w" + ++ " found2_time=~w found3_time=~w foundlower_time=~w" + ++ " missed_time=~w" ++ " with counts of" ++ " foundmem_count=~w found0_count=~w found1_count=~w" - ++ " found2_count=~w foundlower_count=~w missed_count=~w"}}, + ++ " found2_count=~w found3_count=~w foundlower_count=~w" + ++ " missed_count=~w"}}, {"P0033", {error, "Corrupted manifest file at path ~s to be ignored " ++ "due to error ~w"}}, diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index e02109a..a7c47a9 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -264,12 +264,14 @@ found0_time = 0 :: integer(), found1_time = 0 :: integer(), found2_time = 0 :: integer(), + found3_time = 0 :: integer(), foundlower_time = 0 :: integer(), missed_time = 0 :: integer(), foundmem_count = 0 :: integer(), found0_count = 0 :: integer(), found1_count = 0 :: integer(), found2_count = 0 :: integer(), + found3_count = 0 :: integer(), foundlower_count = 0 :: integer(), missed_count = 0 :: integer()}). @@ -1505,12 +1507,14 @@ log_timings(Timings) -> Timings#pcl_timings.found0_time, Timings#pcl_timings.found1_time, Timings#pcl_timings.found2_time, + Timings#pcl_timings.found3_time, Timings#pcl_timings.foundlower_time, Timings#pcl_timings.missed_time, Timings#pcl_timings.foundmem_count, Timings#pcl_timings.found0_count, Timings#pcl_timings.found1_count, Timings#pcl_timings.found2_count, + Timings#pcl_timings.found3_count, Timings#pcl_timings.foundlower_count, Timings#pcl_timings.missed_count]). 
@@ -1548,6 +1552,10 @@ update_timings(SW, Timings, Result, Stage) -> L2T = Timings#pcl_timings.found2_time + Timer, L2C = Timings#pcl_timings.found2_count + 1, Timings0#pcl_timings{found2_time = L2T, found2_count = L2C}; + {_, 3} -> + L3T = Timings#pcl_timings.found3_time + Timer, + L3C = Timings#pcl_timings.found3_count + 1, + Timings0#pcl_timings{found3_time = L3T, found3_count = L3C}; _ -> LLT = Timings#pcl_timings.foundlower_time + Timer, LLC = Timings#pcl_timings.foundlower_count + 1, @@ -2012,7 +2020,8 @@ timings_test() -> ?assertMatch(3, T2#pcl_timings.sample_count), ?assertMatch(true, T2#pcl_timings.foundlower_time > T2#pcl_timings.found2_time), ?assertMatch(1, T2#pcl_timings.found2_count), - ?assertMatch(2, T2#pcl_timings.foundlower_count). + ?assertMatch(1, T2#pcl_timings.found3_count), + ?assertMatch(1, T2#pcl_timings.foundlower_count). coverage_cheat_test() -> From 5673d8b558baa4ee848a1b1f3661653bad0f8031 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sat, 10 Feb 2018 08:09:33 +0000 Subject: [PATCH 4/5] Expand test to ensure coverage catch --- src/leveled_penciller.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index a7c47a9..964a18e 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -1969,7 +1969,7 @@ foldwithimm_simple_test() -> create_file_test() -> {RP, Filename} = {"../test/", "new_file.sst"}, ok = file:write_file(filename:join(RP, Filename), term_to_binary("hello")), - KVL = lists:usort(generate_randomkeys({10000, 0})), + KVL = lists:usort(generate_randomkeys({50000, 0})), Tree = leveled_tree:from_orderedlist(KVL, ?CACHE_TYPE), FetchFun = fun(Slot) -> lists:nth(Slot, [Tree]) end, {ok, @@ -1979,7 +1979,7 @@ create_file_test() -> 1, FetchFun, undefined, - 10000, + 50000, native), lists:foreach(fun(X) -> case checkready(SP) of From f748fc8611a1bd3e5ced3bb8a2328b236d9eb897 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sat, 10 Feb 2018 
08:10:24 +0000 Subject: [PATCH 5/5] Narrower still Make the LSM tree more bottle shaped. Experiment to judge performance impact --- src/leveled_penciller.erl | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index a7c47a9..2efc114 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -201,13 +201,21 @@ -include_lib("eunit/include/eunit.hrl"). -define(LEVEL_SCALEFACTOR, - [{0, 0}, {1, 4}, {2, 16}, {3, 64}, {4, 512}, - {5, 4096}, {6, 32768}, {7, infinity}]). + [{0, 0}, + {1, 4}, {2, 16}, {3, 64}, % Factor of 4 + {4, 384}, {5, 2304}, % Factor of 6 + {6, 18432}, % Factor of 8 + {7, infinity}]). % As an alternative to going up by a factor of 8 at each level, % increase by a factor of 4 at young levels - to make early - % compaction jobs shorter. A trillion keys is still supported - % before hitting the infinite level. At > 10 trillion keys - % behaviour may become increasingly difficult to predict. + % compaction jobs shorter. + % + % There are 32K keys per file => with 4096 files there are 100M + % keys supported, + + % 600M keys are supported before hitting the infinite level. + % At o(10) trillion keys behaviour may become increasingly + % difficult to predict. -define(MAX_LEVELS, 8). -define(MAX_WORK_WAIT, 300). -define(MANIFEST_FP, "ledger_manifest").