From 2e2c35fe1b7fff24749e4d5e06b81bce64dd3fcb Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 29 Oct 2018 15:49:50 +0000 Subject: [PATCH 01/29] Extract deprecated recent_aae Ready to add other forms of last modified filtering --- src/leveled_bookie.erl | 29 +- src/leveled_codec.erl | 188 ---------- test/end_to_end/tictac_SUITE.erl | 619 ------------------------------- 3 files changed, 3 insertions(+), 833 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 39384d9..a1e77c6 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -104,7 +104,6 @@ -define(JOURNAL_SIZE_JITTER, 20). -define(ABSOLUTEMAX_JOURNALSIZE, 4000000000). -define(LONG_RUNNING, 80000). --define(RECENT_AAE, false). -define(COMPRESSION_METHOD, lz4). -define(COMPRESSION_POINT, on_receipt). -define(TIMING_SAMPLESIZE, 100). @@ -118,7 +117,6 @@ {cache_size, ?CACHE_SIZE}, {max_journalsize, 1000000000}, {sync_strategy, none}, - {recent_aae, ?RECENT_AAE}, {head_only, false}, {waste_retention_period, undefined}, {max_run_length, undefined}, @@ -140,7 +138,6 @@ -record(state, {inker :: pid() | undefined, penciller :: pid() | undefined, cache_size :: integer() | undefined, - recent_aae :: recent_aae(), ledger_cache = #ledger_cache{}, is_snapshot :: boolean() | undefined, slow_offer = false :: boolean(), @@ -186,7 +183,6 @@ -type fold_timings() :: no_timing|#fold_timings{}. -type head_timings() :: no_timing|#head_timings{}. -type timing_types() :: head|get|put|fold. --type recent_aae() :: false|#recent_aae{}|undefined. -type key() :: binary()|string()|{binary(), binary()}. % Keys SHOULD be binary() % string() support is a legacy of old tests @@ -220,12 +216,6 @@ % riak_sync is used for backwards compatability with OTP16 - and % will manually call sync() after each write (rather than use the % O_SYNC option on startup - {recent_aae, false|{atom(), list(), integer(), integer()}} | - % DEPRECATED - % Before working on kv_index_tictactree looked at the possibility - % of maintaining AAE just for recent changes. Given the efficiency - % of the kv_index_tictactree approach this is unecessary. 
- % Should be set to false {head_only, false|with_lookup|no_lookup} | % When set to true, there are three fundamental changes as to how % leveled will work: @@ -1008,16 +998,6 @@ init([Opts]) -> ConfiguredCacheSize div (100 div ?CACHE_SIZE_JITTER), CacheSize = ConfiguredCacheSize + erlang:phash2(self()) rem CacheJitter, - RecentAAE = - case proplists:get_value(recent_aae, Opts) of - false -> - false; - {FilterType, BucketList, LimitMinutes, UnitMinutes} -> - #recent_aae{filter = FilterType, - buckets = BucketList, - limit_minutes = LimitMinutes, - unit_minutes = UnitMinutes} - end, {HeadOnly, HeadLookup} = case proplists:get_value(head_only, Opts) of @@ -1030,7 +1010,6 @@ init([Opts]) -> end, State0 = #state{cache_size=CacheSize, - recent_aae=RecentAAE, is_snapshot=false, head_only=HeadOnly, head_lookup = HeadLookup}, @@ -1926,14 +1905,12 @@ preparefor_ledgercache(?INKT_KEYD, {no_lookup, SQN, KeyChanges}; preparefor_ledgercache(_InkTag, LedgerKey, SQN, Obj, Size, {IdxSpecs, TTL}, - State) -> - {Bucket, Key, MetaValue, {KeyH, ObjH}, LastMods} = + _State) -> + {Bucket, Key, MetaValue, {KeyH, _ObjH}, _LastMods} = leveled_codec:generate_ledgerkv(LedgerKey, SQN, Obj, Size, TTL), KeyChanges = [{LedgerKey, MetaValue}] ++ - leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL) ++ - leveled_codec:aae_indexspecs(State#state.recent_aae, - Bucket, Key, SQN, ObjH, LastMods), + leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL), {KeyH, SQN, KeyChanges}. diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 92c677b..abb70c7 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -63,7 +63,6 @@ get_keyandobjhash/2, idx_indexspecs/5, obj_objectspecs/3, - aae_indexspecs/6, riak_extract_metadata/2, segment_hash/1, to_lookup/1, @@ -76,7 +75,6 @@ -define(NRT_IDX, "$aae."). -define(ALL_BUCKETS, <<"$all">>). --type recent_aae() :: #recent_aae{}. -type riak_metadata() :: {binary()|delete, % Sibling Metadata binary()|null, % Vclock Metadata integer()|null, % Hash of vclock - non-exportable @@ -577,103 +575,6 @@ set_status(remove, _TTL) -> %% TODO: timestamps for delayed reaping tomb. --spec aae_indexspecs(false|recent_aae(), - any(), any(), - integer(), integer(), - list()) - -> list(). -%% @doc -%% Generate an additional index term representing the change, if the last -%% modified date for the change is within the definition of recency. -%% -%% The object may have multiple last modified dates (siblings), and in this -%% case index entries for all dates within the range are added. 
-%% -%% The index should entry auto-expire in the future (when it is no longer -%% relevant to assessing recent changes) -aae_indexspecs(false, _Bucket, _Key, _SQN, _H, _LastMods) -> - []; -aae_indexspecs(_AAE, _Bucket, _Key, _SQN, _H, []) -> - []; -aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) -> - InList = lists:member(Bucket, AAE#recent_aae.buckets), - Bucket0 = - case AAE#recent_aae.filter of - blacklist -> - case InList of - true -> - false; - false -> - {all, Bucket} - end; - whitelist -> - case InList of - true -> - Bucket; - false -> - false - end - end, - case Bucket0 of - false -> - []; - Bucket0 -> - GenIdxFun = - fun(LMD0, Acc) -> - Dates = parse_date(LMD0, - AAE#recent_aae.unit_minutes, - AAE#recent_aae.limit_minutes, - leveled_util:integer_now()), - case Dates of - no_index -> - Acc; - {LMD1, TTL} -> - TreeSize = AAE#recent_aae.tree_size, - SegID32 = leveled_tictac:keyto_segment32(Key), - SegID = - leveled_tictac:get_segment(SegID32, TreeSize), - IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin", - IdxTrmStr = - string:right(integer_to_list(SegID), 8, $0) ++ - "." ++ - string:right(integer_to_list(H), 8, $0), - {IdxK, IdxV} = - gen_indexspec(Bucket0, Key, - add, - list_to_binary(IdxFldStr), - list_to_binary(IdxTrmStr), - SQN, TTL), - [{IdxK, IdxV}|Acc] - end - end, - lists:foldl(GenIdxFun, [], LastMods) - end. - --spec parse_date(tuple(), integer(), integer(), integer()) -> - no_index|{list(), integer()}. -%% @doc -%% Parse the last modified date and the AAE date configuration to return a -%% binary to be used as the last modified date part of the index, and an -%% integer to be used as the TTL of the index entry. -%% Return no_index if the change is not recent. -parse_date(LMD, UnitMins, LimitMins, Now) -> - LMDsecs = leveled_util:integer_time(LMD), - Recent = (LMDsecs + LimitMins * 60) > Now, - case Recent of - false -> - no_index; - true -> - {{Y, M, D}, {Hour, Minute, _Second}} = - calendar:now_to_datetime(LMD), - RoundMins = - UnitMins * (Minute div UnitMins), - StrTime = - lists:flatten(io_lib:format(?LMD_FORMAT, - [Y, M, D, Hour, RoundMins])), - TTL = min(Now, LMDsecs) + (LimitMins + UnitMins) * 60, - {StrTime, TTL} - end. - -spec generate_ledgerkv( tuple(), integer(), any(), integer(), tuple()|infinity) -> {any(), any(), any(), @@ -927,95 +828,6 @@ hashperf_test() -> io:format(user, "1000 object hashes in ~w microseconds~n", [timer:now_diff(os:timestamp(), SW)]). -parsedate_test() -> - {MeS, S, MiS} = os:timestamp(), - timer:sleep(100), - Now = leveled_util:integer_now(), - UnitMins = 5, - LimitMins = 60, - PD = parse_date({MeS, S, MiS}, UnitMins, LimitMins, Now), - io:format("Parsed Date ~w~n", [PD]), - ?assertMatch(true, is_tuple(PD)), - check_pd(PD, UnitMins), - CheckFun = - fun(Offset) -> - ModDate = {MeS, S + Offset * 60, MiS}, - check_pd(parse_date(ModDate, UnitMins, LimitMins, Now), UnitMins) - end, - lists:foreach(CheckFun, lists:seq(1, 60)). - -check_pd(PD, UnitMins) -> - {LMDstr, _TTL} = PD, - Minutes = list_to_integer(lists:nthtail(10, LMDstr)), - ?assertMatch(0, Minutes rem UnitMins). - -parseolddate_test() -> - LMD = os:timestamp(), - timer:sleep(100), - Now = leveled_util:integer_now() + 60 * 60, - UnitMins = 5, - LimitMins = 60, - PD = parse_date(LMD, UnitMins, LimitMins, Now), - io:format("Parsed Date ~w~n", [PD]), - ?assertMatch(no_index, PD). 
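The filter logic in the removed aae_indexspecs/6 above reduces to a small decision table: a blacklist match suppresses indexing, a blacklist miss indexes under the {all, Bucket} pseudo-bucket, and a whitelist inverts both outcomes. A standalone distillation for reference while reviewing the removal (the function name is illustrative only):

filter_bucket(blacklist, Buckets, Bucket) ->
    % A blacklisted bucket is skipped; anything else is indexed
    % under the $all pseudo-bucket
    case lists:member(Bucket, Buckets) of
        true -> false;
        false -> {all, Bucket}
    end;
filter_bucket(whitelist, Buckets, Bucket) ->
    % Only whitelisted buckets are indexed, under their own name
    case lists:member(Bucket, Buckets) of
        true -> Bucket;
        false -> false
    end.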
- -genaaeidx_test() -> - AAE = #recent_aae{filter=blacklist, - buckets=[], - limit_minutes=60, - unit_minutes=5}, - Bucket = <<"Bucket1">>, - Key = <<"Key1">>, - SQN = 1, - H = erlang:phash2(null), - LastMods = [os:timestamp(), os:timestamp()], - - AAESpecs = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods), - ?assertMatch(2, length(AAESpecs)), - - LastMods1 = [os:timestamp()], - AAESpecs1 = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods1), - ?assertMatch(1, length(AAESpecs1)), - IdxB = element(2, element(1, lists:nth(1, AAESpecs1))), - io:format(user, "AAE IDXSpecs1 ~w~n", [AAESpecs1]), - ?assertMatch(<<"$all">>, IdxB), - - LastMods0 = [], - AAESpecs0 = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods0), - ?assertMatch(0, length(AAESpecs0)), - - AAE0 = AAE#recent_aae{filter=whitelist, - buckets=[<<"Bucket0">>]}, - AAESpecsB0 = aae_indexspecs(AAE0, Bucket, Key, SQN, H, LastMods1), - ?assertMatch(0, length(AAESpecsB0)), - - AAESpecsB1 = aae_indexspecs(AAE0, <<"Bucket0">>, Key, SQN, H, LastMods1), - ?assertMatch(1, length(AAESpecsB1)), - [{{?IDX_TAG, <<"Bucket0">>, {Fld, Term}, <<"Key1">>}, - {SQN, {active, TS}, no_lookup, null}}] = AAESpecsB1, - ?assertMatch(true, is_integer(TS)), - ?assertMatch(17, length(binary_to_list(Term))), - ?assertMatch("$aae.", lists:sublist(binary_to_list(Fld), 5)), - - AAE1 = AAE#recent_aae{filter=blacklist, - buckets=[<<"Bucket0">>]}, - AAESpecsB2 = aae_indexspecs(AAE1, <<"Bucket0">>, Key, SQN, H, LastMods1), - ?assertMatch(0, length(AAESpecsB2)). - -delayedupdate_aaeidx_test() -> - AAE = #recent_aae{filter=blacklist, - buckets=[], - limit_minutes=60, - unit_minutes=5}, - Bucket = <<"Bucket1">>, - Key = <<"Key1">>, - SQN = 1, - H = erlang:phash2(null), - {Mega, Sec, MSec} = os:timestamp(), - LastMods = [{Mega -1, Sec, MSec}], - AAESpecs = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods), - ?assertMatch(0, length(AAESpecs)). - head_segment_compare_test() -> % Reminder to align native and parallel(leveled_ko) key stores for % kv_index_tictactree diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index 2c0435d..5a22abe 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -5,20 +5,12 @@ -export([ many_put_compare/1, index_compare/1, - recent_aae_noaae/1, - recent_aae_allaae/1, - recent_aae_bucketaae/1, - recent_aae_expiry/1, basic_headonly/1 ]). all() -> [ many_put_compare, index_compare, - recent_aae_noaae, - recent_aae_allaae, - recent_aae_bucketaae, - recent_aae_expiry, basic_headonly ]. @@ -542,478 +534,6 @@ index_compare(_Config) -> ok = leveled_bookie:book_close(Book2D). -recent_aae_noaae(_Config) -> - % Starts databases with recent_aae tables, and attempt to query to fetch - % recent aae trees returns empty trees as no index entries are found. 
- - TreeSize = small, - % SegmentCount = 256 * 256, - UnitMins = 2, - - % Test requires multiple different databases, so want to mount them all - % on individual file paths - RootPathA = testutil:reset_filestructure("testA"), - RootPathB = testutil:reset_filestructure("testB"), - RootPathC = testutil:reset_filestructure("testC"), - RootPathD = testutil:reset_filestructure("testD"), - StartOptsA = aae_startopts(RootPathA, false), - StartOptsB = aae_startopts(RootPathB, false), - StartOptsC = aae_startopts(RootPathC, false), - StartOptsD = aae_startopts(RootPathD, false), - - % Book1A to get all objects - {ok, Book1A} = leveled_bookie:book_start(StartOptsA), - % Book1B/C/D will have objects partitioned across it - {ok, Book1B} = leveled_bookie:book_start(StartOptsB), - {ok, Book1C} = leveled_bookie:book_start(StartOptsC), - {ok, Book1D} = leveled_bookie:book_start(StartOptsD), - - {B1, K1, V1, S1, MD} = {"Bucket", - "Key1.1.4567.4321", - "Value1", - [], - [{"MDK1", "MDV1"}]}, - {TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD), - - SW_StartLoad = os:timestamp(), - - ok = testutil:book_riakput(Book1A, TestObject, TestSpec), - ok = testutil:book_riakput(Book1B, TestObject, TestSpec), - testutil:check_forobject(Book1A, TestObject), - testutil:check_forobject(Book1B, TestObject), - - {TicTacTreeJoined, TicTacTreeFull, EmptyTree, _LMDIndexes} = - load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - false), - % Go compare! Also confirm we're not comparing empty trees - DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, - TicTacTreeJoined), - - DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = DL1_0 == [], - true = length(DL1_1) == 0, - - ok = leveled_bookie:book_close(Book1A), - ok = leveled_bookie:book_close(Book1B), - ok = leveled_bookie:book_close(Book1C), - ok = leveled_bookie:book_close(Book1D). - - -recent_aae_allaae(_Config) -> - % Leveled is started in blacklisted mode with no buckets blacklisted. - % - % A number of changes are then loaded into a store, and also partitioned - % across a separate set of three stores. A merge tree is returned from - % both the single store and the partitioned store, and proven to compare - % the same. - % - % A single change is then made, but into one half of the system only. The - % aae index is then re-queried and it is verified that a signle segment - % difference is found. 
- % - % The segment Id found is then used in a query to find the Keys that make - % up that segment, and the delta discovered should be just that one key - % which was known to have been changed - - TreeSize = small, - % SegmentCount = 256 * 256, - UnitMins = 2, - AAE = {blacklist, [], 60, UnitMins}, - - % Test requires multiple different databases, so want to mount them all - % on individual file paths - RootPathA = testutil:reset_filestructure("testA"), - RootPathB = testutil:reset_filestructure("testB"), - RootPathC = testutil:reset_filestructure("testC"), - RootPathD = testutil:reset_filestructure("testD"), - StartOptsA = aae_startopts(RootPathA, AAE), - StartOptsB = aae_startopts(RootPathB, AAE), - StartOptsC = aae_startopts(RootPathC, AAE), - StartOptsD = aae_startopts(RootPathD, AAE), - - % Book1A to get all objects - {ok, Book1A} = leveled_bookie:book_start(StartOptsA), - % Book1B/C/D will have objects partitioned across it - {ok, Book1B} = leveled_bookie:book_start(StartOptsB), - {ok, Book1C} = leveled_bookie:book_start(StartOptsC), - {ok, Book1D} = leveled_bookie:book_start(StartOptsD), - - {B1, K1, V1, S1, MD} = {"Bucket", - "Key1.1.4567.4321", - "Value1", - [], - [{"MDK1", "MDV1"}]}, - {TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD), - - SW_StartLoad = os:timestamp(), - - ok = testutil:book_riakput(Book1A, TestObject, TestSpec), - ok = testutil:book_riakput(Book1B, TestObject, TestSpec), - testutil:check_forobject(Book1A, TestObject), - testutil:check_forobject(Book1B, TestObject), - - {TicTacTreeJoined, TicTacTreeFull, EmptyTree, LMDIndexes} = - load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - false), - % Go compare! Also confirm we're not comparing empty trees - DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, - TicTacTreeJoined), - - DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = DL1_0 == [], - true = length(DL1_1) > 100, - - ok = leveled_bookie:book_close(Book1A), - ok = leveled_bookie:book_close(Book1B), - ok = leveled_bookie:book_close(Book1C), - ok = leveled_bookie:book_close(Book1D), - - % Book2A to get all objects - {ok, Book2A} = leveled_bookie:book_start(StartOptsA), - % Book2B/C/D will have objects partitioned across it - {ok, Book2B} = leveled_bookie:book_start(StartOptsB), - {ok, Book2C} = leveled_bookie:book_start(StartOptsC), - {ok, Book2D} = leveled_bookie:book_start(StartOptsD), - - {TicTacTreeJoined, TicTacTreeFull, EmptyTree, LMDIndexes} = - load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D, - SW_StartLoad, TreeSize, UnitMins, - LMDIndexes), - % Go compare! 
Also confirm we're not comparing empty trees - DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, - TicTacTreeJoined), - - DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = DL1_0 == [], - true = length(DL1_1) > 100, - - V2 = "Value2", - {TestObject2, TestSpec2} = - testutil:generate_testobject(B1, K1, V2, S1, MD), - - New_startTS = os:timestamp(), - - ok = testutil:book_riakput(Book2B, TestObject2, TestSpec2), - testutil:check_forobject(Book2B, TestObject2), - testutil:check_forobject(Book2A, TestObject), - - New_endTS = os:timestamp(), - NewLMDIndexes = determine_lmd_indexes(New_startTS, New_endTS, UnitMins), - {TicTacTreeJoined2, TicTacTreeFull2, _EmptyTree, NewLMDIndexes} = - load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D, - New_startTS, TreeSize, UnitMins, - NewLMDIndexes), - DL2_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull2, - TicTacTreeJoined2), - - % DL2_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = length(DL2_0) == 1, - - [DirtySeg] = DL2_0, - TermPrefix = string:right(integer_to_list(DirtySeg), 8, $0), - - LMDSegFolder = - fun(LMD, {Acc, Bookie}) -> - IdxLMD = list_to_binary("$aae." ++ LMD ++ "_bin"), - IdxQ1 = - {index_query, - <<"$all">>, - {fun testutil:foldkeysfun_returnbucket/3, []}, - {IdxLMD, - list_to_binary(TermPrefix ++ "."), - list_to_binary(TermPrefix ++ "|")}, - {true, undefined}}, - {async, IdxFolder} = - leveled_bookie:book_returnfolder(Bookie, IdxQ1), - {Acc ++ IdxFolder(), Bookie} - end, - {KeysTerms2A, _} = lists:foldl(LMDSegFolder, - {[], Book2A}, - lists:usort(LMDIndexes ++ NewLMDIndexes)), - true = length(KeysTerms2A) >= 1, - - {KeysTerms2B, _} = lists:foldl(LMDSegFolder, - {[], Book2B}, - lists:usort(LMDIndexes ++ NewLMDIndexes)), - {KeysTerms2C, _} = lists:foldl(LMDSegFolder, - {[], Book2C}, - lists:usort(LMDIndexes ++ NewLMDIndexes)), - {KeysTerms2D, _} = lists:foldl(LMDSegFolder, - {[], Book2D}, - lists:usort(LMDIndexes ++ NewLMDIndexes)), - - KeysTerms2Joined = KeysTerms2B ++ KeysTerms2C ++ KeysTerms2D, - DeltaX = lists:subtract(KeysTerms2A, KeysTerms2Joined), - DeltaY = lists:subtract(KeysTerms2Joined, KeysTerms2A), - - io:format("DeltaX ~w~n", [DeltaX]), - io:format("DeltaY ~w~n", [DeltaY]), - - true = length(DeltaX) == 0, % This hasn't seen any extra changes - true = length(DeltaY) == 1, % This has seen an extra change - [{_, {B1, K1}}] = DeltaY, - - ok = leveled_bookie:book_close(Book2A), - ok = leveled_bookie:book_close(Book2B), - ok = leveled_bookie:book_close(Book2C), - ok = leveled_bookie:book_close(Book2D). 
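The compare pattern this removed test drove end-to-end can be sketched directly against leveled_tictac. new_tree/2, merge_trees/2 and find_dirtyleaves/2 are used exactly as in the test above; the add_kv/4 signature (tree, key, value, binary-extract fun) is an assumption made for the sketch, as is the absence of segment collisions between the three keys:

tictac_compare_sketch() ->
    BinExtract = fun(K, V) -> {K, term_to_binary(V)} end,
    Add =
        fun({K, V}, Tree) ->
            leveled_tictac:add_kv(Tree, K, V, BinExtract)
        end,
    KVs = [{<<"K1">>, <<"V1">>}, {<<"K2">>, <<"V2">>}, {<<"K3">>, <<"V3">>}],
    Full = lists:foldl(Add, leveled_tictac:new_tree(full, small), KVs),
    % Partition the same data across two trees, then merge the partitions
    P1 = lists:foldl(Add, leveled_tictac:new_tree(p1, small), [hd(KVs)]),
    P2 = lists:foldl(Add, leveled_tictac:new_tree(p2, small), tl(KVs)),
    true = [] == leveled_tictac:find_dirtyleaves(
                    Full, leveled_tictac:merge_trees(P1, P2)),
    % One extra change on one side only should leave one dirty segment
    P2X = Add({<<"K3">>, <<"V3-changed">>}, P2),
    Joined = leveled_tictac:merge_trees(P1, P2X),
    true = 1 == length(leveled_tictac:find_dirtyleaves(Full, Joined)).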
- - - -recent_aae_bucketaae(_Config) -> - % Configure AAE to work only on a single whitelisted bucket - % Confirm that we can spot a delta in this bucket, but not - % in another bucket - - TreeSize = small, - % SegmentCount = 256 * 256, - UnitMins = 2, - AAE = {whitelist, [<<"Bucket">>], 60, UnitMins}, - - % Test requires multiple different databases, so want to mount them all - % on individual file paths - RootPathA = testutil:reset_filestructure("testA"), - RootPathB = testutil:reset_filestructure("testB"), - RootPathC = testutil:reset_filestructure("testC"), - RootPathD = testutil:reset_filestructure("testD"), - StartOptsA = aae_startopts(RootPathA, AAE), - StartOptsB = aae_startopts(RootPathB, AAE), - StartOptsC = aae_startopts(RootPathC, AAE), - StartOptsD = aae_startopts(RootPathD, AAE), - - % Book1A to get all objects - {ok, Book1A} = leveled_bookie:book_start(StartOptsA), - % Book1B/C/D will have objects partitioned across it - {ok, Book1B} = leveled_bookie:book_start(StartOptsB), - {ok, Book1C} = leveled_bookie:book_start(StartOptsC), - {ok, Book1D} = leveled_bookie:book_start(StartOptsD), - - {B1, K1, V1, S1, MD} = {<<"Bucket">>, - "Key1.1.4567.4321", - "Value1", - [], - [{"MDK1", "MDV1"}]}, - {TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD), - - SW_StartLoad = os:timestamp(), - - ok = testutil:book_riakput(Book1A, TestObject, TestSpec), - ok = testutil:book_riakput(Book1B, TestObject, TestSpec), - testutil:check_forobject(Book1A, TestObject), - testutil:check_forobject(Book1B, TestObject), - - {TicTacTreeJoined, TicTacTreeFull, EmptyTree, LMDIndexes} = - load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - false, <<"Bucket">>), - % Go compare! Also confirm we're not comparing empty trees - DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, - TicTacTreeJoined), - - DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = DL1_0 == [], - true = length(DL1_1) > 100, - - ok = leveled_bookie:book_close(Book1A), - ok = leveled_bookie:book_close(Book1B), - ok = leveled_bookie:book_close(Book1C), - ok = leveled_bookie:book_close(Book1D), - - % Book2A to get all objects - {ok, Book2A} = leveled_bookie:book_start(StartOptsA), - % Book2B/C/D will have objects partitioned across it - {ok, Book2B} = leveled_bookie:book_start(StartOptsB), - {ok, Book2C} = leveled_bookie:book_start(StartOptsC), - {ok, Book2D} = leveled_bookie:book_start(StartOptsD), - - % Change the value for a key in another bucket - % If we get trees for this period, no difference should be found - - V2 = "Value2", - {TestObject2, TestSpec2} = - testutil:generate_testobject(<<"NotBucket">>, K1, V2, S1, MD), - - New_startTS2 = os:timestamp(), - - ok = testutil:book_riakput(Book2B, TestObject2, TestSpec2), - testutil:check_forobject(Book2B, TestObject2), - testutil:check_forobject(Book2A, TestObject), - - New_endTS2 = os:timestamp(), - NewLMDIndexes2 = determine_lmd_indexes(New_startTS2, New_endTS2, UnitMins), - {TicTacTreeJoined2, TicTacTreeFull2, _EmptyTree, NewLMDIndexes2} = - load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D, - New_startTS2, TreeSize, UnitMins, - NewLMDIndexes2, <<"Bucket">>), - DL2_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull2, - TicTacTreeJoined2), - true = length(DL2_0) == 0, - - % Now create an object that is a change to an existing key in the - % monitored bucket. 
A differrence should be found - - {TestObject3, TestSpec3} = - testutil:generate_testobject(B1, K1, V2, S1, MD), - - New_startTS3 = os:timestamp(), - - ok = testutil:book_riakput(Book2B, TestObject3, TestSpec3), - testutil:check_forobject(Book2B, TestObject3), - testutil:check_forobject(Book2A, TestObject), - - New_endTS3 = os:timestamp(), - NewLMDIndexes3 = determine_lmd_indexes(New_startTS3, New_endTS3, UnitMins), - {TicTacTreeJoined3, TicTacTreeFull3, _EmptyTree, NewLMDIndexes3} = - load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D, - New_startTS3, TreeSize, UnitMins, - NewLMDIndexes3, <<"Bucket">>), - DL3_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull3, - TicTacTreeJoined3), - - % DL2_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = length(DL3_0) == 1, - - % Find the dirty segment, and use that to find the dirty key - % - % Note that unlike when monitoring $all, fold_keys can be used as there - % is no need to return the Bucket (as hte bucket is known) - - [DirtySeg] = DL3_0, - TermPrefix = string:right(integer_to_list(DirtySeg), 8, $0), - - LMDSegFolder = - fun(LMD, {Acc, Bookie}) -> - IdxLMD = list_to_binary("$aae." ++ LMD ++ "_bin"), - IdxQ1 = - {index_query, - <<"Bucket">>, - {fun testutil:foldkeysfun/3, []}, - {IdxLMD, - list_to_binary(TermPrefix ++ "."), - list_to_binary(TermPrefix ++ "|")}, - {true, undefined}}, - {async, IdxFolder} = - leveled_bookie:book_returnfolder(Bookie, IdxQ1), - {Acc ++ IdxFolder(), Bookie} - end, - {KeysTerms2A, _} = lists:foldl(LMDSegFolder, - {[], Book2A}, - lists:usort(LMDIndexes ++ NewLMDIndexes3)), - true = length(KeysTerms2A) >= 1, - - {KeysTerms2B, _} = lists:foldl(LMDSegFolder, - {[], Book2B}, - lists:usort(LMDIndexes ++ NewLMDIndexes3)), - {KeysTerms2C, _} = lists:foldl(LMDSegFolder, - {[], Book2C}, - lists:usort(LMDIndexes ++ NewLMDIndexes3)), - {KeysTerms2D, _} = lists:foldl(LMDSegFolder, - {[], Book2D}, - lists:usort(LMDIndexes ++ NewLMDIndexes3)), - - KeysTerms2Joined = KeysTerms2B ++ KeysTerms2C ++ KeysTerms2D, - DeltaX = lists:subtract(KeysTerms2A, KeysTerms2Joined), - DeltaY = lists:subtract(KeysTerms2Joined, KeysTerms2A), - - io:format("DeltaX ~w~n", [DeltaX]), - io:format("DeltaY ~w~n", [DeltaY]), - - true = length(DeltaX) == 0, % This hasn't seen any extra changes - true = length(DeltaY) == 1, % This has seen an extra change - [{_, K1}] = DeltaY, - - ok = leveled_bookie:book_close(Book2A), - ok = leveled_bookie:book_close(Book2B), - ok = leveled_bookie:book_close(Book2C), - ok = leveled_bookie:book_close(Book2D). 
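A detail worth drawing out of the LMDSegFolder queries above: the index terms are an 8-digit zero-padded segment ID, a "." separator, then an 8-digit zero-padded hash, and in ASCII $. (46) sorts before $0 while $| (124) sorts after $9. The "." to "|" range therefore captures every hash suffix for one segment, as the hypothetical helper below makes explicit:

term_range_for_segment(DirtySeg) ->
    % {StartTerm, EndTerm} covering all terms prefixed by this segment
    TermPrefix = string:right(integer_to_list(DirtySeg), 8, $0),
    {list_to_binary(TermPrefix ++ "."),     % "." < any digit
        list_to_binary(TermPrefix ++ "|")}. % "|" > any digit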
- - -recent_aae_expiry(_Config) -> - % Proof that the index entries are indeed expired - - TreeSize = small, - % SegmentCount = 256 * 256, - UnitMins = 1, - TotalMins = 2, - AAE = {blacklist, [], TotalMins, UnitMins}, - - % Test requires multiple different databases, so want to mount them all - % on individual file paths - RootPathA = testutil:reset_filestructure("testA"), - StartOptsA = aae_startopts(RootPathA, AAE), - - % Book1A to get all objects - {ok, Book1A} = leveled_bookie:book_start(StartOptsA), - - GenMapFun = - fun(_X) -> - V = testutil:get_compressiblevalue(), - Indexes = testutil:get_randomindexes_generator(8), - testutil:generate_objects(5000, - binary_uuid, - [], - V, - Indexes) - end, - - ObjLists = lists:map(GenMapFun, lists:seq(1, 3)), - - SW0 = os:timestamp(), - % Load all nine lists into Book1A - lists:foreach(fun(ObjL) -> testutil:riakload(Book1A, ObjL) end, - ObjLists), - SW1 = os:timestamp(), - % sleep for two minutes, so all index entries will have expired - GetTicTacTreeFun = - fun(Bookie) -> - get_tictactree_fun(Bookie, <<"$all">>, TreeSize) - end, - EmptyTree = leveled_tictac:new_tree(empty, TreeSize), - LMDIndexes = determine_lmd_indexes(SW0, SW1, UnitMins), - - % Should get a non-empty answer to the query - TicTacTree1_Full = - lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes), - DL3_0 = leveled_tictac:find_dirtyleaves(TicTacTree1_Full, EmptyTree), - io:format("Dirty leaves found before expiry ~w~n", [length(DL3_0)]), - - true = length(DL3_0) > 0, - - SecondsSinceLMD = timer:now_diff(os:timestamp(), SW0) div 1000000, - SecondsToExpiry = (TotalMins + UnitMins) * 60, - - io:format("SecondsToExpiry ~w SecondsSinceLMD ~w~n", - [SecondsToExpiry, SecondsSinceLMD]), - io:format("LMDIndexes ~w~n", [LMDIndexes]), - - case SecondsToExpiry > SecondsSinceLMD of - true -> - timer:sleep((1 + SecondsToExpiry - SecondsSinceLMD) * 1000); - false -> - timer:sleep(1000) - end, - - % Should now get an empty answer - all entries have expired - TicTacTree2_Full = - lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes), - DL4_0 = leveled_tictac:find_dirtyleaves(TicTacTree2_Full, EmptyTree), - io:format("Dirty leaves found after expiry ~w~n", [length(DL4_0)]), - - timer:sleep(10000), - - TicTacTree3_Full = - lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes), - DL5_0 = leveled_tictac:find_dirtyleaves(TicTacTree3_Full, EmptyTree), - io:format("Dirty leaves found after expiry plus 10s ~w~n", [length(DL5_0)]), - - - ok = leveled_bookie:book_close(Book1A), - - true = length(DL4_0) == 0. - - basic_headonly(_Config) -> ObjectCount = 200000, RemoveCount = 100, @@ -1196,145 +716,6 @@ load_objectspecs(ObjectSpecL, SliceSize, Bookie) -> end. - -load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - LMDIndexes_Loaded) -> - load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - LMDIndexes_Loaded, <<"$all">>). 
- -load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - LMDIndexes_Loaded, Bucket) -> - LMDIndexes = - case LMDIndexes_Loaded of - false -> - % Generate nine lists of objects - % BucketBin = list_to_binary("Bucket"), - GenMapFun = - fun(_X) -> - V = testutil:get_compressiblevalue(), - Indexes = testutil:get_randomindexes_generator(8), - testutil:generate_objects(5000, - binary_uuid, - [], - V, - Indexes) - end, - - ObjLists = lists:map(GenMapFun, lists:seq(1, 9)), - - % Load all nine lists into Book1A - lists:foreach(fun(ObjL) -> testutil:riakload(Book1A, ObjL) end, - ObjLists), - - % Split nine lists across Book1B to Book1D, three object lists - % in each - lists:foreach(fun(ObjL) -> testutil:riakload(Book1B, ObjL) end, - lists:sublist(ObjLists, 1, 3)), - lists:foreach(fun(ObjL) -> testutil:riakload(Book1C, ObjL) end, - lists:sublist(ObjLists, 4, 3)), - lists:foreach(fun(ObjL) -> testutil:riakload(Book1D, ObjL) end, - lists:sublist(ObjLists, 7, 3)), - - SW_EndLoad = os:timestamp(), - determine_lmd_indexes(SW_StartLoad, SW_EndLoad, UnitMins); - _ -> - LMDIndexes_Loaded - end, - - EmptyTree = leveled_tictac:new_tree(empty, TreeSize), - - GetTicTacTreeFun = - fun(Bookie) -> - get_tictactree_fun(Bookie, Bucket, TreeSize) - end, - - % Get a TicTac tree representing one of the indexes in Bucket A - TicTacTree1_Full = - lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes), - - TicTacTree1_P1 = - lists:foldl(GetTicTacTreeFun(Book1B), EmptyTree, LMDIndexes), - TicTacTree1_P2 = - lists:foldl(GetTicTacTreeFun(Book1C), EmptyTree, LMDIndexes), - TicTacTree1_P3 = - lists:foldl(GetTicTacTreeFun(Book1D), EmptyTree, LMDIndexes), - - % Merge the tree across the partitions - TicTacTree1_Joined = lists:foldl(fun leveled_tictac:merge_trees/2, - TicTacTree1_P1, - [TicTacTree1_P2, TicTacTree1_P3]), - - {TicTacTree1_Full, TicTacTree1_Joined, EmptyTree, LMDIndexes}. - - -aae_startopts(RootPath, AAE) -> - LS = 2000, - JS = 50000000, - SS = testutil:sync_strategy(), - [{root_path, RootPath}, - {sync_strategy, SS}, - {cache_size, LS}, - {max_journalsize, JS}, - {recent_aae, AAE}]. - - -determine_lmd_indexes(StartTS, EndTS, UnitMins) -> - StartDT = calendar:now_to_datetime(StartTS), - EndDT = calendar:now_to_datetime(EndTS), - StartTimeStr = get_strtime(StartDT, UnitMins), - EndTimeStr = get_strtime(EndDT, UnitMins), - - AddTimeFun = - fun(X, Acc) -> - case lists:member(EndTimeStr, Acc) of - true -> - Acc; - false -> - NextTime = - UnitMins * 60 * X + - calendar:datetime_to_gregorian_seconds(StartDT), - NextDT = - calendar:gregorian_seconds_to_datetime(NextTime), - Acc ++ [get_strtime(NextDT, UnitMins)] - end - end, - - lists:foldl(AddTimeFun, [StartTimeStr], lists:seq(1, 10)). - - -get_strtime(DateTime, UnitMins) -> - {{Y, M, D}, {Hour, Minute, _Second}} = DateTime, - RoundMins = - UnitMins * (Minute div UnitMins), - StrTime = - lists:flatten(io_lib:format(?LMD_FORMAT, - [Y, M, D, Hour, RoundMins])), - StrTime. - - -get_tictactree_fun(Bookie, Bucket, TreeSize) -> - fun(LMD, Acc) -> - SW = os:timestamp(), - ST = <<"0">>, - ET = <<"A">>, - Q = {tictactree_idx, - {Bucket, - list_to_binary("$aae." ++ LMD ++ "_bin"), - ST, - ET}, - TreeSize, - fun(_B, _K) -> accumulate end}, - {async, Folder} = leveled_bookie:book_returnfolder(Bookie, Q), - R = Folder(), - io:format("TicTac Tree for index ~s took " ++ - "~w microseconds~n", - [LMD, timer:now_diff(os:timestamp(), SW)]), - leveled_tictac:merge_trees(R, Acc) - end. 
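A worked example of the rounding that get_strtime/2 above applies when naming the last-modified-date indexes (the format string is ?LMD_FORMAT from leveled_codec): with UnitMins = 5, a change at 15:49 rounds down to the 15:45 boundary, so it lands in the "$aae.201810291545_bin" index.

strtime_example() ->
    % Mirrors the removed get_strtime/2: round minutes down to the
    % UnitMins boundary, then flatten with ?LMD_FORMAT
    {{Y, M, D}, {Hour, Minute, _Sec}} = {{2018, 10, 29}, {15, 49, 30}},
    UnitMins = 5,
    RoundMins = UnitMins * (Minute div UnitMins),
    "201810291545" =
        lists:flatten(io_lib:format("~4..0w~2..0w~2..0w~2..0w~2..0w",
                                    [Y, M, D, Hour, RoundMins])).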
- get_segment(K, SegmentCount) -> BinKey = case is_binary(K) of From 671b6e7f99621fa0bfb8236f6341a48ec5961784 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 29 Oct 2018 16:56:58 +0000 Subject: [PATCH 02/29] Strip ALL_BUCKET - only used in AAE --- src/leveled_codec.erl | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index abb70c7..322913f 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -73,7 +73,6 @@ -define(MAGIC, 53). % riak_kv -> riak_object -define(LMD_FORMAT, "~4..0w~2..0w~2..0w~2..0w~2..0w"). -define(NRT_IDX, "$aae."). --define(ALL_BUCKETS, <<"$all">>). -type riak_metadata() :: {binary()|delete, % Sibling Metadata binary()|null, % Vclock Metadata @@ -247,8 +246,6 @@ from_ledgerkey(_ExpectedTag, _OtherKey) -> -spec from_ledgerkey(tuple()) -> tuple(). %% @doc %% Return identifying information from the LedgerKey -from_ledgerkey({?IDX_TAG, ?ALL_BUCKETS, {_IdxFld, IdxVal}, {Bucket, Key}}) -> - {Bucket, Key, IdxVal}; from_ledgerkey({?IDX_TAG, Bucket, {_IdxFld, IdxVal}, Key}) -> {Bucket, Key, IdxVal}; from_ledgerkey({?HEAD_TAG, Bucket, Key, SubKey}) -> @@ -546,22 +543,8 @@ idx_indexspecs(IndexSpecs, Bucket, Key, SQN, TTL) -> gen_indexspec(Bucket, Key, IdxOp, IdxField, IdxTerm, SQN, TTL) -> Status = set_status(IdxOp, TTL), - case Bucket of - {all, RealBucket} -> - {to_ledgerkey(?ALL_BUCKETS, - {RealBucket, Key}, - ?IDX_TAG, - IdxField, - IdxTerm), - {SQN, Status, no_lookup, null}}; - _ -> - {to_ledgerkey(Bucket, - Key, - ?IDX_TAG, - IdxField, - IdxTerm), - {SQN, Status, no_lookup, null}} - end. + {to_ledgerkey(Bucket, Key, ?IDX_TAG, IdxField, IdxTerm), + {SQN, Status, no_lookup, null}}. gen_headspec(Bucket, Key, IdxOp, SubKey, Value, SQN, TTL) -> Status = set_status(IdxOp, TTL), From baa446692370c8f9dfe18ad017a86b446732c00a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 29 Oct 2018 20:24:54 +0000 Subject: [PATCH 03/29] Remove knowledge of tuple length from ledger value Nothing should now care about the current tuple length - and hence the tuple length may be increased (for example to add a max_mod_date) --- src/leveled_bookie.erl | 4 ++-- src/leveled_codec.erl | 33 ++++++++++++++++----------------- src/leveled_runner.erl | 2 +- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index a1e77c6..19e31c4 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -1116,7 +1116,7 @@ handle_call({get, Bucket, Key, Tag}, _From, State) not_found; Head -> {Seqn, Status, _MH, _MD} = - leveled_codec:striphead_to_details(Head), + leveled_codec:striphead_to_v1details(Head), case Status of tomb -> not_found; @@ -1165,7 +1165,7 @@ handle_call({head, Bucket, Key, Tag}, _From, State) not_present -> {not_found, State#state.ink_checking}; Head -> - case leveled_codec:striphead_to_details(Head) of + case leveled_codec:striphead_to_v1details(Head) of {_SeqN, tomb, _MH, _MD} -> {not_found, State#state.ink_checking}; {SeqN, {active, TS}, _MH, MD} -> diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 322913f..d0baaf4 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -38,7 +38,7 @@ strip_to_statusonly/1, strip_to_keyseqonly/1, strip_to_seqnhashonly/1, - striphead_to_details/1, + striphead_to_v1details/1, is_active/3, endkey_passed/2, key_dominates/2, @@ -167,36 +167,35 @@ to_lookup(Key) -> %% Some helper functions to get a sub_components of the key/value -spec strip_to_statusonly(ledger_kv()) -> 
ledger_status(). -strip_to_statusonly({_, {_, St, _, _}}) -> St. +strip_to_statusonly({_, V}) -> element(2, V). -spec strip_to_seqonly(ledger_kv()) -> non_neg_integer(). -strip_to_seqonly({_, {SeqN, _, _, _}}) -> SeqN. +strip_to_seqonly({_, V}) -> element(1, V). -spec strip_to_keyseqonly(ledger_kv()) -> {ledger_key(), integer()}. -strip_to_keyseqonly({LK, {SeqN, _, _, _}}) -> {LK, SeqN}. +strip_to_keyseqonly({LK, V}) -> {LK, element(1, V)}. -spec strip_to_seqnhashonly(ledger_kv()) -> {integer(), segment_hash()}. -strip_to_seqnhashonly({_, {SeqN, _, MH, _}}) -> {SeqN, MH}. +strip_to_seqnhashonly({_, V}) -> {element(1, V), element(3, V)}. --spec striphead_to_details(ledger_value()) -> ledger_value(). -striphead_to_details({SeqN, St, MH, MD}) -> {SeqN, St, MH, MD}. +-spec striphead_to_v1details(ledger_value()) -> ledger_value(). +striphead_to_v1details(V) -> + {element(1, V), element(2, V), element(3, V), element(4, V)}. -spec key_dominates(ledger_kv(), ledger_kv()) -> left_hand_first|right_hand_first|left_hand_dominant|right_hand_dominant. %% @doc %% When comparing two keys in the ledger need to find if one key comes before %% the other, or if the match, which key is "better" and should be the winner -key_dominates(LeftKey, RightKey) -> - case {LeftKey, RightKey} of - {{LK, _LVAL}, {RK, _RVAL}} when LK < RK -> - left_hand_first; - {{LK, _LVAL}, {RK, _RVAL}} when RK < LK -> - right_hand_first; - {{LK, {LSN, _LST, _LMH, _LMD}}, {RK, {RSN, _RST, _RMH, _RMD}}} - when LK == RK, LSN >= RSN -> +key_dominates({LK, _LVAL}, {RK, _RVAL}) when LK < RK -> + left_hand_first; +key_dominates({LK, _LVAL}, {RK, _RVAL}) when RK < LK -> + right_hand_first; +key_dominates(LObj, RObj) -> + case strip_to_seqonly(LObj) >= strip_to_seqonly(RObj) of + true -> left_hand_dominant; - {{LK, {LSN, _LST, _LMH, _LMD}}, {RK, {RSN, _RST, _RMH, _RMD}}} - when LK == RK, LSN < RSN -> + false -> right_hand_dominant end. diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index 7b88c21..c8ad66a 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -612,7 +612,7 @@ accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) -> case leveled_codec:is_active(LK, V, Now) of true -> {SQN, _St, _MH, MD} = - leveled_codec:striphead_to_details(V), + leveled_codec:striphead_to_v1details(V), {B, K} = case leveled_codec:from_ledgerkey(LK) of {B0, K0} -> From 14fd67e535ccfc35a83e49e2975e2555260ba501 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Mon, 29 Oct 2018 21:16:38 +0000 Subject: [PATCH 04/29] Add specs and comments and split function Need to change this, so refactor and make neater in preparation --- src/leveled_codec.erl | 33 +++++++++++---- src/leveled_sst.erl | 96 +++++++++++++++++++++++-------------------- 2 files changed, 77 insertions(+), 52 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index d0baaf4..ca6f3b1 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -37,7 +37,7 @@ strip_to_seqonly/1, strip_to_statusonly/1, strip_to_keyseqonly/1, - strip_to_seqnhashonly/1, + strip_to_indexdetails/1, striphead_to_v1details/1, is_active/3, endkey_passed/2, @@ -81,14 +81,30 @@ -type tag() :: ?STD_TAG|?RIAK_TAG|?IDX_TAG|?HEAD_TAG. +-type sqn() :: + % SQN of the object in the Journal + pos_integer(). -type segment_hash() :: + % hash of the key to an aae segment - to be used in ledger filters {integer(), integer()}|no_lookup. +-type metadata() :: + tuple()|null. 
% null for empty metadata +-type last_moddate() :: + % modified date as determined by the object (not this store) + % if the object has siblings in the store will be the maximum of those + % dates + integer()|undefined. + -type ledger_status() :: tomb|{active, non_neg_integer()|infinity}. -type ledger_key() :: {tag(), any(), any(), any()}|all. --type ledger_value() :: - {integer(), ledger_status(), segment_hash(), tuple()|null}. +-type ledger_value() :: + ledger_value_v1()|ledger_value_v2(). +-type ledger_value_v1() :: + {sqn(), ledger_status(), segment_hash(), metadata()}. +-type ledger_value_v2() :: + {sqn(), ledger_status(), segment_hash(), metadata(), last_moddate()}. -type ledger_kv() :: {ledger_key(), ledger_value()}. -type compaction_strategy() :: @@ -103,6 +119,8 @@ list({add|remove, any(), any()}). -type journal_keychanges() :: {index_specs(), infinity|integer()}. % {KeyChanges, TTL} +-type maybe_lookup() :: + lookup|no_lookup. -type segment_list() @@ -120,7 +138,8 @@ compression_method/0, journal_keychanges/0, index_specs/0, - segment_list/0]). + segment_list/0, + maybe_lookup/0]). %%%============================================================================ @@ -149,7 +168,7 @@ segment_hash(Key) -> segment_hash(term_to_binary(Key)). --spec to_lookup(ledger_key()) -> lookup|no_lookup. +-spec to_lookup(ledger_key()) -> maybe_lookup(). %% @doc %% Should it be possible to lookup a key in the merge tree. This is not true %% For keys that should only be read through range queries. Direct lookup @@ -175,8 +194,8 @@ strip_to_seqonly({_, V}) -> element(1, V). -spec strip_to_keyseqonly(ledger_kv()) -> {ledger_key(), integer()}. strip_to_keyseqonly({LK, V}) -> {LK, element(1, V)}. --spec strip_to_seqnhashonly(ledger_kv()) -> {integer(), segment_hash()}. -strip_to_seqnhashonly({_, V}) -> {element(1, V), element(3, V)}. +-spec strip_to_indexdetails(ledger_kv()) -> {integer(), segment_hash()}. +strip_to_indexdetails({_, V}) -> {element(1, V), element(3, V)}. -spec striphead_to_v1details(ledger_value()) -> ledger_value(). striphead_to_v1details(V) -> diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 64941d1..349f6c1 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -135,7 +135,7 @@ size :: integer(), max_sqn :: integer()}). --type press_methods() +-type press_method() :: lz4|native|none. -type range_endpoint() :: all|leveled_codec:ledger_key(). @@ -163,7 +163,7 @@ filename, yield_blockquery = false :: boolean(), blockindex_cache, - compression_method = native :: press_methods(), + compression_method = native :: press_method(), timings = no_timing :: sst_timings(), timings_countdown = 0 :: integer(), fetch_cache = array:new([{size, ?CACHE_SIZE}])}). @@ -219,7 +219,7 @@ sst_open(RootPath, Filename) -> -spec sst_new(string(), string(), integer(), list(leveled_codec:ledger_kv()), - integer(), press_methods()) + integer(), press_method()) -> {ok, pid(), {leveled_codec:ledger_key(), leveled_codec:ledger_key()}, binary()}. @@ -248,7 +248,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) -> list(leveled_codec:ledger_kv()|sst_pointer()), list(leveled_codec:ledger_kv()|sst_pointer()), boolean(), integer(), - integer(), press_methods()) + integer(), press_method()) -> empty|{ok, pid(), {{list(leveled_codec:ledger_kv()), list(leveled_codec:ledger_kv())}, @@ -293,7 +293,7 @@ sst_new(RootPath, Filename, -spec sst_newlevelzero(string(), string(), integer(), fun(), pid()|undefined, integer(), - press_methods()) -> + press_method()) -> {ok, pid(), noreply}. 
%% @doc %% Start a new file at level zero. At this level the file size is not fixed - @@ -860,7 +860,7 @@ fetch_range(StartKey, EndKey, ScanWidth, SegList, State) -> State#state.compression_method), {SlotsToFetchBinList, SlotsToPoint}. --spec compress_level(integer(), press_methods()) -> press_methods(). +-spec compress_level(integer(), press_method()) -> press_method(). %% @doc %% disable compression at higher levels for improved performance compress_level(Level, _PressMethod) when Level < ?COMPRESS_AT_LEVEL -> @@ -1016,7 +1016,7 @@ generate_filenames(RootFilename) -> end. --spec serialise_block(any(), press_methods()) -> binary(). +-spec serialise_block(any(), press_method()) -> binary(). %% @doc %% Convert term to binary %% Function split out to make it easier to experiment with different @@ -1036,7 +1036,7 @@ serialise_block(Term, none) -> <>. --spec deserialise_block(binary(), press_methods()) -> any(). +-spec deserialise_block(binary(), press_method()) -> any(). %% @doc %% Convert binary to term %% Function split out to make it easier to experiment with different @@ -1131,48 +1131,54 @@ lookup_slots(StartKey, EndKey, Tree) -> %% based on a 17-bit hash (so 0.0039 fpr). +accumulate_positions({K, V}, {PosBinAcc, NoHashCount, HashAcc}) -> + {_SQN, H1} = leveled_codec:strip_to_indexdetails({K, V}), + PosH1 = extra_hash(H1), + case is_integer(PosH1) of + true -> + case NoHashCount of + 0 -> + {<<1:1/integer, PosH1:15/integer,PosBinAcc/binary>>, + 0, + [H1|HashAcc]}; + N -> + % The No Hash Count is an integer between 0 and 127 + % and so at read time should count NHC + 1 + NHC = N - 1, + {<<1:1/integer, + PosH1:15/integer, + 0:1/integer, + NHC:7/integer, + PosBinAcc/binary>>, + 0, + HashAcc} + end; + false -> + {PosBinAcc, NoHashCount + 1, HashAcc} + end. + +-spec generate_binary_slot(leveled_codec:lookup(), + list(leveled_codec:ledger_kv()), + press_method(), + build_timings()) -> + {{binary(), + binary(), + list(integer()), + leveled_codec:ledger_key()}, + build_timings()}. +%% @doc +%% Generate the serialised slot to be used when storing this sublist of keys +%% and values generate_binary_slot(Lookup, KVL, PressMethod, BuildTimings0) -> SW0 = os:timestamp(), - - HashFoldFun = - fun({K, V}, {PosBinAcc, NoHashCount, HashAcc}) -> - - {_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}), - PosH1 = extra_hash(H1), - case is_integer(PosH1) of - true -> - case NoHashCount of - 0 -> - {<<1:1/integer, - PosH1:15/integer, - PosBinAcc/binary>>, - 0, - [H1|HashAcc]}; - N -> - % The No Hash Count is an integer between 0 and 127 - % and so at read time should count NHC + 1 - NHC = N - 1, - {<<1:1/integer, - PosH1:15/integer, - 0:1/integer, - NHC:7/integer, - PosBinAcc/binary>>, - 0, - HashAcc} - end; - false -> - {PosBinAcc, NoHashCount + 1, HashAcc} - end - - end, {HashL, PosBinIndex} = case Lookup of lookup -> - {PosBinIndex0, - NHC, - HashL0} = lists:foldr(HashFoldFun, {<<>>, 0, []}, KVL), + InitAcc = {<<>>, 0, []}, + {PosBinIndex0, NHC, HashL0} = + lists:foldr(fun accumulate_positions/2, InitAcc, KVL), PosBinIndex1 = case NHC of 0 -> @@ -1276,7 +1282,7 @@ generate_binary_slot(Lookup, KVL, PressMethod, BuildTimings0) -> binary(), integer(), leveled_codec:ledger_key()|false, - press_methods(), + press_method(), list()|not_present) -> list()|not_present. 
%% @doc
%% Acc should start as not_present if LedgerKey is a key, and a list if
@@ -1368,7 +1374,7 @@ binarysplit_mapfun(MultiSlotBin, StartPos) ->
 
 
 -spec read_slots(file:io_device(), list(),
-                    {false|list(), any()}, press_methods())
+                    {false|list(), any()}, press_method())
                                             -> list(binaryslot_element()).
 %% @doc
 %% The reading of sots will return a list of either 2-tuples containing

From 8ba28700eba9fa494e85d255750cd8678469120d Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Mon, 29 Oct 2018 21:50:32 +0000
Subject: [PATCH 05/29] Start adding in last_modified dates

With updated specs
---
 src/leveled_codec.erl |  9 +++++---
 src/leveled_sst.erl   | 52 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index ca6f3b1..5aca861 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -139,7 +139,8 @@
               journal_keychanges/0,
               index_specs/0,
               segment_list/0,
-              maybe_lookup/0]).
+              maybe_lookup/0,
+              last_moddate/0]).
 
 
 %%%============================================================================
@@ -194,8 +195,10 @@ strip_to_seqonly({_, V}) -> element(1, V).
 -spec strip_to_keyseqonly(ledger_kv()) -> {ledger_key(), integer()}.
 strip_to_keyseqonly({LK, V}) -> {LK, element(1, V)}.
 
--spec strip_to_indexdetails(ledger_kv()) -> {integer(), segment_hash()}.
-strip_to_indexdetails({_, V}) -> {element(1, V), element(3, V)}.
+-spec strip_to_indexdetails(ledger_kv()) ->
+                    {integer(), segment_hash(), last_moddate()}.
+strip_to_indexdetails({_, V}) when tuple_size(V) == 4 ->
+    {element(1, V), element(3, V), undefined}.
 
 -spec striphead_to_v1details(ledger_value()) -> ledger_value().
 striphead_to_v1details(V) ->
diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl
index 349f6c1..fb6502f 100644
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@@ -1131,8 +1131,21 @@ lookup_slots(StartKey, EndKey, Tree) ->
 %% based on a 17-bit hash (so 0.0039 fpr).
 
 
-accumulate_positions({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->
-    {_SQN, H1} = leveled_codec:strip_to_indexdetails({K, V}),
+-spec accumulate_positions(leveled_codec:ledger_kv(),
+                            {binary(),
+                                non_neg_integer(),
+                                list(non_neg_integer()),
+                                leveled_codec:last_moddate()}) ->
+                            {binary(),
+                                non_neg_integer(),
+                                list(non_neg_integer()),
+                                leveled_codec:last_moddate()}.
+%% @doc
+%% Fold function used to accumulate the position information needed to
+%% populate the summary of the slot
+accumulate_positions({K, V}, {PosBinAcc, NoHashCount, HashAcc, LMDAcc}) ->
+    {_SQN, H1, LMD} = leveled_codec:strip_to_indexdetails({K, V}),
+    LMDAcc0 = take_max_lastmoddate(LMD, LMDAcc),
     PosH1 = extra_hash(H1),
     case is_integer(PosH1) of
         true ->
@@ -1140,7 +1153,8 @@ accumulate_positions({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->
                 0 ->
                     {<<1:1/integer, PosH1:15/integer,PosBinAcc/binary>>,
                         0,
-                        [H1|HashAcc]};
+                        [H1|HashAcc],
+                        LMDAcc0};
                 N ->
                     % The No Hash Count is an integer between 0 and 127
                     % and so at read time should count NHC + 1
@@ -1151,12 +1165,27 @@ accumulate_positions({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->
                         NHC:7/integer,
                         PosBinAcc/binary>>,
                         0,
-                        HashAcc}
+                        HashAcc,
+                        LMDAcc0}
             end;
        false ->
-            {PosBinAcc, NoHashCount + 1, HashAcc}
+            {PosBinAcc, NoHashCount + 1, HashAcc, LMDAcc0}
    end.
 
+
+-spec take_max_lastmoddate(leveled_codec:last_moddate(),
+                            leveled_codec:last_moddate()) ->
+                                leveled_codec:last_moddate().
+%% @doc
+%% Get the last modified date. 
If no Last Modified Date on any object, can't
+%% add the accelerator and should check each object in turn
+take_max_lastmoddate(_LMD, undefined) ->
+    undefined;
+take_max_lastmoddate(undefined, _LMDAcc) ->
+    undefined;
+take_max_lastmoddate(LMD, LMDAcc) ->
+    max(LMD, LMDAcc).
+
 -spec generate_binary_slot(leveled_codec:lookup(),
                             list(leveled_codec:ledger_kv()),
                             press_method(),
@@ -1176,8 +1205,8 @@ generate_binary_slot(Lookup, KVL, PressMethod, BuildTimings0) ->
     {HashL, PosBinIndex} =
         case Lookup of
             lookup ->
-                InitAcc = {<<>>, 0, []},
-                {PosBinIndex0, NHC, HashL0} =
+                InitAcc = {<<>>, 0, [], 0},
+                {PosBinIndex0, NHC, HashL0, _LMD} =
                     lists:foldr(fun accumulate_positions/2, InitAcc, KVL),
                 PosBinIndex1 =
                     case NHC of
@@ -1275,8 +1304,6 @@ generate_binary_slot(Lookup, KVL, PressMethod, BuildTimings0) ->
 
     {{Header, SlotBin, HashL, LastKey}, BuildTimings3}.
 
-
-
 -spec check_blocks(list(integer()),
@@ -2878,5 +2905,12 @@ timings_test() ->
     ?assertMatch(true, T4#sst_timings.slot_fetch_time >
                         T3#sst_timings.slot_fetch_time).
 
+take_max_lastmoddate_test() ->
+    % TODO: Remove this test
+    % Temporarily added to make dialyzer happy (until we've made use of last
+    % modified dates)
+    ?assertMatch(1, take_max_lastmoddate(0, 1)).
+
 
 -endif.

From 467c2fb89cf34145999724abb9cbe0d01fae7cdd Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Tue, 30 Oct 2018 10:25:54 +0000
Subject: [PATCH 06/29] Allow a boolean to be passed in to set IndexModDate

Although Leveled is still pre-release, for completeness it is useful to
show that this change can be made in a backwards-compatible way.

So a boolean is added to indicate whether a file should index the
modified date within the slot, and this can then be read when the file
is opened.

Nothing happens with the boolean, yet.
---
 src/leveled_sst.erl | 169 +++++++++++++++++++++++++-------------------
 1 file changed, 95 insertions(+), 74 deletions(-)

diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl
index fb6502f..0f11e2b 100644
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@@ -90,6 +90,7 @@
 -define(BLOCK_LENGTHS_LENGTH, 20).
 -define(FLIPPER32, 4294967295).
 -define(COMPRESS_AT_LEVEL, 1).
+-define(INDEX_MODDATE, true).
 
 -include_lib("eunit/include/eunit.hrl").
 
@@ -164,6 +165,7 @@
                 yield_blockquery = false :: boolean(),
                 blockindex_cache,
                 compression_method = native :: press_method(),
+                index_moddate = ?INDEX_MODDATE :: boolean(),
                 timings = no_timing :: sst_timings(),
                 timings_countdown = 0 :: integer(),
                 fetch_cache = array:new([{size, ?CACHE_SIZE}])}). 
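The undefined-poisoning in take_max_lastmoddate/2 above is deliberate: the accumulator starts at 0 in generate_binary_slot, and a single object without a last modified date forces the accumulated date for the whole slot to undefined, so no max-date accelerator can be stored. A sketch of that behaviour, assuming epoch-seconds integers as dates:

fold_lmd_sketch(LMDs) ->
    % As threaded through accumulate_positions/2: one undefined
    % modified date means the slot summary carries no max date
    lists:foldl(fun take_max_lastmoddate/2, 0, LMDs).

% fold_lmd_sketch([1540818590, 1540818600]) =:= 1540818600
% fold_lmd_sketch([1540818590, undefined]) =:= undefined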
@@ -230,7 +232,8 @@ sst_open(RootPath, Filename) -> sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) -> {ok, Pid} = gen_fsm:start_link(?MODULE, [], []), PressMethod0 = compress_level(Level, PressMethod), - {[], [], SlotList, FK} = merge_lists(KVList, PressMethod0), + {[], [], SlotList, FK} = + merge_lists(KVList, PressMethod0, ?INDEX_MODDATE), case gen_fsm:sync_send_event(Pid, {sst_new, RootPath, @@ -238,7 +241,8 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) -> Level, {SlotList, FK}, MaxSQN, - PressMethod0}, + PressMethod0, + ?INDEX_MODDATE}, infinity) of {ok, {SK, EK}, Bloom} -> {ok, Pid, {SK, EK}, Bloom} @@ -271,7 +275,8 @@ sst_new(RootPath, Filename, MaxSQN, PressMethod) -> PressMethod0 = compress_level(Level, PressMethod), {Rem1, Rem2, SlotList, FK} = - merge_lists(KVL1, KVL2, {IsBasement, Level}, PressMethod0), + merge_lists(KVL1, KVL2, {IsBasement, Level}, + PressMethod0, ?INDEX_MODDATE), case SlotList of [] -> empty; @@ -284,7 +289,8 @@ sst_new(RootPath, Filename, Level, {SlotList, FK}, MaxSQN, - PressMethod0}, + PressMethod0, + ?INDEX_MODDATE}, infinity) of {ok, {SK, EK}, Bloom} -> {ok, Pid, {{Rem1, Rem2}, SK, EK}, Bloom} @@ -312,7 +318,8 @@ sst_newlevelzero(RootPath, Filename, FetchFun, Penciller, MaxSQN, - PressMethod0}), + PressMethod0, + ?INDEX_MODDATE}), {ok, Pid, noreply}. -spec sst_get(pid(), leveled_codec:ledger_key()) @@ -483,17 +490,15 @@ starting({sst_open, RootPath, Filename}, _From, State) -> starting({sst_new, RootPath, Filename, Level, {SlotList, FirstKey}, MaxSQN, - PressMethod}, _From, State) -> + PressMethod, IdxModDate}, _From, State) -> SW = os:timestamp(), - {Length, - SlotIndex, - BlockIndex, - SlotsBin, - Bloom} = build_all_slots(SlotList, PressMethod), + {Length, SlotIndex, BlockIndex, SlotsBin, Bloom} = + build_all_slots(SlotList), SummaryBin = build_table_summary(SlotIndex, Level, FirstKey, Length, MaxSQN, Bloom), ActualFilename = - write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod), + write_file(RootPath, Filename, SummaryBin, SlotsBin, + PressMethod, IdxModDate), YBQ = Level =< 2, {UpdState, Bloom} = read_file(ActualFilename, @@ -509,21 +514,19 @@ starting({sst_new, starting({sst_newlevelzero, RootPath, Filename, Slots, FetchFun, Penciller, MaxSQN, - PressMethod}, State) -> + PressMethod, IdxModDate}, State) -> SW0 = os:timestamp(), KVList = leveled_pmem:to_list(Slots, FetchFun), Time0 = timer:now_diff(os:timestamp(), SW0), SW1 = os:timestamp(), - {[], [], SlotList, FirstKey} = merge_lists(KVList, PressMethod), + {[], [], SlotList, FirstKey} = + merge_lists(KVList, PressMethod, IdxModDate), Time1 = timer:now_diff(os:timestamp(), SW1), SW2 = os:timestamp(), - {SlotCount, - SlotIndex, - BlockIndex, - SlotsBin, - Bloom} = build_all_slots(SlotList, PressMethod), + {SlotCount, SlotIndex, BlockIndex, SlotsBin,Bloom} = + build_all_slots(SlotList), Time2 = timer:now_diff(os:timestamp(), SW2), SW3 = os:timestamp(), @@ -533,7 +536,8 @@ starting({sst_newlevelzero, RootPath, Filename, SW4 = os:timestamp(), ActualFilename = - write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod), + write_file(RootPath, Filename, SummaryBin, SlotsBin, + PressMethod, IdxModDate), {UpdState, Bloom} = read_file(ActualFilename, State#state{root_path=RootPath, yield_blockquery=true}), @@ -868,11 +872,12 @@ compress_level(Level, _PressMethod) when Level < ?COMPRESS_AT_LEVEL -> compress_level(_Level, PressMethod) -> PressMethod. 
-write_file(RootPath, Filename, SummaryBin, SlotsBin, PressMethod) -> +write_file(RootPath, Filename, SummaryBin, SlotsBin, + PressMethod, IdxModDate) -> SummaryLength = byte_size(SummaryBin), SlotsLength = byte_size(SlotsBin), {PendingName, FinalName} = generate_filenames(Filename), - FileVersion = gen_fileversion(PressMethod), + FileVersion = gen_fileversion(PressMethod, IdxModDate), ok = file:write_file(filename:join(RootPath, PendingName), < filename = Filename}, Bloom}. -gen_fileversion(PressMethod) -> +gen_fileversion(PressMethod, IdxModDate) -> % Native or none can be treated the same once written, as reader % does not need to know as compression info will be in header of the % block @@ -921,17 +926,31 @@ gen_fileversion(PressMethod) -> native -> 0; none -> 0 end, - Bit1. + Bit2 = + case IdxModDate of + true -> + 2; + false -> + 0 + end, + Bit1+ Bit2. imp_fileversion(VersionInt, State) -> - UpdState = + UpdState0 = case VersionInt band 1 of 0 -> State#state{compression_method = native}; 1 -> State#state{compression_method = lz4} end, - UpdState. + UpdState1 = + case VersionInt band 2 of + 0 -> + UpdState0#state{index_moddate = false}; + 2 -> + UpdState0#state{index_moddate = true} + end, + UpdState1. open_reader(Filename) -> {ok, Handle} = file:open(Filename, [binary, raw, read]), @@ -964,28 +983,25 @@ read_table_summary(BinWithCheck) -> end. -build_all_slots(SlotList, PressMethod) -> +build_all_slots(SlotList) -> SlotCount = length(SlotList), - BuildResponse = build_all_slots(SlotList, - 9, - 1, - [], - array:new([{size, SlotCount}, - {default, none}]), - <<>>, - [], - PressMethod), - {SlotIndex, BlockIndex, SlotsBin, HashLists} = BuildResponse, + {SlotIndex, BlockIndex, SlotsBin, HashLists} = + build_all_slots(SlotList, + 9, + 1, + [], + array:new([{size, SlotCount}, + {default, none}]), + <<>>, + []), Bloom = leveled_ebloom:create_bloom(HashLists), {SlotCount, SlotIndex, BlockIndex, SlotsBin, Bloom}. build_all_slots([], _Pos, _SlotID, - SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists, - _PressMethod) -> + SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists) -> {SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists}; build_all_slots([SlotD|Rest], Pos, SlotID, - SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists, - PressMethod) -> + SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists) -> {BlockIdx, SlotBin, HashList, LastKey} = SlotD, Length = byte_size(SlotBin), SlotIndexV = #slot_index_value{slot_id = SlotID, @@ -997,8 +1013,7 @@ build_all_slots([SlotD|Rest], Pos, SlotID, [{LastKey, SlotIndexV}|SlotIdxAcc], array:set(SlotID - 1, BlockIdx, BlockIdxAcc), <>, - lists:append(HashLists, HashList), - PressMethod). + lists:append(HashLists, HashList)). generate_filenames(RootFilename) -> @@ -1189,6 +1204,7 @@ take_max_lastmoddate(LMD, LMDAcc) -> -spec generate_binary_slot(leveled_codec:lookup(), list(leveled_codec:ledger_kv()), press_method(), + boolean(), build_timings()) -> {{binary(), binary(), @@ -1198,7 +1214,7 @@ take_max_lastmoddate(LMD, LMDAcc) -> %% @doc %% Generate the serialised slot to be used when storing this sublist of keys %% and values -generate_binary_slot(Lookup, KVL, PressMethod, BuildTimings0) -> +generate_binary_slot(Lookup, KVL, PressMethod, _IndexModDate, BuildTimings0) -> SW0 = os:timestamp(), @@ -1801,7 +1817,7 @@ find_pos(<<0:1/integer, NHC:7/integer, T/binary>>, Hash, PosList, Count) -> %% large numbers of index keys are present - as well as improving compression %% ratios in the Ledger. %% -%% The outcome of merge_lists/1 and merge_lists/3 should be an list of slots. 
+%% The outcome of merge_lists/3 and merge_lists/5 should be a list of slots.
 %% Each slot should be ordered by Key and be of the form {Flag, KVList}, where
 %% Flag can either be lookup or no-lookup. The list of slots should also be
 %% ordered by Key (i.e. the first key in the slot)
@@ -1817,63 +1833,65 @@ find_pos(<<0:1/integer, NHC:7/integer, T/binary>>, Hash, PosList, Count) ->
 %% there are matching keys then the highest sequence number must be chosen and
 %% any lower sequence numbers should be compacted out of existence

--spec merge_lists(list(), atom())
+-spec merge_lists(list(), press_method(), boolean())
                     -> {list(), list(), list(tuple()), tuple()|null}.
 %% @doc
 %%
 %% Merge from a single list (i.e. at Level 0)
-merge_lists(KVList1, PressMethod) ->
+merge_lists(KVList1, PressMethod, IdxModDate) ->
     SlotCount = length(KVList1) div ?LOOK_SLOTSIZE,
     {[],
         [],
-        split_lists(KVList1, [], SlotCount, PressMethod),
+        split_lists(KVList1, [], SlotCount, PressMethod, IdxModDate),
         element(1, lists:nth(1, KVList1))}.

-split_lists([], SlotLists, 0, _PressMethod) ->
+split_lists([], SlotLists, 0, _PressMethod, _IdxModDate) ->
     lists:reverse(SlotLists);
-split_lists(LastPuff, SlotLists, 0, PressMethod) ->
+split_lists(LastPuff, SlotLists, 0, PressMethod, IdxModDate) ->
     {SlotD, _} =
-        generate_binary_slot(lookup, LastPuff, PressMethod, no_timing),
+        generate_binary_slot(lookup, LastPuff, PressMethod, IdxModDate, no_timing),
     lists:reverse([SlotD|SlotLists]);
-split_lists(KVList1, SlotLists, N, PressMethod) ->
+split_lists(KVList1, SlotLists, N, PressMethod, IdxModDate) ->
     {Slot, KVListRem} = lists:split(?LOOK_SLOTSIZE, KVList1),
     {SlotD, _} =
-        generate_binary_slot(lookup, Slot, PressMethod, no_timing),
-    split_lists(KVListRem, [SlotD|SlotLists], N - 1, PressMethod).
+        generate_binary_slot(lookup, Slot, PressMethod, IdxModDate, no_timing),
+    split_lists(KVListRem, [SlotD|SlotLists], N - 1, PressMethod, IdxModDate).

--spec merge_lists(list(), list(), tuple(), atom()) ->
+-spec merge_lists(list(), list(), tuple(), press_method(), boolean()) ->
                     {list(), list(), list(tuple()), tuple()|null}.
 %% @doc
 %% Merge lists when merging across more than one file. KVLists that are
 %% provided may include pointers to fetch more Keys/Values from the source
 %% file
-merge_lists(KVList1, KVList2, LevelInfo, PressMethod) ->
+merge_lists(KVList1, KVList2, LevelInfo, PressMethod, IndexModDate) ->
     merge_lists(KVList1, KVList2,
                     LevelInfo,
                     [], null, 0,
-                    PressMethod,
+                    PressMethod,
+                    IndexModDate,
                     #build_timings{}).
-merge_lists(KVList1, KVList2, LI, SlotList, FirstKey, ?MAX_SLOTS, - _PressMethod, T0) -> +merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, ?MAX_SLOTS, + _PressMethod, _IdxModDate, T0) -> % This SST file is full, move to complete file, and return the % remainder log_buildtimings(T0, LI), - {KVList1, KVList2, lists:reverse(SlotList), FirstKey}; -merge_lists([], [], LI, SlotList, FirstKey, _SlotCount, _PressMethod, T0) -> + {KVL1, KVL2, lists:reverse(SlotList), FirstKey}; +merge_lists([], [], LI, SlotList, FirstKey, _SlotCount, + _PressMethod, _IdxModDate, T0) -> % the source files are empty, complete the file log_buildtimings(T0, LI), {[], [], lists:reverse(SlotList), FirstKey}; -merge_lists(KVList1, KVList2, LI, SlotList, FirstKey, SlotCount, - PressMethod, T0) -> +merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, SlotCount, + PressMethod, IdxModDate, T0) -> % Form a slot by merging the two lists until the next 128 K/V pairs have % been determined SW = os:timestamp(), {KVRem1, KVRem2, Slot, FK0} = - form_slot(KVList1, KVList2, LI, no_lookup, 0, [], FirstKey), + form_slot(KVL1, KVL2, LI, no_lookup, 0, [], FirstKey), T1 = update_buildtimings(SW, T0, fold_toslot), case Slot of {_, []} -> @@ -1885,12 +1903,13 @@ merge_lists(KVList1, KVList2, LI, SlotList, FirstKey, SlotCount, FK0, SlotCount, PressMethod, + IdxModDate, T1); {Lookup, KVL} -> % Convert the list of KVs for the slot into a binary, and related % metadata {SlotD, T2} = - generate_binary_slot(Lookup, KVL, PressMethod, T1), + generate_binary_slot(Lookup, KVL, PressMethod, IdxModDate, T1), merge_lists(KVRem1, KVRem2, LI, @@ -1898,6 +1917,7 @@ merge_lists(KVList1, KVList2, LI, SlotList, FirstKey, SlotCount, FK0, SlotCount + 1, PressMethod, + IdxModDate, T2) end. @@ -2261,7 +2281,8 @@ merge_tombstonelist_test() -> R = merge_lists([SkippingKV1, SkippingKV3, SkippingKV5], [SkippingKV2, SkippingKV4], {true, 9999999}, - native), + native, + ?INDEX_MODDATE), ?assertMatch({[], [], [], null}, R). 
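For orientation, the two merge entry points now take the moddate-indexing flag explicitly. A sketch of the call shapes after this patch (variable names as in the hunks above; illustrative, not part of the diff):

    %% Level 0: build slots from a single ordered KV list
    {[], [], SlotList, FirstKey} =
        merge_lists(KVList, PressMethod, ?INDEX_MODDATE),

    %% Higher levels: merge two lists, which may leave remainders once
    %% the file fills at ?MAX_SLOTS
    {Rem1, Rem2, SlotList2, FK2} =
        merge_lists(KVL1, KVL2, {IsBasement, Level},
                    PressMethod, ?INDEX_MODDATE).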
indexed_list_test() -> @@ -2273,7 +2294,7 @@ indexed_list_test() -> SW0 = os:timestamp(), {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = - generate_binary_slot(lookup, KVL1, native, no_timing), + generate_binary_slot(lookup, KVL1, native, ?INDEX_MODDATE, no_timing), io:format(user, "Indexed list created slot in ~w microseconds of size ~w~n", [timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]), @@ -2302,7 +2323,7 @@ indexed_list_mixedkeys_test() -> Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = - generate_binary_slot(lookup, Keys, native, no_timing), + generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE, no_timing), {TestK1, TestV1} = lists:nth(4, KVL1), MH1 = leveled_codec:segment_hash(TestK1), @@ -2329,7 +2350,7 @@ indexed_list_mixedkeys2_test() -> % this isn't actually ordered correctly Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2, {{_Header, FullBin, _HL, _LK}, no_timing} = - generate_binary_slot(lookup, Keys, native, no_timing), + generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE, no_timing), lists:foreach(fun({K, V}) -> MH = leveled_codec:segment_hash(K), test_binary_slot(FullBin, K, MH, {K, V}) @@ -2340,7 +2361,7 @@ indexed_list_allindexkeys_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), ?LOOK_SLOTSIZE), {{Header, FullBin, _HL, _LK}, no_timing} = - generate_binary_slot(lookup, Keys, native, no_timing), + generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE,no_timing), EmptySlotSize = ?LOOK_SLOTSIZE - 1, ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, Header), % SW = os:timestamp(), @@ -2357,7 +2378,7 @@ indexed_list_allindexkeys_nolookup_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)), ?NOLOOK_SLOTSIZE), {{Header, FullBin, _HL, _LK}, no_timing} = - generate_binary_slot(no_lookup, Keys, native, no_timing), + generate_binary_slot(no_lookup, Keys, native, ?INDEX_MODDATE,no_timing), ?assertMatch(<<_BL:20/binary, 127:8/integer>>, Header), % SW = os:timestamp(), BinToList = binaryslot_tolist(FullBin, native), @@ -2373,7 +2394,7 @@ indexed_list_allindexkeys_trimmed_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), ?LOOK_SLOTSIZE), {{Header, FullBin, _HL, _LK}, no_timing} = - generate_binary_slot(lookup, Keys, native, no_timing), + generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE,no_timing), EmptySlotSize = ?LOOK_SLOTSIZE - 1, ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, Header), ?assertMatch({Keys, none}, binaryslot_trimmedlist(FullBin, @@ -2415,7 +2436,7 @@ indexed_list_mixedkeys_bitflip_test() -> KVL1 = lists:sublist(KVL0, 33), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), {{Header, SlotBin, _HL, LK}, no_timing} = - generate_binary_slot(lookup, Keys, native, no_timing), + generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE,no_timing), ?assertMatch(LK, element(1, lists:last(Keys))), @@ -2874,7 +2895,7 @@ hashmatching_bytreesize_test() -> end, KVL = lists:map(GenKeyFun, lists:seq(1, 128)), {{PosBinIndex1, _FullBin, _HL, _LK}, no_timing} = - generate_binary_slot(lookup, KVL, native, no_timing), + generate_binary_slot(lookup, KVL, native, ?INDEX_MODDATE, no_timing), check_segment_match(PosBinIndex1, KVL, small), check_segment_match(PosBinIndex1, KVL, medium). 
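The net effect of this patch on the on-disk format is that the file version byte now carries two flags rather than one. A minimal sketch of the round trip implied by gen_fileversion/2 and imp_fileversion/2 (encode_version and decode_version are illustrative names only, not functions in the module):

    %% Bit 0 carries the compression method (1 = lz4, 0 = native or none);
    %% bit 1 records whether last-modified dates are indexed in each slot.
    encode_version(PressMethod, IdxModDate) ->
        Bit1 = case PressMethod of lz4 -> 1; native -> 0; none -> 0 end,
        Bit2 = case IdxModDate of true -> 2; false -> 0 end,
        Bit1 + Bit2.

    decode_version(VersionInt) ->
        PressMethod = case VersionInt band 1 of 0 -> native; 1 -> lz4 end,
        IdxModDate = case VersionInt band 2 of 0 -> false; 2 -> true end,
        {PressMethod, IdxModDate}.

So a file written with lz4 compression and moddate indexing carries version 3, and decode_version(3) returns {lz4, true} - older files decode with index_moddate set to false and remain readable.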
From 7295a413212ab163b1817e8f0ac2a2f404684264 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 30 Oct 2018 11:47:03 +0000 Subject: [PATCH 07/29] Read (and ignore) last modified date Add presence of LMD into index - and check everything happily lets it pass by --- src/leveled_sst.erl | 253 +++++++++++++++++++++++++++++--------------- 1 file changed, 166 insertions(+), 87 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 0f11e2b..c83f93d 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -88,7 +88,9 @@ -define(TIMING_SAMPLESIZE, 100). -define(CACHE_SIZE, 32). -define(BLOCK_LENGTHS_LENGTH, 20). +-define(LMD_LENGTH, 6). -define(FLIPPER32, 4294967295). +-define(FLIPPER48, 281474976710655). -define(COMPRESS_AT_LEVEL, 1). -define(INDEX_MODDATE, true). @@ -382,9 +384,10 @@ sst_getfilteredrange(Pid, StartKey, EndKey, ScanWidth, SegList) -> StartKey, EndKey, ScanWidth, SegList0}, infinity) of - {yield, SlotsToFetchBinList, SlotsToPoint, PressMethod} -> + {yield, SlotsToFetchBinList, SlotsToPoint, PressMethod, IdxModDate} -> {L, _BIC} = - binaryslot_reader(SlotsToFetchBinList, PressMethod, SegList0), + binaryslot_reader(SlotsToFetchBinList, + PressMethod, IdxModDate, SegList0), L ++ SlotsToPoint; Reply -> Reply @@ -412,9 +415,9 @@ sst_getslots(Pid, SlotList) -> %% false as a SegList to not filter sst_getfilteredslots(Pid, SlotList, SegList) -> SegL0 = tune_seglist(SegList), - {SlotBins, PressMethod} = + {SlotBins, PressMethod, IdxModDate} = gen_fsm:sync_send_event(Pid, {get_slots, SlotList, SegL0}, infinity), - {L, _BIC} = binaryslot_reader(SlotBins, PressMethod, SegL0), + {L, _BIC} = binaryslot_reader(SlotBins, PressMethod, IdxModDate, SegL0), L. -spec sst_getmaxsequencenumber(pid()) -> integer(). @@ -584,6 +587,7 @@ reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList}, _From, State) -> State), PressMethod = State#state.compression_method, + IdxModDate = State#state.index_moddate, case State#state.yield_blockquery of true -> @@ -591,12 +595,14 @@ reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList}, _From, State) -> {yield, SlotsToFetchBinList, SlotsToPoint, - PressMethod}, + PressMethod, + IdxModDate}, reader, State}; false -> {L, BIC} = - binaryslot_reader(SlotsToFetchBinList, PressMethod, SegList), + binaryslot_reader(SlotsToFetchBinList, + PressMethod, IdxModDate, SegList), FoldFun = fun(CacheEntry, Cache) -> case CacheEntry of @@ -613,12 +619,15 @@ reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList}, _From, State) -> State#state{blockindex_cache = BlockIdxC0}} end; reader({get_slots, SlotList, SegList}, _From, State) -> + PressMethod = State#state.compression_method, + IdxModDate = State#state.index_moddate, SlotBins = read_slots(State#state.handle, SlotList, {SegList, State#state.blockindex_cache}, - State#state.compression_method), - {reply, {SlotBins, State#state.compression_method}, reader, State}; + State#state.compression_method, + State#state.index_moddate), + {reply, {SlotBins, PressMethod, IdxModDate}, reader, State}; reader(get_maxsequencenumber, _From, State) -> Summary = State#state.summary, {reply, Summary#summary.max_sqn, reader, State}; @@ -664,13 +673,16 @@ delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SegList}, State, ?DELETE_TIMEOUT}; delete_pending({get_slots, SlotList, SegList}, _From, State) -> + PressMethod = State#state.compression_method, + IdxModDate = State#state.index_moddate, SlotBins = read_slots(State#state.handle, SlotList, {SegList, State#state.blockindex_cache}, - 
State#state.compression_method), + PressMethod, + IdxModDate), {reply, - {SlotBins, State#state.compression_method}, + {SlotBins, PressMethod, IdxModDate}, delete_pending, State, ?DELETE_TIMEOUT}; @@ -730,6 +742,7 @@ fetch(LedgerKey, Hash, State, Timings0) -> Summary = State#state.summary, PressMethod = State#state.compression_method, + IdxModDate = State#state.index_moddate, Slot = lookup_slot(LedgerKey, Summary#summary.index), {SW1, Timings1} = update_timings(SW0, Timings0, index_query, true), @@ -738,13 +751,12 @@ fetch(LedgerKey, Hash, State, Timings0) -> CachedBlockIdx = array:get(SlotID - 1, State#state.blockindex_cache), {SW2, Timings2} = update_timings(SW1, Timings1, lookup_cache, true), - BL = ?BLOCK_LENGTHS_LENGTH, - case CachedBlockIdx of + case extract_header(CachedBlockIdx, IdxModDate) of none -> SlotBin = read_slot(State#state.handle, Slot), {Result, Header} = - binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod), + binaryslot_get(SlotBin, LedgerKey, Hash, PressMethod, IdxModDate), BlockIndexCache = array:set(SlotID - 1, Header, State#state.blockindex_cache), {_SW3, Timings3} = @@ -752,7 +764,7 @@ fetch(LedgerKey, Hash, State, Timings0) -> {Result, State#state{blockindex_cache = BlockIndexCache}, Timings3}; - <> -> + {BlockLengths, _LMD, PosBin} -> PosList = find_pos(PosBin, extra_hash(Hash), [], 0), case PosList of [] -> @@ -781,6 +793,7 @@ fetch(LedgerKey, Hash, State, Timings0) -> byte_size(PosBin), LedgerKey, PressMethod, + IdxModDate, not_present), FetchCache0 = array:set(CacheHash, Result, FetchCache), @@ -861,7 +874,8 @@ fetch_range(StartKey, EndKey, ScanWidth, SegList, State) -> read_slots(Handle, SlotsToFetch, {SegList, State#state.blockindex_cache}, - State#state.compression_method), + State#state.compression_method, + State#state.index_moddate), {SlotsToFetchBinList, SlotsToPoint}. -spec compress_level(integer(), press_method()) -> press_method(). @@ -1194,10 +1208,8 @@ accumulate_positions({K, V}, {PosBinAcc, NoHashCount, HashAcc, LMDAcc}) -> %% @doc %% Get the last modified date. If no Last Modified Date on any object, can't %% add the accelerator and should check each object in turn -take_max_lastmoddate(_LMD, undefined) -> - undefined; take_max_lastmoddate(undefined, _LMDAcc) -> - undefined; + ?FLIPPER48; take_max_lastmoddate(LMD, LMDAcc) -> max(LMD, LMDAcc). 
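The reworked take_max_lastmoddate/2 means the accumulator always ends as an integer: the running maximum of the slot's last-modified dates, forced up to the 48-bit ceiling when any object carries no date. An illustration of the resulting fold behaviour (max_lmd is a hypothetical helper, written as if inside the module):

    %% Fold last-modified dates into a slot maximum. One undefined date
    %% pushes the maximum to ?FLIPPER48 (the largest 48-bit integer), so
    %% a low-date filter can never incorrectly skip the slot.
    max_lmd(LMDs) ->
        lists:foldl(fun take_max_lastmoddate/2, 0, LMDs).

    %% max_lmd([1540900000, 1540900100]) =:= 1540900100
    %% max_lmd([1540900000, undefined]) =:= 281474976710655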
@@ -1218,11 +1230,11 @@ generate_binary_slot(Lookup, KVL, PressMethod, _IndexModDate, BuildTimings0) -> SW0 = os:timestamp(), - {HashL, PosBinIndex} = + {HashL, PosBinIndex, LMD} = case Lookup of lookup -> InitAcc = {<<>>, 0, [], 0}, - {PosBinIndex0, NHC, HashL0, _LMD} = + {PosBinIndex0, NHC, HashL0, LMD0} = lists:foldr(fun accumulate_positions/2, InitAcc, KVL), PosBinIndex1 = case NHC of @@ -1232,9 +1244,9 @@ generate_binary_slot(Lookup, KVL, PressMethod, _IndexModDate, BuildTimings0) -> N = NHC - 1, <<0:1/integer, N:7/integer, PosBinIndex0/binary>> end, - {HashL0, PosBinIndex1}; + {HashL0, PosBinIndex1, LMD0}; no_lookup -> - {[], <<0:1/integer, 127:7/integer>>} + {[], <<0:1/integer, 127:7/integer>>, 0} end, BuildTimings1 = update_buildtimings(SW0, BuildTimings0, slot_hashlist), @@ -1295,7 +1307,7 @@ generate_binary_slot(Lookup, KVL, PressMethod, _IndexModDate, BuildTimings0) -> BuildTimings2 = update_buildtimings(SW1, BuildTimings1, slot_serialise), SW2 = os:timestamp(), - B1P = byte_size(PosBinIndex) + ?BLOCK_LENGTHS_LENGTH, + B1P = byte_size(PosBinIndex) + ?BLOCK_LENGTHS_LENGTH + ?LMD_LENGTH, CheckB1P = hmac(B1P), B1L = byte_size(B1), B2L = byte_size(B2), @@ -1307,6 +1319,7 @@ generate_binary_slot(Lookup, KVL, PressMethod, _IndexModDate, BuildTimings0) -> B3L:32/integer, B4L:32/integer, B5L:32/integer, + LMD:48/integer, PosBinIndex/binary>>, CheckH = hmac(Header), SlotBin = < integer(), leveled_codec:ledger_key()|false, press_method(), + boolean(), list()|not_present) -> list()|not_present. %% @doc %% Acc should start as not_present if LedgerKey is a key, and a list if %% LedgerKey is false check_blocks([], _BlockPointer, _BlockLengths, _PosBinLength, - _LedgerKeyToCheck, _PressMethod, not_present) -> + _LedgerKeyToCheck, _PressMethod, _IdxModDate, not_present) -> not_present; check_blocks([], _BlockPointer, _BlockLengths, _PosBinLength, - _LedgerKeyToCheck, _PressMethod, Acc) -> + _LedgerKeyToCheck, _PressMethod, _IdxModDate, Acc) -> lists:reverse(Acc); check_blocks([Pos|Rest], BlockPointer, BlockLengths, PosBinLength, - LedgerKeyToCheck, PressMethod, Acc) -> + LedgerKeyToCheck, PressMethod, IdxModDate, Acc) -> {BlockNumber, BlockPos} = revert_position(Pos), BlockBin = read_block(BlockPointer, BlockLengths, PosBinLength, - BlockNumber), + BlockNumber, + additional_offset(IdxModDate)), BlockL = deserialise_block(BlockBin, PressMethod), {K, V} = lists:nth(BlockPos, BlockL), case K of @@ -1355,30 +1370,38 @@ check_blocks([Pos|Rest], BlockPointer, BlockLengths, PosBinLength, false -> check_blocks(Rest, BlockPointer, BlockLengths, PosBinLength, - LedgerKeyToCheck, PressMethod, + LedgerKeyToCheck, PressMethod, IdxModDate, [{K, V}|Acc]); _ -> check_blocks(Rest, BlockPointer, BlockLengths, PosBinLength, - LedgerKeyToCheck, PressMethod, Acc) + LedgerKeyToCheck, PressMethod, IdxModDate, + Acc) end end. +-spec additional_offset(boolean()) -> pos_integer(). +%% @doc +%% 4-byte CRC, 4-byte pos, 4-byte CRC, 5x4 byte lengths, 6 byte LMD +%% LMD may not be present +additional_offset(true) -> + ?BLOCK_LENGTHS_LENGTH + 4 + 4 + 4 + 6; +additional_offset(false) -> + ?BLOCK_LENGTHS_LENGTH + 4 + 4 + 4. 
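With moddate indexing enabled the slot header becomes five 32-bit block lengths, a 48-bit LMD, then the position index, and the slot binary prefixes this with 12 bytes of checksums and position data - hence the 38-byte additional offset above (?BLOCK_LENGTHS_LENGTH + 4 + 4 + 4 + 6) against 32 bytes without the LMD. A hedged sketch of pulling an indexed header apart, mirroring what the extract_header/2 helper below does for the IdxModDate = true case:

    %% Illustrative only: parse a header written with moddate indexing on.
    parse_header(<<BlockLengths:20/binary, LMD:48/integer, PosBin/binary>>) ->
        <<B1L:32/integer, B2L:32/integer, B3L:32/integer,
            B4L:32/integer, B5L:32/integer>> = BlockLengths,
        {[B1L, B2L, B3L, B4L, B5L], LMD, PosBin}.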
-read_block({Handle, StartPos}, BlockLengths, PosBinLength, BlockID) -> + +read_block({Handle, StartPos}, BlockLengths, PosBinLength, BlockID, AO) -> {Offset, Length} = block_offsetandlength(BlockLengths, BlockID), {ok, BlockBin} = file:pread(Handle, StartPos + Offset + PosBinLength - + 32, - % 4-byte CRC, 4-byte pos, - % 4-byte CRC, 5x4 byte lengths + + AO, Length), BlockBin; -read_block(SlotBin, BlockLengths, PosBinLength, BlockID) -> +read_block(SlotBin, BlockLengths, PosBinLength, BlockID, AO) -> {Offset, Length} = block_offsetandlength(BlockLengths, BlockID), - StartPos = Offset + PosBinLength + 32, + StartPos = Offset + PosBinLength + AO, <<_Pre:StartPos/binary, BlockBin:Length/binary, _Rest/binary>> = SlotBin, BlockBin. @@ -1417,12 +1440,12 @@ binarysplit_mapfun(MultiSlotBin, StartPos) -> -spec read_slots(file:io_device(), list(), - {false|list(), any()}, press_method()) + {false|list(), any()}, press_method(), boolean()) -> list(binaryslot_element()). %% @doc %% The reading of sots will return a list of either 2-tuples containing %% {K, V} pairs - or 3-tuples containing {Binary, SK, EK}. The 3 tuples -%% can be exploded into lists of {K, V} pairs using the binaryslot_reader/3 +%% can be exploded into lists of {K, V} pairs using the binaryslot_reader/4 %% function %% %% Reading slots is generally unfiltered, but in the sepcial case when @@ -1433,12 +1456,14 @@ binarysplit_mapfun(MultiSlotBin, StartPos) -> %% any key comparison between levels should allow for a non-matching key to %% be considered as superior to a matching key - as otherwise a matching key %% may be intermittently removed from the result set -read_slots(Handle, SlotList, {false, _BlockIndexCache}, _PressMethod) -> +read_slots(Handle, SlotList, {false, _BlockIndexCache}, + _PressMethod, _IdxModDate) -> % No list of segments passed LengthList = lists:map(fun pointer_mapfun/1, SlotList), {MultiSlotBin, StartPos} = read_length_list(Handle, LengthList), lists:map(binarysplit_mapfun(MultiSlotBin, StartPos), LengthList); -read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) -> +read_slots(Handle, SlotList, {SegList, BlockIndexCache}, + PressMethod, IdxModDate) -> % List of segments passed so only {K, V} pairs matching those segments % should be returned. This required the {K, V} pair to have been added % with the appropriate hash - if the pair were added with no_lookup as @@ -1446,8 +1471,8 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) -> BinMapFun = fun(Pointer, Acc) -> {SP, _L, ID, _SK, _EK} = pointer_mapfun(Pointer), - BL = ?BLOCK_LENGTHS_LENGTH, - case array:get(ID - 1, BlockIndexCache) of + CachedHeader = array:get(ID - 1, BlockIndexCache), + case extract_header(CachedHeader, IdxModDate) of none -> % If there is an attempt to use the seg list query and the % index block cache isn't cached for any part this may be @@ -1457,7 +1482,7 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) -> read_length_list(Handle, [LengthDetails]), MapFun = binarysplit_mapfun(MultiSlotBin, StartPos), Acc ++ [MapFun(LengthDetails)]; - <> -> + {BlockLengths, _LMD, BlockIdx} -> % If there is a BlockIndex cached then we can use it to % check to see if any of the expected segments are % present without lifting the slot off disk. 
Also the @@ -1472,7 +1497,7 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) -> {Handle, SP}, BlockLengths, byte_size(BlockIdx), - false, PressMethod, + false, PressMethod, IdxModDate, []) end end @@ -1481,32 +1506,39 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache}, PressMethod) -> -spec binaryslot_reader(list(binaryslot_element()), - native|lz4, + press_method(), + boolean(), leveled_codec:segment_list()) -> {list({tuple(), tuple()}), list({integer(), binary()})}. %% @doc %% Read the binary slots converting them to {K, V} pairs if they were not %% already {K, V} pairs -binaryslot_reader(SlotBinsToFetch, PressMethod, SegList) -> - binaryslot_reader(SlotBinsToFetch, PressMethod, SegList, [], []). +binaryslot_reader(SlotBinsToFetch, PressMethod, IdxModDate, SegList) -> + binaryslot_reader(SlotBinsToFetch, + PressMethod, IdxModDate, SegList, [], []). -binaryslot_reader([], _PressMethod, _SegList, Acc, BIAcc) -> +binaryslot_reader([], _PressMethod, _IdxModDate, _SegList, Acc, BIAcc) -> {Acc, BIAcc}; binaryslot_reader([{SlotBin, ID, SK, EK}|Tail], - PressMethod, SegList, Acc, BIAcc) -> + PressMethod, IdxModDate, SegList, Acc, BIAcc) -> {TrimmedL, BICache} = binaryslot_trimmedlist(SlotBin, SK, EK, PressMethod, + IdxModDate, SegList), binaryslot_reader(Tail, PressMethod, + IdxModDate, SegList, Acc ++ TrimmedL, [{ID, BICache}|BIAcc]); -binaryslot_reader([{K, V}|Tail], PressMethod, SegList, Acc, BIAcc) -> - binaryslot_reader(Tail, PressMethod, SegList, Acc ++ [{K, V}], BIAcc). +binaryslot_reader([{K, V}|Tail], + PressMethod, IdxModDate, SegList, Acc, BIAcc) -> + binaryslot_reader(Tail, + PressMethod, IdxModDate, SegList, + Acc ++ [{K, V}], BIAcc). read_length_list(Handle, LengthList) -> @@ -1517,12 +1549,27 @@ read_length_list(Handle, LengthList) -> {MultiSlotBin, StartPos}. +-spec extract_header(binary()|none, boolean()) -> + {binary(), integer(), binary()}|none. +%% @doc +%% Helper for extracting the binaries from the header ignoring the missing LMD +%% if LMD is not indexed +extract_header(none, _IdxModDate) -> + none; % used when the block cache has returned none +extract_header(Header, true) -> + BL = ?BLOCK_LENGTHS_LENGTH, + <> = Header, + {BlockLengths, LMD, PosBinIndex}; +extract_header(Header, false) -> + BL = ?BLOCK_LENGTHS_LENGTH, + <> = Header, + {BlockLengths, 0, PosBinIndex}. -binaryslot_get(FullBin, Key, Hash, PressMethod) -> +binaryslot_get(FullBin, Key, Hash, PressMethod, IdxModDate) -> case crc_check_slot(FullBin) of {Header, Blocks} -> - BL = ?BLOCK_LENGTHS_LENGTH, - <> = Header, + {BlockLengths, _LMD, PosBinIndex} = + extract_header(Header, IdxModDate), PosList = find_pos(PosBinIndex, extra_hash(Hash), [], @@ -1534,7 +1581,7 @@ binaryslot_get(FullBin, Key, Hash, PressMethod) -> none} end. -binaryslot_tolist(FullBin, PressMethod) -> +binaryslot_tolist(FullBin, PressMethod, IdxModDate) -> BlockFetchFun = fun(Length, {Acc, Bin}) -> case Length of @@ -1549,12 +1596,13 @@ binaryslot_tolist(FullBin, PressMethod) -> {Out, _Rem} = case crc_check_slot(FullBin) of {Header, Blocks} -> + {BlockLengths, _LMD, _PosBinIndex} = + extract_header(Header, IdxModDate), <> = Header, + B5L:32/integer>> = BlockLengths, lists:foldl(BlockFetchFun, {[], Blocks}, [B1L, B2L, B3L, B4L, B5L]); @@ -1564,9 +1612,11 @@ binaryslot_tolist(FullBin, PressMethod) -> Out. 
-binaryslot_trimmedlist(FullBin, all, all, PressMethod, false) -> - {binaryslot_tolist(FullBin, PressMethod), none}; -binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod, SegList) -> +binaryslot_trimmedlist(FullBin, all, all, + PressMethod, IdxModDate, false) -> + {binaryslot_tolist(FullBin, PressMethod, IdxModDate), none}; +binaryslot_trimmedlist(FullBin, StartKey, EndKey, + PressMethod, IdxModDate, SegList) -> LTrimFun = fun({K, _V}) -> K < StartKey end, RTrimFun = fun({K, _V}) -> not leveled_codec:endkey_passed(EndKey, K) end, BlockCheckFun = @@ -1615,12 +1665,13 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod, SegList) -> % and Block3 (as Block 1 will also be checked), but finessing this last % scenario is hard to do in concise code {{Header, Blocks}, false} -> + {BlockLengths, _LMD, _PosBinIndex} = + extract_header(Header, IdxModDate), <> = Header, + B5L:32/integer>> = BlockLengths, <> = Blocks, @@ -1665,7 +1716,8 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey, PressMethod, SegList) -> BlockLengths, byte_size(BlockIdx), false, - PressMethod, + PressMethod, + IdxModDate, []), {KVL, Header}; {crc_wonky, _} -> @@ -2360,35 +2412,52 @@ indexed_list_mixedkeys2_test() -> indexed_list_allindexkeys_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), ?LOOK_SLOTSIZE), - {{Header, FullBin, _HL, _LK}, no_timing} = - generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE,no_timing), + {{HeaderT, FullBinT, _HL, _LK}, no_timing} = + generate_binary_slot(lookup, Keys, native, true, no_timing), + {{HeaderF, FullBinF, _HL, _LK}, no_timing} = + generate_binary_slot(lookup, Keys, native, false, no_timing), EmptySlotSize = ?LOOK_SLOTSIZE - 1, - ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, Header), + LMD = ?FLIPPER48, + ?assertMatch(<<_BL:20/binary, LMD:48/integer, EmptySlotSize:8/integer>>, + HeaderT), + ?assertMatch(<<_BL:20/binary, LMD:48/integer, EmptySlotSize:8/integer>>, + HeaderF), % SW = os:timestamp(), - BinToList = binaryslot_tolist(FullBin, native), + BinToListT = binaryslot_tolist(FullBinT, native, true), + BinToListF = binaryslot_tolist(FullBinF, native, false), % io:format(user, % "Indexed list flattened in ~w microseconds ~n", % [timer:now_diff(os:timestamp(), SW)]), - ?assertMatch(Keys, BinToList), - ?assertMatch({Keys, none}, binaryslot_trimmedlist(FullBin, + ?assertMatch(Keys, BinToListT), + ?assertMatch({Keys, none}, binaryslot_trimmedlist(FullBinT, all, all, - native, false)). + native, + true, + false)), + ?assertMatch(Keys, BinToListF), + ?assertMatch({Keys, none}, binaryslot_trimmedlist(FullBinF, + all, all, + native, + false, + false)). indexed_list_allindexkeys_nolookup_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)), ?NOLOOK_SLOTSIZE), {{Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot(no_lookup, Keys, native, ?INDEX_MODDATE,no_timing), - ?assertMatch(<<_BL:20/binary, 127:8/integer>>, Header), + ?assertMatch(<<_BL:20/binary, _LMD:48/integer, 127:8/integer>>, Header), % SW = os:timestamp(), - BinToList = binaryslot_tolist(FullBin, native), + BinToList = binaryslot_tolist(FullBin, native, ?INDEX_MODDATE), % io:format(user, % "Indexed list flattened in ~w microseconds ~n", % [timer:now_diff(os:timestamp(), SW)]), ?assertMatch(Keys, BinToList), ?assertMatch({Keys, none}, binaryslot_trimmedlist(FullBin, all, all, - native, false)). + native, + ?INDEX_MODDATE, + false)). 
indexed_list_allindexkeys_trimmed_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), @@ -2396,7 +2465,8 @@ indexed_list_allindexkeys_trimmed_test() -> {{Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE,no_timing), EmptySlotSize = ?LOOK_SLOTSIZE - 1, - ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, Header), + ?assertMatch(<<_BL:20/binary, _LMD:48/integer, EmptySlotSize:8/integer>>, + Header), ?assertMatch({Keys, none}, binaryslot_trimmedlist(FullBin, {i, "Bucket", @@ -2407,26 +2477,30 @@ indexed_list_allindexkeys_trimmed_test() -> {"t1_int", 99999}, null}, native, + ?INDEX_MODDATE, false)), {SK1, _} = lists:nth(10, Keys), {EK1, _} = lists:nth(100, Keys), R1 = lists:sublist(Keys, 10, 91), - {O1, none} = binaryslot_trimmedlist(FullBin, SK1, EK1, native, false), + {O1, none} = binaryslot_trimmedlist(FullBin, SK1, EK1, + native, ?INDEX_MODDATE, false), ?assertMatch(91, length(O1)), ?assertMatch(R1, O1), {SK2, _} = lists:nth(10, Keys), {EK2, _} = lists:nth(20, Keys), R2 = lists:sublist(Keys, 10, 11), - {O2, none} = binaryslot_trimmedlist(FullBin, SK2, EK2, native, false), + {O2, none} = binaryslot_trimmedlist(FullBin, SK2, EK2, + native, ?INDEX_MODDATE, false), ?assertMatch(11, length(O2)), ?assertMatch(R2, O2), {SK3, _} = lists:nth(?LOOK_SLOTSIZE - 1, Keys), {EK3, _} = lists:nth(?LOOK_SLOTSIZE, Keys), R3 = lists:sublist(Keys, ?LOOK_SLOTSIZE - 1, 2), - {O3, none} = binaryslot_trimmedlist(FullBin, SK3, EK3, native, false), + {O3, none} = binaryslot_trimmedlist(FullBin, SK3, EK3, + native, ?INDEX_MODDATE, false), ?assertMatch(2, length(O3)), ?assertMatch(R3, O3). @@ -2436,7 +2510,7 @@ indexed_list_mixedkeys_bitflip_test() -> KVL1 = lists:sublist(KVL0, 33), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), {{Header, SlotBin, _HL, LK}, no_timing} = - generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE,no_timing), + generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE, no_timing), ?assertMatch(LK, element(1, lists:last(Keys))), @@ -2444,7 +2518,8 @@ indexed_list_mixedkeys_bitflip_test() -> _B2L:32/integer, _B3L:32/integer, _B4L:32/integer, - _B5L:32/integer, + _B5L:32/integer, + _LMD:48/integer, PosBin/binary>> = Header, TestKey1 = element(1, lists:nth(1, KVL1)), @@ -2454,7 +2529,7 @@ indexed_list_mixedkeys_bitflip_test() -> test_binary_slot(SlotBin, TestKey1, MH1, lists:nth(1, KVL1)), test_binary_slot(SlotBin, TestKey2, MH2, lists:nth(33, KVL1)), - ToList = binaryslot_tolist(SlotBin, native), + ToList = binaryslot_tolist(SlotBin, native, ?INDEX_MODDATE), ?assertMatch(Keys, ToList), [Pos1] = find_pos(PosBin, extra_hash(MH1), [], 0), @@ -2473,8 +2548,8 @@ indexed_list_mixedkeys_bitflip_test() -> test_binary_slot(SlotBin1, TestKey1, MH1, not_present), test_binary_slot(SlotBin2, TestKey2, MH2, not_present), - ToList1 = binaryslot_tolist(SlotBin1, native), - ToList2 = binaryslot_tolist(SlotBin2, native), + ToList1 = binaryslot_tolist(SlotBin1, native, ?INDEX_MODDATE), + ToList2 = binaryslot_tolist(SlotBin2, native, ?INDEX_MODDATE), ?assertMatch(true, is_list(ToList1)), ?assertMatch(true, is_list(ToList2)), @@ -2487,7 +2562,8 @@ indexed_list_mixedkeys_bitflip_test() -> {SK1, _} = lists:nth(10, Keys), {EK1, _} = lists:nth(20, Keys), - {O1, none} = binaryslot_trimmedlist(SlotBin3, SK1, EK1, native, false), + {O1, none} = binaryslot_trimmedlist(SlotBin3, SK1, EK1, + native, ?INDEX_MODDATE, false), ?assertMatch([], O1), SlotBin4 = flip_byte(SlotBin, 0, 20), @@ -2495,12 +2571,14 @@ 
indexed_list_mixedkeys_bitflip_test() ->

     test_binary_slot(SlotBin4, TestKey1, MH1, not_present),
     test_binary_slot(SlotBin5, TestKey1, MH1, not_present),
-    ToList4 = binaryslot_tolist(SlotBin4, native),
-    ToList5 = binaryslot_tolist(SlotBin5, native),
+    ToList4 = binaryslot_tolist(SlotBin4, native, ?INDEX_MODDATE),
+    ToList5 = binaryslot_tolist(SlotBin5, native, ?INDEX_MODDATE),
     ?assertMatch([], ToList4),
     ?assertMatch([], ToList5),
-    {O4, none} = binaryslot_trimmedlist(SlotBin4, SK1, EK1, native, false),
-    {O5, none} = binaryslot_trimmedlist(SlotBin4, SK1, EK1, native, false),
+    {O4, none} = binaryslot_trimmedlist(SlotBin4, SK1, EK1,
+                                        native, ?INDEX_MODDATE, false),
+    {O5, none} = binaryslot_trimmedlist(SlotBin4, SK1, EK1,
+                                        native, ?INDEX_MODDATE, false),
     ?assertMatch([], O4),
     ?assertMatch([], O5).

@@ -2518,7 +2596,8 @@ flip_byte(Binary, Offset, Length) ->

 test_binary_slot(FullBin, Key, Hash, ExpectedValue) ->
     % SW = os:timestamp(),
-    {ReturnedValue, _Header} = binaryslot_get(FullBin, Key, Hash, native),
+    {ReturnedValue, _Header} =
+        binaryslot_get(FullBin, Key, Hash, native, ?INDEX_MODDATE),
     ?assertMatch(ExpectedValue, ReturnedValue).
     % io:format(user, "Fetch success in ~w microseconds ~n",
     %            [timer:now_diff(os:timestamp(), SW)]).

From 75d2e2d5461cd926f976eb6ca027bac3d665a8d9 Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Tue, 30 Oct 2018 13:00:23 +0000
Subject: [PATCH 08/29] Fix yield

Wrong format of response if it was delete_pending

---
 src/leveled_sst.erl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl
index c83f93d..aa52e28 100644
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@@ -667,8 +667,9 @@ delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SegList},
                     State),
     % Always yield as about to clear and de-reference
     PressMethod = State#state.compression_method,
+    IdxModDate = State#state.index_moddate,
     {reply,
-        {yield, SlotsToFetchBinList, SlotsToPoint, PressMethod},
+        {yield, SlotsToFetchBinList, SlotsToPoint, PressMethod, IdxModDate},
         delete_pending,
         State,
         ?DELETE_TIMEOUT};

From bdd1762130d1a6d2a9728fd167c70f6149c89000 Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Tue, 30 Oct 2018 14:06:17 +0000
Subject: [PATCH 09/29] Missing use of extract_header

Spotted by ct test crossbucket_aae

---
 src/leveled_sst.erl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl
index aa52e28..2349ef7 100644
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@@ -1709,8 +1709,7 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey,
             {Acc, _Continue} = lists:foldl(BlockCheckFun, {[], true}, BlocksToCheck),
             {Acc, none};
         {{Header, _Blocks}, SegList} ->
-            BL = ?BLOCK_LENGTHS_LENGTH,
-            <<BlockLengths:BL/binary, BlockIdx/binary>> = Header,
+            {BlockLengths, _LMD, BlockIdx} = extract_header(Header, IdxModDate),
             PosList = find_pos(BlockIdx, SegList, [], 0),
             KVL = check_blocks(PosList,
                                 FullBin,

From b7e697f7f027e6e410b245a1c4bcae9d6b9defff Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Tue, 30 Oct 2018 16:44:00 +0000
Subject: [PATCH 10/29] Fold API to leveled_sst

Externally to leveled_sst all folds are actually managed through
expand_list_by_pointer. Make the API a bit clearer in this regard, and
add specs to help dialyzer.

This also adds LowLastMod to the API for expanding pointers (although the
leveled_penciller just defaults this to 0 for everything).
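A sketch of the call shape this gives the penciller (argument names as in the diff below; passing 0 disables last-modified filtering):

    %% Expand a file pointer into up to Width {K, V} pairs, with a tail
    %% of unexpanded pointers once the scan width is satisfied.
    Pointer = {next, Owner, StartKey, EndKey},
    UpdList = leveled_sst:sst_expandpointer(Pointer,
                                            RestOfKeys,
                                            Width,
                                            SegList,
                                            0).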
--- src/leveled_penciller.erl | 18 +- src/leveled_sst.erl | 357 +++++++++++++++++++++++--------------- 2 files changed, 225 insertions(+), 150 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 4692bfc..4b5581a 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -1519,10 +1519,11 @@ find_nextkey(QueryArray, LCnt, % The first key at this level is pointer to a file - need to query % the file to expand this level out before proceeding Pointer = {next, Owner, StartKey, EndKey}, - UpdList = leveled_sst:expand_list_by_pointer(Pointer, - RestOfKeys, - Width, - SegList), + UpdList = leveled_sst:sst_expandpointer(Pointer, + RestOfKeys, + Width, + SegList, + 0), NewEntry = {LCnt, UpdList}, % Need to loop around at this level (LCnt) as we have not yet % examined a real key at this level @@ -1535,10 +1536,11 @@ find_nextkey(QueryArray, LCnt, % The first key at this level is pointer within a file - need to % query the file to expand this level out before proceeding Pointer = {pointer, SSTPid, Slot, PSK, PEK}, - UpdList = leveled_sst:expand_list_by_pointer(Pointer, - RestOfKeys, - Width, - SegList), + UpdList = leveled_sst:sst_expandpointer(Pointer, + RestOfKeys, + Width, + SegList, + 0), NewEntry = {LCnt, UpdList}, % Need to loop around at this level (LCnt) as we have not yet % examined a real key at this level diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 2349ef7..e9d6bd1 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -114,10 +114,7 @@ sst_open/2, sst_get/2, sst_get/3, - sst_getkvrange/4, - sst_getfilteredrange/5, - sst_getslots/2, - sst_getfilteredslots/3, + sst_expandpointer/5, sst_getmaxsequencenumber/1, sst_setfordelete/2, sst_clear/1, @@ -125,7 +122,6 @@ sst_deleteconfirmed/1, sst_close/1]). --export([expand_list_by_pointer/4]). -record(slot_index_value, {slot_id :: integer(), @@ -144,10 +140,22 @@ :: all|leveled_codec:ledger_key(). -type slot_pointer() :: {pointer, pid(), integer(), range_endpoint(), range_endpoint()}. --type sst_pointer() +-type sst_pointer() + % Used in sst_new :: {next, leveled_pmanifest:manifest_entry(), - leveled_codec:ledger_key()|all}. + range_endpoint()}. +-type sst_closed_pointer() + % used in expand_list_by_pointer + % (close point is added by maybe_expand_pointer + :: {next, + leveled_pmanifest:manifest_entry(), + range_endpoint(), + range_endpoint()}. +-type expandable_pointer() + :: slot_pointer()|sst_closed_pointer(). +-type expanded_pointer() + :: leveled_codec:ledger_kv()|expandable_pointer(). -type binaryslot_element() :: {tuple(), tuple()}|{binary(), integer(), tuple(), tuple()}. @@ -341,91 +349,29 @@ sst_get(Pid, LedgerKey) -> sst_get(Pid, LedgerKey, Hash) -> gen_fsm:sync_send_event(Pid, {get_kv, LedgerKey, Hash}, infinity). - --spec sst_getkvrange(pid(), - range_endpoint(), - range_endpoint(), - integer()) - -> list(leveled_codec:ledger_kv()|slot_pointer()). -%% @doc -%% Get a range of {Key, Value} pairs as a list between StartKey and EndKey -%% (inclusive). The ScanWidth is the maximum size of the range, a pointer -%% will be placed on the tail of the resulting list if results expand beyond -%% the Scan Width -sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) -> - sst_getfilteredrange(Pid, StartKey, EndKey, ScanWidth, false). - - --spec sst_getfilteredrange(pid(), - range_endpoint(), - range_endpoint(), - integer(), - leveled_codec:segment_list()) - -> list(leveled_codec:ledger_kv()|slot_pointer()). 
-%% @doc -%% Get a range of {Key, Value} pairs as a list between StartKey and EndKey -%% (inclusive). The ScanWidth is the maximum size of the range, a pointer -%% will be placed on the tail of the resulting list if results expand beyond -%% the Scan Width -%% -%% To make the range open-ended (either to start, end or both) the all atom -%% can be used in place of the Key tuple. -%% -%% A segment list can also be passed, which inidcates a subset of segment -%% hashes of interest in the query. -%% -%% TODO: Optimise this so that passing a list of segments that tune to the -%% same hash is faster - perhaps provide an exportable function in -%% leveled_tictac -sst_getfilteredrange(Pid, StartKey, EndKey, ScanWidth, SegList) -> - SegList0 = tune_seglist(SegList), - case gen_fsm:sync_send_event(Pid, - {get_kvrange, - StartKey, EndKey, - ScanWidth, SegList0}, - infinity) of - {yield, SlotsToFetchBinList, SlotsToPoint, PressMethod, IdxModDate} -> - {L, _BIC} = - binaryslot_reader(SlotsToFetchBinList, - PressMethod, IdxModDate, SegList0), - L ++ SlotsToPoint; - Reply -> - Reply - end. - --spec sst_getslots(pid(), list(slot_pointer())) - -> list(leveled_codec:ledger_kv()). -%% @doc -%% Get a list of slots by their ID. The slot will be converted from the binary -%% to term form outside of the FSM loop, this is to stop the copying of the -%% converted term to the calling process. -sst_getslots(Pid, SlotList) -> - sst_getfilteredslots(Pid, SlotList, false). - --spec sst_getfilteredslots(pid(), - list(slot_pointer()), - leveled_codec:segment_list()) - -> list(leveled_codec:ledger_kv()). -%% @doc -%% Get a list of slots by their ID. The slot will be converted from the binary -%% to term form outside of the FSM loop -%% -%% A list of 16-bit integer Segment IDs can be passed to filter the keys -%% returned (not precisely - with false results returned in addition). Use -%% false as a SegList to not filter -sst_getfilteredslots(Pid, SlotList, SegList) -> - SegL0 = tune_seglist(SegList), - {SlotBins, PressMethod, IdxModDate} = - gen_fsm:sync_send_event(Pid, {get_slots, SlotList, SegL0}, infinity), - {L, _BIC} = binaryslot_reader(SlotBins, PressMethod, IdxModDate, SegL0), - L. - -spec sst_getmaxsequencenumber(pid()) -> integer(). %% @doc %% Get the maximume sequence number for this SST file sst_getmaxsequencenumber(Pid) -> gen_fsm:sync_send_event(Pid, get_maxsequencenumber, infinity). +-spec sst_expandpointer(expandable_pointer(), + list(expandable_pointer()), + pos_integer(), + leveled_codec:segment_list(), + non_neg_integer()) + -> list(expanded_pointer()). +%% @doc +%% Expand out a list of pointer to return a list of Keys and Values with a +%% tail of pointers (once the ScanWidth has been satisfied). +%% Folding over keys in a store uses this function, although this function +%% does not directly call the gen_server - it does so by sst_getfilteredslots +%% or sst_getfilteredrange depending on the nature of the pointer. +sst_expandpointer(Pointer, MorePointers, ScanWidth, SegmentList, LowLastMod) -> + expand_list_by_pointer(Pointer, MorePointers, ScanWidth, + SegmentList, LowLastMod). + + -spec sst_setfordelete(pid(), pid()|false) -> ok. 
%% @doc %% If the SST is no longer in use in the active ledger it can be set for @@ -579,11 +525,13 @@ reader({get_kv, LedgerKey, Hash}, _From, State) -> {reply, Result, reader, UpdState#state{timings = UpdTimings0, timings_countdown = CountDown}}; -reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList}, _From, State) -> +reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList, LowLastMod}, + _From, State) -> {SlotsToFetchBinList, SlotsToPoint} = fetch_range(StartKey, EndKey, ScanWidth, SegList, + LowLastMod, State), PressMethod = State#state.compression_method, @@ -618,13 +566,13 @@ reader({get_kvrange, StartKey, EndKey, ScanWidth, SegList}, _From, State) -> reader, State#state{blockindex_cache = BlockIdxC0}} end; -reader({get_slots, SlotList, SegList}, _From, State) -> +reader({get_slots, SlotList, SegList, LowLastMod}, _From, State) -> PressMethod = State#state.compression_method, IdxModDate = State#state.index_moddate, SlotBins = read_slots(State#state.handle, SlotList, - {SegList, State#state.blockindex_cache}, + {SegList, LowLastMod, State#state.blockindex_cache}, State#state.compression_method, State#state.index_moddate), {reply, {SlotBins, PressMethod, IdxModDate}, reader, State}; @@ -658,12 +606,13 @@ reader(close, _From, State) -> delete_pending({get_kv, LedgerKey, Hash}, _From, State) -> {Result, UpdState, _Ts} = fetch(LedgerKey, Hash, State, no_timing), {reply, Result, delete_pending, UpdState, ?DELETE_TIMEOUT}; -delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SegList}, - _From, State) -> +delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SegList, LowLastMod}, + _From, State) -> {SlotsToFetchBinList, SlotsToPoint} = fetch_range(StartKey, EndKey, ScanWidth, SegList, + LowLastMod, State), % Always yield as about to clear and de-reference PressMethod = State#state.compression_method, @@ -673,13 +622,13 @@ delete_pending({get_kvrange, StartKey, EndKey, ScanWidth, SegList}, delete_pending, State, ?DELETE_TIMEOUT}; -delete_pending({get_slots, SlotList, SegList}, _From, State) -> +delete_pending({get_slots, SlotList, SegList, LowLastMod}, _From, State) -> PressMethod = State#state.compression_method, IdxModDate = State#state.index_moddate, SlotBins = read_slots(State#state.handle, SlotList, - {SegList, State#state.blockindex_cache}, + {SegList, LowLastMod, State#state.blockindex_cache}, PressMethod, IdxModDate), {reply, @@ -726,6 +675,157 @@ code_change(_OldVsn, StateName, State, _Extra) -> {ok, StateName, State}. +%%%============================================================================ +%%% External Functions +%%%============================================================================ + +-spec expand_list_by_pointer(expandable_pointer(), + list(expandable_pointer()), + pos_integer()) + -> list(expanded_pointer()). +%% @doc +%% Expand a list of pointers, maybe ending up with a list of keys and values +%% with a tail of pointers +%% By defauls will not have a segment filter, or a low last_modified_date, but +%% they can be used. Range checking a last modified date must still be made on +%% the output - at this stage the low last_modified_date has been used to bulk +%% skip those slots not containing any information over the low last modified +%% date +expand_list_by_pointer(Pointer, Tail, Width) -> + expand_list_by_pointer(Pointer, Tail, Width, false). + +%% TODO until leveled_penciller updated +expand_list_by_pointer(Pointer, Tail, Width, SegList) -> + expand_list_by_pointer(Pointer, Tail, Width, SegList, 0). 
+ +-spec expand_list_by_pointer(expandable_pointer(), + list(expandable_pointer()), + pos_integer(), + leveled_codec:segment_list(), + non_neg_integer()) + -> list(expanded_pointer()). +%% @doc +%% With filters (as described in expand_list_by_pointer/3 +expand_list_by_pointer({pointer, SSTPid, Slot, StartKey, EndKey}, + Tail, Width, SegList, LowLastMod) -> + FoldFun = + fun(X, {Pointers, Remainder}) -> + case length(Pointers) of + L when L < Width -> + case X of + {pointer, SSTPid, S, SK, EK} -> + {Pointers ++ [{pointer, S, SK, EK}], Remainder}; + _ -> + {Pointers, Remainder ++ [X]} + end; + _ -> + {Pointers, Remainder ++ [X]} + end + end, + InitAcc = {[{pointer, Slot, StartKey, EndKey}], []}, + {AccPointers, AccTail} = lists:foldl(FoldFun, InitAcc, Tail), + ExpPointers = sst_getfilteredslots(SSTPid, + AccPointers, + SegList, + LowLastMod), + lists:append(ExpPointers, AccTail); +expand_list_by_pointer({next, ManEntry, StartKey, EndKey}, + Tail, Width, SegList, LowLastMod) -> + SSTPid = ManEntry#manifest_entry.owner, + leveled_log:log("SST10", [SSTPid, is_process_alive(SSTPid)]), + ExpPointer = sst_getfilteredrange(SSTPid, + StartKey, + EndKey, + Width, + SegList, + LowLastMod), + ExpPointer ++ Tail. + + +-spec sst_getkvrange(pid(), + range_endpoint(), + range_endpoint(), + integer()) + -> list(leveled_codec:ledger_kv()|slot_pointer()). +%% @doc +%% Get a range of {Key, Value} pairs as a list between StartKey and EndKey +%% (inclusive). The ScanWidth is the maximum size of the range, a pointer +%% will be placed on the tail of the resulting list if results expand beyond +%% the Scan Width +sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) -> + sst_getfilteredrange(Pid, StartKey, EndKey, ScanWidth, false, 0). + + +-spec sst_getfilteredrange(pid(), + range_endpoint(), + range_endpoint(), + integer(), + leveled_codec:segment_list(), + non_neg_integer()) + -> list(leveled_codec:ledger_kv()|slot_pointer()). +%% @doc +%% Get a range of {Key, Value} pairs as a list between StartKey and EndKey +%% (inclusive). The ScanWidth is the maximum size of the range, a pointer +%% will be placed on the tail of the resulting list if results expand beyond +%% the Scan Width +%% +%% To make the range open-ended (either to start, end or both) the all atom +%% can be used in place of the Key tuple. +%% +%% A segment list can also be passed, which inidcates a subset of segment +%% hashes of interest in the query. +%% +%% TODO: Optimise this so that passing a list of segments that tune to the +%% same hash is faster - perhaps provide an exportable function in +%% leveled_tictac +sst_getfilteredrange(Pid, StartKey, EndKey, ScanWidth, SegList, LowLastMod) -> + SegList0 = tune_seglist(SegList), + case gen_fsm:sync_send_event(Pid, + {get_kvrange, + StartKey, EndKey, + ScanWidth, SegList0, LowLastMod}, + infinity) of + {yield, SlotsToFetchBinList, SlotsToPoint, PressMethod, IdxModDate} -> + {L, _BIC} = + binaryslot_reader(SlotsToFetchBinList, + PressMethod, IdxModDate, SegList0), + L ++ SlotsToPoint; + Reply -> + Reply + end. + +-spec sst_getslots(pid(), list(slot_pointer())) + -> list(leveled_codec:ledger_kv()). +%% @doc +%% Get a list of slots by their ID. The slot will be converted from the binary +%% to term form outside of the FSM loop, this is to stop the copying of the +%% converted term to the calling process. +sst_getslots(Pid, SlotList) -> + sst_getfilteredslots(Pid, SlotList, false, 0). 
+ +-spec sst_getfilteredslots(pid(), + list(slot_pointer()), + leveled_codec:segment_list(), + non_neg_integer()) + -> list(leveled_codec:ledger_kv()). +%% @doc +%% Get a list of slots by their ID. The slot will be converted from the binary +%% to term form outside of the FSM loop +%% +%% A list of 16-bit integer Segment IDs can be passed to filter the keys +%% returned (not precisely - with false results returned in addition). Use +%% false as a SegList to not filter. +%% An integer can be provided which gives a floor for the LastModified Date +%% of the object, if the object is to be covered by the query +sst_getfilteredslots(Pid, SlotList, SegList, LowLastMod) -> + SegL0 = tune_seglist(SegList), + {SlotBins, PressMethod, IdxModDate} = + gen_fsm:sync_send_event(Pid, + {get_slots, SlotList, SegL0, LowLastMod}, + infinity), + {L, _BIC} = binaryslot_reader(SlotBins, PressMethod, IdxModDate, SegL0), + L. + %%%============================================================================ %%% Internal Functions %%%============================================================================ @@ -811,8 +911,9 @@ fetch(LedgerKey, Hash, State, Timings0) -> end. --spec fetch_range(tuple(), tuple(), integer(), leveled_codec:segment_list(), - sst_state()) -> {list(), list()}. +-spec fetch_range(tuple(), tuple(), integer(), + leveled_codec:segment_list(), non_neg_integer(), + sst_state()) -> {list(), list()}. %% @doc %% Fetch the contents of the SST file for a given key range. This will %% pre-fetch some results, and append pointers for additional results. @@ -820,7 +921,7 @@ fetch(LedgerKey, Hash, State, Timings0) -> %% A filter can be provided based on the Segment ID (usable for hashable %% objects not no_lookup entries) to accelerate the query if the 5-arity %% version is used -fetch_range(StartKey, EndKey, ScanWidth, SegList, State) -> +fetch_range(StartKey, EndKey, ScanWidth, SegList, LowLastMod, State) -> Summary = State#state.summary, Handle = State#state.handle, {Slots, RTrim} = lookup_slots(StartKey, EndKey, Summary#summary.index), @@ -874,7 +975,7 @@ fetch_range(StartKey, EndKey, ScanWidth, SegList, State) -> SlotsToFetchBinList = read_slots(Handle, SlotsToFetch, - {SegList, State#state.blockindex_cache}, + {SegList, LowLastMod, State#state.blockindex_cache}, State#state.compression_method, State#state.index_moddate), {SlotsToFetchBinList, SlotsToPoint}. @@ -1441,8 +1542,8 @@ binarysplit_mapfun(MultiSlotBin, StartPos) -> -spec read_slots(file:io_device(), list(), - {false|list(), any()}, press_method(), boolean()) - -> list(binaryslot_element()). + {false|list(), non_neg_integer(), binary()}, + press_method(), boolean()) -> list(binaryslot_element()). %% @doc %% The reading of sots will return a list of either 2-tuples containing %% {K, V} pairs - or 3-tuples containing {Binary, SK, EK}. 
The 3 tuples
@@ -1457,13 +1558,13 @@ binarysplit_mapfun(MultiSlotBin, StartPos) ->
 %% any key comparison between levels should allow for a non-matching key to
 %% be considered as superior to a matching key - as otherwise a matching key
 %% may be intermittently removed from the result set
-read_slots(Handle, SlotList, {false, _BlockIndexCache},
+read_slots(Handle, SlotList, {false, 0, _BlockIndexCache},
                 _PressMethod, _IdxModDate) ->
-    % No list of segments passed
+    % No list of segments passed, or useful low LastModified date
     LengthList = lists:map(fun pointer_mapfun/1, SlotList),
     {MultiSlotBin, StartPos} = read_length_list(Handle, LengthList),
     lists:map(binarysplit_mapfun(MultiSlotBin, StartPos), LengthList);
-read_slots(Handle, SlotList, {SegList, BlockIndexCache},
+read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache},
                 PressMethod, IdxModDate) ->
     % List of segments passed so only {K, V} pairs matching those segments
     % should be returned. This requires the {K, V} pair to have been added
@@ -1483,23 +1584,30 @@ read_slots(Handle, SlotList, {SegList, BlockIndexCache},
                         read_length_list(Handle, [LengthDetails]),
                     MapFun = binarysplit_mapfun(MultiSlotBin, StartPos),
                     Acc ++ [MapFun(LengthDetails)];
-                {BlockLengths, _LMD, BlockIdx} ->
+                {BlockLengths, LMD, BlockIdx} ->
                     % If there is a BlockIndex cached then we can use it to
                     % check to see if any of the expected segments are
                     % present without lifting the slot off disk. Also the
                     % fact that we know position can be used to filter out
                     % other keys
-                    case find_pos(BlockIdx, SegList, [], 0) of
-                        [] ->
+                    %
+                    % Note that LMD will be 0 if the indexing of last mod
+                    % date was not enabled at creation time. So in this
+                    % case the filter should always map
+                    case LMD >= LowLastMod of
+                        false ->
                             Acc;
-                        PositionList ->
+                        true ->
+                            PositionList = find_pos(BlockIdx, SegList, [], 0),
                             Acc ++
-                                check_blocks(PositionList,
+                                check_blocks(PositionList,
                                                 {Handle, SP},
                                                 BlockLengths,
                                                 byte_size(BlockIdx),
                                                 false, PressMethod, IdxModDate,
                                                 [])
+                            % Note check_blocks should return [] if
+                            % PositionList is empty
                    end
            end
        end,
@@ -2094,41 +2202,6 @@ maybe_expand_pointer(List) ->
     List.


-expand_list_by_pointer(Pointer, Tail, Width) ->
-    expand_list_by_pointer(Pointer, Tail, Width, false).
-
-expand_list_by_pointer({pointer, SSTPid, Slot, StartKey, EndKey},
-                                                Tail, Width, SegList) ->
-    FoldFun =
-        fun(X, {Pointers, Remainder}) ->
-            case length(Pointers) of
-                L when L < Width ->
-                    case X of
-                        {pointer, SSTPid, S, SK, EK} ->
-                            {Pointers ++ [{pointer, S, SK, EK}], Remainder};
-                        _ ->
-                            {Pointers, Remainder ++ [X]}
-                    end;
-                _ ->
-                    {Pointers, Remainder ++ [X]}
-            end
-        end,
-    InitAcc = {[{pointer, Slot, StartKey, EndKey}], []},
-    {AccPointers, AccTail} = lists:foldl(FoldFun, InitAcc, Tail),
-    ExpPointers =
-        leveled_sst:sst_getfilteredslots(SSTPid, AccPointers, SegList),
-    lists:append(ExpPointers, AccTail);
-expand_list_by_pointer({next, ManEntry, StartKey, EndKey},
-                                                Tail, Width, SegList) ->
-    SSTPid = ManEntry#manifest_entry.owner,
-    leveled_log:log("SST10", [SSTPid, is_process_alive(SSTPid)]),
-    ExpPointer =
-        leveled_sst:sst_getfilteredrange(SSTPid,
-                                            StartKey, EndKey,
-                                            Width, SegList),
-    ExpPointer ++ Tail.
-
-
 %%%============================================================================
 %%% Timing Functions

From ae1ada86b29ad1593d25cfc25067de7c93f4c655 Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Tue, 30 Oct 2018 19:35:29 +0000
Subject: [PATCH 11/29] Add accumulator check for last mod range

Perhaps should also do the segment check at this point.
Seems odd to check last modified date and segments in different places. --- src/leveled_penciller.erl | 227 ++++++++++++++++++++++++-------------- src/leveled_sst.erl | 2 + src/leveled_tree.erl | 3 + 3 files changed, 148 insertions(+), 84 deletions(-) diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 4b5581a..91a0b59 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -248,7 +248,7 @@ levelzero_pending = false :: boolean(), levelzero_constructor :: pid() | undefined, - levelzero_cache = [] :: list(), % a list of trees + levelzero_cache = [] :: levelzero_cache(), levelzero_size = 0 :: integer(), levelzero_maxcachesize :: integer() | undefined, levelzero_cointoss = false :: boolean(), @@ -293,6 +293,12 @@ integer()}. -type pcl_state() :: #state{}. -type pcl_timings() :: no_timing|#pcl_timings{}. +-type levelzero_cacheentry() :: {pos_integer(), levled_tree:leveled_tree()}. +-type levelzero_cache() :: list(levelzero_cacheentry()). +-type iterator_entry() + :: {pos_integer(), + list(leveled_codec:ledger_kv()|leveled_sst:expandable_pointer())}. +-type iterator() :: list(iterator_entry()). %%%============================================================================ %%% API @@ -720,7 +726,8 @@ handle_call({fetch_keys, keyfolder({L0AsList, SSTiter}, {StartKey, EndKey}, {AccFun, InitAcc}, - {SegmentList, MaxKeys}) + {SegmentList, {0, infinity}, MaxKeys}) + % TODO: Allow query to set last mod range end, case By of as_pcl -> @@ -1375,27 +1382,31 @@ keyfolder(IMMiter, SSTiter, StartKey, EndKey, {AccFun, Acc}) -> keyfolder({IMMiter, SSTiter}, {StartKey, EndKey}, {AccFun, Acc}, - {false, -1}). + {false, {0, infinity}, -1}). -keyfolder(_Iterators, _KeyRange, {_AccFun, Acc}, {_SegmentList, MaxKeys}) - when MaxKeys == 0 -> +keyfolder(_Iterators, _KeyRange, {_AccFun, Acc}, + {_SegmentList, _LastModRange, MaxKeys}) when MaxKeys == 0 -> Acc; -keyfolder({[], SSTiter}, KeyRange, {AccFun, Acc}, {SegmentList, MaxKeys}) -> +keyfolder({[], SSTiter}, KeyRange, {AccFun, Acc}, + {SegmentList, LastModRange, MaxKeys}) -> {StartKey, EndKey} = KeyRange, - case find_nextkey(SSTiter, StartKey, EndKey, SegmentList) of + case find_nextkey(SSTiter, StartKey, EndKey, + SegmentList, element(1, LastModRange)) of no_more_keys -> Acc; {NxSSTiter, {SSTKey, SSTVal}} -> - Acc1 = AccFun(SSTKey, SSTVal, Acc), + {Acc1, MK1} = + maybe_accumulate(SSTKey, SSTVal, Acc, AccFun, + MaxKeys, LastModRange), keyfolder({[], NxSSTiter}, KeyRange, {AccFun, Acc1}, - {SegmentList, MaxKeys - 1}) + {SegmentList, LastModRange, MK1}) end; keyfolder({[{IMMKey, IMMVal}|NxIMMiterator], SSTiterator}, KeyRange, {AccFun, Acc}, - {SegmentList, MaxKeys}) -> + {SegmentList, LastModRange, MaxKeys}) -> {StartKey, EndKey} = KeyRange, case {IMMKey < StartKey, leveled_codec:endkey_passed(EndKey, IMMKey)} of {false, true} -> @@ -1405,18 +1416,21 @@ keyfolder({[{IMMKey, IMMVal}|NxIMMiterator], SSTiterator}, keyfolder({[], SSTiterator}, KeyRange, {AccFun, Acc}, - {SegmentList, MaxKeys}); + {SegmentList, LastModRange, MaxKeys}); {false, false} -> - case find_nextkey(SSTiterator, StartKey, EndKey, SegmentList) of + case find_nextkey(SSTiterator, StartKey, EndKey, + SegmentList, element(1, LastModRange)) of no_more_keys -> % No more keys in range in the persisted store, so use the % in-memory KV as the next - Acc1 = AccFun(IMMKey, IMMVal, Acc), + {Acc1, MK1} = + maybe_accumulate(IMMKey, IMMVal, Acc, AccFun, + MaxKeys, LastModRange), keyfolder({NxIMMiterator, []}, KeyRange, {AccFun, Acc1}, - {SegmentList, MaxKeys - 1}); + 
{SegmentList, LastModRange, MK1}); {NxSSTiterator, {SSTKey, SSTVal}} -> % There is a next key, so need to know which is the % next key between the two (and handle two keys @@ -1426,7 +1440,9 @@ keyfolder({[{IMMKey, IMMVal}|NxIMMiterator], SSTiterator}, {SSTKey, SSTVal}) of left_hand_first -> - Acc1 = AccFun(IMMKey, IMMVal, Acc), + {Acc1, MK1} = + maybe_accumulate(IMMKey, IMMVal, Acc, AccFun, + MaxKeys, LastModRange), % Stow the previous best result away at Level -1 % so that there is no need to iterate to it again NewEntry = {-1, [{SSTKey, SSTVal}]}, @@ -1437,16 +1453,20 @@ keyfolder({[{IMMKey, IMMVal}|NxIMMiterator], SSTiterator}, NewEntry)}, KeyRange, {AccFun, Acc1}, - {SegmentList, MaxKeys - 1}); + {SegmentList, LastModRange, MK1}); right_hand_first -> - Acc1 = AccFun(SSTKey, SSTVal, Acc), + {Acc1, MK1} = + maybe_accumulate(SSTKey, SSTVal, Acc, AccFun, + MaxKeys, LastModRange), keyfolder({[{IMMKey, IMMVal}|NxIMMiterator], NxSSTiterator}, KeyRange, {AccFun, Acc1}, - {SegmentList, MaxKeys - 1}); + {SegmentList, LastModRange, MK1}); left_hand_dominant -> - Acc1 = AccFun(IMMKey, IMMVal, Acc), + {Acc1, MK1} = + maybe_accumulate(IMMKey, IMMVal, Acc, AccFun, + MaxKeys, LastModRange), % We can add to the accumulator here. As the SST % key was the most dominant across all SST levels, % so there is no need to hold off until the IMMKey @@ -1455,30 +1475,55 @@ keyfolder({[{IMMKey, IMMVal}|NxIMMiterator], SSTiterator}, NxSSTiterator}, KeyRange, {AccFun, Acc1}, - {SegmentList, MaxKeys - 1}) + {SegmentList, LastModRange, MK1}) end end end. +-spec maybe_accumulate(leveled_codec:ledger_key(), + leveled_codec:ledger_value(), + any(), fun(), integer(), + {non_neg_integer(), non_neg_integer()|infinity}) -> + any(). +%% @doc +%% Make an accumulation decision based on the date range +maybe_accumulate(LK, LV, Acc, AccFun, MaxKeys, {LowLastMod, HighLastMod}) -> + {_SQN, _SH, LMD} = leveled_codec:strip_to_indexdetails({LK, LV}), + RunAcc = + (LMD == undefined) or ((LMD >= LowLastMod) and (LMD =< HighLastMod)), + case RunAcc of + true -> + {AccFun(LK, LV, Acc), MaxKeys - 1}; + false -> + {Acc, MaxKeys} + end. + + +-spec find_nextkey(iterator(), + leveled_codec:ledger_key(), leveled_codec:ledger_key()) -> + no_more_keys|{iterator(), leveled_codec:ledger_kv()}. +%% @doc %% Looks to find the best choice for the next key across the levels (other %% than in-memory table) %% In finding the best choice, the next key in a given level may be a next %% block or next file pointer which will need to be expanded find_nextkey(QueryArray, StartKey, EndKey) -> - find_nextkey(QueryArray, StartKey, EndKey, false). + find_nextkey(QueryArray, StartKey, EndKey, false, 0). -find_nextkey(QueryArray, StartKey, EndKey, SegmentList) -> +find_nextkey(QueryArray, StartKey, EndKey, SegmentList, LowLastMod) -> find_nextkey(QueryArray, -1, {null, null}, StartKey, EndKey, - SegmentList, ?ITERATOR_SCANWIDTH). + SegmentList, + LowLastMod, + ?ITERATOR_SCANWIDTH).
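As an illustrative sketch of the maybe_accumulate/6 decision above (not part of the patch; the key, values and range are invented, and the v2 value clause of leveled_codec:strip_to_indexdetails/1 only arrives with patch 16/29 of this series):

    CountFun = fun(_K, _V, N) -> N + 1 end,
    LK = {o, <<"B">>, <<"K">>, null},
    V1 = {5, {active, infinity}, {0, 0}, null},       % v1 value - no LMD
    {1, 9} = maybe_accumulate(LK, V1, 0, CountFun, 10, {1000, infinity}),
    V2 = {5, {active, infinity}, {0, 0}, null, 100},  % v2 value - LMD of 100s
    {0, 10} = maybe_accumulate(LK, V2, 0, CountFun, 10, {1000, infinity}).

A v1 value is always accumulated as its last modified date is unknown, whereas a v2 value outside the range is skipped without consuming the MaxKeys allowance.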
find_nextkey(_QueryArray, LCnt, {null, null}, _StartKey, _EndKey, - _SegList, _Width) when LCnt > ?MAX_LEVELS -> + _SegList, _LowLastMod, _Width) when LCnt > ?MAX_LEVELS -> % The array has been scanned without finding a best key - must be % exhausted - respond to indicate no more keys to be found by the % iterator @@ -1486,7 +1531,7 @@ find_nextkey(QueryArray, LCnt, {BKL, BestKV}, _StartKey, _EndKey, - _SegList, _Width) when LCnt > ?MAX_LEVELS -> + _SegList, _LowLastMod, _Width) when LCnt > ?MAX_LEVELS -> % All levels have been scanned, so need to remove the best result from % the array, and return that array along with the best key/sqn/status % combination @@ -1495,7 +1540,7 @@ find_nextkey(QueryArray, LCnt, {BestKeyLevel, BestKV}, StartKey, EndKey, - SegList, Width) -> + SegList, LowLastMod, Width) -> % Get the next key at this level {NextKey, RestOfKeys} = case lists:keyfind(LCnt, 1, QueryArray) of @@ -1514,7 +1559,7 @@ find_nextkey(QueryArray, LCnt + 1, {BKL, BKV}, StartKey, EndKey, - SegList, Width); + SegList, LowLastMod, Width); {{next, Owner, _SK}, BKL, BKV} -> % The first key at this level is pointer to a file - need to query % the file to expand this level out before proceeding @@ -1523,7 +1568,7 @@ find_nextkey(QueryArray, LCnt, RestOfKeys, Width, SegList, - 0), + LowLastMod), NewEntry = {LCnt, UpdList}, % Need to loop around at this level (LCnt) as we have not yet % examined a real key at this level @@ -1531,7 +1576,7 @@ find_nextkey(QueryArray, LCnt, {BKL, BKV}, StartKey, EndKey, - SegList, Width); + SegList, LowLastMod, Width); {{pointer, SSTPid, Slot, PSK, PEK}, BKL, BKV} -> % The first key at this level is pointer within a file - need to % query the file to expand this level out before proceeding @@ -1540,7 +1585,7 @@ find_nextkey(QueryArray, LCnt, RestOfKeys, Width, SegList, - 0), + LowLastMod), NewEntry = {LCnt, UpdList}, % Need to loop around at this level (LCnt) as we have not yet % examined a real key at this level @@ -1548,7 +1593,7 @@ find_nextkey(QueryArray, LCnt, {BKL, BKV}, StartKey, EndKey, - SegList, Width); + SegList, LowLastMod, Width); {{Key, Val}, null, null} -> % No best key set - so can assume that this key is the best key, % and check the lower levels @@ -1556,7 +1601,7 @@ find_nextkey(QueryArray, LCnt + 1, {LCnt, {Key, Val}}, StartKey, EndKey, - SegList, Width); + SegList, LowLastMod, Width); {{Key, Val}, _BKL, {BestKey, _BestVal}} when Key < BestKey -> % There is a real key and a best key to compare, and the real key % at this level is before the best key, and so is now the new best @@ -1566,7 +1611,7 @@ find_nextkey(QueryArray, LCnt + 1, {LCnt, {Key, Val}}, StartKey, EndKey, - SegList, Width); + SegList, LowLastMod, Width); {{Key, Val}, BKL, {BestKey, BestVal}} when Key == BestKey -> SQN = leveled_codec:strip_to_seqonly({Key, Val}), BestSQN = leveled_codec:strip_to_seqonly({BestKey, BestVal}), @@ -1581,7 +1626,7 @@ find_nextkey(QueryArray, LCnt + 1, {BKL, {BestKey, BestVal}}, StartKey, EndKey, - SegList, Width); + SegList, LowLastMod, Width); SQN > BestSQN -> % There is a real key at the front of this level and it has % a higher SQN than the best key, so we should use this as @@ -1597,7 +1642,7 @@ find_nextkey(QueryArray, LCnt + 1, {LCnt, {Key, Val}}, StartKey, EndKey, - SegList, Width) + SegList, LowLastMod, Width) end; {_, BKL, BKV} -> % This is not the best key @@ -1605,7 +1650,7 @@ find_nextkey(QueryArray, LCnt + 1, {BKL, BKV}, StartKey,
EndKey, - SegList, Width) + SegList, LowLastMod, Width) end. @@ -1966,84 +2011,98 @@ simple_server_test() -> simple_findnextkey_test() -> QueryArray = [ - {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, null}}, - {{o, "Bucket1", "Key5"}, {4, {active, infinity}, null}}]}, - {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}]}, - {5, [{{o, "Bucket1", "Key2"}, {2, {active, infinity}, null}}]} + {2, [{{o, "Bucket1", "Key1", null}, {5, {active, infinity}, {0, 0}, null}}, + {{o, "Bucket1", "Key5", null}, {4, {active, infinity}, {0, 0}, null}}]}, + {3, [{{o, "Bucket1", "Key3", null}, {3, {active, infinity}, {0, 0}, null}}]}, + {5, [{{o, "Bucket1", "Key2", null}, {2, {active, infinity}, {0, 0}, null}}]} ], {Array2, KV1} = find_nextkey(QueryArray, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, null}}, KV1), + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key1", null}, + {5, {active, infinity}, {0, 0}, null}}, + KV1), {Array3, KV2} = find_nextkey(Array2, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key2"}, {2, {active, infinity}, null}}, KV2), + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key2", null}, + {2, {active, infinity}, {0, 0}, null}}, + KV2), {Array4, KV3} = find_nextkey(Array3, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}, KV3), + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key3", null}, + {3, {active, infinity}, {0, 0}, null}}, + KV3), {Array5, KV4} = find_nextkey(Array4, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key5"}, {4, {active, infinity}, null}}, KV4), + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key5", null}, + {4, {active, infinity}, {0, 0}, null}}, + KV4), ER = find_nextkey(Array5, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), ?assertMatch(no_more_keys, ER). 
sqnoverlap_findnextkey_test() -> QueryArray = [ - {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, - {{o, "Bucket1", "Key5"}, {4, {active, infinity}, 0, null}}]}, - {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]}, - {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]} + {2, [{{o, "Bucket1", "Key1", null}, {5, {active, infinity}, {0, 0}, null}}, + {{o, "Bucket1", "Key5", null}, {4, {active, infinity}, {0, 0}, null}}]}, + {3, [{{o, "Bucket1", "Key3", null}, {3, {active, infinity}, {0, 0}, null}}]}, + {5, [{{o, "Bucket1", "Key5", null}, {2, {active, infinity}, {0, 0}, null}}]} ], {Array2, KV1} = find_nextkey(QueryArray, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key1", null}, + {5, {active, infinity}, {0, 0}, null}}, KV1), {Array3, KV2} = find_nextkey(Array2, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}, + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key3", null}, + {3, {active, infinity}, {0, 0}, null}}, KV2), {Array4, KV3} = find_nextkey(Array3, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key5"}, {4, {active, infinity}, 0, null}}, + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key5", null}, + {4, {active, infinity}, {0, 0}, null}}, KV3), ER = find_nextkey(Array4, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), ?assertMatch(no_more_keys, ER). 
sqnoverlap_otherway_findnextkey_test() -> QueryArray = [ - {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, - {{o, "Bucket1", "Key5"}, {1, {active, infinity}, 0, null}}]}, - {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]}, - {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]} + {2, [{{o, "Bucket1", "Key1", null}, {5, {active, infinity}, {0, 0}, null}}, + {{o, "Bucket1", "Key5", null}, {1, {active, infinity}, {0, 0}, null}}]}, + {3, [{{o, "Bucket1", "Key3", null}, {3, {active, infinity}, {0, 0}, null}}]}, + {5, [{{o, "Bucket1", "Key5", null}, {2, {active, infinity}, {0, 0}, null}}]} ], {Array2, KV1} = find_nextkey(QueryArray, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key1", null}, + {5, {active, infinity}, {0, 0}, null}}, KV1), {Array3, KV2} = find_nextkey(Array2, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}, + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key3", null}, + {3, {active, infinity}, {0, 0}, null}}, KV2), {Array4, KV3} = find_nextkey(Array3, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), - ?assertMatch({{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}, + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), + ?assertMatch({{o, "Bucket1", "Key5", null}, + {2, {active, infinity}, {0, 0}, null}}, KV3), ER = find_nextkey(Array4, - {o, "Bucket1", "Key0"}, - {o, "Bucket1", "Key5"}), + {o, "Bucket1", "Key0", null}, + {o, "Bucket1", "Key5", null}), ?assertMatch(no_more_keys, ER). foldwithimm_simple_test() -> diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index e9d6bd1..eb6c291 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -204,6 +204,8 @@ -type sst_timings() :: no_timing|#sst_timings{}. -type build_timings() :: no_timing|#build_timings{}. +-export_type([expandable_pointer/0]). + %%%============================================================================ %%% API %%%============================================================================ diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 38df85d..f033bb4 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -35,6 +35,9 @@ integer(), % length any()}. +-export_type([leveled_tree/0]). + + %%%============================================================================ %%% API %%%============================================================================ From ffe4c39ee81e0ea99c2ba32c9b284c8de9f1428c Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 30 Oct 2018 21:43:49 +0000 Subject: [PATCH 12/29] Add tests with old file format --- src/leveled_sst.erl | 111 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 24 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index eb6c291..489ee1b 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -242,10 +242,15 @@ sst_open(RootPath, Filename) -> %% pairs. This should not be used for basement levels or unexpanded Key/Value %% lists as merge_lists will not be called. sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) -> + sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod, + ?INDEX_MODDATE). 
+ +sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod, + IndexModDate) -> {ok, Pid} = gen_fsm:start_link(?MODULE, [], []), PressMethod0 = compress_level(Level, PressMethod), {[], [], SlotList, FK} = - merge_lists(KVList, PressMethod0, ?INDEX_MODDATE), + merge_lists(KVList, PressMethod0, IndexModDate), case gen_fsm:sync_send_event(Pid, {sst_new, RootPath, @@ -254,7 +259,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) -> {SlotList, FK}, MaxSQN, PressMethod0, - ?INDEX_MODDATE}, + IndexModDate}, infinity) of {ok, {SK, EK}, Bloom} -> {ok, Pid, {SK, EK}, Bloom} @@ -285,10 +290,17 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) -> sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, PressMethod) -> + sst_new(RootPath, Filename, + KVL1, KVL2, IsBasement, Level, + MaxSQN, PressMethod, ?INDEX_MODDATE). + +sst_new(RootPath, Filename, + KVL1, KVL2, IsBasement, Level, + MaxSQN, PressMethod, IndexModDate) -> PressMethod0 = compress_level(Level, PressMethod), {Rem1, Rem2, SlotList, FK} = merge_lists(KVL1, KVL2, {IsBasement, Level}, - PressMethod0, ?INDEX_MODDATE), + PressMethod0, IndexModDate), case SlotList of [] -> empty; @@ -302,7 +314,7 @@ sst_new(RootPath, Filename, {SlotList, FK}, MaxSQN, PressMethod0, - ?INDEX_MODDATE}, + IndexModDate}, infinity) of {ok, {SK, EK}, Bloom} -> {ok, Pid, {{Rem1, Rem2}, SK, EK}, Bloom} @@ -869,6 +881,7 @@ fetch(LedgerKey, Hash, State, Timings0) -> Timings3}; {BlockLengths, _LMD, PosBin} -> PosList = find_pos(PosBin, extra_hash(Hash), [], 0), + io:format("Fetching referring to cache with PosList ~w~n", [PosList]), case PosList of [] -> {_SW3, Timings3} = @@ -1330,7 +1343,7 @@ take_max_lastmoddate(LMD, LMDAcc) -> %% @doc %% Generate the serialised slot to be used when storing this sublist of keys %% and values -generate_binary_slot(Lookup, KVL, PressMethod, _IndexModDate, BuildTimings0) -> +generate_binary_slot(Lookup, KVL, PressMethod, IndexModDate, BuildTimings0) -> SW0 = os:timestamp(), @@ -1411,20 +1424,37 @@ generate_binary_slot(Lookup, KVL, PressMethod, _IndexModDate, BuildTimings0) -> BuildTimings2 = update_buildtimings(SW1, BuildTimings1, slot_serialise), SW2 = os:timestamp(), - B1P = byte_size(PosBinIndex) + ?BLOCK_LENGTHS_LENGTH + ?LMD_LENGTH, + B1P = + case IndexModDate of + true -> + byte_size(PosBinIndex) + ?BLOCK_LENGTHS_LENGTH + ?LMD_LENGTH; + false -> + byte_size(PosBinIndex) + ?BLOCK_LENGTHS_LENGTH + end, CheckB1P = hmac(B1P), B1L = byte_size(B1), B2L = byte_size(B2), B3L = byte_size(B3), B4L = byte_size(B4), B5L = byte_size(B5), - Header = <>, + Header = + case IndexModDate of + true -> + <>; + false -> + <> + end, CheckH = hmac(Header), SlotBin = < + io:format("Key matched~n"), {K, V}; _ -> case LedgerKeyToCheck of @@ -1596,10 +1630,14 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache}, % Note that LMD will be 0 if the indexing of last mod % date was not enabled at creation time. So in this % case the filter should always map - case LMD >= LowLastMod of - false -> - Acc; + case LowLastMod > LMD of true -> + % The highest LMD on the slot was before the + % LowLastMod date passed in the query - therefore + % there are no interesting modifications in this + % slot - it is all too old + Acc; - PositionList -> + false -> PositionList = find_pos(BlockIdx, SegList, [], 0), Acc ++ check_blocks(PositionList, @@ -2344,6 +2382,14 @@ update_timings(SW, Timings, Stage, Continue) -> -ifdef(TEST).
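The read_slots change above makes the slot-skip decision against the slot's highest last modified date, so a whole slot is only passed over when even its newest key is too old for the query. As a sketch of that decision (illustrative only; skip_slot and check_slot are placeholder atoms, not code from the patch):

    %% SlotMaxLMD: greatest LMD of any key in the slot (from the slot header)
    %% LowLastMod: low end of the query's last modified range
    case LowLastMod > SlotMaxLMD of
        true  -> skip_slot;  % every key in the slot predates the range
        false -> check_slot  % the slot may hold keys modified in the range
    end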
+testsst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) -> + sst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod, false). + +testsst_new(RootPath, Filename, + KVL1, KVL2, IsBasement, Level, MaxSQN, PressMethod) -> + sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, + PressMethod, false). + generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> generate_randomkeys(Seqn, Count, @@ -2495,7 +2541,7 @@ indexed_list_allindexkeys_test() -> LMD = ?FLIPPER48, ?assertMatch(<<_BL:20/binary, LMD:48/integer, EmptySlotSize:8/integer>>, HeaderT), - ?assertMatch(<<_BL:20/binary, LMD:48/integer, EmptySlotSize:8/integer>>, + ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, HeaderF), % SW = os:timestamp(), BinToListT = binaryslot_tolist(FullBinT, native, true), @@ -2678,17 +2724,21 @@ test_binary_slot(FullBin, Key, Hash, ExpectedValue) -> % [timer:now_diff(os:timestamp(), SW)]). - merge_test() -> + merge_tester(fun testsst_new/6, fun testsst_new/8), + merge_tester(fun sst_new/6, fun sst_new/8). + + +merge_tester(NewFunS, NewFunM) -> N = 3000, KVL1 = lists:ukeysort(1, generate_randomkeys(N + 1, N, 1, 20)), KVL2 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 20)), KVL3 = lists:ukeymerge(1, KVL1, KVL2), SW0 = os:timestamp(), {ok, P1, {FK1, LK1}, _Bloom1} = - sst_new("../test/", "level1_src", 1, KVL1, 6000, native), + NewFunS("../test/", "level1_src", 1, KVL1, 6000, native), {ok, P2, {FK2, LK2}, _Bloom2} = - sst_new("../test/", "level2_src", 2, KVL2, 3000, native), + NewFunS("../test/", "level2_src", 2, KVL2, 3000, native), ExpFK1 = element(1, lists:nth(1, KVL1)), ExpLK1 = element(1, lists:last(KVL1)), ExpFK2 = element(1, lists:nth(1, KVL2)), @@ -2700,7 +2750,7 @@ merge_test() -> ML1 = [{next, #manifest_entry{owner = P1}, FK1}], ML2 = [{next, #manifest_entry{owner = P2}, FK2}], NewR = - sst_new("../test/", "level2_merge", ML1, ML2, false, 2, N * 2, native), + NewFunM("../test/", "level2_merge", ML1, ML2, false, 2, N * 2, native), {ok, P3, {{Rem1, Rem2}, FK3, LK3}, _Bloom3} = NewR, ?assertMatch([], Rem1), ?assertMatch([], Rem2), @@ -2729,13 +2779,17 @@ merge_test() -> simple_persisted_range_test() -> + simple_persisted_range_tester(fun testsst_new/6), + simple_persisted_range_tester(fun sst_new/6). + +simple_persisted_range_tester(SSTNewFun) -> {RP, Filename} = {"../test/", "simple_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 16, 1, 20), KVList1 = lists:ukeysort(1, KVList0), [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = - sst_new(RP, Filename, 1, KVList1, length(KVList1), native), + SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), native), {o, B, K, null} = LastKey, SK1 = {o, B, K, 0}, @@ -2836,6 +2890,11 @@ additional_range_test() -> simple_persisted_slotsize_test() -> + simple_persisted_slotsize_tester(fun testsst_new/6), + simple_persisted_slotsize_tester(fun sst_new/6). 
+ + +simple_persisted_slotsize_tester(SSTNewFun) -> {RP, Filename} = {"../test/", "simple_slotsize_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 2, 1, 20), KVList1 = lists:sublist(lists:ukeysort(1, KVList0), ?LOOK_SLOTSIZE), [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = - sst_new(RP, Filename, 1, KVList1, length(KVList1), native), + SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), native), lists:foreach(fun({K, V}) -> ?assertMatch({K, V}, sst_get(Pid, K)) end, KVList1), ok = sst_close(Pid), ok = file:delete(filename:join(RP, Filename ++ ".sst")). simple_persisted_test() -> + simple_persisted_tester(fun testsst_new/6), + simple_persisted_tester(fun sst_new/6). + +simple_persisted_tester(SSTNewFun) -> {RP, Filename} = {"../test/", "simple_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 32, 1, 20), KVList1 = lists:ukeysort(1, KVList0), [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = - sst_new(RP, Filename, 1, KVList1, length(KVList1), native), + SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), native), SW0 = os:timestamp(), lists:foreach(fun({K, V}) -> ?assertMatch({K, V}, sst_get(Pid, K)) From 1f976948a1ce442794fbb2959e68f6b2e687961a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 30 Oct 2018 21:52:17 +0000 Subject: [PATCH 13/29] Add test timeout As timed out with coverage enabled --- src/leveled_sst.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 489ee1b..0600005 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -2910,7 +2910,10 @@ simple_persisted_slotsize_tester(SSTNewFun) -> ok = sst_close(Pid), ok = file:delete(filename:join(RP, Filename ++ ".sst")). -simple_persisted_test() -> +simple_persisted_test_() -> + {timeout, 60, fun simple_persisted_test_bothformats/0}. + +simple_persisted_test_bothformats() -> simple_persisted_tester(fun testsst_new/6), simple_persisted_tester(fun sst_new/6). From 11627bbdd9e97652c0d01d678a59374a219e8ae9 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 31 Oct 2018 00:09:24 +0000 Subject: [PATCH 14/29] Extend API To support max_keys and the last modified date range. This applies the last modified date check on all ledger folds. This is hard to avoid, but ultimately a very low cost. The limit on the number of heads to fold is based on the number of heads passed to the accumulator - not on the number the accumulator actually adds. So if the FoldFun performs a filter (e.g. for the preflist), then those filtered results will still count towards the maximum. There needs to be some way at the end of signalling from the fold whether the outcome was or was not 'constrained' by max_keys - as the caller cannot simply tell by length checking the outcome. Note this is used rather than length checking the buffer and throwing a 'stop_fold' message when the limit is reached. The choice is made for simplicity, and ease of testing. The throw mechanism is necessary if there is a need to stop parallel folds across the cluster - but in this case the node_worker_pool will be used.
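As a usage sketch of this extended API (not part of the patch; the bucket, fold fun and counts are invented, and Bookie is assumed to be a started store - the shape matches the book_headfold/9 spec in the diff below):

    LowSec = 1540000000, % the last modified range is in whole seconds
    {async, Folder} =
        leveled_bookie:book_headfold(Bookie,
                                     ?RIAK_TAG,
                                     {range, <<"B0">>, all},
                                     {fun(_B, K, _V, Acc) -> [K|Acc] end, []},
                                     false,              % JournalCheck
                                     true,               % SnapPreFold
                                     false,              % SegmentList
                                     {LowSec, infinity}, % LastModRange
                                     10000),             % MaxObjectCount
    Keys = Folder().

At this point in the series the runner returns the plain accumulator; patch 17/29 later wraps the result as {max_count, Acc} or {no_more_keys, Acc} when a Max Object Count is given, so the caller can tell why the fold ended.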
--- src/leveled_bookie.erl | 88 +++++++++++++++++++++++++++++++++------ src/leveled_codec.erl | 4 +- src/leveled_penciller.erl | 37 ++++++++++++---- src/leveled_runner.erl | 32 +++++++++++--- 4 files changed, 132 insertions(+), 29 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 19e31c4..e3c77ab 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -82,7 +82,8 @@ book_objectfold/5, book_objectfold/6, book_headfold/6, - book_headfold/7 + book_headfold/7, + book_headfold/9 ]). -export([empty_ledgercache/0, @@ -111,6 +112,7 @@ -define(DUMMY, dummy). % Dummy key used for mput operations -define(MAX_KEYCHECK_FREQUENCY, 100). -define(MIN_KEYCHECK_FREQUENCY, 1). +-define(OPEN_LASTMOD_RANGE, {0, infinity}). -define(OPTION_DEFAULTS, [{root_path, undefined}, {snapshot_bookie, undefined}, @@ -893,10 +895,58 @@ book_headfold(Pid, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> SegmentList :: false | list(integer()), Runner :: fun(() -> Acc). book_headfold(Pid, Tag, {bucket_list, BucketList}, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> - RunnerType = {foldheads_bybucket, Tag, BucketList, bucket_list, FoldAccT, JournalCheck, SnapPreFold, SegmentList}, + RunnerType = + {foldheads_bybucket, Tag, BucketList, bucket_list, FoldAccT, + JournalCheck, SnapPreFold, SegmentList, false, false}, book_returnfolder(Pid, RunnerType); book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> - RunnerType = {foldheads_bybucket, Tag, Bucket, KeyRange, FoldAccT, JournalCheck, SnapPreFold, SegmentList}, + RunnerType = + {foldheads_bybucket, Tag, Bucket, KeyRange, FoldAccT, + JournalCheck, SnapPreFold, SegmentList, false, false}, + book_returnfolder(Pid, RunnerType). + +%% @doc as book_headfold/7, but with the addition of a Last Modified Date +%% Range and Max Head Count. For version 2 objects this will filter out +%% all objects with a highest Last Modified Date that is outside of the range. +%% All version 1 objects will be included in the result set regardless of Last +%% Modified Date. +%% The Max Head Count will stop the fold once the count has been reached on +%% this store only +-spec book_headfold(pid(), Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount) -> + {async, Runner} when + Tag :: leveled_codec:tag(), + Limiter :: BucketList | BucketKeyRange, + BucketList :: {bucket_list, list(Bucket)}, + BucketKeyRange :: {range, Bucket, KeyRange}, + KeyRange :: {StartKey, EndKey} | all, + StartKey :: Key, + EndKey :: Key, + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), + Acc :: term(), + Bucket :: term(), + Key :: term(), + Value :: term(), + JournalCheck :: boolean(), + SnapPreFold :: boolean(), + SegmentList :: false | list(integer()), + LastModRange :: false | leveled_codec:lastmod_range(), + MaxObjectCount :: false | pos_integer(), + Runner :: fun(() -> Acc). 
+book_headfold(Pid, Tag, {bucket_list, BucketList}, FoldAccT, JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount) -> + RunnerType = + {foldheads_bybucket, Tag, BucketList, bucket_list, FoldAccT, + JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}, + book_returnfolder(Pid, RunnerType); +book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount) -> + RunnerType = + {foldheads_bybucket, Tag, Bucket, KeyRange, FoldAccT, + JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}, book_returnfolder(Pid, RunnerType). -spec book_snapshot(pid(), @@ -1576,7 +1626,8 @@ get_runner(State, Tag, BucketList, bucket_list, FoldFun, - JournalCheck, SnapPreFold, SegmentList}) -> + JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}) -> KeyRangeFun = fun(Bucket) -> {StartKey, EndKey, _} = return_ledger_keyrange(Tag, Bucket, all), @@ -1588,13 +1639,16 @@ get_runner(State, Tag, lists:map(KeyRangeFun, BucketList), FoldFun, - JournalCheck, SegmentList); + JournalCheck, + SegmentList, + LastModRange, MaxObjectCount); get_runner(State, {foldheads_bybucket, Tag, Bucket, KeyRange, FoldFun, - JournalCheck, SnapPreFold, SegmentList}) -> + JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}) -> {StartKey, EndKey, SnapQ} = return_ledger_keyrange(Tag, Bucket, KeyRange), SnapType = snaptype_by_presence(JournalCheck), SnapFun = return_snapfun(State, SnapType, SnapQ, true, SnapPreFold), @@ -1602,7 +1656,9 @@ get_runner(State, Tag, [{StartKey, EndKey}], FoldFun, - JournalCheck, SegmentList); + JournalCheck, + SegmentList, + LastModRange, MaxObjectCount); get_runner(State, {foldobjects_bybucket, Tag, Bucket, KeyRange, @@ -2521,7 +2577,8 @@ folder_cache_test(CacheSize) -> "BucketA", all, FoldHeadsFun, - true, true, false}), + true, true, + false, false, false}), KeyHashList2A = HTFolder2A(), {async, HTFolder2B} = book_returnfolder(Bookie1, @@ -2530,7 +2587,8 @@ folder_cache_test(CacheSize) -> "BucketB", all, FoldHeadsFun, - true, false, false}), + true, false, + false, false, false}), KeyHashList2B = HTFolder2B(), ?assertMatch(true, @@ -2545,7 +2603,8 @@ folder_cache_test(CacheSize) -> "BucketB", {"Key", <<"$all">>}, FoldHeadsFun, - true, false, false}), + true, false, + false, false, false}), KeyHashList2C = HTFolder2C(), {async, HTFolder2D} = book_returnfolder(Bookie1, @@ -2554,7 +2613,8 @@ folder_cache_test(CacheSize) -> "BucketB", {"Key", "Keyzzzzz"}, FoldHeadsFun, - true, true, false}), + true, true, + false, false, false}), KeyHashList2D = HTFolder2D(), ?assertMatch(true, lists:usort(KeyHashList2B) == lists:usort(KeyHashList2C)), @@ -2574,7 +2634,8 @@ folder_cache_test(CacheSize) -> "BucketB", {"Key", SplitIntEnd}, FoldHeadsFun, - true, false, false}), + true, false, + false, false, false}), KeyHashList2E = HTFolder2E(), {async, HTFolder2F} = book_returnfolder(Bookie1, @@ -2583,7 +2644,8 @@ folder_cache_test(CacheSize) -> "BucketB", {SplitIntStart, "Key|"}, FoldHeadsFun, - true, false, false}), + true, false, + false, false, false}), KeyHashList2F = HTFolder2F(), ?assertMatch(true, length(KeyHashList2E) > 0), diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 5aca861..dbbb16e 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -94,6 +94,7 @@ % if the object has siblings in the store will be the maximum of those % dates integer()|undefined. +-type lastmod_range() :: {integer(), pos_integer()|infinity}. 
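For illustration, some lastmod_range() values (the timestamps are invented examples; the endpoints are whole seconds since the Unix epoch, matching how the last modified date is stored later in this series):

    {0, infinity}          % the open range - filters nothing out
    {1540000000, infinity} % only objects modified at or after this time
    {0, 1540000000}        % only objects not modified since this time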
-type ledger_status() :: tomb|{active, non_neg_integer()|infinity}. @@ -140,7 +141,8 @@ index_specs/0, segment_list/0, maybe_lookup/0, - last_moddate/0]). + last_moddate/0, + lastmod_range/0]). %%%============================================================================ diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 91a0b59..64b89d8 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -177,7 +177,7 @@ pcl_fetch/4, pcl_fetchkeys/5, pcl_fetchkeys/6, - pcl_fetchkeysbysegment/6, + pcl_fetchkeysbysegment/8, pcl_fetchnextkey/5, pcl_checksequencenumber/3, pcl_workforclerk/1, @@ -237,6 +237,7 @@ -define(SNAPSHOT_TIMEOUT_SHORT, 600). -define(TIMING_SAMPLECOUNTDOWN, 10000). -define(TIMING_SAMPLESIZE, 100). +-define(OPEN_LASTMOD_RANGE, {0, infinity}). -record(state, {manifest, % a manifest record from the leveled_manifest module persisted_sqn = 0 :: integer(), % The highest SQN persisted @@ -411,7 +412,7 @@ pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc, By) -> {fetch_keys, StartKey, EndKey, AccFun, InitAcc, - false, -1, + false, false, -1, By}, infinity). @@ -420,7 +421,9 @@ pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc, By) -> leveled_codec:ledger_key(), leveled_codec:ledger_key(), fun(), any(), - leveled_codec:segment_list()) -> any(). + leveled_codec:segment_list(), + false | leveled_codec:lastmod_range(), + false | pos_integer()) -> any(). %% @doc %% Run a range query between StartKey and EndKey (inclusive). This will cover %% all keys in the range - so must only be run against snapshots of the @@ -433,12 +436,22 @@ pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc, By) -> %% Note that segment must be false unless the object Tag supports additional %% indexing by segment. This cannot be used on ?IDX_TAG and other tags that %% use the no_lookup hash -pcl_fetchkeysbysegment(Pid, StartKey, EndKey, AccFun, InitAcc, SegmentList) -> +pcl_fetchkeysbysegment(Pid, StartKey, EndKey, AccFun, InitAcc, + SegmentList, LastModRange, MaxObjectCount) -> + MaxKeys = + case MaxObjectCount of + false -> + -1; + MOC when is_integer(MOC) -> + MOC + end, gen_server:call(Pid, {fetch_keys, StartKey, EndKey, AccFun, InitAcc, - SegmentList, -1, + SegmentList, + LastModRange, + MaxKeys, as_pcl}, infinity). @@ -455,7 +468,7 @@ pcl_fetchnextkey(Pid, StartKey, EndKey, AccFun, InitAcc) -> {fetch_keys, StartKey, EndKey, AccFun, InitAcc, - false, 1, + false, false, 1, as_pcl}, infinity). @@ -690,10 +703,17 @@ handle_call({check_sqn, Key, Hash, SQN}, _From, State) -> handle_call({fetch_keys, StartKey, EndKey, AccFun, InitAcc, - SegmentList, MaxKeys, By}, + SegmentList, LastModRange, MaxKeys, By}, _From, State=#state{snapshot_fully_loaded=Ready}) when Ready == true -> + LastModRange0 = + case LastModRange of + false -> + ?OPEN_LASTMOD_RANGE; + R -> + R + end, SW = os:timestamp(), L0AsList = case State#state.levelzero_astree of @@ -726,8 +746,7 @@ handle_call({fetch_keys, keyfolder({L0AsList, SSTiter}, {StartKey, EndKey}, {AccFun, InitAcc}, - {SegmentList, {0, infinity}, MaxKeys}) - % TODO: Allow query to set last mod range + {SegmentList, LastModRange0, MaxKeys}) end, case By of as_pcl -> diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index c8ad66a..e3b5445 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -32,7 +32,7 @@ tictactree/5, foldheads_allkeys/5, foldobjects_allkeys/4, - foldheads_bybucket/6, + foldheads_bybucket/8, foldobjects_bybucket/4, foldobjects_byindex/3 ]). @@ -49,6 +49,7 @@ :: {fun(), any()}. 
-type term_regex() :: re:mp()|undefined. + %%%============================================================================ %%% External functions %%%============================================================================ @@ -399,7 +400,10 @@ foldobjects_bybucket(SnapFun, Tag, KeyRanges, FoldFun) -> atom(), list({any(), any()}), fun(), - boolean(), false|list(integer())) + boolean(), + false|list(integer()), + false|leveled_codec:lastmod_range(), + false|pos_integer()) -> {async, fun()}. %% @doc %% Fold over all object metadata within a given key range in a bucket @@ -407,13 +411,16 @@ foldheads_bybucket(SnapFun, Tag, KeyRanges, FoldFun, - JournalCheck, SegmentList) -> + JournalCheck, + SegmentList, LastModRange, MaxObjectCount) -> foldobjects(SnapFun, Tag, KeyRanges, FoldFun, {true, JournalCheck}, - SegmentList). + SegmentList, + LastModRange, + MaxObjectCount). -spec foldobjects_byindex(fun(), tuple(), fun()) -> {async, fun()}. %% @doc @@ -484,6 +491,16 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList, {C, L}) -> -spec foldobjects(fun(), atom(), list(), fun(), false|{true, boolean()}, false|list(integer())) -> {async, fun()}. +foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList) -> + foldobjects(SnapFun, Tag, KeyRanges, + FoldObjFun, DeferredFetch, SegmentList, false, false). + +-spec foldobjects(fun(), atom(), list(), fun(), + false|{true, boolean()}, + false|list(integer()), + false|leveled_codec:lastmod_range(), + false|pos_integer()) -> + {async, fun()}. %% @doc %% The object folder should be passed DeferredFetch. %% DeferredFetch can either be false (which will return to the fold function %% the full object), or {true, CheckPresence} - in which case a proxy object %% will be created that if understood by the fold function will allow the fold %% function to work on the head of the object, and defer fetching the body in %% case such a fetch is unnecessary.
-foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList) -> +foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, + SegmentList, LastModRange, MaxObjectCount) -> {FoldFun, InitAcc} = case is_tuple(FoldObjFun) of true -> @@ -519,7 +537,9 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList) -> EndKey, AccFun, FoldAcc, - SegmentList) + SegmentList, + LastModRange, + MaxObjectCount) end, Acc = lists:foldl(ListFoldFun, InitAcc, KeyRanges), ok = leveled_penciller:pcl_close(LedgerSnapshot), From f0208e9b122448553f3163d95b08e968f87d1e8a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 31 Oct 2018 11:04:23 +0000 Subject: [PATCH 15/29] Fix issues with deprecated folders They were deprecated for a reason --- test/end_to_end/iterator_SUITE.erl | 41 +++++++++++++++--------------- test/end_to_end/riak_SUITE.erl | 2 +- test/end_to_end/tictac_SUITE.erl | 36 +++++++++++++++----------- 3 files changed, 43 insertions(+), 36 deletions(-) diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl index 8ec9312..5d9aa28 100644 --- a/test/end_to_end/iterator_SUITE.erl +++ b/test/end_to_end/iterator_SUITE.erl @@ -529,30 +529,31 @@ multibucket_fold(_Config) -> end, FoldAccT = {FF, []}, - {async, R1} = leveled_bookie:book_headfold(Bookie1, - ?RIAK_TAG, - {bucket_list, - [{<<"Type1">>, <<"Bucket1">>}, - {<<"Type2">>, <<"Bucket4">>}]}, - FoldAccT, - false, - true, - false), + {async, R1} = + leveled_bookie:book_headfold(Bookie1, + ?RIAK_TAG, + {bucket_list, + [{<<"Type1">>, <<"Bucket1">>}, + {<<"Type2">>, <<"Bucket4">>}]}, + FoldAccT, + false, + true, + false), O1 = length(R1()), io:format("Result R1 of length ~w~n", [O1]), - Q2 = {foldheads_bybucket, - ?RIAK_TAG, - [<<"Bucket2">>, <<"Bucket3">>], bucket_list, - {fun(_B, _K, _PO, Acc) -> - Acc +1 - end, - 0}, - false, - true, - false}, - {async, R2} = leveled_bookie:book_returnfolder(Bookie1, Q2), + {async, R2} = + leveled_bookie:book_headfold(Bookie1, + ?RIAK_TAG, + {bucket_list, + [<<"Bucket2">>, + <<"Bucket3">>]}, + {fun(_B, _K, _PO, Acc) -> + Acc +1 + end, + 0}, + false, true, false), O2 = R2(), io:format("Result R2 of ~w~n", [O2]), diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index 0482bd5..0ce737f 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -182,7 +182,7 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> all, {fun head_tictac_foldfun/4, {0, leveled_tictac:new_tree(test, TreeSize)}}, - false, false, false}, + false, false, false, false, false}, DLs = check_tictacfold(BookA, BookB, TicTacFolder, diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index 5a22abe..690160d 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -156,14 +156,15 @@ many_put_compare(_Config) -> [timer:now_diff(os:timestamp(), SWB0Obj)]), true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj0)) == 0, - FoldQ1 = {foldheads_bybucket, - o_rkv, - "Bucket", - all, - {FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, - true, true, false}, + InitAccTree = leveled_tictac:new_tree(0, TreeSize), + {async, TreeAObjFolder1} = - leveled_bookie:book_returnfolder(Bookie2, FoldQ1), + leveled_bookie:book_headfold(Bookie2, + ?RIAK_TAG, + {range, "Bucket", all}, + {FoldObjectsFun, + InitAccTree}, + true, true, false), SWB1Obj = os:timestamp(), TreeAObj1 = TreeAObjFolder1(), io:format("Build tictac tree via object fold with "++ @@ -184,21 +185,26 @@ 
many_put_compare(_Config) -> fun(_Bucket, Key, Value, Acc) -> leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun) end, - AltFoldQ0 = {foldheads_bybucket, - o_rkv, - "Bucket", - all, - {AltFoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, - false, true, false}, + {async, TreeAAltObjFolder0} = - leveled_bookie:book_returnfolder(Bookie2, AltFoldQ0), + leveled_bookie:book_headfold(Bookie2, + ?RIAK_TAG, + {range, "Bucket", all}, + {AltFoldObjectsFun, + InitAccTree}, + false, true, false), SWB2Obj = os:timestamp(), TreeAAltObj = TreeAAltObjFolder0(), io:format("Build tictac tree via object fold with no "++ "presence check and 200K objects and alt hash in ~w~n", [timer:now_diff(os:timestamp(), SWB2Obj)]), {async, TreeBAltObjFolder0} = - leveled_bookie:book_returnfolder(Bookie3, AltFoldQ0), + leveled_bookie:book_headfold(Bookie3, + ?RIAK_TAG, + {range, "Bucket", all}, + {AltFoldObjectsFun, + InitAccTree}, + false, true, false), SWB3Obj = os:timestamp(), TreeBAltObj = TreeBAltObjFolder0(), io:format("Build tictac tree via object fold with no "++ From 142e3a17bbdc1c61e8a098b692d6ebd5248ae644 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 31 Oct 2018 11:44:46 +0000 Subject: [PATCH 16/29] Add in modification date to v2 value And restrict it to 32 bits - as 80 years should be enough. --- src/leveled_codec.erl | 23 +++++++++++++++++++---- src/leveled_sst.erl | 28 +++++++++++----------------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index dbbb16e..1f9560c 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -200,7 +200,11 @@ strip_to_keyseqonly({LK, V}) -> {LK, element(1, V)}. -spec strip_to_indexdetails(ledger_kv()) -> {integer(), segment_hash(), last_moddate()}. strip_to_indexdetails({_, V}) when tuple_size(V) == 4 -> - {element(1, V), element(3, V), undefined}. + % A v1 value + {element(1, V), element(3, V), undefined}; +strip_to_indexdetails({_, V}) when tuple_size(V) > 4 -> + % A v2 value should have a fifth element - Last Modified Date + {element(1, V), element(3, V), element(5, V)}. -spec striphead_to_v1details(ledger_value()) -> ledger_value(). striphead_to_v1details(V) -> @@ -612,9 +616,20 @@ generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) -> Value = {SQN, Status, Hash, - MD}, + MD, + get_last_lastmodification(LastMods)}, {Bucket, Key, Value, {Hash, ObjHash}, LastMods}. +-spec get_last_lastmodification(list(erlang:timestamp())) -> non_neg_integer(). +%% @doc +%% Get the highest of the last modifications measured in seconds. This will be +%% stored as 4 bytes (unsigned) so will last for another 80 + years +get_last_lastmodification([]) -> + 0; +get_last_lastmodification(LastMods) -> + {Mega, Sec, _Micro} = lists:max(LastMods), + Mega * 1000000 + Sec. + extract_metadata(Obj, Size, ?RIAK_TAG) -> riak_extract_metadata(Obj, Size); @@ -623,7 +638,7 @@ extract_metadata(Obj, Size, ?STD_TAG) -> get_size(PK, Value) -> {Tag, _Bucket, _Key, _} = PK, - {_, _, _, MD} = Value, + MD = element(4, Value), case Tag of ?RIAK_TAG -> {_RMD, _VC, _Hash, Size} = MD, @@ -640,7 +655,7 @@ get_size(PK, Value) -> %% the sorted vclock) get_keyandobjhash(LK, Value) -> {Tag, Bucket, Key, _} = LK, - {_, _, _, MD} = Value, + MD = element(4, Value), case Tag of ?IDX_TAG -> from_ledgerkey(LK); % returns {Bucket, Key, IdxValue} diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 0600005..4b0510b 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -88,9 +88,8 @@ -define(TIMING_SAMPLESIZE, 100).
-define(CACHE_SIZE, 32). -define(BLOCK_LENGTHS_LENGTH, 20). --define(LMD_LENGTH, 6). +-define(LMD_LENGTH, 4). -define(FLIPPER32, 4294967295). --define(FLIPPER48, 281474976710655). -define(COMPRESS_AT_LEVEL, 1). -define(INDEX_MODDATE, true). @@ -881,7 +880,6 @@ fetch(LedgerKey, Hash, State, Timings0) -> Timings3}; {BlockLengths, _LMD, PosBin} -> PosList = find_pos(PosBin, extra_hash(Hash), [], 0), - io:format("Fetching referring to cache with PosList ~w~n", [PosList]), case PosList of [] -> {_SW3, Timings3} = @@ -1326,7 +1324,7 @@ accumulate_positions({K, V}, {PosBinAcc, NoHashCount, HashAcc, LMDAcc}) -> %% Get the last modified date. If no Last Modified Date on any object, can't %% add the accelerator and should check each object in turn take_max_lastmoddate(undefined, _LMDAcc) -> - ?FLIPPER48; + ?FLIPPER32; take_max_lastmoddate(LMD, LMDAcc) -> max(LMD, LMDAcc). @@ -1445,7 +1443,7 @@ generate_binary_slot(Lookup, KVL, PressMethod, IndexModDate, BuildTimings0) -> B3L:32/integer, B4L:32/integer, B5L:32/integer, - LMD:48/integer, + LMD:32/integer, PosBinIndex/binary>>; false -> < - io:format("Key matched~n"), {K, V}; _ -> case LedgerKeyToCheck of @@ -1520,10 +1514,10 @@ check_blocks([Pos|Rest], BlockPointer, BlockLengths, PosBinLength, -spec additional_offset(boolean()) -> pos_integer(). %% @doc -%% 4-byte CRC, 4-byte pos, 4-byte CRC, 5x4 byte lengths, 6 byte LMD +%% 4-byte CRC, 4-byte pos, 4-byte CRC, 5x4 byte lengths, 4 byte LMD %% LMD may not be present additional_offset(true) -> - ?BLOCK_LENGTHS_LENGTH + 4 + 4 + 4 + 6; + ?BLOCK_LENGTHS_LENGTH + 4 + 4 + 4 + ?LMD_LENGTH; additional_offset(false) -> ?BLOCK_LENGTHS_LENGTH + 4 + 4 + 4. @@ -1707,7 +1701,7 @@ extract_header(none, _IdxModDate) -> none; % used when the block cache has returned none extract_header(Header, true) -> BL = ?BLOCK_LENGTHS_LENGTH, - <> = Header, + <> = Header, {BlockLengths, LMD, PosBinIndex}; extract_header(Header, false) -> BL = ?BLOCK_LENGTHS_LENGTH, @@ -2538,8 +2532,8 @@ indexed_list_allindexkeys_test() -> {{HeaderF, FullBinF, _HL, _LK}, no_timing} = generate_binary_slot(lookup, Keys, native, false, no_timing), EmptySlotSize = ?LOOK_SLOTSIZE - 1, - LMD = ?FLIPPER48, - ?assertMatch(<<_BL:20/binary, LMD:48/integer, EmptySlotSize:8/integer>>, + LMD = ?FLIPPER32, + ?assertMatch(<<_BL:20/binary, LMD:32/integer, EmptySlotSize:8/integer>>, HeaderT), ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, HeaderF), @@ -2567,7 +2561,7 @@ indexed_list_allindexkeys_nolookup_test() -> ?NOLOOK_SLOTSIZE), {{Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot(no_lookup, Keys, native, ?INDEX_MODDATE,no_timing), - ?assertMatch(<<_BL:20/binary, _LMD:48/integer, 127:8/integer>>, Header), + ?assertMatch(<<_BL:20/binary, _LMD:32/integer, 127:8/integer>>, Header), % SW = os:timestamp(), BinToList = binaryslot_tolist(FullBin, native, ?INDEX_MODDATE), % io:format(user, @@ -2586,7 +2580,7 @@ indexed_list_allindexkeys_trimmed_test() -> {{Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE,no_timing), EmptySlotSize = ?LOOK_SLOTSIZE - 1, - ?assertMatch(<<_BL:20/binary, _LMD:48/integer, EmptySlotSize:8/integer>>, + ?assertMatch(<<_BL:20/binary, _LMD:32/integer, EmptySlotSize:8/integer>>, Header), ?assertMatch({Keys, none}, binaryslot_trimmedlist(FullBin, {i, @@ -2640,7 +2634,7 @@ indexed_list_mixedkeys_bitflip_test() -> _B3L:32/integer, _B4L:32/integer, _B5L:32/integer, - _LMD:48/integer, + _LMD:32/integer, PosBin/binary>> = Header, TestKey1 = element(1, lists:nth(1, KVL1)), From 
aaccd09a98139714e258b259fcdff17d47400093 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 31 Oct 2018 14:22:28 +0000 Subject: [PATCH 17/29] Allow for setting max_keys to wrap Acc Acc in response is now of form {Reason, Acc} not just Acc so that the application can understand the reason for the results ending - and take appropriate action (e.g. restart again from the LastKey to return more results). --- src/leveled_bookie.erl | 14 +++++++++++--- src/leveled_penciller.erl | 11 +++++++++-- src/leveled_runner.erl | 4 ++-- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index e3c77ab..363912a 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -906,12 +906,20 @@ book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapP book_returnfolder(Pid, RunnerType). %% @doc as book_headfold/7, but with the addition of a Last Modified Date -%% Range and Max Head Count. For version 2 objects this will filter out +%% Range and Max Object Count. For version 2 objects this will filter out %% all objects with a highest Last Modified Date that is outside of the range. %% All version 1 objects will be included in the result set regardless of Last %% Modified Date. -%% The Max Head Count will stop the fold once the count has been reached on -%% this store only +%% The Max Object Count will stop the fold once the count has been reached on +%% this store only. The Max Object Count if provided will mean that on +%% completion of the fold the accumulator will be wrapped in a tuple to +%% indicate the reason for completion: +%% - {no_more_keys, Acc} if the end of the range was reached without hitting +%% the Max Object Count limit +%% - {max_count, Acc} if the Max Object Count limit was reached before +%% reaching the end of the range +%% If MaxObjectCount is false then the Acc will be returned not wrapped in a +%% tuple -spec book_headfold(pid(), Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount) -> {async, Runner} when diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 64b89d8..21c822c 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -1405,14 +1405,21 @@ keyfolder(IMMiter, SSTiter, StartKey, EndKey, {AccFun, Acc}) -> keyfolder(_Iterators, _KeyRange, {_AccFun, Acc}, {_SegmentList, _LastModRange, MaxKeys}) when MaxKeys == 0 -> - Acc; + {max_count, Acc}; keyfolder({[], SSTiter}, KeyRange, {AccFun, Acc}, {SegmentList, LastModRange, MaxKeys}) -> {StartKey, EndKey} = KeyRange, case find_nextkey(SSTiter, StartKey, EndKey, SegmentList, element(1, LastModRange)) of no_more_keys -> - Acc; + case MaxKeys > 0 of + true -> + % Need to signal this query ended not because the + % MaxKeys was reached + {no_more_keys, Acc}; + false -> + Acc + end; {NxSSTiter, {SSTKey, SSTVal}} -> {Acc1, MK1} = maybe_accumulate(SSTKey, SSTVal, Acc, AccFun, MaxKeys, LastModRange), diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index e3b5445..ffaf509 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -461,10 +461,10 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList, {C, L}) -> ExtractFun, null), case R of - null -> + {no_more_keys, null} -> leveled_log:log("B0008",[]), BKList; - {{B, K}, V} -> + {_, {{B, K}, V}} -> case leveled_codec:is_active({Tag, B, K, null}, V, Now) of true -> leveled_log:log("B0009",[B]), From 19bfe48564aa35461fca8f4d388ba1e9aa652183 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 31 Oct 2018 16:35:53
+0000 Subject: [PATCH 18/29] Initial ct test Which exposed it wasn't working. If there is no segment list passed - just a modification filter, you don't need to check the position list (as checking the position list returns an empty position so skipping all the matching results!) --- src/leveled_bookie.erl | 1 + src/leveled_sst.erl | 51 +++++++----- test/end_to_end/riak_SUITE.erl | 143 +++++++++++++++++++++++++++++++++ test/end_to_end/testutil.erl | 7 +- 4 files changed, 180 insertions(+), 22 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 363912a..a609330 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -951,6 +951,7 @@ book_headfold(Pid, Tag, {bucket_list, BucketList}, FoldAccT, JournalCheck, SnapP book_returnfolder(Pid, RunnerType); book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount) -> + RunnerType = {foldheads_bybucket, Tag, Bucket, KeyRange, FoldAccT, JournalCheck, SnapPreFold, diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 4b0510b..b68b130 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1590,10 +1590,9 @@ binarysplit_mapfun(MultiSlotBin, StartPos) -> %% may be intermittently removed from the result set read_slots(Handle, SlotList, {false, 0, _BlockIndexCache}, _PressMethod, _IdxModDate) -> - % No list of segments passed or useful Low LastModified Date - LengthList = lists:map(fun pointer_mapfun/1, SlotList), - {MultiSlotBin, StartPos} = read_length_list(Handle, LengthList), - lists:map(binarysplit_mapfun(MultiSlotBin, StartPos), LengthList); + % No list of segments passed or useful Low LastModified Date + % Just read slots in SlotList + read_slotlist(SlotList, Handle); read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache}, PressMethod, IdxModDate) -> % List of segments passed so only {K, V} pairs matching those segments % should be returned. This required the {K, V} pair to have been added @@ -1609,11 +1608,7 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache}, % If there is an attempt to use the seg list query and the % index block cache isn't cached for any part this may be % slower as each slot will be read in turn - LengthDetails = pointer_mapfun(Pointer), - {MultiSlotBin, StartPos} = - read_length_list(Handle, [LengthDetails]), - MapFun = binarysplit_mapfun(MultiSlotBin, StartPos), - Acc ++ [MapFun(LengthDetails)]; + Acc ++ read_slotlist([Pointer], Handle); {BlockLengths, LMD, BlockIdx} -> % If there is a BlockIndex cached then we can use it to % check to see if any of the expected segments are % present without lifting the slot off disk. Also the % fact that we know position can be used to filter out % other keys % % Note that LMD will be 0 if the indexing of last mod % date was not enabled at creation time. So in this % case the filter should always map @@ -1628,26 +1623,42 @@ read_slots(Handle, SlotList, {SegList, LowLastMod, BlockIndexCache}, true -> % The highest LMD on the slot was before the % LowLastMod date passed in the query - therefore - % there are no interesting modifications in this + % there are no interesting modifications in this % slot - it is all too old Acc; false -> + case SegList of + false -> + % Need all of the slot now + Acc ++ read_slotlist([Pointer], Handle); + _SL -> + % Need to find just the right keys + PositionList = + find_pos(BlockIdx, SegList, [], 0), + Acc ++ + check_blocks(PositionList, + {Handle, SP}, + BlockLengths, + byte_size(BlockIdx), + false, + PressMethod, + IdxModDate, + []) + % Note check_blocks should return [] if + %
PositionList is empty + end end end end, lists:foldl(BinMapFun, [], SlotList). +read_slotlist(SlotList, Handle) -> + LengthList = lists:map(fun pointer_mapfun/1, SlotList), + {MultiSlotBin, StartPos} = read_length_list(Handle, LengthList), + lists:map(binarysplit_mapfun(MultiSlotBin, StartPos), LengthList). + + -spec binaryslot_reader(list(binaryslot_element()), press_method(), boolean(), diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index 0ce737f..65f6a12 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -3,6 +3,7 @@ -include("include/leveled.hrl"). -export([all/0]). -export([ + fetchclocks_modifiedbetween/1, crossbucket_aae/1, handoff/1, dollar_bucket_index/1, @@ -10,6 +11,7 @@ ]). all() -> [ + fetchclocks_modifiedbetween, crossbucket_aae, handoff, dollar_bucket_index, @@ -18,6 +20,147 @@ all() -> [ -define(MAGIC, 53). % riak_kv -> riak_object + +fetchclocks_modifiedbetween(_Config) -> + RootPathA = testutil:reset_filestructure("fetchClockA"), + RootPathB = testutil:reset_filestructure("fetchClockB"), + StartOpts1A = [{root_path, RootPathA}, + {max_journalsize, 500000000}, + {max_pencillercachesize, 16000}, + {sync_strategy, testutil:sync_strategy()}], + StartOpts1B = [{root_path, RootPathB}, + {max_journalsize, 500000000}, + {max_pencillercachesize, 16000}, + {sync_strategy, testutil:sync_strategy()}], + {ok, Bookie1A} = leveled_bookie:book_start(StartOpts1A), + {ok, Bookie1B} = leveled_bookie:book_start(StartOpts1B), + + _ObjL1StartTS = testutil:convert_to_seconds(os:timestamp()), + ObjList1 = + testutil:generate_objects(20000, + {fixed_binary, 1}, [], + leveled_rand:rand_bytes(512), + fun() -> [] end, + <<"B0">>), + timer:sleep(1000), + _ObjL1EndTS = testutil:convert_to_seconds(os:timestamp()), + timer:sleep(1000), + + _ObjL2StartTS = testutil:convert_to_seconds(os:timestamp()), + ObjList2 = + testutil:generate_objects(15000, + {fixed_binary, 20001}, [], + leveled_rand:rand_bytes(512), + fun() -> [] end, + <<"B0">>), + timer:sleep(1000), + _ObjList2EndTS = testutil:convert_to_seconds(os:timestamp()), + timer:sleep(1000), + + ObjL3StartTS = testutil:convert_to_seconds(os:timestamp()), + ObjList3 = + testutil:generate_objects(35000, + {fixed_binary, 35001}, [], + leveled_rand:rand_bytes(512), + fun() -> [] end, + <<"B0">>), + timer:sleep(1000), + ObjL3EndTS = testutil:convert_to_seconds(os:timestamp()), + timer:sleep(1000), + + _ObjL4StartTS = testutil:convert_to_seconds(os:timestamp()), + ObjList4 = + testutil:generate_objects(30000, + {fixed_binary, 70001}, [], + leveled_rand:rand_bytes(512), + fun() -> [] end, + "B0"), + timer:sleep(1000), + _ObjL4EndTS = testutil:convert_to_seconds(os:timestamp()), + timer:sleep(1000), + + testutil:riakload(Bookie1A, ObjList1), + testutil:riakload(Bookie1A, ObjList2), + testutil:riakload(Bookie1A, ObjList3), + testutil:riakload(Bookie1A, ObjList4), + + testutil:riakload(Bookie1B, ObjList1), + testutil:riakload(Bookie1B, ObjList3), + testutil:riakload(Bookie1B, ObjList4), + + RevertFixedBinKey = + fun(FBK) -> + <<$K, $e, $y, KeyNumber:64/integer>> = FBK, + KeyNumber + end, + StoreFoldFun = + fun(_B, K, _V, {_LK, AccC}) -> + {RevertFixedBinKey(K), AccC + 1} + end, + + KeyRangeFun = + fun(StartNumber, EndNumber) -> + {range, + <<"B0">>, + {testutil:fixed_bin_key(StartNumber), + testutil:fixed_bin_key(EndNumber)}} + end, + + % Count with max object count + FoldRangesFun = + fun(FoldTarget, ModRange, EndNumber) -> + fun(_I, {LKN, KC}) -> + {async, Runner} = + 
leveled_bookie:book_headfold(FoldTarget, + ?RIAK_TAG, + KeyRangeFun(LKN + 1, + EndNumber), + {StoreFoldFun, {LKN, KC}}, + false, + true, + false, + ModRange, + 13000), + {_, {LKN0, KC0}} = Runner(), + {LKN0, KC0} + end + end, + + R1A = lists:foldl(FoldRangesFun(Bookie1A, false, 50000), + {0, 0}, lists:seq(1, 4)), + io:format("R1A ~w~n", [R1A]), + true = {50000, 50000} == R1A, + + R1B = lists:foldl(FoldRangesFun(Bookie1B, false, 50000), + {0, 0}, lists:seq(1, 3)), + io:format("R1B ~w~n", [R1B]), + true = {50000, 35000} == R1B, + + R2A = lists:foldl(FoldRangesFun(Bookie1A, + {ObjL3StartTS, ObjL3EndTS}, + 60000), + {10000, 0}, lists:seq(1, 2)), + io:format("R2A ~w~n", [R2A]), + true = {60000, 25000} == R2A, + R2A_SR = lists:foldl(FoldRangesFun(Bookie1A, + {ObjL3StartTS, ObjL3EndTS}, + 60000), + {10000, 0}, lists:seq(1, 1)), + io:format("R2A_SingleRotation ~w~n", [R2A]), + true = {48000, 13000} == R2A_SR, % Hit at max results + R2B = lists:foldl(FoldRangesFun(Bookie1B, + {ObjL3StartTS, ObjL3EndTS}, + 60000), + {10000, 0}, lists:seq(1, 2)), + io:format("R2B ~w~n", [R1B]), + true = {60000, 25000} == R2B, + + + ok = leveled_bookie:book_destroy(Bookie1A), + ok = leveled_bookie:book_destroy(Bookie1B). + + + crossbucket_aae(_Config) -> % Test requires multiple different databases, so want to mount them all % on individual file paths diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 176c065..6abdba2 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -51,8 +51,9 @@ sync_strategy/0, riak_object/4, get_value_from_objectlistitem/1, - numbered_key/1, - fixed_bin_key/1]). + numbered_key/1, + fixed_bin_key/1, + convert_to_seconds/1]). -define(RETURN_TERMS, {true, undefined}). -define(SLOWOFFER_DELAY, 5). @@ -771,3 +772,5 @@ find_journals(RootPath) -> FNsA_J), CDBFiles. +convert_to_seconds({MegaSec, Seconds, _MicroSec}) -> + MegaSec * 1000000 + Seconds. \ No newline at end of file From 62f1302305558a56d141cb9c44e29a545f864d7b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 31 Oct 2018 18:34:27 +0000 Subject: [PATCH 19/29] Add test comparing filter with non-filter How do timings differ - does the SST filter accelerate? The answer appears to be yes - the filtered version takes half the time. 
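The test that follows times each fold with the usual now_diff pattern, sketched here for reference (illustrative only, with invented names):

    SW = os:timestamp(),
    Result = Runner(),
    ElapsedMs = timer:now_diff(os:timestamp(), SW) / 1000,

The accelerated query wins because the last modified range is pushed down to the SST slot headers, allowing whole slots to be skipped on disk, while the unaccelerated fold must deserialise every head and test its date in the fold fun.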
--- test/end_to_end/riak_SUITE.erl | 77 ++++++++++++++++++++++++++++------ test/end_to_end/testutil.erl | 19 +++++++++ 2 files changed, 84 insertions(+), 12 deletions(-) diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index 65f6a12..390a13c 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -35,7 +35,7 @@ fetchclocks_modifiedbetween(_Config) -> {ok, Bookie1A} = leveled_bookie:book_start(StartOpts1A), {ok, Bookie1B} = leveled_bookie:book_start(StartOpts1B), - _ObjL1StartTS = testutil:convert_to_seconds(os:timestamp()), + ObjL1StartTS = testutil:convert_to_seconds(os:timestamp()), ObjList1 = testutil:generate_objects(20000, {fixed_binary, 1}, [], @@ -43,7 +43,7 @@ fetchclocks_modifiedbetween(_Config) -> fun() -> [] end, <<"B0">>), timer:sleep(1000), - _ObjL1EndTS = testutil:convert_to_seconds(os:timestamp()), + ObjL1EndTS = testutil:convert_to_seconds(os:timestamp()), timer:sleep(1000), _ObjL2StartTS = testutil:convert_to_seconds(os:timestamp()), @@ -84,9 +84,10 @@ fetchclocks_modifiedbetween(_Config) -> testutil:riakload(Bookie1A, ObjList3), testutil:riakload(Bookie1A, ObjList4), + testutil:riakload(Bookie1B, ObjList4), testutil:riakload(Bookie1B, ObjList1), testutil:riakload(Bookie1B, ObjList3), - testutil:riakload(Bookie1B, ObjList4), + RevertFixedBinKey = fun(FBK) -> @@ -108,7 +109,7 @@ fetchclocks_modifiedbetween(_Config) -> % Count with max object count FoldRangesFun = - fun(FoldTarget, ModRange, EndNumber) -> + fun(FoldTarget, ModRange, EndNumber, MaxCount) -> fun(_I, {LKN, KC}) -> {async, Runner} = leveled_bookie:book_headfold(FoldTarget, @@ -120,41 +121,93 @@ fetchclocks_modifiedbetween(_Config) -> true, false, ModRange, - 13000), + MaxCount), {_, {LKN0, KC0}} = Runner(), {LKN0, KC0} end end, - R1A = lists:foldl(FoldRangesFun(Bookie1A, false, 50000), + R1A = lists:foldl(FoldRangesFun(Bookie1A, false, 50000, 13000), {0, 0}, lists:seq(1, 4)), io:format("R1A ~w~n", [R1A]), true = {50000, 50000} == R1A, - R1B = lists:foldl(FoldRangesFun(Bookie1B, false, 50000), + R1B = lists:foldl(FoldRangesFun(Bookie1B, false, 50000, 13000), {0, 0}, lists:seq(1, 3)), io:format("R1B ~w~n", [R1B]), true = {50000, 35000} == R1B, R2A = lists:foldl(FoldRangesFun(Bookie1A, {ObjL3StartTS, ObjL3EndTS}, - 60000), + 60000, + 13000), {10000, 0}, lists:seq(1, 2)), io:format("R2A ~w~n", [R2A]), true = {60000, 25000} == R2A, R2A_SR = lists:foldl(FoldRangesFun(Bookie1A, {ObjL3StartTS, ObjL3EndTS}, - 60000), - {10000, 0}, lists:seq(1, 1)), - io:format("R2A_SingleRotation ~w~n", [R2A]), + 60000, + 13000), + {10000, 0}, lists:seq(1, 1)), % Only single rotation + io:format("R2A_SingleRotation ~w~n", [R2A_SR]), true = {48000, 13000} == R2A_SR, % Hit at max results R2B = lists:foldl(FoldRangesFun(Bookie1B, {ObjL3StartTS, ObjL3EndTS}, - 60000), + 60000, + 13000), {10000, 0}, lists:seq(1, 2)), io:format("R2B ~w~n", [R2B]), true = {60000, 25000} == R2B, + CrudeStoreFoldFun = + fun(LowLMD, HighLMD) -> + fun(_B, K, V, {LK, AccC}) -> + % Value is proxy_object? Can we get the metadata and + % read the last modified date?
Then do a non-accelerated + % fold to check that it is slower + {proxy_object, MDBin, _Size, _Fetcher} = binary_to_term(V), + LMDTS = testutil:get_lastmodified(MDBin), + LMD = testutil:convert_to_seconds(LMDTS), + case (LMD >= LowLMD) and (LMD =< HighLMD) of + true -> + {RevertFixedBinKey(K), AccC + 1}; + false -> + {LK, AccC} + end + end + end, + + io:format("Comparing queries for Obj1 TS range ~w ~w~n", + [ObjL1StartTS, ObjL1EndTS]), + + PlusFilterStart = os:timestamp(), + R3A_PlusFilter = lists:foldl(FoldRangesFun(Bookie1A, + {ObjL1StartTS, ObjL1EndTS}, + 100000, + 100000), + {0, 0}, lists:seq(1, 1)), + PlusFilterTime = timer:now_diff(os:timestamp(), PlusFilterStart)/1000, + io:format("R3A_PlusFilter ~w~n", [R3A_PlusFilter]), + true = {20000, 20000} == R3A_PlusFilter, + + NoFilterStart = os:timestamp(), + {async, R3A_NoFilterRunner} = + leveled_bookie:book_headfold(Bookie1A, + ?RIAK_TAG, + KeyRangeFun(1, 100000), + {CrudeStoreFoldFun(ObjL1StartTS, + ObjL1EndTS), + {0, 0}}, + false, + true, + false), + R3A_NoFilter = R3A_NoFilterRunner(), + NoFilterTime = timer:now_diff(os:timestamp(), NoFilterStart)/1000, + io:format("R3A_NoFilter ~w~n", [R3A_NoFilter]), + true = {20000, 20000} == R3A_NoFilter, + io:format("Filtered query ~w ms and unfiltered query ~w ms~n", + [PlusFilterTime, NoFilterTime]), + true = NoFilterTime > PlusFilterTime, ok = leveled_bookie:book_destroy(Bookie1A), ok = leveled_bookie:book_destroy(Bookie1B). diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 6abdba2..b2c3eaa 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -28,6 +28,7 @@ get_key/1, get_value/1, get_vclock/1, + get_lastmodified/1, get_compressiblevalue/0, get_compressiblevalue_andinteger/0, get_randomindexes_generator/1, @@ -552,6 +553,24 @@ get_value(ObjectBin) -> error end. +get_lastmodified(ObjectBin) -> + <<_Magic:8/integer, _Vers:8/integer, VclockLen:32/integer, + Rest1/binary>> = ObjectBin, + <<_VclockBin:VclockLen/binary, SibCount:32/integer, SibsBin/binary>> = Rest1, + case SibCount of + 1 -> + <<SibLength:32/integer, Rest2/binary>> = SibsBin, + <<_ContentBin:SibLength/binary, + MetaLength:32/integer, + MetaBin:MetaLength/binary, + _Rest3/binary>> = Rest2, + <<MegaSec:32/integer, + Sec:32/integer, + MicroSec:32/integer, + _Rest4/binary>> = MetaBin, + {MegaSec, Sec, MicroSec} + end. + get_vclock(ObjectBin) -> <<_Magic:8/integer, _Vers:8/integer, VclockLen:32/integer, Rest1/binary>> = ObjectBin, From 376407fa6685c6875e1672e4d3efa0fdb141cfef Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 31 Oct 2018 21:37:53 +0000 Subject: [PATCH 20/29] Extend testing In particular discover that after objects are modified (outside of the LMD window) they no longer appear in results for queries in the LMD window.
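The implication, sketched with the numbers this test uses: the ledger keeps only the latest last modified date per key, so if 1000 of the 20000 objects written in the ObjList1 window are later rewritten, a fold filtered on the original window should count 19000. Roughly, assuming a counting fold fun like the SimpleCountFun introduced in the diff below (sketch only, not the test itself):

    %% Sketch: update_some_objects rewrites a sample of ObjList1, stamping
    %% a fresh last modified date on each updated object.
    testutil:update_some_objects(Bookie1A, ObjList1, 1000),
    {async, Folder} =
        leveled_bookie:book_headfold(Bookie1A, ?RIAK_TAG,
                                     {range, <<"B0">>, all},
                                     {CountFun, 0},
                                     false, true, false,
                                     {ObjL1StartTS, ObjL1EndTS}, false),
    19000 = Folder().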
--- test/end_to_end/riak_SUITE.erl | 97 +++++++++++++++++++++++++++++++--- test/end_to_end/testutil.erl | 5 +- 2 files changed, 95 insertions(+), 7 deletions(-) diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index 390a13c..280b7da 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -26,11 +26,11 @@ fetchclocks_modifiedbetween(_Config) -> RootPathB = testutil:reset_filestructure("fetchClockB"), StartOpts1A = [{root_path, RootPathA}, {max_journalsize, 500000000}, - {max_pencillercachesize, 16000}, + {max_pencillercachesize, 8000}, {sync_strategy, testutil:sync_strategy()}], StartOpts1B = [{root_path, RootPathB}, {max_journalsize, 500000000}, - {max_pencillercachesize, 16000}, + {max_pencillercachesize, 12000}, {sync_strategy, testutil:sync_strategy()}], {ok, Bookie1A} = leveled_bookie:book_start(StartOpts1A), {ok, Bookie1B} = leveled_bookie:book_start(StartOpts1B), @@ -68,27 +68,52 @@ fetchclocks_modifiedbetween(_Config) -> ObjL3EndTS = testutil:convert_to_seconds(os:timestamp()), timer:sleep(1000), - _ObjL4StartTS = testutil:convert_to_seconds(os:timestamp()), + ObjL4StartTS = testutil:convert_to_seconds(os:timestamp()), ObjList4 = testutil:generate_objects(30000, {fixed_binary, 70001}, [], leveled_rand:rand_bytes(512), fun() -> [] end, - "B0"), + <<"B0">>), timer:sleep(1000), _ObjL4EndTS = testutil:convert_to_seconds(os:timestamp()), timer:sleep(1000), + _ObjL5StartTS = testutil:convert_to_seconds(os:timestamp()), + ObjList5 = + testutil:generate_objects(8000, + {fixed_binary, 1}, [], + leveled_rand:rand_bytes(512), + fun() -> [] end, + <<"B1">>), + timer:sleep(1000), + _ObjL5EndTS = testutil:convert_to_seconds(os:timestamp()), + timer:sleep(1000), + + _ObjL6StartTS = testutil:convert_to_seconds(os:timestamp()), + ObjList6 = + testutil:generate_objects(7000, + {fixed_binary, 1}, [], + leveled_rand:rand_bytes(512), + fun() -> [] end, + <<"B2">>), + timer:sleep(1000), + ObjL6EndTS = testutil:convert_to_seconds(os:timestamp()), + timer:sleep(1000), + + testutil:riakload(Bookie1A, ObjList5), testutil:riakload(Bookie1A, ObjList1), testutil:riakload(Bookie1A, ObjList2), testutil:riakload(Bookie1A, ObjList3), testutil:riakload(Bookie1A, ObjList4), + testutil:riakload(Bookie1A, ObjList6), testutil:riakload(Bookie1B, ObjList4), + testutil:riakload(Bookie1B, ObjList5), testutil:riakload(Bookie1B, ObjList1), + testutil:riakload(Bookie1B, ObjList6), testutil:riakload(Bookie1B, ObjList3), - RevertFixedBinKey = fun(FBK) -> <<$K, $e, $y, KeyNumber:64/integer>> = FBK, KeyNumber end, @@ -208,7 +233,67 @@ fetchclocks_modifiedbetween(_Config) -> io:format("Filtered query ~w ms and unfiltered query ~w ms~n", [PlusFilterTime, NoFilterTime]), true = NoFilterTime > PlusFilterTime, - + + SimpleCountFun = + fun(_B, _K, _V, AccC) -> AccC + 1 end, + + {async, R4A_MultiBucketRunner} = + leveled_bookie:book_headfold(Bookie1A, + ?RIAK_TAG, + {bucket_list, [<<"B0">>, <<"B2">>]}, + {SimpleCountFun, 0}, + false, + true, + false, + {ObjL4StartTS, ObjL6EndTS}, + % Range includes ObjL5 LMDs, + % but these are not in bucket list + false), + R4A_MultiBucket = R4A_MultiBucketRunner(), + io:format("R4A_MultiBucket ~w ~n", [R4A_MultiBucket]), + true = R4A_MultiBucket == 37000, + + {async, R5A_MultiBucketRunner} = + leveled_bookie:book_headfold(Bookie1A, + ?RIAK_TAG, + {bucket_list, [<<"B2">>, <<"B0">>]}, + % Reverse the buckets in the bucket + % list + {SimpleCountFun, 0}, + false, + true, + false, + {ObjL4StartTS, ObjL6EndTS}, + false), + R5A_MultiBucket = R5A_MultiBucketRunner(),
+ io:format("R5A_MultiBucket ~w ~n", [R5A_MultiBucket]), + true = R5A_MultiBucket == 37000, + + + {async, R5B_MultiBucketRunner} = + leveled_bookie:book_headfold(Bookie1B, + % Same query - other bookie + ?RIAK_TAG, + {bucket_list, [<<"B2">>, <<"B0">>]}, + {SimpleCountFun, 0}, + false, + true, + false, + {ObjL4StartTS, ObjL6EndTS}, + false), + R5B_MultiBucket = R5B_MultiBucketRunner(), + io:format("R5B_MultiBucket ~w ~n", [R5B_MultiBucket]), + true = R5A_MultiBucket == 37000, + + testutil:update_some_objects(Bookie1A, ObjList1, 1000), + R6A_PlusFilter = lists:foldl(FoldRangesFun(Bookie1A, + {ObjL1StartTS, ObjL1EndTS}, + 100000, + 100000), + {0, 0}, lists:seq(1, 1)), + io:format("R6A_PlusFilter ~w~n", [R6A_PlusFilter]), + true = 19000 == element(2, R6A_PlusFilter), + ok = leveled_bookie:book_destroy(Bookie1A), ok = leveled_bookie:book_destroy(Bookie1B). diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index b2c3eaa..37ffea9 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -493,7 +493,10 @@ update_some_objects(Bookie, ObjList, SampleSize) -> VC = Obj#r_object.vclock, VC0 = update_vclock(VC), [C] = Obj#r_object.contents, - C0 = C#r_content{value = leveled_rand:rand_bytes(512)}, + MD = C#r_content.metadata, + MD0 = dict:store(?MD_LASTMOD, os:timestamp(), MD), + C0 = C#r_content{value = leveled_rand:rand_bytes(512), + metadata = MD0}, UpdObj = Obj#r_object{vclock = VC0, contents = [C0]}, {R, UpdObj, Spec} end, From f77dc8c3a521054d50bfeab7d6bf0beeb822aed7 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 1 Nov 2018 10:41:46 +0000 Subject: [PATCH 21/29] Add object_spec type Initial refactor to prepare to allow for a new version object_spec type that will support LMD being promoted as an accessible item. --- src/leveled_bookie.erl | 29 ++++++++++++++++------------- src/leveled_codec.erl | 22 ++++++++++++++++++---- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index a609330..e0d12fa 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -185,9 +185,7 @@ -type fold_timings() :: no_timing|#fold_timings{}. -type head_timings() :: no_timing|#head_timings{}. -type timing_types() :: head|get|put|fold. --type key() :: binary()|string()|{binary(), binary()}. - % Keys SHOULD be binary() - % string() support is a legacy of old tests + -type open_options() :: %% For full description of options see ../docs/STARTUP_OPTIONS.md [{root_path, string()|undefined} | @@ -302,7 +300,6 @@ % Defaults to ?COMPRESSION_POINT ]. --export_type([key/0]). %%%============================================================================ @@ -363,7 +360,7 @@ book_plainstart(Opts) -> gen_server:start(?MODULE, [set_defaults(Opts)], []). --spec book_tempput(pid(), key(), key(), any(), +-spec book_tempput(pid(), leveled_codec:key(), leveled_codec:key(), any(), leveled_codec:index_specs(), leveled_codec:tag(), integer()) -> ok|pause. @@ -430,7 +427,7 @@ book_put(Pid, Bucket, Key, Object, IndexSpecs) -> book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag) -> book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, infinity). --spec book_put(pid(), key(), key(), any(), +-spec book_put(pid(), leveled_codec:key(), leveled_codec:key(), any(), leveled_codec:index_specs(), leveled_codec:tag(), infinity|integer()) -> ok|pause. @@ -440,7 +437,7 @@ book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL) -> infinity). --spec book_mput(pid(), list(tuple())) -> ok|pause. 
--- src/leveled_bookie.erl | 29 ++++++++++++++++------------- src/leveled_codec.erl | 22 ++++++++++++++++++---- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index a609330..e0d12fa 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -185,9 +185,7 @@ -type fold_timings() :: no_timing|#fold_timings{}. -type head_timings() :: no_timing|#head_timings{}. -type timing_types() :: head|get|put|fold. --type key() :: binary()|string()|{binary(), binary()}. - % Keys SHOULD be binary() - % string() support is a legacy of old tests + -type open_options() :: %% For full description of options see ../docs/STARTUP_OPTIONS.md [{root_path, string()|undefined} | @@ -302,7 +300,6 @@ % Defaults to ?COMPRESSION_POINT ]. --export_type([key/0]). %%%============================================================================ @@ -363,7 +360,7 @@ book_plainstart(Opts) -> gen_server:start(?MODULE, [set_defaults(Opts)], []). --spec book_tempput(pid(), key(), key(), any(), +-spec book_tempput(pid(), leveled_codec:key(), leveled_codec:key(), any(), leveled_codec:index_specs(), leveled_codec:tag(), integer()) -> ok|pause. @@ -430,7 +427,7 @@ book_put(Pid, Bucket, Key, Object, IndexSpecs) -> book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag) -> book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, infinity). --spec book_put(pid(), key(), key(), any(), +-spec book_put(pid(), leveled_codec:key(), leveled_codec:key(), any(), leveled_codec:index_specs(), leveled_codec:tag(), infinity|integer()) -> ok|pause. @@ -440,7 +437,7 @@ book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL) -> infinity). --spec book_mput(pid(), list(tuple())) -> ok|pause. +-spec book_mput(pid(), list(leveled_codec:object_spec())) -> ok|pause. %% @doc %% %% When the store is being run in head_only mode, batches of object specs may @@ -453,7 +450,8 @@ book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL) -> book_mput(Pid, ObjectSpecs) -> book_mput(Pid, ObjectSpecs, infinity). --spec book_mput(pid(), list(tuple()), infinity|integer()) -> ok|pause. +-spec book_mput(pid(), list(leveled_codec:object_spec()), infinity|integer()) + -> ok|pause. %% @doc %% %% When the store is being run in head_only mode, batches of object specs may @@ -466,8 +464,9 @@ book_mput(Pid, ObjectSpecs) -> book_mput(Pid, ObjectSpecs, TTL) -> gen_server:call(Pid, {mput, ObjectSpecs, TTL}, infinity). --spec book_delete(pid(), key(), key(), leveled_codec:index_specs()) - -> ok|pause. +-spec book_delete(pid(), + leveled_codec:key(), leveled_codec:key(), + leveled_codec:index_specs()) -> ok|pause. %% @doc %% @@ -478,11 +477,15 @@ book_delete(Pid, Bucket, Key, IndexSpecs) -> book_put(Pid, Bucket, Key, delete, IndexSpecs, ?STD_TAG). --spec book_get(pid(), key(), key(), leveled_codec:tag()) +-spec book_get(pid(), + leveled_codec:key(), leveled_codec:key(), leveled_codec:tag()) -> {ok, any()}|not_found. --spec book_head(pid(), key(), key(), leveled_codec:tag()) +-spec book_head(pid(), + leveled_codec:key(), leveled_codec:key(), leveled_codec:tag()) + -> {ok, any()}|not_found. +-spec book_headonly(pid(), + leveled_codec:key(), leveled_codec:key(), leveled_codec:key()) -> {ok, any()}|not_found. --spec book_headonly(pid(), key(), key(), key()) -> {ok, any()}|not_found. %% @doc - GET and HEAD requests %% diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 1f9560c..0a81d1d 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -81,6 +81,10 @@ -type tag() :: ?STD_TAG|?RIAK_TAG|?IDX_TAG|?HEAD_TAG. +-type key() :: + binary()|string()|{binary(), binary()}. + % Keys SHOULD be binary() + % string() support is a legacy of old tests -type sqn() :: % SQN of the object in the Journal pos_integer(). @@ -114,6 +118,10 @@ ?INKT_STND|?INKT_TOMB|?INKT_MPUT|?INKT_KEYD. -type journal_key() :: {integer(), journal_key_tag(), ledger_key()}. +-type object_spec_v0() :: + {add|remove, key(), key(), key()|null, any()}. +-type object_spec() :: + object_spec_v0(). -type compression_method() :: lz4|native. -type index_specs() :: @@ -128,6 +132,8 @@ :: list(integer())|false. -export_type([tag/0, + key/0, + object_spec/0, segment_hash/0, ledger_status/0, ledger_key/0, @@ -550,9 +560,7 @@ hash(Obj) -> %% @doc %% Convert object specs to KV entries ready for the ledger obj_objectspecs(ObjectSpecs, SQN, TTL) -> - lists:map(fun({IdxOp, Bucket, Key, SubKey, Value}) -> - gen_headspec(Bucket, Key, IdxOp, SubKey, Value, SQN, TTL) - end, + lists:map(fun(ObjectSpec) -> gen_headspec(ObjectSpec, SQN, TTL) end, ObjectSpecs). -spec idx_indexspecs(index_specs(), @@ -573,7 +581,7 @@ gen_indexspec(Bucket, Key, IdxOp, IdxField, IdxTerm, SQN, TTL) -> {to_ledgerkey(Bucket, Key, ?IDX_TAG, IdxField, IdxTerm), {SQN, Status, no_lookup, null}}. -gen_headspec(Bucket, Key, IdxOp, SubKey, Value, SQN, TTL) -> +-spec gen_headspec(object_spec(), integer(), integer()|infinity) -> ledger_kv(). +%% @doc +%% Take an object_spec as passed in a book_mput, and convert it into a +%% valid ledger key and value.
Supports different shaped tuples for different +%% versions of the object_spec +gen_headspec({IdxOp, Bucket, Key, SubKey, Value}, SQN, TTL) -> + % v0 object spec Status = set_status(IdxOp, TTL), K = to_ledgerkey(Bucket, {Key, SubKey}, ?HEAD_TAG), {K, {SQN, Status, segment_hash(K), Value}}. From aa123a80a7ffccb760d28c9a06567f84a79e1d9c Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 1 Nov 2018 12:40:24 +0000 Subject: [PATCH 22/29] Allow for backwards/forwards compatibility in specs --- src/leveled_codec.erl | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 0a81d1d..71bc83d 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -120,8 +120,11 @@ {integer(), journal_key_tag(), ledger_key()}. -type object_spec_v0() :: {add|remove, key(), key(), key()|null, any()}. +-type object_spec_v1() :: + {add|remove, v1, key(), key(), key()|null, + list(erlang:timestamp())|undefined, any()}. -type object_spec() :: - object_spec_v0(). + object_spec_v0()|object_spec_v1(). -type compression_method() :: lz4|native. -type index_specs() :: @@ -586,11 +589,17 @@ gen_indexspec(Bucket, Key, IdxOp, IdxField, IdxTerm, SQN, TTL) -> %% Take an object_spec as passed in a book_mput, and convert it into a %% valid ledger key and value. Supports different shaped tuples for different %% versions of the object_spec +gen_headspec({IdxOp, v1, Bucket, Key, SubKey, LMD, Value}, SQN, TTL) -> + % v1 object spec + Status = set_status(IdxOp, TTL), + K = to_ledgerkey(Bucket, {Key, SubKey}, ?HEAD_TAG), + {K, {SQN, Status, segment_hash(K), Value, get_last_lastmodification(LMD)}}; gen_headspec({IdxOp, Bucket, Key, SubKey, Value}, SQN, TTL) -> % v0 object spec Status = set_status(IdxOp, TTL), K = to_ledgerkey(Bucket, {Key, SubKey}, ?HEAD_TAG), - {K, {SQN, Status, segment_hash(K), Value}}. + {K, {SQN, Status, segment_hash(K), Value, undefined}}. + set_status(add, TTL) -> @@ -634,12 +643,15 @@ generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) -> get_last_lastmodification(LastMods)}, {Bucket, Key, Value, {Hash, ObjHash}, LastMods}. --spec get_last_lastmodification(list(erlang:timestamp())) -> non_neg_integer(). +-spec get_last_lastmodification(list(erlang:timestamp())|undefined) + -> pos_integer()|undefined. %% @doc %% Get the highest of the last modifications measured in seconds. This will be %% stored as 4 bytes (unsigned) so will last for another 80 + years +get_last_lastmodification(undefined) -> + undefined; get_last_lastmodification([]) -> - 0; + undefined; get_last_lastmodification(LastMods) -> {Mega, Sec, _Micro} = lists:max(LastMods), Mega * 1000000 + Sec. @@ -872,4 +884,13 @@ head_segment_compare_test() -> ?assertMatch(H1, H2), ?assertMatch(H1, H3). +headspec_v0v1_test() -> + % A v0 object spec generates the same outcome as a v1 object spec with the + % last modified date undefined + V1 = {add, v1, <<"B">>, <<"K">>, <<"SK">>, undefined, <<"V">>}, + V0 = {add, <<"B">>, <<"K">>, <<"SK">>, <<"V">>}, + TTL = infinity, + ?assertMatch(true, gen_headspec(V0, 1, TTL) == gen_headspec(V1, 1, TTL)). + + -endif.
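To illustrate the compatibility point (a sketch based on the headspec_v0v1_test above, with an extra hypothetical v1 value): a v1 spec carries the atom v1 as a version marker plus a list of last modified timestamps, which gen_headspec condenses to whole seconds via get_last_lastmodification; with the dates undefined it produces exactly the v0 result:

    %% Sketch: equivalent v0/v1 specs, and a v1 spec carrying a real date.
    V0 = {add, <<"B">>, <<"K">>, <<"SK">>, <<"V">>},
    V1 = {add, v1, <<"B">>, <<"K">>, <<"SK">>, undefined, <<"V">>},
    true = gen_headspec(V0, 1, infinity) == gen_headspec(V1, 1, infinity),
    %% With dates supplied, the highest is kept, as seconds since epoch:
    V1LMD = {add, v1, <<"B">>, <<"K">>, <<"SK">>, [os:timestamp()], <<"V">>}.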
From 71fa1447e0eda39746adb2cffdb006870e14c7e7 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 1 Nov 2018 17:30:18 +0000 Subject: [PATCH 23/29] Allow for all keys head folds to use modified range This helps with kv_index_tictactree with the leveled_so backend. Now this can do folds over ranges of keys with modified filters (as folds over ranges of keys must go over all keys if the backend is segment_ordered) --- src/leveled_bookie.erl | 40 ++++++++++++++++-------------- src/leveled_runner.erl | 14 +++++++---- test/end_to_end/recovery_SUITE.erl | 3 ++- test/end_to_end/riak_SUITE.erl | 14 +++++------ test/end_to_end/tictac_SUITE.erl | 3 ++- 5 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index e0d12fa..dcfb6cd 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -864,8 +864,9 @@ book_objectfold(Pid, Tag, Bucket, Limiter, FoldAccT, SnapPreFold) -> SegmentList :: false | list(integer()), Runner :: fun(() -> Acc). book_headfold(Pid, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> - RunnerType = {foldheads_allkeys, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList}, - book_returnfolder(Pid, RunnerType). + book_headfold(Pid, Tag, all, + FoldAccT, JournalCheck, SnapPreFold, + SegmentList, false, false). %% @doc as book_headfold/6, but with the addition of a `Limiter' that %% restricts the set of objects folded over. `Limiter' can either be a @@ -897,16 +898,10 @@ book_headfold(Pid, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> SnapPreFold :: boolean(), SegmentList :: false | list(integer()), Runner :: fun(() -> Acc). -book_headfold(Pid, Tag, {bucket_list, BucketList}, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> - RunnerType = - {foldheads_bybucket, Tag, BucketList, bucket_list, FoldAccT, - JournalCheck, SnapPreFold, SegmentList, false, false}, - book_returnfolder(Pid, RunnerType); -book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> - RunnerType = - {foldheads_bybucket, Tag, Bucket, KeyRange, FoldAccT, - JournalCheck, SnapPreFold, SegmentList, false, false}, - book_returnfolder(Pid, RunnerType). +book_headfold(Pid, Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> + book_headfold(Pid, Tag, Limiter, + FoldAccT, JournalCheck, SnapPreFold, + SegmentList, false, false). %% @doc as book_headfold/7, but with the addition of a Last Modified Date %% Range and Max Object Count. For version 2 objects this will filter out @@ -927,7 +922,7 @@ book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapP SegmentList, LastModRange, MaxObjectCount) -> {async, Runner} when Tag :: leveled_codec:tag(), - Limiter :: BucketList | BucketKeyRange, + Limiter :: BucketList | BucketKeyRange | all, BucketList :: {bucket_list, list(Bucket)}, BucketKeyRange :: {range, Bucket, KeyRange}, KeyRange :: {StartKey, EndKey} | all, @@ -953,12 +948,17 @@ book_headfold(Pid, Tag, {bucket_list, BucketList}, FoldAccT, JournalCheck, SnapP SegmentList, LastModRange, MaxObjectCount}, book_returnfolder(Pid, RunnerType); book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount) -> - + SegmentList, LastModRange, MaxObjectCount) -> RunnerType = {foldheads_bybucket, Tag, Bucket, KeyRange, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount}, + book_returnfolder(Pid, RunnerType); +book_headfold(Pid, Tag, all, FoldAccT, JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount) -> + RunnerType = {foldheads_allkeys, Tag, FoldAccT, + JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}, book_returnfolder(Pid, RunnerType).
-spec book_snapshot(pid(), @@ -1615,12 +1615,14 @@ get_runner(State, {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}) -> get_runner(State, {foldheads_allkeys, Tag, FoldFun, - JournalCheck, SnapPreFold, SegmentList}) -> + JournalCheck, SnapPreFold, SegmentList, + LastModRange, MaxObjectCount}) -> SnapType = snaptype_by_presence(JournalCheck), SnapFun = return_snapfun(State, SnapType, no_lookup, true, SnapPreFold), leveled_runner:foldheads_allkeys(SnapFun, Tag, FoldFun, - JournalCheck, SegmentList); + JournalCheck, SegmentList, + LastModRange, MaxObjectCount); get_runner(State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold}) -> get_runner(State, @@ -2494,7 +2496,7 @@ foldobjects_vs_hashtree_testto() -> {foldheads_allkeys, ?STD_TAG, FoldHeadsFun, - true, true, false}), + true, true, false, false, false}), KeyHashList3 = HTFolder3(), ?assertMatch(KeyHashList1, lists:usort(KeyHashList3)), @@ -2513,7 +2515,7 @@ foldobjects_vs_hashtree_testto() -> {foldheads_allkeys, ?STD_TAG, FoldHeadsFun2, - false, false, false}), + false, false, false, false, false}), KeyHashList4 = HTFolder4(), ?assertMatch(KeyHashList1, lists:usort(KeyHashList4)), diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index ffaf509..9f4c25e 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -30,7 +30,7 @@ bucketkey_query/6, hashlist_query/3, tictactree/5, - foldheads_allkeys/5, + foldheads_allkeys/7, foldobjects_allkeys/4, foldheads_bybucket/8, foldobjects_bybucket/4, @@ -270,12 +270,14 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) -> {async, Runner}. -spec foldheads_allkeys(fun(), leveled_codec:tag(), - fun(), boolean(), false|list(integer())) - -> {async, fun()}. + fun(), boolean(), false|list(integer()), + false|leveled_codec:lastmod_range(), + false|pos_integer()) -> {async, fun()}. %% @doc %% Fold over all heads in the store for a given tag - applying the passed %% function to each proxy object -foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck, SegmentList) -> +foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck, + SegmentList, LastModRange, MaxObjectCount) -> StartKey = leveled_codec:to_ledgerkey(null, null, Tag), EndKey = leveled_codec:to_ledgerkey(null, null, Tag), foldobjects(SnapFun, @@ -283,7 +285,9 @@ foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck, SegmentList) -> [{StartKey, EndKey}], FoldFun, {true, JournalCheck}, - SegmentList). + SegmentList, + LastModRange, + MaxObjectCount). -spec foldobjects_allkeys(fun(), leveled_codec:tag(), fun(), key_order|sqn_order) -> {async, fun()}. 
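The upshot for callers, as a hedged sketch of the widened API (FoldFun, InitAcc, LowSec and HighSec are illustrative placeholders): an all-keys head fold can now carry the same last modified range and max object count as the by-bucket and by-range forms:

    %% Sketch: all-keys head fold, filtered by a last modified range
    %% (seconds since epoch), with no max object count.
    {async, Runner} =
        leveled_bookie:book_headfold(Bookie, ?RIAK_TAG, all,
                                     {FoldFun, InitAcc},
                                     false, true, false,
                                     {LowSec, HighSec}, false),
    Acc = Runner().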
diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 0f2f81a..eb748cc 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -227,7 +227,8 @@ aae_missingjournal(_Config) -> {foldheads_allkeys, ?RIAK_TAG, FoldHeadsFun, - true, true, false}), + true, true, false, + false, false}), HeadL2 = length(AllHeadF2()), io:format("Fold head returned ~w objects~n", [HeadL2]), true = HeadL2 < HeadL1, diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index 280b7da..6eb1962 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -422,7 +422,7 @@ test_segfilter_query(Bookie, CLs) -> Acc end end, 0}, - false, true, SegL} + false, true, SegL, false, false} end, {async, SL1Folder} = @@ -455,7 +455,7 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> ?RIAK_TAG, {fun head_tictac_foldfun/4, {0, leveled_tictac:new_tree(test, TreeSize)}}, - false, true, false}, + false, true, false, false, false}, % tictac query by bucket (should be same result as all stores) TicTacByBucketFolder = {foldheads_bybucket, @@ -478,7 +478,7 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> {foldheads_allkeys, ?RIAK_TAG, {get_segment_folder(DLs, TreeSize), []}, - false, true, false}, + false, true, false, false, false}, SW_SL0 = os:timestamp(), {async, BookASegFolder} = @@ -502,7 +502,7 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> {foldheads_allkeys, ?RIAK_TAG, {get_segment_folder(DLs, TreeSize), []}, - false, true, SegFilterList}, + false, true, SegFilterList, false, false}, SW_SL1 = os:timestamp(), {async, BookASegFolder1} = @@ -521,7 +521,7 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> {foldheads_allkeys, ?RIAK_TAG, {get_segment_folder(DLs, TreeSize), []}, - true, true, SegFilterList}, + true, true, SegFilterList, false, false}, SW_SL1CP = os:timestamp(), {async, BookASegFolder1CP} = @@ -545,7 +545,7 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> {foldheads_allkeys, ?RIAK_TAG, {get_segment_folder(DLs, TreeSize), []}, - false, true, SegFilterListF}, + false, true, SegFilterListF, false, false}, SW_SL1F = os:timestamp(), {async, BookASegFolder1F} = @@ -749,7 +749,7 @@ handoff(_Config) -> ?RIAK_TAG, {fun head_tictac_foldfun/4, {0, leveled_tictac:new_tree(test, TreeSize)}}, - false, true, false}, + false, true, false, false, false}, check_tictacfold(Bookie1, Bookie2, TicTacFolder, none, TreeSize), check_tictacfold(Bookie2, Bookie3, TicTacFolder, none, TreeSize), check_tictacfold(Bookie3, Bookie4, TicTacFolder, none, TreeSize), diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index 690160d..20c748c 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -595,7 +595,8 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) -> InitAcc = {0, 0}, RunnerDefinition = - {foldheads_allkeys, h, {FoldFun, InitAcc}, false, false, false}, + {foldheads_allkeys, h, {FoldFun, InitAcc}, + false, false, false, false, false}, {async, Runner1} = leveled_bookie:book_returnfolder(Bookie1, RunnerDefinition), From 2b57ff831c67fb8387e29f10a658b85ae4430f4b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 1 Nov 2018 19:58:32 +0000 Subject: [PATCH 24/29] Temp log --- src/leveled_bookie.erl | 3 ++- src/leveled_penciller.erl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index dcfb6cd..cf2de34 100644 --- 
a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -956,7 +956,8 @@ book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapP book_returnfolder(Pid, RunnerType); book_headfold(Pid, Tag, all, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount) -> - RunnerType = {foldheads_allkeys, Tag, FoldAccT, + RunnerType = + {foldheads_allkeys, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount}, book_returnfolder(Pid, RunnerType). diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 21c822c..664cfde 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -1517,6 +1517,7 @@ maybe_accumulate(LK, LV, Acc, AccFun, MaxKeys, {LowLastMod, HighLastMod}) -> {_SQN, _SH, LMD} = leveled_codec:strip_to_indexdetails({LK, LV}), RunAcc = (LMD == undefined) or ((LMD >= LowLastMod) and (LMD =< HighLastMod)), + io:format("Checking ~w to see if between ~w and ~w", [LMD, LowLastMod, HighLastMod]), case RunAcc of true -> {AccFun(LK, LV, Acc), MaxKeys - 1}; From 7e959f1353088ad9c381135ea65178bfb1c3f6b9 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 1 Nov 2018 20:04:22 +0000 Subject: [PATCH 25/29] More temp logs --- src/leveled_bookie.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index cf2de34..733b1e6 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -960,6 +960,7 @@ book_headfold(Pid, Tag, all, FoldAccT, JournalCheck, SnapPreFold, {foldheads_allkeys, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount}, + io:format("RunnerType ~w~n", [RunnerType]), book_returnfolder(Pid, RunnerType). -spec book_snapshot(pid(), From c126872fabf7ea506906f1805b63b3aa5c7751f9 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 1 Nov 2018 20:16:04 +0000 Subject: [PATCH 26/29] Revert "More temp logs" This reverts commit 7e959f1353088ad9c381135ea65178bfb1c3f6b9. --- src/leveled_bookie.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 733b1e6..cf2de34 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -960,7 +960,6 @@ book_headfold(Pid, Tag, all, FoldAccT, JournalCheck, SnapPreFold, {foldheads_allkeys, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount}, - io:format("RunnerType ~w~n", [RunnerType]), book_returnfolder(Pid, RunnerType). -spec book_snapshot(pid(), From dc84eabe0cefb054e631059e3eee8a92094cda6b Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 1 Nov 2018 20:16:08 +0000 Subject: [PATCH 27/29] Revert "Temp log" This reverts commit 2b57ff831c67fb8387e29f10a658b85ae4430f4b. --- src/leveled_bookie.erl | 3 +-- src/leveled_penciller.erl | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index cf2de34..dcfb6cd 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -956,8 +956,7 @@ book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapP book_returnfolder(Pid, RunnerType); book_headfold(Pid, Tag, all, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount) -> - RunnerType = - {foldheads_allkeys, Tag, FoldAccT, + RunnerType = {foldheads_allkeys, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount}, book_returnfolder(Pid, RunnerType). 
diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 664cfde..21c822c 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -1517,7 +1517,6 @@ maybe_accumulate(LK, LV, Acc, AccFun, MaxKeys, {LowLastMod, HighLastMod}) -> {_SQN, _SH, LMD} = leveled_codec:strip_to_indexdetails({LK, LV}), RunAcc = (LMD == undefined) or ((LMD >= LowLastMod) and (LMD =< HighLastMod)), - io:format("Checking ~w to see if between ~w and ~w", [LMD, LowLastMod, HighLastMod]), case RunAcc of true -> {AccFun(LK, LV, Acc), MaxKeys - 1}; From 2eec8a53780cca3861605258861f0ce29fe0d612 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 1 Nov 2018 23:40:28 +0000 Subject: [PATCH 28/29] MaxCount monitoring and responding Stop issue of {no_more_keys, Acc} being passed on fold over list of ranges to next range (and blowing up) --- src/leveled_bookie.erl | 14 ++++---------- src/leveled_penciller.erl | 33 +++++++++++++++++---------------- src/leveled_runner.erl | 26 +++++++++++++++++--------- test/end_to_end/riak_SUITE.erl | 32 +++++++++++++++++++++++++++++++- 4 files changed, 69 insertions(+), 36 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index dcfb6cd..09debba 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -909,15 +909,8 @@ book_headfold(Pid, Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentLis %% All version 1 objects will be included in the result set regardless of Last %% Modified Date. %% The Max Object Count will stop the fold once the count has been reached on -%% this store only. The Max Object Count if provided will mean that on -%% completion of the fold the accumulator will be wrapped in a tuple to -%% indicate the reason for completion: -%% - {no_more_keys, Acc} if the end of the range was reached wihtout hitting -%% the Max Object Count limit -%% - {max_count, Acc} if the Max Object Count limit was reached before -%% reaching the end of the range -%% If MaxObjectCount is false then the Acc will be returned not wrapped in a -%% tuple +%% this store only. The Max Object Count if provided will mean that the runner +%% will return {RemainingCount, Acc} not just Acc -spec book_headfold(pid(), Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount) -> {async, Runner} when @@ -939,7 +932,8 @@ book_headfold(Pid, Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentLis SegmentList :: false | list(integer()), LastModRange :: false | leveled_codec:lastmod_range(), MaxObjectCount :: false | pos_integer(), - Runner :: fun(() -> Acc). + Runner :: fun(() -> ResultingAcc), + ResultingAcc :: Acc | {non_neg_integer(), Acc}. book_headfold(Pid, Tag, {bucket_list, BucketList}, FoldAccT, JournalCheck, SnapPreFold, SegmentList, LastModRange, MaxObjectCount) -> RunnerType = diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 21c822c..63c9fdc 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -423,7 +423,7 @@ pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc, By) -> fun(), any(), leveled_codec:segment_list(), false | leveled_codec:lastmod_range(), - false | pos_integer()) -> any(). + boolean()) -> any(). %% @doc %% Run a range query between StartKey and EndKey (inclusive). This will cover %% all keys in the range - so must only be run against snapshots of the @@ -437,21 +437,20 @@ pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc, By) -> %% indexing by segment. 
This cannot be used on ?IDX_TAG and other tags that %% use the no_lookup hash pcl_fetchkeysbysegment(Pid, StartKey, EndKey, AccFun, InitAcc, - SegmentList, LastModRange, MaxObjectCount) -> - MaxKeys = - case MaxObjectCount of + SegmentList, LastModRange, LimitByCount) -> + {MaxKeys, InitAcc0} = + case LimitByCount of + true -> + % The passed in accumulator should have the Max Key Count + % as the first element of a tuple with the actual accumulator + InitAcc; false -> - -1; - MOC when is_integer(MOC) -> - MOC + {-1, InitAcc} end, gen_server:call(Pid, {fetch_keys, - StartKey, EndKey, - AccFun, InitAcc, - SegmentList, - LastModRange, - MaxKeys, + StartKey, EndKey, AccFun, InitAcc0, + SegmentList, LastModRange, MaxKeys, as_pcl}, infinity). @@ -1405,7 +1404,7 @@ keyfolder(IMMiter, SSTiter, StartKey, EndKey, {AccFun, Acc}) -> keyfolder(_Iterators, _KeyRange, {_AccFun, Acc}, {_SegmentList, _LastModRange, MaxKeys}) when MaxKeys == 0 -> - {max_count, Acc}; + {0, Acc}; keyfolder({[], SSTiter}, KeyRange, {AccFun, Acc}, {SegmentList, LastModRange, MaxKeys}) -> {StartKey, EndKey} = KeyRange, @@ -1414,10 +1413,12 @@ keyfolder({[], SSTiter}, KeyRange, {AccFun, Acc}, no_more_keys -> case MaxKeys > 0 of true -> - % Need to single this query ended not because the - % MaxKeys was reached - {no_more_keys, Acc}; + % This query had a max count, so we must respond with the + % remainder on the count + {MaxKeys, Acc}; false -> + % This query started with a MaxKeys set to -1. Query is + % not interested in having MaxKeys in Response Acc end; {NxSSTiter, {SSTKey, SSTVal}} -> diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index 9f4c25e..f034890 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -465,10 +465,10 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList, {C, L}) -> ExtractFun, null), case R of - {no_more_keys, null} -> + {1, null} -> leveled_log:log("B0008",[]), BKList; - {_, {{B, K}, V}} -> + {0, {{B, K}, V}} -> case leveled_codec:is_active({Tag, B, K, null}, V, Now) of true -> leveled_log:log("B0009",[B]), @@ -524,16 +524,24 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, % no initial accumulator passed, and so should be just a list {FoldObjFun, []} end, + {LimitByCount, InitAcc0} = + case MaxObjectCount of + false -> + {false, InitAcc}; + MOC when is_integer(MOC) -> + {true, {MOC, InitAcc}} + end, Folder = fun() -> {ok, LedgerSnapshot, JournalSnapshot} = SnapFun(), - AccFun = accumulate_objects(FoldFun, - JournalSnapshot, - Tag, - DeferredFetch), - + AccFun = + accumulate_objects(FoldFun, + JournalSnapshot, + Tag, + DeferredFetch), + ListFoldFun = fun({StartKey, EndKey}, FoldAcc) -> leveled_penciller:pcl_fetchkeysbysegment(LedgerSnapshot, @@ -543,9 +551,9 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, FoldAcc, SegmentList, LastModRange, - MaxObjectCount) + LimitByCount) end, - Acc = lists:foldl(ListFoldFun, InitAcc, KeyRanges), + Acc = lists:foldl(ListFoldFun, InitAcc0, KeyRanges), ok = leveled_penciller:pcl_close(LedgerSnapshot), case DeferredFetch of {true, false} -> diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index 6eb1962..cf3aa01 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -79,7 +79,7 @@ fetchclocks_modifiedbetween(_Config) -> _ObjL4EndTS = testutil:convert_to_seconds(os:timestamp()), timer:sleep(1000), - _ObjL5StartTS = testutil:convert_to_seconds(os:timestamp()), + ObjL5StartTS = testutil:convert_to_seconds(os:timestamp()), ObjList5 = 
testutil:generate_objects(8000, {fixed_binary, 1}, [], @@ -294,6 +294,36 @@ fetchclocks_modifiedbetween(_Config) -> io:format("R6A_PlusFilter ~w~n", [R6A_PlusFilter]), true = 19000 == element(2, R6A_PlusFilter), + % Hit limit of max count before trying next bucket, with and without a + % timestamp filter + {async, R7A_MultiBucketRunner} = + leveled_bookie:book_headfold(Bookie1A, + ?RIAK_TAG, + {bucket_list, [<<"B1">>, <<"B2">>]}, + {SimpleCountFun, 0}, + false, + true, + false, + {ObjL5StartTS, ObjL6EndTS}, + 5000), + R7A_MultiBucket = R7A_MultiBucketRunner(), + io:format("R7A_MultiBucket ~w ~n", [R7A_MultiBucket]), + true = R7A_MultiBucket == {0, 5000}, + + {async, R8A_MultiBucketRunner} = + leveled_bookie:book_headfold(Bookie1A, + ?RIAK_TAG, + {bucket_list, [<<"B1">>, <<"B2">>]}, + {SimpleCountFun, 0}, + false, + true, + false, + false, + 5000), + R8A_MultiBucket = R8A_MultiBucketRunner(), + io:format("R8A_MultiBucket ~w ~n", [R8A_MultiBucket]), + true = R8A_MultiBucket == {0, 5000}, + ok = leveled_bookie:book_destroy(Bookie1A), ok = leveled_bookie:book_destroy(Bookie1B). From 4b8f493fd6ce799712c6a86fe87dce081fe1129e Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 2 Nov 2018 17:34:58 +0000 Subject: [PATCH 29/29] Allow comparison with empty binaries If trees are non-existent, then fetch_root may return an empty binary (in tictac aae) - still need to compare
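The fix pads the empty side with zero bits up to the other binary's size before comparing, so an absent tree behaves as a tree whose segment hashes are all zero, and every segment with a non-zero hash on the populated side is reported dirty. A sketch of the equivalence, following the test added below (RootB is an illustrative fetched root):

    %% Sketch: comparing against a missing root is the same as comparing
    %% against an all-zero root of matching size.
    ZeroRoot = <<0:(bit_size(RootB))>>,
    true = find_dirtysegments(<<>>, RootB) == find_dirtysegments(ZeroRoot, RootB).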
--- src/leveled_tictac.erl | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 76d77da..495ef7b 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -189,7 +189,7 @@ import_tree(ExportedTree) -> level2 = Lv2}. --spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree(). +-spec add_kv(tictactree(), term(), term(), fun()) -> tictactree(). %% @doc %% Add a Key and value to a tictactree using the BinExtractFun to extract a %% binary from the Key and value from which to generate the hash. The @@ -198,7 +198,7 @@ import_tree(ExportedTree) -> add_kv(TicTacTree, Key, Value, BinExtractFun) -> add_kv(TicTacTree, Key, Value, BinExtractFun, false). --spec add_kv(tictactree(), tuple(), tuple(), fun(), boolean()) +-spec add_kv(tictactree(), term(), term(), fun(), boolean()) -> tictactree()|{tictactree(), integer()}. %% @doc %% add_kv with ability to return segment ID of Key added @@ -523,8 +523,15 @@ get_size(Size) -> ?XLARGE end. + segmentcompare(SrcBin, SinkBin) when byte_size(SrcBin) == byte_size(SinkBin) -> segmentcompare(SrcBin, SinkBin, [], 0); +segmentcompare(<<>>, SinkBin) -> + Size = bit_size(SinkBin), + segmentcompare(<<0:Size/integer>>, SinkBin); +segmentcompare(SrcBin, <<>>) -> + Size = bit_size(SrcBin), + segmentcompare(SrcBin, <<0:Size/integer>>). segmentcompare(<<>>, <<>>, Acc, _Counter) -> Acc; @@ -836,6 +843,19 @@ matchbysegment_check(SegList, MatchList, SmallSize, LargeSize) -> OL = lists:filter(PredFun, MatchList), {timer:now_diff(os:timestamp(), SW)/1000, OL}. +find_dirtysegments_withanemptytree_test() -> + T1 = new_tree(t1), + T2 = new_tree(t2), + ?assertMatch([], find_dirtysegments(fetch_root(T1), fetch_root(T2))), + + {T3, DS1} = + add_kv(T2, <<"TestKey">>, <<"V1">>, fun(B, K) -> {B, K} end, true), + ExpectedAnswer = [DS1 div 256], + ?assertMatch(ExpectedAnswer, find_dirtysegments(<<>>, fetch_root(T3))), + ?assertMatch(ExpectedAnswer, find_dirtysegments(fetch_root(T3), <<>>)). + + + -endif.