diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 39384d9..a1e77c6 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -104,7 +104,6 @@ -define(JOURNAL_SIZE_JITTER, 20). -define(ABSOLUTEMAX_JOURNALSIZE, 4000000000). -define(LONG_RUNNING, 80000). --define(RECENT_AAE, false). -define(COMPRESSION_METHOD, lz4). -define(COMPRESSION_POINT, on_receipt). -define(TIMING_SAMPLESIZE, 100). @@ -118,7 +117,6 @@ {cache_size, ?CACHE_SIZE}, {max_journalsize, 1000000000}, {sync_strategy, none}, - {recent_aae, ?RECENT_AAE}, {head_only, false}, {waste_retention_period, undefined}, {max_run_length, undefined}, @@ -140,7 +138,6 @@ -record(state, {inker :: pid() | undefined, penciller :: pid() | undefined, cache_size :: integer() | undefined, - recent_aae :: recent_aae(), ledger_cache = #ledger_cache{}, is_snapshot :: boolean() | undefined, slow_offer = false :: boolean(), @@ -186,7 +183,6 @@ -type fold_timings() :: no_timing|#fold_timings{}. -type head_timings() :: no_timing|#head_timings{}. -type timing_types() :: head|get|put|fold. --type recent_aae() :: false|#recent_aae{}|undefined. -type key() :: binary()|string()|{binary(), binary()}. % Keys SHOULD be binary() % string() support is a legacy of old tests @@ -220,12 +216,6 @@ % riak_sync is used for backwards compatability with OTP16 - and % will manually call sync() after each write (rather than use the % O_SYNC option on startup - {recent_aae, false|{atom(), list(), integer(), integer()}} | - % DEPRECATED - % Before working on kv_index_tictactree looked at the possibility - % of maintaining AAE just for recent changes. Given the efficiency - % of the kv_index_tictactree approach this is unecessary. - % Should be set to false {head_only, false|with_lookup|no_lookup} | % When set to true, there are three fundamental changes as to how % leveled will work: @@ -1008,16 +998,6 @@ init([Opts]) -> ConfiguredCacheSize div (100 div ?CACHE_SIZE_JITTER), CacheSize = ConfiguredCacheSize + erlang:phash2(self()) rem CacheJitter, - RecentAAE = - case proplists:get_value(recent_aae, Opts) of - false -> - false; - {FilterType, BucketList, LimitMinutes, UnitMinutes} -> - #recent_aae{filter = FilterType, - buckets = BucketList, - limit_minutes = LimitMinutes, - unit_minutes = UnitMinutes} - end, {HeadOnly, HeadLookup} = case proplists:get_value(head_only, Opts) of @@ -1030,7 +1010,6 @@ init([Opts]) -> end, State0 = #state{cache_size=CacheSize, - recent_aae=RecentAAE, is_snapshot=false, head_only=HeadOnly, head_lookup = HeadLookup}, @@ -1926,14 +1905,12 @@ preparefor_ledgercache(?INKT_KEYD, {no_lookup, SQN, KeyChanges}; preparefor_ledgercache(_InkTag, LedgerKey, SQN, Obj, Size, {IdxSpecs, TTL}, - State) -> - {Bucket, Key, MetaValue, {KeyH, ObjH}, LastMods} = + _State) -> + {Bucket, Key, MetaValue, {KeyH, _ObjH}, _LastMods} = leveled_codec:generate_ledgerkv(LedgerKey, SQN, Obj, Size, TTL), KeyChanges = [{LedgerKey, MetaValue}] ++ - leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL) ++ - leveled_codec:aae_indexspecs(State#state.recent_aae, - Bucket, Key, SQN, ObjH, LastMods), + leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL), {KeyH, SQN, KeyChanges}. diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 92c677b..abb70c7 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -63,7 +63,6 @@ get_keyandobjhash/2, idx_indexspecs/5, obj_objectspecs/3, - aae_indexspecs/6, riak_extract_metadata/2, segment_hash/1, to_lookup/1, @@ -76,7 +75,6 @@ -define(NRT_IDX, "$aae."). -define(ALL_BUCKETS, <<"$all">>). --type recent_aae() :: #recent_aae{}. -type riak_metadata() :: {binary()|delete, % Sibling Metadata binary()|null, % Vclock Metadata integer()|null, % Hash of vclock - non-exportable @@ -577,103 +575,6 @@ set_status(remove, _TTL) -> %% TODO: timestamps for delayed reaping tomb. --spec aae_indexspecs(false|recent_aae(), - any(), any(), - integer(), integer(), - list()) - -> list(). -%% @doc -%% Generate an additional index term representing the change, if the last -%% modified date for the change is within the definition of recency. -%% -%% The object may have multiple last modified dates (siblings), and in this -%% case index entries for all dates within the range are added. -%% -%% The index should entry auto-expire in the future (when it is no longer -%% relevant to assessing recent changes) -aae_indexspecs(false, _Bucket, _Key, _SQN, _H, _LastMods) -> - []; -aae_indexspecs(_AAE, _Bucket, _Key, _SQN, _H, []) -> - []; -aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) -> - InList = lists:member(Bucket, AAE#recent_aae.buckets), - Bucket0 = - case AAE#recent_aae.filter of - blacklist -> - case InList of - true -> - false; - false -> - {all, Bucket} - end; - whitelist -> - case InList of - true -> - Bucket; - false -> - false - end - end, - case Bucket0 of - false -> - []; - Bucket0 -> - GenIdxFun = - fun(LMD0, Acc) -> - Dates = parse_date(LMD0, - AAE#recent_aae.unit_minutes, - AAE#recent_aae.limit_minutes, - leveled_util:integer_now()), - case Dates of - no_index -> - Acc; - {LMD1, TTL} -> - TreeSize = AAE#recent_aae.tree_size, - SegID32 = leveled_tictac:keyto_segment32(Key), - SegID = - leveled_tictac:get_segment(SegID32, TreeSize), - IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin", - IdxTrmStr = - string:right(integer_to_list(SegID), 8, $0) ++ - "." ++ - string:right(integer_to_list(H), 8, $0), - {IdxK, IdxV} = - gen_indexspec(Bucket0, Key, - add, - list_to_binary(IdxFldStr), - list_to_binary(IdxTrmStr), - SQN, TTL), - [{IdxK, IdxV}|Acc] - end - end, - lists:foldl(GenIdxFun, [], LastMods) - end. - --spec parse_date(tuple(), integer(), integer(), integer()) -> - no_index|{list(), integer()}. -%% @doc -%% Parse the last modified date and the AAE date configuration to return a -%% binary to be used as the last modified date part of the index, and an -%% integer to be used as the TTL of the index entry. -%% Return no_index if the change is not recent. -parse_date(LMD, UnitMins, LimitMins, Now) -> - LMDsecs = leveled_util:integer_time(LMD), - Recent = (LMDsecs + LimitMins * 60) > Now, - case Recent of - false -> - no_index; - true -> - {{Y, M, D}, {Hour, Minute, _Second}} = - calendar:now_to_datetime(LMD), - RoundMins = - UnitMins * (Minute div UnitMins), - StrTime = - lists:flatten(io_lib:format(?LMD_FORMAT, - [Y, M, D, Hour, RoundMins])), - TTL = min(Now, LMDsecs) + (LimitMins + UnitMins) * 60, - {StrTime, TTL} - end. - -spec generate_ledgerkv( tuple(), integer(), any(), integer(), tuple()|infinity) -> {any(), any(), any(), @@ -927,95 +828,6 @@ hashperf_test() -> io:format(user, "1000 object hashes in ~w microseconds~n", [timer:now_diff(os:timestamp(), SW)]). -parsedate_test() -> - {MeS, S, MiS} = os:timestamp(), - timer:sleep(100), - Now = leveled_util:integer_now(), - UnitMins = 5, - LimitMins = 60, - PD = parse_date({MeS, S, MiS}, UnitMins, LimitMins, Now), - io:format("Parsed Date ~w~n", [PD]), - ?assertMatch(true, is_tuple(PD)), - check_pd(PD, UnitMins), - CheckFun = - fun(Offset) -> - ModDate = {MeS, S + Offset * 60, MiS}, - check_pd(parse_date(ModDate, UnitMins, LimitMins, Now), UnitMins) - end, - lists:foreach(CheckFun, lists:seq(1, 60)). - -check_pd(PD, UnitMins) -> - {LMDstr, _TTL} = PD, - Minutes = list_to_integer(lists:nthtail(10, LMDstr)), - ?assertMatch(0, Minutes rem UnitMins). - -parseolddate_test() -> - LMD = os:timestamp(), - timer:sleep(100), - Now = leveled_util:integer_now() + 60 * 60, - UnitMins = 5, - LimitMins = 60, - PD = parse_date(LMD, UnitMins, LimitMins, Now), - io:format("Parsed Date ~w~n", [PD]), - ?assertMatch(no_index, PD). - -genaaeidx_test() -> - AAE = #recent_aae{filter=blacklist, - buckets=[], - limit_minutes=60, - unit_minutes=5}, - Bucket = <<"Bucket1">>, - Key = <<"Key1">>, - SQN = 1, - H = erlang:phash2(null), - LastMods = [os:timestamp(), os:timestamp()], - - AAESpecs = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods), - ?assertMatch(2, length(AAESpecs)), - - LastMods1 = [os:timestamp()], - AAESpecs1 = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods1), - ?assertMatch(1, length(AAESpecs1)), - IdxB = element(2, element(1, lists:nth(1, AAESpecs1))), - io:format(user, "AAE IDXSpecs1 ~w~n", [AAESpecs1]), - ?assertMatch(<<"$all">>, IdxB), - - LastMods0 = [], - AAESpecs0 = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods0), - ?assertMatch(0, length(AAESpecs0)), - - AAE0 = AAE#recent_aae{filter=whitelist, - buckets=[<<"Bucket0">>]}, - AAESpecsB0 = aae_indexspecs(AAE0, Bucket, Key, SQN, H, LastMods1), - ?assertMatch(0, length(AAESpecsB0)), - - AAESpecsB1 = aae_indexspecs(AAE0, <<"Bucket0">>, Key, SQN, H, LastMods1), - ?assertMatch(1, length(AAESpecsB1)), - [{{?IDX_TAG, <<"Bucket0">>, {Fld, Term}, <<"Key1">>}, - {SQN, {active, TS}, no_lookup, null}}] = AAESpecsB1, - ?assertMatch(true, is_integer(TS)), - ?assertMatch(17, length(binary_to_list(Term))), - ?assertMatch("$aae.", lists:sublist(binary_to_list(Fld), 5)), - - AAE1 = AAE#recent_aae{filter=blacklist, - buckets=[<<"Bucket0">>]}, - AAESpecsB2 = aae_indexspecs(AAE1, <<"Bucket0">>, Key, SQN, H, LastMods1), - ?assertMatch(0, length(AAESpecsB2)). - -delayedupdate_aaeidx_test() -> - AAE = #recent_aae{filter=blacklist, - buckets=[], - limit_minutes=60, - unit_minutes=5}, - Bucket = <<"Bucket1">>, - Key = <<"Key1">>, - SQN = 1, - H = erlang:phash2(null), - {Mega, Sec, MSec} = os:timestamp(), - LastMods = [{Mega -1, Sec, MSec}], - AAESpecs = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods), - ?assertMatch(0, length(AAESpecs)). - head_segment_compare_test() -> % Reminder to align native and parallel(leveled_ko) key stores for % kv_index_tictactree diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index 2c0435d..5a22abe 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -5,20 +5,12 @@ -export([ many_put_compare/1, index_compare/1, - recent_aae_noaae/1, - recent_aae_allaae/1, - recent_aae_bucketaae/1, - recent_aae_expiry/1, basic_headonly/1 ]). all() -> [ many_put_compare, index_compare, - recent_aae_noaae, - recent_aae_allaae, - recent_aae_bucketaae, - recent_aae_expiry, basic_headonly ]. @@ -542,478 +534,6 @@ index_compare(_Config) -> ok = leveled_bookie:book_close(Book2D). -recent_aae_noaae(_Config) -> - % Starts databases with recent_aae tables, and attempt to query to fetch - % recent aae trees returns empty trees as no index entries are found. - - TreeSize = small, - % SegmentCount = 256 * 256, - UnitMins = 2, - - % Test requires multiple different databases, so want to mount them all - % on individual file paths - RootPathA = testutil:reset_filestructure("testA"), - RootPathB = testutil:reset_filestructure("testB"), - RootPathC = testutil:reset_filestructure("testC"), - RootPathD = testutil:reset_filestructure("testD"), - StartOptsA = aae_startopts(RootPathA, false), - StartOptsB = aae_startopts(RootPathB, false), - StartOptsC = aae_startopts(RootPathC, false), - StartOptsD = aae_startopts(RootPathD, false), - - % Book1A to get all objects - {ok, Book1A} = leveled_bookie:book_start(StartOptsA), - % Book1B/C/D will have objects partitioned across it - {ok, Book1B} = leveled_bookie:book_start(StartOptsB), - {ok, Book1C} = leveled_bookie:book_start(StartOptsC), - {ok, Book1D} = leveled_bookie:book_start(StartOptsD), - - {B1, K1, V1, S1, MD} = {"Bucket", - "Key1.1.4567.4321", - "Value1", - [], - [{"MDK1", "MDV1"}]}, - {TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD), - - SW_StartLoad = os:timestamp(), - - ok = testutil:book_riakput(Book1A, TestObject, TestSpec), - ok = testutil:book_riakput(Book1B, TestObject, TestSpec), - testutil:check_forobject(Book1A, TestObject), - testutil:check_forobject(Book1B, TestObject), - - {TicTacTreeJoined, TicTacTreeFull, EmptyTree, _LMDIndexes} = - load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - false), - % Go compare! Also confirm we're not comparing empty trees - DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, - TicTacTreeJoined), - - DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = DL1_0 == [], - true = length(DL1_1) == 0, - - ok = leveled_bookie:book_close(Book1A), - ok = leveled_bookie:book_close(Book1B), - ok = leveled_bookie:book_close(Book1C), - ok = leveled_bookie:book_close(Book1D). - - -recent_aae_allaae(_Config) -> - % Leveled is started in blacklisted mode with no buckets blacklisted. - % - % A number of changes are then loaded into a store, and also partitioned - % across a separate set of three stores. A merge tree is returned from - % both the single store and the partitioned store, and proven to compare - % the same. - % - % A single change is then made, but into one half of the system only. The - % aae index is then re-queried and it is verified that a signle segment - % difference is found. - % - % The segment Id found is then used in a query to find the Keys that make - % up that segment, and the delta discovered should be just that one key - % which was known to have been changed - - TreeSize = small, - % SegmentCount = 256 * 256, - UnitMins = 2, - AAE = {blacklist, [], 60, UnitMins}, - - % Test requires multiple different databases, so want to mount them all - % on individual file paths - RootPathA = testutil:reset_filestructure("testA"), - RootPathB = testutil:reset_filestructure("testB"), - RootPathC = testutil:reset_filestructure("testC"), - RootPathD = testutil:reset_filestructure("testD"), - StartOptsA = aae_startopts(RootPathA, AAE), - StartOptsB = aae_startopts(RootPathB, AAE), - StartOptsC = aae_startopts(RootPathC, AAE), - StartOptsD = aae_startopts(RootPathD, AAE), - - % Book1A to get all objects - {ok, Book1A} = leveled_bookie:book_start(StartOptsA), - % Book1B/C/D will have objects partitioned across it - {ok, Book1B} = leveled_bookie:book_start(StartOptsB), - {ok, Book1C} = leveled_bookie:book_start(StartOptsC), - {ok, Book1D} = leveled_bookie:book_start(StartOptsD), - - {B1, K1, V1, S1, MD} = {"Bucket", - "Key1.1.4567.4321", - "Value1", - [], - [{"MDK1", "MDV1"}]}, - {TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD), - - SW_StartLoad = os:timestamp(), - - ok = testutil:book_riakput(Book1A, TestObject, TestSpec), - ok = testutil:book_riakput(Book1B, TestObject, TestSpec), - testutil:check_forobject(Book1A, TestObject), - testutil:check_forobject(Book1B, TestObject), - - {TicTacTreeJoined, TicTacTreeFull, EmptyTree, LMDIndexes} = - load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - false), - % Go compare! Also confirm we're not comparing empty trees - DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, - TicTacTreeJoined), - - DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = DL1_0 == [], - true = length(DL1_1) > 100, - - ok = leveled_bookie:book_close(Book1A), - ok = leveled_bookie:book_close(Book1B), - ok = leveled_bookie:book_close(Book1C), - ok = leveled_bookie:book_close(Book1D), - - % Book2A to get all objects - {ok, Book2A} = leveled_bookie:book_start(StartOptsA), - % Book2B/C/D will have objects partitioned across it - {ok, Book2B} = leveled_bookie:book_start(StartOptsB), - {ok, Book2C} = leveled_bookie:book_start(StartOptsC), - {ok, Book2D} = leveled_bookie:book_start(StartOptsD), - - {TicTacTreeJoined, TicTacTreeFull, EmptyTree, LMDIndexes} = - load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D, - SW_StartLoad, TreeSize, UnitMins, - LMDIndexes), - % Go compare! Also confirm we're not comparing empty trees - DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, - TicTacTreeJoined), - - DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = DL1_0 == [], - true = length(DL1_1) > 100, - - V2 = "Value2", - {TestObject2, TestSpec2} = - testutil:generate_testobject(B1, K1, V2, S1, MD), - - New_startTS = os:timestamp(), - - ok = testutil:book_riakput(Book2B, TestObject2, TestSpec2), - testutil:check_forobject(Book2B, TestObject2), - testutil:check_forobject(Book2A, TestObject), - - New_endTS = os:timestamp(), - NewLMDIndexes = determine_lmd_indexes(New_startTS, New_endTS, UnitMins), - {TicTacTreeJoined2, TicTacTreeFull2, _EmptyTree, NewLMDIndexes} = - load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D, - New_startTS, TreeSize, UnitMins, - NewLMDIndexes), - DL2_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull2, - TicTacTreeJoined2), - - % DL2_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = length(DL2_0) == 1, - - [DirtySeg] = DL2_0, - TermPrefix = string:right(integer_to_list(DirtySeg), 8, $0), - - LMDSegFolder = - fun(LMD, {Acc, Bookie}) -> - IdxLMD = list_to_binary("$aae." ++ LMD ++ "_bin"), - IdxQ1 = - {index_query, - <<"$all">>, - {fun testutil:foldkeysfun_returnbucket/3, []}, - {IdxLMD, - list_to_binary(TermPrefix ++ "."), - list_to_binary(TermPrefix ++ "|")}, - {true, undefined}}, - {async, IdxFolder} = - leveled_bookie:book_returnfolder(Bookie, IdxQ1), - {Acc ++ IdxFolder(), Bookie} - end, - {KeysTerms2A, _} = lists:foldl(LMDSegFolder, - {[], Book2A}, - lists:usort(LMDIndexes ++ NewLMDIndexes)), - true = length(KeysTerms2A) >= 1, - - {KeysTerms2B, _} = lists:foldl(LMDSegFolder, - {[], Book2B}, - lists:usort(LMDIndexes ++ NewLMDIndexes)), - {KeysTerms2C, _} = lists:foldl(LMDSegFolder, - {[], Book2C}, - lists:usort(LMDIndexes ++ NewLMDIndexes)), - {KeysTerms2D, _} = lists:foldl(LMDSegFolder, - {[], Book2D}, - lists:usort(LMDIndexes ++ NewLMDIndexes)), - - KeysTerms2Joined = KeysTerms2B ++ KeysTerms2C ++ KeysTerms2D, - DeltaX = lists:subtract(KeysTerms2A, KeysTerms2Joined), - DeltaY = lists:subtract(KeysTerms2Joined, KeysTerms2A), - - io:format("DeltaX ~w~n", [DeltaX]), - io:format("DeltaY ~w~n", [DeltaY]), - - true = length(DeltaX) == 0, % This hasn't seen any extra changes - true = length(DeltaY) == 1, % This has seen an extra change - [{_, {B1, K1}}] = DeltaY, - - ok = leveled_bookie:book_close(Book2A), - ok = leveled_bookie:book_close(Book2B), - ok = leveled_bookie:book_close(Book2C), - ok = leveled_bookie:book_close(Book2D). - - - -recent_aae_bucketaae(_Config) -> - % Configure AAE to work only on a single whitelisted bucket - % Confirm that we can spot a delta in this bucket, but not - % in another bucket - - TreeSize = small, - % SegmentCount = 256 * 256, - UnitMins = 2, - AAE = {whitelist, [<<"Bucket">>], 60, UnitMins}, - - % Test requires multiple different databases, so want to mount them all - % on individual file paths - RootPathA = testutil:reset_filestructure("testA"), - RootPathB = testutil:reset_filestructure("testB"), - RootPathC = testutil:reset_filestructure("testC"), - RootPathD = testutil:reset_filestructure("testD"), - StartOptsA = aae_startopts(RootPathA, AAE), - StartOptsB = aae_startopts(RootPathB, AAE), - StartOptsC = aae_startopts(RootPathC, AAE), - StartOptsD = aae_startopts(RootPathD, AAE), - - % Book1A to get all objects - {ok, Book1A} = leveled_bookie:book_start(StartOptsA), - % Book1B/C/D will have objects partitioned across it - {ok, Book1B} = leveled_bookie:book_start(StartOptsB), - {ok, Book1C} = leveled_bookie:book_start(StartOptsC), - {ok, Book1D} = leveled_bookie:book_start(StartOptsD), - - {B1, K1, V1, S1, MD} = {<<"Bucket">>, - "Key1.1.4567.4321", - "Value1", - [], - [{"MDK1", "MDV1"}]}, - {TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD), - - SW_StartLoad = os:timestamp(), - - ok = testutil:book_riakput(Book1A, TestObject, TestSpec), - ok = testutil:book_riakput(Book1B, TestObject, TestSpec), - testutil:check_forobject(Book1A, TestObject), - testutil:check_forobject(Book1B, TestObject), - - {TicTacTreeJoined, TicTacTreeFull, EmptyTree, LMDIndexes} = - load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - false, <<"Bucket">>), - % Go compare! Also confirm we're not comparing empty trees - DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, - TicTacTreeJoined), - - DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = DL1_0 == [], - true = length(DL1_1) > 100, - - ok = leveled_bookie:book_close(Book1A), - ok = leveled_bookie:book_close(Book1B), - ok = leveled_bookie:book_close(Book1C), - ok = leveled_bookie:book_close(Book1D), - - % Book2A to get all objects - {ok, Book2A} = leveled_bookie:book_start(StartOptsA), - % Book2B/C/D will have objects partitioned across it - {ok, Book2B} = leveled_bookie:book_start(StartOptsB), - {ok, Book2C} = leveled_bookie:book_start(StartOptsC), - {ok, Book2D} = leveled_bookie:book_start(StartOptsD), - - % Change the value for a key in another bucket - % If we get trees for this period, no difference should be found - - V2 = "Value2", - {TestObject2, TestSpec2} = - testutil:generate_testobject(<<"NotBucket">>, K1, V2, S1, MD), - - New_startTS2 = os:timestamp(), - - ok = testutil:book_riakput(Book2B, TestObject2, TestSpec2), - testutil:check_forobject(Book2B, TestObject2), - testutil:check_forobject(Book2A, TestObject), - - New_endTS2 = os:timestamp(), - NewLMDIndexes2 = determine_lmd_indexes(New_startTS2, New_endTS2, UnitMins), - {TicTacTreeJoined2, TicTacTreeFull2, _EmptyTree, NewLMDIndexes2} = - load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D, - New_startTS2, TreeSize, UnitMins, - NewLMDIndexes2, <<"Bucket">>), - DL2_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull2, - TicTacTreeJoined2), - true = length(DL2_0) == 0, - - % Now create an object that is a change to an existing key in the - % monitored bucket. A differrence should be found - - {TestObject3, TestSpec3} = - testutil:generate_testobject(B1, K1, V2, S1, MD), - - New_startTS3 = os:timestamp(), - - ok = testutil:book_riakput(Book2B, TestObject3, TestSpec3), - testutil:check_forobject(Book2B, TestObject3), - testutil:check_forobject(Book2A, TestObject), - - New_endTS3 = os:timestamp(), - NewLMDIndexes3 = determine_lmd_indexes(New_startTS3, New_endTS3, UnitMins), - {TicTacTreeJoined3, TicTacTreeFull3, _EmptyTree, NewLMDIndexes3} = - load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D, - New_startTS3, TreeSize, UnitMins, - NewLMDIndexes3, <<"Bucket">>), - DL3_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull3, - TicTacTreeJoined3), - - % DL2_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree), - true = length(DL3_0) == 1, - - % Find the dirty segment, and use that to find the dirty key - % - % Note that unlike when monitoring $all, fold_keys can be used as there - % is no need to return the Bucket (as hte bucket is known) - - [DirtySeg] = DL3_0, - TermPrefix = string:right(integer_to_list(DirtySeg), 8, $0), - - LMDSegFolder = - fun(LMD, {Acc, Bookie}) -> - IdxLMD = list_to_binary("$aae." ++ LMD ++ "_bin"), - IdxQ1 = - {index_query, - <<"Bucket">>, - {fun testutil:foldkeysfun/3, []}, - {IdxLMD, - list_to_binary(TermPrefix ++ "."), - list_to_binary(TermPrefix ++ "|")}, - {true, undefined}}, - {async, IdxFolder} = - leveled_bookie:book_returnfolder(Bookie, IdxQ1), - {Acc ++ IdxFolder(), Bookie} - end, - {KeysTerms2A, _} = lists:foldl(LMDSegFolder, - {[], Book2A}, - lists:usort(LMDIndexes ++ NewLMDIndexes3)), - true = length(KeysTerms2A) >= 1, - - {KeysTerms2B, _} = lists:foldl(LMDSegFolder, - {[], Book2B}, - lists:usort(LMDIndexes ++ NewLMDIndexes3)), - {KeysTerms2C, _} = lists:foldl(LMDSegFolder, - {[], Book2C}, - lists:usort(LMDIndexes ++ NewLMDIndexes3)), - {KeysTerms2D, _} = lists:foldl(LMDSegFolder, - {[], Book2D}, - lists:usort(LMDIndexes ++ NewLMDIndexes3)), - - KeysTerms2Joined = KeysTerms2B ++ KeysTerms2C ++ KeysTerms2D, - DeltaX = lists:subtract(KeysTerms2A, KeysTerms2Joined), - DeltaY = lists:subtract(KeysTerms2Joined, KeysTerms2A), - - io:format("DeltaX ~w~n", [DeltaX]), - io:format("DeltaY ~w~n", [DeltaY]), - - true = length(DeltaX) == 0, % This hasn't seen any extra changes - true = length(DeltaY) == 1, % This has seen an extra change - [{_, K1}] = DeltaY, - - ok = leveled_bookie:book_close(Book2A), - ok = leveled_bookie:book_close(Book2B), - ok = leveled_bookie:book_close(Book2C), - ok = leveled_bookie:book_close(Book2D). - - -recent_aae_expiry(_Config) -> - % Proof that the index entries are indeed expired - - TreeSize = small, - % SegmentCount = 256 * 256, - UnitMins = 1, - TotalMins = 2, - AAE = {blacklist, [], TotalMins, UnitMins}, - - % Test requires multiple different databases, so want to mount them all - % on individual file paths - RootPathA = testutil:reset_filestructure("testA"), - StartOptsA = aae_startopts(RootPathA, AAE), - - % Book1A to get all objects - {ok, Book1A} = leveled_bookie:book_start(StartOptsA), - - GenMapFun = - fun(_X) -> - V = testutil:get_compressiblevalue(), - Indexes = testutil:get_randomindexes_generator(8), - testutil:generate_objects(5000, - binary_uuid, - [], - V, - Indexes) - end, - - ObjLists = lists:map(GenMapFun, lists:seq(1, 3)), - - SW0 = os:timestamp(), - % Load all nine lists into Book1A - lists:foreach(fun(ObjL) -> testutil:riakload(Book1A, ObjL) end, - ObjLists), - SW1 = os:timestamp(), - % sleep for two minutes, so all index entries will have expired - GetTicTacTreeFun = - fun(Bookie) -> - get_tictactree_fun(Bookie, <<"$all">>, TreeSize) - end, - EmptyTree = leveled_tictac:new_tree(empty, TreeSize), - LMDIndexes = determine_lmd_indexes(SW0, SW1, UnitMins), - - % Should get a non-empty answer to the query - TicTacTree1_Full = - lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes), - DL3_0 = leveled_tictac:find_dirtyleaves(TicTacTree1_Full, EmptyTree), - io:format("Dirty leaves found before expiry ~w~n", [length(DL3_0)]), - - true = length(DL3_0) > 0, - - SecondsSinceLMD = timer:now_diff(os:timestamp(), SW0) div 1000000, - SecondsToExpiry = (TotalMins + UnitMins) * 60, - - io:format("SecondsToExpiry ~w SecondsSinceLMD ~w~n", - [SecondsToExpiry, SecondsSinceLMD]), - io:format("LMDIndexes ~w~n", [LMDIndexes]), - - case SecondsToExpiry > SecondsSinceLMD of - true -> - timer:sleep((1 + SecondsToExpiry - SecondsSinceLMD) * 1000); - false -> - timer:sleep(1000) - end, - - % Should now get an empty answer - all entries have expired - TicTacTree2_Full = - lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes), - DL4_0 = leveled_tictac:find_dirtyleaves(TicTacTree2_Full, EmptyTree), - io:format("Dirty leaves found after expiry ~w~n", [length(DL4_0)]), - - timer:sleep(10000), - - TicTacTree3_Full = - lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes), - DL5_0 = leveled_tictac:find_dirtyleaves(TicTacTree3_Full, EmptyTree), - io:format("Dirty leaves found after expiry plus 10s ~w~n", [length(DL5_0)]), - - - ok = leveled_bookie:book_close(Book1A), - - true = length(DL4_0) == 0. - - basic_headonly(_Config) -> ObjectCount = 200000, RemoveCount = 100, @@ -1196,145 +716,6 @@ load_objectspecs(ObjectSpecL, SliceSize, Bookie) -> end. - -load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - LMDIndexes_Loaded) -> - load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - LMDIndexes_Loaded, <<"$all">>). - -load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D, - SW_StartLoad, TreeSize, UnitMins, - LMDIndexes_Loaded, Bucket) -> - LMDIndexes = - case LMDIndexes_Loaded of - false -> - % Generate nine lists of objects - % BucketBin = list_to_binary("Bucket"), - GenMapFun = - fun(_X) -> - V = testutil:get_compressiblevalue(), - Indexes = testutil:get_randomindexes_generator(8), - testutil:generate_objects(5000, - binary_uuid, - [], - V, - Indexes) - end, - - ObjLists = lists:map(GenMapFun, lists:seq(1, 9)), - - % Load all nine lists into Book1A - lists:foreach(fun(ObjL) -> testutil:riakload(Book1A, ObjL) end, - ObjLists), - - % Split nine lists across Book1B to Book1D, three object lists - % in each - lists:foreach(fun(ObjL) -> testutil:riakload(Book1B, ObjL) end, - lists:sublist(ObjLists, 1, 3)), - lists:foreach(fun(ObjL) -> testutil:riakload(Book1C, ObjL) end, - lists:sublist(ObjLists, 4, 3)), - lists:foreach(fun(ObjL) -> testutil:riakload(Book1D, ObjL) end, - lists:sublist(ObjLists, 7, 3)), - - SW_EndLoad = os:timestamp(), - determine_lmd_indexes(SW_StartLoad, SW_EndLoad, UnitMins); - _ -> - LMDIndexes_Loaded - end, - - EmptyTree = leveled_tictac:new_tree(empty, TreeSize), - - GetTicTacTreeFun = - fun(Bookie) -> - get_tictactree_fun(Bookie, Bucket, TreeSize) - end, - - % Get a TicTac tree representing one of the indexes in Bucket A - TicTacTree1_Full = - lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes), - - TicTacTree1_P1 = - lists:foldl(GetTicTacTreeFun(Book1B), EmptyTree, LMDIndexes), - TicTacTree1_P2 = - lists:foldl(GetTicTacTreeFun(Book1C), EmptyTree, LMDIndexes), - TicTacTree1_P3 = - lists:foldl(GetTicTacTreeFun(Book1D), EmptyTree, LMDIndexes), - - % Merge the tree across the partitions - TicTacTree1_Joined = lists:foldl(fun leveled_tictac:merge_trees/2, - TicTacTree1_P1, - [TicTacTree1_P2, TicTacTree1_P3]), - - {TicTacTree1_Full, TicTacTree1_Joined, EmptyTree, LMDIndexes}. - - -aae_startopts(RootPath, AAE) -> - LS = 2000, - JS = 50000000, - SS = testutil:sync_strategy(), - [{root_path, RootPath}, - {sync_strategy, SS}, - {cache_size, LS}, - {max_journalsize, JS}, - {recent_aae, AAE}]. - - -determine_lmd_indexes(StartTS, EndTS, UnitMins) -> - StartDT = calendar:now_to_datetime(StartTS), - EndDT = calendar:now_to_datetime(EndTS), - StartTimeStr = get_strtime(StartDT, UnitMins), - EndTimeStr = get_strtime(EndDT, UnitMins), - - AddTimeFun = - fun(X, Acc) -> - case lists:member(EndTimeStr, Acc) of - true -> - Acc; - false -> - NextTime = - UnitMins * 60 * X + - calendar:datetime_to_gregorian_seconds(StartDT), - NextDT = - calendar:gregorian_seconds_to_datetime(NextTime), - Acc ++ [get_strtime(NextDT, UnitMins)] - end - end, - - lists:foldl(AddTimeFun, [StartTimeStr], lists:seq(1, 10)). - - -get_strtime(DateTime, UnitMins) -> - {{Y, M, D}, {Hour, Minute, _Second}} = DateTime, - RoundMins = - UnitMins * (Minute div UnitMins), - StrTime = - lists:flatten(io_lib:format(?LMD_FORMAT, - [Y, M, D, Hour, RoundMins])), - StrTime. - - -get_tictactree_fun(Bookie, Bucket, TreeSize) -> - fun(LMD, Acc) -> - SW = os:timestamp(), - ST = <<"0">>, - ET = <<"A">>, - Q = {tictactree_idx, - {Bucket, - list_to_binary("$aae." ++ LMD ++ "_bin"), - ST, - ET}, - TreeSize, - fun(_B, _K) -> accumulate end}, - {async, Folder} = leveled_bookie:book_returnfolder(Bookie, Q), - R = Folder(), - io:format("TicTac Tree for index ~s took " ++ - "~w microseconds~n", - [LMD, timer:now_diff(os:timestamp(), SW)]), - leveled_tictac:merge_trees(R, Acc) - end. - get_segment(K, SegmentCount) -> BinKey = case is_binary(K) of