Extract deprecated recent_aae

Ready to add other forms of last modified filtering
This commit is contained in:
Martin Sumner 2018-10-29 15:49:50 +00:00
parent 5cbda7c2f7
commit 2e2c35fe1b
3 changed files with 3 additions and 833 deletions
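For orientation, a minimal sketch (illustrative only, not part of this commit) of how a bookie is started once recent_aae is gone: the option is simply no longer included in the proplist passed to leveled_bookie:book_start/1. The path and sizes below are hypothetical values.

%% Illustrative start options after this change - no {recent_aae, ...} entry
StartOpts = [{root_path, "/tmp/leveled_data"},   % hypothetical path
             {sync_strategy, none},
             {cache_size, 2000},
             {max_journalsize, 50000000}],
{ok, Bookie} = leveled_bookie:book_start(StartOpts),
ok = leveled_bookie:book_close(Bookie).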

View file

@@ -104,7 +104,6 @@
-define(JOURNAL_SIZE_JITTER, 20).
-define(ABSOLUTEMAX_JOURNALSIZE, 4000000000).
-define(LONG_RUNNING, 80000).
-define(RECENT_AAE, false).
-define(COMPRESSION_METHOD, lz4).
-define(COMPRESSION_POINT, on_receipt).
-define(TIMING_SAMPLESIZE, 100).
@@ -118,7 +117,6 @@
{cache_size, ?CACHE_SIZE},
{max_journalsize, 1000000000},
{sync_strategy, none},
{recent_aae, ?RECENT_AAE},
{head_only, false},
{waste_retention_period, undefined},
{max_run_length, undefined},
@@ -140,7 +138,6 @@
-record(state, {inker :: pid() | undefined,
penciller :: pid() | undefined,
cache_size :: integer() | undefined,
recent_aae :: recent_aae(),
ledger_cache = #ledger_cache{},
is_snapshot :: boolean() | undefined,
slow_offer = false :: boolean(),
@@ -186,7 +183,6 @@
-type fold_timings() :: no_timing|#fold_timings{}.
-type head_timings() :: no_timing|#head_timings{}.
-type timing_types() :: head|get|put|fold.
-type recent_aae() :: false|#recent_aae{}|undefined.
-type key() :: binary()|string()|{binary(), binary()}.
% Keys SHOULD be binary()
% string() support is a legacy of old tests
@@ -220,12 +216,6 @@
% riak_sync is used for backwards compatibility with OTP16 - and
% will manually call sync() after each write (rather than use the
% O_SYNC option on startup)
{recent_aae, false|{atom(), list(), integer(), integer()}} |
% DEPRECATED
% Before working on kv_index_tictactree, the possibility of
% maintaining AAE just for recent changes was considered. Given the
% efficiency of the kv_index_tictactree approach this is unnecessary.
% Should be set to false
{head_only, false|with_lookup|no_lookup} |
% When set to true, there are three fundamental changes as to how
% leveled will work:
@@ -1008,16 +998,6 @@ init([Opts]) ->
ConfiguredCacheSize div (100 div ?CACHE_SIZE_JITTER),
CacheSize =
ConfiguredCacheSize + erlang:phash2(self()) rem CacheJitter,
RecentAAE =
case proplists:get_value(recent_aae, Opts) of
false ->
false;
{FilterType, BucketList, LimitMinutes, UnitMinutes} ->
#recent_aae{filter = FilterType,
buckets = BucketList,
limit_minutes = LimitMinutes,
unit_minutes = UnitMinutes}
end,
{HeadOnly, HeadLookup} =
case proplists:get_value(head_only, Opts) of
@@ -1030,7 +1010,6 @@ init([Opts]) ->
end,
State0 = #state{cache_size=CacheSize,
recent_aae=RecentAAE,
is_snapshot=false,
head_only=HeadOnly,
head_lookup = HeadLookup},
@@ -1926,14 +1905,12 @@ preparefor_ledgercache(?INKT_KEYD,
{no_lookup, SQN, KeyChanges};
preparefor_ledgercache(_InkTag,
LedgerKey, SQN, Obj, Size, {IdxSpecs, TTL},
State) ->
_State) ->
{Bucket, Key, MetaValue, {KeyH, ObjH}, LastMods} =
{Bucket, Key, MetaValue, {KeyH, _ObjH}, _LastMods} =
leveled_codec:generate_ledgerkv(LedgerKey, SQN, Obj, Size, TTL),
KeyChanges =
[{LedgerKey, MetaValue}] ++
leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL) ++
leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL),
leveled_codec:aae_indexspecs(State#state.recent_aae,
Bucket, Key, SQN, ObjH, LastMods),
{KeyH, SQN, KeyChanges}.

View file

@@ -63,7 +63,6 @@
get_keyandobjhash/2,
idx_indexspecs/5,
obj_objectspecs/3,
aae_indexspecs/6,
riak_extract_metadata/2,
segment_hash/1,
to_lookup/1,
@@ -76,7 +75,6 @@
-define(NRT_IDX, "$aae.").
-define(ALL_BUCKETS, <<"$all">>).
-type recent_aae() :: #recent_aae{}.
-type riak_metadata() :: {binary()|delete, % Sibling Metadata
binary()|null, % Vclock Metadata
integer()|null, % Hash of vclock - non-exportable
@@ -577,103 +575,6 @@ set_status(remove, _TTL) ->
%% TODO: timestamps for delayed reaping
tomb.
-spec aae_indexspecs(false|recent_aae(),
any(), any(),
integer(), integer(),
list())
-> list().
%% @doc
%% Generate an additional index term representing the change, if the last
%% modified date for the change is within the definition of recency.
%%
%% The object may have multiple last modified dates (siblings), and in this
%% case index entries for all dates within the range are added.
%%
%% The index entry should auto-expire in the future (when it is no longer
%% relevant to assessing recent changes)
aae_indexspecs(false, _Bucket, _Key, _SQN, _H, _LastMods) ->
[];
aae_indexspecs(_AAE, _Bucket, _Key, _SQN, _H, []) ->
[];
aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) ->
InList = lists:member(Bucket, AAE#recent_aae.buckets),
Bucket0 =
case AAE#recent_aae.filter of
blacklist ->
case InList of
true ->
false;
false ->
{all, Bucket}
end;
whitelist ->
case InList of
true ->
Bucket;
false ->
false
end
end,
case Bucket0 of
false ->
[];
Bucket0 ->
GenIdxFun =
fun(LMD0, Acc) ->
Dates = parse_date(LMD0,
AAE#recent_aae.unit_minutes,
AAE#recent_aae.limit_minutes,
leveled_util:integer_now()),
case Dates of
no_index ->
Acc;
{LMD1, TTL} ->
TreeSize = AAE#recent_aae.tree_size,
SegID32 = leveled_tictac:keyto_segment32(Key),
SegID =
leveled_tictac:get_segment(SegID32, TreeSize),
IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin",
IdxTrmStr =
string:right(integer_to_list(SegID), 8, $0) ++
"." ++
string:right(integer_to_list(H), 8, $0),
{IdxK, IdxV} =
gen_indexspec(Bucket0, Key,
add,
list_to_binary(IdxFldStr),
list_to_binary(IdxTrmStr),
SQN, TTL),
[{IdxK, IdxV}|Acc]
end
end,
lists:foldl(GenIdxFun, [], LastMods)
end.
-spec parse_date(tuple(), integer(), integer(), integer()) ->
no_index|{list(), integer()}.
%% @doc
%% Parse the last modified date and the AAE date configuration to return a
%% string to be used as the last modified date part of the index, and an
%% integer to be used as the TTL of the index entry.
%% Return no_index if the change is not recent.
parse_date(LMD, UnitMins, LimitMins, Now) ->
LMDsecs = leveled_util:integer_time(LMD),
Recent = (LMDsecs + LimitMins * 60) > Now,
case Recent of
false ->
no_index;
true ->
{{Y, M, D}, {Hour, Minute, _Second}} =
calendar:now_to_datetime(LMD),
RoundMins =
UnitMins * (Minute div UnitMins),
StrTime =
lists:flatten(io_lib:format(?LMD_FORMAT,
[Y, M, D, Hour, RoundMins])),
TTL = min(Now, LMDsecs) + (LimitMins + UnitMins) * 60,
{StrTime, TTL}
end.
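To make the removed behaviour concrete, here is a hedged sketch (illustrative only; the bound variables and the field/term values shown are hypothetical, and the exact ?LMD_FORMAT layout is assumed) of the index entries that aae_indexspecs/6 used to emit, one per recent last-modified date:

%% Illustrative sketch of the removed behaviour (values hypothetical)
AAE = #recent_aae{filter = blacklist, buckets = [],
                  limit_minutes = 60, unit_minutes = 5},
AAESpecs = aae_indexspecs(AAE, <<"Bucket0">>, <<"Key1">>, SQN, ObjHash, LastMods),
%% Each element of AAESpecs was shaped roughly as follows:
%%   Field: "$aae." ++ LMD ++ "_bin" - LMD being the last modified time
%%          rounded down to the nearest unit_minutes (rendered via ?LMD_FORMAT)
%%   Term:  an 8-digit zero-padded TicTac segment id, a ".", and an
%%          8-digit zero-padded object hash (17 characters in total)
%%   TTL:   min(Now, LMDsecs) + (limit_minutes + unit_minutes) * 60
%% e.g. (field/term values hypothetical):
%%   {{?IDX_TAG, <<"Bucket0">>, {<<"$aae.201810291545_bin">>,
%%                               <<"00012345.00098765">>}, <<"Key1">>},
%%    {SQN, {active, TTL}, no_lookup, null}}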
-spec generate_ledgerkv(
tuple(), integer(), any(), integer(), tuple()|infinity) ->
{any(), any(), any(),
@@ -927,95 +828,6 @@ hashperf_test() ->
io:format(user, "1000 object hashes in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SW)]).
parsedate_test() ->
{MeS, S, MiS} = os:timestamp(),
timer:sleep(100),
Now = leveled_util:integer_now(),
UnitMins = 5,
LimitMins = 60,
PD = parse_date({MeS, S, MiS}, UnitMins, LimitMins, Now),
io:format("Parsed Date ~w~n", [PD]),
?assertMatch(true, is_tuple(PD)),
check_pd(PD, UnitMins),
CheckFun =
fun(Offset) ->
ModDate = {MeS, S + Offset * 60, MiS},
check_pd(parse_date(ModDate, UnitMins, LimitMins, Now), UnitMins)
end,
lists:foreach(CheckFun, lists:seq(1, 60)).
check_pd(PD, UnitMins) ->
{LMDstr, _TTL} = PD,
Minutes = list_to_integer(lists:nthtail(10, LMDstr)),
?assertMatch(0, Minutes rem UnitMins).
parseolddate_test() ->
LMD = os:timestamp(),
timer:sleep(100),
Now = leveled_util:integer_now() + 60 * 60,
UnitMins = 5,
LimitMins = 60,
PD = parse_date(LMD, UnitMins, LimitMins, Now),
io:format("Parsed Date ~w~n", [PD]),
?assertMatch(no_index, PD).
genaaeidx_test() ->
AAE = #recent_aae{filter=blacklist,
buckets=[],
limit_minutes=60,
unit_minutes=5},
Bucket = <<"Bucket1">>,
Key = <<"Key1">>,
SQN = 1,
H = erlang:phash2(null),
LastMods = [os:timestamp(), os:timestamp()],
AAESpecs = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods),
?assertMatch(2, length(AAESpecs)),
LastMods1 = [os:timestamp()],
AAESpecs1 = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods1),
?assertMatch(1, length(AAESpecs1)),
IdxB = element(2, element(1, lists:nth(1, AAESpecs1))),
io:format(user, "AAE IDXSpecs1 ~w~n", [AAESpecs1]),
?assertMatch(<<"$all">>, IdxB),
LastMods0 = [],
AAESpecs0 = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods0),
?assertMatch(0, length(AAESpecs0)),
AAE0 = AAE#recent_aae{filter=whitelist,
buckets=[<<"Bucket0">>]},
AAESpecsB0 = aae_indexspecs(AAE0, Bucket, Key, SQN, H, LastMods1),
?assertMatch(0, length(AAESpecsB0)),
AAESpecsB1 = aae_indexspecs(AAE0, <<"Bucket0">>, Key, SQN, H, LastMods1),
?assertMatch(1, length(AAESpecsB1)),
[{{?IDX_TAG, <<"Bucket0">>, {Fld, Term}, <<"Key1">>},
{SQN, {active, TS}, no_lookup, null}}] = AAESpecsB1,
?assertMatch(true, is_integer(TS)),
?assertMatch(17, length(binary_to_list(Term))),
?assertMatch("$aae.", lists:sublist(binary_to_list(Fld), 5)),
AAE1 = AAE#recent_aae{filter=blacklist,
buckets=[<<"Bucket0">>]},
AAESpecsB2 = aae_indexspecs(AAE1, <<"Bucket0">>, Key, SQN, H, LastMods1),
?assertMatch(0, length(AAESpecsB2)).
delayedupdate_aaeidx_test() ->
AAE = #recent_aae{filter=blacklist,
buckets=[],
limit_minutes=60,
unit_minutes=5},
Bucket = <<"Bucket1">>,
Key = <<"Key1">>,
SQN = 1,
H = erlang:phash2(null),
{Mega, Sec, MSec} = os:timestamp(),
LastMods = [{Mega -1, Sec, MSec}],
AAESpecs = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods),
?assertMatch(0, length(AAESpecs)).
head_segment_compare_test() ->
% Reminder to align native and parallel(leveled_ko) key stores for
% kv_index_tictactree

View file

@@ -5,20 +5,12 @@
-export([
many_put_compare/1,
index_compare/1,
recent_aae_noaae/1,
recent_aae_allaae/1,
recent_aae_bucketaae/1,
recent_aae_expiry/1,
basic_headonly/1
]).
all() -> [
many_put_compare,
index_compare,
recent_aae_noaae,
recent_aae_allaae,
recent_aae_bucketaae,
recent_aae_expiry,
basic_headonly
].
@@ -542,478 +534,6 @@ index_compare(_Config) ->
ok = leveled_bookie:book_close(Book2D).
recent_aae_noaae(_Config) ->
% Starts databases with recent_aae disabled, and an attempt to query to
% fetch recent aae trees returns empty trees as no index entries are found.
TreeSize = small,
% SegmentCount = 256 * 256,
UnitMins = 2,
% Test requires multiple different databases, so want to mount them all
% on individual file paths
RootPathA = testutil:reset_filestructure("testA"),
RootPathB = testutil:reset_filestructure("testB"),
RootPathC = testutil:reset_filestructure("testC"),
RootPathD = testutil:reset_filestructure("testD"),
StartOptsA = aae_startopts(RootPathA, false),
StartOptsB = aae_startopts(RootPathB, false),
StartOptsC = aae_startopts(RootPathC, false),
StartOptsD = aae_startopts(RootPathD, false),
% Book1A to get all objects
{ok, Book1A} = leveled_bookie:book_start(StartOptsA),
% Book1B/C/D will have objects partitioned across it
{ok, Book1B} = leveled_bookie:book_start(StartOptsB),
{ok, Book1C} = leveled_bookie:book_start(StartOptsC),
{ok, Book1D} = leveled_bookie:book_start(StartOptsD),
{B1, K1, V1, S1, MD} = {"Bucket",
"Key1.1.4567.4321",
"Value1",
[],
[{"MDK1", "MDV1"}]},
{TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD),
SW_StartLoad = os:timestamp(),
ok = testutil:book_riakput(Book1A, TestObject, TestSpec),
ok = testutil:book_riakput(Book1B, TestObject, TestSpec),
testutil:check_forobject(Book1A, TestObject),
testutil:check_forobject(Book1B, TestObject),
{TicTacTreeJoined, TicTacTreeFull, EmptyTree, _LMDIndexes} =
load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D,
SW_StartLoad, TreeSize, UnitMins,
false),
% Go compare! Also confirm we're not comparing empty trees
DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull,
TicTacTreeJoined),
DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree),
true = DL1_0 == [],
true = length(DL1_1) == 0,
ok = leveled_bookie:book_close(Book1A),
ok = leveled_bookie:book_close(Book1B),
ok = leveled_bookie:book_close(Book1C),
ok = leveled_bookie:book_close(Book1D).
recent_aae_allaae(_Config) ->
% Leveled is started in blacklisted mode with no buckets blacklisted.
%
% A number of changes are then loaded into a store, and also partitioned
% across a separate set of three stores. A merge tree is returned from
% both the single store and the partitioned store, and proven to compare
% the same.
%
% A single change is then made, but into one half of the system only. The
% aae index is then re-queried and it is verified that a single segment
% difference is found.
%
% The segment Id found is then used in a query to find the Keys that make
% up that segment, and the delta discovered should be just that one key
% which was known to have been changed
TreeSize = small,
% SegmentCount = 256 * 256,
UnitMins = 2,
AAE = {blacklist, [], 60, UnitMins},
% Test requires multiple different databases, so want to mount them all
% on individual file paths
RootPathA = testutil:reset_filestructure("testA"),
RootPathB = testutil:reset_filestructure("testB"),
RootPathC = testutil:reset_filestructure("testC"),
RootPathD = testutil:reset_filestructure("testD"),
StartOptsA = aae_startopts(RootPathA, AAE),
StartOptsB = aae_startopts(RootPathB, AAE),
StartOptsC = aae_startopts(RootPathC, AAE),
StartOptsD = aae_startopts(RootPathD, AAE),
% Book1A to get all objects
{ok, Book1A} = leveled_bookie:book_start(StartOptsA),
% Book1B/C/D will have objects partitioned across it
{ok, Book1B} = leveled_bookie:book_start(StartOptsB),
{ok, Book1C} = leveled_bookie:book_start(StartOptsC),
{ok, Book1D} = leveled_bookie:book_start(StartOptsD),
{B1, K1, V1, S1, MD} = {"Bucket",
"Key1.1.4567.4321",
"Value1",
[],
[{"MDK1", "MDV1"}]},
{TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD),
SW_StartLoad = os:timestamp(),
ok = testutil:book_riakput(Book1A, TestObject, TestSpec),
ok = testutil:book_riakput(Book1B, TestObject, TestSpec),
testutil:check_forobject(Book1A, TestObject),
testutil:check_forobject(Book1B, TestObject),
{TicTacTreeJoined, TicTacTreeFull, EmptyTree, LMDIndexes} =
load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D,
SW_StartLoad, TreeSize, UnitMins,
false),
% Go compare! Also confirm we're not comparing empty trees
DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull,
TicTacTreeJoined),
DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree),
true = DL1_0 == [],
true = length(DL1_1) > 100,
ok = leveled_bookie:book_close(Book1A),
ok = leveled_bookie:book_close(Book1B),
ok = leveled_bookie:book_close(Book1C),
ok = leveled_bookie:book_close(Book1D),
% Book2A to get all objects
{ok, Book2A} = leveled_bookie:book_start(StartOptsA),
% Book2B/C/D will have objects partitioned across it
{ok, Book2B} = leveled_bookie:book_start(StartOptsB),
{ok, Book2C} = leveled_bookie:book_start(StartOptsC),
{ok, Book2D} = leveled_bookie:book_start(StartOptsD),
{TicTacTreeJoined, TicTacTreeFull, EmptyTree, LMDIndexes} =
load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D,
SW_StartLoad, TreeSize, UnitMins,
LMDIndexes),
% Go compare! Also confirm we're not comparing empty trees
DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull,
TicTacTreeJoined),
DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree),
true = DL1_0 == [],
true = length(DL1_1) > 100,
V2 = "Value2",
{TestObject2, TestSpec2} =
testutil:generate_testobject(B1, K1, V2, S1, MD),
New_startTS = os:timestamp(),
ok = testutil:book_riakput(Book2B, TestObject2, TestSpec2),
testutil:check_forobject(Book2B, TestObject2),
testutil:check_forobject(Book2A, TestObject),
New_endTS = os:timestamp(),
NewLMDIndexes = determine_lmd_indexes(New_startTS, New_endTS, UnitMins),
{TicTacTreeJoined2, TicTacTreeFull2, _EmptyTree, NewLMDIndexes} =
load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D,
New_startTS, TreeSize, UnitMins,
NewLMDIndexes),
DL2_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull2,
TicTacTreeJoined2),
% DL2_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree),
true = length(DL2_0) == 1,
[DirtySeg] = DL2_0,
TermPrefix = string:right(integer_to_list(DirtySeg), 8, $0),
LMDSegFolder =
fun(LMD, {Acc, Bookie}) ->
IdxLMD = list_to_binary("$aae." ++ LMD ++ "_bin"),
IdxQ1 =
{index_query,
<<"$all">>,
{fun testutil:foldkeysfun_returnbucket/3, []},
{IdxLMD,
list_to_binary(TermPrefix ++ "."),
list_to_binary(TermPrefix ++ "|")},
{true, undefined}},
{async, IdxFolder} =
leveled_bookie:book_returnfolder(Bookie, IdxQ1),
{Acc ++ IdxFolder(), Bookie}
end,
{KeysTerms2A, _} = lists:foldl(LMDSegFolder,
{[], Book2A},
lists:usort(LMDIndexes ++ NewLMDIndexes)),
true = length(KeysTerms2A) >= 1,
{KeysTerms2B, _} = lists:foldl(LMDSegFolder,
{[], Book2B},
lists:usort(LMDIndexes ++ NewLMDIndexes)),
{KeysTerms2C, _} = lists:foldl(LMDSegFolder,
{[], Book2C},
lists:usort(LMDIndexes ++ NewLMDIndexes)),
{KeysTerms2D, _} = lists:foldl(LMDSegFolder,
{[], Book2D},
lists:usort(LMDIndexes ++ NewLMDIndexes)),
KeysTerms2Joined = KeysTerms2B ++ KeysTerms2C ++ KeysTerms2D,
DeltaX = lists:subtract(KeysTerms2A, KeysTerms2Joined),
DeltaY = lists:subtract(KeysTerms2Joined, KeysTerms2A),
io:format("DeltaX ~w~n", [DeltaX]),
io:format("DeltaY ~w~n", [DeltaY]),
true = length(DeltaX) == 0, % This hasn't seen any extra changes
true = length(DeltaY) == 1, % This has seen an extra change
[{_, {B1, K1}}] = DeltaY,
ok = leveled_bookie:book_close(Book2A),
ok = leveled_bookie:book_close(Book2B),
ok = leveled_bookie:book_close(Book2C),
ok = leveled_bookie:book_close(Book2D).
recent_aae_bucketaae(_Config) ->
% Configure AAE to work only on a single whitelisted bucket
% Confirm that we can spot a delta in this bucket, but not
% in another bucket
TreeSize = small,
% SegmentCount = 256 * 256,
UnitMins = 2,
AAE = {whitelist, [<<"Bucket">>], 60, UnitMins},
% Test requires multiple different databases, so want to mount them all
% on individual file paths
RootPathA = testutil:reset_filestructure("testA"),
RootPathB = testutil:reset_filestructure("testB"),
RootPathC = testutil:reset_filestructure("testC"),
RootPathD = testutil:reset_filestructure("testD"),
StartOptsA = aae_startopts(RootPathA, AAE),
StartOptsB = aae_startopts(RootPathB, AAE),
StartOptsC = aae_startopts(RootPathC, AAE),
StartOptsD = aae_startopts(RootPathD, AAE),
% Book1A to get all objects
{ok, Book1A} = leveled_bookie:book_start(StartOptsA),
% Book1B/C/D will have objects partitioned across it
{ok, Book1B} = leveled_bookie:book_start(StartOptsB),
{ok, Book1C} = leveled_bookie:book_start(StartOptsC),
{ok, Book1D} = leveled_bookie:book_start(StartOptsD),
{B1, K1, V1, S1, MD} = {<<"Bucket">>,
"Key1.1.4567.4321",
"Value1",
[],
[{"MDK1", "MDV1"}]},
{TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD),
SW_StartLoad = os:timestamp(),
ok = testutil:book_riakput(Book1A, TestObject, TestSpec),
ok = testutil:book_riakput(Book1B, TestObject, TestSpec),
testutil:check_forobject(Book1A, TestObject),
testutil:check_forobject(Book1B, TestObject),
{TicTacTreeJoined, TicTacTreeFull, EmptyTree, LMDIndexes} =
load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D,
SW_StartLoad, TreeSize, UnitMins,
false, <<"Bucket">>),
% Go compare! Also confirm we're not comparing empty trees
DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull,
TicTacTreeJoined),
DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree),
true = DL1_0 == [],
true = length(DL1_1) > 100,
ok = leveled_bookie:book_close(Book1A),
ok = leveled_bookie:book_close(Book1B),
ok = leveled_bookie:book_close(Book1C),
ok = leveled_bookie:book_close(Book1D),
% Book2A to get all objects
{ok, Book2A} = leveled_bookie:book_start(StartOptsA),
% Book2B/C/D will have objects partitioned across it
{ok, Book2B} = leveled_bookie:book_start(StartOptsB),
{ok, Book2C} = leveled_bookie:book_start(StartOptsC),
{ok, Book2D} = leveled_bookie:book_start(StartOptsD),
% Change the value for a key in another bucket
% If we get trees for this period, no difference should be found
V2 = "Value2",
{TestObject2, TestSpec2} =
testutil:generate_testobject(<<"NotBucket">>, K1, V2, S1, MD),
New_startTS2 = os:timestamp(),
ok = testutil:book_riakput(Book2B, TestObject2, TestSpec2),
testutil:check_forobject(Book2B, TestObject2),
testutil:check_forobject(Book2A, TestObject),
New_endTS2 = os:timestamp(),
NewLMDIndexes2 = determine_lmd_indexes(New_startTS2, New_endTS2, UnitMins),
{TicTacTreeJoined2, TicTacTreeFull2, _EmptyTree, NewLMDIndexes2} =
load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D,
New_startTS2, TreeSize, UnitMins,
NewLMDIndexes2, <<"Bucket">>),
DL2_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull2,
TicTacTreeJoined2),
true = length(DL2_0) == 0,
% Now create an object that is a change to an existing key in the
% monitored bucket. A difference should be found
{TestObject3, TestSpec3} =
testutil:generate_testobject(B1, K1, V2, S1, MD),
New_startTS3 = os:timestamp(),
ok = testutil:book_riakput(Book2B, TestObject3, TestSpec3),
testutil:check_forobject(Book2B, TestObject3),
testutil:check_forobject(Book2A, TestObject),
New_endTS3 = os:timestamp(),
NewLMDIndexes3 = determine_lmd_indexes(New_startTS3, New_endTS3, UnitMins),
{TicTacTreeJoined3, TicTacTreeFull3, _EmptyTree, NewLMDIndexes3} =
load_and_check_recentaae(Book2A, Book2B, Book2C, Book2D,
New_startTS3, TreeSize, UnitMins,
NewLMDIndexes3, <<"Bucket">>),
DL3_0 = leveled_tictac:find_dirtyleaves(TicTacTreeFull3,
TicTacTreeJoined3),
% DL2_1 = leveled_tictac:find_dirtyleaves(TicTacTreeFull, EmptyTree),
true = length(DL3_0) == 1,
% Find the dirty segment, and use that to find the dirty key
%
% Note that unlike when monitoring $all, fold_keys can be used as there
% is no need to return the Bucket (as the bucket is known)
[DirtySeg] = DL3_0,
TermPrefix = string:right(integer_to_list(DirtySeg), 8, $0),
LMDSegFolder =
fun(LMD, {Acc, Bookie}) ->
IdxLMD = list_to_binary("$aae." ++ LMD ++ "_bin"),
IdxQ1 =
{index_query,
<<"Bucket">>,
{fun testutil:foldkeysfun/3, []},
{IdxLMD,
list_to_binary(TermPrefix ++ "."),
list_to_binary(TermPrefix ++ "|")},
{true, undefined}},
{async, IdxFolder} =
leveled_bookie:book_returnfolder(Bookie, IdxQ1),
{Acc ++ IdxFolder(), Bookie}
end,
{KeysTerms2A, _} = lists:foldl(LMDSegFolder,
{[], Book2A},
lists:usort(LMDIndexes ++ NewLMDIndexes3)),
true = length(KeysTerms2A) >= 1,
{KeysTerms2B, _} = lists:foldl(LMDSegFolder,
{[], Book2B},
lists:usort(LMDIndexes ++ NewLMDIndexes3)),
{KeysTerms2C, _} = lists:foldl(LMDSegFolder,
{[], Book2C},
lists:usort(LMDIndexes ++ NewLMDIndexes3)),
{KeysTerms2D, _} = lists:foldl(LMDSegFolder,
{[], Book2D},
lists:usort(LMDIndexes ++ NewLMDIndexes3)),
KeysTerms2Joined = KeysTerms2B ++ KeysTerms2C ++ KeysTerms2D,
DeltaX = lists:subtract(KeysTerms2A, KeysTerms2Joined),
DeltaY = lists:subtract(KeysTerms2Joined, KeysTerms2A),
io:format("DeltaX ~w~n", [DeltaX]),
io:format("DeltaY ~w~n", [DeltaY]),
true = length(DeltaX) == 0, % This hasn't seen any extra changes
true = length(DeltaY) == 1, % This has seen an extra change
[{_, K1}] = DeltaY,
ok = leveled_bookie:book_close(Book2A),
ok = leveled_bookie:book_close(Book2B),
ok = leveled_bookie:book_close(Book2C),
ok = leveled_bookie:book_close(Book2D).
recent_aae_expiry(_Config) ->
% Proof that the index entries are indeed expired
TreeSize = small,
% SegmentCount = 256 * 256,
UnitMins = 1,
TotalMins = 2,
AAE = {blacklist, [], TotalMins, UnitMins},
% Test requires multiple different databases, so want to mount them all
% on individual file paths
RootPathA = testutil:reset_filestructure("testA"),
StartOptsA = aae_startopts(RootPathA, AAE),
% Book1A to get all objects
{ok, Book1A} = leveled_bookie:book_start(StartOptsA),
GenMapFun =
fun(_X) ->
V = testutil:get_compressiblevalue(),
Indexes = testutil:get_randomindexes_generator(8),
testutil:generate_objects(5000,
binary_uuid,
[],
V,
Indexes)
end,
ObjLists = lists:map(GenMapFun, lists:seq(1, 3)),
SW0 = os:timestamp(),
% Load all three lists into Book1A
lists:foreach(fun(ObjL) -> testutil:riakload(Book1A, ObjL) end,
ObjLists),
SW1 = os:timestamp(),
% sleep for two minutes, so all index entries will have expired
GetTicTacTreeFun =
fun(Bookie) ->
get_tictactree_fun(Bookie, <<"$all">>, TreeSize)
end,
EmptyTree = leveled_tictac:new_tree(empty, TreeSize),
LMDIndexes = determine_lmd_indexes(SW0, SW1, UnitMins),
% Should get a non-empty answer to the query
TicTacTree1_Full =
lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes),
DL3_0 = leveled_tictac:find_dirtyleaves(TicTacTree1_Full, EmptyTree),
io:format("Dirty leaves found before expiry ~w~n", [length(DL3_0)]),
true = length(DL3_0) > 0,
SecondsSinceLMD = timer:now_diff(os:timestamp(), SW0) div 1000000,
SecondsToExpiry = (TotalMins + UnitMins) * 60,
io:format("SecondsToExpiry ~w SecondsSinceLMD ~w~n",
[SecondsToExpiry, SecondsSinceLMD]),
io:format("LMDIndexes ~w~n", [LMDIndexes]),
case SecondsToExpiry > SecondsSinceLMD of
true ->
timer:sleep((1 + SecondsToExpiry - SecondsSinceLMD) * 1000);
false ->
timer:sleep(1000)
end,
% Should now get an empty answer - all entries have expired
TicTacTree2_Full =
lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes),
DL4_0 = leveled_tictac:find_dirtyleaves(TicTacTree2_Full, EmptyTree),
io:format("Dirty leaves found after expiry ~w~n", [length(DL4_0)]),
timer:sleep(10000),
TicTacTree3_Full =
lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes),
DL5_0 = leveled_tictac:find_dirtyleaves(TicTacTree3_Full, EmptyTree),
io:format("Dirty leaves found after expiry plus 10s ~w~n", [length(DL5_0)]),
ok = leveled_bookie:book_close(Book1A),
true = length(DL4_0) == 0.
basic_headonly(_Config) ->
ObjectCount = 200000,
RemoveCount = 100,
@@ -1196,145 +716,6 @@ load_objectspecs(ObjectSpecL, SliceSize, Bookie) ->
end.
load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D,
SW_StartLoad, TreeSize, UnitMins,
LMDIndexes_Loaded) ->
load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D,
SW_StartLoad, TreeSize, UnitMins,
LMDIndexes_Loaded, <<"$all">>).
load_and_check_recentaae(Book1A, Book1B, Book1C, Book1D,
SW_StartLoad, TreeSize, UnitMins,
LMDIndexes_Loaded, Bucket) ->
LMDIndexes =
case LMDIndexes_Loaded of
false ->
% Generate nine lists of objects
% BucketBin = list_to_binary("Bucket"),
GenMapFun =
fun(_X) ->
V = testutil:get_compressiblevalue(),
Indexes = testutil:get_randomindexes_generator(8),
testutil:generate_objects(5000,
binary_uuid,
[],
V,
Indexes)
end,
ObjLists = lists:map(GenMapFun, lists:seq(1, 9)),
% Load all nine lists into Book1A
lists:foreach(fun(ObjL) -> testutil:riakload(Book1A, ObjL) end,
ObjLists),
% Split nine lists across Book1B to Book1D, three object lists
% in each
lists:foreach(fun(ObjL) -> testutil:riakload(Book1B, ObjL) end,
lists:sublist(ObjLists, 1, 3)),
lists:foreach(fun(ObjL) -> testutil:riakload(Book1C, ObjL) end,
lists:sublist(ObjLists, 4, 3)),
lists:foreach(fun(ObjL) -> testutil:riakload(Book1D, ObjL) end,
lists:sublist(ObjLists, 7, 3)),
SW_EndLoad = os:timestamp(),
determine_lmd_indexes(SW_StartLoad, SW_EndLoad, UnitMins);
_ ->
LMDIndexes_Loaded
end,
EmptyTree = leveled_tictac:new_tree(empty, TreeSize),
GetTicTacTreeFun =
fun(Bookie) ->
get_tictactree_fun(Bookie, Bucket, TreeSize)
end,
% Get a TicTac tree representing one of the indexes in Bucket A
TicTacTree1_Full =
lists:foldl(GetTicTacTreeFun(Book1A), EmptyTree, LMDIndexes),
TicTacTree1_P1 =
lists:foldl(GetTicTacTreeFun(Book1B), EmptyTree, LMDIndexes),
TicTacTree1_P2 =
lists:foldl(GetTicTacTreeFun(Book1C), EmptyTree, LMDIndexes),
TicTacTree1_P3 =
lists:foldl(GetTicTacTreeFun(Book1D), EmptyTree, LMDIndexes),
% Merge the tree across the partitions
TicTacTree1_Joined = lists:foldl(fun leveled_tictac:merge_trees/2,
TicTacTree1_P1,
[TicTacTree1_P2, TicTacTree1_P3]),
{TicTacTree1_Full, TicTacTree1_Joined, EmptyTree, LMDIndexes}.
aae_startopts(RootPath, AAE) ->
LS = 2000,
JS = 50000000,
SS = testutil:sync_strategy(),
[{root_path, RootPath},
{sync_strategy, SS},
{cache_size, LS},
{max_journalsize, JS},
{recent_aae, AAE}].
determine_lmd_indexes(StartTS, EndTS, UnitMins) ->
StartDT = calendar:now_to_datetime(StartTS),
EndDT = calendar:now_to_datetime(EndTS),
StartTimeStr = get_strtime(StartDT, UnitMins),
EndTimeStr = get_strtime(EndDT, UnitMins),
AddTimeFun =
fun(X, Acc) ->
case lists:member(EndTimeStr, Acc) of
true ->
Acc;
false ->
NextTime =
UnitMins * 60 * X +
calendar:datetime_to_gregorian_seconds(StartDT),
NextDT =
calendar:gregorian_seconds_to_datetime(NextTime),
Acc ++ [get_strtime(NextDT, UnitMins)]
end
end,
lists:foldl(AddTimeFun, [StartTimeStr], lists:seq(1, 10)).
get_strtime(DateTime, UnitMins) ->
{{Y, M, D}, {Hour, Minute, _Second}} = DateTime,
RoundMins =
UnitMins * (Minute div UnitMins),
StrTime =
lists:flatten(io_lib:format(?LMD_FORMAT,
[Y, M, D, Hour, RoundMins])),
StrTime.
get_tictactree_fun(Bookie, Bucket, TreeSize) ->
fun(LMD, Acc) ->
SW = os:timestamp(),
ST = <<"0">>,
ET = <<"A">>,
Q = {tictactree_idx,
{Bucket,
list_to_binary("$aae." ++ LMD ++ "_bin"),
ST,
ET},
TreeSize,
fun(_B, _K) -> accumulate end},
{async, Folder} = leveled_bookie:book_returnfolder(Bookie, Q),
R = Folder(),
io:format("TicTac Tree for index ~s took " ++
"~w microseconds~n",
[LMD, timer:now_diff(os:timestamp(), SW)]),
leveled_tictac:merge_trees(R, Acc)
end.
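A hedged sketch (illustrative only; Bookie, the timestamps and TreeSize are hypothetical) of how these removed helpers combined in the tests above: the load window is translated into a list of "$aae." index names, and the per-index TicTac trees are folded into a single tree per store before comparison.

%% Illustrative combination of the removed test helpers
LMDIndexes = determine_lmd_indexes(SW_StartLoad, SW_EndLoad, UnitMins),
EmptyTree = leveled_tictac:new_tree(empty, TreeSize),
TreeFun = get_tictactree_fun(Bookie, <<"$all">>, TreeSize),
FullTree = lists:foldl(TreeFun, EmptyTree, LMDIndexes),
DirtyLeaves = leveled_tictac:find_dirtyleaves(FullTree, EmptyTree),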
get_segment(K, SegmentCount) ->
BinKey =
case is_binary(K) of