Implement blacklist/whitelist

Change from the all/whitelist behaviour to the blacklist/whitelist
behaviour documented in the write-up
This commit is contained in:
martinsumner 2017-07-11 11:44:01 +01:00
parent 7c86de2491
commit 80fd2615f6
5 changed files with 43 additions and 19 deletions

View file

@ -2,7 +2,7 @@
## Background
-In the initial releases of Riak, there were three levels of protection against loss of data, where loss is caused by either a backend store not receiving data (because it was unavailable), or losing writes (due to a crash, or corruption of previously written data):
+In the early history of Riak, there were three levels of protection against loss of data, where loss is caused by either a backend store not receiving data (because it was unavailable), or losing writes (due to a crash, or corruption of previously written data):
- [Read repair](http://docs.basho.com/riak/kv/2.2.3/learn/concepts/replication/#read-repair), whenever an object was read, if as part of that read it was discovered that a vnode that should have an update instead had an older version of the object, then on completion of the read the finite-state-machine managing the get would update the out-of-date vnode with the latest version.
@ -36,7 +36,7 @@ Although this represented an improvement in terms of entropy management, there w
- The hash of the object was *not* based on a canonicalised version of the object, so could be inconsistent between trees (https://github.com/basho/riak_kv/issues/1189) - a sketch of canonicalised hashing follows this list.
-- Converting the object from_binary and sending it to another process has a potentially non-trivial cost for larger objects with significant amounts of metadata (e.g. 2i terms).
+- Converting the object from_binary and sending it to another process (to pass from the `riak_kv_vnode` to the `riak_kv_index_hashtree`) has a potentially non-trivial cost for larger objects with significant amounts of metadata (e.g. 2i terms).
- Hashtrees may become mysteriously inconsistent following rebuilds, if the rebuild followed a cluster change operation (e.g. adding/removing a node) - and there would be storms of read actions prompted that would not lead to repairs.
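
The canonicalisation issue in the first bullet can be made concrete. A minimal sketch, not the riak_kv implementation: `canonical_hash/2` and the sorted-vclock approach are illustrative assumptions about what a canonical form could look like.

```erlang
%% Illustrative only: hash a canonicalised form of the object, so that two
%% logically-identical replicas always produce the same hash. Sorting the
%% vector clock first removes node-ordering differences between replicas.
-spec canonical_hash(list(), binary()) -> non_neg_integer().
canonical_hash(VClock, ObjectBin) ->
    erlang:phash2({lists:usort(VClock), ObjectBin}).
```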

View file

@ -67,9 +67,17 @@
waste_retention_period :: integer(),
reload_strategy = [] :: list()}).
--record(recent_aae, {buckets :: list()|all,
-% whitelist of buckets to support recent AAE
-% or all to support all buckets
+-record(recent_aae, {filter :: whitelist|blacklist,
+% the buckets list should be either a
+% - whitelist - specific buckets are included, and
+% entries are indexed by bucket name
+% - blacklist - specific buckets are excluded, and
+% all other entries are indexed using the special
+% $all bucket
+buckets :: list(),
+% whitelist or blacklist of buckets to support recent
+% AAE
limit_minutes :: integer(),
% how long to retain entries in the temporary index
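
Taken together, the new fields mean bucket membership is interpreted relative to the filter type. A minimal sketch of the intended semantics; the helper name `accepts_bucket/2` is illustrative and not part of this commit:

```erlang
-record(recent_aae, {filter :: whitelist|blacklist,
                     buckets :: list(),
                     limit_minutes :: integer(),
                     unit_minutes :: integer()}).

%% Illustrative helper: should this bucket feed the recent AAE index?
accepts_bucket(#recent_aae{filter = whitelist, buckets = Buckets}, Bucket) ->
    lists:member(Bucket, Buckets);
accepts_bucket(#recent_aae{filter = blacklist, buckets = Buckets}, Bucket) ->
    not lists:member(Bucket, Buckets).
```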

View file

@ -393,8 +393,9 @@ init([Opts]) ->
case get_opt(recent_aae, Opts, ?RECENT_AAE) of
false ->
false;
-{BucketList, LimitMinutes, UnitMinutes} ->
-#recent_aae{buckets = BucketList,
+{FilterType, BucketList, LimitMinutes, UnitMinutes} ->
+#recent_aae{filter = FilterType,
+buckets = BucketList,
limit_minutes = LimitMinutes,
unit_minutes = UnitMinutes}
end,
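
For illustration, the four-element tuple read by `get_opt(recent_aae, Opts, ?RECENT_AAE)` might be supplied when starting a bookie; the surrounding options here, and `book_start/1` taking a property list, are assumptions for the example:

```erlang
%% Illustrative: recent AAE for every bucket except <<"scratch">>, with a
%% 60-minute retention window split into 5-minute units.
{ok, Bookie} =
    leveled_bookie:book_start([{root_path, "/tmp/leveled"},
                               {recent_aae, {blacklist, [<<"scratch">>], 60, 5}}]).
```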

View file

@ -430,12 +430,18 @@ aae_indexspecs(false, _Bucket, _Key, _SQN, _H, _LastMods) ->
aae_indexspecs(_AAE, _Bucket, _Key, _SQN, _H, []) ->
[];
aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) ->
+InList = lists:member(Bucket, AAE#recent_aae.buckets),
Bucket0 =
-case AAE#recent_aae.buckets of
-all ->
-{all, Bucket};
-ListB ->
-case lists:member(Bucket, ListB) of
+case AAE#recent_aae.filter of
+blacklist ->
+case InList of
+true ->
+false;
+false ->
+{all, Bucket}
+end;
+whitelist ->
+case InList of
true ->
Bucket;
false ->
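
The hunk is truncated at the whitelist miss case. A self-contained restatement of the full decision table; the whitelist-miss branch returning `false` (no recent-AAE index entry) is an assumption, consistent with the blacklist arm shown above:

```erlang
%% Illustrative restatement of the Bucket0 decision:
%%   blacklist + member     -> false          (excluded, no index entry)
%%   blacklist + not member -> {all, Bucket}  (indexed under the $all bucket)
%%   whitelist + member     -> Bucket         (indexed under its own bucket)
%%   whitelist + not member -> false          (assumed: no index entry)
map_bucket(Filter, Buckets, Bucket) ->
    InList = lists:member(Bucket, Buckets),
    case {Filter, InList} of
        {blacklist, true} -> false;
        {blacklist, false} -> {all, Bucket};
        {whitelist, true} -> Bucket;
        {whitelist, false} -> false
    end.
```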
@ -811,7 +817,10 @@ parseolddate_test() ->
?assertMatch(no_index, PD).
genaaeidx_test() ->
-AAE = #recent_aae{buckets=all, limit_minutes=60, unit_minutes=5},
+AAE = #recent_aae{filter=blacklist,
+buckets=[],
+limit_minutes=60,
+unit_minutes=5},
Bucket = <<"Bucket1">>,
Key = <<"Key1">>,
SQN = 1,
@ -832,16 +841,22 @@ genaaeidx_test() ->
AAESpecs0 = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods0),
?assertMatch(0, length(AAESpecs0)),
-AAE0 = AAE#recent_aae{buckets=[<<"Bucket0">>]},
+AAE0 = AAE#recent_aae{filter=whitelist,
+buckets=[<<"Bucket0">>]},
AAESpecsB0 = aae_indexspecs(AAE0, Bucket, Key, SQN, H, LastMods1),
?assertMatch(0, length(AAESpecsB0)),
AAESpecsB1 = aae_indexspecs(AAE0, <<"Bucket0">>, Key, SQN, H, LastMods1),
?assertMatch(1, length(AAESpecsB1)),
[{{?IDX_TAG, <<"Bucket0">>, {Fld, Term}, <<"Key1">>},
{SQN, {active, TS}, no_lookup, null}}] = AAESpecsB1,
?assertMatch(true, is_integer(TS)),
?assertMatch(17, length(binary_to_list(Term))),
?assertMatch("$aae.", lists:sublist(binary_to_list(Fld), 5)).
?assertMatch("$aae.", lists:sublist(binary_to_list(Fld), 5)),
AAE1 = AAE#recent_aae{filter=blacklist,
buckets=[<<"Bucket0">>]},
AAESpecsB2 = aae_indexspecs(AAE1, <<"Bucket0">>, Key, SQN, H, LastMods1),
?assertMatch(0, length(AAESpecsB2)).
-endif.
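
The updated test now covers a whitelist hit, a whitelist miss and a blacklist hit. In the same spirit, a condensed sketch of the blacklist-miss expectation; it assumes `aae_indexspecs/6` and `#recent_aae{}` from this module, and a recent timestamp in `LastMods`:

```erlang
blacklist_miss_test() ->
    %% Illustrative only: an empty blacklist excludes nothing, so any
    %% bucket should generate exactly one recent-AAE index spec.
    AAE = #recent_aae{filter = blacklist, buckets = [],
                      limit_minutes = 60, unit_minutes = 5},
    LastMods = [os:timestamp()],
    Specs = aae_indexspecs(AAE, <<"AnyBucket">>, <<"K1">>, 1,
                           erlang:phash2(<<>>), LastMods),
    ?assertMatch(1, length(Specs)).
```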

View file

@ -507,7 +507,7 @@ recent_aae_allaae(_Config) ->
TreeSize = small,
% SegmentCount = 256 * 256,
UnitMins = 2,
-AAE = {all, 60, UnitMins},
+AAE = {blacklist, [], 60, UnitMins},
% Test requires multiple different databases, so want to mount them all
% on individual file paths
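
Note that `{blacklist, [], 60, UnitMins}` is behaviour-preserving: an empty blacklist excludes nothing, so it is the new spelling of the old `all`. A hypothetical shim showing how pre-commit tuples map onto the new format (the function is illustrative, not part of the commit):

```erlang
%% Illustrative migration of old recent_aae option tuples:
upgrade_aae_opt({all, Limit, Unit}) ->
    {blacklist, [], Limit, Unit};          % "all buckets" == empty blacklist
upgrade_aae_opt({BucketList, Limit, Unit}) when is_list(BucketList) ->
    {whitelist, BucketList, Limit, Unit}.  % explicit list was an implicit whitelist
```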
@ -658,7 +658,7 @@ recent_aae_bucketaae(_Config) ->
TreeSize = small,
% SegmentCount = 256 * 256,
UnitMins = 2,
AAE = {[<<"Bucket">>], 60, UnitMins},
AAE = {whitelist, [<<"Bucket">>], 60, UnitMins},
% Test requires multiple different databases, so want to mount them all
% on individual file paths
@ -825,7 +825,7 @@ recent_aae_expiry(_Config) ->
% SegmentCount = 256 * 256,
UnitMins = 1,
TotalMins = 2,
-AAE = {all, TotalMins, UnitMins},
+AAE = {blacklist, [], TotalMins, UnitMins},
% Test requires multiple different databases, so want to mount them all
% on individual file paths
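
This expiry test shortens `TotalMins` and `UnitMins` so that index entries age out within the run. The hunk does not show how the two values interact; a hedged sketch of the arithmetic implied by the field names, where the rounding scheme is an assumption:

```erlang
%% Illustrative: entries are grouped into UnitMins-wide time slots and
%% retained while they fall inside the LimitMins (TotalMins) window.
time_slot(ModSecs, UnitMins) ->
    %% round the modification time down to the start of its unit
    (ModSecs div (UnitMins * 60)) * (UnitMins * 60).

in_retention(NowSecs, ModSecs, LimitMins) ->
    NowSecs - ModSecs =< LimitMins * 60.
```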