2016-10-13 21:02:15 +01:00
|
|
|
%% -------- Key Codec ---------
|
|
|
|
%%
|
2016-10-14 18:43:16 +01:00
|
|
|
%% Functions for manipulating keys and values within leveled.
|
|
|
|
%%
|
|
|
|
%%
|
|
|
|
%% Within the LEDGER:
|
|
|
|
%% Keys are of the form -
|
|
|
|
%% {Tag, Bucket, Key, SubKey|null}
|
|
|
|
%% Values are of the form
|
|
|
|
%% {SQN, Status, MD}
|
|
|
|
%%
|
|
|
|
%% Within the JOURNAL:
|
|
|
|
%% Keys are of the form -
|
|
|
|
%% {SQN, LedgerKey}
|
|
|
|
%% Values are of the form
|
|
|
|
%% {Object, IndexSpecs} (as a binary)
|
|
|
|
%%
|
|
|
|
%% IndexSpecs are of the form of a Ledger Key/Value
|
|
|
|
%%
|
|
|
|
%% Tags need to be set during PUT operations and each Tag used must be
|
|
|
|
%% supported in an extract_metadata and a build_metadata_object function clause
|
|
|
|
%%
|
|
|
|
%% Currently the only tags supported are:
|
|
|
|
%% - o (standard objects)
|
2016-10-16 15:41:09 +01:00
|
|
|
%% - o_rkv (riak objects)
|
2016-10-14 18:43:16 +01:00
|
|
|
%% - i (index entries)
|
|
|
|
|
2016-10-13 21:02:15 +01:00
|
|
|
|
|
|
|
-module(leveled_codec).
|
|
|
|
|
2016-10-18 01:59:03 +01:00
|
|
|
-include("include/leveled.hrl").
|
2016-10-13 21:02:15 +01:00
|
|
|
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
2016-10-25 23:13:14 +01:00
|
|
|
-export([
|
|
|
|
inker_reload_strategy/1,
|
2016-10-13 21:02:15 +01:00
|
|
|
strip_to_seqonly/1,
|
|
|
|
strip_to_statusonly/1,
|
2016-10-31 17:26:28 +00:00
|
|
|
strip_to_keyseqonly/1,
|
2016-12-11 01:02:56 +00:00
|
|
|
strip_to_seqnhashonly/1,
|
2016-10-13 21:02:15 +01:00
|
|
|
striphead_to_details/1,
|
2016-10-31 12:12:06 +00:00
|
|
|
is_active/3,
|
2016-10-13 21:02:15 +01:00
|
|
|
endkey_passed/2,
|
|
|
|
key_dominates/2,
|
2016-10-21 16:08:41 +01:00
|
|
|
maybe_reap_expiredkey/2,
|
2016-10-14 18:43:16 +01:00
|
|
|
to_ledgerkey/3,
|
2016-10-18 01:59:03 +01:00
|
|
|
to_ledgerkey/5,
|
|
|
|
from_ledgerkey/1,
|
2017-11-06 18:44:08 +00:00
|
|
|
to_inkerkv/3,
|
|
|
|
to_inkerkv/6,
|
2016-10-25 23:13:14 +01:00
|
|
|
from_inkerkv/1,
|
2017-03-29 15:37:04 +01:00
|
|
|
from_inkerkv/2,
|
2016-10-25 23:13:14 +01:00
|
|
|
from_journalkey/1,
|
|
|
|
compact_inkerkvc/2,
|
|
|
|
split_inkvalue/1,
|
|
|
|
check_forinkertype/2,
|
2017-11-06 21:16:46 +00:00
|
|
|
maybe_compress/2,
|
2017-11-06 15:54:58 +00:00
|
|
|
create_value_for_journal/3,
|
2016-10-13 21:02:15 +01:00
|
|
|
build_metadata_object/2,
|
2016-10-14 18:43:16 +01:00
|
|
|
generate_ledgerkv/5,
|
|
|
|
get_size/2,
|
2017-06-30 16:31:22 +01:00
|
|
|
get_keyandobjhash/2,
|
2017-06-30 10:03:36 +01:00
|
|
|
idx_indexspecs/5,
|
|
|
|
aae_indexspecs/6,
|
2016-10-31 12:12:06 +00:00
|
|
|
generate_uuid/0,
|
2016-11-28 22:26:09 +00:00
|
|
|
integer_now/0,
|
2016-12-11 01:02:56 +00:00
|
|
|
riak_extract_metadata/2,
|
2017-03-10 20:43:37 +00:00
|
|
|
magic_hash/1,
|
2017-10-20 23:04:29 +01:00
|
|
|
segment_hash/1,
|
2017-11-01 11:51:51 +00:00
|
|
|
to_lookup/1,
|
|
|
|
riak_metadata_to_binary/2]).
|
2016-11-28 22:26:09 +00:00
|
|
|
|
|
|
|
-define(V1_VERS, 1).
|
|
|
|
-define(MAGIC, 53). % riak_kv -> riak_object
|
2017-06-30 10:03:36 +01:00
|
|
|
-define(LMD_FORMAT, "~4..0w~2..0w~2..0w~2..0w~2..0w").
|
2017-06-30 16:31:22 +01:00
|
|
|
-define(NRT_IDX, "$aae.").
|
2017-07-03 18:03:13 +01:00
|
|
|
-define(ALL_BUCKETS, <<"$all">>).
|
2017-06-30 10:03:36 +01:00
|
|
|
|
|
|
|
-type recent_aae() :: #recent_aae{}.
|
2017-09-27 12:15:18 +01:00
|
|
|
-type riak_metadata() :: {binary()|delete, % Sibling Metadata
|
|
|
|
binary()|null, % Vclock Metadata
|
2017-09-27 12:26:12 +01:00
|
|
|
integer()|null, % Hash of vclock - non-exportable
|
2017-09-27 12:15:18 +01:00
|
|
|
integer()}. % Size in bytes of real object
|
2017-06-30 10:03:36 +01:00
|
|
|
|
2017-10-20 23:04:29 +01:00
|
|
|
|
|
|
|
-spec segment_hash(any()) -> {integer(), integer()}.
%% @doc
%% Produce a segment ID plus extra entropy bits for a key. Blooms and
%% indexes can gain speed by using just the segment ID, and gain accuracy
%% when the extended hash (with the second element) is also known.
segment_hash(Key) when is_binary(Key) ->
    %% Take the leading bits of an md5 of the binary key
    <<SegID:16/integer, Extra:32/integer, _Discard/binary>> =
        crypto:hash(md5, Key),
    {SegID, Extra};
segment_hash({?RIAK_TAG, Bucket, Key, null})
        when is_binary(Bucket), is_binary(Key) ->
    %% Riak keys are hashed over the concatenated bucket and key binaries
    segment_hash(<<Bucket/binary, Key/binary>>);
segment_hash(Key) ->
    %% Any other term is hashed via its external representation
    segment_hash(term_to_binary(Key)).
|
|
|
|
|
2017-10-31 23:28:35 +00:00
|
|
|
|
2017-06-30 10:03:36 +01:00
|
|
|
-spec magic_hash(any()) -> integer().
%% @doc
%% DJ Bernstein's djb hash (seed 5381, multiplier 33), truncated to 32
%% bits. More expensive than erlang:phash2/1 but much better balanced.
%% Background on the constants:
%% http://stackoverflow.com/questions/10696223/reason-for-5381-number-in-djb-hash-function
magic_hash({binary, BinaryKey}) ->
    hash1(5381, BinaryKey) band 16#FFFFFFFF;
magic_hash(AnyKey) ->
    magic_hash({binary, term_to_binary(AnyKey)}).

%% Fold each byte of the binary into the accumulator: Acc * 33 xor Byte.
hash1(Acc, <<>>) ->
    Acc;
hash1(Acc, <<Byte:8/integer, Tail/bytes>>) ->
    hash1((Acc * 33) bxor Byte, Tail).
|
|
|
|
|
2017-06-30 10:03:36 +01:00
|
|
|
%% @doc
%% Should it be possible to look up this key directly in the merge tree?
%% Index entries are only ever read via range queries, so they are kept
%% out of bloom filters and other direct-lookup accelerators.
to_lookup(Key) ->
    Tag = element(1, Key),
    if
        Tag =:= ?IDX_TAG -> no_lookup;
        true -> lookup
    end.
|
2016-10-14 18:43:16 +01:00
|
|
|
|
2017-06-30 10:03:36 +01:00
|
|
|
-spec generate_uuid() -> list().
%% @doc
%% Generate a new globally unique v4-style UUID string from 16 random
%% bytes. Credit to
%% https://github.com/afiskon/erlang-uuid-v4/blob/master/src/uuid.erl
generate_uuid() ->
    <<TimeLow:32, TimeMid:16, TimeHi:16, ClockSeq:16, Node:48>> =
        leveled_rand:rand_bytes(16),
    Formatted =
        io_lib:format("~8.16.0b-~4.16.0b-4~3.16.0b-~4.16.0b-~12.16.0b",
                        [TimeLow,
                            TimeMid,
                            TimeHi band 16#0fff,
                            ClockSeq band 16#3fff bor 16#8000,
                            Node]),
    lists:flatten(Formatted).
|
2016-10-18 01:59:03 +01:00
|
|
|
|
2016-10-25 23:13:14 +01:00
|
|
|
%% @doc
%% Build the inker's reload strategy list: start with retain for both the
%% Riak and standard tags, then overwrite any tag present in AltList.
inker_reload_strategy(AltList) ->
    Defaults = [{?RIAK_TAG, retain}, {?STD_TAG, retain}],
    UpdateFun =
        fun({Tag, Strategy}, Acc) ->
            lists:keyreplace(Tag, 1, Acc, {Tag, Strategy})
        end,
    lists:foldl(UpdateFun, Defaults, AltList).
|
2016-10-13 21:02:15 +01:00
|
|
|
|
2016-12-11 01:02:56 +00:00
|
|
|
%% Accessors for ledger KVs, whose values have the form
%% {SQN, Status, Hash, MetaData}.

%% Return only the status element of a ledger KV.
strip_to_statusonly({_LK, {_SQN, Status, _Hash, _MD}}) -> Status.

%% Return only the sequence number of a ledger KV.
strip_to_seqonly({_LK, {SQN, _Status, _Hash, _MD}}) -> SQN.

%% Return the ledger key paired with the sequence number.
strip_to_keyseqonly({LK, {SQN, _Status, _Hash, _MD}}) -> {LK, SQN}.

%% Return the sequence number paired with the hash.
strip_to_seqnhashonly({_LK, {SQN, _Status, Hash, _MD}}) -> {SQN, Hash}.

%% Expand a ledger value into its four components.
striphead_to_details({SQN, Status, Hash, MD}) -> {SQN, Status, Hash, MD}.
|
2016-10-31 17:26:28 +00:00
|
|
|
|
2016-10-13 21:02:15 +01:00
|
|
|
|
|
|
|
%% @doc
%% Compare two {Key, Value} pairs during a merge. Differing keys report
%% which side sorts first; equal keys report which side holds the dominant
%% value (the one with the greater-or-equal sequence number).
key_dominates({LK, _LV}, {RK, _RV}) when LK < RK ->
    left_hand_first;
key_dominates({LK, _LV}, {RK, _RV}) when RK < LK ->
    right_hand_first;
key_dominates({LK, {LSN, _LST, _LMH, _LMD}}, {RK, {RSN, _RST, _RMH, _RMD}})
        when LK == RK, LSN >= RSN ->
    left_hand_dominant;
key_dominates({LK, {LSN, _LST, _LMH, _LMD}}, {RK, {RSN, _RST, _RMH, _RMD}})
        when LK == RK, LSN < RSN ->
    right_hand_dominant.
|
2016-10-16 15:41:09 +01:00
|
|
|
|
2016-10-21 16:08:41 +01:00
|
|
|
|
2016-10-21 21:26:28 +01:00
|
|
|
%% @doc
%% Decide whether a ledger KV may be reaped (dropped entirely) during a
%% merge, based on its status and whether the merge is into the basement.
maybe_reap_expiredkey(KV, LevelD) ->
    maybe_reap(strip_to_statusonly(KV), LevelD).
|
2016-10-21 16:08:41 +01:00
|
|
|
|
|
|
|
%% A key with no expiry time is never reaped.
maybe_reap({_Status, infinity}, _LevelInfo) ->
    false;
%% An already-expired key reaching the basement level can be reaped.
maybe_reap({_Status, TS}, {true, CurrTS}) when CurrTS > TS ->
    true;
%% Tombstones always expire once they reach the basement.
maybe_reap(tomb, {true, _CurrTS}) ->
    true;
%% Otherwise the key must be kept.
maybe_reap(_Status, _LevelInfo) ->
    false.
|
|
|
|
|
2016-10-31 12:12:06 +00:00
|
|
|
%% @doc
%% Is this ledger entry active as at time Now? Tombstones are inactive;
%% entries with a TTL are active only until that TTL passes.
is_active(Key, Value, Now) ->
    case strip_to_statusonly({Key, Value}) of
        {active, infinity} ->
            true;
        tomb ->
            false;
        {active, TS} ->
            TS >= Now
    end.
|
|
|
|
|
2017-07-03 18:03:13 +01:00
|
|
|
%% @doc
%% Convert a ledger key back to its external form. Index entries return
%% {Bucket, Key, IdxValue}; object entries return {Bucket, Key}.
from_ledgerkey({?IDX_TAG, ?ALL_BUCKETS, {_Field, IdxValue}, {Bucket, Key}}) ->
    %% All-buckets index entry - the real bucket is folded into the key
    {Bucket, Key, IdxValue};
from_ledgerkey({?IDX_TAG, Bucket, {_Field, IdxValue}, Key}) ->
    {Bucket, Key, IdxValue};
from_ledgerkey({_Tag, Bucket, Key, null}) ->
    {Bucket, Key}.
|
2016-10-18 01:59:03 +01:00
|
|
|
|
|
|
|
%% @doc
%% Form an index ledger key from bucket, key, field and term.
to_ledgerkey(Bucket, Key, Tag, Field, Value) when Tag == ?IDX_TAG ->
    {?IDX_TAG, Bucket, {Field, Value}, Key}.

%% @doc
%% Form an object ledger key (the sub-key slot is unused for objects).
to_ledgerkey(Bucket, Key, Tag) ->
    {Tag, Bucket, Key, null}.
|
2016-10-13 21:02:15 +01:00
|
|
|
|
2016-10-25 23:13:14 +01:00
|
|
|
%% Return the Key, Value and Hash Option for this object. The hash option
%% indicates whether the key would ever be looked up directly, and so if it
%% requires an entry in the hash table.
to_inkerkv(LedgerKey, SQN, to_fetch) ->
    {{SQN, ?INKT_STND, LedgerKey}, null, true}.

%% Build the full journal KV for a PUT, serialising the object and its key
%% changes into a single journal value binary.
to_inkerkv(LedgerKey, SQN, Object, KeyChanges, PressMethod, Compress) ->
    JournalValue =
        create_value_for_journal({Object, KeyChanges}, Compress, PressMethod),
    {{SQN, check_forinkertype(LedgerKey, Object), LedgerKey}, JournalValue}.
|
2016-10-25 23:13:14 +01:00
|
|
|
|
|
|
|
%% Used when fetching objects, so only handles standard, hashable entries
from_inkerkv(Object) ->
    from_inkerkv(Object, false).

%% Unpack a standard journal KV; any other entry is passed through as-is.
from_inkerkv({{SQN, ?INKT_STND, PK}, Bin}, ToIgnoreKeyChanges)
        when is_binary(Bin) ->
    {{SQN, PK}, revert_value_from_journal(Bin, ToIgnoreKeyChanges)};
from_inkerkv(Object, _ToIgnoreKeyChanges) ->
    Object.
|
|
|
|
|
2017-11-06 15:54:58 +00:00
|
|
|
%% @doc
%% Serialise {Object, KeyChanges} into a single journal value binary with
%% layout: ObjectBin ++ KeyChangeBin ++ KeyChangeLength:32 ++ TypeCode:8.
%% The trailing type code records whether the object was a binary and how
%% it was compressed, so the value can be reverted later.
create_value_for_journal({Object, KeyChanges}, Compress, Method)
        when not is_binary(KeyChanges) ->
    %% First normalise the key changes to a binary
    KCBin = term_to_binary(KeyChanges, [compressed]),
    create_value_for_journal({Object, KCBin}, Compress, Method);
create_value_for_journal({Object, KCBin}, Compress, Method) ->
    KCLength = byte_size(KCBin),
    ObjBin = serialise_object(Object, Compress, Method),
    TypeCode = encode_valuetype(is_binary(Object), Compress, Method),
    <<ObjBin/binary, KCBin/binary, KCLength:32/integer, TypeCode:8/integer>>.
|
|
|
|
|
2017-11-06 21:16:46 +00:00
|
|
|
%% @doc
%% Compaction-time helper: take a journal value and, if not already
%% compressed, rebuild it with compression applied. Values with a null
%% object (key deltas only) are always stored uncompressed.
maybe_compress({null, KeyChanges}, _PressMethod) ->
    create_value_for_journal({null, KeyChanges}, false, native);
maybe_compress(JournalBin, PressMethod) ->
    BodyLength = byte_size(JournalBin) - 5,
    <<Body:BodyLength/binary,
        KCLength:32/integer,
        TypeCode:8/integer>> = JournalBin,
    {IsBinary, IsCompressed, IsLz4} = decode_valuetype(TypeCode),
    case IsCompressed of
        true ->
            %% Already compressed - nothing to do
            JournalBin;
        false ->
            ObjLength = BodyLength - KCLength,
            <<ObjBin:ObjLength/binary, KCBin:KCLength/binary>> = Body,
            Value =
                {deserialise_object(ObjBin, IsBinary, IsCompressed, IsLz4),
                    binary_to_term(KCBin)},
            create_value_for_journal(Value, true, PressMethod)
    end.
|
|
|
|
|
2017-11-06 15:54:58 +00:00
|
|
|
%% Serialise an object for the journal. Binary objects are stored raw, or
%% run through the configured compressor; other terms go via
%% term_to_binary (compressed when requested).
serialise_object(Object, false, _Method) when is_binary(Object) ->
    Object;
serialise_object(Object, true, lz4) when is_binary(Object) ->
    {ok, Compressed} = lz4:pack(Object),
    Compressed;
serialise_object(Object, true, native) when is_binary(Object) ->
    zlib:compress(Object);
serialise_object(Object, false, _Method) ->
    term_to_binary(Object);
serialise_object(Object, true, _Method) ->
    term_to_binary(Object, [compressed]).
|
|
|
|
|
|
|
|
%% @doc
%% Unpack a journal value binary into {Object, KeyChanges}, reversing
%% create_value_for_journal/3.
revert_value_from_journal(JournalBin) ->
    revert_value_from_journal(JournalBin, false).

%% As revert_value_from_journal/1, but when ToIgnoreKeyChanges is true the
%% key changes are not deserialised and an empty list is returned instead.
revert_value_from_journal(JournalBin, ToIgnoreKeyChanges) ->
    BodyLength = byte_size(JournalBin) - 5,
    <<Body:BodyLength/binary,
        KCLength:32/integer,
        TypeCode:8/integer>> = JournalBin,
    {IsBinary, IsCompressed, IsLz4} = decode_valuetype(TypeCode),
    ObjLength = BodyLength - KCLength,
    case ToIgnoreKeyChanges of
        true ->
            <<ObjBin:ObjLength/binary, _KCBin:KCLength/binary>> = Body,
            {deserialise_object(ObjBin, IsBinary, IsCompressed, IsLz4), []};
        false ->
            <<ObjBin:ObjLength/binary, KCBin:KCLength/binary>> = Body,
            {deserialise_object(ObjBin, IsBinary, IsCompressed, IsLz4),
                binary_to_term(KCBin)}
    end.
|
2017-03-20 15:43:54 +00:00
|
|
|
|
2017-11-06 15:54:58 +00:00
|
|
|
%% Reverse serialise_object/3, given the (IsBinary, IsCompressed, IsLz4)
%% flags decoded from the journal value type code.
deserialise_object(Binary, true, true, true) ->
    %% lz4-compressed binary object
    {ok, Unpacked} = lz4:unpack(Binary),
    Unpacked;
deserialise_object(Binary, true, true, false) ->
    %% zlib-compressed binary object
    zlib:uncompress(Binary);
deserialise_object(Binary, true, false, _IsLz4) ->
    %% Uncompressed binary object - stored as-is
    Binary;
deserialise_object(Binary, false, _IsCompressed, _IsLz4) ->
    %% Non-binary objects were stored via term_to_binary
    binary_to_term(Binary).
|
|
|
|
|
2017-11-06 15:54:58 +00:00
|
|
|
%% Pack the journal value flags into one byte: bit 0 -> compressed,
%% bit 1 -> object was a binary, bit 2 -> lz4 (rather than native) method.
encode_valuetype(IsBinary, IsCompressed, Method) ->
    MethodBit =
        case Method of
            lz4 -> 4;
            native -> 0
        end,
    BinaryBit =
        case IsBinary of
            true -> 2;
            false -> 0
        end,
    CompressedBit =
        case IsCompressed of
            true -> 1;
            false -> 0
        end,
    MethodBit bor BinaryBit bor CompressedBit.
|
2017-03-20 15:43:54 +00:00
|
|
|
|
|
|
|
%% Unpack the journal value type byte into {IsBinary, IsCompressed, IsLz4}.
decode_valuetype(TypeInt) ->
    {TypeInt band 2 =:= 2,
        TypeInt band 1 =:= 1,
        TypeInt band 4 =:= 4}.
|
2017-03-20 15:43:54 +00:00
|
|
|
|
2016-10-25 23:13:14 +01:00
|
|
|
%% Strip the inker type from a journal key, leaving {SQN, LedgerKey}.
from_journalkey({SQN, _InkerType, LedgerKey}) ->
    {SQN, LedgerKey}.
|
|
|
|
|
2016-11-01 00:46:14 +00:00
|
|
|
%% @doc
%% Decide what the journal compactor should do with a KVC triple. Corrupt
%% entries and tombstones are skipped; otherwise the per-tag strategy
%% determines whether the entry is retained (standard entries being
%% reduced to their key deltas) or handled by the tag strategy.
compact_inkerkvc({_InkerKey, crc_wonky, false}, _Strategy) ->
    skip;
compact_inkerkvc({{_SQN, ?INKT_TOMB, _LK}, _V, _CrcCheck}, _Strategy) ->
    skip;
compact_inkerkvc({{SQN, ?INKT_KEYD, LK}, V, CrcCheck}, Strategy) ->
    case get_tagstrategy(LK, Strategy) of
        skip ->
            skip;
        retain ->
            %% Already key-deltas only, so retained unchanged
            {retain, {{SQN, ?INKT_KEYD, LK}, V, CrcCheck}};
        TagStrat ->
            {TagStrat, null}
    end;
compact_inkerkvc({{SQN, ?INKT_STND, LK}, V, CrcCheck}, Strategy) ->
    case get_tagstrategy(LK, Strategy) of
        skip ->
            skip;
        retain ->
            %% Drop the object body but keep the key deltas
            {_Obj, KeyDeltas} = revert_value_from_journal(V),
            {retain, {{SQN, ?INKT_KEYD, LK}, {null, KeyDeltas}, CrcCheck}};
        TagStrat ->
            {TagStrat, null}
    end;
compact_inkerkvc(_KVC, _Strategy) ->
    skip.
|
2016-10-25 23:13:14 +01:00
|
|
|
|
2017-03-13 14:32:46 +00:00
|
|
|
%% Look up the compaction strategy for the tag of a ledger key, returning
%% skip (with a log) when the tag is not configured, and skip silently for
%% a malformed (e.g. corrupted) key.
get_tagstrategy({Tag, _Bucket, _Key, _SubKey}, Strategy) ->
    case lists:keyfind(Tag, 1, Strategy) of
        {Tag, TagStrat} ->
            TagStrat;
        false ->
            leveled_log:log("IC012", [Tag, Strategy]),
            skip
    end;
get_tagstrategy(_CorruptedKey, _Strategy) ->
    skip.
|
|
|
|
|
2017-11-07 13:43:29 +00:00
|
|
|
%% Split a journal value binary into its {Object, KeyChanges} parts.
split_inkvalue(ValueBin) when is_binary(ValueBin) ->
    revert_value_from_journal(ValueBin).
|
2016-10-25 23:13:14 +01:00
|
|
|
|
|
|
|
%% Deletes are written to the journal as tombstones; everything else is a
%% standard journal entry.
check_forinkertype(_LedgerKey, Object) ->
    case Object of
        delete -> ?INKT_TOMB;
        _ -> ?INKT_STND
    end.
|
2016-10-25 23:13:14 +01:00
|
|
|
|
2016-10-14 18:43:16 +01:00
|
|
|
%% Hash an arbitrary object via its external term format.
hash(Obj) ->
    ObjBin = term_to_binary(Obj),
    erlang:phash2(ObjBin).
|
2016-10-13 21:02:15 +01:00
|
|
|
|
|
|
|
|
|
|
|
%% Has CheckKey passed beyond EndKey? Null elements in the end key act as
%% wildcards and are excluded from the comparison - used for comparing
%% against end keys in range queries. The atom all means "no end key", so
%% nothing ever passes it.
endkey_passed(all, _CheckKey) ->
    false;
endkey_passed({E1, null, null, null}, {C1, _, _, _}) ->
    E1 < C1;
endkey_passed({E1, E2, null, null}, {C1, C2, _, _}) ->
    {E1, E2} < {C1, C2};
endkey_passed({E1, E2, E3, null}, {C1, C2, C3, _}) ->
    {E1, E2, E3} < {C1, C2, C3};
endkey_passed(EndKey, CheckKey) ->
    EndKey < CheckKey.
|
|
|
|
|
2017-06-30 10:03:36 +01:00
|
|
|
%% Convert a list of {IdxOp, IdxField, IdxTerm} index specs into ledger
%% key/value pairs for the given bucket, key, SQN and TTL.
idx_indexspecs(IndexSpecs, Bucket, Key, SQN, TTL) ->
    [gen_indexspec(Bucket, Key, Op, Field, Term, SQN, TTL)
        || {Op, Field, Term} <- IndexSpecs].
|
|
|
|
|
|
|
|
%% Build a single index ledger KV. An add is active (until TTL expires); a
%% remove is written as a tombstone. Buckets of the form {all, RealBucket}
%% are stored under the special all-buckets bucket, with the real bucket
%% folded into the key element.
gen_indexspec(Bucket, Key, IdxOp, IdxField, IdxTerm, SQN, TTL) ->
    Status =
        case IdxOp of
            add ->
                {active, TTL};
            remove ->
                %% TODO: timestamps for delayed reaping
                tomb
        end,
    {LedgerBucket, LedgerKey} =
        case Bucket of
            {all, RealBucket} ->
                {?ALL_BUCKETS, {RealBucket, Key}};
            _ ->
                {Bucket, Key}
        end,
    {to_ledgerkey(LedgerBucket, LedgerKey, ?IDX_TAG, IdxField, IdxTerm),
        {SQN, Status, no_lookup, null}}.
|
2016-10-14 18:43:16 +01:00
|
|
|
|
2017-06-30 10:03:36 +01:00
|
|
|
-spec aae_indexspecs(false|recent_aae(),
                        any(), any(),
                        integer(), integer(),
                        list())
                            -> list().
%% @doc
%% Generate an additional index term representing the change, if the last
%% modified date for the change is within the definition of recency.
%%
%% The object may have multiple last modified dates (siblings), and in
%% this case index entries for all dates within the range are added.
%%
%% The index entry should auto-expire in the future (when it is no longer
%% relevant to assessing recent changes).
aae_indexspecs(false, _Bucket, _Key, _SQN, _H, _LastMods) ->
    [];
aae_indexspecs(_AAE, _Bucket, _Key, _SQN, _H, []) ->
    [];
aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods) ->
    case aae_filter_bucket(AAE, Bucket) of
        false ->
            [];
        Bucket0 ->
            lists:foldl(aae_specfoldfun(AAE, Bucket0, Key, SQN, H),
                        [],
                        LastMods)
    end.

%% Apply the AAE black/white list filter: false when the bucket should not
%% be indexed, the bucket itself when whitelisted, or {all, Bucket} for a
%% non-blacklisted bucket under the all-buckets convention.
aae_filter_bucket(AAE, Bucket) ->
    InList = lists:member(Bucket, AAE#recent_aae.buckets),
    case {AAE#recent_aae.filter, InList} of
        {blacklist, true} ->
            false;
        {blacklist, false} ->
            {all, Bucket};
        {whitelist, true} ->
            Bucket;
        {whitelist, false} ->
            false
    end.

%% Fold fun generating an index spec for each in-range last modified date.
aae_specfoldfun(AAE, Bucket0, Key, SQN, H) ->
    fun(LMD0, Acc) ->
        Dates = parse_date(LMD0,
                            AAE#recent_aae.unit_minutes,
                            AAE#recent_aae.limit_minutes,
                            integer_now()),
        case Dates of
            no_index ->
                Acc;
            {LMD1, TTL} ->
                TreeSize = AAE#recent_aae.tree_size,
                SegID32 = leveled_tictac:keyto_segment32(Key),
                SegID = leveled_tictac:get_segment(SegID32, TreeSize),
                IdxFldStr = ?NRT_IDX ++ LMD1 ++ "_bin",
                IdxTrmStr =
                    string:right(integer_to_list(SegID), 8, $0) ++
                        "." ++
                        string:right(integer_to_list(H), 8, $0),
                {IdxK, IdxV} =
                    gen_indexspec(Bucket0, Key,
                                    add,
                                    list_to_binary(IdxFldStr),
                                    list_to_binary(IdxTrmStr),
                                    SQN, TTL),
                [{IdxK, IdxV}|Acc]
        end
    end.
|
|
|
|
|
|
|
|
-spec parse_date(tuple(), integer(), integer(), integer()) ->
                    no_index|{binary(), integer()}.
%% @doc
%% Parse the last modified date against the AAE date configuration,
%% returning the time string to use as the last modified part of the index
%% and an integer to use as the TTL of the index entry.
%% Return no_index if the change is not recent.
parse_date(LMD, UnitMins, LimitMins, Now) ->
    LMDsecs = integer_time(LMD),
    case LMDsecs + LimitMins * 60 > Now of
        false ->
            no_index;
        true ->
            {{Y, M, D}, {Hour, Minute, _Second}} =
                calendar:now_to_datetime(LMD),
            %% Round the minute down to the start of its unit window
            RoundMins = UnitMins * (Minute div UnitMins),
            StrTime =
                lists:flatten(io_lib:format(?LMD_FORMAT,
                                            [Y, M, D, Hour, RoundMins])),
            %% Expire once the window can no longer count as recent
            TTL = min(Now, LMDsecs) + (LimitMins + UnitMins) * 60,
            {StrTime, TTL}
    end.
|
|
|
|
|
2017-06-30 16:31:22 +01:00
|
|
|
-spec generate_ledgerkv(
            tuple(), integer(), any(), integer(), tuple()|infinity) ->
                {any(), any(), any(),
                    {{integer(), integer()}|no_lookup, integer()},
                    list()}.
%% @doc
%% Extract from an object the information necessary to populate the
%% Penciller's ledger.
%% Outputs -
%% Bucket - original Bucket extracted from the PrimaryKey
%% Key - original Key extracted from the PrimaryKey
%% Value - the value to be used in the Ledger (essentially the extracted
%% metadata)
%% {Hash, ObjHash} - a hash of the key to accelerate lookups, and a hash
%% of the value to be used for equality checking between objects
%% LastMods - the last modified dates for the object (may be multiple due
%% to siblings)
generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) ->
    {Tag, Bucket, Key, _SubKey} = PrimaryKey,
    %% A delete is recorded as a tombstone; anything else is active
    %% (subject to the passed TS/TTL)
    Status =
        case Obj of
            delete -> tomb;
            _ -> {active, TS}
        end,
    Hash = segment_hash(PrimaryKey),
    {MD, LastMods} = extract_metadata(Obj, Size, Tag),
    ObjHash = get_objhash(Tag, MD),
    Value = {SQN, Status, Hash, MD},
    {Bucket, Key, Value, {Hash, ObjHash}, LastMods}.
|
2016-10-16 15:41:09 +01:00
|
|
|
|
|
|
|
|
2016-10-31 12:12:06 +00:00
|
|
|
%% Current time as gregorian seconds (UTC).
integer_now() ->
    integer_time(os:timestamp()).

%% Convert an erlang timestamp to gregorian seconds (UTC).
integer_time(TS) ->
    calendar:datetime_to_gregorian_seconds(
        calendar:now_to_universal_time(TS)).
|
2016-10-14 18:43:16 +01:00
|
|
|
|
2016-10-16 15:41:09 +01:00
|
|
|
%% Extract ledger metadata (and last-modified dates) from an object,
%% dispatched by tag.
extract_metadata(Obj, Size, ?RIAK_TAG) ->
    riak_extract_metadata(Obj, Size);
extract_metadata(Obj, Size, ?STD_TAG) ->
    %% Standard objects carry no sibling dates - just a hash and the size
    {{hash(Obj), Size}, []}.
|
2016-10-14 18:43:16 +01:00
|
|
|
|
|
|
|
%% Return the object size recorded in a ledger value's metadata; the
%% metadata layout is tag-specific.
get_size(PK, Value) ->
    {Tag, _Bucket, _Key, _SubKey} = PK,
    {_SQN, _Status, _Hash, MD} = Value,
    case Tag of
        ?RIAK_TAG ->
            {_RMD, _VC, _ObjHash, Size} = MD,
            Size;
        ?STD_TAG ->
            {_ObjHash, Size} = MD,
            Size
    end.
|
2017-06-30 16:31:22 +01:00
|
|
|
|
|
|
|
-spec get_keyandobjhash(tuple(), tuple()) -> tuple().
%% @doc
%% Return a tuple of {Bucket, Key, Hash} where hash is a hash of the
%% object not the key (for example with Riak tagged objects this will be a
%% hash of the sorted vclock)
get_keyandobjhash(LK, Value) ->
    {Tag, Bucket, Key, _SubKey} = LK,
    {_SQN, _Status, _Hash, MD} = Value,
    case Tag of
        ?IDX_TAG ->
            %% Index entries carry their value within the key itself
            from_ledgerkey(LK); % returns {Bucket, Key, IdxValue}
        _ ->
            {Bucket, Key, get_objhash(Tag, MD)}
    end.
|
|
|
|
|
2017-06-30 16:31:22 +01:00
|
|
|
%% Pull the object hash out of tag-specific ledger metadata.
get_objhash(?RIAK_TAG, {_SibMetaBin, _Vclock, Hash, _Size}) ->
    Hash;
get_objhash(?STD_TAG, {Hash, _Size}) ->
    Hash.
|
|
|
|
|
2017-09-27 12:26:12 +01:00
|
|
|
|
2016-10-14 18:43:16 +01:00
|
|
|
%% Rebuild a metadata-only object from ledger metadata (as needed for a
%% HEAD response). Riak metadata is re-encoded to the binary object form;
%% standard metadata is returned as stored.
build_metadata_object({?RIAK_TAG, _Bucket, _Key, null}, MD) ->
    {SibData, Vclock, _Hash, _Size} = MD,
    riak_metadata_to_binary(Vclock, SibData);
build_metadata_object({?STD_TAG, _Bucket, _Key, null}, MD) ->
    MD.
|
|
|
|
|
|
|
|
|
2017-09-27 12:15:18 +01:00
|
|
|
-spec riak_extract_metadata(binary()|delete, non_neg_integer()) ->
                                {riak_metadata(), list()}.
%% @doc
%% Extract a riak_metadata() four-tuple from a riak object binary:
%% - Binary of sibling Metadata
%% - Binary of vector clock metadata
%% - Non-exportable hash of the vector clock metadata
%% - Size of the object
%% The full list of last modified dates is returned alongside (used for
%% recent anti-entropy index creation).
riak_extract_metadata(delete, Size) ->
    {{delete, null, null, Size}, []};
riak_extract_metadata(ObjBin, Size) ->
    {VclockBin, SibBin, LastMods} = riak_metadata_from_binary(ObjBin),
    %% Sort the vclock before hashing so equivalent clocks hash equal
    VclockHash = erlang:phash2(lists:sort(binary_to_term(VclockBin))),
    {{SibBin, VclockBin, VclockHash, Size}, LastMods}.
|
2016-11-28 22:26:09 +00:00
|
|
|
|
|
|
|
%% Binary format:
%% <<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
%%      VclockBin/binary, SibCount:32/integer, SibsBin/binary>>

%% Re-assemble a riak object binary from the vclock binary and the
%% already-encoded sibling metadata (which carries its own sibling count).
riak_metadata_to_binary(VclockBin, SibMetaBin) ->
    VclockLen = byte_size(VclockBin),
    <<?MAGIC:8/integer, ?V1_VERS:8/integer,
        VclockLen:32/integer, VclockBin/binary,
        SibMetaBin/binary>>.
|
2016-11-28 22:26:09 +00:00
|
|
|
|
|
|
|
%% @doc
%% Split a version-1 riak object binary into its vclock binary, a reduced
%% sibling metadata binary (sibling values stripped out), and the list of
%% last modified dates found in the sibling metadata.
riak_metadata_from_binary(V1Binary) ->
    <<?MAGIC:8/integer, ?V1_VERS:8/integer, VclockLen:32/integer,
        Rest/binary>> = V1Binary,
    <<VclockBin:VclockLen/binary, SibCount:32/integer, SibsBin/binary>> = Rest,
    %% SibCount is guaranteed to be an integer by the :32/integer binary
    %% match above, so the previous single-branch defensive case has been
    %% removed - walk the siblings directly
    {SibMetaBin, LastMods} =
        get_metadata_from_siblings(SibsBin,
                                    SibCount,
                                    <<SibCount:32/integer>>,
                                    []),
    {VclockBin, SibMetaBin, LastMods}.
|
2016-12-14 10:27:11 +00:00
|
|
|
|
2017-06-27 16:25:09 +01:00
|
|
|
%% Walk the sibling entries of a riak object binary, dropping each
%% sibling's value while accumulating a metadata-only binary and the last
%% modified date from each sibling's metadata.
get_metadata_from_siblings(<<>>, 0, SibMetaBin, LastMods) ->
    {SibMetaBin, LastMods};
get_metadata_from_siblings(<<ValLen:32/integer, Rest0/binary>>,
                            SibCount,
                            SibMetaBin,
                            LastMods) ->
    <<_ValBin:ValLen/binary, MetaLen:32/integer, Rest1/binary>> = Rest0,
    <<MetaBin:MetaLen/binary, Rest2/binary>> = Rest1,
    %% The sibling metadata leads with a last-modified timestamp; fall
    %% back to the epoch when the metadata is too short to contain one
    LastMod =
        case MetaBin of
            <<MegaSec:32/integer,
                Sec:32/integer,
                MicroSec:32/integer,
                _MetaRest/binary>> ->
                {MegaSec, Sec, MicroSec};
            _ ->
                {0, 0, 0}
        end,
    %% Replace the value with a zero-length marker, keeping the metadata
    get_metadata_from_siblings(Rest2,
                                SibCount - 1,
                                <<SibMetaBin/binary,
                                    0:32/integer,
                                    MetaLen:32/integer,
                                    MetaBin:MetaLen/binary>>,
                                [LastMod|LastMods]).
|
2016-12-14 10:27:11 +00:00
|
|
|
|
2016-10-14 18:43:16 +01:00
|
|
|
|
2016-10-13 21:02:15 +01:00
|
|
|
|
|
|
|
|
|
|
|
%%%============================================================================
|
|
|
|
%%% Test
|
|
|
|
%%%============================================================================
|
|
|
|
|
|
|
|
-ifdef(TEST).
|
|
|
|
|
|
|
|
|
|
|
|
%% idx_indexspecs should convert each index spec into a ledger KV in input
%% order: adds become {active, infinity}, removes become tomb, and all index
%% entries are no_lookup.
indexspecs_test() ->
    IndexSpecs = [{add, "t1_int", 456},
                    {add, "t1_bin", "adbc123"},
                    {remove, "t1_bin", "abdc456"}],
    Changes = idx_indexspecs(IndexSpecs, "Bucket", "Key2", 1, infinity),
    ExpectedChanges =
        [{{i, "Bucket", {"t1_int", 456}, "Key2"},
            {1, {active, infinity}, no_lookup, null}},
        {{i, "Bucket", {"t1_bin", "adbc123"}, "Key2"},
            {1, {active, infinity}, no_lookup, null}},
        {{i, "Bucket", {"t1_bin", "abdc456"}, "Key2"},
            {1, tomb, no_lookup, null}}],
    CheckPosFun =
        fun({Pos, Expected}) ->
            ?assertMatch(Expected, lists:nth(Pos, Changes))
        end,
    lists:foreach(CheckPosFun, lists:zip(lists:seq(1, 3), ExpectedChanges)).
|
2016-10-14 22:58:01 +01:00
|
|
|
|
|
|
|
%% An all-null index end key is not passed by another key of the same (i)
%% tag, but is passed once the key's tag sorts beyond i (here the o tag).
endkey_passed_test() ->
    EndKey = {i, null, null, null},
    SameTagKey = {i, 123, {"a", "b"}, <<>>},
    LaterTagKey = {o, 123, {"a", "b"}, <<>>},
    ?assertMatch(false, endkey_passed(EndKey, SameTagKey)),
    ?assertMatch(true, endkey_passed(EndKey, LaterTagKey)).
|
|
|
|
|
|
|
|
|
2017-03-13 14:32:46 +00:00
|
|
|
%% When compacting a journal which has been corrupted, there may be a
%% corrupted ledger key - here truncated to a 3-tuple rather than the
%% expected 4-tuple.  Such keys should always be skipped, regardless of the
%% inker key type carrying them.
corrupted_ledgerkey_test() ->
    BadLedgerKey = {?STD_TAG, "B1", "K1andSK"},
    CompactFun =
        fun(InkType) ->
            compact_inkerkvc({{1, InkType, BadLedgerKey}, {}, true},
                                [{?STD_TAG, retain}])
        end,
    ?assertMatch(skip, CompactFun(?INKT_STND)),
    ?assertMatch(skip, CompactFun(?INKT_KEYD)).
|
2017-03-14 17:26:39 +00:00
|
|
|
|
|
|
|
%% Confirm that journal entries are skipped whenever the reload strategy for
%% their tag says skip, and that an active index entry under a recalc
%% strategy comes back as {recalc, null} - while a tombstone still skips.
general_skip_strategy_test() ->
    SkipOnly = [{?STD_TAG, skip}],
    SkipAndRecalc = [{?STD_TAG, skip}, {?IDX_TAG, recalc}],
    StdKey = {?STD_TAG, "B1", "K1andSK"},
    IdxKey = {?IDX_TAG, "B1", "K1", "SK"},
    CompactFun =
        fun(InkType, LedgerKey, Strategy) ->
            compact_inkerkvc({{1, InkType, LedgerKey}, {}, true}, Strategy)
        end,
    ?assertMatch(skip, CompactFun(?INKT_STND, StdKey, SkipOnly)),
    ?assertMatch(skip, CompactFun(?INKT_KEYD, StdKey, SkipOnly)),
    ?assertMatch(skip, CompactFun(?INKT_KEYD, IdxKey, SkipOnly)),
    ?assertMatch({recalc, null},
                    CompactFun(?INKT_KEYD, IdxKey, SkipAndRecalc)),
    ?assertMatch(skip, CompactFun(?INKT_TOMB, IdxKey, SkipAndRecalc)).
|
2017-03-13 14:32:46 +00:00
|
|
|
|
2017-03-14 17:26:39 +00:00
|
|
|
%% An unrecognised inker key type (here the atom foo) should cause the
%% entry to be skipped rather than crashing the compaction.
corrupted_inker_tag_test() ->
    UnknownTagKVC = {{1, foo, {?STD_TAG, "B1", "K1andSK"}}, {}, true},
    TagStrat = compact_inkerkvc(UnknownTagKVC, [{?STD_TAG, retain}]),
    ?assertMatch(skip, TagStrat).
|
2017-03-13 14:32:46 +00:00
|
|
|
|
2016-12-20 20:55:56 +00:00
|
|
|
%% Test below proved that the overhead of performing hashes was trivial
|
|
|
|
%% Maybe 5 microseconds per hash
|
|
|
|
|
2017-03-20 20:28:47 +00:00
|
|
|
%% Informal benchmark - phash2 1000 x 8KB random binaries and print the
%% elapsed time.  There is no assertion; the timing is purely indicative.
hashperf_test() ->
    Objects = [leveled_rand:rand_bytes(8192) || _I <- lists:seq(1, 1000)],
    StartTS = os:timestamp(),
    _Hashes = [erlang:phash2(Obj) || Obj <- Objects],
    io:format(user, "1000 object hashes in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), StartTS)]).
|
|
|
|
|
|
|
|
%% Informal benchmark comparing magic_hash/1 against erlang:phash2/1 over
%% 1000 object keys (magic_hash is run twice so the second run is warm).
%% Timings are printed only; there is no pass/fail threshold.
magichashperf_test() ->
    KeyFun =
        fun(X) ->
            K = {o, "Bucket", "Key" ++ integer_to_list(X), null},
            {K, X}
        end,
    KL = lists:map(KeyFun, lists:seq(1, 1000)),
    {TimeMH, _HL1} = timer:tc(lists, map, [fun(K) -> magic_hash(K) end, KL]),
    io:format(user, "1000 keys magic hashed in ~w microseconds~n", [TimeMH]),
    {TimePH, _HL2} = timer:tc(lists, map, [fun(K) -> erlang:phash2(K) end, KL]),
    io:format(user, "1000 keys phash2 hashed in ~w microseconds~n", [TimePH]),
    %% Use a fresh variable for the second magic_hash run.  The previous
    %% code re-matched _HL1 here: underscore-prefixed variables are still
    %% bound in Erlang, so that silently asserted the two runs' results
    %% were equal rather than ignoring them.
    {TimeMH2, _HL3} = timer:tc(lists, map, [fun(K) -> magic_hash(K) end, KL]),
    io:format(user, "1000 keys magic hashed in ~w microseconds~n", [TimeMH2]).
|
|
|
|
|
2017-06-30 10:03:36 +01:00
|
|
|
%% parse_date should produce a parseable {LMDstr, TTL} tuple for a recent
%% last-modified date, and keep doing so as that date is advanced a minute
%% at a time across the whole 60-minute limit window.
parsedate_test() ->
    {MegaSecs, Secs, MicroSecs} = os:timestamp(),
    timer:sleep(100),
    Now = integer_now(),
    UnitMins = 5,
    LimitMins = 60,
    Parsed = parse_date({MegaSecs, Secs, MicroSecs},
                        UnitMins, LimitMins, Now),
    io:format("Parsed Date ~w~n", [Parsed]),
    ?assertMatch(true, is_tuple(Parsed)),
    check_pd(Parsed, UnitMins),
    CheckOffsetFun =
        fun(OffsetMins) ->
            Shifted = {MegaSecs, Secs + OffsetMins * 60, MicroSecs},
            check_pd(parse_date(Shifted, UnitMins, LimitMins, Now), UnitMins)
        end,
    lists:foreach(CheckOffsetFun, lists:seq(1, 60)).
|
|
|
|
|
|
|
|
%% Assert that a parsed date rounds its minutes to a whole number of units.
%% The minute component is read as the digits after the first 10 characters
%% of the date string (presumably a YYYYMMDDHH prefix - the callers in this
%% module only depend on the trailing digits being the minutes).
check_pd(PD, UnitMins) ->
    {LMDstr, _TTL} = PD,
    MinuteDigits = lists:nthtail(10, LMDstr),
    ?assertMatch(0, list_to_integer(MinuteDigits) rem UnitMins).
|
|
|
|
|
|
|
|
%% A last-modified date which falls outside the limit window (Now is pushed
%% an hour into the future here) should yield no_index - i.e. no AAE index
%% entry at all.
parseolddate_test() ->
    LastMod = os:timestamp(),
    timer:sleep(100),
    FutureNow = integer_now() + 60 * 60,
    UnitMins = 5,
    LimitMins = 60,
    Parsed = parse_date(LastMod, UnitMins, LimitMins, FutureNow),
    io:format("Parsed Date ~w~n", [Parsed]),
    ?assertMatch(no_index, Parsed).
|
|
|
|
|
|
|
|
%% Cover the generation and filtering of recent_aae index specs by
%% aae_indexspecs/6 (defined elsewhere in this module): the number of specs
%% expected per last-modified date, and the effect of the recent_aae
%% blacklist/whitelist bucket filter.
genaaeidx_test() ->
    %% Blacklist with no buckets listed - so no bucket is excluded.
    AAE = #recent_aae{filter=blacklist,
                        buckets=[],
                        limit_minutes=60,
                        unit_minutes=5},
    Bucket = <<"Bucket1">>,
    Key = <<"Key1">>,
    SQN = 1,
    H = erlang:phash2(null),
    LastMods = [os:timestamp(), os:timestamp()],

    %% One index spec is expected per last-modified date supplied.
    AAESpecs = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods),
    ?assertMatch(2, length(AAESpecs)),

    LastMods1 = [os:timestamp()],
    AAESpecs1 = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods1),
    ?assertMatch(1, length(AAESpecs1)),
    %% With an empty blacklist the spec's index bucket is the <<"$all">>
    %% catch-all rather than the object's own bucket.
    IdxB = element(2, element(1, lists:nth(1, AAESpecs1))),
    io:format(user, "AAE IDXSpecs1 ~w~n", [AAESpecs1]),
    ?assertMatch(<<"$all">>, IdxB),

    %% No last-modified dates -> no AAE index entries.
    LastMods0 = [],
    AAESpecs0 = aae_indexspecs(AAE, Bucket, Key, SQN, H, LastMods0),
    ?assertMatch(0, length(AAESpecs0)),

    %% Whitelist of Bucket0: objects in other buckets produce no specs.
    AAE0 = AAE#recent_aae{filter=whitelist,
                            buckets=[<<"Bucket0">>]},
    AAESpecsB0 = aae_indexspecs(AAE0, Bucket, Key, SQN, H, LastMods1),
    ?assertMatch(0, length(AAESpecsB0)),

    %% ... but an object in the whitelisted bucket produces one spec, shaped
    %% as an index ledger KV with an active status and an integer TTL.
    AAESpecsB1 = aae_indexspecs(AAE0, <<"Bucket0">>, Key, SQN, H, LastMods1),
    ?assertMatch(1, length(AAESpecsB1)),
    [{{?IDX_TAG, <<"Bucket0">>, {Fld, Term}, <<"Key1">>},
        {SQN, {active, TS}, no_lookup, null}}] = AAESpecsB1,
    ?assertMatch(true, is_integer(TS)),
    %% Term is a 17-character binary and the field name carries the $aae.
    %% prefix (exact formats are fixed by aae_indexspecs, not asserted
    %% further here).
    ?assertMatch(17, length(binary_to_list(Term))),
    ?assertMatch("$aae.", lists:sublist(binary_to_list(Fld), 5)),

    %% Blacklisting Bucket0 suppresses specs for objects in that bucket.
    AAE1 = AAE#recent_aae{filter=blacklist,
                            buckets=[<<"Bucket0">>]},
    AAESpecsB2 = aae_indexspecs(AAE1, <<"Bucket0">>, Key, SQN, H, LastMods1),
    ?assertMatch(0, length(AAESpecsB2)).
|
2016-12-20 20:55:56 +00:00
|
|
|
|
2017-07-31 20:20:39 +02:00
|
|
|
-endif.
|