Mas d34 i453 eqwalizer (#454)

* Add eqwalizer and clear for codec & sst

The eqwalizer errors highlighted the need for type clarification in several places.

Within tests there are some issues where a type is assumed, so an ignore has been used to handle this rather than writing more complex code to make the assumption explicit.
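
Where the assumption sits in test-only code, the suppression used is eqwalizer's standard per-function attribute - for example, the suppression added in leveled_codec for a test helper (seen later in this diff):

    %% Suppress eqwalizer warnings for a single test-only helper rather
    %% than restructure the code to prove the assumed type
    -eqwalizer({nowarn_function, convert_to_ledgerv/5}).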

eqwalizer's handling of arrays isn't great - being specific about the content type of an array causes issues when initialising the array.  Perhaps a structure where one can be more explicit about types (a map, maybe) would be a better option, even if there is a minimal performance impact.
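
As a minimal sketch of the friction (hypothetical record and function, not code from this commit): the result of array:new/1 is typed as array:array(term()), which eqwalizer will not accept where a field is specced with a more specific element type.

    -record(cache, {slots :: array:array(binary())}).

    %% eqwalizer flags the initialisation below: array:new/1 returns
    %% array:array(term()), not array:array(binary())
    new_cache(Size) when is_integer(Size), Size > 0 ->
        #cache{slots = array:new(Size)}.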

The use of a ?TOMB_COUNT compile-time option complicated the code considerably under eqwalizer.  So for now, there is no developer option to disable ?TOMB_COUNT.
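
A hedged sketch (hypothetical wrapper name, not the removed code) of why the conditional option is awkward to type: with ?TOMB_COUNT disabled the count becomes the atom not_counted, so every caller has to handle non_neg_integer()|not_counted rather than a plain non_neg_integer().

    -ifdef(TOMB_COUNT).
    maybe_count_tombs(KVList) -> count_tombs(KVList, 0).
    -else.
    maybe_count_tombs(_KVList) -> not_counted.
    -endif.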

Test fixes were required where strings, not binaries, had been used for buckets/keys.

The leveled_sst statem needs a different state record when starting compared to other modes.  The state record has been divided up to reflect this, to make type management easier.  The impact on performance needs to be tested.
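
The leveled_sst change itself is not shown in full here, but a hedged sketch of the approach (hypothetical record and field names): a slim record for the starting mode, and a full record once the file is open, so fields no longer need an | undefined alternative in their specs just to cover start-up.

    %% Sketch only - not the actual leveled_sst records
    -record(starting_state,
            {root_path :: string(),
             new_slots :: list()}).
    -record(working_state,
            {handle :: file:io_device(),
             filename :: string(),
             index :: tuple()}).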

* Update ct tests to support binary keys/buckets only

* Eqwalizer for leveled_cdb and leveled_tictac

As array is used in leveled_tictac, there is the same issue as with leveled_sst.

* Remove redundant indirection of leveled_rand

A legacy of pre-20 OTP.

* More modules eqwalized

ebloom/log/util/monitor

* Eqwalize further modules

elp eqwalize leveled_codec; elp eqwalize leveled_sst; elp eqwalize leveled_cdb; elp eqwalize leveled_tictac; elp eqwalize leveled_log; elp eqwalize leveled_monitor; elp eqwalize leveled_head; elp eqwalize leveled_ebloom; elp eqwalize leveled_iclerk

All currently OK

* Refactor unit tests to use binary() not string() in keys

Previously string() was allowed just to avoid having to change all these tests.  Go through the pain now, as part of eqwalizing.

* Add fixes for penciller, inker

Add a new ?IS_DEF macro to replace =/= undefined.
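
The macro added to the shared include file, with a hypothetical example of the pattern applied throughout: bind the field in the clause head and guard on it, so eqwalizer can see the value is not undefined inside the clause.

    -define(IS_DEF(Attribute), Attribute =/= undefined).

    %% Hypothetical example of the guard pattern (close_if_open/1 is not
    %% a real function in this change)
    close_if_open(#state{handle = IO}) when ?IS_DEF(IO) ->
        file:close(IO);
    close_if_open(_State) ->
        ok.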

Now more explicit about primary, object and query keys
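
The distinction is captured by the new key types in leveled_codec (from this change) - broadly, a primary key identifies a stored object, an object key covers entries actually present in the ledger (including index and head entries), and a query key may use null placeholders (or the atom all) when defining a range:

    -type primary_key() ::
        {leveled_head:object_tag(), key(), single_key(), single_key()|null}.
    -type object_key() ::
        {tag(), key(), key(), single_key()|null}.
    -type query_key() ::
        {tag(), key()|null, key()|null, single_key()|null}|all.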

* Further fixes

Need to clarify the functions used by the runner - where keys, query keys and object keys are used.

* Further eqwalisation

* Eqwalize leveled_pmanifest

Also make the implementation independent of the choice of dict - i.e. one can save a manifest using dict for blooms/pending_deletions and then open that manifest with code that uses a different type.  This allows the slow dict to be replaced with a map.
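
A minimal sketch (hypothetical function name) of keeping the read side independent of whether the blooms structure was saved as a dict or a map:

    get_bloom(FN, Blooms) when is_map(Blooms) ->
        maps:get(FN, Blooms, none);
    get_bloom(FN, Blooms) ->
        case dict:find(FN, Blooms) of
            {ok, Bloom} -> Bloom;
            error -> none
        end.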

Would not be backwards compatible though, without further thought - i.e. if you upgrade then downgrade.

Redundant code created by leveled_sst refactoring removed.

* Fix backwards compatibility issues

* Manifest Entry to belong to leveled_pmanifest

There are two manifests - leveled_pmanifest and leveled_imanifest.  Both have manifest_entry() type objects, but these types are different.  To avoid confusion, don't include the pmanifest manifest_entry() within the global include file - be specific that it belongs to the leveled_pmanifest module.

* Ignore elp file - large binary

* Update src/leveled_pmem.erl

Remove unnecessary empty list from type definition

Co-authored-by: Thomas Arts <thomas.arts@quviq.com>

---------

Co-authored-by: Thomas Arts <thomas.arts@quviq.com>
Martin Sumner 2024-11-13 13:37:13 +00:00 committed by GitHub
parent 1be55fcd15
commit aaeac7ba36
30 changed files with 4778 additions and 3334 deletions

.gitignore

@ -9,3 +9,4 @@ cover
cover_*
.eqc-info
leveled_data/*
elp


@ -70,6 +70,15 @@
%% Inker key type used for tombstones
%%%============================================================================
%%%============================================================================
%%% Helper Function
%%%============================================================================
-define(IS_DEF(Attribute), Attribute =/= undefined).
-if(?OTP_RELEASE < 26).
-type dynamic() :: any().
-endif.
%%%============================================================================
%%% Shared records
@ -79,13 +88,6 @@
is_basement = false :: boolean(),
timestamp :: integer()}).
-record(manifest_entry,
{start_key :: tuple() | undefined,
end_key :: tuple() | undefined,
owner :: pid()|list(),
filename :: string() | undefined,
bloom = none :: leveled_ebloom:bloom() | none}).
-record(cdb_options,
{max_size :: pos_integer() | undefined,
max_count :: pos_integer() | undefined,
@ -129,14 +131,15 @@
singlefile_compactionperc :: float()|undefined,
maxrunlength_compactionperc :: float()|undefined,
score_onein = 1 :: pos_integer(),
snaptimeout_long :: pos_integer() | undefined,
snaptimeout_long = 60 :: pos_integer(),
monitor = {no_monitor, 0}
:: leveled_monitor:monitor()}).
-record(penciller_options,
{root_path :: string() | undefined,
sst_options = #sst_options{} :: #sst_options{},
max_inmemory_tablesize :: integer() | undefined,
max_inmemory_tablesize = ?MIN_PCL_CACHE_SIZE
:: pos_integer(),
start_snapshot = false :: boolean(),
snapshot_query,
bookies_pid :: pid() | undefined,


@ -12,6 +12,14 @@
{eunit_opts, [verbose]}.
{project_plugins, [
{eqwalizer_rebar3,
{git_subdir,
"https://github.com/whatsapp/eqwalizer.git",
{branch, "main"},
"eqwalizer_rebar3"}}
]}.
{profiles,
[{eqc, [{deps, [meck, fqc]},
{erl_opts, [debug_info, {d, 'EQC'}]},
@ -28,7 +36,8 @@
{deps, [
{lz4, ".*", {git, "https://github.com/nhs-riak/erlang-lz4", {branch, "nhse-develop-3.4"}}},
{zstd, ".*", {git, "https://github.com/nhs-riak/zstd-erlang", {branch, "nhse-develop"}}}
{zstd, ".*", {git, "https://github.com/nhs-riak/zstd-erlang", {branch, "nhse-develop"}}},
{eqwalizer_support, {git_subdir, "https://github.com/whatsapp/eqwalizer.git", {branch, "main"}, "eqwalizer_support"}}
]}.
{ct_opts, [{dir, ["test/end_to_end"]}]}.

File diff suppressed because it is too large


@ -94,15 +94,11 @@
hashtable_calc/2]).
-define(DWORD_SIZE, 8).
-define(WORD_SIZE, 4).
-define(MAX_FILE_SIZE, 3221225472).
-define(BINARY_MODE, false).
-define(BASE_POSITION, 2048).
-define(WRITE_OPS, [binary, raw, read, write]).
-define(PENDING_ROLL_WAIT, 30).
-define(DELETE_TIMEOUT, 10000).
-define(TIMING_SAMPLECOUNTDOWN, 5000).
-define(TIMING_SAMPLESIZE, 100).
-define(GETPOS_FACTOR, 8).
-define(MAX_OBJECT_SIZE, 1000000000).
% 1GB but really should be much smaller than this
@ -111,18 +107,24 @@
-record(state, {hashtree,
last_position :: integer() | undefined,
% defined when writing, not required once rolled
last_key = empty,
current_count = 0 :: non_neg_integer(),
hash_index = {} :: tuple(),
filename :: string() | undefined,
handle :: file:fd() | undefined,
max_size :: pos_integer() | undefined,
max_count :: pos_integer() | undefined,
% defined when starting
handle :: file:io_device() | undefined,
% defined when starting
max_size :: pos_integer(),
max_count :: pos_integer(),
binary_mode = false :: boolean(),
delete_point = 0 :: integer(),
inker :: pid() | undefined,
% undefined until delete_pending
deferred_delete = false :: boolean(),
waste_path :: string() | undefined,
waste_path :: string()|undefined,
% undefined has functional meaning
% - no sending to waste on delete
sync_strategy = none,
log_options = leveled_log:get_opts()
:: leveled_log:log_options(),
@ -133,8 +135,11 @@
-type hashtable_index() :: tuple().
-type file_location() :: integer()|eof.
-type filter_fun() ::
fun((any(), binary(), integer(), any(), fun((binary()) -> any())) ->
{stop|loop, any()}).
fun((any(),
binary(),
integer(),
term()|{term(), term()},
fun((binary()) -> any())) -> {stop|loop, any()}).
-export_type([filter_fun/0]).
@ -265,11 +270,11 @@ cdb_getpositions(Pid, SampleSize) ->
cdb_getpositions_fromidx(Pid, FC, Index, Acc)
end
end,
RandFun = fun(X) -> {leveled_rand:uniform(), X} end,
RandFun = fun(X) -> {rand:uniform(), X} end,
SeededL = lists:map(RandFun, lists:seq(0, 255)),
SortedL = lists:keysort(1, SeededL),
PosList0 = lists:foldl(FoldFun, [], SortedL),
P1 = leveled_rand:uniform(max(1, length(PosList0) - S0)),
P1 = rand:uniform(max(1, length(PosList0) - S0)),
lists:sublist(lists:sort(PosList0), P1, S0)
end.
@ -367,7 +372,7 @@ cdb_scan(Pid, FilterFun, InitAcc, StartPosition) ->
{cdb_scan, FilterFun, InitAcc, StartPosition},
infinity).
-spec cdb_lastkey(pid()) -> any().
-spec cdb_lastkey(pid()) -> leveled_codec:journal_key()|empty.
%% @doc
%% Get the last key to be added to the file (which will have the highest
%% sequence number)
@ -487,38 +492,49 @@ starting({call, From}, {open_reader, Filename, LastKey}, State) ->
{next_state, reader, State0, [{reply, From, ok}, hibernate]}.
writer({call, From}, {get_kv, Key}, State) ->
writer(
{call, From}, {get_kv, Key}, State = #state{handle =IO})
when ?IS_DEF(IO) ->
{keep_state_and_data,
[{reply,
From,
get_mem(
Key,
State#state.handle,
IO,
State#state.hashtree,
State#state.binary_mode)}]};
writer({call, From}, {key_check, Key}, State) ->
writer(
{call, From}, {key_check, Key}, State = #state{handle =IO})
when ?IS_DEF(IO) ->
{keep_state_and_data,
[{reply,
From,
get_mem(
Key,
State#state.handle,
IO,
State#state.hashtree,
State#state.binary_mode,
loose_presence)}]};
writer({call, From}, {put_kv, Key, Value, Sync}, State) ->
writer(
{call, From},
{put_kv, Key, Value, Sync},
State = #state{last_position = LP, handle = IO})
when ?IS_DEF(last_position), ?IS_DEF(IO) ->
NewCount = State#state.current_count + 1,
case NewCount >= State#state.max_count of
true ->
{keep_state_and_data, [{reply, From, roll}]};
false ->
Result = put(State#state.handle,
Key,
Value,
{State#state.last_position, State#state.hashtree},
State#state.binary_mode,
State#state.max_size,
State#state.last_key == empty),
Result =
put(
IO,
Key,
Value,
{LP, State#state.hashtree},
State#state.binary_mode,
State#state.max_size,
State#state.last_key == empty
),
case Result of
roll ->
%% Key and value could not be written
@ -545,7 +561,11 @@ writer({call, From}, {put_kv, Key, Value, Sync}, State) ->
end;
writer({call, From}, {mput_kv, []}, _State) ->
{keep_state_and_data, [{reply, From, ok}]};
writer({call, From}, {mput_kv, KVList}, State) ->
writer(
{call, From},
{mput_kv, KVList},
State = #state{last_position = LP, handle = IO})
when ?IS_DEF(last_position), ?IS_DEF(IO) ->
NewCount = State#state.current_count + length(KVList),
TooMany = NewCount >= State#state.max_count,
NotEmpty = State#state.current_count > 0,
@ -553,11 +573,14 @@ writer({call, From}, {mput_kv, KVList}, State) ->
true ->
{keep_state_and_data, [{reply, From, roll}]};
false ->
Result = mput(State#state.handle,
KVList,
{State#state.last_position, State#state.hashtree},
State#state.binary_mode,
State#state.max_size),
Result =
mput(
IO,
KVList,
{LP, State#state.hashtree},
State#state.binary_mode,
State#state.max_size
),
case Result of
roll ->
%% Keys and values could not be written
@ -573,38 +596,46 @@ writer({call, From}, {mput_kv, KVList}, State) ->
[{reply, From, ok}]}
end
end;
writer({call, From}, cdb_complete, State) ->
NewName = determine_new_filename(State#state.filename),
writer(
{call, From}, cdb_complete, State = #state{filename = FN})
when ?IS_DEF(FN) ->
NewName = determine_new_filename(FN),
ok = close_file(State#state.handle,
State#state.hashtree,
State#state.last_position),
ok = rename_for_read(State#state.filename, NewName),
ok = rename_for_read(FN, NewName),
{stop_and_reply, normal, [{reply, From, {ok, NewName}}]};
writer({call, From}, Event, State) ->
handle_sync_event(Event, From, State);
writer(cast, cdb_roll, State) ->
writer(
cast, cdb_roll, State = #state{last_position = LP})
when ?IS_DEF(LP) ->
ok =
leveled_iclerk:clerk_hashtablecalc(
State#state.hashtree, State#state.last_position, self()),
State#state.hashtree, LP, self()),
{next_state, rolling, State}.
rolling({call, From}, {get_kv, Key}, State) ->
rolling(
{call, From}, {get_kv, Key}, State = #state{handle = IO})
when ?IS_DEF(IO) ->
{keep_state_and_data,
[{reply,
From,
get_mem(
Key,
State#state.handle,
IO,
State#state.hashtree,
State#state.binary_mode)}]};
rolling({call, From}, {key_check, Key}, State) ->
rolling(
{call, From}, {key_check, Key}, State = #state{handle = IO})
when ?IS_DEF(IO) ->
{keep_state_and_data,
[{reply,
From,
get_mem(
Key,
State#state.handle,
IO,
State#state.hashtree,
State#state.binary_mode,
loose_presence)}]};
@ -612,15 +643,19 @@ rolling({call, From},
{get_positions, _SampleSize, _Index, SampleAcc},
_State) ->
{keep_state_and_data, [{reply, From, SampleAcc}]};
rolling({call, From}, {return_hashtable, IndexList, HashTreeBin}, State) ->
rolling(
{call, From},
{return_hashtable, IndexList, HashTreeBin},
State = #state{filename = FN})
when ?IS_DEF(FN) ->
SW = os:timestamp(),
Handle = State#state.handle,
{ok, BasePos} = file:position(Handle, State#state.last_position),
NewName = determine_new_filename(State#state.filename),
NewName = determine_new_filename(FN),
ok = perform_write_hash_tables(Handle, HashTreeBin, BasePos),
ok = write_top_index_table(Handle, BasePos, IndexList),
file:close(Handle),
ok = rename_for_read(State#state.filename, NewName),
ok = rename_for_read(FN, NewName),
leveled_log:log(cdb03, [NewName]),
ets:delete(State#state.hashtree),
{NewHandle, Index, LastKey} =
@ -646,13 +681,17 @@ rolling(cast, {delete_pending, ManSQN, Inker}, State) ->
{keep_state,
State#state{delete_point=ManSQN, inker=Inker, deferred_delete=true}}.
reader({call, From}, {get_kv, Key}, State) ->
reader(
{call, From}, {get_kv, Key}, State = #state{handle = IO})
when ?IS_DEF(IO) ->
Result =
get_withcache(State#state.handle,
Key,
State#state.hash_index,
State#state.binary_mode,
State#state.monitor),
get_withcache(
IO,
Key,
State#state.hash_index,
State#state.binary_mode,
State#state.monitor
),
{keep_state_and_data, [{reply, From, Result}]};
reader({call, From}, {key_check, Key}, State) ->
Result =
@ -673,8 +712,11 @@ reader({call, From}, {get_positions, SampleSize, Index, Acc}, State) ->
{keep_state_and_data,
[{reply, From, lists:sublist(UpdAcc, SampleSize)}]}
end;
reader({call, From}, {direct_fetch, PositionList, Info}, State) ->
H = State#state.handle,
reader(
{call, From},
{direct_fetch, PositionList, Info},
State = #state{handle = IO})
when ?IS_DEF(IO) ->
FilterFalseKey =
fun(Tpl) ->
case element(1, Tpl) of
@ -687,20 +729,23 @@ reader({call, From}, {direct_fetch, PositionList, Info}, State) ->
case Info of
key_only ->
FM = lists:filtermap(
FM =
lists:filtermap(
fun(P) ->
FilterFalseKey(extract_key(H, P)) end,
PositionList),
FilterFalseKey(extract_key(IO, P))
end,
PositionList
),
MapFun = fun(T) -> element(1, T) end,
{keep_state_and_data,
[{reply, From, lists:map(MapFun, FM)}]};
key_size ->
FilterFun = fun(P) -> FilterFalseKey(extract_key_size(H, P)) end,
FilterFun = fun(P) -> FilterFalseKey(extract_key_size(IO, P)) end,
{keep_state_and_data,
[{reply, From, lists:filtermap(FilterFun, PositionList)}]};
key_value_check ->
BM = State#state.binary_mode,
MapFun = fun(P) -> extract_key_value_check(H, P, BM) end,
MapFun = fun(P) -> extract_key_value_check(IO, P, BM) end,
% direct_fetch will occur in batches, so it doesn't make sense to
% hibernate the process that is likely to be used again. However,
% a significant amount of unused binary references may have
@ -709,12 +754,13 @@ reader({call, From}, {direct_fetch, PositionList, Info}, State) ->
garbage_collect(),
{keep_state_and_data, []}
end;
reader({call, From}, cdb_complete, State) ->
leveled_log:log(cdb05, [State#state.filename, reader, cdb_ccomplete]),
ok = file:close(State#state.handle),
{stop_and_reply,
normal,
[{reply, From, {ok, State#state.filename}}],
reader(
{call, From}, cdb_complete, State = #state{filename = FN, handle = IO})
when ?IS_DEF(FN), ?IS_DEF(IO) ->
leveled_log:log(cdb05, [FN, reader, cdb_ccomplete]),
ok = file:close(IO),
{stop_and_reply, normal,
[{reply, From, {ok, FN}}],
State#state{handle=undefined}};
reader({call, From}, check_hashtable, _State) ->
{keep_state_and_data, [{reply, From, true}]};
@ -731,69 +777,77 @@ reader(cast, clerk_complete, _State) ->
{keep_state_and_data, [hibernate]}.
delete_pending({call, From}, {get_kv, Key}, State) ->
delete_pending(
{call, From}, {get_kv, Key}, State = #state{handle = IO})
when ?IS_DEF(IO) ->
Result =
get_withcache(State#state.handle,
Key,
State#state.hash_index,
State#state.binary_mode,
State#state.monitor),
get_withcache(
IO,
Key,
State#state.hash_index,
State#state.binary_mode,
State#state.monitor
),
{keep_state_and_data, [{reply, From, Result}, ?DELETE_TIMEOUT]};
delete_pending({call, From}, {key_check, Key}, State) ->
delete_pending(
{call, From}, {key_check, Key}, State = #state{handle = IO})
when ?IS_DEF(IO) ->
Result =
get_withcache(State#state.handle,
Key,
State#state.hash_index,
loose_presence,
State#state.binary_mode,
{no_monitor, 0}),
get_withcache(
IO,
Key,
State#state.hash_index,
loose_presence,
State#state.binary_mode,
{no_monitor, 0}
),
{keep_state_and_data, [{reply, From, Result}, ?DELETE_TIMEOUT]};
delete_pending({call, From}, cdb_close, State) ->
leveled_log:log(cdb05, [State#state.filename, delete_pending, cdb_close]),
close_pendingdelete(State#state.handle,
State#state.filename,
State#state.waste_path),
delete_pending(
{call, From}, cdb_close, State = #state{handle = IO, filename = FN})
when ?IS_DEF(FN), ?IS_DEF(IO) ->
leveled_log:log(cdb05, [FN, delete_pending, cdb_close]),
close_pendingdelete(IO, FN, State#state.waste_path),
{stop_and_reply, normal, [{reply, From, ok}]};
delete_pending(cast, delete_confirmed, State=#state{delete_point=ManSQN}) ->
leveled_log:log(cdb04, [State#state.filename, ManSQN]),
close_pendingdelete(State#state.handle,
State#state.filename,
State#state.waste_path),
{stop, normal};
delete_pending(cast, destroy, State) ->
leveled_log:log(cdb05, [State#state.filename, delete_pending, destroy]),
close_pendingdelete(State#state.handle,
State#state.filename,
State#state.waste_path),
delete_pending(
cast, delete_confirmed, State = #state{handle = IO, filename = FN})
when ?IS_DEF(FN), ?IS_DEF(IO) ->
leveled_log:log(cdb04, [FN, State#state.delete_point]),
close_pendingdelete(IO, FN, State#state.waste_path),
{stop, normal};
delete_pending(
timeout, _, State=#state{delete_point=ManSQN}) when ManSQN > 0 ->
cast, destroy, State = #state{handle = IO, filename = FN})
when ?IS_DEF(FN), ?IS_DEF(IO) ->
leveled_log:log(cdb05, [FN, delete_pending, destroy]),
close_pendingdelete(IO, FN, State#state.waste_path),
{stop, normal};
delete_pending(
timeout, _, State=#state{delete_point=ManSQN, handle = IO, filename = FN})
when ManSQN > 0, ?IS_DEF(FN), ?IS_DEF(IO) ->
case is_process_alive(State#state.inker) of
true ->
ok =
leveled_inker:ink_confirmdelete(State#state.inker,
ManSQN,
self()),
leveled_inker:ink_confirmdelete(
State#state.inker, ManSQN, self()),
{keep_state_and_data, [?DELETE_TIMEOUT]};
false ->
leveled_log:log(cdb04, [State#state.filename, ManSQN]),
close_pendingdelete(State#state.handle,
State#state.filename,
State#state.waste_path),
leveled_log:log(cdb04, [FN, ManSQN]),
close_pendingdelete(IO, FN, State#state.waste_path),
{stop, normal}
end.
handle_sync_event({cdb_scan, FilterFun, Acc, StartPos}, From, State) ->
{ok, EndPos0} = file:position(State#state.handle, eof),
handle_sync_event(
{cdb_scan, FilterFun, Acc, StartPos}, From, State = #state{handle = IO})
when ?IS_DEF(IO) ->
{ok, EndPos0} = file:position(IO, eof),
{ok, StartPos0} =
case StartPos of
undefined ->
file:position(State#state.handle, ?BASE_POSITION);
file:position(IO, ?BASE_POSITION);
StartPos ->
{ok, StartPos}
end,
file:position(State#state.handle, StartPos0),
file:position(IO, StartPos0),
MaybeEnd =
(check_last_key(State#state.last_key) == empty) or
(StartPos0 >= (EndPos0 - ?DWORD_SIZE)),
@ -802,11 +856,13 @@ handle_sync_event({cdb_scan, FilterFun, Acc, StartPos}, From, State) ->
true ->
{eof, Acc};
false ->
scan_over_file(State#state.handle,
StartPos0,
FilterFun,
Acc,
State#state.last_key)
scan_over_file(
IO,
StartPos0,
FilterFun,
Acc,
State#state.last_key
)
end,
% The scan may have created a lot of binary references, clear up the
% reference counters for this process here manually. The cdb process
@ -821,20 +877,26 @@ handle_sync_event({cdb_scan, FilterFun, Acc, StartPos}, From, State) ->
{keep_state_and_data, []};
handle_sync_event(cdb_lastkey, From, State) ->
{keep_state_and_data, [{reply, From, State#state.last_key}]};
handle_sync_event(cdb_firstkey, From, State) ->
{ok, EOFPos} = file:position(State#state.handle, eof),
FilterFun = fun(Key, _V, _P, _O, _Fun) -> {stop, Key} end,
handle_sync_event(
cdb_firstkey, From, State = #state{handle = IO})
when ?IS_DEF(IO) ->
{ok, EOFPos} = file:position(IO, eof),
FirstKey =
case EOFPos of
?BASE_POSITION ->
empty;
_ ->
file:position(State#state.handle, ?BASE_POSITION),
{_Pos, FirstScanKey} = scan_over_file(State#state.handle,
?BASE_POSITION,
FilterFun,
empty,
State#state.last_key),
FindFirstKeyFun =
fun(Key, _V, _P, _O, _Fun) -> {stop, Key} end,
file:position(IO, ?BASE_POSITION),
{_Pos, FirstScanKey} =
scan_over_file(
IO,
?BASE_POSITION,
FindFirstKeyFun,
empty,
State#state.last_key
),
FirstScanKey
end,
{keep_state_and_data, [{reply, From, FirstKey}]};
@ -861,14 +923,15 @@ handle_sync_event({put_cachedscore, Score}, From, State) ->
{keep_state,
State#state{cached_score = {Score,os:timestamp()}},
[{reply, From, ok}]};
handle_sync_event(cdb_close, From, State) ->
file:close(State#state.handle),
handle_sync_event(
cdb_close, From, _State = #state{handle = IO})
when ?IS_DEF(IO) ->
file:close(IO),
{stop_and_reply, normal, [{reply, From, ok}]}.
terminate(_Reason, _StateName, _State) ->
ok.
code_change(_OldVsn, StateName, State, _Extra) ->
{ok, StateName, State}.
@ -877,7 +940,6 @@ code_change(_OldVsn, StateName, State, _Extra) ->
%%% External functions
%%%============================================================================
finished_rolling(CDB) ->
RollerFun =
fun(Sleep, FinishedRolling) ->
@ -908,9 +970,9 @@ close_pendingdelete(Handle, Filename, WasteFP) ->
undefined ->
ok = file:delete(Filename);
WasteFP ->
Components = filename:split(Filename),
NewName = WasteFP ++ lists:last(Components),
file:rename(Filename, NewName)
FN = filename:basename(Filename),
NewName = filename:join(WasteFP, FN),
ok = file:rename(Filename, NewName)
end;
false ->
% This may happen when there has been a destroy while files are
@ -1179,7 +1241,7 @@ find_lastkey(Handle, IndexCache) ->
_ ->
{ok, _} = file:position(Handle, LastPosition),
{KeyLength, _ValueLength} = read_next_2_integers(Handle),
safe_read_next_key(Handle, KeyLength)
safe_read_next(Handle, KeyLength, key)
end.
@ -1239,7 +1301,7 @@ extract_kvpair(_H, [], _K, _BinaryMode) ->
extract_kvpair(Handle, [Position|Rest], Key, BinaryMode) ->
{ok, _} = file:position(Handle, Position),
{KeyLength, ValueLength} = read_next_2_integers(Handle),
case safe_read_next_keybin(Handle, KeyLength) of
case safe_read_next(Handle, KeyLength, keybin) of
{Key, KeyBin} -> % If same key as passed in, then found!
case checkread_next_value(Handle, ValueLength, KeyBin) of
{false, _} ->
@ -1259,12 +1321,12 @@ extract_kvpair(Handle, [Position|Rest], Key, BinaryMode) ->
extract_key(Handle, Position) ->
{ok, _} = file:position(Handle, Position),
{KeyLength, _ValueLength} = read_next_2_integers(Handle),
{safe_read_next_key(Handle, KeyLength)}.
{safe_read_next(Handle, KeyLength, key)}.
extract_key_size(Handle, Position) ->
{ok, _} = file:position(Handle, Position),
{KeyLength, ValueLength} = read_next_2_integers(Handle),
K = safe_read_next_key(Handle, KeyLength),
K = safe_read_next(Handle, KeyLength, key),
{K, ValueLength}.
extract_key_value_check(Handle, Position, BinaryMode) ->
@ -1279,32 +1341,35 @@ extract_key_value_check(Handle, Position, BinaryMode) ->
end.
-spec startup_scan_over_file(file:io_device(), file_location())
-> {file_location(), any()}.
-spec startup_scan_over_file(
file:io_device(), integer()) -> {integer(), {ets:tid(), term()}}.
%% @doc
%% Scan through the file until there is a failure to crc check an input, and
%% at that point return the position and the key dictionary scanned so far
startup_scan_over_file(Handle, Position) ->
HashTree = new_hashtree(),
{eof, Output} = scan_over_file(Handle,
Position,
fun startup_filter/5,
{HashTree, empty},
empty),
Hashtree = new_hashtree(),
FilterFun = startup_filter(Hashtree),
{eof, LastKey} = scan_over_file(Handle, Position, FilterFun, empty, empty),
{ok, FinalPos} = file:position(Handle, cur),
{FinalPos, Output}.
{FinalPos, {Hashtree, LastKey}}.
-spec startup_filter(ets:tid()) -> filter_fun().
%% @doc
%% Specific filter to be used at startup to build a hashtree for an incomplete
%% cdb file, and returns at the end the hashtree and the final Key seen in the
%% journal
startup_filter(Key, _ValueAsBin, Position, {Hashtree, _LastKey}, _ExtractFun) ->
{loop, {put_hashtree(Key, Position, Hashtree), Key}}.
startup_filter(Hashtree) ->
FilterFun =
fun(Key, _ValueAsBin, Position, _LastKey, _ExtractFun) ->
put_hashtree(Key, Position, Hashtree),
{loop, Key}
end,
FilterFun.
-spec scan_over_file(file:io_device(), file_location(),
filter_fun(), any(), any()) -> {file_location(), any()}.
-spec scan_over_file
(file:io_device(), integer(), filter_fun(), term(), any()) ->
{file_location(), term()}.
%% Scan for key changes - scan over file returning applying FilterFun
%% The FilterFun should accept as input:
%% - Key, ValueBin, Position, Accumulator, Fun (to extract values from Binary)
@ -1324,13 +1389,14 @@ scan_over_file(Handle, Position, FilterFun, Output, LastKey) ->
{ok, Position} = file:position(Handle, {bof, Position}),
{eof, Output};
{Key, ValueAsBin, KeyLength, ValueLength} ->
NewPosition = case Key of
LastKey ->
eof;
_ ->
Position + KeyLength + ValueLength
+ ?DWORD_SIZE
end,
NewPosition =
case Key of
LastKey ->
eof;
_ ->
Position + KeyLength + ValueLength
+ ?DWORD_SIZE
end,
case FilterFun(Key,
ValueAsBin,
Position,
@ -1360,9 +1426,8 @@ check_last_key(empty) ->
check_last_key(_LK) ->
ok.
-spec saferead_keyvalue(file:io_device())
-> false|{any(), any(), integer(), integer()}.
-spec saferead_keyvalue(
file:io_device()) -> false|{any(), binary(), integer(), integer()}.
%% @doc
%% Read the Key/Value at this point, returning {ok, Key, Value}
%% catch expected exceptions associated with file corruption (or end) and
@ -1372,11 +1437,11 @@ saferead_keyvalue(Handle) ->
eof ->
false;
{KeyL, ValueL} when is_integer(KeyL), is_integer(ValueL) ->
case safe_read_next_keybin(Handle, KeyL) of
case safe_read_next(Handle, KeyL, keybin) of
false ->
false;
{Key, KeyBin} ->
case safe_read_next_value(Handle, ValueL, KeyBin) of
case safe_read_next(Handle, ValueL, {value, KeyBin}) of
false ->
false;
TrueValue ->
@ -1388,66 +1453,37 @@ saferead_keyvalue(Handle) ->
false
end.
-spec safe_read_next_key(file:io_device(), integer()) -> false|term().
%% @doc
%% Return the next key or have false returned if there is some sort of
%% potentially expected error (e.g. due to file truncation). Note that no
%% CRC check has been performed
safe_read_next_key(Handle, Length) ->
ReadFun = fun(Bin) -> binary_to_term(Bin) end,
safe_read_next(Handle, Length, ReadFun).
-spec safe_read_next_keybin(file:io_device(), integer())
-> false|{term(), binary()}.
%% @doc
%% Return the next key or have false returned if there is some sort of
%% potentially expected error (e.g. due to file truncation). Note that no
%% CRC check has been performed
%% Returns both the Key and the Binary version, the binary version being
%% required for the CRC checking after the value fetch (see
%% safe_read_next_value/3)
safe_read_next_keybin(Handle, Length) ->
ReadFun = fun(Bin) -> {binary_to_term(Bin), Bin} end,
safe_read_next(Handle, Length, ReadFun).
-spec safe_read_next_value(file:io_device(), integer(), binary())
-> binary()|false.
safe_read_next_value(Handle, Length, KeyBin) ->
ReadFun = fun(VBin) -> crccheck(VBin, KeyBin) end,
safe_read_next(Handle, Length, ReadFun).
-type read_output() :: {term(), binary()}|binary()|term()|false.
-type read_fun() :: fun((binary()) -> read_output()).
-spec safe_read_next(file:io_device(), integer(), read_fun())
-> read_output().
-spec safe_read_next
(file:io_device(), integer(), key) -> false|term();
(file:io_device(), integer(), keybin) -> false|{term(), binary()};
(file:io_device(), integer(), {value, binary()}) -> false|binary().
%% @doc
%% Read the next item of length Length
%% Previously catching error:badarg was sufficient to capture errors of
%% corruption, but on some OS versions may need to catch error:einval as well
safe_read_next(Handle, Length, ReadFun) ->
safe_read_next(Handle, Length, ReadType) ->
ReadFun =
case ReadType of
key ->
fun(Bin) -> binary_to_term(Bin) end;
keybin ->
fun(KBin) -> {binary_to_term(KBin), KBin} end;
{value, KeyBin} ->
fun(VBin) -> crccheck(VBin, KeyBin) end
end,
try
loose_read(Handle, Length, ReadFun)
case file:read(Handle, Length) of
eof ->
false;
{ok, Result} ->
ReadFun(Result)
end
catch
error:ReadError ->
leveled_log:log(cdb20, [ReadError, Length]),
false
end.
-spec loose_read(file:io_device(), integer(), read_fun()) -> read_output().
%% @doc
%% Read with minimal error handling (only eof) - to be wrapped in
%% safe_read_next/3 to catch exceptions.
loose_read(Handle, Length, ReadFun) ->
case file:read(Handle, Length) of
eof ->
false;
{ok, Result} ->
ReadFun(Result)
end.
-spec crccheck(binary()|bitstring(), binary()) -> any().
%% @doc
%% CRC chaeck the value which should be a binary, where the first four bytes
@ -1472,8 +1508,9 @@ crccheck(_V, _KB) ->
calc_crc(KeyBin, Value) -> erlang:crc32(<<KeyBin/binary, Value/binary>>).
-spec checkread_next_value(file:io_device(), integer(), binary())
-> {boolean(), binary()|crc_wonky}.
-spec checkread_next_value
(file:io_device(), integer(), binary()) ->
{true, binary()}|{false, crc_wonky}.
%% @doc
%% Read next string where the string has a CRC prepended - stripping the crc
%% and checking if requested
@ -1578,12 +1615,14 @@ search_hash_table(Handle,
leveled_monitor:timing(),
leveled_monitor:timing(),
pos_integer()) -> ok.
maybelog_get_timing(_Monitor, no_timing, no_timing, _CC) ->
ok;
maybelog_get_timing({Pid, _StatsFreq}, IndexTime, ReadTime, CycleCount) ->
maybelog_get_timing(
{Pid, _StatsFreq}, IndexTime, ReadTime, CycleCount)
when is_pid(Pid), is_integer(IndexTime), is_integer(ReadTime) ->
leveled_monitor:add_stat(
Pid, {cdb_get_update, CycleCount, IndexTime, ReadTime}).
Pid, {cdb_get_update, CycleCount, IndexTime, ReadTime});
maybelog_get_timing(_Monitor, _IndexTime, _ReadTime, _CC) ->
ok.
%% Write the actual hashtables at the bottom of the file. Each hash table
%% entry is a doubleword in length. The first word is the hash value
@ -1916,7 +1955,7 @@ dump(FileName) ->
{ok, _} = file:position(Handle, {bof, ?BASE_POSITION}),
Fn1 = fun(_I, Acc) ->
{KL, VL} = read_next_2_integers(Handle),
{Key, KB} = safe_read_next_keybin(Handle, KL),
{Key, KB} = safe_read_next(Handle, KL, keybin),
Value =
case checkread_next_value(Handle, VL, KB) of
{true, V0} ->
@ -2632,7 +2671,7 @@ safe_read_test() ->
{ok, HandleK} = file:open(TestFN, ?WRITE_OPS),
ok = file:pwrite(HandleK, 0, BinToWrite),
{ok, _} = file:position(HandleK, 8 + KeyL + ValueL),
?assertMatch(false, safe_read_next_key(HandleK, KeyL)),
?assertMatch(false, safe_read_next(HandleK, KeyL, key)),
ok = file:close(HandleK),
WrongKeyL = endian_flip(KeyL + ValueL),
@ -2749,11 +2788,15 @@ getpositions_sample_test() ->
ok = cdb_close(P2),
file:delete(F2).
nonsense_coverage_test() ->
?assertMatch({ok, reader, #state{}}, code_change(nonsense,
reader,
#state{},
nonsense)).
?assertMatch(
{ok, reader, #state{}},
code_change(
nonsense,
reader,
#state{max_count=1, max_size=100},
nonsense
)
).
-endif.


@ -10,6 +10,12 @@
-include("leveled.hrl").
-eqwalizer({nowarn_function, convert_to_ledgerv/5}).
-ifdef(TEST).
-export([convert_to_ledgerv/5]).
-endif.
-export([
inker_reload_strategy/1,
strip_to_seqonly/1,
@ -21,8 +27,10 @@
endkey_passed/2,
key_dominates/2,
maybe_reap_expiredkey/2,
to_ledgerkey/3,
to_ledgerkey/5,
to_objectkey/3,
to_objectkey/5,
to_querykey/3,
to_querykey/5,
from_ledgerkey/1,
from_ledgerkey/2,
isvalid_ledgerkey/1,
@ -33,11 +41,11 @@
from_journalkey/1,
revert_to_keydeltas/2,
is_full_journalentry/1,
split_inkvalue/1,
check_forinkertype/2,
get_tagstrategy/2,
maybe_compress/2,
create_value_for_journal/3,
revert_value_from_journal/1,
generate_ledgerkv/5,
get_size/2,
get_keyandobjhash/2,
@ -54,8 +62,9 @@
-type tag() ::
leveled_head:object_tag()|?IDX_TAG|?HEAD_TAG|atom().
-type key() ::
binary()|string()|{binary(), binary()}.
-type single_key() :: binary().
-type tuple_key() :: {single_key(), single_key()}.
-type key() :: single_key()|tuple_key().
% Keys SHOULD be binary()
% string() support is a legacy of old tests
-type sqn() ::
@ -75,8 +84,15 @@
-type ledger_status() ::
tomb|{active, non_neg_integer()|infinity}.
-type primary_key() ::
{leveled_head:object_tag(), key(), single_key(), single_key()|null}.
% Primary key for an object
-type object_key() ::
{tag(), key(), key(), single_key()|null}.
-type query_key() ::
{tag(), key()|null, key()|null, single_key()|null}|all.
-type ledger_key() ::
{tag(), any(), any(), any()}|all.
object_key()|query_key().
-type slimmed_key() ::
{binary(), binary()|null}|binary()|null|all.
-type ledger_value() ::
@ -86,7 +102,7 @@
-type ledger_value_v2() ::
{sqn(), ledger_status(), segment_hash(), metadata(), last_moddate()}.
-type ledger_kv() ::
{ledger_key(), ledger_value()}.
{object_key(), ledger_value()}.
-type compaction_method() ::
retain|recovr|recalc.
-type compaction_strategy() ::
@ -94,14 +110,14 @@
-type journal_key_tag() ::
?INKT_STND|?INKT_TOMB|?INKT_MPUT|?INKT_KEYD.
-type journal_key() ::
{sqn(), journal_key_tag(), ledger_key()}.
{sqn(), journal_key_tag(), primary_key()}.
-type journal_ref() ::
{ledger_key(), sqn()}.
{object_key(), sqn()}.
-type object_spec_v0() ::
{add|remove, key(), key(), key()|null, any()}.
{add|remove, key(), single_key(), single_key()|null, metadata()}.
-type object_spec_v1() ::
{add|remove, v1, key(), key(), key()|null,
list(erlang:timestamp())|undefined, any()}.
{add|remove, v1, key(), single_key(), single_key()|null,
list(erlang:timestamp())|undefined, metadata()}.
-type object_spec() ::
object_spec_v0()|object_spec_v1().
-type compression_method() ::
@ -135,10 +151,14 @@
-export_type([tag/0,
key/0,
single_key/0,
sqn/0,
object_spec/0,
segment_hash/0,
ledger_status/0,
primary_key/0,
object_key/0,
query_key/0,
ledger_key/0,
ledger_value/0,
ledger_kv/0,
@ -186,30 +206,26 @@ segment_hash(KeyTuple) when is_tuple(KeyTuple) ->
segment_hash(BinKey).
headkey_to_canonicalbinary({?HEAD_TAG, Bucket, Key, SubK})
when is_binary(Bucket), is_binary(Key), is_binary(SubK) ->
headkey_to_canonicalbinary({
?HEAD_TAG, Bucket, Key, SubK})
when is_binary(Bucket), is_binary(Key), is_binary(SubK) ->
<<Bucket/binary, Key/binary, SubK/binary>>;
headkey_to_canonicalbinary({?HEAD_TAG, Bucket, Key, null})
when is_binary(Bucket), is_binary(Key) ->
headkey_to_canonicalbinary(
{?HEAD_TAG, Bucket, Key, null})
when is_binary(Bucket), is_binary(Key) ->
<<Bucket/binary, Key/binary>>;
headkey_to_canonicalbinary({?HEAD_TAG, {BucketType, Bucket}, Key, SubKey})
when is_binary(BucketType), is_binary(Bucket) ->
headkey_to_canonicalbinary({?HEAD_TAG,
<<BucketType/binary, Bucket/binary>>,
Key,
SubKey});
headkey_to_canonicalbinary(Key) when element(1, Key) == ?HEAD_TAG ->
% In unit tests head specs can have non-binary keys, so handle
% this through hashing the whole key
leveled_util:t2b(Key).
headkey_to_canonicalbinary(
{?HEAD_TAG, {BucketType, Bucket}, Key, SubKey})
when is_binary(BucketType), is_binary(Bucket) ->
headkey_to_canonicalbinary(
{?HEAD_TAG, <<BucketType/binary, Bucket/binary>>, Key, SubKey}).
-spec to_lookup(ledger_key()) -> maybe_lookup().
%% @doc
%% Should it be possible to lookup a key in the merge tree. This is not true
%% For keys that should only be read through range queries. Direct lookup
%% keys will have presence in bloom filters and other lookup accelerators.
to_lookup(Key) ->
to_lookup(Key) when is_tuple(Key) ->
case element(1, Key) of
?IDX_TAG ->
no_lookup;
@ -235,12 +251,12 @@ strip_to_keyseqonly({LK, V}) -> {LK, element(1, V)}.
-spec strip_to_indexdetails(ledger_kv()) ->
{integer(), segment_hash(), last_moddate()}.
strip_to_indexdetails({_, V}) when tuple_size(V) == 4 ->
strip_to_indexdetails({_, {SQN, _, SegmentHash, _}}) ->
% A v1 value
{element(1, V), element(3, V), undefined};
strip_to_indexdetails({_, V}) when tuple_size(V) > 4 ->
{SQN, SegmentHash, undefined};
strip_to_indexdetails({_, {SQN, _, SegmentHash, _, LMD}}) ->
% A v2 value should have a fith element - Last Modified Date
{element(1, V), element(3, V), element(5, V)}.
{SQN, SegmentHash, LMD}.
-spec striphead_to_v1details(ledger_value()) -> ledger_value().
striphead_to_v1details(V) ->
@ -292,18 +308,25 @@ maybe_accumulate(
maybe_accumulate(T, Acc, Count, Filter, AccFun).
-spec accumulate_index(
{boolean(), undefined|leveled_runner:mp()}, leveled_runner:acc_fun())
-> any().
{boolean(), undefined|leveled_runner:mp()},
leveled_runner:fold_keys_fun())
-> leveled_penciller:pclacc_fun().
accumulate_index({false, undefined}, FoldKeysFun) ->
fun({?IDX_TAG, Bucket, _IndexInfo, ObjKey}, _Value, Acc) ->
fun(
{?IDX_TAG, Bucket, _IndexInfo, ObjKey}, _Value, Acc)
when ObjKey =/= null ->
FoldKeysFun(Bucket, ObjKey, Acc)
end;
accumulate_index({true, undefined}, FoldKeysFun) ->
fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) ->
fun(
{?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc)
when IdxValue =/= null, ObjKey =/= null ->
FoldKeysFun(Bucket, {IdxValue, ObjKey}, Acc)
end;
accumulate_index({AddTerm, TermRegex}, FoldKeysFun) ->
fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) ->
fun(
{?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc)
when IdxValue =/= null, ObjKey =/= null ->
case re:run(IdxValue, TermRegex) of
nomatch ->
Acc;
@ -343,17 +366,17 @@ maybe_reap(_, _) ->
false.
-spec count_tombs(
list(ledger_kv()), non_neg_integer()|not_counted) ->
non_neg_integer()|not_counted.
count_tombs(_List, not_counted) ->
not_counted;
list(ledger_kv()), non_neg_integer()) ->
non_neg_integer().
count_tombs([], Count) ->
Count;
count_tombs([{_K, V}|T], Count) when element(2, V) == tomb ->
count_tombs(T, Count + 1);
count_tombs([_KV|T], Count) ->
count_tombs(T, Count).
count_tombs([{_K, V}|T], Count) when is_tuple(V) ->
case element(2, V) of
tomb ->
count_tombs(T, Count + 1);
_ ->
count_tombs(T, Count)
end.
-spec from_ledgerkey(atom(), tuple()) -> false|tuple().
%% @doc
@ -375,18 +398,37 @@ from_ledgerkey({?HEAD_TAG, Bucket, Key, SubKey}) ->
from_ledgerkey({_Tag, Bucket, Key, _SubKey}) ->
{Bucket, Key}.
-spec to_ledgerkey(any(), any(), tag(), any(), any()) -> ledger_key().
-spec to_objectkey(
key(), single_key(), tag(), binary(), binary()) -> object_key().
%% @doc
%% Convert something into a ledger key
to_ledgerkey(Bucket, Key, Tag, Field, Value) when Tag == ?IDX_TAG ->
to_objectkey(Bucket, Key, Tag, Field, Value) when Tag == ?IDX_TAG ->
{?IDX_TAG, Bucket, {Field, Value}, Key}.
-spec to_ledgerkey(any(), any(), tag()) -> ledger_key().
-if(?OTP_RELEASE >= 26).
-spec to_objectkey
(key(), single_key(), leveled_head:object_tag()) -> primary_key();
(key(), key(), tag()) -> object_key().
-else.
-spec to_objectkey(key(), key()|single_key(), tag()) -> object_key().
-endif.
%% @doc
%% Convert something into a ledger key
to_ledgerkey(Bucket, {Key, SubKey}, ?HEAD_TAG) ->
to_objectkey(Bucket, {Key, SubKey}, ?HEAD_TAG) ->
{?HEAD_TAG, Bucket, Key, SubKey};
to_ledgerkey(Bucket, Key, Tag) ->
to_objectkey(Bucket, Key, Tag) ->
{Tag, Bucket, Key, null}.
-spec to_querykey(
key(), single_key()|null, tag(), binary(), binary())
-> query_key().
to_querykey(Bucket, Key, Tag, Field, Value) when Tag == ?IDX_TAG ->
{?IDX_TAG, Bucket, {Field, Value}, Key}.
-spec to_querykey(key()|null, key()|null, tag()) -> query_key().
%% @doc
%% Convert something into a ledger query key
to_querykey(Bucket, Key, Tag) ->
{Tag, Bucket, Key, null}.
%% No spec - due to tests
@ -399,8 +441,8 @@ isvalid_ledgerkey(_LK) ->
false.
-spec endkey_passed(
ledger_key()|slimmed_key(),
ledger_key()|slimmed_key()) -> boolean().
query_key()|slimmed_key(),
object_key()|slimmed_key()) -> boolean().
%% @doc
%% Compare a key against a query key, only comparing elements that are non-null
%% in the Query key.
@ -480,14 +522,19 @@ get_tagstrategy(Tag, Strategy) ->
%%% Manipulate Journal Key and Value
%%%============================================================================
-spec to_inkerkey(ledger_key(), non_neg_integer()) -> journal_key().
-spec to_inkerkey(primary_key(), non_neg_integer()) -> journal_key().
%% @doc
%% convertion from ledger_key to journal_key to allow for the key to be fetched
to_inkerkey(LedgerKey, SQN) ->
{SQN, ?INKT_STND, LedgerKey}.
-spec to_inkerkv(ledger_key(), non_neg_integer(), any(), journal_keychanges(),
compression_method(), boolean()) -> {journal_key(), any()}.
-spec to_inkerkv(
primary_key(),
non_neg_integer(),
any(),
journal_keychanges(),
compression_method(), boolean())
-> {journal_key(), binary()}.
%% @doc
%% Convert to the correct format of a Journal key and value
to_inkerkv(LedgerKey, SQN, Object, KeyChanges, PressMethod, Compress) ->
@ -496,7 +543,7 @@ to_inkerkv(LedgerKey, SQN, Object, KeyChanges, PressMethod, Compress) ->
create_value_for_journal({Object, KeyChanges}, Compress, PressMethod),
{{SQN, InkerType, LedgerKey}, Value}.
-spec revert_to_keydeltas(journal_key(), any()) -> {journal_key(), any()}.
-spec revert_to_keydeltas(journal_key(), binary()) -> {journal_key(), any()}.
%% @doc
%% If we wish to retain key deltas when an object in the Journal has been
%% replaced - then this converts a Journal Key and Value into one which has no
@ -575,7 +622,7 @@ serialise_object(Object, false, _Method) ->
serialise_object(Object, true, _Method) ->
term_to_binary(Object, [compressed]).
-spec revert_value_from_journal(binary()) -> {any(), journal_keychanges()}.
-spec revert_value_from_journal(binary()) -> {dynamic(), journal_keychanges()}.
%% @doc
%% Revert the object back to its deserialised state, along with the list of
%% key changes associated with the change
@ -661,10 +708,6 @@ decode_valuetype(TypeInt) ->
from_journalkey({SQN, _Type, LedgerKey}) ->
{SQN, LedgerKey}.
split_inkvalue(VBin) when is_binary(VBin) ->
revert_value_from_journal(VBin).
check_forinkertype(_LedgerKey, delete) ->
?INKT_TOMB;
check_forinkertype(_LedgerKey, head_only) ->
@ -709,7 +752,7 @@ idx_indexspecs(IndexSpecs, Bucket, Key, SQN, TTL) ->
gen_indexspec(Bucket, Key, IdxOp, IdxField, IdxTerm, SQN, TTL) ->
Status = set_status(IdxOp, TTL),
{to_ledgerkey(Bucket, Key, ?IDX_TAG, IdxField, IdxTerm),
{to_objectkey(Bucket, Key, ?IDX_TAG, IdxField, IdxTerm),
{SQN, Status, no_lookup, null}}.
-spec gen_headspec(object_spec(), integer(), integer()|infinity) -> ledger_kv().
@ -717,22 +760,30 @@ gen_indexspec(Bucket, Key, IdxOp, IdxField, IdxTerm, SQN, TTL) ->
%% Take an object_spec as passed in a book_mput, and convert it into to a
%% valid ledger key and value. Supports different shaped tuples for different
%% versions of the object_spec
gen_headspec({IdxOp, v1, Bucket, Key, SubKey, LMD, Value}, SQN, TTL) ->
gen_headspec(
{IdxOp, v1, Bucket, Key, SubKey, LMD, Value}, SQN, TTL)
when is_binary(Key) ->
% v1 object spec
Status = set_status(IdxOp, TTL),
K = to_ledgerkey(Bucket, {Key, SubKey}, ?HEAD_TAG),
K =
case SubKey of
null ->
to_objectkey(Bucket, Key, ?HEAD_TAG);
SKB when is_binary(SKB) ->
to_objectkey(Bucket, {Key, SKB}, ?HEAD_TAG)
end,
{K, {SQN, Status, segment_hash(K), Value, get_last_lastmodification(LMD)}};
gen_headspec({IdxOp, Bucket, Key, SubKey, Value}, SQN, TTL) ->
% v0 object spec
Status = set_status(IdxOp, TTL),
K = to_ledgerkey(Bucket, {Key, SubKey}, ?HEAD_TAG),
{K, {SQN, Status, segment_hash(K), Value, undefined}}.
gen_headspec(
{IdxOp, Bucket, Key, SubKey, Value}, SQN, TTL)
when is_binary(Key) ->
gen_headspec({IdxOp, v1, Bucket, Key, SubKey, undefined, Value}, SQN, TTL).
-spec return_proxy(leveled_head:object_tag()|leveled_head:headonly_tag(),
leveled_head:object_metadata(),
pid(), journal_ref())
-> proxy_objectbin()|leveled_head:object_metadata().
-spec return_proxy
(leveled_head:headonly_tag(), leveled_head:object_metadata(), null, journal_ref())
-> leveled_head:object_metadata();
(leveled_head:object_tag(), leveled_head:object_metadata(), pid(), journal_ref())
-> proxy_objectbin().
%% @doc
%% If the object has a value, return the metadata and a proxy through which
%% the applictaion or runner can access the value. If it is a ?HEAD_TAG
@ -751,6 +802,9 @@ return_proxy(Tag, ObjMetadata, InkerClone, JournalRef) ->
InkerClone,
JournalRef}}).
-spec set_status(
add|remove, non_neg_integer()|infinity) ->
tomb|{active, non_neg_integer()|infinity}.
set_status(add, TTL) ->
{active, TTL};
set_status(remove, _TTL) ->
@ -758,10 +812,18 @@ set_status(remove, _TTL) ->
tomb.
-spec generate_ledgerkv(
tuple(), integer(), any(), integer(), tuple()|infinity) ->
{any(), any(), any(),
{{integer(), integer()}|no_lookup, integer()},
list()}.
primary_key(),
integer(),
dynamic(),
integer(),
non_neg_integer()|infinity) ->
{
key(),
single_key(),
ledger_value_v2(),
{segment_hash(), non_neg_integer()|null},
list(erlang:timestamp())
}.
%% @doc
%% Function to extract from an object the information necessary to populate
%% the Penciller's ledger.
@ -776,24 +838,22 @@ set_status(remove, _TTL) ->
%% siblings)
generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) ->
{Tag, Bucket, Key, _} = PrimaryKey,
Status = case Obj of
delete ->
tomb;
_ ->
{active, TS}
end,
Status = case Obj of delete -> tomb; _ -> {active, TS} end,
Hash = segment_hash(PrimaryKey),
{MD, LastMods} = leveled_head:extract_metadata(Tag, Size, Obj),
ObjHash = leveled_head:get_hash(Tag, MD),
Value = {SQN,
Status,
Hash,
MD,
get_last_lastmodification(LastMods)},
Value =
{
SQN,
Status,
Hash,
MD,
get_last_lastmodification(LastMods)
},
{Bucket, Key, Value, {Hash, ObjHash}, LastMods}.
-spec get_last_lastmodification(list(erlang:timestamp())|undefined)
-> pos_integer()|undefined.
-spec get_last_lastmodification(
list(erlang:timestamp())|undefined) -> pos_integer()|undefined.
%% @doc
%% Get the highest of the last modifications measured in seconds. This will be
%% stored as 4 bytes (unsigned) so will last for another 80 + years
@ -830,10 +890,10 @@ get_keyandobjhash(LK, Value) ->
%% Get the next key to iterate from a given point
next_key(Key) when is_binary(Key) ->
<<Key/binary, 0>>;
next_key(Key) when is_list(Key) ->
Key ++ [0];
next_key({Type, Bucket}) when is_binary(Type), is_binary(Bucket) ->
{Type, next_key(Bucket)}.
UpdBucket = next_key(Bucket),
true = is_binary(UpdBucket),
{Type, UpdBucket}.
%%%============================================================================
@ -844,6 +904,17 @@ next_key({Type, Bucket}) when is_binary(Type), is_binary(Bucket) ->
-include_lib("eunit/include/eunit.hrl").
-spec convert_to_ledgerv(
leveled_codec:ledger_key(),
integer(),
any(),
integer(),
non_neg_integer()|infinity) -> leveled_codec:ledger_value().
convert_to_ledgerv(PK, SQN, Obj, Size, TS) ->
{_B, _K, MV, _H, _LMs} =
leveled_codec:generate_ledgerkv(PK, SQN, Obj, Size, TS),
MV.
valid_ledgerkey_test() ->
UserDefTag = {user_defined, <<"B">>, <<"K">>, null},
?assertMatch(true, isvalid_ledgerkey(UserDefTag)),
@ -870,8 +941,8 @@ indexspecs_test() ->
endkey_passed_test() ->
TestKey = {i, null, null, null},
K1 = {i, 123, {"a", "b"}, <<>>},
K2 = {o, 123, {"a", "b"}, <<>>},
K1 = {i, <<"123">>, {<<"a">>, <<"b">>}, <<>>},
K2 = {o, <<"123">>, {<<"a">>, <<"b">>}, <<>>},
?assertMatch(false, endkey_passed(TestKey, K1)),
?assertMatch(true, endkey_passed(TestKey, K2)).
@ -881,7 +952,7 @@ endkey_passed_test() ->
%% Maybe 5 microseconds per hash
hashperf_test() ->
OL = lists:map(fun(_X) -> leveled_rand:rand_bytes(8192) end, lists:seq(1, 1000)),
OL = lists:map(fun(_X) -> crypto:strong_rand_bytes(8192) end, lists:seq(1, 1000)),
SW = os:timestamp(),
_HL = lists:map(fun(Obj) -> erlang:phash2(Obj) end, OL),
io:format(user, "1000 object hashes in ~w microseconds~n",
@ -899,8 +970,8 @@ head_segment_compare_test() ->
headspec_v0v1_test() ->
% A v0 object spec generates the same outcome as a v1 object spec with the
% last modified date undefined
V1 = {add, v1, <<"B">>, <<"K">>, <<"SK">>, undefined, <<"V">>},
V0 = {add, <<"B">>, <<"K">>, <<"SK">>, <<"V">>},
V1 = {add, v1, <<"B">>, <<"K">>, <<"SK">>, undefined, {<<"V">>}},
V0 = {add, <<"B">>, <<"K">>, <<"SK">>, {<<"V">>}},
TTL = infinity,
?assertMatch(true, gen_headspec(V0, 1, TTL) == gen_headspec(V1, 1, TTL)).


@ -91,8 +91,8 @@ check_hash({_SegHash, Hash}, BloomBin) when is_binary(BloomBin)->
list(leveled_codec:segment_hash()), tuple(), slot_count()) -> tuple().
map_hashes([], HashListTuple, _SlotCount) ->
HashListTuple;
map_hashes([Hash|Rest], HashListTuple, SlotCount) ->
{Slot, [H0, H1]} = split_hash(element(2, Hash), SlotCount),
map_hashes([{_SH, EH}|Rest], HashListTuple, SlotCount) ->
{Slot, [H0, H1]} = split_hash(EH, SlotCount),
SlotHL = element(Slot + 1, HashListTuple),
map_hashes(
Rest,
@ -174,11 +174,15 @@ generate_orderedkeys(Seqn, Count, Acc, BucketLow, BucketHigh) ->
BucketExt =
io_lib:format("K~4..0B", [BucketLow + BNumber]),
KeyExt =
io_lib:format("K~8..0B", [Seqn * 100 + leveled_rand:uniform(100)]),
LK = leveled_codec:to_ledgerkey("Bucket" ++ BucketExt, "Key" ++ KeyExt, o),
Chunk = leveled_rand:rand_bytes(16),
{_B, _K, MV, _H, _LMs} =
leveled_codec:generate_ledgerkv(LK, Seqn, Chunk, 64, infinity),
io_lib:format("K~8..0B", [Seqn * 100 + rand:uniform(100)]),
LK =
leveled_codec:to_objectkey(
list_to_binary("Bucket" ++ BucketExt),
list_to_binary("Key" ++ KeyExt),
o
),
Chunk = crypto:strong_rand_bytes(16),
MV = leveled_codec:convert_to_ledgerv(LK, Seqn, Chunk, 64, infinity),
generate_orderedkeys(
Seqn + 1, Count - 1, [{LK, MV}|Acc], BucketLow, BucketHigh).
@ -236,7 +240,7 @@ test_bloom(N, Runs) ->
fun(HashList) ->
HitOrMissFun =
fun (Entry, {HitL, MissL}) ->
case leveled_rand:uniform() < 0.5 of
case rand:uniform() < 0.5 of
true ->
{[Entry|HitL], MissL};
false ->


@ -82,7 +82,7 @@
-type appdefinable_headfun() ::
fun((object_tag(), object_metadata()) -> head()).
-type appdefinable_metadatafun() ::
fun(({leveled_codec:tag(), non_neg_integer(), any()}) ->
fun((leveled_codec:tag(), non_neg_integer(), binary()|delete) ->
{object_metadata(), list(erlang:timestamp())}).
-type appdefinable_indexspecsfun() ::
fun((object_tag(), object_metadata(), object_metadata()|not_present) ->
@ -117,11 +117,13 @@
%% @doc
%% Convert a key to a binary in a consistent way for the tag. The binary will
%% then be used to create the hash
key_to_canonicalbinary({?RIAK_TAG, Bucket, Key, null})
when is_binary(Bucket), is_binary(Key) ->
key_to_canonicalbinary(
{?RIAK_TAG, Bucket, Key, null})
when is_binary(Bucket), is_binary(Key) ->
<<Bucket/binary, Key/binary>>;
key_to_canonicalbinary({?RIAK_TAG, {BucketType, Bucket}, Key, SubKey})
when is_binary(BucketType), is_binary(Bucket) ->
key_to_canonicalbinary(
{?RIAK_TAG, {BucketType, Bucket}, Key, SubKey})
when is_binary(BucketType), is_binary(Bucket) ->
key_to_canonicalbinary({?RIAK_TAG,
<<BucketType/binary, Bucket/binary>>,
Key,
@ -130,9 +132,11 @@ key_to_canonicalbinary(Key) when element(1, Key) == ?STD_TAG ->
default_key_to_canonicalbinary(Key);
key_to_canonicalbinary(Key) ->
OverrideFun =
get_appdefined_function(key_to_canonicalbinary,
fun default_key_to_canonicalbinary/1,
1),
get_appdefined_function(
key_to_canonicalbinary,
fun default_key_to_canonicalbinary/1,
1
),
OverrideFun(Key).
default_key_to_canonicalbinary(Key) ->
@ -162,7 +166,7 @@ default_build_head(_Tag, Metadata) ->
Metadata.
-spec extract_metadata(object_tag(), non_neg_integer(), any())
-spec extract_metadata(object_tag(), non_neg_integer(), binary())
-> {object_metadata(), list(erlang:timestamp())}.
%% @doc
%% Take the inbound object and extract from it the metadata to be stored within
@ -239,9 +243,8 @@ defined_objecttags() ->
[?STD_TAG, ?RIAK_TAG].
-spec default_reload_strategy(object_tag())
-> {object_tag(),
leveled_codec:compaction_method()}.
-spec default_reload_strategy(
object_tag()) -> {object_tag(), leveled_codec:compaction_method()}.
%% @doc
%% State the compaction_method to be used when reloading the Ledger from the
%% journal for each object tag. Note, no compaction strategy required for
@ -249,25 +252,24 @@ defined_objecttags() ->
default_reload_strategy(Tag) ->
{Tag, retain}.
-spec get_size(object_tag()|headonly_tag(), object_metadata())
-> non_neg_integer().
-spec get_size(
object_tag()|headonly_tag(), object_metadata()) -> non_neg_integer().
%% @doc
%% Fetch the size from the metadata
get_size(?RIAK_TAG, RiakObjectMetadata) ->
element(4, RiakObjectMetadata);
get_size(_Tag, ObjectMetadata) ->
element(2, ObjectMetadata).
get_size(?RIAK_TAG, {_, _, _, Size}) ->
Size;
get_size(_Tag, {_, Size, _}) ->
Size.
-spec get_hash(object_tag()|headonly_tag(), object_metadata())
-> non_neg_integer().
-spec get_hash(
object_tag()|headonly_tag(), object_metadata()) -> non_neg_integer()|null.
%% @doc
%% Fetch the hash from the metadata
get_hash(?RIAK_TAG, RiakObjectMetadata) ->
element(3, RiakObjectMetadata);
get_hash(_Tag, ObjectMetadata) ->
element(1, ObjectMetadata).
get_hash(?RIAK_TAG, {_, _, Hash, _}) ->
Hash;
get_hash(_Tag, {Hash, _, _}) ->
Hash.
-spec standard_hash(any()) -> non_neg_integer().
%% @doc
@ -280,9 +282,15 @@ standard_hash(Obj) ->
%%% Handling Override Functions
%%%============================================================================
-spec get_appdefined_function(
appdefinable_function(), appdefinable_function_fun(), non_neg_integer()) ->
appdefinable_function_fun().
-spec get_appdefined_function
(key_to_canonicalbinary, appdefinable_keyfun(), 1) ->
appdefinable_keyfun();
(build_head, appdefinable_headfun(), 2) ->
appdefinable_headfun();
(extract_metadata, appdefinable_metadatafun(), 3) ->
appdefinable_metadatafun();
(diff_indexspecs, appdefinable_indexspecsfun(), 3) ->
appdefinable_indexspecsfun().
%% @doc
%% If a keylist of [{function_name, fun()}] has been set as an environment
%% variable for a tag, then this FunctionName can be used instead of the
@ -300,8 +308,8 @@ get_appdefined_function(FunctionName, DefaultFun, RequiredArity) ->
%%%============================================================================
-spec riak_extract_metadata(binary()|delete, non_neg_integer()) ->
{riak_metadata(), list()}.
-spec riak_extract_metadata(
binary()|delete, non_neg_integer()) -> {riak_metadata(), list()}.
%% @doc
%% Riak extract metadata should extract a metadata object which is a
%% five-tuple of:


@ -114,10 +114,10 @@
scoring_state :: scoring_state()|undefined,
score_onein = 1 :: pos_integer()}).
-record(candidate, {low_sqn :: integer() | undefined,
filename :: string() | undefined,
journal :: pid() | undefined,
compaction_perc :: float() | undefined}).
-record(candidate, {low_sqn :: integer(),
filename :: string(),
journal :: pid(),
compaction_perc :: float()}).
-record(scoring_state, {filter_fun :: leveled_inker:filterfun(),
filter_server :: leveled_inker:filterserver(),
@ -158,7 +158,13 @@
%% @doc
%% Generate a new clerk
clerk_new(InkerClerkOpts) ->
gen_server:start_link(?MODULE, [leveled_log:get_opts(), InkerClerkOpts], []).
{ok, Clerk} =
gen_server:start_link(
?MODULE,
[leveled_log:get_opts(), InkerClerkOpts],
[]
),
{ok, Clerk}.
-spec clerk_compact(pid(),
pid(),
@ -310,60 +316,71 @@ handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Manifest0},
end,
ok = clerk_scorefilelist(self(), lists:filter(NotRollingFun, Manifest)),
ScoringState =
#scoring_state{filter_fun = FilterFun,
filter_server = FilterServer,
max_sqn = MaxSQN,
close_fun = CloseFun,
start_time = SW},
#scoring_state{
filter_fun = FilterFun,
filter_server = FilterServer,
max_sqn = MaxSQN,
close_fun = CloseFun,
start_time = SW
},
{noreply, State#state{scored_files = [], scoring_state = ScoringState}};
handle_cast({score_filelist, [Entry|Tail]}, State) ->
handle_cast(
{score_filelist, [Entry|Tail]},
State = #state{scoring_state = ScoringState})
when ?IS_DEF(ScoringState) ->
Candidates = State#state.scored_files,
{LowSQN, FN, JournalP, _LK} = Entry,
ScoringState = State#state.scoring_state,
CpctPerc =
case {leveled_cdb:cdb_getcachedscore(JournalP, os:timestamp()),
leveled_rand:uniform(State#state.score_onein) == 1,
rand:uniform(State#state.score_onein) == 1,
State#state.score_onein} of
{CachedScore, _UseNewScore, ScoreOneIn}
when CachedScore == undefined; ScoreOneIn == 1 ->
% If caches are not used, always use the current score
check_single_file(JournalP,
ScoringState#scoring_state.filter_fun,
ScoringState#scoring_state.filter_server,
ScoringState#scoring_state.max_sqn,
?SAMPLE_SIZE,
?BATCH_SIZE,
State#state.reload_strategy);
check_single_file(
JournalP,
ScoringState#scoring_state.filter_fun,
ScoringState#scoring_state.filter_server,
ScoringState#scoring_state.max_sqn,
?SAMPLE_SIZE,
?BATCH_SIZE,
State#state.reload_strategy
);
{CachedScore, true, _ScoreOneIn} ->
% If caches are used roll the score towards the current score
% Expectation is that this will reduce instances of individual
% files being compacted when a run is missed due to cached
% scores being used in surrounding journals
NewScore =
check_single_file(JournalP,
ScoringState#scoring_state.filter_fun,
ScoringState#scoring_state.filter_server,
ScoringState#scoring_state.max_sqn,
?SAMPLE_SIZE,
?BATCH_SIZE,
State#state.reload_strategy),
check_single_file(
JournalP,
ScoringState#scoring_state.filter_fun,
ScoringState#scoring_state.filter_server,
ScoringState#scoring_state.max_sqn,
?SAMPLE_SIZE,
?BATCH_SIZE,
State#state.reload_strategy
),
(NewScore + CachedScore) / 2;
{CachedScore, false, _ScoreOneIn} ->
CachedScore
end,
ok = leveled_cdb:cdb_putcachedscore(JournalP, CpctPerc),
Candidate =
#candidate{low_sqn = LowSQN,
filename = FN,
journal = JournalP,
compaction_perc = CpctPerc},
#candidate{
low_sqn = LowSQN,
filename = FN,
journal = JournalP,
compaction_perc = CpctPerc
},
ok = clerk_scorefilelist(self(), Tail),
{noreply, State#state{scored_files = [Candidate|Candidates]}};
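The ?IS_DEF guard used in the rewritten clause heads above and below is not defined within the hunks shown here; a plausible definition (an assumption, not the committed macro) would be:

    %% Assumed definition - the real macro lives in a shared header,
    %% outside the hunks shown in this diff.
    -define(IS_DEF(X), X =/= undefined).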
handle_cast(scoring_complete, State) ->
handle_cast(
scoring_complete, State = #state{scoring_state = ScoringState})
when ?IS_DEF(ScoringState) ->
MaxRunLength = State#state.max_run_length,
CDBopts = State#state.cdb_options,
Candidates = lists:reverse(State#state.scored_files),
ScoringState = State#state.scoring_state,
FilterFun = ScoringState#scoring_state.filter_fun,
FilterServer = ScoringState#scoring_state.filter_server,
MaxSQN = ScoringState#scoring_state.max_sqn,
@ -379,35 +396,45 @@ handle_cast(scoring_complete, State) ->
true ->
BestRun1 = sort_run(BestRun0),
print_compaction_run(BestRun1, ScoreParams),
ManifestSlice = compact_files(BestRun1,
CDBopts,
FilterFun,
FilterServer,
MaxSQN,
State#state.reload_strategy,
State#state.compression_method),
FilesToDelete = lists:map(fun(C) ->
{C#candidate.low_sqn,
C#candidate.filename,
C#candidate.journal,
undefined}
end,
BestRun1),
ManifestSlice =
compact_files(
BestRun1,
CDBopts,
FilterFun,
FilterServer,
MaxSQN,
State#state.reload_strategy,
State#state.compression_method
),
FilesToDelete =
lists:map(
fun(C) ->
{
C#candidate.low_sqn,
C#candidate.filename,
C#candidate.journal,
undefined
}
end,
BestRun1
),
leveled_log:log(ic002, [length(FilesToDelete)]),
ok = CloseFun(FilterServer),
ok = leveled_inker:ink_clerkcomplete(State#state.inker,
ManifestSlice,
FilesToDelete);
ok =
leveled_inker:ink_clerkcomplete(
State#state.inker, ManifestSlice, FilesToDelete);
false ->
ok = CloseFun(FilterServer),
ok = leveled_inker:ink_clerkcomplete(State#state.inker, [], [])
end,
{noreply, State#state{scoring_state = undefined}, hibernate};
handle_cast({trim, PersistedSQN, ManifestAsList}, State) ->
handle_cast(
{trim, PersistedSQN, ManifestAsList}, State = #state{inker = Ink})
when ?IS_DEF(Ink) ->
FilesToDelete =
leveled_imanifest:find_persistedentries(PersistedSQN, ManifestAsList),
leveled_log:log(ic007, []),
ok = leveled_inker:ink_clerkcomplete(State#state.inker, [], FilesToDelete),
ok = leveled_inker:ink_clerkcomplete(Ink, [], FilesToDelete),
{noreply, State};
handle_cast({hashtable_calc, HashTree, StartPos, CDBpid}, State) ->
{IndexList, HashTreeBin} = leveled_cdb:hashtable_calc(HashTree, StartPos),
@ -445,9 +472,9 @@ code_change(_OldVsn, State, _Extra) ->
%%% External functions
%%%============================================================================
-spec schedule_compaction(list(integer()),
integer(),
{integer(), integer(), integer()}) -> integer().
-spec schedule_compaction(
list(integer()), integer(), {integer(), integer(), integer()}) ->
integer().
%% @doc
%% Schedule the next compaction event for this store. Chooses a random
%% interval, and then a random start time within the first third
@ -483,11 +510,11 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) ->
% today.
RandSelect =
fun(_X) ->
{lists:nth(leveled_rand:uniform(TotalHours), CompactionHours),
leveled_rand:uniform(?INTERVALS_PER_HOUR)}
{lists:nth(rand:uniform(TotalHours), CompactionHours),
rand:uniform(?INTERVALS_PER_HOUR)}
end,
RandIntervals = lists:sort(lists:map(RandSelect,
lists:seq(1, RunsPerDay))),
RandIntervals =
lists:sort(lists:map(RandSelect, lists:seq(1, RunsPerDay))),
% Pick the next interval from the list. The intervals before current time
% are considered as intervals tomorrow, so will only be next if there are
@ -508,11 +535,11 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) ->
% Calculate the offset in seconds to this next interval
NextS0 = NextI * (IntervalLength * 60)
- leveled_rand:uniform(IntervalLength * 60),
- rand:uniform(IntervalLength * 60),
NextM = NextS0 div 60,
NextS = NextS0 rem 60,
TimeDiff = calendar:time_difference(LocalTime,
{NextDate, {NextH, NextM, NextS}}),
TimeDiff =
calendar:time_difference(LocalTime, {NextDate, {NextH, NextM, NextS}}),
{Days, {Hours, Mins, Secs}} = TimeDiff,
Days * 86400 + Hours * 3600 + Mins * 60 + Secs.
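As a usage sketch (the hour window, run count and timer handling below are illustrative assumptions, not code from this module), the return value is the number of seconds to wait before the next compaction attempt:

    %% Allow compaction between 00:00 and 05:59, twice per day
    SecsToNextRun = schedule_compaction(lists:seq(0, 5), 2, os:timestamp()),
    erlang:send_after(SecsToNextRun * 1000, self(), attempt_compaction).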
@ -521,13 +548,14 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) ->
%%% Internal functions
%%%============================================================================
-spec check_single_file(pid(),
leveled_inker:filterfun(),
leveled_inker:filterserver(),
leveled_codec:sqn(),
non_neg_integer(), non_neg_integer(),
leveled_codec:compaction_strategy()) ->
float().
-spec check_single_file(
pid(),
leveled_inker:filterfun(),
leveled_inker:filterserver(),
leveled_codec:sqn(),
non_neg_integer(), non_neg_integer(),
leveled_codec:compaction_strategy()) ->
float().
%% @doc
%% Get a score for a single CDB file in the journal. This will pull out a bunch
%% of keys and sizes at random in an efficient way (by scanning the hashtable
@ -624,8 +652,8 @@ fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) ->
fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List).
-spec assess_candidates(list(candidate()), score_parameters())
-> {list(candidate()), float()}.
-spec assess_candidates(
list(candidate()), score_parameters()) -> {list(candidate()), float()}.
%% @doc
%% For each run length we need to assess all the possible runs of candidates,
%% to determine which is the best score - to be put forward as the best
@ -704,10 +732,12 @@ score_run(Run, {MaxRunLength, MR_CT, SF_CT}) ->
(MR_CT - SF_CT) / (MaxRunSize - 1)
end,
Target = SF_CT + TargetIncr * (length(Run) - 1),
RunTotal = lists:foldl(fun(Cand, Acc) ->
Acc + Cand#candidate.compaction_perc end,
0.0,
Run),
RunTotal =
lists:foldl(
fun(Cand, Acc) -> Acc + Cand#candidate.compaction_perc end,
0.0,
Run
),
Target - RunTotal / length(Run).
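A worked check of the arithmetic, consistent with simple_score_test/0 further down (and assuming MaxRunSize is 4 when a run of length 4 is scored against a MaxRunLength of 4):

    TargetIncr = (70.0 - 40.0) / (4 - 1),    %% 10.0 per additional file
    Target     = 40.0 + TargetIncr * 3,      %% 70.0 for a four-file run
    RunTotal   = 75.0 + 75.0 + 76.0 + 70.0,  %% 296.0
    Score      = Target - RunTotal / 4.      %% 70.0 - 74.0 = -4.0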
@ -750,26 +780,29 @@ compact_files([Batch|T], CDBopts, ActiveJournal0,
FilterFun, FilterServer, MaxSQN,
RStrategy, PressMethod, ManSlice0) ->
{SrcJournal, PositionList} = Batch,
KVCs0 = leveled_cdb:cdb_directfetch(SrcJournal,
PositionList,
key_value_check),
KVCs1 = filter_output(KVCs0,
FilterFun,
FilterServer,
MaxSQN,
RStrategy),
{ActiveJournal1, ManSlice1} = write_values(KVCs1,
CDBopts,
ActiveJournal0,
ManSlice0,
PressMethod),
KVCs0 =
leveled_cdb:cdb_directfetch(SrcJournal, PositionList, key_value_check),
KVCs1 =
filter_output(KVCs0, FilterFun, FilterServer, MaxSQN, RStrategy),
{ActiveJournal1, ManSlice1} =
write_values(
KVCs1, CDBopts, ActiveJournal0, ManSlice0, PressMethod),
% The inker's clerk will no longer need these (potentially large) binaries,
% so force garbage collection at this point. This will mean when we roll
% each CDB file there will be no remaining references to the binaries that
% have been transferred and the memory can immediately be cleared
garbage_collect(),
compact_files(T, CDBopts, ActiveJournal1, FilterFun, FilterServer, MaxSQN,
RStrategy, PressMethod, ManSlice1).
compact_files(
T,
CDBopts,
ActiveJournal1,
FilterFun,
FilterServer,
MaxSQN,
RStrategy,
PressMethod,
ManSlice1
).
get_all_positions([], PositionBatches) ->
PositionBatches;
@ -777,23 +810,25 @@ get_all_positions([HeadRef|RestOfBest], PositionBatches) ->
SrcJournal = HeadRef#candidate.journal,
Positions = leveled_cdb:cdb_getpositions(SrcJournal, all),
leveled_log:log(ic008, [HeadRef#candidate.filename, length(Positions)]),
Batches = split_positions_into_batches(lists:sort(Positions),
SrcJournal,
[]),
Batches =
split_positions_into_batches(
lists:sort(Positions), SrcJournal, []
),
get_all_positions(RestOfBest, PositionBatches ++ Batches).
split_positions_into_batches([], _Journal, Batches) ->
Batches;
split_positions_into_batches(Positions, Journal, Batches) ->
{ThisBatch, Tail} = if
length(Positions) > ?BATCH_SIZE ->
lists:split(?BATCH_SIZE, Positions);
true ->
{Positions, []}
end,
split_positions_into_batches(Tail,
Journal,
Batches ++ [{Journal, ThisBatch}]).
{ThisBatch, Tail} =
if
length(Positions) > ?BATCH_SIZE ->
lists:split(?BATCH_SIZE, Positions);
true ->
{Positions, []}
end,
split_positions_into_batches(
Tail, Journal, Batches ++ [{Journal, ThisBatch}]
).
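For example (the batch size here is illustrative; the real value comes from the ?BATCH_SIZE define elsewhere in this module): with ?BATCH_SIZE = 3, positions [1,2,3,4,5] for journal J would be batched as [{J, [1,2,3]}, {J, [4,5]}].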
%% @doc
@ -918,7 +953,7 @@ clear_waste(State) ->
N = calendar:datetime_to_gregorian_seconds(calendar:local_time()),
DeleteJournalFun =
fun(DelJ) ->
LMD = filelib:last_modified(WP ++ DelJ),
LMD = {_,_} = filelib:last_modified(WP ++ DelJ),
case N - calendar:datetime_to_gregorian_seconds(LMD) of
LMD_Delta when LMD_Delta >= WRP ->
ok = file:delete(WP ++ DelJ),
@ -931,12 +966,10 @@ clear_waste(State) ->
lists:foreach(DeleteJournalFun, ClearedJournals)
end.
%%%============================================================================
%%% Test
%%%============================================================================
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
@ -966,24 +999,36 @@ local_time_to_now(DateTime) ->
{Seconds div 1000000, Seconds rem 1000000, 0}.
simple_score_test() ->
Run1 = [#candidate{compaction_perc = 75.0},
#candidate{compaction_perc = 75.0},
#candidate{compaction_perc = 76.0},
#candidate{compaction_perc = 70.0}],
DummyC =
#candidate{
low_sqn = 1, filename="dummy", journal=self(), compaction_perc = 0
},
Run1 = [DummyC#candidate{compaction_perc = 75.0},
DummyC#candidate{compaction_perc = 75.0},
DummyC#candidate{compaction_perc = 76.0},
DummyC#candidate{compaction_perc = 70.0}],
?assertMatch(-4.0, score_run(Run1, {4, 70.0, 40.0})),
Run2 = [#candidate{compaction_perc = 75.0}],
Run2 = [DummyC#candidate{compaction_perc = 75.0}],
?assertMatch(-35.0, score_run(Run2, {4, 70.0, 40.0})),
?assertEqual(0.0, score_run([], {4, 40.0, 70.0})),
Run3 = [#candidate{compaction_perc = 100.0}],
Run3 = [DummyC#candidate{compaction_perc = 100.0}],
?assertMatch(-60.0, score_run(Run3, {4, 70.0, 40.0})).
file_gc_test() ->
State = #state{waste_path="test/test_area/waste/",
waste_retention_period=1},
State =
#state{
waste_path="test/test_area/waste/", waste_retention_period = 1
},
ok = filelib:ensure_dir(State#state.waste_path),
file:write_file(State#state.waste_path ++ "1.cdb", term_to_binary("Hello")),
file:write_file(
filename:join(State#state.waste_path, "1.cdb"),
term_to_binary("Hello")
),
timer:sleep(1100),
file:write_file(State#state.waste_path ++ "2.cdb", term_to_binary("Hello")),
file:write_file(
filename:join(State#state.waste_path, "2.cdb"),
term_to_binary("Hello")
),
clear_waste(State),
{ok, ClearedJournals} = file:list_dir(State#state.waste_path),
?assertMatch(["2.cdb"], ClearedJournals),
@ -1004,27 +1049,47 @@ find_bestrun_test() ->
%% -define(MAXRUNLENGTH_COMPACTION_TARGET, 60.0).
%% Tested first with blocks significant as no back-tracking
Params = {4, 60.0, 40.0},
Block1 = [#candidate{compaction_perc = 55.0, filename = "a"},
#candidate{compaction_perc = 65.0, filename = "b"},
#candidate{compaction_perc = 42.0, filename = "c"},
#candidate{compaction_perc = 50.0, filename = "d"}],
Block2 = [#candidate{compaction_perc = 38.0, filename = "e"},
#candidate{compaction_perc = 75.0, filename = "f"},
#candidate{compaction_perc = 75.0, filename = "g"},
#candidate{compaction_perc = 45.0, filename = "h"}],
Block3 = [#candidate{compaction_perc = 70.0, filename = "i"},
#candidate{compaction_perc = 100.0, filename = "j"},
#candidate{compaction_perc = 100.0, filename = "k"},
#candidate{compaction_perc = 100.0, filename = "l"}],
Block4 = [#candidate{compaction_perc = 55.0, filename = "m"},
#candidate{compaction_perc = 56.0, filename = "n"},
#candidate{compaction_perc = 57.0, filename = "o"},
#candidate{compaction_perc = 40.0, filename = "p"}],
Block5 = [#candidate{compaction_perc = 60.0, filename = "q"},
#candidate{compaction_perc = 60.0, filename = "r"}],
DummyC =
#candidate{
low_sqn = 1, filename="dummy", journal=self(), compaction_perc = 0
},
Block1 =
[
DummyC#candidate{compaction_perc = 55.0, filename = "a"},
DummyC#candidate{compaction_perc = 65.0, filename = "b"},
DummyC#candidate{compaction_perc = 42.0, filename = "c"},
DummyC#candidate{compaction_perc = 50.0, filename = "d"}
],
Block2 =
[
DummyC#candidate{compaction_perc = 38.0, filename = "e"},
DummyC#candidate{compaction_perc = 75.0, filename = "f"},
DummyC#candidate{compaction_perc = 75.0, filename = "g"},
DummyC#candidate{compaction_perc = 45.0, filename = "h"}
],
Block3 =
[
DummyC#candidate{compaction_perc = 70.0, filename = "i"},
DummyC#candidate{compaction_perc = 100.0, filename = "j"},
DummyC#candidate{compaction_perc = 100.0, filename = "k"},
DummyC#candidate{compaction_perc = 100.0, filename = "l"}
],
Block4 =
[
DummyC#candidate{compaction_perc = 55.0, filename = "m"},
DummyC#candidate{compaction_perc = 56.0, filename = "n"},
DummyC#candidate{compaction_perc = 57.0, filename = "o"},
DummyC#candidate{compaction_perc = 40.0, filename = "p"}
],
Block5 =
[
DummyC#candidate{compaction_perc = 60.0, filename = "q"},
DummyC#candidate{compaction_perc = 60.0, filename = "r"}
],
CList0 = Block1 ++ Block2 ++ Block3 ++ Block4 ++ Block5,
?assertMatch(["b", "c", "d", "e"], check_bestrun(CList0, Params)),
CList1 = CList0 ++ [#candidate{compaction_perc = 20.0, filename="s"}],
CList1 =
CList0 ++ [DummyC#candidate{compaction_perc = 20.0, filename="s"}],
?assertMatch(["s"], check_bestrun(CList1, Params)),
CList2 = Block4 ++ Block3 ++ Block2 ++ Block1 ++ Block5,
?assertMatch(["h", "a", "b", "c"], check_bestrun(CList2, Params)),
@ -1219,12 +1284,18 @@ compact_empty_file_test() ->
ok = leveled_cdb:cdb_destroy(CDB2).
compare_candidate_test() ->
Candidate1 = #candidate{low_sqn=1},
Candidate2 = #candidate{low_sqn=2},
Candidate3 = #candidate{low_sqn=3},
Candidate4 = #candidate{low_sqn=4},
?assertMatch([Candidate1, Candidate2, Candidate3, Candidate4],
sort_run([Candidate3, Candidate2, Candidate4, Candidate1])).
DummyC =
#candidate{
low_sqn = 1, filename="dummy", journal=self(), compaction_perc = 0
},
Candidate1 = DummyC#candidate{low_sqn=1},
Candidate2 = DummyC#candidate{low_sqn=2},
Candidate3 = DummyC#candidate{low_sqn=3},
Candidate4 = DummyC#candidate{low_sqn=4},
?assertMatch(
[Candidate1, Candidate2, Candidate3, Candidate4],
sort_run([Candidate3, Candidate2, Candidate4, Candidate1])
).
compact_singlefile_totwosmallfiles_test_() ->
{timeout, 60, fun compact_singlefile_totwosmallfiles_testto/0}.
@ -1236,24 +1307,31 @@ compact_singlefile_totwosmallfiles_testto() ->
FN1 = leveled_inker:filepath(RP, 1, new_journal),
CDBoptsLarge = #cdb_options{binary_mode=true, max_size=30000000},
{ok, CDB1} = leveled_cdb:cdb_open_writer(FN1, CDBoptsLarge),
lists:foreach(fun(X) ->
LK = test_ledgerkey("Key" ++ integer_to_list(X)),
Value = leveled_rand:rand_bytes(1024),
{IK, IV} =
leveled_codec:to_inkerkv(LK, X, Value,
{[], infinity},
native, true),
ok = leveled_cdb:cdb_put(CDB1, IK, IV)
end,
lists:seq(1, 1000)),
lists:foreach(
fun(X) ->
LK = test_ledgerkey("Key" ++ integer_to_list(X)),
Value = crypto:strong_rand_bytes(1024),
{IK, IV} =
leveled_codec:to_inkerkv(LK, X, Value,
{[], infinity},
native, true),
ok = leveled_cdb:cdb_put(CDB1, IK, IV)
end,
lists:seq(1, 1000)
),
{ok, NewName} = leveled_cdb:cdb_complete(CDB1),
{ok, CDBr} = leveled_cdb:cdb_open_reader(NewName),
CDBoptsSmall =
#cdb_options{binary_mode=true, max_size=400000, file_path=CP},
BestRun1 = [#candidate{low_sqn=1,
filename=leveled_cdb:cdb_filename(CDBr),
journal=CDBr,
compaction_perc=50.0}],
BestRun1 =
[
#candidate{
low_sqn=1,
filename=leveled_cdb:cdb_filename(CDBr),
journal=CDBr,
compaction_perc=50.0
}
],
FakeFilterFun =
fun(_FS, _LK, SQN) ->
case SQN rem 2 of
@ -1262,19 +1340,24 @@ compact_singlefile_totwosmallfiles_testto() ->
end
end,
ManifestSlice = compact_files(BestRun1,
CDBoptsSmall,
FakeFilterFun,
null,
900,
[{?STD_TAG, recovr}],
native),
ManifestSlice =
compact_files(
BestRun1,
CDBoptsSmall,
FakeFilterFun,
null,
900,
[{?STD_TAG, recovr}],
native
),
?assertMatch(2, length(ManifestSlice)),
lists:foreach(fun({_SQN, _FN, CDB, _LK}) ->
ok = leveled_cdb:cdb_deletepending(CDB),
ok = leveled_cdb:cdb_destroy(CDB)
end,
ManifestSlice),
lists:foreach(
fun({_SQN, _FN, CDB, _LK}) ->
ok = leveled_cdb:cdb_deletepending(CDB),
ok = leveled_cdb:cdb_destroy(CDB)
end,
ManifestSlice
),
ok = leveled_cdb:cdb_deletepending(CDBr),
ok = leveled_cdb:cdb_destroy(CDBr).
@ -1304,11 +1387,13 @@ size_score_test() ->
end
end,
Score =
size_comparison_score(KeySizeList,
FilterFun,
CurrentList,
MaxSQN,
leveled_codec:inker_reload_strategy([])),
size_comparison_score(
KeySizeList,
FilterFun,
CurrentList,
MaxSQN,
leveled_codec:inker_reload_strategy([])
),
io:format("Score ~w", [Score]),
?assertMatch(true, Score > 69.0),
?assertMatch(true, Score < 70.0).


@ -28,9 +28,7 @@
-type manifest() :: list({integer(), list()}).
%% The manifest is divided into blocks by sequence number, with each block
%% being a list of manifest entries for that SQN range.
-type manifest_entry() :: {integer(), string(), pid()|string(), any()}.
%% The Entry should have a pid() as the third element, but a string() may be
%% used in unit tests
-type manifest_entry() :: {integer(), string(), pid(), any()}.
-export_type([manifest/0, manifest_entry/0]).
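To make the shape concrete (values below are purely illustrative, and how entries are grouped into blocks depends on from_list/1): a manifest entry pairs the first SQN covered by a journal file with its filename, the pid of the open file process, and the last key, e.g.

    {20000, "nursery_20000", JournalPid, LastJournalKey}

and the manifest itself is a list of {BlockSQN, Entries} pairs, grouping entries into blocks by sequence-number range.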
@ -75,8 +73,8 @@ add_entry(Manifest, Entry, ToEnd) ->
from_list(Man1)
end.
-spec append_lastkey(manifest(), pid(), leveled_codec:journal_key())
-> manifest().
-spec append_lastkey(
manifest(), pid(), leveled_codec:journal_key()) -> manifest().
%% @doc
%% On discovery of the last key in the last journal entry, the manifest can
%% be updated through this function to have the last key
@ -100,7 +98,7 @@ remove_entry(Manifest, Entry) ->
Man0 = lists:keydelete(SQN, 1, to_list(Manifest)),
from_list(Man0).
-spec find_entry(integer(), manifest()) -> pid()|string().
-spec find_entry(integer(), manifest()) -> pid().
%% @doc
%% Given a SQN find the relevant manifest_entry, returning just the pid() of
%% the journal file (which may be a string() in unit tests)
@ -257,18 +255,31 @@ build_testmanifest_aslist() ->
ManifestMapFun =
fun(N) ->
NStr = integer_to_list(N),
{max(1, N * 1000), "FN" ++ NStr, "pid" ++ NStr, "LK" ++ NStr}
{
max(1, N * 1000),
"FN" ++ NStr,
set_pid(N),
"LK" ++ NStr
}
end,
lists:map(ManifestMapFun, lists:reverse(lists:seq(0, 50))).
set_pid(N) ->
lists:flatten(io_lib:format("<0.1~2..0w.0>", [N])).
test_testmanifest(Man0) ->
?assertMatch("pid0", find_entry(1, Man0)),
?assertMatch("pid0", find_entry(2, Man0)),
?assertMatch("pid1", find_entry(1001, Man0)),
?assertMatch("pid20", find_entry(20000, Man0)),
?assertMatch("pid20", find_entry(20001, Man0)),
?assertMatch("pid20", find_entry(20999, Man0)),
?assertMatch("pid50", find_entry(99999, Man0)).
P0 = set_pid(0),
P1 = set_pid(1),
P20 = set_pid(20),
P50 = set_pid(50),
?assertMatch(P0, find_entry(1, Man0)),
?assertMatch(P0, find_entry(2, Man0)),
?assertMatch(P1, find_entry(1001, Man0)),
?assertMatch(P20, find_entry(20000, Man0)),
?assertMatch(P20, find_entry(20001, Man0)),
?assertMatch(P20, find_entry(20999, Man0)),
?assertMatch(P50, find_entry(99999, Man0)).
buildfromlist_test() ->
ManL = build_testmanifest_aslist(),
@ -303,7 +314,7 @@ buildrandomfashion_test() ->
ManL0 = build_testmanifest_aslist(),
RandMapFun =
fun(X) ->
{leveled_rand:uniform(), X}
{rand:uniform(), X}
end,
ManL1 = lists:map(RandMapFun, ManL0),
ManL2 = lists:sort(ManL1),
@ -317,7 +328,7 @@ buildrandomfashion_test() ->
test_testmanifest(Man0),
?assertMatch(ManL0, to_list(Man0)),
RandomEntry = lists:nth(leveled_rand:uniform(50), ManL0),
RandomEntry = lists:nth(rand:uniform(50), ManL0),
Man1 = remove_entry(Man0, RandomEntry),
Man2 = add_entry(Man1, RandomEntry, false),


@ -147,7 +147,7 @@
-record(state, {manifest = [] :: list(),
manifest_sqn = 0 :: integer(),
journal_sqn = 0 :: integer(),
journal_sqn = 0 :: non_neg_integer(),
active_journaldb :: pid() | undefined,
pending_removals = [] :: list(),
registered_snapshots = [] :: list(registered_snapshot()),
@ -159,7 +159,9 @@
is_snapshot = false :: boolean(),
compression_method = native :: lz4|native|none,
compress_on_receipt = false :: boolean(),
snap_timeout :: pos_integer() | undefined, % in seconds
snap_timeout = 0 :: non_neg_integer(),
% in seconds, 0 for snapshots
% (only relevant for primary Inker)
source_inker :: pid() | undefined,
shutdown_loops = ?SHUTDOWN_LOOPS :: non_neg_integer()}).
@ -196,20 +198,26 @@
%% The inker will need to know what the reload strategy is, to inform the
%% clerk about the rules to enforce during compaction.
ink_start(InkerOpts) ->
gen_server:start_link(?MODULE, [leveled_log:get_opts(), InkerOpts], []).
{ok, Inker} =
gen_server:start_link(
?MODULE, [leveled_log:get_opts(), InkerOpts], []),
{ok, Inker}.
-spec ink_snapstart(inker_options()) -> {ok, pid()}.
%% @doc
%% Don't link on startup as snapshot
ink_snapstart(InkerOpts) ->
gen_server:start(?MODULE, [leveled_log:get_opts(), InkerOpts], []).
{ok, Inker} =
gen_server:start(
?MODULE, [leveled_log:get_opts(), InkerOpts], []),
{ok, Inker}.
-spec ink_put(pid(),
leveled_codec:ledger_key(),
any(),
leveled_codec:journal_keychanges(),
boolean()) ->
{ok, integer(), integer()}.
{ok, non_neg_integer(), pos_integer()}.
%% @doc
%% PUT an object into the journal, returning the sequence number for the PUT
%% as well as the size of the object (information required by the ledger).
@ -504,14 +512,13 @@ ink_getclerkpid(Pid) ->
init([LogOpts, InkerOpts]) ->
leveled_log:save(LogOpts),
leveled_rand:seed(),
case {InkerOpts#inker_options.root_path,
InkerOpts#inker_options.start_snapshot} of
{undefined, true} ->
InkerOpts#inker_options.start_snapshot,
InkerOpts#inker_options.source_inker} of
{undefined, true, SrcInker} when ?IS_DEF(SrcInker) ->
%% monitor the bookie, and close the snapshot when bookie
%% exits
BookieMonitor = erlang:monitor(process, InkerOpts#inker_options.bookies_pid),
SrcInker = InkerOpts#inker_options.source_inker,
{Manifest,
ActiveJournalDB,
JournalSQN} = ink_registersnapshot(SrcInker, self()),
@ -522,7 +529,7 @@ init([LogOpts, InkerOpts]) ->
bookie_monref = BookieMonitor,
is_snapshot = true}};
%% Need to do something about timeout
{_RootPath, false} ->
{_RootPath, false, _SrcInker} ->
start_from_file(InkerOpts)
end.
@ -557,10 +564,12 @@ handle_call({fold,
Manifest = lists:reverse(leveled_imanifest:to_list(State#state.manifest)),
Folder =
fun() ->
fold_from_sequence(StartSQN,
{FilterFun, InitAccFun, FoldFun},
Acc,
Manifest)
fold_from_sequence(
StartSQN,
{FilterFun, InitAccFun, FoldFun},
Acc,
Manifest
)
end,
case By of
as_ink ->
@ -583,25 +592,21 @@ handle_call(get_manifest, _From, State) ->
handle_call(print_manifest, _From, State) ->
leveled_imanifest:printer(State#state.manifest),
{reply, ok, State};
handle_call({compact,
Checker,
InitiateFun,
CloseFun,
FilterFun},
_From, State=#state{is_snapshot=Snap}) when Snap == false ->
handle_call(
{compact, Checker, InitiateFun, CloseFun, FilterFun},
_From,
State=#state{is_snapshot=Snap})
when Snap == false ->
Clerk = State#state.clerk,
Manifest = leveled_imanifest:to_list(State#state.manifest),
leveled_iclerk:clerk_compact(State#state.clerk,
Checker,
InitiateFun,
CloseFun,
FilterFun,
Manifest),
leveled_iclerk:clerk_compact(
Clerk, Checker, InitiateFun, CloseFun, FilterFun, Manifest),
{reply, {ok, Clerk}, State#state{compaction_pending=true}};
handle_call(compaction_pending, _From, State) ->
{reply, State#state.compaction_pending, State};
handle_call({trim, PersistedSQN}, _From, State=#state{is_snapshot=Snap})
when Snap == false ->
handle_call(
{trim, PersistedSQN}, _From, State=#state{is_snapshot=Snap})
when Snap == false ->
Manifest = leveled_imanifest:to_list(State#state.manifest),
ok = leveled_iclerk:clerk_trim(State#state.clerk, PersistedSQN, Manifest),
{reply, ok, State};
@ -625,8 +630,9 @@ handle_call(roll, _From, State=#state{is_snapshot=Snap}) when Snap == false ->
manifest_sqn = NewManSQN,
active_journaldb = NewJournalP}}
end;
handle_call({backup, BackupPath}, _from, State)
when State#state.is_snapshot == true ->
handle_call(
{backup, BackupPath}, _from, State)
when State#state.is_snapshot == true ->
SW = os:timestamp(),
BackupJFP = filepath(filename:join(BackupPath, ?JOURNAL_FP), journal_dir),
ok = filelib:ensure_dir(BackupJFP),
@ -665,7 +671,7 @@ handle_call({backup, BackupPath}, _from, State)
leveled_log:log(i0022, [RFN]),
RemoveFile = filename:join(BackupJFP, RFN),
case filelib:is_file(RemoveFile)
and not filelib:is_dir(RemoveFile) of
andalso not filelib:is_dir(RemoveFile) of
true ->
ok = file:delete(RemoveFile);
false ->
@ -699,12 +705,13 @@ handle_call(get_clerkpid, _From, State) ->
handle_call(close, _From, State=#state{is_snapshot=Snap}) when Snap == true ->
ok = ink_releasesnapshot(State#state.source_inker, self()),
{stop, normal, ok, State};
handle_call(ShutdownType, From, State)
when ShutdownType == close; ShutdownType == doom ->
handle_call(
ShutdownType, From, State = #state{clerk = Clerk})
when ?IS_DEF(Clerk) ->
case ShutdownType of
doom ->
leveled_log:log(i0018, []);
_ ->
close ->
ok
end,
leveled_log:log(i0005, [ShutdownType]),
@ -714,9 +721,10 @@ handle_call(ShutdownType, From, State)
gen_server:cast(self(), {maybe_defer_shutdown, ShutdownType, From}),
{noreply, State}.
handle_cast({clerk_complete, ManifestSnippet, FilesToDelete}, State) ->
CDBOpts = State#state.cdb_options,
handle_cast(
{clerk_complete, ManifestSnippet, FilesToDelete},
State = #state{cdb_options = CDBOpts})
when ?IS_DEF(CDBOpts) ->
DropFun =
fun(E, Acc) ->
leveled_imanifest:remove_entry(Acc, E)
@ -854,8 +862,10 @@ handle_cast({complete_shutdown, ShutdownType, From}, State) ->
{stop, normal, State}.
%% handle the bookie stopping and stop this snapshot
handle_info({'DOWN', BookieMonRef, process, _BookiePid, _Info},
State=#state{bookie_monref = BookieMonRef}) ->
handle_info(
{'DOWN', BookieMonRef, process, _BookiePid, _Info},
State=#state{bookie_monref = BookieMonRef, source_inker = SrcInker})
when ?IS_DEF(SrcInker) ->
%% Monitor only registered on snapshots
ok = ink_releasesnapshot(State#state.source_inker, self()),
{stop, normal, State};
@ -879,7 +889,10 @@ code_change(_OldVsn, State, _Extra) ->
-spec start_from_file(inker_options()) -> {ok, ink_state()}.
%% @doc
%% Start an Inker from the state on disk (i.e. not a snapshot).
start_from_file(InkOpts) ->
start_from_file(
InkOpts =
#inker_options{root_path = RootPath, snaptimeout_long = SnapTimeout})
when ?IS_DEF(RootPath), ?IS_DEF(SnapTimeout) ->
% Setting the correct CDB options is important when starting the inker, in
% particular for waste retention which is determined by the CDB options
% with which the file was last opened
@ -926,9 +939,8 @@ start_from_file(InkOpts) ->
{Manifest,
ManifestSQN,
JournalSQN,
ActiveJournal} = build_manifest(ManifestFilenames,
RootPath,
CDBopts),
ActiveJournal} =
build_manifest(ManifestFilenames, RootPath, CDBopts),
{ok, #state{manifest = Manifest,
manifest_sqn = ManifestSQN,
journal_sqn = JournalSQN,
@ -971,61 +983,74 @@ get_cdbopts(InkOpts)->
CDBopts#cdb_options{waste_path = WasteFP}.
-spec put_object(leveled_codec:ledger_key(),
any(),
leveled_codec:journal_keychanges(),
boolean(),
ink_state())
-> {ok|rolling, ink_state(), integer()}.
-spec put_object(
leveled_codec:primary_key(),
any(),
leveled_codec:journal_keychanges(),
boolean(),
ink_state())
-> {ok|rolling, ink_state(), integer()}.
%% @doc
%% Add the object to the current journal if it fits. If it doesn't fit, a new
%% journal must be started, and the old journal is set to "roll" into a read
%% only Journal.
%% The reply contains the byte_size of the object, using the size calculated
%% to store the object.
put_object(LedgerKey, Object, KeyChanges, Sync, State) ->
put_object(
LedgerKey,
Object,
KeyChanges,
Sync,
State =
#state{
active_journaldb = ActiveJournal,
cdb_options = CDBOpts,
root_path = RP
})
when ?IS_DEF(ActiveJournal), ?IS_DEF(CDBOpts), ?IS_DEF(RP) ->
NewSQN = State#state.journal_sqn + 1,
ActiveJournal = State#state.active_journaldb,
{JournalKey, JournalBin} =
leveled_codec:to_inkerkv(LedgerKey,
NewSQN,
Object,
KeyChanges,
State#state.compression_method,
State#state.compress_on_receipt),
case leveled_cdb:cdb_put(ActiveJournal,
JournalKey,
JournalBin,
Sync) of
leveled_codec:to_inkerkv(
LedgerKey,
NewSQN,
Object,
KeyChanges,
State#state.compression_method,
State#state.compress_on_receipt
),
PutR = leveled_cdb:cdb_put(ActiveJournal, JournalKey, JournalBin, Sync),
case PutR of
ok ->
{ok,
State#state{journal_sqn=NewSQN},
byte_size(JournalBin)};
{ok, State#state{journal_sqn=NewSQN}, byte_size(JournalBin)};
roll ->
SWroll = os:timestamp(),
{NewJournalP, Manifest1, NewManSQN} =
roll_active(ActiveJournal,
State#state.manifest,
NewSQN,
State#state.cdb_options,
State#state.root_path,
State#state.manifest_sqn),
roll_active(
ActiveJournal,
State#state.manifest,
NewSQN,
State#state.cdb_options,
State#state.root_path,
State#state.manifest_sqn
),
leveled_log:log_timer(i0008, [], SWroll),
ok = leveled_cdb:cdb_put(NewJournalP,
JournalKey,
JournalBin),
ok =
leveled_cdb:cdb_put(
NewJournalP, JournalKey, JournalBin),
{rolling,
State#state{journal_sqn=NewSQN,
manifest=Manifest1,
manifest_sqn = NewManSQN,
active_journaldb=NewJournalP},
State#state{
journal_sqn=NewSQN,
manifest=Manifest1,
manifest_sqn = NewManSQN,
active_journaldb=NewJournalP},
byte_size(JournalBin)}
end.
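A caller-side sketch (hypothetical, not the handle_call code in this module) showing how the two outcomes differ:

    {Result, State1, ObjSize} =
        put_object(LedgerKey, Object, KeyChanges, true, State0),
    case Result of
        ok      -> ok;   %% written to the existing active journal
        rolling -> ok    %% a new active journal was started for this PUT
    end.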
-spec get_object(leveled_codec:ledger_key(),
integer(),
leveled_imanifest:manifest()) -> any().
-spec get_object(
leveled_codec:ledger_key(),
integer(),
leveled_imanifest:manifest()) -> any().
%% @doc
%% Find the SQN in the manifest and then fetch the object from the Journal,
%% in the manifest. If the fetch is in response to a user GET request then
@ -1041,28 +1066,36 @@ get_object(LedgerKey, SQN, Manifest, ToIgnoreKeyChanges) ->
leveled_codec:from_inkerkv(Obj, ToIgnoreKeyChanges).
-spec roll_active(pid(), leveled_imanifest:manifest(),
integer(), #cdb_options{}, string(), integer()) ->
{pid(), leveled_imanifest:manifest(), integer()}.
-spec roll_active(
pid(),
leveled_imanifest:manifest(),
integer(),
#cdb_options{},
string(),
integer()) -> {pid(), leveled_imanifest:manifest(), integer()}.
%% @doc
%% Roll the active journal, and start a new active journal, updating the
%% manifest
roll_active(ActiveJournal, Manifest, NewSQN, CDBopts, RootPath, ManifestSQN) ->
LastKey = leveled_cdb:cdb_lastkey(ActiveJournal),
ok = leveled_cdb:cdb_roll(ActiveJournal),
Manifest0 =
leveled_imanifest:append_lastkey(Manifest, ActiveJournal, LastKey),
ManEntry =
start_new_activejournal(NewSQN, RootPath, CDBopts),
{_, _, NewJournalP, _} = ManEntry,
Manifest1 = leveled_imanifest:add_entry(Manifest0, ManEntry, true),
ok = leveled_imanifest:writer(Manifest1, ManifestSQN + 1, RootPath),
{NewJournalP, Manifest1, ManifestSQN + 1}.
case leveled_cdb:cdb_lastkey(ActiveJournal) of
LastKey when LastKey =/= empty ->
ok = leveled_cdb:cdb_roll(ActiveJournal),
Manifest0 =
leveled_imanifest:append_lastkey(Manifest, ActiveJournal, LastKey),
ManEntry =
start_new_activejournal(NewSQN, RootPath, CDBopts),
{_, _, NewJournalP, _} = ManEntry,
Manifest1 = leveled_imanifest:add_entry(Manifest0, ManEntry, true),
ok =
leveled_imanifest:writer(Manifest1, ManifestSQN + 1, RootPath),
{NewJournalP, Manifest1, ManifestSQN + 1}
end.
-spec key_check(leveled_codec:ledger_key(),
integer(),
leveled_imanifest:manifest()) -> missing|probably.
-spec key_check(
leveled_codec:primary_key(),
integer(),
leveled_imanifest:manifest()) -> missing|probably.
%% @doc
%% Checks for the presence of the key at that SQN within the journal,
%% avoiding the cost of actually reading the object from disk.
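For illustration (names assumed), the cheap existence probe can be used ahead of a full fetch:

    case key_check(LedgerKey, SQN, Manifest) of
        probably -> maybe_fetch;   %% hash matched - object may be present
        missing  -> not_present    %% definitely not at this SQN
    end.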
@ -1081,40 +1114,36 @@ key_check(LedgerKey, SQN, Manifest) ->
%% Selects the correct manifest to open, and then starts a process for each
%% file in the manifest, storing the PID for that process within the manifest.
%% Opens an active journal if one is not present.
build_manifest(ManifestFilenames,
RootPath,
CDBopts) ->
build_manifest(ManifestFilenames, RootPath, CDBopts) ->
% Find the manifest with the highest Manifest sequence number
% Open it and read it to get the current Confirmed Manifest
ManifestRegex = "(?<MSQN>[0-9]+)\\." ++ leveled_imanifest:complete_filex(),
ValidManSQNs = sequencenumbers_fromfilenames(ManifestFilenames,
ManifestRegex,
'MSQN'),
{Manifest,
ManifestSQN} = case length(ValidManSQNs) of
0 ->
{[], 1};
_ ->
PersistedManSQN = lists:max(ValidManSQNs),
M1 = leveled_imanifest:reader(PersistedManSQN,
RootPath),
{M1, PersistedManSQN}
end,
ValidManSQNs =
sequencenumbers_fromfilenames(
ManifestFilenames, ManifestRegex, 'MSQN'),
{Manifest, ManifestSQN} =
case length(ValidManSQNs) of
0 ->
{[], 1};
_ ->
PersistedManSQN = lists:max(ValidManSQNs),
M1 = leveled_imanifest:reader(PersistedManSQN, RootPath),
{M1, PersistedManSQN}
end,
% Open the manifest files, completing if necessary and ensure there is
% a valid active journal at the head of the manifest
OpenManifest = open_all_manifest(Manifest, RootPath, CDBopts),
{ActiveLowSQN,
_FN,
ActiveJournal,
_LK} = leveled_imanifest:head_entry(OpenManifest),
JournalSQN = case leveled_cdb:cdb_lastkey(ActiveJournal) of
empty ->
ActiveLowSQN;
{JSQN, _Type, _LastKey} ->
JSQN
end,
{ActiveLowSQN, _FN, ActiveJournal, _LK} =
leveled_imanifest:head_entry(OpenManifest),
JournalSQN =
case leveled_cdb:cdb_lastkey(ActiveJournal) of
empty ->
ActiveLowSQN;
{JSQN, _Type, _LastKey} ->
JSQN
end,
% Update the manifest if it has been changed by the process of loading
% the manifest (must also increment the manifest SQN).
@ -1146,8 +1175,9 @@ close_allmanifest([H|ManifestT]) ->
close_allmanifest(ManifestT).
-spec open_all_manifest(leveled_imanifest:manifest(), list(), #cdb_options{})
-> leveled_imanifest:manifest().
-spec open_all_manifest(
leveled_imanifest:manifest(), list(), #cdb_options{})
-> leveled_imanifest:manifest().
%% @doc
%% Open all the files in the manifest, and update the manifest with the PIDs
%% of the opened files
@ -1185,24 +1215,21 @@ open_all_manifest(Man0, RootPath, CDBOpts) ->
true ->
leveled_log:log(i0012, [HeadFN]),
{ok, HeadR} = leveled_cdb:cdb_open_reader(CompleteHeadFN),
LastKey = leveled_cdb:cdb_lastkey(HeadR),
LastSQN = element(1, LastKey),
ManToHead = leveled_imanifest:add_entry(OpenedTail,
{HeadSQN,
HeadFN,
HeadR,
LastKey},
true),
NewManEntry = start_new_activejournal(LastSQN + 1,
RootPath,
CDBOpts),
LastKey = {LastSQN, _, _} = leveled_cdb:cdb_lastkey(HeadR),
ManToHead =
leveled_imanifest:add_entry(
OpenedTail,
{HeadSQN, HeadFN, HeadR, LastKey},
true
),
NewManEntry =
start_new_activejournal(LastSQN + 1, RootPath, CDBOpts),
leveled_imanifest:add_entry(ManToHead, NewManEntry, true);
false ->
{ok, HeadW} = leveled_cdb:cdb_open_writer(PendingHeadFN,
CDBOpts),
leveled_imanifest:add_entry(OpenedTail,
{HeadSQN, HeadFN, HeadW, HeadLK},
true)
{ok, HeadW} =
leveled_cdb:cdb_open_writer(PendingHeadFN, CDBOpts),
leveled_imanifest:add_entry(
OpenedTail, {HeadSQN, HeadFN, HeadW, HeadLK}, true)
end.
@ -1288,17 +1315,19 @@ foldfile_between_sequence(MinSQN, MaxSQN, FoldFuns,
sequencenumbers_fromfilenames(Filenames, Regex, IntName) ->
lists:foldl(fun(FN, Acc) ->
case re:run(FN,
Regex,
[{capture, [IntName], list}]) of
nomatch ->
Acc;
{match, [Int]} when is_list(Int) ->
Acc ++ [list_to_integer(Int)]
end end,
[],
Filenames).
lists:foldl(
fun(FN, Acc) ->
case re:run(FN,
Regex,
[{capture, [IntName], list}]) of
nomatch ->
Acc;
{match, [Int]} when is_list(Int) ->
Acc ++ [list_to_integer(Int)]
end
end,
[],
Filenames).
filepath(RootPath, journal_dir) ->
RootPath ++ "/" ++ ?FILES_FP ++ "/";
@ -1525,7 +1554,7 @@ compact_journal_testto(WRP, ExpectedFiles) ->
PK = "KeyZ" ++ integer_to_list(X),
{ok, SQN, _} = ink_put(Ink1,
test_ledgerkey(PK),
leveled_rand:rand_bytes(10000),
crypto:strong_rand_bytes(10000),
{[], infinity},
false),
{SQN, test_ledgerkey(PK)}


@ -377,7 +377,7 @@ get_opts() ->
}
end.
-spec return_settings() -> {log_level(), list(string())}.
-spec return_settings() -> {log_level(), list(atom())}.
%% @doc
%% Return the settings outside of the record
return_settings() ->
@ -454,7 +454,7 @@ log_timer(LogRef, Subs, StartTime, SupportedLevels) ->
-spec log_randomtimer(atom(), list(), erlang:timestamp(), float()) -> ok.
log_randomtimer(LogReference, Subs, StartTime, RandomProb) ->
R = leveled_rand:uniform(),
R = rand:uniform(),
case R < RandomProb of
true ->
log_timer(LogReference, Subs, StartTime);


@ -136,13 +136,13 @@
{leveled_pmanifest:lsm_level(), #sst_fetch_timings{}}.
-type log_type() ::
bookie_head|bookie_get|bookie_put|bookie_snap|pcl_fetch|sst_fetch|cdb_get.
-type pcl_level() :: mem|leveled_pmanifest:lsm_level().
-type pcl_level() :: memory|leveled_pmanifest:lsm_level().
-type sst_fetch_type() ::
fetch_cache|slot_cachedblock|slot_noncachedblock|not_found.
-type microsecs() :: pos_integer().
-type byte_size() :: pos_integer().
-type monitor() :: {no_monitor, 0}|{pid(), 0..100}.
-type timing() :: no_timing|pos_integer().
-type timing() :: no_timing|microsecs().
-type bookie_get_update() ::
@ -173,8 +173,10 @@
-spec monitor_start(pos_integer(), list(log_type())) -> {ok, pid()}.
monitor_start(LogFreq, LogOrder) ->
gen_server:start_link(
?MODULE, [leveled_log:get_opts(), LogFreq, LogOrder], []).
{ok, Monitor} =
gen_server:start_link(
?MODULE, [leveled_log:get_opts(), LogFreq, LogOrder], []),
{ok, Monitor}.
-spec add_stat(pid(), statistic()) -> ok.
add_stat(Watcher, Statistic) ->
@ -204,7 +206,7 @@ log_remove(Pid, ForcedLogs) ->
-spec maybe_time(monitor()) -> erlang:timestamp()|no_timing.
maybe_time({_Pid, TimingProbability}) ->
case leveled_rand:uniform(100) of
case rand:uniform(100) of
N when N =< TimingProbability ->
os:timestamp();
_ ->
@ -230,16 +232,15 @@ get_defaults() ->
init([LogOpts, LogFrequency, LogOrder]) ->
leveled_log:save(LogOpts),
leveled_rand:seed(),
RandomLogOrder =
lists:map(
fun({_R, SL}) -> SL end,
lists:keysort(
1,
lists:map(
fun(L) -> {leveled_rand:uniform(), L} end,
fun(L) -> {rand:uniform(), L} end,
LogOrder))),
InitialJitter = leveled_rand:uniform(2 * 1000 * LogFrequency),
InitialJitter = rand:uniform(2 * 1000 * LogFrequency),
erlang:send_after(InitialJitter, self(), report_next_stats),
{ok, #state{log_frequency = LogFrequency, log_order = RandomLogOrder}}.


@ -48,8 +48,8 @@
-define(MIN_TIMEOUT, 200).
-define(GROOMING_PERC, 50).
-record(state, {owner :: pid() | undefined,
root_path :: string() | undefined,
-record(state, {owner :: pid()|undefined,
root_path :: string()|undefined,
pending_deletions = dict:new() :: dict:dict(),
sst_options :: sst_options()
}).
@ -123,18 +123,20 @@ handle_call(close, _From, State) ->
handle_cast(prompt, State) ->
handle_info(timeout, State);
handle_cast({push_work, Work}, State) ->
handle_cast(
{push_work, Work}, State = #state{root_path = RP, owner = PCL})
when ?IS_DEF(RP), is_pid(PCL) ->
{ManifestSQN, Deletions} =
handle_work(
Work,
State#state.root_path, State#state.sst_options, State#state.owner),
handle_work(Work, RP, State#state.sst_options, PCL),
PDs = dict:store(ManifestSQN, Deletions, State#state.pending_deletions),
leveled_log:log(pc022, [ManifestSQN]),
{noreply, State#state{pending_deletions = PDs}, ?MIN_TIMEOUT};
handle_cast({prompt_deletions, ManifestSQN}, State) ->
{Deletions, UpdD} = return_deletions(ManifestSQN,
State#state.pending_deletions),
ok = notify_deletions(Deletions, State#state.owner),
handle_cast(
{prompt_deletions, ManifestSQN}, State = #state{owner = PCL})
when is_pid(PCL) ->
{Deletions, UpdD} =
return_deletions(ManifestSQN, State#state.pending_deletions),
ok = notify_deletions(Deletions, PCL),
{noreply, State#state{pending_deletions = UpdD}, ?MIN_TIMEOUT};
handle_cast({log_level, LogLevel}, State) ->
ok = leveled_log:set_loglevel(LogLevel),
@ -152,8 +154,8 @@ handle_cast({remove_logs, ForcedLogs}, State) ->
SSTopts0 = SSTopts#sst_options{log_options = leveled_log:get_opts()},
{noreply, State#state{sst_options = SSTopts0}}.
handle_info(timeout, State) ->
ok = leveled_penciller:pcl_workforclerk(State#state.owner),
handle_info(timeout, State = #state{owner = PCL}) when is_pid(PCL) ->
ok = leveled_penciller:pcl_workforclerk(PCL),
% When handling work, the clerk can collect a large number of binary
% references, so proactively GC this process before receiving any future
% work. In under pressure clusters, clerks with large binary memory
@ -207,7 +209,7 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) ->
[SrcLevel + 1, FCnt, MnHBS, MnHS, MnLHS, MnBVHS])
end,
SelectMethod =
case leveled_rand:uniform(100) of
case rand:uniform(100) of
R when R =< ?GROOMING_PERC ->
{grooming, fun grooming_scorer/1};
_ ->
@ -220,16 +222,22 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) ->
leveled_pmanifest:merge_lookup(
Manifest,
SrcLevel + 1,
Src#manifest_entry.start_key,
Src#manifest_entry.end_key
leveled_pmanifest:entry_startkey(Src),
leveled_pmanifest:entry_endkey(Src)
),
Candidates = length(SinkList),
leveled_log:log(pc008, [SrcLevel, Candidates]),
case Candidates of
0 ->
NewLevel = SrcLevel + 1,
leveled_log:log(pc009, [Src#manifest_entry.filename, NewLevel]),
leveled_sst:sst_switchlevels(Src#manifest_entry.owner, NewLevel),
leveled_log:log(
pc009,
[leveled_pmanifest:entry_filename(Src), NewLevel]
),
leveled_sst:sst_switchlevels(
leveled_pmanifest:entry_owner(Src),
NewLevel
),
Man0 =
leveled_pmanifest:switch_manifest_entry(
Manifest,
@ -249,7 +257,11 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) ->
notify_deletions([], _Penciller) ->
ok;
notify_deletions([Head|Tail], Penciller) ->
ok = leveled_sst:sst_setfordelete(Head#manifest_entry.owner, Penciller),
ok =
leveled_sst:sst_setfordelete(
leveled_pmanifest:entry_owner(Head),
Penciller
),
notify_deletions(Tail, Penciller).
@ -259,9 +271,12 @@ notify_deletions([Head|Tail], Penciller) ->
%% SrcLevel is the level of the src sst file, the sink should be srcLevel + 1
perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN, OptsSST) ->
leveled_log:log(pc010, [Src#manifest_entry.filename, NewSQN]),
leveled_log:log(pc010, [leveled_pmanifest:entry_filename(Src), NewSQN]),
SrcList = [{next, Src, all}],
MaxSQN = leveled_sst:sst_getmaxsequencenumber(Src#manifest_entry.owner),
MaxSQN =
leveled_sst:sst_getmaxsequencenumber(
leveled_pmanifest:entry_owner(Src)
),
SinkLevel = SrcLevel + 1,
SinkBasement = leveled_pmanifest:is_basement(Manifest, SinkLevel),
Additions =
@ -319,13 +334,8 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) ->
{ok, Pid, Reply, Bloom} ->
{{KL1Rem, KL2Rem}, SmallestKey, HighestKey} = Reply,
Entry =
#manifest_entry{
start_key=SmallestKey,
end_key=HighestKey,
owner=Pid,
filename=FileName,
bloom=Bloom
},
leveled_pmanifest:new_entry(
SmallestKey, HighestKey, Pid, FileName, Bloom),
leveled_log:log_timer(pc015, [], TS1),
do_merge(
KL1Rem, KL2Rem,
@ -340,7 +350,8 @@ do_merge(KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions) ->
list(leveled_pmanifest:manifest_entry()))
-> leveled_pmanifest:manifest_entry().
grooming_scorer([ME | MEs]) ->
InitTombCount = leveled_sst:sst_gettombcount(ME#manifest_entry.owner),
InitTombCount =
leveled_sst:sst_gettombcount(leveled_pmanifest:entry_owner(ME)),
{HighestTC, BestME} = grooming_scorer(InitTombCount, ME, MEs),
leveled_log:log(pc024, [HighestTC]),
BestME.
@ -348,7 +359,8 @@ grooming_scorer([ME | MEs]) ->
grooming_scorer(HighestTC, BestME, []) ->
{HighestTC, BestME};
grooming_scorer(HighestTC, BestME, [ME | MEs]) ->
TombCount = leveled_sst:sst_gettombcount(ME#manifest_entry.owner),
TombCount =
leveled_sst:sst_gettombcount(leveled_pmanifest:entry_owner(ME)),
case TombCount > HighestTC of
true ->
grooming_scorer(TombCount, ME, MEs);
@ -385,11 +397,17 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) ->
BNumber =
lists:flatten(
io_lib:format("~4..0B",
[BucketLow + leveled_rand:uniform(BRange)])),
[BucketLow + rand:uniform(BRange)])),
KNumber =
lists:flatten(
io_lib:format("~4..0B", [leveled_rand:uniform(1000)])),
K = {o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
io_lib:format("~4..0B", [rand:uniform(1000)])),
K =
{
o,
list_to_binary("Bucket" ++ BNumber),
list_to_binary("Key" ++ KNumber),
null
},
RandKey = {K, {Count + 1,
{active, infinity},
leveled_codec:segment_hash(K),
@ -415,7 +433,6 @@ grooming_score_test() ->
3,
999999,
#sst_options{},
true,
true),
{ok, PidL3_1B, _, _} =
leveled_sst:sst_newmerge("test/test_area/ledger_files/",
@ -427,7 +444,6 @@ grooming_score_test() ->
3,
999999,
#sst_options{},
true,
true),
{ok, PidL3_2, _, _} =
@ -439,102 +455,116 @@ grooming_score_test() ->
3,
999999,
#sst_options{},
true,
true),
{ok, PidL3_2NC, _, _} =
leveled_sst:sst_newmerge("test/test_area/ledger_files/",
"2NC_L3.sst",
KL3_L3,
KL4_L3,
false,
3,
999999,
#sst_options{},
true,
false),
ME1 = #manifest_entry{owner=PidL3_1},
ME1B = #manifest_entry{owner=PidL3_1B},
ME2 = #manifest_entry{owner=PidL3_2},
ME2NC = #manifest_entry{owner=PidL3_2NC},
DSK = {o, <<"B">>, <<"SK">>, null},
DEK = {o, <<"E">>, <<"EK">>, null},
ME1 = leveled_pmanifest:new_entry(DSK, DEK, PidL3_1, "dummyL3_1", none),
ME1B = leveled_pmanifest:new_entry(DSK, DEK, PidL3_1B, "dummyL3_1B", none),
ME2 = leveled_pmanifest:new_entry(DSK, DEK, PidL3_2, "dummyL3_2", none),
?assertMatch(ME1, grooming_scorer([ME1, ME2])),
?assertMatch(ME1, grooming_scorer([ME2, ME1])),
% prefer the file with the tombstone
?assertMatch(ME2NC, grooming_scorer([ME1, ME2NC])),
?assertMatch(ME2NC, grooming_scorer([ME2NC, ME1])),
% not_counted > 1 - we will merge files in unexpected (i.e. legacy)
% format first
?assertMatch(ME1B, grooming_scorer([ME1B, ME2])),
?assertMatch(ME2, grooming_scorer([ME2, ME1B])),
% If the file with the tombstone is in the basement, it will have
% no tombstone so the first file will be chosen
lists:foreach(fun(P) -> leveled_sst:sst_clear(P) end,
[PidL3_1, PidL3_1B, PidL3_2, PidL3_2NC]).
[PidL3_1, PidL3_1B, PidL3_2]).
merge_file_test() ->
ok = filelib:ensure_dir("test/test_area/ledger_files/"),
KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)),
{ok, PidL1_1, _, _} =
leveled_sst:sst_new("test/test_area/ledger_files/",
"KL1_L1.sst",
1,
KL1_L1,
999999,
#sst_options{}),
leveled_sst:sst_new(
"test/test_area/ledger_files/",
"KL1_L1.sst",
1,
KL1_L1,
999999,
#sst_options{}
),
KL1_L2 = lists:sort(generate_randomkeys(8000, 0, 250)),
{ok, PidL2_1, _, _} =
leveled_sst:sst_new("test/test_area/ledger_files/",
"KL1_L2.sst",
2,
KL1_L2,
999999,
#sst_options{}),
leveled_sst:sst_new(
"test/test_area/ledger_files/",
"KL1_L2.sst",
2,
KL1_L2,
999999,
#sst_options{}
),
KL2_L2 = lists:sort(generate_randomkeys(8000, 250, 250)),
{ok, PidL2_2, _, _} =
leveled_sst:sst_new("test/test_area/ledger_files/",
"KL2_L2.sst",
2,
KL2_L2,
999999,
#sst_options{press_method = lz4}),
leveled_sst:sst_new(
"test/test_area/ledger_files/",
"KL2_L2.sst",
2,
KL2_L2,
999999,
#sst_options{press_method = lz4}
),
KL3_L2 = lists:sort(generate_randomkeys(8000, 500, 250)),
{ok, PidL2_3, _, _} =
leveled_sst:sst_new("test/test_area/ledger_files/",
"KL3_L2.sst",
2,
KL3_L2,
999999,
#sst_options{press_method = lz4}),
leveled_sst:sst_new(
"test/test_area/ledger_files/",
"KL3_L2.sst",
2,
KL3_L2,
999999,
#sst_options{press_method = lz4}
),
KL4_L2 = lists:sort(generate_randomkeys(8000, 750, 250)),
{ok, PidL2_4, _, _} =
leveled_sst:sst_new("test/test_area/ledger_files/",
"KL4_L2.sst",
2,
KL4_L2,
999999,
#sst_options{press_method = lz4}),
E1 = #manifest_entry{owner = PidL1_1,
filename = "./KL1_L1.sst",
end_key = lists:last(KL1_L1),
start_key = lists:nth(1, KL1_L1)},
E2 = #manifest_entry{owner = PidL2_1,
filename = "./KL1_L2.sst",
end_key = lists:last(KL1_L2),
start_key = lists:nth(1, KL1_L2)},
E3 = #manifest_entry{owner = PidL2_2,
filename = "./KL2_L2.sst",
end_key = lists:last(KL2_L2),
start_key = lists:nth(1, KL2_L2)},
E4 = #manifest_entry{owner = PidL2_3,
filename = "./KL3_L2.sst",
end_key = lists:last(KL3_L2),
start_key = lists:nth(1, KL3_L2)},
E5 = #manifest_entry{owner = PidL2_4,
filename = "./KL4_L2.sst",
end_key = lists:last(KL4_L2),
start_key = lists:nth(1, KL4_L2)},
leveled_sst:sst_new(
"test/test_area/ledger_files/",
"KL4_L2.sst",
2,
KL4_L2,
999999,
#sst_options{press_method = lz4}
),
E1 =
leveled_pmanifest:new_entry(
lists:nth(1, KL1_L1),
lists:last(KL1_L1),
PidL1_1,
"./KL1_L1.sst",
none
),
E2 =
leveled_pmanifest:new_entry(
lists:nth(1, KL1_L2),
lists:last(KL1_L2),
PidL2_1,
"./KL1_L2.sst",
none
),
E3 =
leveled_pmanifest:new_entry(
lists:nth(1, KL2_L2),
lists:last(KL2_L2),
PidL2_2,
"./KL2_L2.sst",
none
),
E4 =
leveled_pmanifest:new_entry(
lists:nth(1, KL3_L2),
lists:last(KL3_L2),
PidL2_3,
"./KL3_L2.sst",
none
),
E5 =
leveled_pmanifest:new_entry(
lists:nth(1, KL4_L2),
lists:last(KL4_L2),
PidL2_4,
"./KL4_L2.sst",
none
),
Man0 = leveled_pmanifest:new_manifest(),
Man1 = leveled_pmanifest:insert_manifest_entry(Man0, 1, 2, E2),

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -41,9 +41,12 @@
cache_full/1
]).
% Test functions to ignore for eqwalizer - due to array issues
-eqwalizer({nowarn_function, index_performance_test/0}).
-define(MAX_CACHE_LINES, 31). % Must be less than 128
-type index_array() :: list(array:array())|[]|none.
-type index_array() :: list(array:array(binary()))|none.
-export_type([index_array/0]).
@ -58,7 +61,7 @@ cache_full(L0Cache) ->
length(L0Cache) == ?MAX_CACHE_LINES.
-spec prepare_for_index(
array:array(), leveled_codec:segment_hash()) -> array:array().
array:array(binary()), leveled_codec:segment_hash()) -> array:array().
%% @doc
%% Add the hash of a key to the index. This is 'prepared' in the sense that
%% this index is not use until it is loaded into the main index.
@ -73,18 +76,22 @@ prepare_for_index(IndexArray, Hash) ->
Bin = array:get(Slot, IndexArray),
array:set(Slot, <<Bin/binary, H0:24/integer>>, IndexArray).
-spec add_to_index(array:array(), index_array(), integer()) -> index_array().
-spec add_to_index(
array:array(binary()), index_array(), integer()) -> index_array().
%% @doc
%% Expand the penciller's current index array with the details from a new
%% ledger cache tree sent from the Bookie. The tree will have a cache slot
%% which is the index of this ledger_cache in the list of the ledger_caches
add_to_index(LM1Array, L0Index, CacheSlot) when CacheSlot < 128 ->
add_to_index(
LM1Array, L0Index, CacheSlot)
when CacheSlot < 128, L0Index =/= none ->
[LM1Array|L0Index].
-spec new_index() -> array:array().
-spec new_index() -> array:array(binary()).
%% @doc
%% Create a new index array
new_index() ->
% eqwalizer:ignore - array does contain binary()
array:new([{size, 256}, {default, <<>>}]).
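A minimal sketch of the intended flow (key and cache slot values are illustrative; leveled_codec:segment_hash/1 is assumed to supply the hash consumed here):

    Key = {o, <<"Bucket">>, <<"Key">>, null},
    Hash = leveled_codec:segment_hash(Key),
    Arr0 = new_index(),
    Arr1 = prepare_for_index(Arr0, Hash),
    L0Index = add_to_index(Arr1, [], 1),     %% first ledger cache slot
    Positions = check_index(Hash, L0Index).  %% slots that may contain Key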
-spec check_index(leveled_codec:segment_hash(), index_array())
@ -92,7 +99,7 @@ new_index() ->
%% @doc
%% return a list of positions in the list of cache arrays that may contain the
%% key associated with the hash being checked
check_index(Hash, L0Index) ->
check_index(Hash, L0Index) when L0Index =/= none ->
{Slot, H0} = split_hash(Hash),
{_L, Positions} =
lists:foldl(
@ -239,19 +246,14 @@ check_slotlist(Key, _Hash, CheckList, TreeList) ->
-include_lib("eunit/include/eunit.hrl").
generate_randomkeys_aslist(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
lists:ukeysort(1,
generate_randomkeys(Seqn,
Count,
[],
BucketRangeLow,
BucketRangeHigh)).
lists:ukeysort(
1,
generate_randomkeys(Seqn, Count, [], BucketRangeLow, BucketRangeHigh)
).
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
KVL = generate_randomkeys(Seqn,
Count,
[],
BucketRangeLow,
BucketRangeHigh),
KVL =
generate_randomkeys(Seqn, Count, [], BucketRangeLow, BucketRangeHigh),
leveled_tree:from_orderedlist(lists:ukeysort(1, KVL), ?CACHE_TYPE).
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
@ -260,32 +262,35 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BNumber =
lists:flatten(
io_lib:format("~4..0B",
[BucketLow + leveled_rand:uniform(BRange)])),
[BucketLow + rand:uniform(BRange)])),
KNumber =
lists:flatten(io_lib:format("~4..0B", [leveled_rand:uniform(1000)])),
{K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
{Seqn, {active, infinity}, null}},
generate_randomkeys(Seqn + 1,
Count - 1,
[{K, V}|Acc],
BucketLow,
BRange).
lists:flatten(io_lib:format("~4..0B", [rand:uniform(1000)])),
{K, V} =
{
{o,
list_to_binary("Bucket" ++ BNumber),
list_to_binary("Key" ++ KNumber),
null},
{Seqn, {active, infinity}, null}
},
generate_randomkeys(Seqn + 1, Count - 1, [{K, V}|Acc], BucketLow, BRange).
compare_method_test() ->
R = lists:foldl(fun(_X, {LedgerSQN, L0Size, L0TreeList}) ->
LM1 = generate_randomkeys(LedgerSQN + 1,
2000, 1, 500),
add_to_cache(
L0Size,
{LM1, LedgerSQN + 1, LedgerSQN + 2000},
LedgerSQN,
L0TreeList,
true)
end,
{0, 0, []},
lists:seq(1, 16)),
R =
lists:foldl(
fun(_X, {LedgerSQN, L0Size, L0TreeList}) ->
LM1 = generate_randomkeys(LedgerSQN + 1, 2000, 1, 500),
add_to_cache(
L0Size,
{LM1, LedgerSQN + 1, LedgerSQN + 2000},
LedgerSQN,
L0TreeList,
true)
end,
{0, 0, []},
lists:seq(1, 16)),
{SQN, Size, TreeList} = R,
?assertMatch(32000, SQN),
?assertMatch(true, Size =< 32000),
@ -310,51 +315,62 @@ compare_method_test() ->
end
end,
S0 = lists:foldl(fun({Key, _V}, Acc) ->
R0 = lists:foldl(FindKeyFun(Key),
{false, not_found},
TreeList),
[R0|Acc] end,
[],
TestList),
S0 =
lists:foldl(
fun({Key, _V}, Acc) ->
R0 =
lists:foldl(
FindKeyFun(Key), {false, not_found}, TreeList),
[R0|Acc]
end,
[],
TestList)
,
PosList = lists:seq(1, length(TreeList)),
S1 = lists:foldl(fun({Key, _V}, Acc) ->
R0 = check_levelzero(Key, PosList, TreeList),
[R0|Acc]
end,
[],
TestList),
S1 =
lists:foldl(
fun({Key, _V}, Acc) ->
R0 = check_levelzero(Key, PosList, TreeList),
[R0|Acc]
end,
[],
TestList
),
?assertMatch(S0, S1),
StartKey = {o, "Bucket0100", null, null},
EndKey = {o, "Bucket0200", null, null},
StartKey = {o, <<"Bucket0100">>, null, null},
EndKey = {o, <<"Bucket0200">>, null, null},
SWa = os:timestamp(),
FetchFun = fun(Slot) -> lists:nth(Slot, TreeList) end,
DumpList = to_list(length(TreeList), FetchFun),
Q0 = lists:foldl(fun({K, V}, Acc) ->
P = leveled_codec:endkey_passed(EndKey, K),
case {K, P} of
{K, false} when K >= StartKey ->
[{K, V}|Acc];
_ ->
Acc
end
end,
[],
DumpList),
Q0 =
lists:foldl(
fun({K, V}, Acc) ->
P = leveled_codec:endkey_passed(EndKey, K),
case {K, P} of
{K, false} when K >= StartKey ->
[{K, V}|Acc];
_ ->
Acc
end
end,
[],
DumpList
),
Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, Q0), ?CACHE_TYPE),
Sz0 = leveled_tree:tsize(Tree),
io:format("Crude method took ~w microseconds resulting in tree of " ++
"size ~w~n",
[timer:now_diff(os:timestamp(), SWa), Sz0]),
io:format(
"Crude method took ~w microseconds resulting in tree of size ~w~n",
[timer:now_diff(os:timestamp(), SWa), Sz0]
),
SWb = os:timestamp(),
Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_tree:empty(?CACHE_TYPE)),
Sz1 = length(Q1),
io:format("Merge method took ~w microseconds resulting in tree of " ++
"size ~w~n",
[timer:now_diff(os:timestamp(), SWb), Sz1]),
io:format(
"Merge method took ~w microseconds resulting in tree of size ~w~n",
[timer:now_diff(os:timestamp(), SWb), Sz1]),
?assertMatch(Sz0, Sz1).
with_index_test_() ->


@ -1,28 +0,0 @@
%% Generalized random module that offers a backwards compatible API
%% around some of the changes in rand, crypto and for time units.
-module(leveled_rand).
%% API
-export([
uniform/0,
uniform/1,
seed/0,
rand_bytes/1
]).
%%%===================================================================
%%% New (r19+) rand style functions
%%%===================================================================
uniform() ->
rand:uniform().
uniform(N) ->
rand:uniform(N).
seed() ->
ok.
rand_bytes(Size) ->
crypto:strong_rand_bytes(Size).


@ -39,8 +39,7 @@
-define(CHECKJOURNAL_PROB, 0.2).
-type key_range()
:: {leveled_codec:ledger_key()|null,
leveled_codec:ledger_key()|null}.
:: {leveled_codec:query_key(), leveled_codec:query_key()}.
-type foldacc() :: any().
% Can't currently be specific about what an acc might be
@ -56,15 +55,15 @@
:: fun((leveled_codec:key(), leveled_codec:key()) -> accumulate|pass).
-type snap_fun()
:: fun(() -> {ok, pid(), pid()|null}).
:: fun(() -> {ok, pid(), pid()|null, fun(() -> ok)}).
-type runner_fun()
:: fun(() -> foldacc()).
-type acc_fun()
:: fun((leveled_codec:key(), any(), foldacc()) -> foldacc()).
-type objectacc_fun()
:: fun((leveled_codec:object_key(), any(), foldacc()) -> foldacc()).
-type mp()
:: {re_pattern, term(), term(), term(), term()}.
-export_type([acc_fun/0, mp/0]).
-export_type([fold_keys_fun/0, mp/0]).
%%%============================================================================
%%% External functions
@ -76,8 +75,8 @@
%% @doc
%% Fold over a bucket accumulating the count of objects and their total sizes
bucket_sizestats(SnapFun, Bucket, Tag) ->
StartKey = leveled_codec:to_ledgerkey(Bucket, null, Tag),
EndKey = leveled_codec:to_ledgerkey(Bucket, null, Tag),
StartKey = leveled_codec:to_querykey(Bucket, null, Tag),
EndKey = leveled_codec:to_querykey(Bucket, null, Tag),
AccFun = accumulate_size(),
Runner =
fun() ->
@ -132,7 +131,7 @@ bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) ->
-spec index_query(snap_fun(),
{leveled_codec:ledger_key(),
leveled_codec:ledger_key(),
{boolean(), undefined|mp()|iodata()}},
{boolean(), undefined|mp()}},
{fold_keys_fun(), foldacc()})
-> {async, runner_fun()}.
%% @doc
@ -165,7 +164,7 @@ index_query(SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT) ->
-spec bucketkey_query(snap_fun(),
leveled_codec:tag(),
leveled_codec:key()|null,
key_range(),
{leveled_codec:single_key()|null, leveled_codec:single_key()|null},
{fold_keys_fun(), foldacc()},
leveled_codec:regular_expression())
-> {async, runner_fun()}.
@ -175,8 +174,8 @@ bucketkey_query(SnapFun, Tag, Bucket,
{StartKey, EndKey},
{FoldKeysFun, InitAcc},
TermRegex) ->
SK = leveled_codec:to_ledgerkey(Bucket, StartKey, Tag),
EK = leveled_codec:to_ledgerkey(Bucket, EndKey, Tag),
SK = leveled_codec:to_querykey(Bucket, StartKey, Tag),
EK = leveled_codec:to_querykey(Bucket, EndKey, Tag),
AccFun = accumulate_keys(FoldKeysFun, TermRegex),
Runner =
fun() ->
@ -203,8 +202,8 @@ bucketkey_query(SnapFun, Tag, Bucket, FunAcc) ->
%% @doc
%% Fold over the keys under a given Tag accumulating the hashes
hashlist_query(SnapFun, Tag, JournalCheck) ->
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
StartKey = leveled_codec:to_querykey(null, null, Tag),
EndKey = leveled_codec:to_querykey(null, null, Tag),
Runner =
fun() ->
{ok, LedgerSnapshot, JournalSnapshot, AfterFun} = SnapFun(),
@ -217,10 +216,11 @@ hashlist_query(SnapFun, Tag, JournalCheck) ->
end,
{async, Runner}.
-spec tictactree(snap_fun(),
{leveled_codec:tag(), leveled_codec:key(), tuple()},
boolean(), atom(), fold_filter_fun())
-> {async, runner_fun()}.
-spec tictactree(
snap_fun(),
{leveled_codec:tag(), leveled_codec:key(), tuple()},
boolean(), leveled_tictac:tree_size(), fold_filter_fun())
-> {async, runner_fun()}.
%% @doc
%% Return a merkle tree from the fold, directly accessing hashes cached in the
%% metadata
@ -246,14 +246,14 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) ->
case Tag of
?IDX_TAG ->
{IdxFld, StartIdx, EndIdx} = Query,
KeyDefFun = fun leveled_codec:to_ledgerkey/5,
KeyDefFun = fun leveled_codec:to_querykey/5,
{KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, StartIdx),
KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, EndIdx),
EnsureKeyBinaryFun};
_ ->
{StartOKey, EndOKey} = Query,
{leveled_codec:to_ledgerkey(Bucket, StartOKey, Tag),
leveled_codec:to_ledgerkey(Bucket, EndOKey, Tag),
{leveled_codec:to_querykey(Bucket, StartOKey, Tag),
leveled_codec:to_querykey(Bucket, EndOKey, Tag),
fun(K, H) ->
V = {is_hash, H},
EnsureKeyBinaryFun(K, V)
@ -279,8 +279,8 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) ->
%% function to each proxy object
foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck,
SegmentList, LastModRange, MaxObjectCount) ->
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
StartKey = leveled_codec:to_querykey(null, null, Tag),
EndKey = leveled_codec:to_querykey(null, null, Tag),
foldobjects(SnapFun,
Tag,
[{StartKey, EndKey}],
@ -298,8 +298,8 @@ foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck,
%% @doc
%% Fold over all objects for a given tag
foldobjects_allkeys(SnapFun, Tag, FoldFun, key_order) ->
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
StartKey = leveled_codec:to_querykey(null, null, Tag),
EndKey = leveled_codec:to_querykey(null, null, Tag),
foldobjects(SnapFun,
Tag,
[{StartKey, EndKey}],
@ -335,7 +335,7 @@ foldobjects_allkeys(SnapFun, Tag, FoldObjectsFun, sqn_order) ->
_ ->
{VBin, _VSize} = ExtractFun(JVal),
{Obj, _IdxSpecs} =
leveled_codec:split_inkvalue(VBin),
leveled_codec:revert_value_from_journal(VBin),
ToLoop =
case SQN of
MaxSQN -> stop;
@ -353,11 +353,16 @@ foldobjects_allkeys(SnapFun, Tag, FoldObjectsFun, sqn_order) ->
Folder =
fun() ->
{ok, LedgerSnapshot, JournalSnapshot, AfterFun} = SnapFun(),
{ok, JournalSQN} = leveled_inker:ink_getjournalsqn(JournalSnapshot),
{ok, LedgerSnapshot, JournalSnapshot, AfterFun} =
case SnapFun() of
{ok, LS, JS, AF} when is_pid(JS) ->
{ok, LS, JS, AF}
end,
{ok, JournalSQN} =
leveled_inker:ink_getjournalsqn(JournalSnapshot),
IsValidFun =
fun(Bucket, Key, SQN) ->
LedgerKey = leveled_codec:to_ledgerkey(Bucket, Key, Tag),
LedgerKey = leveled_codec:to_objectkey(Bucket, Key, Tag),
CheckSQN =
leveled_penciller:pcl_checksequencenumber(
LedgerSnapshot, LedgerKey, SQN),
@ -438,9 +443,9 @@ foldheads_bybucket(SnapFun,
%% and passing those objects into the fold function
foldobjects_byindex(SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm}, FoldFun) ->
StartKey =
leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, Field, FromTerm),
leveled_codec:to_querykey(Bucket, null, ?IDX_TAG, Field, FromTerm),
EndKey =
leveled_codec:to_ledgerkey(Bucket, null, ?IDX_TAG, Field, ToTerm),
leveled_codec:to_querykey(Bucket, null, ?IDX_TAG, Field, ToTerm),
foldobjects(SnapFun,
Tag,
[{StartKey, EndKey}],
@ -457,38 +462,39 @@ foldobjects_byindex(SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm}, FoldFun) ->
get_nextbucket(_NextB, _NextK, _Tag, _LS, BKList, {Limit, Limit}) ->
lists:reverse(BKList);
get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList, {C, L}) ->
StartKey = leveled_codec:to_ledgerkey(NextBucket, NextKey, Tag),
EndKey = leveled_codec:to_ledgerkey(null, null, Tag),
StartKey = leveled_codec:to_querykey(NextBucket, NextKey, Tag),
EndKey = leveled_codec:to_querykey(null, null, Tag),
ExtractFun =
fun(LK, V, _Acc) ->
{leveled_codec:from_ledgerkey(LK), V}
end,
R = leveled_penciller:pcl_fetchnextkey(LedgerSnapshot,
StartKey,
EndKey,
ExtractFun,
null),
R =
leveled_penciller:pcl_fetchnextkey(
LedgerSnapshot, StartKey, EndKey, ExtractFun, null),
case R of
{1, null} ->
leveled_log:log(b0008,[]),
BKList;
{0, {{B, K}, _V}} ->
{0, {{B, K}, _V}} when is_binary(B); is_tuple(B) ->
leveled_log:log(b0009,[B]),
get_nextbucket(leveled_codec:next_key(B),
null,
Tag,
LedgerSnapshot,
[{B, K}|BKList],
{C + 1, L})
get_nextbucket(
leveled_codec:next_key(B),
null,
Tag,
LedgerSnapshot,
[{B, K}|BKList],
{C + 1, L}
)
end.
-spec foldobjects(snap_fun(),
atom(),
list(),
fold_objects_fun()|{fold_objects_fun(), foldacc()},
false|{true, boolean()}, false|list(integer())) ->
{async, runner_fun()}.
-spec foldobjects(
snap_fun(),
atom(),
list(),
fold_objects_fun()|{fold_objects_fun(), foldacc()},
false|{true, boolean()}, false|list(integer()))
-> {async, runner_fun()}.
foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList) ->
foldobjects(SnapFun, Tag, KeyRanges,
FoldObjFun, DeferredFetch, SegmentList, false, false).
@ -534,14 +540,16 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch,
FoldFun, JournalSnapshot, Tag, DeferredFetch),
FoldFunGen =
fun({StartKey, EndKey}, FoldAcc) ->
leveled_penciller:pcl_fetchkeysbysegment(LedgerSnapshot,
StartKey,
EndKey,
AccFun,
FoldAcc,
SegmentList,
LastModRange,
LimitByCount)
leveled_penciller:pcl_fetchkeysbysegment(
LedgerSnapshot,
StartKey,
EndKey,
AccFun,
FoldAcc,
SegmentList,
LastModRange,
LimitByCount
)
end,
ListFoldFun =
fun(KeyRange, Acc) ->
@ -567,9 +575,7 @@ accumulate_hashes(JournalCheck, InkerClone) ->
fun(B, K, H, Acc) ->
[{B, K, H}|Acc]
end,
get_hashaccumulator(JournalCheck,
InkerClone,
AddKeyFun).
get_hashaccumulator(JournalCheck, InkerClone, AddKeyFun).
accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) ->
AddKeyFun =
@ -581,15 +587,13 @@ accumulate_tree(FilterFun, JournalCheck, InkerClone, HashFun) ->
Tree
end
end,
get_hashaccumulator(JournalCheck,
InkerClone,
AddKeyFun).
get_hashaccumulator(JournalCheck, InkerClone, AddKeyFun).
get_hashaccumulator(JournalCheck, InkerClone, AddKeyFun) ->
AccFun =
fun(LK, V, Acc) ->
{B, K, H} = leveled_codec:get_keyandobjhash(LK, V),
Check = leveled_rand:uniform() < ?CHECKJOURNAL_PROB,
Check = rand:uniform() < ?CHECKJOURNAL_PROB,
case JournalCheck and Check of
true ->
case check_presence(LK, V, InkerClone) of
@ -604,11 +608,11 @@ get_hashaccumulator(JournalCheck, InkerClone, AddKeyFun) ->
end,
AccFun.
-spec accumulate_objects(fold_objects_fun(),
pid()|null,
leveled_codec:tag(),
false|{true, boolean()})
-> acc_fun().
-spec accumulate_objects
(fold_objects_fun(), pid(), leveled_head:object_tag(), false|{true, boolean()})
-> objectacc_fun();
(fold_objects_fun(), null, leveled_head:headonly_tag(), {true, false})
-> objectacc_fun().
accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) ->
AccFun =
fun(LK, V, Acc) ->
@ -630,24 +634,23 @@ accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) ->
{B0, K0, _T0} ->
{B0, K0}
end,
JK = {leveled_codec:to_ledgerkey(B, K, Tag), SQN},
JK = {leveled_codec:to_objectkey(B, K, Tag), SQN},
case DeferredFetch of
{true, JournalCheck} ->
{true, JournalCheck} when MD =/= null ->
ProxyObj =
leveled_codec:return_proxy(Tag, MD, InkerClone, JK),
case JournalCheck of
true ->
case {JournalCheck, InkerClone} of
{true, InkerClone} when is_pid(InkerClone) ->
InJournal =
leveled_inker:ink_keycheck(InkerClone,
LK,
SQN),
leveled_inker:ink_keycheck(
InkerClone, LK, SQN),
case InJournal of
probably ->
FoldObjectsFun(B, K, ProxyObj, Acc);
missing ->
Acc
end;
false ->
{false, _} ->
FoldObjectsFun(B, K, ProxyObj, Acc)
end;
false ->
@ -730,10 +733,10 @@ throw_test() ->
fun() ->
error
end,
?assertMatch({ok, ['1']},
wrap_runner(CompletedFolder, AfterAction)),
?assertException(throw, stop_fold,
wrap_runner(StoppedFolder, AfterAction)).
?assertMatch({ok, ['1']}, wrap_runner(CompletedFolder, AfterAction)),
?assertException(
throw, stop_fold, wrap_runner(StoppedFolder, AfterAction)
).
-endif.

File diff suppressed because it is too large.


@ -101,7 +101,7 @@
width :: integer(),
segment_count :: integer(),
level1 :: level1_map(),
level2 :: array:array()
level2 :: array:array(binary())
}).
-type level1_map() :: #{non_neg_integer() => binary()}|binary().
@ -161,6 +161,8 @@ new_tree(TreeID, Size, UseMap) ->
width = Width,
segment_count = Width * ?L2_CHUNKSIZE,
level1 = Lv1Init,
% array values are indeed all binaries
% eqwalizer:ignore
level2 = Lv2Init
}.
@ -196,13 +198,16 @@ import_tree(ExportedTree) ->
[{<<"level1">>, L1Base64},
{<<"level2">>, {struct, L2List}}]} = ExportedTree,
L1Bin = base64:decode(L1Base64),
Sizes = lists:map(fun(SizeTag) -> {SizeTag, get_size(SizeTag)} end,
?VALID_SIZES),
Sizes =
lists:map(
fun(SizeTag) -> {SizeTag, get_size(SizeTag)} end,
?VALID_SIZES
),
Width = byte_size(L1Bin) div ?HASH_SIZE,
{Size, _Width} = lists:keyfind(Width, 2, Sizes),
%% assert that size is indeed the provided width
true = get_size(Size) == Width,
Lv2Init = array:new([{size, Width}]),
Lv2Init = array:new([{size, Width}, {default, ?EMPTY}]),
FoldFun =
fun({X, EncodedL2SegBin}, L2Array) ->
L2SegBin = zlib:uncompress(base64:decode(EncodedL2SegBin)),
@ -216,6 +221,8 @@ import_tree(ExportedTree) ->
width = Width,
segment_count = Width * ?L2_CHUNKSIZE,
level1 = to_level1_map(L1Bin),
% array values are indeed all binaries
% eqwalizer:ignore
level2 = Lv2
}.
@ -229,12 +236,14 @@ import_tree(ExportedTree) ->
add_kv(TicTacTree, Key, Value, BinExtractFun) ->
add_kv(TicTacTree, Key, Value, BinExtractFun, false).
-spec add_kv(
tictactree(), term(), term(), bin_extract_fun(), boolean())
-> tictactree()|{tictactree(), integer()}.
-spec add_kv
(tictactree(), term(), term(), bin_extract_fun(), true) ->
{tictactree(), integer()};
(tictactree(), term(), term(), bin_extract_fun(), false) ->
tictactree().
%% @doc
%% add_kv with ability to return segment ID of Key added
add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) ->
add_kv(TicTacTree, Key, Value, BinExtractFun, true) ->
{BinK, BinV} = BinExtractFun(Key, Value),
{SegHash, SegChangeHash} = tictac_hash(BinK, BinV),
Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count),
@ -249,12 +258,9 @@ add_kv(TicTacTree, Key, Value, BinExtractFun, ReturnSegment) ->
replace_segment(
SegLeaf1Upd, SegLeaf2Upd, L1Extract, L2Extract, TicTacTree
),
case ReturnSegment of
true ->
{UpdatedTree, Segment};
false ->
UpdatedTree
end.
{UpdatedTree, Segment};
add_kv(TicTacTree, Key, Value, BinExtractFun, false) ->
element(1, add_kv(TicTacTree, Key, Value, BinExtractFun, true)).
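A minimal sketch of the two call forms, assuming the existing new_tree/2 constructor with the small size tag; keys and values are illustrative:

add_kv_sketch() ->
    Tree0 = new_tree(sketch, small),
    ExtractFun = fun(K, V) -> {K, V} end,
    %% The true clause also returns the segment ID of the key just added
    {Tree1, Segment} = add_kv(Tree0, <<"K1">>, <<"V1">>, ExtractFun, true),
    %% The false clause returns only the updated tree
    Tree2 = add_kv(Tree1, <<"K2">>, <<"V2">>, ExtractFun, false),
    {Tree2, Segment}.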
-spec alter_segment(integer(), integer(), tictactree()) -> tictactree().
%% @doc
@ -373,16 +379,13 @@ get_segment(Hash, TreeSize) ->
%% has already been taken. If the value is not a pre-extracted hash just use
%% erlang:phash2. If an exportable hash of the value is required this should
%% be managed through the add_kv ExtractFun providing a pre-prepared Hash.
tictac_hash(BinKey, Val) when is_binary(BinKey) ->
tictac_hash(
BinKey, {is_hash, HashedVal})
when is_binary(BinKey), is_integer(HashedVal) ->
{HashKeyToSeg, AltHashKey} = keyto_doublesegment32(BinKey),
HashVal =
case Val of
{is_hash, HashedVal} ->
HashedVal;
_ ->
erlang:phash2(Val)
end,
{HashKeyToSeg, AltHashKey bxor HashVal}.
{HashKeyToSeg, AltHashKey bxor HashedVal};
tictac_hash(BinKey, ValToHash) when is_binary(BinKey) ->
tictac_hash(BinKey, {is_hash, erlang:phash2(ValToHash)}).
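A minimal sketch (key and value illustrative) showing that the raw-term clause defers to the pre-hashed clause, so both forms agree:

tictac_hash_sketch() ->
    Key = <<"K1">>,
    Val = {some, value},
    Pre = tictac_hash(Key, {is_hash, erlang:phash2(Val)}),
    %% The raw term is hashed with erlang:phash2/1 and re-dispatched,
    %% so this match succeeds
    Pre = tictac_hash(Key, Val),
    Pre.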
-spec keyto_doublesegment32(
binary()) -> {non_neg_integer(), non_neg_integer()}.
@ -961,7 +964,7 @@ timing_test() ->
timing_tester(KeyCount, SegCount, SmallSize, LargeSize) ->
SegList =
lists:map(fun(_C) ->
leveled_rand:uniform(get_size(SmallSize) * ?L2_CHUNKSIZE - 1)
rand:uniform(get_size(SmallSize) * ?L2_CHUNKSIZE - 1)
end,
lists:seq(1, SegCount)),
KeyToSegFun =


@ -29,9 +29,7 @@
-define(SKIP_WIDTH, 16).
-type tree_type() :: tree|idxt|skpl.
-type leveled_tree() :: {tree_type(),
integer(), % length
any()}.
-type leveled_tree() :: {tree_type(), integer(), any()}.
-export_type([leveled_tree/0]).
@ -104,7 +102,7 @@ match(Key, {tree, _L, Tree}) ->
{_NK, SL, _Iter} ->
lookup_match(Key, SL)
end;
match(Key, {idxt, _L, {TLI, IDX}}) ->
match(Key, {idxt, _L, {TLI, IDX}}) when is_tuple(TLI) ->
Iter = tree_iterator_from(Key, IDX),
case tree_next(Iter) of
none ->
@ -136,7 +134,7 @@ search(Key, {tree, _L, Tree}, StartKeyFun) ->
{K, V}
end
end;
search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) ->
search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) when is_tuple(TLI) ->
Iter = tree_iterator_from(Key, IDX),
case tree_next(Iter) of
none ->
@ -235,14 +233,13 @@ to_list({tree, _L, Tree}) ->
Acc ++ SL
end,
lists:foldl(FoldFun, [], tree_to_list(Tree));
to_list({idxt, _L, {TLI, _IDX}}) ->
to_list({idxt, _L, {TLI, _IDX}}) when is_tuple(TLI) ->
lists:append(tuple_to_list(TLI));
to_list({skpl, _L, SkipList}) ->
to_list({skpl, _L, SkipList}) when is_list(SkipList) ->
FoldFun =
fun({_M, SL}, Acc) ->
[SL|Acc]
end,
Lv1List = lists:reverse(lists:foldl(FoldFun, [], SkipList)),
Lv0List = lists:reverse(lists:foldl(FoldFun, [], lists:append(Lv1List))),
lists:append(Lv0List).
@ -580,13 +577,13 @@ generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BRand = leveled_rand:uniform(BRange),
BRand = rand:uniform(BRange),
BNumber =
lists:flatten(
io_lib:format("K~4..0B", [BucketLow + BRand])),
KNumber =
lists:flatten(
io_lib:format("K~8..0B", [leveled_rand:uniform(1000)])),
io_lib:format("K~8..0B", [rand:uniform(1000)])),
{K, V} =
{{o_kv,
{<<"btype">>, list_to_binary("Bucket" ++ BNumber)},
@ -608,7 +605,7 @@ generate_simplekeys(Seqn, Count, Acc) ->
KNumber =
list_to_binary(
lists:flatten(
io_lib:format("K~8..0B", [leveled_rand:uniform(100000)]))),
io_lib:format("K~8..0B", [rand:uniform(100000)]))),
generate_simplekeys(Seqn + 1, Count - 1, [{KNumber, Seqn}|Acc]).
@ -958,14 +955,13 @@ search_range_idx_test() ->
{o_rkv,"Bucket1","Key1",null},
"<0.320.0>","./16_1_6.sst", none}}]},
{1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}},
StartKeyFun =
fun(ME) ->
ME#manifest_entry.start_key
end,
R = search_range({o_rkv, "Bucket", null, null},
{o_rkv, "Bucket", null, null},
Tree,
StartKeyFun),
R =
search_range(
{o_rkv, "Bucket", null, null},
{o_rkv, "Bucket", null, null},
Tree,
fun leveled_pmanifest:entry_startkey/1
),
?assertMatch(1, length(R)).
-endif.


@ -23,7 +23,7 @@
%% Credit to
%% https://github.com/afiskon/erlang-uuid-v4/blob/master/src/uuid.erl
generate_uuid() ->
<<A:32, B:16, C:16, D:16, E:48>> = leveled_rand:rand_bytes(16),
<<A:32, B:16, C:16, D:16, E:48>> = crypto:strong_rand_bytes(16),
L = io_lib:format("~8.16.0b-~4.16.0b-4~3.16.0b-~4.16.0b-~12.16.0b",
[A, B, C band 16#0fff, D band 16#3fff bor 16#8000, E]),
binary_to_list(list_to_binary(L)).
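A minimal usage sketch (the calling function name is illustrative); the format string above always yields a 36-character string:

generate_uuid_sketch() ->
    Uuid = generate_uuid(),
    %% 8-4-4-4-12 hex groups plus four hyphens
    36 = length(Uuid),
    Uuid.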


@ -73,7 +73,7 @@ application_defined_tag_tester(KeyCount, Tag, Functions, ExpectMD) ->
[{bespoke_tag1, retain}, {bespoke_tag2, retain}]},
{override_functions, Functions}],
{ok, Bookie1} = leveled_bookie:book_start(StartOpts1),
Value = leveled_rand:rand_bytes(512),
Value = crypto:strong_rand_bytes(512),
MapFun =
fun(C) ->
{C, object_generator(C, Value)}
@ -119,7 +119,7 @@ application_defined_tag_tester(KeyCount, Tag, Functions, ExpectMD) ->
object_generator(Count, V) ->
Hash = erlang:phash2({count, V}),
Random = leveled_rand:uniform(1000),
Random = rand:uniform(1000),
Key = list_to_binary(leveled_util:generate_uuid()),
Bucket = <<"B">>,
{Bucket,


@ -55,15 +55,19 @@ simple_put_fetch_head_delete(_Config) ->
simple_test_withlog(LogLevel, ForcedLogs) ->
RootPath = testutil:reset_filestructure(),
StartOpts1 = [{root_path, RootPath},
{sync_strategy, testutil:sync_strategy()},
{log_level, LogLevel},
{forced_logs, ForcedLogs}],
StartOpts1 =
[
{root_path, RootPath},
{sync_strategy, testutil:sync_strategy()},
{log_level, LogLevel},
{forced_logs, ForcedLogs},
{max_pencillercachesize, 200}
],
{ok, Bookie1} = leveled_bookie:book_start(StartOpts1),
{TestObject, TestSpec} = testutil:generate_testobject(),
ok = testutil:book_riakput(Bookie1, TestObject, TestSpec),
testutil:check_forobject(Bookie1, TestObject),
testutil:check_formissingobject(Bookie1, "Bucket1", "Key2"),
testutil:check_formissingobject(Bookie1, <<"Bucket1">>, <<"Key2">>),
ok = leveled_bookie:book_close(Bookie1),
StartOpts2 = [{root_path, RootPath},
{max_journalsize, 3000000},
@ -78,29 +82,49 @@ simple_test_withlog(LogLevel, ForcedLogs) ->
ChkList1 = lists:sublist(lists:sort(ObjList1), 100),
testutil:check_forlist(Bookie2, ChkList1),
testutil:check_forobject(Bookie2, TestObject),
testutil:check_formissingobject(Bookie2, "Bucket1", "Key2"),
ok = leveled_bookie:book_put(Bookie2, "Bucket1", "Key2", "Value2",
[{add, "Index1", "Term1"}]),
{ok, "Value2"} = leveled_bookie:book_get(Bookie2, "Bucket1", "Key2"),
{ok, {62888926, S, undefined}} =
leveled_bookie:book_head(Bookie2, "Bucket1", "Key2"),
true = (S == 58) or (S == 60),
testutil:check_formissingobject(Bookie2, <<"Bucket1">>, <<"Key2">>),
ok =
leveled_bookie:book_put(
Bookie2,
<<"Bucket1">>,
<<"Key2">>,
<<"Value2">>,
[{add, <<"Index1">>, <<"Term1">>}]
),
{ok, <<"Value2">>} =
leveled_bookie:book_get(Bookie2, <<"Bucket1">>, <<"Key2">>),
{ok, {2220864, S, undefined}} =
leveled_bookie:book_head(Bookie2, <<"Bucket1">>, <<"Key2">>),
true = (S == 63) or (S == 65),
% After OTP 26 the object is 63 bytes not 65
testutil:check_formissingobject(Bookie2, "Bucket1", "Key2"),
ok = leveled_bookie:book_put(Bookie2, "Bucket1", "Key2", <<"Value2">>,
[{remove, "Index1", "Term1"},
{add, "Index1", <<"Term2">>}]),
{ok, <<"Value2">>} = leveled_bookie:book_get(Bookie2, "Bucket1", "Key2"),
testutil:check_formissingobject(Bookie2, <<"Bucket1">>, <<"Key2">>),
ok =
leveled_bookie:book_put(
Bookie2,
<<"Bucket1">>,
<<"Key2">>,
<<"Value2">>,
[{remove, <<"Index1">>, <<"Term1">>},
{add, <<"Index1">>, <<"Term2">>}]
),
{ok, <<"Value2">>} =
leveled_bookie:book_get(Bookie2, <<"Bucket1">>, <<"Key2">>),
ok = leveled_bookie:book_close(Bookie2),
{ok, Bookie3} = leveled_bookie:book_start(StartOpts2),
{ok, <<"Value2">>} = leveled_bookie:book_get(Bookie3, "Bucket1", "Key2"),
ok = leveled_bookie:book_delete(Bookie3, "Bucket1", "Key2",
[{remove, "Index1", "Term1"}]),
not_found = leveled_bookie:book_get(Bookie3, "Bucket1", "Key2"),
not_found = leveled_bookie:book_head(Bookie3, "Bucket1", "Key2"),
{ok, <<"Value2">>} =
leveled_bookie:book_get(Bookie3, <<"Bucket1">>, <<"Key2">>),
ok =
leveled_bookie:book_delete(
Bookie3,
<<"Bucket1">>,
<<"Key2">>,
[{remove, <<"Index1">>, <<"Term1">>}]
),
not_found = leveled_bookie:book_get(Bookie3, <<"Bucket1">>, <<"Key2">>),
not_found = leveled_bookie:book_head(Bookie3, <<"Bucket1">>, <<"Key2">>),
ok = leveled_bookie:book_close(Bookie3),
{ok, Bookie4} = leveled_bookie:book_start(StartOpts2),
not_found = leveled_bookie:book_get(Bookie4, "Bucket1", "Key2"),
not_found = leveled_bookie:book_get(Bookie4, <<"Bucket1">>, <<"Key2">>),
ok = leveled_bookie:book_destroy(Bookie4).
many_put_fetch_head(_Config) ->
@ -168,7 +192,7 @@ many_put_fetch_head(_Config) ->
not_found = leveled_bookie:book_sqn(Bookie3,
testutil:get_bucket(TestObject),
testutil:get_key(TestObject)),
testutil:check_formissingobject(Bookie3, "Bookie1", "MissingKey0123"),
testutil:check_formissingobject(Bookie3, <<"Bookie1">>, <<"MissingKey0123">>),
ok = leveled_bookie:book_destroy(Bookie3).
bigjournal_littlejournal(_Config) ->
@ -181,7 +205,7 @@ bigjournal_littlejournal(_Config) ->
{ok, Bookie1} = leveled_bookie:book_start(StartOpts1),
ObjL1 =
testutil:generate_objects(100, 1, [],
leveled_rand:rand_bytes(10000),
crypto:strong_rand_bytes(10000),
fun() -> [] end, <<"B">>),
testutil:riakload(Bookie1, ObjL1),
ok = leveled_bookie:book_close(Bookie1),
@ -189,7 +213,7 @@ bigjournal_littlejournal(_Config) ->
{ok, Bookie2} = leveled_bookie:book_start(StartOpts2),
ObjL2 =
testutil:generate_objects(10, 1000, [],
leveled_rand:rand_bytes(10000),
crypto:strong_rand_bytes(10000),
fun() -> [] end, <<"B">>),
testutil:riakload(Bookie2, ObjL2),
testutil:check_forlist(Bookie2, ObjL1),
@ -214,7 +238,7 @@ bigsst_littlesst(_Config) ->
100000,
1,
[],
leveled_rand:rand_bytes(100),
crypto:strong_rand_bytes(100),
fun() -> [] end,
<<"B">>)
),
@ -260,13 +284,16 @@ journal_compaction_tester(Restart, WRP) ->
ChkList1 = lists:sublist(lists:sort(ObjList1), 10000),
testutil:check_forlist(Bookie0, ChkList1),
testutil:check_forobject(Bookie0, TestObject),
{B2, K2, V2, Spec2, MD} = {"Bucket2",
"Key2",
"Value2",
[],
[{"MDK2", "MDV2"}]},
{TestObject2, TestSpec2} = testutil:generate_testobject(B2, K2,
V2, Spec2, MD),
{B2, K2, V2, Spec2, MD} =
{
<<"Bucket2">>,
<<"Key2">>,
<<"Value2">>,
[],
[{<<"MDK2">>, <<"MDV2">>}]
},
{TestObject2, TestSpec2} =
testutil:generate_testobject(B2, K2, V2, Spec2, MD),
ok = testutil:book_riakput(Bookie0, TestObject2, TestSpec2),
ok = leveled_bookie:book_compactjournal(Bookie0, 30000),
testutil:check_forlist(Bookie0, ChkList1),
@ -277,13 +304,15 @@ journal_compaction_tester(Restart, WRP) ->
testutil:check_forobject(Bookie0, TestObject2),
%% Delete some of the objects
ObjListD = testutil:generate_objects(10000, 2),
lists:foreach(fun({_R, O, _S}) ->
testutil:book_riakdelete(Bookie0,
testutil:get_bucket(O),
testutil:get_key(O),
[])
end,
ObjListD),
lists:foreach(
fun({_R, O, _S}) ->
testutil:book_riakdelete(Bookie0,
testutil:get_bucket(O),
testutil:get_key(O),
[])
end,
ObjListD
),
%% Now replace all the other objects
ObjList2 = testutil:generate_objects(40000, 10002),
@ -539,11 +568,11 @@ fetchput_snapshot(_Config) ->
% smaller due to replacements and files deleting
% This is dependent on the sleep though (yuk)
{B1Size, B1Count} = testutil:check_bucket_stats(Bookie2, "Bucket1"),
{B1Size, B1Count} = testutil:check_bucket_stats(Bookie2, <<"Bucket1">>),
true = B1Size > 0,
true = B1Count == 1,
{B1Size, B1Count} = testutil:check_bucket_stats(Bookie2, "Bucket1"),
{BSize, BCount} = testutil:check_bucket_stats(Bookie2, "Bucket"),
{B1Size, B1Count} = testutil:check_bucket_stats(Bookie2, <<"Bucket1">>),
{BSize, BCount} = testutil:check_bucket_stats(Bookie2, <<"Bucket">>),
true = BSize > 0,
true = BCount == 180000,
@ -622,82 +651,78 @@ load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) ->
testutil:check_forobject(Bookie1, TestObject),
io:format("Loading initial small objects~n"),
G1 = fun testutil:generate_smallobjects/2,
lists:foldl(fun(_X, Acc) ->
testutil:load_objects(5000,
[Acc + 2],
Bookie1,
TestObject,
G1),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, "Bucket"),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
0,
lists:seq(1, 20)),
lists:foldl(
fun(_X, Acc) ->
testutil:load_objects(
5000, [Acc + 2], Bookie1, TestObject, G1),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, <<"Bucket">>),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
0,
lists:seq(1, 20)
),
testutil:check_forobject(Bookie1, TestObject),
io:format("Loading larger compressible objects~n"),
G2 = fun testutil:generate_compressibleobjects/2,
lists:foldl(fun(_X, Acc) ->
testutil:load_objects(5000,
[Acc + 2],
Bookie1,
TestObject,
G2),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, "Bucket"),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
100000,
lists:seq(1, 20)),
lists:foldl(
fun(_X, Acc) ->
testutil:load_objects(
5000, [Acc + 2], Bookie1, TestObject, G2),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, <<"Bucket">>),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
100000,
lists:seq(1, 20)
),
testutil:check_forobject(Bookie1, TestObject),
io:format("Replacing small objects~n"),
lists:foldl(fun(_X, Acc) ->
testutil:load_objects(5000,
[Acc + 2],
Bookie1,
TestObject,
G1),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, "Bucket"),
if
Count == 200000 ->
ok
end,
Acc + 5000 end,
0,
lists:seq(1, 20)),
lists:foldl(
fun(_X, Acc) ->
testutil:load_objects(
5000, [Acc + 2], Bookie1, TestObject, G1),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, <<"Bucket">>),
if
Count == 200000 ->
ok
end,
Acc + 5000 end,
0,
lists:seq(1, 20)
),
testutil:check_forobject(Bookie1, TestObject),
io:format("Loading more small objects~n"),
io:format("Now with unused snapshot so deletions are blocked~n"),
{ok, PclClone, null} =
leveled_bookie:book_snapshot(Bookie1, ledger, undefined, true),
lists:foldl(fun(_X, Acc) ->
testutil:load_objects(5000,
[Acc + 2],
Bookie1,
TestObject,
G2),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, "Bucket"),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
200000,
lists:seq(1, 20)),
lists:foldl(
fun(_X, Acc) ->
testutil:load_objects(
5000, [Acc + 2], Bookie1, TestObject, G2),
{_S, Count} =
testutil:check_bucket_stats(Bookie1, <<"Bucket">>),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
200000,
lists:seq(1, 20)
),
testutil:check_forobject(Bookie1, TestObject),
ok = leveled_penciller:pcl_close(PclClone),
{_S, 300000} = testutil:check_bucket_stats(Bookie1, "Bucket"),
{_S, 300000} = testutil:check_bucket_stats(Bookie1, <<"Bucket">>),
ok = leveled_bookie:book_close(Bookie1),
{ok, Bookie2} = leveled_bookie:book_start(StartOpts1),
{_, 300000} = testutil:check_bucket_stats(Bookie2, "Bucket"),
{_, 300000} = testutil:check_bucket_stats(Bookie2, <<"Bucket">>),
ok = leveled_bookie:book_close(Bookie2),
@ -722,21 +747,19 @@ load_and_count_withdelete(_Config) ->
testutil:check_forobject(Bookie1, TestObject),
io:format("Loading initial small objects~n"),
G1 = fun testutil:generate_smallobjects/2,
lists:foldl(fun(_X, Acc) ->
testutil:load_objects(5000,
[Acc + 2],
Bookie1,
TestObject,
G1),
{_S, Count} = testutil:check_bucket_stats(Bookie1,
"Bucket"),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
0,
lists:seq(1, 20)),
lists:foldl(
fun(_X, Acc) ->
testutil:load_objects(
5000, [Acc + 2], Bookie1, TestObject, G1),
{_S, Count} = testutil:check_bucket_stats(Bookie1, <<"Bucket">>),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
0,
lists:seq(1, 20)
),
testutil:check_forobject(Bookie1, TestObject),
{BucketD, KeyD} =
{testutil:get_bucket(TestObject), testutil:get_key(TestObject)},
@ -746,21 +769,19 @@ load_and_count_withdelete(_Config) ->
{_, 0} = testutil:check_bucket_stats(Bookie1, BucketD),
io:format("Loading larger compressible objects~n"),
G2 = fun testutil:generate_compressibleobjects/2,
lists:foldl(fun(_X, Acc) ->
testutil:load_objects(5000,
[Acc + 2],
Bookie1,
no_check,
G2),
{_S, Count} = testutil:check_bucket_stats(Bookie1,
"Bucket"),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
100000,
lists:seq(1, 20)),
lists:foldl(
fun(_X, Acc) ->
testutil:load_objects(
5000, [Acc + 2], Bookie1, no_check, G2),
{_S, Count} = testutil:check_bucket_stats(Bookie1, <<"Bucket">>),
if
Acc + 5000 == Count ->
ok
end,
Acc + 5000 end,
100000,
lists:seq(1, 20)
),
not_found = testutil:book_riakget(Bookie1, BucketD, KeyD),
ok = leveled_bookie:book_close(Bookie1),
@ -780,11 +801,8 @@ space_clear_ondelete(_Config) ->
{sync_strategy, testutil:sync_strategy()}],
{ok, Book1} = leveled_bookie:book_start(StartOpts1),
G2 = fun testutil:generate_compressibleobjects/2,
testutil:load_objects(20000,
[uuid, uuid, uuid, uuid],
Book1,
no_check,
G2),
testutil:load_objects(
20000, [uuid, uuid, uuid, uuid], Book1, no_check, G2),
FoldKeysFun = fun(B, K, Acc) -> [{B, K}|Acc] end,
@ -808,10 +826,9 @@ space_clear_ondelete(_Config) ->
FoldObjectsFun = fun(B, K, ObjBin, Acc) ->
[{B, K, erlang:phash2(ObjBin)}|Acc] end,
{async, HTreeF1} = leveled_bookie:book_objectfold(Book1,
?RIAK_TAG,
{FoldObjectsFun, []},
false),
{async, HTreeF1} =
leveled_bookie:book_objectfold(
Book1, ?RIAK_TAG, {FoldObjectsFun, []}, false),
% This query does not Snap PreFold - and so will not prevent
% pending deletes from prompting actual deletes
@ -822,32 +839,34 @@ space_clear_ondelete(_Config) ->
% Delete the keys
SW2 = os:timestamp(),
lists:foreach(fun({Bucket, Key}) ->
testutil:book_riakdelete(Book1,
Bucket,
Key,
[])
end,
KL1),
io:format("Deletion took ~w microseconds for 80K keys~n",
[timer:now_diff(os:timestamp(), SW2)]),
lists:foreach(
fun({Bucket, Key}) ->
testutil:book_riakdelete(Book1, Bucket, Key, [])
end,
KL1),
io:format(
"Deletion took ~w microseconds for 80K keys~n",
[timer:now_diff(os:timestamp(), SW2)]),
ok = leveled_bookie:book_compactjournal(Book1, 30000),
F = fun leveled_bookie:book_islastcompactionpending/1,
lists:foldl(fun(X, Pending) ->
case Pending of
false ->
false;
true ->
io:format("Loop ~w waiting for journal "
++ "compaction to complete~n", [X]),
timer:sleep(20000),
F(Book1)
end end,
true,
lists:seq(1, 15)),
lists:foldl(
fun(X, Pending) ->
case Pending of
false ->
false;
true ->
io:format(
"Loop ~w waiting for journal "
"compaction to complete~n",
[X]
),
timer:sleep(20000),
F(Book1)
end
end,
true,
lists:seq(1, 15)),
io:format("Waiting for journal deletes - blocked~n"),
timer:sleep(20000),
@ -1113,7 +1132,7 @@ many_put_fetch_switchcompression_tester(CompressionMethod) ->
%% Change method back again
{ok, Bookie3} = leveled_bookie:book_start(StartOpts1),
testutil:check_formissingobject(Bookie3, "Bookie1", "MissingKey0123"),
testutil:check_formissingobject(Bookie3, <<"Bookie1">>, "MissingKey0123"),
lists:foreach(
fun(CL) -> ok = testutil:check_forlist(Bookie3, CL) end, CL2s),
lists:foreach(
@ -1244,10 +1263,12 @@ bigpcl_bucketlist(_Config) ->
MapFun =
fun(B) ->
testutil:generate_objects(ObjectCount, 1, [],
leveled_rand:rand_bytes(100),
fun() -> [] end,
B)
testutil:generate_objects(
ObjectCount, 1, [],
crypto:strong_rand_bytes(100),
fun() -> [] end,
B
)
end,
ObjLofL = lists:map(MapFun, BucketList),
lists:foreach(fun(ObjL) -> testutil:riakload(Bookie1, ObjL) end, ObjLofL),
@ -1263,11 +1284,15 @@ bigpcl_bucketlist(_Config) ->
FBAccT = {BucketFold, sets:new()},
{async, BucketFolder1} =
leveled_bookie:book_headfold(Bookie1,
?RIAK_TAG,
{bucket_list, BucketList},
FBAccT,
false, false, false),
leveled_bookie:book_headfold(
Bookie1,
?RIAK_TAG,
{bucket_list, BucketList},
FBAccT,
false,
false,
false
),
{FoldTime1, BucketList1} = timer:tc(BucketFolder1, []),
true = BucketCount == sets:size(BucketList1),
@ -1276,11 +1301,15 @@ bigpcl_bucketlist(_Config) ->
{ok, Bookie2} = leveled_bookie:book_start(StartOpts1),
{async, BucketFolder2} =
leveled_bookie:book_headfold(Bookie2,
?RIAK_TAG,
{bucket_list, BucketList},
FBAccT,
false, false, false),
leveled_bookie:book_headfold(
Bookie2,
?RIAK_TAG,
{bucket_list, BucketList},
FBAccT,
false,
false,
false
),
{FoldTime2, BucketList2} = timer:tc(BucketFolder2, []),
true = BucketCount == sets:size(BucketList2),


@ -57,7 +57,7 @@ expiring_indexes(_Config) ->
Indexes9 = testutil:get_randomindexes_generator(2),
TempRiakObjects =
testutil:generate_objects(
KeyCount, binary_uuid, [], V9, Indexes9, "riakBucket"),
KeyCount, binary_uuid, [], V9, Indexes9, <<"riakBucket">>),
IBKL1 = testutil:stdload_expiring(Bookie1, KeyCount, Future),
lists:foreach(
@ -147,11 +147,13 @@ expiring_indexes(_Config) ->
Bookie1, B0, K0, 5, <<"value">>, leveled_util:integer_now() + 10),
timer:sleep(1000),
{async, Folder2} = IndexFold(),
leveled_bookie:book_indexfold(Bookie1,
B0,
{FoldFun, InitAcc},
{<<"temp_int">>, 5, 8},
{true, undefined}),
leveled_bookie:book_indexfold(
Bookie1,
B0,
{FoldFun, InitAcc},
{<<"temp_int">>, 5, 8},
{true, undefined}
),
QR2 = Folder2(),
io:format("Query with additional entry length ~w~n", [length(QR2)]),
true = lists:sort(QR2) == lists:sort([{5, B0, K0}|LoadedEntriesInRange]),
@ -208,11 +210,9 @@ breaking_folds(_Config) ->
{ok, Bookie1} = leveled_bookie:book_start(StartOpts1),
ObjectGen = testutil:get_compressiblevalue_andinteger(),
IndexGen = testutil:get_randomindexes_generator(8),
ObjL1 = testutil:generate_objects(KeyCount,
binary_uuid,
[],
ObjectGen,
IndexGen),
ObjL1 =
testutil:generate_objects(
KeyCount, binary_uuid, [], ObjectGen, IndexGen),
testutil:riakload(Bookie1, ObjL1),
% Find all keys index, and then same again but stop at a midpoint using a
@ -261,7 +261,6 @@ breaking_folds(_Config) ->
io:format("Index fold with result size ~w~n", [length(KeyList2)]),
true = KeyCount div 2 == length(KeyList2),
HeadFoldFun =
fun(_B, K, PO, Acc) ->
{proxy_object, _MDBin, Size, _FF} = binary_to_term(PO),
@ -287,10 +286,14 @@ breaking_folds(_Config) ->
end
end,
{async, HeadFolderToMidK} =
leveled_bookie:book_headfold(Bookie1,
?RIAK_TAG,
{FoldThrowFun(HeadFoldFun), []},
true, true, false),
leveled_bookie:book_headfold(
Bookie1,
?RIAK_TAG,
{FoldThrowFun(HeadFoldFun), []},
true,
true,
false
),
KeySizeList2 = lists:reverse(CatchingFold(HeadFolderToMidK)),
io:format("Head fold with result size ~w~n", [length(KeySizeList2)]),
true = KeyCount div 2 == length(KeySizeList2),
@ -300,21 +303,25 @@ breaking_folds(_Config) ->
[{K,byte_size(V)}|Acc]
end,
{async, ObjectFolderKO} =
leveled_bookie:book_objectfold(Bookie1,
?RIAK_TAG,
{ObjFoldFun, []},
false,
key_order),
leveled_bookie:book_objectfold(
Bookie1,
?RIAK_TAG,
{ObjFoldFun, []},
false,
key_order
),
ObjSizeList1 = lists:reverse(ObjectFolderKO()),
io:format("Obj fold with result size ~w~n", [length(ObjSizeList1)]),
true = KeyCount == length(ObjSizeList1),
{async, ObjFolderToMidK} =
leveled_bookie:book_objectfold(Bookie1,
?RIAK_TAG,
{FoldThrowFun(ObjFoldFun), []},
false,
key_order),
leveled_bookie:book_objectfold(
Bookie1,
?RIAK_TAG,
{FoldThrowFun(ObjFoldFun), []},
false,
key_order
),
ObjSizeList2 = lists:reverse(CatchingFold(ObjFolderToMidK)),
io:format("Object fold with result size ~w~n", [length(ObjSizeList2)]),
true = KeyCount div 2 == length(ObjSizeList2),
@ -324,11 +331,13 @@ breaking_folds(_Config) ->
% that was terminated by reaching a point in the key range .. as results
% will not be passed to the fold function in key order
{async, ObjectFolderSO} =
leveled_bookie:book_objectfold(Bookie1,
?RIAK_TAG,
{ObjFoldFun, []},
false,
sqn_order),
leveled_bookie:book_objectfold(
Bookie1,
?RIAK_TAG,
{ObjFoldFun, []},
false,
sqn_order
),
ObjSizeList1_SO = lists:reverse(ObjectFolderSO()),
io:format("Obj fold with result size ~w~n", [length(ObjSizeList1_SO)]),
true = KeyCount == length(ObjSizeList1_SO),
@ -346,33 +355,26 @@ breaking_folds(_Config) ->
end
end,
{async, ObjFolderTo1K} =
leveled_bookie:book_objectfold(Bookie1,
?RIAK_TAG,
{FoldThrowThousandFun(ObjFoldFun), []},
false,
sqn_order),
leveled_bookie:book_objectfold(
Bookie1,
?RIAK_TAG,
{FoldThrowThousandFun(ObjFoldFun), []},
false,
sqn_order
),
ObjSizeList2_SO = lists:reverse(CatchingFold(ObjFolderTo1K)),
io:format("Object fold with result size ~w~n", [length(ObjSizeList2_SO)]),
true = 1000 == length(ObjSizeList2_SO),
ObjL2 = testutil:generate_objects(10,
binary_uuid,
[],
ObjectGen,
IndexGen,
"B2"),
ObjL3 = testutil:generate_objects(10,
binary_uuid,
[],
ObjectGen,
IndexGen,
"B3"),
ObjL4 = testutil:generate_objects(10,
binary_uuid,
[],
ObjectGen,
IndexGen,
"B4"),
ObjL2 =
testutil:generate_objects(
10, binary_uuid, [], ObjectGen, IndexGen, <<"B2">>),
ObjL3 =
testutil:generate_objects(
10, binary_uuid, [], ObjectGen, IndexGen, <<"B3">>),
ObjL4 =
testutil:generate_objects(
10, binary_uuid, [], ObjectGen, IndexGen, <<"B4">>),
testutil:riakload(Bookie1, ObjL2),
testutil:riakload(Bookie1, ObjL3),
testutil:riakload(Bookie1, ObjL4),
@ -396,20 +398,16 @@ breaking_folds(_Config) ->
end,
{async, StopAt3BucketFolder} =
leveled_bookie:book_bucketlist(Bookie1,
?RIAK_TAG,
{StopAt3Fun, []},
all),
leveled_bookie:book_bucketlist(
Bookie1, ?RIAK_TAG, {StopAt3Fun, []}, all),
BucketListSA3 = lists:reverse(CatchingFold(StopAt3BucketFolder)),
io:format("bucket list with result ~w~n", [BucketListSA3]),
true = [<<"B2">>, <<"B3">>] == BucketListSA3,
ok = leveled_bookie:book_close(Bookie1),
testutil:reset_filestructure().
single_object_with2i(_Config) ->
% Load a single object with an integer and a binary
% index and query for it
@ -429,36 +427,40 @@ single_object_with2i(_Config) ->
{async, IdxFolder1} =
leveled_bookie:book_indexfold(
Bookie1,
"Bucket1",
<<"Bucket1">>,
{fun testutil:foldkeysfun/3, []},
{list_to_binary("binary_bin"),
<<99:32/integer>>, <<101:32/integer>>},
{true, undefined}),
R1 = IdxFolder1(),
io:format("R1 of ~w~n", [R1]),
true = [{<<100:32/integer>>,"Key1"}] == R1,
true = [{<<100:32/integer>>, <<"Key1">>}] == R1,
IdxQ2 = {index_query,
"Bucket1",
{fun testutil:foldkeysfun/3, []},
{list_to_binary("integer_int"),
99, 101},
{true, undefined}},
IdxQ2 =
{
index_query,
<<"Bucket1">>,
{fun testutil:foldkeysfun/3, []},
{list_to_binary("integer_int"), 99, 101},
{true, undefined}
},
{async, IdxFolder2} = leveled_bookie:book_returnfolder(Bookie1, IdxQ2),
R2 = IdxFolder2(),
io:format("R2 of ~w~n", [R2]),
true = [{100,"Key1"}] == R2,
true = [{100, <<"Key1">>}] == R2,
IdxQ3 = {index_query,
{"Bucket1", "Key1"},
{fun testutil:foldkeysfun/3, []},
{list_to_binary("integer_int"),
99, 101},
{true, undefined}},
IdxQ3 =
{
index_query,
{<<"Bucket1">>, <<"Key1">>},
{fun testutil:foldkeysfun/3, []},
{list_to_binary("integer_int"), 99, 101},
{true, undefined}
},
{async, IdxFolder3} = leveled_bookie:book_returnfolder(Bookie1, IdxQ3),
R3 = IdxFolder3(),
io:format("R2 of ~w~n", [R3]),
true = [{100,"Key1"}] == R3,
true = [{100, <<"Key1">>}] == R3,
ok = leveled_bookie:book_close(Bookie1),
testutil:reset_filestructure().
@ -473,7 +475,7 @@ small_load_with2i(_Config) ->
{TestObject, TestSpec} = testutil:generate_testobject(),
ok = testutil:book_riakput(Bookie1, TestObject, TestSpec),
testutil:check_forobject(Bookie1, TestObject),
testutil:check_formissingobject(Bookie1, "Bucket1", "Key2"),
testutil:check_formissingobject(Bookie1, <<"Bucket1">>, <<"Key2">>),
testutil:check_forobject(Bookie1, TestObject),
ObjectGen = testutil:get_compressiblevalue_andinteger(),
IndexGen = testutil:get_randomindexes_generator(8),
@ -486,58 +488,60 @@ small_load_with2i(_Config) ->
testutil:check_forobject(Bookie1, TestObject),
% Find all keys index, and then just the last key
IdxQ1 = {index_query,
"Bucket",
{fun testutil:foldkeysfun/3, []},
{<<"idx1_bin">>, <<"#">>, <<"|">>},
{true, undefined}},
IdxQ1 =
{
index_query,
<<"Bucket">>,
{fun testutil:foldkeysfun/3, []},
{<<"idx1_bin">>, <<"#">>, <<"|">>},
{true, undefined}
},
{async, IdxFolder} = leveled_bookie:book_returnfolder(Bookie1, IdxQ1),
KeyList1 = lists:usort(IdxFolder()),
true = 10000 == length(KeyList1),
{LastTerm, LastKey} = lists:last(KeyList1),
IdxQ2 = {index_query,
{"Bucket", LastKey},
{fun testutil:foldkeysfun/3, []},
{<<"idx1_bin">>, LastTerm, <<"|">>},
{false, undefined}},
IdxQ2 =
{
index_query,
{<<"Bucket">>, LastKey},
{fun testutil:foldkeysfun/3, []},
{<<"idx1_bin">>, LastTerm, <<"|">>},
{false, undefined}
},
{async, IdxFolderLK} = leveled_bookie:book_returnfolder(Bookie1, IdxQ2),
KeyList2 = lists:usort(IdxFolderLK()),
io:format("List should be last key ~w ~w~n", [LastKey, KeyList2]),
true = 1 == length(KeyList2),
%% Delete the objects from the ChkList removing the indexes
lists:foreach(fun({_RN, Obj, Spc}) ->
DSpc = lists:map(fun({add, F, T}) ->
{remove, F, T}
end,
Spc),
{B, K} =
{testutil:get_bucket(Obj), testutil:get_key(Obj)},
testutil:book_riakdelete(Bookie1, B, K, DSpc)
end,
ChkList1),
lists:foreach(
fun({_RN, Obj, Spc}) ->
DSpc =
lists:map(fun({add, F, T}) -> {remove, F, T} end, Spc),
{B, K} = {testutil:get_bucket(Obj), testutil:get_key(Obj)},
testutil:book_riakdelete(Bookie1, B, K, DSpc)
end,
ChkList1
),
%% Get the Buckets Keys and Hashes for the whole bucket
FoldObjectsFun = fun(B, K, V, Acc) -> [{B, K, erlang:phash2(V)}|Acc]
end,
FoldObjectsFun =
fun(B, K, V, Acc) -> [{B, K, erlang:phash2(V)}|Acc] end,
{async, HTreeF1} = leveled_bookie:book_objectfold(Bookie1,
?RIAK_TAG,
{FoldObjectsFun, []},
false),
{async, HTreeF1} =
leveled_bookie:book_objectfold(
Bookie1, ?RIAK_TAG, {FoldObjectsFun, []}, false),
KeyHashList1 = HTreeF1(),
{async, HTreeF2} = leveled_bookie:book_objectfold(Bookie1,
?RIAK_TAG,
"Bucket",
all,
{FoldObjectsFun, []},
false),
{async, HTreeF2} =
leveled_bookie:book_objectfold(
Bookie1, ?RIAK_TAG, <<"Bucket">>, all, {FoldObjectsFun, []}, false
),
KeyHashList2 = HTreeF2(),
{async, HTreeF3} =
leveled_bookie:book_objectfold(
Bookie1,
?RIAK_TAG,
"Bucket",
<<"Bucket">>,
{<<"idx1_bin">>, <<"#">>, <<"|">>},
{FoldObjectsFun, []},
false),
@ -546,12 +550,13 @@ small_load_with2i(_Config) ->
true = 9900 == length(KeyHashList2),
true = 9900 == length(KeyHashList3),
SumIntFun = fun(_B, _K, Obj, Acc) ->
{I, _Bin} = testutil:get_value(Obj),
Acc + I
end,
SumIntFun =
fun(_B, _K, Obj, Acc) ->
{I, _Bin} = testutil:get_value(Obj),
Acc + I
end,
BucketObjQ =
{foldobjects_bybucket, ?RIAK_TAG, "Bucket", all, {SumIntFun, 0}, true},
{foldobjects_bybucket, ?RIAK_TAG, <<"Bucket">>, all, {SumIntFun, 0}, true},
{async, Sum1} = leveled_bookie:book_returnfolder(Bookie1, BucketObjQ),
Total1 = Sum1(),
io:format("Total from summing all I is ~w~n", [Total1]),
@ -596,21 +601,18 @@ query_count(_Config) ->
BucketBin = list_to_binary("Bucket"),
{TestObject, TestSpec} =
testutil:generate_testobject(
BucketBin, term_to_binary("Key1"), "Value1", [], [{"MDK1", "MDV1"}]),
BucketBin, term_to_binary("Key1"), <<"Value1">>, [], [{<<"MDK1">>, <<"MDV1">>}]),
ok = testutil:book_riakput(Book1, TestObject, TestSpec),
testutil:check_forobject(Book1, TestObject),
testutil:check_formissingobject(Book1, "Bucket1", "Key2"),
testutil:check_formissingobject(Book1, <<"Bucket1">>, <<"Key2">>),
testutil:check_forobject(Book1, TestObject),
lists:foreach(
fun(_X) ->
V = testutil:get_compressiblevalue(),
Indexes = testutil:get_randomindexes_generator(8),
SW = os:timestamp(),
ObjL1 = testutil:generate_objects(10000,
binary_uuid,
[],
V,
Indexes),
ObjL1 =
testutil:generate_objects(10000, binary_uuid, [], V, Indexes),
testutil:riakload(Book1, ObjL1),
io:format(
"Put of 10000 objects with 8 index entries "
@ -681,15 +683,17 @@ query_count(_Config) ->
{true, undefined}},
{async,
Mia2KFolder2} = leveled_bookie:book_returnfolder(Book2, Query2),
Mia2000Count2 = lists:foldl(fun({Term, _Key}, Acc) ->
case re:run(Term, RegMia) of
nomatch ->
Acc;
_ ->
Acc + 1
end end,
0,
Mia2KFolder2()),
Mia2000Count2 =
lists:foldl(
fun({Term, _Key}, Acc) ->
case re:run(Term, RegMia) of
nomatch ->
Acc;
_ ->
Acc + 1
end end,
0,
Mia2KFolder2()),
ok = case Mia2000Count2 of
Mia2000Count1 when Mia2000Count1 > 0 ->
io:format("Mia2000 counts match at ~w~n",
@ -731,20 +735,22 @@ query_count(_Config) ->
Spc9Del = lists:map(fun({add, IdxF, IdxT}) -> {remove, IdxF, IdxT} end,
Spc9),
ok = testutil:book_riakput(Book2, Obj9, Spc9Del),
lists:foreach(fun({IdxF, IdxT, X}) ->
Q = {index_query,
BucketBin,
{fun testutil:foldkeysfun/3, []},
{IdxF, IdxT, IdxT},
?KEY_ONLY},
R = leveled_bookie:book_returnfolder(Book2, Q),
{async, Fldr} = R,
case length(Fldr()) of
Y ->
Y = X - 1
end
end,
R9),
lists:foreach(
fun({IdxF, IdxT, X}) ->
Q = {index_query,
BucketBin,
{fun testutil:foldkeysfun/3, []},
{IdxF, IdxT, IdxT},
?KEY_ONLY},
R = leveled_bookie:book_returnfolder(Book2, Q),
{async, Fldr} = R,
case length(Fldr()) of
Y ->
Y = X - 1
end
end,
R9
),
ok = leveled_bookie:book_close(Book2),
{ok, Book3} =
leveled_bookie:book_start(
@ -800,13 +806,13 @@ query_count(_Config) ->
ObjList10A =
testutil:generate_objects(
5000, binary_uuid, [], V9, Indexes9, "BucketA"),
5000, binary_uuid, [], V9, Indexes9, <<"BucketA">>),
ObjList10B =
testutil:generate_objects(
5000, binary_uuid, [], V9, Indexes9, "BucketB"),
5000, binary_uuid, [], V9, Indexes9, <<"BucketB">>),
ObjList10C =
testutil:generate_objects(
5000, binary_uuid, [], V9, Indexes9, "BucketC"),
5000, binary_uuid, [], V9, Indexes9, <<"BucketC">>),
testutil:riakload(Book4, ObjList10A),
testutil:riakload(Book4, ObjList10B),
testutil:riakload(Book4, ObjList10C),
@ -819,10 +825,9 @@ query_count(_Config) ->
ok = leveled_bookie:book_close(Book4),
{ok, Book5} = leveled_bookie:book_start(RootPath,
2000,
50000000,
testutil:sync_strategy()),
{ok, Book5} =
leveled_bookie:book_start(
RootPath, 2000, 50000000, testutil:sync_strategy()),
{async, BLF3} = leveled_bookie:book_returnfolder(Book5, BucketListQuery),
SW_QC = os:timestamp(),
BucketSet3 = BLF3(),
@ -866,33 +871,25 @@ multibucket_fold(_Config) ->
testutil:sync_strategy()),
ObjectGen = testutil:get_compressiblevalue_andinteger(),
IndexGen = fun() -> [] end,
ObjL1 = testutil:generate_objects(13000,
uuid,
[],
ObjectGen,
IndexGen,
{<<"Type1">>, <<"Bucket1">>}),
ObjL1 =
testutil:generate_objects(
13000, uuid, [], ObjectGen, IndexGen, {<<"Type1">>, <<"Bucket1">>}
),
testutil:riakload(Bookie1, ObjL1),
ObjL2 = testutil:generate_objects(17000,
uuid,
[],
ObjectGen,
IndexGen,
<<"Bucket2">>),
ObjL2 =
testutil:generate_objects(
17000, uuid, [], ObjectGen, IndexGen, <<"Bucket2">>
),
testutil:riakload(Bookie1, ObjL2),
ObjL3 = testutil:generate_objects(7000,
uuid,
[],
ObjectGen,
IndexGen,
<<"Bucket3">>),
ObjL3 =
testutil:generate_objects(
7000, uuid, [], ObjectGen, IndexGen, <<"Bucket3">>
),
testutil:riakload(Bookie1, ObjL3),
ObjL4 = testutil:generate_objects(23000,
uuid,
[],
ObjectGen,
IndexGen,
{<<"Type2">>, <<"Bucket4">>}),
ObjL4 =
testutil:generate_objects(
23000, uuid, [], ObjectGen, IndexGen, {<<"Type2">>, <<"Bucket4">>}
),
testutil:riakload(Bookie1, ObjL4),
FF = fun(B, K, _PO, Acc) ->
@ -901,30 +898,30 @@ multibucket_fold(_Config) ->
FoldAccT = {FF, []},
{async, R1} =
leveled_bookie:book_headfold(Bookie1,
?RIAK_TAG,
{bucket_list,
[{<<"Type1">>, <<"Bucket1">>},
{<<"Type2">>, <<"Bucket4">>}]},
FoldAccT,
false,
true,
false),
leveled_bookie:book_headfold(
Bookie1,
?RIAK_TAG,
{bucket_list,
[{<<"Type1">>, <<"Bucket1">>}, {<<"Type2">>, <<"Bucket4">>}]},
FoldAccT,
false,
true,
false
),
O1 = length(R1()),
io:format("Result R1 of length ~w~n", [O1]),
{async, R2} =
leveled_bookie:book_headfold(Bookie1,
?RIAK_TAG,
{bucket_list,
[<<"Bucket2">>,
<<"Bucket3">>]},
{fun(_B, _K, _PO, Acc) ->
Acc +1
end,
0},
false, true, false),
leveled_bookie:book_headfold(
Bookie1,
?RIAK_TAG,
{bucket_list, [<<"Bucket2">>, <<"Bucket3">>]},
{fun(_B, _K, _PO, Acc) -> Acc +1 end, 0},
false,
true,
false
),
O2 = R2(),
io:format("Result R2 of ~w~n", [O2]),
@ -933,10 +930,8 @@ multibucket_fold(_Config) ->
FoldBucketsFun = fun(B, Acc) -> [B|Acc] end,
{async, Folder} =
leveled_bookie:book_bucketlist(Bookie1,
?RIAK_TAG,
{FoldBucketsFun, []},
all),
leveled_bookie:book_bucketlist(
Bookie1, ?RIAK_TAG, {FoldBucketsFun, []}, all),
BucketList = lists:reverse(Folder()),
ExpectedBucketList =
[{<<"Type1">>, <<"Bucket1">>}, {<<"Type2">>, <<"Bucket4">>},
@ -949,54 +944,53 @@ multibucket_fold(_Config) ->
rotating_objects(_Config) ->
RootPath = testutil:reset_filestructure(),
ok = testutil:rotating_object_check(RootPath, "Bucket1", 10),
ok = testutil:rotating_object_check(RootPath, "Bucket2", 200),
ok = testutil:rotating_object_check(RootPath, "Bucket3", 800),
ok = testutil:rotating_object_check(RootPath, "Bucket4", 1600),
ok = testutil:rotating_object_check(RootPath, "Bucket5", 3200),
ok = testutil:rotating_object_check(RootPath, "Bucket6", 9600),
ok = testutil:rotating_object_check(RootPath, <<"Bucket1">>, 10),
ok = testutil:rotating_object_check(RootPath, <<"Bucket2">>, 200),
ok = testutil:rotating_object_check(RootPath, <<"Bucket3">>, 800),
ok = testutil:rotating_object_check(RootPath, <<"Bucket4">>, 1600),
ok = testutil:rotating_object_check(RootPath, <<"Bucket5">>, 3200),
ok = testutil:rotating_object_check(RootPath, <<"Bucket6">>, 9600),
testutil:reset_filestructure().
foldobjects_bybucket_range(_Config) ->
RootPath = testutil:reset_filestructure(),
{ok, Bookie1} = leveled_bookie:book_start(RootPath,
2000,
50000000,
testutil:sync_strategy()),
{ok, Bookie1} =
leveled_bookie:book_start(
RootPath, 2000, 50000000, testutil:sync_strategy()),
ObjectGen = testutil:get_compressiblevalue_andinteger(),
IndexGen = fun() -> [] end,
ObjL1 = testutil:generate_objects(1300,
{fixed_binary, 1},
[],
ObjectGen,
IndexGen,
<<"Bucket1">>),
ObjL1 =
testutil:generate_objects(
1300, {fixed_binary, 1}, [], ObjectGen, IndexGen, <<"Bucket1">>),
testutil:riakload(Bookie1, ObjL1),
FoldKeysFun = fun(_B, K,_V, Acc) ->
[ K |Acc]
end,
FoldKeysFun = fun(_B, K,_V, Acc) -> [ K |Acc] end,
StartKey = testutil:fixed_bin_key(123),
EndKey = testutil:fixed_bin_key(779),
{async, Folder} = leveled_bookie:book_objectfold(Bookie1,
?RIAK_TAG,
<<"Bucket1">>,
{StartKey, EndKey}, {FoldKeysFun, []},
true
),
{async, Folder} =
leveled_bookie:book_objectfold(
Bookie1,
?RIAK_TAG,
<<"Bucket1">>,
{StartKey, EndKey},
{FoldKeysFun, []},
true
),
ResLen = length(Folder()),
io:format("Length of Result of folder ~w~n", [ResLen]),
true = 657 == ResLen,
{async, AllFolder} = leveled_bookie:book_objectfold(Bookie1,
?RIAK_TAG,
<<"Bucket1">>,
all,
{FoldKeysFun, []},
true
),
{async, AllFolder} =
leveled_bookie:book_objectfold(
Bookie1,
?RIAK_TAG,
<<"Bucket1">>,
all,
{FoldKeysFun, []},
true
),
AllResLen = length(AllFolder()),
io:format("Length of Result of all keys folder ~w~n", [AllResLen]),


@ -101,7 +101,7 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) ->
IndexGenFun =
fun(ListID) ->
fun() ->
RandInt = leveled_rand:uniform(IndexCount - 1),
RandInt = rand:uniform(IndexCount - 1),
IntIndex = ["integer", integer_to_list(ListID), "_int"],
BinIndex = ["binary", integer_to_list(ListID), "_bin"],
[{add, iolist_to_binary(IntIndex), RandInt},
@ -434,35 +434,35 @@ rotation_withnocheck(Book, B, NumberOfObjects, ObjSize, IdxCnt) ->
Book,
B,
NumberOfObjects,
base64:encode(leveled_rand:rand_bytes(ObjSize)),
base64:encode(crypto:strong_rand_bytes(ObjSize)),
IdxCnt
),
rotation_with_prefetch(
Book,
B,
NumberOfObjects,
base64:encode(leveled_rand:rand_bytes(ObjSize)),
base64:encode(crypto:strong_rand_bytes(ObjSize)),
IdxCnt
),
rotation_with_prefetch(
Book,
B,
NumberOfObjects,
base64:encode(leveled_rand:rand_bytes(ObjSize)),
base64:encode(crypto:strong_rand_bytes(ObjSize)),
IdxCnt
),
rotation_with_prefetch(
Book,
B,
NumberOfObjects,
base64:encode(leveled_rand:rand_bytes(ObjSize)),
base64:encode(crypto:strong_rand_bytes(ObjSize)),
IdxCnt
),
rotation_with_prefetch(
Book,
B,
NumberOfObjects,
base64:encode(leveled_rand:rand_bytes(ObjSize)),
base64:encode(crypto:strong_rand_bytes(ObjSize)),
IdxCnt
),
ok.
@ -471,7 +471,7 @@ generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) ->
testutil:generate_objects(
CountPerList,
{fixed_binary, (Chunk - 1) * CountPerList + 1}, [],
base64:encode(leveled_rand:rand_bytes(ObjSize)),
base64:encode(crypto:strong_rand_bytes(ObjSize)),
IndexGenFun(Chunk),
Bucket
).
@ -480,7 +480,7 @@ load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) ->
ct:log(?INFO, "Generating and loading ObjList ~w", [Chunk]),
time_load_chunk(
Bookie,
{Bucket, base64:encode(leveled_rand:rand_bytes(ObjSize)), IndexGenFun(Chunk)},
{Bucket, base64:encode(crypto:strong_rand_bytes(ObjSize)), IndexGenFun(Chunk)},
(Chunk - 1) * CountPerList + 1,
Chunk * CountPerList,
0,
@ -577,9 +577,9 @@ random_fetches(FetchType, Bookie, Bucket, ObjCount, Fetches) ->
case I rem 5 of
1 ->
testutil:fixed_bin_key(
Twenty + leveled_rand:uniform(ObjCount - Twenty));
Twenty + rand:uniform(ObjCount - Twenty));
_ ->
testutil:fixed_bin_key(leveled_rand:uniform(Twenty))
testutil:fixed_bin_key(rand:uniform(Twenty))
end
end,
{TC, ok} =
@ -616,18 +616,18 @@ random_fetches(FetchType, Bookie, Bucket, ObjCount, Fetches) ->
random_queries(Bookie, Bucket, IDs, IdxCnt, MaxRange, IndexesReturned) ->
QueryFun =
fun() ->
ID = leveled_rand:uniform(IDs),
ID = rand:uniform(IDs),
BinIndex =
iolist_to_binary(["binary", integer_to_list(ID), "_bin"]),
Twenty = IdxCnt div 5,
RI = leveled_rand:uniform(MaxRange),
RI = rand:uniform(MaxRange),
[Start, End] =
case RI of
RI when RI < (MaxRange div 5) ->
R0 = leveled_rand:uniform(IdxCnt - (Twenty + RI)),
R0 = rand:uniform(IdxCnt - (Twenty + RI)),
[R0 + Twenty, R0 + Twenty + RI];
_ ->
R0 = leveled_rand:uniform(Twenty - RI),
R0 = rand:uniform(Twenty - RI),
[R0, R0 + RI]
end,
FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end,


@ -58,7 +58,7 @@ basic_riak_tester(Bucket, KeyCount) ->
IndexGenFun =
fun(ListID) ->
fun() ->
RandInt = leveled_rand:uniform(IndexCount),
RandInt = rand:uniform(IndexCount),
ID = integer_to_list(ListID),
[{add,
list_to_binary("integer" ++ ID ++ "_int"),
@ -75,7 +75,7 @@ basic_riak_tester(Bucket, KeyCount) ->
testutil:generate_objects(
CountPerList,
{fixed_binary, 1}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
IndexGenFun(1),
Bucket
),
@ -83,7 +83,7 @@ basic_riak_tester(Bucket, KeyCount) ->
testutil:generate_objects(
CountPerList,
{fixed_binary, CountPerList + 1}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
IndexGenFun(2),
Bucket
),
@ -92,7 +92,7 @@ basic_riak_tester(Bucket, KeyCount) ->
testutil:generate_objects(
CountPerList,
{fixed_binary, 2 * CountPerList + 1}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
IndexGenFun(3),
Bucket
),
@ -101,7 +101,7 @@ basic_riak_tester(Bucket, KeyCount) ->
testutil:generate_objects(
CountPerList,
{fixed_binary, 3 * CountPerList + 1}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
IndexGenFun(4),
Bucket
),
@ -110,7 +110,7 @@ basic_riak_tester(Bucket, KeyCount) ->
testutil:generate_objects(
CountPerList,
{fixed_binary, 4 * CountPerList + 1}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
IndexGenFun(5),
Bucket
),
@ -276,7 +276,7 @@ summarisable_sstindex(_Config) ->
ObjListToSort =
lists:map(
fun(I) ->
{leveled_rand:uniform(KeyCount * 10),
{rand:uniform(KeyCount * 10),
testutil:set_object(
Bucket, KeyGen(I), integer_to_binary(I), IndexGen, [])}
end,
@ -344,7 +344,7 @@ summarisable_sstindex(_Config) ->
true = 200 == length(KeyRangeCheckFun(StartKey, EndKey))
end,
lists:map(
fun(_I) -> leveled_rand:uniform(KeyCount - 200) end,
fun(_I) -> rand:uniform(KeyCount - 200) end,
lists:seq(1, 100))),
IdxObjKeyCount = 50000,
@ -367,7 +367,7 @@ summarisable_sstindex(_Config) ->
IdxObjListToSort =
lists:map(
fun(I) ->
{leveled_rand:uniform(KeyCount * 10),
{rand:uniform(KeyCount * 10),
testutil:set_object(
Bucket,
KeyGen(I),
@ -419,7 +419,7 @@ summarisable_sstindex(_Config) ->
end,
lists:map(
fun(_I) ->
leveled_rand:uniform(IdxObjKeyCount - 20)
rand:uniform(IdxObjKeyCount - 20)
end,
lists:seq(1, 100))),
lists:foreach(
@ -430,7 +430,7 @@ summarisable_sstindex(_Config) ->
end,
lists:map(
fun(_I) ->
leveled_rand:uniform(IdxObjKeyCount - 10)
rand:uniform(IdxObjKeyCount - 10)
end,
lists:seq(1, 100))),
@ -451,7 +451,7 @@ summarisable_sstindex(_Config) ->
true = 200 == length(KeyRangeCheckFun(StartKey, EndKey))
end,
lists:map(
fun(_I) -> leveled_rand:uniform(KeyCount - 200) end,
fun(_I) -> rand:uniform(KeyCount - 200) end,
lists:seq(1, 100))),
ok = leveled_bookie:book_destroy(Bookie1).
@ -475,7 +475,7 @@ fetchclocks_modifiedbetween(_Config) ->
testutil:generate_objects(
100000,
{fixed_binary, 1}, [],
leveled_rand:rand_bytes(32),
crypto:strong_rand_bytes(32),
fun() -> [] end,
<<"BaselineB">>
),
@ -485,7 +485,7 @@ fetchclocks_modifiedbetween(_Config) ->
testutil:generate_objects(
20000,
{fixed_binary, 1}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
fun() -> [] end,
<<"B0">>
),
@ -498,7 +498,7 @@ fetchclocks_modifiedbetween(_Config) ->
testutil:generate_objects(
15000,
{fixed_binary, 20001}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
fun() -> [] end,
<<"B0">>
),
@ -511,7 +511,7 @@ fetchclocks_modifiedbetween(_Config) ->
testutil:generate_objects(
35000,
{fixed_binary, 35001}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
fun() -> [] end,
<<"B0">>
),
@ -524,7 +524,7 @@ fetchclocks_modifiedbetween(_Config) ->
testutil:generate_objects(
30000,
{fixed_binary, 70001}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
fun() -> [] end,
<<"B0">>
),
@ -537,7 +537,7 @@ fetchclocks_modifiedbetween(_Config) ->
testutil:generate_objects(
8000,
{fixed_binary, 1}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
fun() -> [] end,
<<"B1">>
),
@ -550,7 +550,7 @@ fetchclocks_modifiedbetween(_Config) ->
testutil:generate_objects(
7000,
{fixed_binary, 1}, [],
leveled_rand:rand_bytes(512),
crypto:strong_rand_bytes(512),
fun() -> [] end,
<<"B2">>
),
@ -815,7 +815,7 @@ fetchclocks_modifiedbetween(_Config) ->
testutil:generate_objects(
200000,
{fixed_binary, 1}, [],
leveled_rand:rand_bytes(32),
crypto:strong_rand_bytes(32),
fun() -> [] end,
<<"B1.9">>
),
@ -1637,7 +1637,7 @@ bigobject_memorycheck(_Config) ->
ObjPutFun =
fun(I) ->
Key = base64:encode(<<I:32/integer>>),
Value = leveled_rand:rand_bytes(1024 * 1024),
Value = crypto:strong_rand_bytes(1024 * 1024),
% a big object each time!
{Obj, Spc} = testutil:set_object(Bucket, Key, Value, IndexGen, []),
testutil:book_riakput(Bookie, Obj, Spc)
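Aside (not part of the diff): the replacements above assume the removed leveled_rand wrapper mapped one-to-one onto OTP's own modules, which is what these hunks show. A minimal Erlang sketch of the substitutions, valid on OTP 20+ where the rand module seeds itself per process; random_examples/0 is a hypothetical helper used only for illustration:

    %% Illustrative sketch only - not part of the commit.
    random_examples() ->
        Int   = rand:uniform(16),               % was leveled_rand:uniform(16) - integer in 1..16
        Bytes = crypto:strong_rand_bytes(512),  % was leveled_rand:rand_bytes(512) - 512-byte binary()
        Float = rand:uniform(),                 % was leveled_rand:uniform() - float between 0 and 1
        {Int, Bytes, Float}.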


@ -231,12 +231,14 @@ sync_strategy() ->
none.
book_riakput(Pid, RiakObject, IndexSpecs) ->
leveled_bookie:book_put(Pid,
RiakObject#r_object.bucket,
RiakObject#r_object.key,
to_binary(v1, RiakObject),
IndexSpecs,
?RIAK_TAG).
leveled_bookie:book_put(
Pid,
RiakObject#r_object.bucket,
RiakObject#r_object.key,
to_binary(v1, RiakObject),
IndexSpecs,
?RIAK_TAG
).
book_tempriakput(Pid, RiakObject, IndexSpecs, TTL) ->
leveled_bookie:book_tempput(
@ -246,7 +248,8 @@ book_tempriakput(Pid, RiakObject, IndexSpecs, TTL) ->
to_binary(v1, RiakObject),
IndexSpecs,
?RIAK_TAG,
TTL).
TTL
).
book_riakdelete(Pid, Bucket, Key, IndexSpecs) ->
leveled_bookie:book_put(Pid, Bucket, Key, delete, IndexSpecs, ?RIAK_TAG).
@ -383,9 +386,8 @@ wait_for_compaction(Bookie) ->
check_bucket_stats(Bookie, Bucket) ->
FoldSW1 = os:timestamp(),
io:format("Checking bucket size~n"),
{async, Folder1} = leveled_bookie:book_returnfolder(Bookie,
{riakbucket_stats,
Bucket}),
{async, Folder1} =
leveled_bookie:book_returnfolder(Bookie, {riakbucket_stats, Bucket}),
{B1Size, B1Count} = Folder1(),
io:format("Bucket fold completed in ~w microseconds~n",
[timer:now_diff(os:timestamp(), FoldSW1)]),
@ -399,28 +401,32 @@ check_forlist(Bookie, ChkList) ->
check_forlist(Bookie, ChkList, Log) ->
SW = os:timestamp(),
lists:foreach(fun({_RN, Obj, _Spc}) ->
if
Log == true ->
io:format("Fetching Key ~s~n", [Obj#r_object.key]);
true ->
ok
end,
R = book_riakget(Bookie,
Obj#r_object.bucket,
Obj#r_object.key),
true = case R of
{ok, Val} ->
to_binary(v1, Obj) == Val;
not_found ->
io:format("Object not found for key ~s~n",
[Obj#r_object.key]),
error
end
end,
ChkList),
io:format("Fetch check took ~w microseconds checking list of length ~w~n",
[timer:now_diff(os:timestamp(), SW), length(ChkList)]).
lists:foreach(
fun({_RN, Obj, _Spc}) ->
if
Log == true ->
io:format("Fetching Key ~s~n", [Obj#r_object.key]);
true ->
ok
end,
R = book_riakget(Bookie,
Obj#r_object.bucket,
Obj#r_object.key),
true =
case R of
{ok, Val} ->
to_binary(v1, Obj) == Val;
not_found ->
io:format("Object not found for key ~s~n",
[Obj#r_object.key]),
error
end
end,
ChkList),
io:format(
"Fetch check took ~w microseconds checking list of length ~w~n",
[timer:now_diff(os:timestamp(), SW), length(ChkList)]
).
checkhead_forlist(Bookie, ChkList) ->
SW = os:timestamp(),
@ -470,11 +476,14 @@ check_formissingobject(Bookie, Bucket, Key) ->
generate_testobject() ->
{B1, K1, V1, Spec1, MD} = {"Bucket1",
"Key1",
"Value1",
[],
[{"MDK1", "MDV1"}]},
{B1, K1, V1, Spec1, MD} =
{
<<"Bucket1">>,
<<"Key1">>,
<<"Value1">>,
[],
[{<<"MDK1">>, <<"MDV1">>}]
},
generate_testobject(B1, K1, V1, Spec1, MD).
generate_testobject(B, K, V, Spec, MD) ->
@ -493,7 +502,7 @@ generate_compressibleobjects(Count, KeyNumber) ->
get_compressiblevalue_andinteger() ->
{leveled_rand:uniform(1000), get_compressiblevalue()}.
{rand:uniform(1000), get_compressiblevalue()}.
get_compressiblevalue() ->
S1 = "111111111111111",
@ -510,7 +519,7 @@ get_compressiblevalue() ->
iolist_to_binary(
lists:foldl(
fun(_X, Acc) ->
{_, Str} = lists:keyfind(leveled_rand:uniform(8), 1, Selector),
{_, Str} = lists:keyfind(rand:uniform(8), 1, Selector),
[Str|Acc] end,
[""],
L
@ -518,28 +527,39 @@ get_compressiblevalue() ->
).
generate_smallobjects(Count, KeyNumber) ->
generate_objects(Count, KeyNumber, [], leveled_rand:rand_bytes(512)).
generate_objects(Count, KeyNumber, [], crypto:strong_rand_bytes(512)).
generate_objects(Count, KeyNumber) ->
generate_objects(Count, KeyNumber, [], leveled_rand:rand_bytes(4096)).
generate_objects(Count, KeyNumber, [], crypto:strong_rand_bytes(4096)).
generate_objects(Count, KeyNumber, ObjL, Value) ->
generate_objects(Count, KeyNumber, ObjL, Value, fun() -> [] end).
generate_objects(Count, KeyNumber, ObjL, Value, IndexGen) ->
generate_objects(Count, KeyNumber, ObjL, Value, IndexGen, "Bucket").
generate_objects(Count, KeyNumber, ObjL, Value, IndexGen, <<"Bucket">>).
generate_objects(0, _KeyNumber, ObjL, _Value, _IndexGen, _Bucket) ->
lists:reverse(ObjL);
generate_objects(Count, binary_uuid, ObjL, Value, IndexGen, Bucket) ->
{Obj1, Spec1} = set_object(list_to_binary(Bucket),
list_to_binary(leveled_util:generate_uuid()),
Value,
IndexGen),
generate_objects(
Count, binary_uuid, ObjL, Value, IndexGen, Bucket)
when is_list(Bucket) ->
generate_objects(
Count, binary_uuid, ObjL, Value, IndexGen, list_to_binary(Bucket)
);
generate_objects(
Count, binary_uuid, ObjL, Value, IndexGen, Bucket)
when is_binary(Bucket) ->
{Obj1, Spec1} =
set_object(
Bucket,
list_to_binary(leveled_util:generate_uuid()),
Value,
IndexGen
),
generate_objects(Count - 1,
binary_uuid,
[{leveled_rand:uniform(), Obj1, Spec1}|ObjL],
[{rand:uniform(), Obj1, Spec1}|ObjL],
Value,
IndexGen,
Bucket);
@ -550,19 +570,29 @@ generate_objects(Count, uuid, ObjL, Value, IndexGen, Bucket) ->
IndexGen),
generate_objects(Count - 1,
uuid,
[{leveled_rand:uniform(), Obj1, Spec1}|ObjL],
[{rand:uniform(), Obj1, Spec1}|ObjL],
Value,
IndexGen,
Bucket);
generate_objects(Count, {binary, KeyNumber}, ObjL, Value, IndexGen, Bucket) ->
generate_objects(
Count, {binary, KeyNumber}, ObjL, Value, IndexGen, Bucket)
when is_list(Bucket) ->
generate_objects(
Count, {binary, KeyNumber}, ObjL, Value, IndexGen, list_to_binary(Bucket)
);
generate_objects(
Count, {binary, KeyNumber}, ObjL, Value, IndexGen, Bucket)
when is_binary(Bucket) ->
{Obj1, Spec1} =
set_object(list_to_binary(Bucket),
list_to_binary(numbered_key(KeyNumber)),
Value,
IndexGen),
set_object(
Bucket,
list_to_binary(numbered_key(KeyNumber)),
Value,
IndexGen
),
generate_objects(Count - 1,
{binary, KeyNumber + 1},
[{leveled_rand:uniform(), Obj1, Spec1}|ObjL],
[{rand:uniform(), Obj1, Spec1}|ObjL],
Value,
IndexGen,
Bucket);
@ -574,7 +604,7 @@ generate_objects(Count, {fixed_binary, KeyNumber}, ObjL, Value, IndexGen, Bucket
IndexGen),
generate_objects(Count - 1,
{fixed_binary, KeyNumber + 1},
[{leveled_rand:uniform(), Obj1, Spec1}|ObjL],
[{rand:uniform(), Obj1, Spec1}|ObjL],
Value,
IndexGen,
Bucket);
@ -585,7 +615,7 @@ generate_objects(Count, KeyNumber, ObjL, Value, IndexGen, Bucket) ->
IndexGen),
generate_objects(Count - 1,
KeyNumber + 1,
[{leveled_rand:uniform(), Obj1, Spec1}|ObjL],
[{rand:uniform(), Obj1, Spec1}|ObjL],
Value,
IndexGen,
Bucket).
@ -652,7 +682,7 @@ update_some_objects(Bookie, ObjList, SampleSize) ->
[C] = Obj#r_object.contents,
MD = C#r_content.metadata,
MD0 = dict:store(?MD_LASTMOD, os:timestamp(), MD),
C0 = C#r_content{value = leveled_rand:rand_bytes(512),
C0 = C#r_content{value = crypto:strong_rand_bytes(512),
metadata = MD0},
UpdObj = Obj#r_object{vclock = VC0, contents = [C0]},
{R, UpdObj, Spec}
@ -679,11 +709,11 @@ delete_some_objects(Bookie, ObjList, SampleSize) ->
generate_vclock() ->
lists:map(fun(X) ->
{_, Actor} = lists:keyfind(leveled_rand:uniform(10),
{_, Actor} = lists:keyfind(rand:uniform(10),
1,
actor_list()),
{Actor, X} end,
lists:seq(1, leveled_rand:uniform(8))).
lists:seq(1, rand:uniform(8))).
update_vclock(VC) ->
[{Actor, X}|Rest] = VC,
@ -785,14 +815,14 @@ name_list() ->
get_randomname() ->
NameList = name_list(),
N = leveled_rand:uniform(16),
N = rand:uniform(16),
{N, Name} = lists:keyfind(N, 1, NameList),
Name.
get_randomdate() ->
LowTime = 60000000000,
HighTime = 70000000000,
RandPoint = LowTime + leveled_rand:uniform(HighTime - LowTime),
RandPoint = LowTime + rand:uniform(HighTime - LowTime),
Date = calendar:gregorian_seconds_to_datetime(RandPoint),
{{Year, Month, Day}, {Hour, Minute, Second}} = Date,
lists:flatten(io_lib:format("~4..0w~2..0w~2..0w~2..0w~2..0w~2..0w",
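Note (not part of the diff): with string buckets and keys removed from the helpers above, callers are expected to pass binaries; the binary_uuid and {binary, KeyNumber} clauses of generate_objects/6 still accept a list bucket but convert it with list_to_binary/1. A minimal usage sketch, where example_load/1 and the argument values are illustrative assumptions rather than code from this commit:

    %% Illustrative sketch only - not part of the commit.
    example_load(Bookie) ->
        ObjL =
            testutil:generate_objects(
                100,                            % number of objects to generate
                {fixed_binary, 1},              % numbered binary keys, starting at 1
                [],                             % accumulator of already-generated objects
                crypto:strong_rand_bytes(512),  % object value
                fun() -> [] end,                % no index specs
                <<"Bucket">>                    % bucket as a binary()
            ),
        lists:foreach(
            fun({_RN, Obj, Spc}) -> testutil:book_riakput(Bookie, Obj, Spc) end,
            ObjL).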


@ -41,11 +41,14 @@ many_put_compare(_Config) ->
{max_pencillercachesize, 16000},
{sync_strategy, riak_sync}],
{ok, Bookie1} = leveled_bookie:book_start(StartOpts1),
{B1, K1, V1, S1, MD} = {"Bucket",
"Key1.1.4567.4321",
"Value1",
[],
[{"MDK1", "MDV1"}]},
{B1, K1, V1, S1, MD} =
{
<<"Bucket">>,
<<"Key1.1.4567.4321">>,
<<"Value1">>,
[],
[{<<"MDK1">>, <<"MDV1">>}]
},
{TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD),
ok = testutil:book_riakput(Bookie1, TestObject, TestSpec),
testutil:check_forobject(Bookie1, TestObject),
@ -63,12 +66,15 @@ many_put_compare(_Config) ->
GenList = [2, 20002, 40002, 60002, 80002,
100002, 120002, 140002, 160002, 180002],
CLs = testutil:load_objects(20000,
GenList,
Bookie2,
TestObject,
fun testutil:generate_smallobjects/2,
20000),
CLs =
testutil:load_objects(
20000,
GenList,
Bookie2,
TestObject,
fun testutil:generate_smallobjects/2,
20000
),
% Start a new store, and load the same objects (except fot the original
% test object) into this store
@ -84,7 +90,7 @@ many_put_compare(_Config) ->
% state between stores is consistent
TicTacQ = {tictactree_obj,
{o_rkv, "Bucket", null, null, true},
{o_rkv, <<"Bucket">>, null, null, true},
TreeSize,
fun(_B, _K) -> accumulate end},
{async, TreeAFolder} = leveled_bookie:book_returnfolder(Bookie2, TicTacQ),
@ -113,10 +119,13 @@ many_put_compare(_Config) ->
true = length(AltList) > 10000,
% check there are a significant number of differences from empty
WrongPartitionTicTacQ = {tictactree_obj,
{o_rkv, "Bucket", null, null, false},
TreeSize,
fun(_B, _K) -> pass end},
WrongPartitionTicTacQ =
{
tictactree_obj,
{o_rkv, <<"Bucket">>, null, null, false},
TreeSize,
fun(_B, _K) -> pass end
},
{async, TreeAFolder_WP} =
leveled_bookie:book_returnfolder(Bookie2, WrongPartitionTicTacQ),
TreeAWP = TreeAFolder_WP(),
@ -151,7 +160,7 @@ many_put_compare(_Config) ->
{async, TreeAObjFolder0} =
leveled_bookie:book_headfold(Bookie2,
o_rkv,
{range, "Bucket", all},
{range, <<"Bucket">>, all},
FoldAccT,
false,
true,
@ -170,7 +179,7 @@ many_put_compare(_Config) ->
leveled_bookie:book_headfold(
Bookie2,
?RIAK_TAG,
{range, "Bucket", all},
{range, <<"Bucket">>, all},
{FoldObjectsFun, InitAccTree},
true,
true,
@ -188,7 +197,7 @@ many_put_compare(_Config) ->
leveled_bookie:book_headfold(
Bookie2,
?RIAK_TAG,
{range, "Bucket", all},
{range, <<"Bucket">>, all},
{FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize, false)},
true,
true,
@ -218,29 +227,38 @@ many_put_compare(_Config) ->
end,
{async, TreeAAltObjFolder0} =
leveled_bookie:book_headfold(Bookie2,
?RIAK_TAG,
{range, "Bucket", all},
{AltFoldObjectsFun,
InitAccTree},
false, true, false),
leveled_bookie:book_headfold(
Bookie2,
?RIAK_TAG,
{range, <<"Bucket">>, all},
{AltFoldObjectsFun, InitAccTree},
false,
true,
false
),
SWB2Obj = os:timestamp(),
TreeAAltObj = TreeAAltObjFolder0(),
io:format("Build tictac tree via object fold with no "++
"presence check and 200K objects and alt hash in ~w~n",
[timer:now_diff(os:timestamp(), SWB2Obj)]),
io:format(
"Build tictac tree via object fold with no "
"presence check and 200K objects and alt hash in ~w~n",
[timer:now_diff(os:timestamp(), SWB2Obj)]
),
{async, TreeBAltObjFolder0} =
leveled_bookie:book_headfold(Bookie3,
?RIAK_TAG,
{range, "Bucket", all},
{AltFoldObjectsFun,
InitAccTree},
false, true, false),
leveled_bookie:book_headfold(
Bookie3,
?RIAK_TAG,
{range, <<"Bucket">>, all},
{AltFoldObjectsFun, InitAccTree},
false,
true,
false
),
SWB3Obj = os:timestamp(),
TreeBAltObj = TreeBAltObjFolder0(),
io:format("Build tictac tree via object fold with no "++
"presence check and 200K objects and alt hash in ~w~n",
[timer:now_diff(os:timestamp(), SWB3Obj)]),
io:format(
"Build tictac tree via object fold with no "
"presence check and 200K objects and alt hash in ~w~n",
[timer:now_diff(os:timestamp(), SWB3Obj)]),
DL_ExportFold =
length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)),
io:format("Found dirty leaves with exportable comparison of ~w~n",
@ -261,7 +279,7 @@ many_put_compare(_Config) ->
end
end
end,
SegQuery = {keylist, o_rkv, "Bucket", {FoldKeysFun(SegList0), []}},
SegQuery = {keylist, o_rkv, <<"Bucket">>, {FoldKeysFun(SegList0), []}},
{async, SegKeyFinder} =
leveled_bookie:book_returnfolder(Bookie2, SegQuery),
SWSKL0 = os:timestamp(),
@ -273,7 +291,7 @@ many_put_compare(_Config) ->
true = length(SegKeyList) >= 1,
true = length(SegKeyList) < 10,
true = lists:member("Key1.1.4567.4321", SegKeyList),
true = lists:member(<<"Key1.1.4567.4321">>, SegKeyList),
% Now remove the object which represents the difference between these
% stores and confirm that the tictac trees will now match
@ -630,20 +648,23 @@ tuplebuckets_headonly(_Config) ->
SW1 = os:timestamp(),
{async, HeadRunner1} =
leveled_bookie:book_headfold(Bookie1,
?HEAD_TAG,
{bucket_list, BucketList},
{FoldHeadFun, []},
false, false,
false),
leveled_bookie:book_headfold(
Bookie1,
?HEAD_TAG,
{bucket_list, BucketList},
{FoldHeadFun, []},
false, false,
false
),
ReturnedObjSpecL1 = lists:reverse(HeadRunner1()),
[FirstItem|_Rest] = ReturnedObjSpecL1,
LastItem = lists:last(ReturnedObjSpecL1),
io:format("Returned ~w objects with first ~w and last ~w in ~w ms~n",
[length(ReturnedObjSpecL1),
FirstItem, LastItem,
timer:now_diff(os:timestamp(), SW1)/1000]),
io:format(
"Returned ~w objects with first ~w and last ~w in ~w ms~n",
[length(ReturnedObjSpecL1),
FirstItem, LastItem,
timer:now_diff(os:timestamp(), SW1)/1000]),
true = ReturnedObjSpecL1 == lists:sort(ObjectSpecL),
@ -654,12 +675,14 @@ tuplebuckets_headonly(_Config) ->
SW2 = os:timestamp(),
{async, HeadRunner2} =
leveled_bookie:book_headfold(Bookie1,
?HEAD_TAG,
{bucket_list, BucketList},
{FoldHeadFun, []},
false, false,
SegList),
leveled_bookie:book_headfold(
Bookie1,
?HEAD_TAG,
{bucket_list, BucketList},
{FoldHeadFun, []},
false, false,
SegList
),
ReturnedObjSpecL2 = lists:reverse(HeadRunner2()),
io:format("Returned ~w objects using seglist in ~w ms~n",
@ -674,7 +697,6 @@ tuplebuckets_headonly(_Config) ->
leveled_bookie:book_destroy(Bookie1).
basic_headonly(_Config) ->
ObjectCount = 200000,
RemoveCount = 100,
@ -694,11 +716,14 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) ->
{head_only, HeadOnly},
{max_journalsize, 500000}],
{ok, Bookie1} = leveled_bookie:book_start(StartOpts1),
{B1, K1, V1, S1, MD} = {"Bucket",
"Key1.1.4567.4321",
"Value1",
[],
[{"MDK1", "MDV1"}]},
{B1, K1, V1, S1, MD} =
{
<<"Bucket">>,
<<"Key1.1.4567.4321">>,
<<"Value1">>,
[],
[{<<"MDK1">>, <<"MDV1">>}]
},
{TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD),
{unsupported_message, put} =
testutil:book_riakput(Bookie1, TestObject, TestSpec),
@ -818,23 +843,21 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) ->
false = is_process_alive(AltSnapshot);
no_lookup ->
{unsupported_message, head} =
leveled_bookie:book_head(Bookie1,
SegmentID0,
{Bucket0, Key0},
h),
leveled_bookie:book_head(
Bookie1, SegmentID0, {Bucket0, Key0}, h),
{unsupported_message, head} =
leveled_bookie:book_headonly(Bookie1,
SegmentID0,
Bucket0,
Key0),
leveled_bookie:book_headonly(
Bookie1, SegmentID0, Bucket0, Key0),
io:format("Closing actual store ~w~n", [Bookie1]),
ok = leveled_bookie:book_close(Bookie1)
end,
{ok, FinalJournals} = file:list_dir(JFP),
io:format("Trim has reduced journal count from " ++
"~w to ~w and ~w after restart~n",
[length(FNs), length(FinalFNs), length(FinalJournals)]),
io:format(
"Trim has reduced journal count from "
"~w to ~w and ~w after restart~n",
[length(FNs), length(FinalFNs), length(FinalJournals)]
),
{ok, Bookie2} = leveled_bookie:book_start(StartOpts1),
@ -849,16 +872,12 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) ->
% If we allow HEAD_TAG to be suubject to a lookup, then test this
% here
{ok, Hash0} =
leveled_bookie:book_head(Bookie2,
SegmentID0,
{Bucket0, Key0},
h);
leveled_bookie:book_head(
Bookie2, SegmentID0, {Bucket0, Key0}, h);
no_lookup ->
{unsupported_message, head} =
leveled_bookie:book_head(Bookie2,
SegmentID0,
{Bucket0, Key0},
h)
leveled_bookie:book_head(
Bookie2, SegmentID0, {Bucket0, Key0}, h)
end,
RemoveSpecL0 = lists:sublist(ObjectSpecL, RemoveCount),
@ -873,12 +892,9 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) ->
true = AccC3 == (ObjectCount - RemoveCount),
false = AccH3 == AccH2,
ok = leveled_bookie:book_close(Bookie2).
load_objectspecs([], _SliceSize, _Bookie) ->
ok;
load_objectspecs(ObjectSpecL, SliceSize, Bookie)