leveled/include/leveled.hrl
Martin Sumner c2f19d8825 Switch to using bloom at penciller
Previouslythe tinybloom was used within the SST file as an extra check to remove false fetches.

However the SST already has a low FPR check in the slot_index.  If the newebloom was used (which is no longer per slot, but per sst), this can be shared with the penciller and then the penciller could use it and avoid the message pass.

the message pass may be blocked by a 2i query or a slot fetch request for a merge.  So this should make performance within the Penciller snappier.

This is as a result of taking sst_timings within a volume test - where there was an average of + 100microsecs for each level that was dropped down.  Given the bloom/slot checks were < 20 microsecs - there seems to be some further delay.

The bloom is a binary of > 64 bytes - so passing it around should not require a copy.
2017-11-28 01:19:30 +00:00

115 lines
4.8 KiB
Erlang

%% Tag to be used on standard Riak KV objects
-define(RIAK_TAG, o_rkv).
%% Tag to be used on K/V objects for non-Riak purposes
-define(STD_TAG, o).
%% Tag used for secondary index keys
-define(IDX_TAG, i).
%% Inker key type used for 'normal' objects
-define(INKT_STND, stnd).
%% Inker key type used for objects which contain no value, only key changes
%% This is used currently for objects formed under a 'retain' strategy on Inker
%% compaction
-define(INKT_KEYD, keyd).
%% Inker key type used for tombstones
-define(INKT_TOMB, tomb).
-define(CACHE_TYPE, skpl).
-record(sft_options,
{wait = true :: boolean(),
expire_tombstones = false :: boolean(),
penciller :: pid()}).
-record(level,
{level :: integer(),
is_basement = false :: boolean(),
timestamp :: integer()}).
-record(manifest_entry,
{start_key :: tuple() | undefined,
end_key :: tuple() | undefined,
owner :: pid()|list(),
filename :: string() | undefined,
bloom :: binary() | none}).
-record(cdb_options,
{max_size :: integer() | undefined,
file_path :: string() | undefined,
waste_path :: string() | undefined,
binary_mode = false :: boolean(),
sync_strategy = sync}).
-record(inker_options,
{cdb_max_size :: integer() | undefined,
root_path :: string() | undefined,
cdb_options :: #cdb_options{} | undefined,
start_snapshot = false :: boolean(),
source_inker :: pid() | undefined,
reload_strategy = [] :: list(),
waste_retention_period :: integer() | undefined,
compression_method = native :: lz4|native,
compress_on_receipt = false :: boolean(),
max_run_length}).
-record(penciller_options,
{root_path :: string() | undefined,
max_inmemory_tablesize :: integer() | undefined,
start_snapshot = false :: boolean(),
snapshot_query,
bookies_mem :: tuple() | undefined,
source_penciller :: pid() | undefined,
snapshot_longrunning = true :: boolean(),
compression_method = native :: lz4|native,
levelzero_cointoss = false :: boolean()}).
-record(iclerk_options,
{inker :: pid() | undefined,
max_run_length :: integer() | undefined,
cdb_options = #cdb_options{} :: #cdb_options{},
waste_retention_period :: integer() | undefined,
compression_method = native :: lz4|native,
reload_strategy = [] :: list()}).
-record(recent_aae, {filter :: whitelist|blacklist,
% the buckets list should either be a
% - whitelist - specific buckets are included, and
% entries are indexed by bucket name
% - blacklist - specific buckets are excluded, and
% all other entries are indexes using the special
% $all bucket
buckets :: list(),
% whitelist or blacklist of buckets to support recent
% AAE
limit_minutes :: integer(),
% how long to retain entries the temporary index for
% It will actually be retained for limit + unit minutes
% 60 minutes seems sensible
unit_minutes :: integer(),
% What the minimum unit size will be for a query
% e.g. the minimum time duration to be used in range
% queries of the aae index
% 5 minutes seems sensible
tree_size = small :: atom()
% Just defaulted to small for now
}).
-record(r_content, {
metadata,
value :: term()
}).
-record(r_object, {
bucket,
key,
contents :: [#r_content{}],
vclock,
updatemetadata=dict:store(clean, true, dict:new()),
updatevalue :: term()}).