Add ZSTD compression (#430)
* Add support for zstd and split compression: Add support for using zstd as an alternative to native or lz4. Upgrade lz4 to v1.9.4 (with ARM enhancements). Allow for split compression algorithms, i.e. use native on the journal but lz4 on the ledger.
* Switch to AdRoll zstd: Development appears to be active and ongoing. No issues running on different Linux flavours.
* Use realistic bucket name
* Update README.md
* Switch branch
* Add comment following review
This commit is contained in:
parent
c294570bce
commit
999ce8ba5b
10 changed files with 156 additions and 73 deletions
|
@ -5,6 +5,7 @@
|
|||
{registered, []},
|
||||
{applications, [
|
||||
lz4,
|
||||
zstd,
|
||||
kernel,
|
||||
stdlib
|
||||
]},
|
||||
|
|
|
@ -125,6 +125,7 @@
|
|||
{max_pencillercachesize, ?MAX_PCL_CACHE_SIZE},
|
||||
{ledger_preloadpagecache_level, ?SST_PAGECACHELEVEL_LOOKUP},
|
||||
{compression_method, ?COMPRESSION_METHOD},
|
||||
{ledger_compression, as_store},
|
||||
{compression_point, ?COMPRESSION_POINT},
|
||||
{compression_level, ?COMPRESSION_LEVEL},
|
||||
{log_level, ?LOG_LEVEL},
|
||||
|
@ -292,13 +293,15 @@
|
|||
% To which level of the ledger should the ledger contents be
|
||||
% pre-loaded into the pagecache (using fadvise on creation and
|
||||
% startup)
|
||||
{compression_method, native|lz4|none} |
|
||||
{compression_method, native|lz4|zstd|none} |
|
||||
% Compression method and point allow Leveled to be switched from
|
||||
% using bif based compression (zlib) to using nif based compression
|
||||
% (lz4). To disable compression use none. This will disable in
|
||||
% the ledger as well as the journal (both on_receipt and
|
||||
% on_compact).
|
||||
% (lz4 or zstd).
|
||||
% Defaults to ?COMPRESSION_METHOD
|
||||
{ledger_compression, as_store|native|lz4|zstd|none} |
|
||||
% Define an alternative to the compression method to be used by the
|
||||
% ledger only. Default is as_store - use the method defined as
|
||||
% compression_method for the whole store
|
||||
{compression_point, on_compact|on_receipt} |
|
||||
% The compression point can be changed between on_receipt (all
|
||||
% values are compressed as they are received), to on_compact where
|
||||
|
@ -1812,6 +1815,14 @@ set_options(Opts, Monitor) ->
|
|||
true = SFL_CompPerc >= 0.0,
|
||||
|
||||
CompressionMethod = proplists:get_value(compression_method, Opts),
|
||||
JournalCompression = CompressionMethod,
|
||||
LedgerCompression =
|
||||
case proplists:get_value(ledger_compression, Opts) of
|
||||
as_store ->
|
||||
CompressionMethod;
|
||||
AltMethod ->
|
||||
AltMethod
|
||||
end,
|
||||
CompressOnReceipt =
|
||||
case proplists:get_value(compression_point, Opts) of
|
||||
on_receipt ->
|
||||
|
@ -1835,7 +1846,7 @@ set_options(Opts, Monitor) ->
|
|||
maxrunlength_compactionperc = MRL_CompPerc,
|
||||
waste_retention_period = WRP,
|
||||
snaptimeout_long = SnapTimeoutLong,
|
||||
compression_method = CompressionMethod,
|
||||
compression_method = JournalCompression,
|
||||
compress_on_receipt = CompressOnReceipt,
|
||||
score_onein = ScoreOneIn,
|
||||
cdb_options =
|
||||
|
@ -1854,7 +1865,7 @@ set_options(Opts, Monitor) ->
|
|||
snaptimeout_long = SnapTimeoutLong,
|
||||
sst_options =
|
||||
#sst_options{
|
||||
press_method = CompressionMethod,
|
||||
press_method = LedgerCompression,
|
||||
press_level = CompressionLevel,
|
||||
log_options = leveled_log:get_opts(),
|
||||
max_sstslots = MaxSSTSlots,
|
||||
|
|
|
@ -52,9 +52,6 @@
|
|||
accumulate_index/2,
|
||||
count_tombs/2]).
|
||||
|
||||
-define(LMD_FORMAT, "~4..0w~2..0w~2..0w~2..0w~2..0w").
|
||||
-define(NRT_IDX, "$aae.").
|
||||
|
||||
-type tag() ::
|
||||
leveled_head:object_tag()|?IDX_TAG|?HEAD_TAG|atom().
|
||||
-type key() ::
|
||||
|
@ -108,7 +105,7 @@
|
|||
-type object_spec() ::
|
||||
object_spec_v0()|object_spec_v1().
|
||||
-type compression_method() ::
|
||||
lz4|native|none.
|
||||
lz4|native|zstd|none.
|
||||
-type index_specs() ::
|
||||
list({add|remove, any(), any()}).
|
||||
-type journal_keychanges() ::
|
||||
|
@ -489,7 +486,6 @@ get_tagstrategy(Tag, Strategy) ->
|
|||
to_inkerkey(LedgerKey, SQN) ->
|
||||
{SQN, ?INKT_STND, LedgerKey}.
|
||||
|
||||
|
||||
-spec to_inkerkv(ledger_key(), non_neg_integer(), any(), journal_keychanges(),
|
||||
compression_method(), boolean()) -> {journal_key(), any()}.
|
||||
%% @doc
|
||||
|
@ -524,7 +520,6 @@ from_inkerkv(Object, ToIgnoreKeyChanges) ->
|
|||
Object
|
||||
end.
|
||||
|
||||
|
||||
-spec create_value_for_journal({any(), journal_keychanges()|binary()},
|
||||
boolean(), compression_method()) -> binary().
|
||||
%% @doc
|
||||
|
@ -549,14 +544,14 @@ maybe_compress(JournalBin, PressMethod) ->
|
|||
<<JBin0:Length0/binary,
|
||||
KeyChangeLength:32/integer,
|
||||
Type:8/integer>> = JournalBin,
|
||||
{IsBinary, IsCompressed, IsLz4} = decode_valuetype(Type),
|
||||
{IsBinary, IsCompressed, CompMethod} = decode_valuetype(Type),
|
||||
case IsCompressed of
|
||||
true ->
|
||||
JournalBin;
|
||||
false ->
|
||||
Length1 = Length0 - KeyChangeLength,
|
||||
<<OBin2:Length1/binary, KCBin2:KeyChangeLength/binary>> = JBin0,
|
||||
V0 = {deserialise_object(OBin2, IsBinary, IsCompressed, IsLz4),
|
||||
V0 = {deserialise_object(OBin2, IsBinary, IsCompressed, CompMethod),
|
||||
binary_to_term(KCBin2)},
|
||||
create_value_for_journal(V0, true, PressMethod)
|
||||
end.
|
||||
|
@ -568,6 +563,8 @@ serialise_object(Object, true, Method) when is_binary(Object) ->
|
|||
lz4 ->
|
||||
{ok, Bin} = lz4:pack(Object),
|
||||
Bin;
|
||||
zstd ->
|
||||
zstd:compress(Object);
|
||||
native ->
|
||||
zlib:compress(Object);
|
||||
none ->
|
||||
|
@ -590,35 +587,42 @@ revert_value_from_journal(JournalBin, ToIgnoreKeyChanges) ->
|
|||
<<JBin0:Length0/binary,
|
||||
KeyChangeLength:32/integer,
|
||||
Type:8/integer>> = JournalBin,
|
||||
{IsBinary, IsCompressed, IsLz4} = decode_valuetype(Type),
|
||||
{IsBinary, IsCompressed, CompMethod} = decode_valuetype(Type),
|
||||
Length1 = Length0 - KeyChangeLength,
|
||||
case ToIgnoreKeyChanges of
|
||||
true ->
|
||||
<<OBin2:Length1/binary, _KCBin2:KeyChangeLength/binary>> = JBin0,
|
||||
{deserialise_object(OBin2, IsBinary, IsCompressed, IsLz4),
|
||||
{deserialise_object(OBin2, IsBinary, IsCompressed, CompMethod),
|
||||
{[], infinity}};
|
||||
false ->
|
||||
<<OBin2:Length1/binary, KCBin2:KeyChangeLength/binary>> = JBin0,
|
||||
{deserialise_object(OBin2, IsBinary, IsCompressed, IsLz4),
|
||||
{deserialise_object(OBin2, IsBinary, IsCompressed, CompMethod),
|
||||
binary_to_term(KCBin2)}
|
||||
end.
|
||||
|
||||
deserialise_object(Binary, true, true, true) ->
|
||||
deserialise_object(Binary, true, true, lz4) ->
|
||||
{ok, Deflated} = lz4:unpack(Binary),
|
||||
Deflated;
|
||||
deserialise_object(Binary, true, true, false) ->
|
||||
deserialise_object(Binary, true, true, zstd) ->
|
||||
zstd:decompress(Binary);
|
||||
deserialise_object(Binary, true, true, native) ->
|
||||
zlib:uncompress(Binary);
|
||||
deserialise_object(Binary, true, false, _IsLz4) ->
|
||||
deserialise_object(Binary, true, false, _) ->
|
||||
Binary;
|
||||
deserialise_object(Binary, false, _, _IsLz4) ->
|
||||
deserialise_object(Binary, false, _, _) ->
|
||||
binary_to_term(Binary).
|
||||
|
||||
-spec encode_valuetype(boolean(), boolean(), native|lz4|zstd|none) -> 0..15.
|
||||
%% @doc Note that IsCompressed will be based on the compression_point
|
||||
%% configuration option when the object is first stored (i.e. only `true` if
|
||||
%% this is set to `on_receipt`). On compaction this will be set to true.
|
||||
encode_valuetype(IsBinary, IsCompressed, Method) ->
|
||||
Bit3 =
|
||||
{Bit3, Bit4} =
|
||||
case Method of
|
||||
lz4 -> 4;
|
||||
native -> 0;
|
||||
none -> 0
|
||||
lz4 -> {4, 0};
|
||||
zstd -> {4, 8};
|
||||
native -> {0, 0};
|
||||
none -> {0, 0}
|
||||
end,
|
||||
Bit2 =
|
||||
case IsBinary of
|
||||
|
@ -630,17 +634,26 @@ encode_valuetype(IsBinary, IsCompressed, Method) ->
|
|||
true -> 1;
|
||||
false -> 0
|
||||
end,
|
||||
Bit1 + Bit2 + Bit3.
|
||||
Bit1 + Bit2 + Bit3 + Bit4.
|
||||
|
||||
|
||||
-spec decode_valuetype(integer()) -> {boolean(), boolean(), boolean()}.
|
||||
-spec decode_valuetype(integer())
|
||||
-> {boolean(), boolean(), compression_method()}.
|
||||
%% @doc
|
||||
%% Check bit flags to confirm how the object has been serialised
|
||||
decode_valuetype(TypeInt) ->
|
||||
IsCompressed = TypeInt band 1 == 1,
|
||||
IsBinary = TypeInt band 2 == 2,
|
||||
IsLz4 = TypeInt band 4 == 4,
|
||||
{IsBinary, IsCompressed, IsLz4}.
|
||||
CompressionMethod =
|
||||
case TypeInt band 12 of
|
||||
0 ->
|
||||
native;
|
||||
4 ->
|
||||
lz4;
|
||||
12 ->
|
||||
zstd
|
||||
end,
|
||||
{IsBinary, IsCompressed, CompressionMethod}.
|
||||
|
||||
-spec from_journalkey(journal_key()) -> {integer(), ledger_key()}.
|
||||
%% @doc
|
||||
|
|
|
@ -151,7 +151,7 @@
|
|||
-type slot_index_value()
|
||||
:: #slot_index_value{}.
|
||||
-type press_method()
|
||||
:: lz4|native|none.
|
||||
:: lz4|native|zstd|none.
|
||||
-type range_endpoint()
|
||||
:: all|leveled_codec:ledger_key().
|
||||
-type slot_pointer()
|
||||
|
@ -1489,14 +1489,15 @@ read_file(Filename, State, LoadPageCache) ->
|
|||
Bloom}.
|
||||
|
||||
gen_fileversion(PressMethod, IdxModDate, CountOfTombs) ->
|
||||
% Native or none can be treated the same once written, as reader
|
||||
% does not need to know as compression info will be in header of the
|
||||
% Native or none can be treated the same once written, as reader
|
||||
% does not need to know as compression info will be in header of the
|
||||
% block
|
||||
Bit1 =
|
||||
case PressMethod of
|
||||
Bit1 =
|
||||
case PressMethod of
|
||||
lz4 -> 1;
|
||||
native -> 0;
|
||||
none -> 0
|
||||
none -> 0;
|
||||
zstd -> 0
|
||||
end,
|
||||
Bit2 =
|
||||
case IdxModDate of
|
||||
|
@ -1505,18 +1506,25 @@ gen_fileversion(PressMethod, IdxModDate, CountOfTombs) ->
|
|||
false ->
|
||||
0
|
||||
end,
|
||||
Bit3 =
|
||||
Bit3 =
|
||||
case CountOfTombs of
|
||||
not_counted ->
|
||||
0;
|
||||
_ ->
|
||||
4
|
||||
end,
|
||||
Bit1 + Bit2 + Bit3.
|
||||
Bit4 =
|
||||
case PressMethod of
|
||||
zstd ->
|
||||
8;
|
||||
_ ->
|
||||
0
|
||||
end,
|
||||
Bit1 + Bit2 + Bit3 + Bit4.
|
||||
|
||||
imp_fileversion(VersionInt, State) ->
|
||||
UpdState0 =
|
||||
case VersionInt band 1 of
|
||||
UpdState0 =
|
||||
case VersionInt band 1 of
|
||||
0 ->
|
||||
State#state{compression_method = native};
|
||||
1 ->
|
||||
|
@ -1529,11 +1537,18 @@ imp_fileversion(VersionInt, State) ->
|
|||
2 ->
|
||||
UpdState0#state{index_moddate = true}
|
||||
end,
|
||||
case VersionInt band 4 of
|
||||
0 ->
|
||||
UpdState1;
|
||||
4 ->
|
||||
UpdState1#state{tomb_count = 0}
|
||||
UpdState2 =
|
||||
case VersionInt band 4 of
|
||||
0 ->
|
||||
UpdState1;
|
||||
4 ->
|
||||
UpdState1#state{tomb_count = 0}
|
||||
end,
|
||||
case VersionInt band 8 of
|
||||
0 ->
|
||||
UpdState2;
|
||||
8 ->
|
||||
UpdState2#state{compression_method = zstd}
|
||||
end.
|
||||
|
||||
open_reader(Filename, LoadPageCache) ->
|
||||
|
@ -1658,12 +1673,15 @@ serialise_block(Term, native) ->
|
|||
Bin = term_to_binary(Term, ?BINARY_SETTINGS),
|
||||
CRC32 = hmac(Bin),
|
||||
<<Bin/binary, CRC32:32/integer>>;
|
||||
serialise_block(Term, zstd) ->
|
||||
Bin = zstd:compress(term_to_binary(Term)),
|
||||
CRC32 = hmac(Bin),
|
||||
<<Bin/binary, CRC32:32/integer>>;
|
||||
serialise_block(Term, none) ->
|
||||
Bin = term_to_binary(Term),
|
||||
CRC32 = hmac(Bin),
|
||||
<<Bin/binary, CRC32:32/integer>>.
|
||||
|
||||
|
||||
-spec deserialise_block(binary(), press_method()) -> any().
|
||||
%% @doc
|
||||
%% Convert binary to term
|
||||
|
@ -1686,6 +1704,8 @@ deserialise_block(_Bin, _PM) ->
|
|||
deserialise_checkedblock(Bin, lz4) ->
|
||||
{ok, Bin0} = lz4:unpack(Bin),
|
||||
binary_to_term(Bin0);
|
||||
deserialise_checkedblock(Bin, zstd) ->
|
||||
binary_to_term(zstd:decompress(Bin));
|
||||
deserialise_checkedblock(Bin, _Other) ->
|
||||
% native or none can be treated the same
|
||||
binary_to_term(Bin).
|
||||
|
@ -4207,6 +4227,7 @@ stop_whenstarter_stopped_testto() ->
|
|||
corrupted_block_range_test() ->
|
||||
corrupted_block_rangetester(native, 100),
|
||||
corrupted_block_rangetester(lz4, 100),
|
||||
corrupted_block_rangetester(zstd, 100),
|
||||
corrupted_block_rangetester(none, 100).
|
||||
|
||||
corrupted_block_rangetester(PressMethod, TestCount) ->
|
||||
|
@ -4251,6 +4272,7 @@ corrupted_block_rangetester(PressMethod, TestCount) ->
|
|||
corrupted_block_fetch_test() ->
|
||||
corrupted_block_fetch_tester(native),
|
||||
corrupted_block_fetch_tester(lz4),
|
||||
corrupted_block_fetch_tester(zstd),
|
||||
corrupted_block_fetch_tester(none).
|
||||
|
||||
corrupted_block_fetch_tester(PressMethod) ->
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue