0
Fork 0
mirror of https://github.com/ninenines/cowboy.git synced 2025-07-14 20:30:23 +00:00

Rework and improve the decompress stream handler

The read buffer was changed into an iovec to avoid doing
too many binary concatenations and allocations.

Decompression happens transparently: when decoding gzip,
the content-encoding header is removed (we only decode
when "gzip" is the only encoding so nothing remains).

We always add a content_decoded key to the Req object.
This key contains a list of codings that were decoded,
in the reverse order in which they were. Currently it
can only be empty or contain <<"gzip">> but future
improvements or user handlers may see it contain more
values.

The option to disable decompression was renamed to
decompress_enabled and defaults to true.

It is no longer possible to enable/disable decompression
in the middle of reading the body: this ensures that the
data we pass forward is always valid.

Various smaller improvements were made to the code,
tests and manual pages.
This commit is contained in:
Loïc Hoguin 2024-01-04 15:15:41 +01:00
parent 3ed1b24dd6
commit fd9711d949
No known key found for this signature in database
GPG key ID: 8A9DF795F6FED764
6 changed files with 361 additions and 203 deletions

View file

@ -1,3 +1,18 @@
%% Copyright (c) 2024, jdamanalo <joshuadavid.agustin@manalo.ph>
%% Copyright (c) 2024, Loïc Hoguin <essen@ninenines.eu>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-module(cowboy_decompress_h).
-behavior(cowboy_stream).
@ -9,21 +24,27 @@
-record(state, {
next :: any(),
enabled :: boolean(),
ratio_limit :: non_neg_integer() | undefined,
ignore = false :: boolean(),
compress = undefined :: undefined | gzip,
inflate = undefined :: undefined | zlib:zstream(),
is_reading = false :: boolean(),
read_body_buffer = <<>> :: binary(),
%% We use a list of binaries to avoid doing unnecessary
%% memory allocations when inflating. We convert to binary
%% when we propagate the data. The data must be reversed
%% before converting to binary or inflating: this is done
%% via the buffer_to_binary/buffer_to_iovec functions.
read_body_buffer = [] :: [binary()],
read_body_is_fin = nofin :: nofin | {fin, non_neg_integer()}
}).
-spec init(cowboy_stream:streamid(), cowboy_req:req(), cowboy:opts())
-> {cowboy_stream:commands(), #state{}}.
init(StreamID, Req, Opts) ->
init(StreamID, Req0, Opts) ->
Enabled = maps:get(decompress_enabled, Opts, true),
RatioLimit = maps:get(decompress_ratio_limit, Opts, 20),
Ignore = maps:get(decompress_ignore, Opts, false),
State = check_req(Req),
{Req, State} = check_and_update_req(Req0),
Inflate = case State#state.compress of
undefined ->
undefined;
@ -33,48 +54,46 @@ init(StreamID, Req, Opts) ->
Z
end,
{Commands, Next} = cowboy_stream:init(StreamID, Req, Opts),
fold(Commands, State#state{next=Next, ratio_limit=RatioLimit, ignore=Ignore,
inflate=Inflate}).
fold(Commands, State#state{next=Next, enabled=Enabled,
ratio_limit=RatioLimit, inflate=Inflate}).
-spec data(cowboy_stream:streamid(), cowboy_stream:fin(), cowboy_req:resp_body(), State)
-> {cowboy_stream:commands(), State} when State::#state{}.
data(StreamID, IsFin, Data, State=#state{next=Next0, inflate=undefined}) ->
{Commands, Next} = cowboy_stream:data(StreamID, IsFin, Data, Next0),
fold(Commands, State#state{next=Next, read_body_is_fin=IsFin});
data(StreamID, IsFin, Data, State=#state{next=Next0, ignore=true, read_body_buffer=Buffer}) ->
data(StreamID, IsFin, Data, State=#state{next=Next0, enabled=false, read_body_buffer=Buffer}) ->
{Commands, Next} = cowboy_stream:data(StreamID, IsFin,
<< Buffer/binary, Data/binary >>, Next0),
buffer_to_binary([Data|Buffer]), Next0),
fold(Commands, State#state{next=Next, read_body_is_fin=IsFin});
data(StreamID, IsFin, Data, State0=#state{next=Next0, ratio_limit=RatioLimit,
inflate=Z, is_reading=true, read_body_buffer=Buffer0}) ->
Buffer = << Buffer0/binary, Data/binary >>,
case inflate(Z, RatioLimit, Buffer) of
{error, Type} ->
Status = case Type of
data -> 400;
size -> 413
inflate=Z, is_reading=true, read_body_buffer=Buffer}) ->
case inflate(Z, RatioLimit, buffer_to_iovec([Data|Buffer])) of
{error, ErrorType} ->
zlib:close(Z),
Status = case ErrorType of
data_error -> 400;
size_error -> 413
end,
Commands = [
{error_response, Status, #{<<"content-length">> => <<"0">>}, <<>>},
stop
],
fold(Commands, State0#state{inflate=undefined});
fold(Commands, State0#state{inflate=undefined, read_body_buffer=[]});
{ok, Inflated} ->
State = case IsFin of
nofin ->
State0;
fin ->
zlib:inflateEnd(Z),
zlib:close(Z),
State0#state{inflate=undefined}
end,
{Commands, Next} = cowboy_stream:data(StreamID, IsFin, Inflated, Next0),
fold(Commands, State#state{next=Next, read_body_buffer= <<>>,
fold(Commands, State#state{next=Next, read_body_buffer=[],
read_body_is_fin=IsFin})
end;
data(_, IsFin, Data, State=#state{read_body_buffer=Buffer0}) ->
Buffer = << Buffer0/binary, Data/binary >>,
{[], State#state{read_body_buffer=Buffer, read_body_is_fin=IsFin}}.
data(_, IsFin, Data, State=#state{read_body_buffer=Buffer}) ->
{[], State#state{read_body_buffer=[Data|Buffer], read_body_is_fin=IsFin}}.
-spec info(cowboy_stream:streamid(), any(), State)
-> {cowboy_stream:commands(), State} when State::#state{}.
@ -86,12 +105,19 @@ info(StreamID, Info={CommandTag, _, _, _, _}, State=#state{next=Next0, read_body
{Commands0, Next1} = cowboy_stream:info(StreamID, Info, Next0),
{Commands, Next} = data(StreamID, IsFin, <<>>, State#state{next=Next1, is_reading=true}),
fold(Commands ++ Commands0, Next);
info(StreamID, Info={set_options, Opts}, State=#state{next=Next0,
ignore=Ignore0, ratio_limit=RatioLimit0}) ->
Ignore = maps:get(decompress_ignore, Opts, Ignore0),
info(StreamID, Info={set_options, Opts}, State0=#state{next=Next0,
enabled=Enabled0, ratio_limit=RatioLimit0, is_reading=IsReading}) ->
Enabled = maps:get(decompress_enabled, Opts, Enabled0),
RatioLimit = maps:get(decompress_ratio_limit, Opts, RatioLimit0),
{Commands, Next} = cowboy_stream:info(StreamID, Info, Next0),
fold(Commands, State#state{next=Next, ignore=Ignore, ratio_limit=RatioLimit});
%% We can't change the enabled setting after we start reading,
%% otherwise the data becomes garbage. Changing the setting
%% is not treated as an error, it is just ignored.
State = case IsReading of
true -> State0;
false -> State0#state{enabled=Enabled}
end,
fold(Commands, State#state{next=Next, ratio_limit=RatioLimit});
info(StreamID, Info, State=#state{next=Next0}) ->
{Commands, Next} = cowboy_stream:info(StreamID, Info, Next0),
fold(Commands, State#state{next=Next}).
@ -112,31 +138,49 @@ early_error(StreamID, Reason, PartialReq, Resp, Opts) ->
%% Internal.
check_req(Req) ->
%% Check whether the request needs content decoding, and if it does
%% whether it fits our criteria for decoding. We also update the
%% Req to indicate whether content was decoded.
%%
%% We always set the content_decoded value in the Req because it
%% indicates whether content decoding was attempted.
%%
%% A malformed content-encoding header results in no decoding.
check_and_update_req(Req=#{headers := Headers}) ->
ContentDecoded = maps:get(content_decoded, Req, []),
try cowboy_req:parse_header(<<"content-encoding">>, Req) of
undefined ->
#state{compress=undefined};
Encodings ->
case [E || E=(<<"gzip">>) <- Encodings] of
[] ->
#state{compress=undefined};
_ ->
#state{compress=gzip}
end
catch
_:_ ->
#state{compress=undefined}
%% We only automatically decompress when gzip is the only
%% encoding used. Since it's the only encoding used, we
%% can remove the header entirely before passing the Req
%% forward.
[<<"gzip">>] ->
{Req#{
headers => maps:remove(<<"content-encoding">>, Headers),
content_decoded => [<<"gzip">>|ContentDecoded]
}, #state{compress=gzip}};
_ ->
{Req#{content_decoded => ContentDecoded},
#state{compress=undefined}}
catch _:_ ->
{Req#{content_decoded => ContentDecoded},
#state{compress=undefined}}
end.
buffer_to_iovec(Buffer) ->
lists:reverse(Buffer).
buffer_to_binary(Buffer) ->
iolist_to_binary(lists:reverse(Buffer)).
fold(Commands, State) ->
fold(Commands, State, []).
fold([], State, Acc) ->
{lists:reverse(Acc), State};
fold([{response, Status, Headers0, Body}|Tail], State=#state{ignore=false}, Acc) ->
fold([{response, Status, Headers0, Body}|Tail], State=#state{enabled=true}, Acc) ->
Headers = add_accept_encoding(Headers0),
fold(Tail, State, [{response, Status, Headers, Body}|Acc]);
fold([{headers, Status, Headers0} | Tail], State=#state{ignore=false}, Acc) ->
fold([{headers, Status, Headers0} | Tail], State=#state{enabled=true}, Acc) ->
Headers = add_accept_encoding(Headers0),
fold(Tail, State, [{headers, Status, Headers}|Acc]);
fold([Command|Tail], State, Acc) ->
@ -146,7 +190,7 @@ add_accept_encoding(Headers=#{<<"accept-encoding">> := AcceptEncoding}) ->
try cow_http_hd:parse_accept_encoding(iolist_to_binary(AcceptEncoding)) of
List ->
case lists:keyfind(<<"gzip">>, 1, List) of
%% gzip is excluded but this handler is not ignored; we replace.
%% gzip is excluded but this handler is enabled; we replace.
{_, 0} ->
Replaced = lists:keyreplace(<<"gzip">>, 1, List, {<<"gzip">>, 1000}),
Codings = build_accept_encoding(Replaced),
@ -167,18 +211,20 @@ add_accept_encoding(Headers=#{<<"accept-encoding">> := AcceptEncoding}) ->
end
end
catch _:_ ->
%% The accept-encoding header is invalid. Probably empty. We replace it with ours.
Headers#{<<"accept-encoding">> => <<"gzip">>}
end;
add_accept_encoding(Headers) ->
Headers#{<<"accept-encoding">> => <<"gzip">>}.
%% From cowlib, maybe expose?
%% @todo From cowlib, maybe expose?
qvalue_to_iodata(0) -> <<"0">>;
qvalue_to_iodata(Q) when Q < 10 -> [<<"0.00">>, integer_to_binary(Q)];
qvalue_to_iodata(Q) when Q < 100 -> [<<"0.0">>, integer_to_binary(Q)];
qvalue_to_iodata(Q) when Q < 1000 -> [<<"0.">>, integer_to_binary(Q)];
qvalue_to_iodata(1000) -> <<"1">>.
%% @todo Should be added to Cowlib.
build_accept_encoding([{ContentCoding, Q}|Tail]) ->
Weight = iolist_to_binary(qvalue_to_iodata(Q)),
Acc = <<ContentCoding/binary, ";q=", Weight/binary>>,
@ -195,20 +241,14 @@ inflate(Z, RatioLimit, Data) ->
try
{Status, Output} = zlib:safeInflate(Z, Data),
Size = iolist_size(Output),
do_inflate(Z, Size, byte_size(Data) * RatioLimit, Status, [Output])
do_inflate(Z, Size, iolist_size(Data) * RatioLimit, Status, [Output])
catch
error:data_error ->
zlib:close(Z),
{error, data}
{error, data_error}
end.
do_inflate(Z, Size, Limit, Status, _) when Size > Limit ->
case Status of
continue -> ok;
finished -> zlib:inflateEnd(Z)
end,
zlib:close(Z),
{error, size};
do_inflate(_, Size, Limit, _, _) when Size > Limit ->
{error, size_error};
do_inflate(Z, Size0, Limit, continue, Acc) ->
{Status, Output} = zlib:safeInflate(Z, []),
Size = Size0 + iolist_size(Output),