mirror of
https://github.com/ninenines/cowboy.git
synced 2025-07-14 20:30:23 +00:00
Rework and improve the decompress stream handler
The read buffer was changed into an iovec to avoid doing too many binary concatenations and allocations. Decompression happens transparently: when decoding gzip, the content-encoding header is removed (we only decode when "gzip" is the only encoding so nothing remains). We always add a content_decoded key to the Req object. This key contains a list of codings that were decoded, in the reverse order in which they were. Currently it can only be empty or contain <<"gzip">> but future improvements or user handlers may see it contain more values. The option to disable decompression was renamed to decompress_enabled and defaults to true. It is no longer possible to enable/disable decompression in the middle of reading the body: this ensures that the data we pass forward is always valid. Various smaller improvements were made to the code, tests and manual pages.
This commit is contained in:
parent
3ed1b24dd6
commit
fd9711d949
6 changed files with 361 additions and 203 deletions
|
@ -1,3 +1,18 @@
|
|||
%% Copyright (c) 2024, jdamanalo <joshuadavid.agustin@manalo.ph>
|
||||
%% Copyright (c) 2024, Loïc Hoguin <essen@ninenines.eu>
|
||||
%%
|
||||
%% Permission to use, copy, modify, and/or distribute this software for any
|
||||
%% purpose with or without fee is hereby granted, provided that the above
|
||||
%% copyright notice and this permission notice appear in all copies.
|
||||
%%
|
||||
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
-module(cowboy_decompress_h).
|
||||
-behavior(cowboy_stream).
|
||||
|
||||
|
@ -9,21 +24,27 @@
|
|||
|
||||
-record(state, {
|
||||
next :: any(),
|
||||
enabled :: boolean(),
|
||||
ratio_limit :: non_neg_integer() | undefined,
|
||||
ignore = false :: boolean(),
|
||||
compress = undefined :: undefined | gzip,
|
||||
inflate = undefined :: undefined | zlib:zstream(),
|
||||
is_reading = false :: boolean(),
|
||||
read_body_buffer = <<>> :: binary(),
|
||||
|
||||
%% We use a list of binaries to avoid doing unnecessary
|
||||
%% memory allocations when inflating. We convert to binary
|
||||
%% when we propagate the data. The data must be reversed
|
||||
%% before converting to binary or inflating: this is done
|
||||
%% via the buffer_to_binary/buffer_to_iovec functions.
|
||||
read_body_buffer = [] :: [binary()],
|
||||
read_body_is_fin = nofin :: nofin | {fin, non_neg_integer()}
|
||||
}).
|
||||
|
||||
-spec init(cowboy_stream:streamid(), cowboy_req:req(), cowboy:opts())
|
||||
-> {cowboy_stream:commands(), #state{}}.
|
||||
init(StreamID, Req, Opts) ->
|
||||
init(StreamID, Req0, Opts) ->
|
||||
Enabled = maps:get(decompress_enabled, Opts, true),
|
||||
RatioLimit = maps:get(decompress_ratio_limit, Opts, 20),
|
||||
Ignore = maps:get(decompress_ignore, Opts, false),
|
||||
State = check_req(Req),
|
||||
{Req, State} = check_and_update_req(Req0),
|
||||
Inflate = case State#state.compress of
|
||||
undefined ->
|
||||
undefined;
|
||||
|
@ -33,48 +54,46 @@ init(StreamID, Req, Opts) ->
|
|||
Z
|
||||
end,
|
||||
{Commands, Next} = cowboy_stream:init(StreamID, Req, Opts),
|
||||
fold(Commands, State#state{next=Next, ratio_limit=RatioLimit, ignore=Ignore,
|
||||
inflate=Inflate}).
|
||||
fold(Commands, State#state{next=Next, enabled=Enabled,
|
||||
ratio_limit=RatioLimit, inflate=Inflate}).
|
||||
|
||||
-spec data(cowboy_stream:streamid(), cowboy_stream:fin(), cowboy_req:resp_body(), State)
|
||||
-> {cowboy_stream:commands(), State} when State::#state{}.
|
||||
data(StreamID, IsFin, Data, State=#state{next=Next0, inflate=undefined}) ->
|
||||
{Commands, Next} = cowboy_stream:data(StreamID, IsFin, Data, Next0),
|
||||
fold(Commands, State#state{next=Next, read_body_is_fin=IsFin});
|
||||
data(StreamID, IsFin, Data, State=#state{next=Next0, ignore=true, read_body_buffer=Buffer}) ->
|
||||
data(StreamID, IsFin, Data, State=#state{next=Next0, enabled=false, read_body_buffer=Buffer}) ->
|
||||
{Commands, Next} = cowboy_stream:data(StreamID, IsFin,
|
||||
<< Buffer/binary, Data/binary >>, Next0),
|
||||
buffer_to_binary([Data|Buffer]), Next0),
|
||||
fold(Commands, State#state{next=Next, read_body_is_fin=IsFin});
|
||||
data(StreamID, IsFin, Data, State0=#state{next=Next0, ratio_limit=RatioLimit,
|
||||
inflate=Z, is_reading=true, read_body_buffer=Buffer0}) ->
|
||||
Buffer = << Buffer0/binary, Data/binary >>,
|
||||
case inflate(Z, RatioLimit, Buffer) of
|
||||
{error, Type} ->
|
||||
Status = case Type of
|
||||
data -> 400;
|
||||
size -> 413
|
||||
inflate=Z, is_reading=true, read_body_buffer=Buffer}) ->
|
||||
case inflate(Z, RatioLimit, buffer_to_iovec([Data|Buffer])) of
|
||||
{error, ErrorType} ->
|
||||
zlib:close(Z),
|
||||
Status = case ErrorType of
|
||||
data_error -> 400;
|
||||
size_error -> 413
|
||||
end,
|
||||
Commands = [
|
||||
{error_response, Status, #{<<"content-length">> => <<"0">>}, <<>>},
|
||||
stop
|
||||
],
|
||||
fold(Commands, State0#state{inflate=undefined});
|
||||
fold(Commands, State0#state{inflate=undefined, read_body_buffer=[]});
|
||||
{ok, Inflated} ->
|
||||
State = case IsFin of
|
||||
nofin ->
|
||||
State0;
|
||||
fin ->
|
||||
zlib:inflateEnd(Z),
|
||||
zlib:close(Z),
|
||||
State0#state{inflate=undefined}
|
||||
end,
|
||||
{Commands, Next} = cowboy_stream:data(StreamID, IsFin, Inflated, Next0),
|
||||
fold(Commands, State#state{next=Next, read_body_buffer= <<>>,
|
||||
fold(Commands, State#state{next=Next, read_body_buffer=[],
|
||||
read_body_is_fin=IsFin})
|
||||
end;
|
||||
data(_, IsFin, Data, State=#state{read_body_buffer=Buffer0}) ->
|
||||
Buffer = << Buffer0/binary, Data/binary >>,
|
||||
{[], State#state{read_body_buffer=Buffer, read_body_is_fin=IsFin}}.
|
||||
data(_, IsFin, Data, State=#state{read_body_buffer=Buffer}) ->
|
||||
{[], State#state{read_body_buffer=[Data|Buffer], read_body_is_fin=IsFin}}.
|
||||
|
||||
-spec info(cowboy_stream:streamid(), any(), State)
|
||||
-> {cowboy_stream:commands(), State} when State::#state{}.
|
||||
|
@ -86,12 +105,19 @@ info(StreamID, Info={CommandTag, _, _, _, _}, State=#state{next=Next0, read_body
|
|||
{Commands0, Next1} = cowboy_stream:info(StreamID, Info, Next0),
|
||||
{Commands, Next} = data(StreamID, IsFin, <<>>, State#state{next=Next1, is_reading=true}),
|
||||
fold(Commands ++ Commands0, Next);
|
||||
info(StreamID, Info={set_options, Opts}, State=#state{next=Next0,
|
||||
ignore=Ignore0, ratio_limit=RatioLimit0}) ->
|
||||
Ignore = maps:get(decompress_ignore, Opts, Ignore0),
|
||||
info(StreamID, Info={set_options, Opts}, State0=#state{next=Next0,
|
||||
enabled=Enabled0, ratio_limit=RatioLimit0, is_reading=IsReading}) ->
|
||||
Enabled = maps:get(decompress_enabled, Opts, Enabled0),
|
||||
RatioLimit = maps:get(decompress_ratio_limit, Opts, RatioLimit0),
|
||||
{Commands, Next} = cowboy_stream:info(StreamID, Info, Next0),
|
||||
fold(Commands, State#state{next=Next, ignore=Ignore, ratio_limit=RatioLimit});
|
||||
%% We can't change the enabled setting after we start reading,
|
||||
%% otherwise the data becomes garbage. Changing the setting
|
||||
%% is not treated as an error, it is just ignored.
|
||||
State = case IsReading of
|
||||
true -> State0;
|
||||
false -> State0#state{enabled=Enabled}
|
||||
end,
|
||||
fold(Commands, State#state{next=Next, ratio_limit=RatioLimit});
|
||||
info(StreamID, Info, State=#state{next=Next0}) ->
|
||||
{Commands, Next} = cowboy_stream:info(StreamID, Info, Next0),
|
||||
fold(Commands, State#state{next=Next}).
|
||||
|
@ -112,31 +138,49 @@ early_error(StreamID, Reason, PartialReq, Resp, Opts) ->
|
|||
|
||||
%% Internal.
|
||||
|
||||
check_req(Req) ->
|
||||
%% Check whether the request needs content decoding, and if it does
|
||||
%% whether it fits our criteria for decoding. We also update the
|
||||
%% Req to indicate whether content was decoded.
|
||||
%%
|
||||
%% We always set the content_decoded value in the Req because it
|
||||
%% indicates whether content decoding was attempted.
|
||||
%%
|
||||
%% A malformed content-encoding header results in no decoding.
|
||||
check_and_update_req(Req=#{headers := Headers}) ->
|
||||
ContentDecoded = maps:get(content_decoded, Req, []),
|
||||
try cowboy_req:parse_header(<<"content-encoding">>, Req) of
|
||||
undefined ->
|
||||
#state{compress=undefined};
|
||||
Encodings ->
|
||||
case [E || E=(<<"gzip">>) <- Encodings] of
|
||||
[] ->
|
||||
#state{compress=undefined};
|
||||
_ ->
|
||||
#state{compress=gzip}
|
||||
end
|
||||
catch
|
||||
_:_ ->
|
||||
#state{compress=undefined}
|
||||
%% We only automatically decompress when gzip is the only
|
||||
%% encoding used. Since it's the only encoding used, we
|
||||
%% can remove the header entirely before passing the Req
|
||||
%% forward.
|
||||
[<<"gzip">>] ->
|
||||
{Req#{
|
||||
headers => maps:remove(<<"content-encoding">>, Headers),
|
||||
content_decoded => [<<"gzip">>|ContentDecoded]
|
||||
}, #state{compress=gzip}};
|
||||
_ ->
|
||||
{Req#{content_decoded => ContentDecoded},
|
||||
#state{compress=undefined}}
|
||||
catch _:_ ->
|
||||
{Req#{content_decoded => ContentDecoded},
|
||||
#state{compress=undefined}}
|
||||
end.
|
||||
|
||||
buffer_to_iovec(Buffer) ->
|
||||
lists:reverse(Buffer).
|
||||
|
||||
buffer_to_binary(Buffer) ->
|
||||
iolist_to_binary(lists:reverse(Buffer)).
|
||||
|
||||
fold(Commands, State) ->
|
||||
fold(Commands, State, []).
|
||||
|
||||
fold([], State, Acc) ->
|
||||
{lists:reverse(Acc), State};
|
||||
fold([{response, Status, Headers0, Body}|Tail], State=#state{ignore=false}, Acc) ->
|
||||
fold([{response, Status, Headers0, Body}|Tail], State=#state{enabled=true}, Acc) ->
|
||||
Headers = add_accept_encoding(Headers0),
|
||||
fold(Tail, State, [{response, Status, Headers, Body}|Acc]);
|
||||
fold([{headers, Status, Headers0} | Tail], State=#state{ignore=false}, Acc) ->
|
||||
fold([{headers, Status, Headers0} | Tail], State=#state{enabled=true}, Acc) ->
|
||||
Headers = add_accept_encoding(Headers0),
|
||||
fold(Tail, State, [{headers, Status, Headers}|Acc]);
|
||||
fold([Command|Tail], State, Acc) ->
|
||||
|
@ -146,7 +190,7 @@ add_accept_encoding(Headers=#{<<"accept-encoding">> := AcceptEncoding}) ->
|
|||
try cow_http_hd:parse_accept_encoding(iolist_to_binary(AcceptEncoding)) of
|
||||
List ->
|
||||
case lists:keyfind(<<"gzip">>, 1, List) of
|
||||
%% gzip is excluded but this handler is not ignored; we replace.
|
||||
%% gzip is excluded but this handler is enabled; we replace.
|
||||
{_, 0} ->
|
||||
Replaced = lists:keyreplace(<<"gzip">>, 1, List, {<<"gzip">>, 1000}),
|
||||
Codings = build_accept_encoding(Replaced),
|
||||
|
@ -167,18 +211,20 @@ add_accept_encoding(Headers=#{<<"accept-encoding">> := AcceptEncoding}) ->
|
|||
end
|
||||
end
|
||||
catch _:_ ->
|
||||
%% The accept-encoding header is invalid. Probably empty. We replace it with ours.
|
||||
Headers#{<<"accept-encoding">> => <<"gzip">>}
|
||||
end;
|
||||
add_accept_encoding(Headers) ->
|
||||
Headers#{<<"accept-encoding">> => <<"gzip">>}.
|
||||
|
||||
%% From cowlib, maybe expose?
|
||||
%% @todo From cowlib, maybe expose?
|
||||
qvalue_to_iodata(0) -> <<"0">>;
|
||||
qvalue_to_iodata(Q) when Q < 10 -> [<<"0.00">>, integer_to_binary(Q)];
|
||||
qvalue_to_iodata(Q) when Q < 100 -> [<<"0.0">>, integer_to_binary(Q)];
|
||||
qvalue_to_iodata(Q) when Q < 1000 -> [<<"0.">>, integer_to_binary(Q)];
|
||||
qvalue_to_iodata(1000) -> <<"1">>.
|
||||
|
||||
%% @todo Should be added to Cowlib.
|
||||
build_accept_encoding([{ContentCoding, Q}|Tail]) ->
|
||||
Weight = iolist_to_binary(qvalue_to_iodata(Q)),
|
||||
Acc = <<ContentCoding/binary, ";q=", Weight/binary>>,
|
||||
|
@ -195,20 +241,14 @@ inflate(Z, RatioLimit, Data) ->
|
|||
try
|
||||
{Status, Output} = zlib:safeInflate(Z, Data),
|
||||
Size = iolist_size(Output),
|
||||
do_inflate(Z, Size, byte_size(Data) * RatioLimit, Status, [Output])
|
||||
do_inflate(Z, Size, iolist_size(Data) * RatioLimit, Status, [Output])
|
||||
catch
|
||||
error:data_error ->
|
||||
zlib:close(Z),
|
||||
{error, data}
|
||||
{error, data_error}
|
||||
end.
|
||||
|
||||
do_inflate(Z, Size, Limit, Status, _) when Size > Limit ->
|
||||
case Status of
|
||||
continue -> ok;
|
||||
finished -> zlib:inflateEnd(Z)
|
||||
end,
|
||||
zlib:close(Z),
|
||||
{error, size};
|
||||
do_inflate(_, Size, Limit, _, _) when Size > Limit ->
|
||||
{error, size_error};
|
||||
do_inflate(Z, Size0, Limit, continue, Acc) ->
|
||||
{Status, Output} = zlib:safeInflate(Z, []),
|
||||
Size = Size0 + iolist_size(Output),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue