0
Fork 0
mirror of https://github.com/ninenines/cowboy.git synced 2025-07-14 12:20:24 +00:00

Better handle content negotiation when accept contains charsets

Thanks to Philip Witty for help figuring this out.
This commit is contained in:
Loïc Hoguin 2018-11-02 12:36:51 +01:00
parent d4dff21055
commit 399b6a16b4
No known key found for this signature in database
GPG key ID: 8A9DF795F6FED764
6 changed files with 231 additions and 19 deletions

View file

@ -272,6 +272,16 @@ Cowboy treats all parameters as case sensitive, except for the
should therefore always provide the charset as a lowercase should therefore always provide the charset as a lowercase
binary string. binary string.
When a charset is given in the media type parameters in the
accept header, Cowboy will do some additional checks to confirm
that it can use this charset. When the wildcard is used then Cowboy
will immediately call `charsets_provided` to confirm the charset
is acceptable. If the callback is undefined Cowboy assumes any
charset is acceptable. When the wildcard is not used and the charset
given in the accept header matches one of the configured media
types Cowboy will use that charset and skip the `charsets_provided`
step entirely.
Cowboy will add the negotiated `media_type` to the Req object Cowboy will add the negotiated `media_type` to the Req object
after this step completes: after this step completes:
@ -284,6 +294,16 @@ req() :: #{
// @todo Case sensitivity of parsed mime content? // @todo Case sensitivity of parsed mime content?
Cowboy may also add the negotiated `charset` to the Req object
after this step completes:
[source,erlang]
----
req() :: #{
charset => binary() %% lowercase; case insensitive
}
----
=== delete_completed === delete_completed
[source,erlang] [source,erlang]

View file

@ -245,7 +245,7 @@
language_a :: undefined | binary(), language_a :: undefined | binary(),
%% Charset. %% Charset.
charsets_p = [] :: [binary()], charsets_p = undefined :: undefined | [binary()],
charset_a :: undefined | binary(), charset_a :: undefined | binary(),
%% Whether the resource exists. %% Whether the resource exists.
@ -503,16 +503,55 @@ match_media_type(Req, State, Accept,
match_media_type(Req, State, Accept, [_Any|Tail], MediaType) -> match_media_type(Req, State, Accept, [_Any|Tail], MediaType) ->
match_media_type(Req, State, Accept, Tail, MediaType). match_media_type(Req, State, Accept, Tail, MediaType).
match_media_type_params(Req, State, _Accept,
[Provided = {{TP, STP, '*'}, _Fun}|_Tail],
{{_TA, _STA, Params_A}, _QA, _APA}) ->
PMT = {TP, STP, Params_A},
languages_provided(Req#{media_type => PMT},
State#state{content_type_a=Provided});
match_media_type_params(Req, State, Accept, match_media_type_params(Req, State, Accept,
[Provided = {PMT = {_TP, _STP, Params_P}, _Fun}|Tail], [Provided = {{TP, STP, '*'}, _Fun}|Tail],
MediaType = {{TA, _STA, Params_A0}, _QA, _APA}) ->
case lists:keytake(<<"charset">>, 1, Params_A0) of
{value, {_, Charset}, Params_A} when TA =:= <<"text">> ->
%% When we match against a wildcard, the media type is text
%% and has a charset parameter, we call charsets_provided
%% and check that the charset is provided. If the callback
%% is not exported, we accept inconditionally but ignore
%% the given charset so as to not send a wrong value back.
case call(Req, State, charsets_provided) of
no_call ->
languages_provided(Req#{media_type => {TP, STP, Params_A0}},
State#state{content_type_a=Provided});
{stop, Req2, HandlerState} ->
terminate(Req2, State#state{handler_state=HandlerState});
{Switch, Req2, HandlerState} when element(1, Switch) =:= switch_handler ->
switch_handler(Switch, Req2, HandlerState);
{CP, Req2, HandlerState} ->
State2 = State#state{handler_state=HandlerState, charsets_p=CP},
case lists:member(Charset, CP) of
false ->
match_media_type(Req2, State2, Accept, Tail, MediaType);
true ->
languages_provided(Req2#{media_type => {TP, STP, Params_A}},
State2#state{content_type_a=Provided,
charset_a=Charset})
end
end;
_ ->
languages_provided(Req#{media_type => {TP, STP, Params_A0}},
State#state{content_type_a=Provided})
end;
match_media_type_params(Req, State, Accept,
[Provided = {PMT = {TP, STP, Params_P0}, Fun}|Tail],
MediaType = {{_TA, _STA, Params_A}, _QA, _APA}) -> MediaType = {{_TA, _STA, Params_A}, _QA, _APA}) ->
case lists:sort(Params_P) =:= lists:sort(Params_A) of case lists:sort(Params_P0) =:= lists:sort(Params_A) of
true when TP =:= <<"text">> ->
%% When a charset was provided explicitly in both the charset header
%% and the media types provided and the negotiation is successful,
%% we keep the charset and don't call charsets_provided. This only
%% applies to text media types, however.
{Charset, Params_P} = case lists:keytake(<<"charset">>, 1, Params_P0) of
false -> {undefined, Params_P0};
{value, {_, Charset0}, Params_P1} -> {Charset0, Params_P1}
end,
languages_provided(Req#{media_type => {TP, STP, Params_P}},
State#state{content_type_a={{TP, STP, Params_P}, Fun},
charset_a=Charset});
true -> true ->
languages_provided(Req#{media_type => PMT}, languages_provided(Req#{media_type => PMT},
State#state{content_type_a=Provided}); State#state{content_type_a=Provided});
@ -587,6 +626,26 @@ set_language(Req, State=#state{language_a=Language}) ->
%% charsets_provided should return a list of binary values indicating %% charsets_provided should return a list of binary values indicating
%% which charsets are accepted by the resource. %% which charsets are accepted by the resource.
%%
%% A charset may have been selected while negotiating the accept header.
%% There's no need to select one again.
charsets_provided(Req, State=#state{charset_a=Charset})
when Charset =/= undefined ->
set_content_type(Req, State);
%% If charsets_p is defined, use it instead of calling charsets_provided
%% again. We also call this clause during normal execution to avoid
%% duplicating code.
charsets_provided(Req, State=#state{charsets_p=[]}) ->
not_acceptable(Req, State);
charsets_provided(Req, State=#state{charsets_p=CP})
when CP =/= undefined ->
case cowboy_req:parse_header(<<"accept-charset">>, Req) of
undefined ->
set_content_type(Req, State#state{charset_a=hd(CP)});
AcceptCharset0 ->
AcceptCharset = prioritize_charsets(AcceptCharset0),
choose_charset(Req, State, AcceptCharset)
end;
charsets_provided(Req, State) -> charsets_provided(Req, State) ->
case call(Req, State, charsets_provided) of case call(Req, State, charsets_provided) of
no_call -> no_call ->
@ -595,17 +654,8 @@ charsets_provided(Req, State) ->
terminate(Req2, State#state{handler_state=HandlerState}); terminate(Req2, State#state{handler_state=HandlerState});
{Switch, Req2, HandlerState} when element(1, Switch) =:= switch_handler -> {Switch, Req2, HandlerState} when element(1, Switch) =:= switch_handler ->
switch_handler(Switch, Req2, HandlerState); switch_handler(Switch, Req2, HandlerState);
{[], Req2, HandlerState} ->
not_acceptable(Req2, State#state{handler_state=HandlerState});
{CP, Req2, HandlerState} -> {CP, Req2, HandlerState} ->
State2 = State#state{handler_state=HandlerState, charsets_p=CP}, charsets_provided(Req2, State#state{handler_state=HandlerState, charsets_p=CP})
case cowboy_req:parse_header(<<"accept-charset">>, Req2) of
undefined ->
set_content_type(Req2, State2#state{charset_a=hd(CP)});
AcceptCharset ->
AcceptCharset2 = prioritize_charsets(AcceptCharset),
choose_charset(Req2, State2, AcceptCharset2)
end
end. end.
%% The special value "*", if present in the Accept-Charset field, %% The special value "*", if present in the Accept-Charset field,
@ -690,6 +740,7 @@ variances(Req, State=#state{content_types_p=CTP,
[_|_] -> [<<"accept-language">>|Variances] [_|_] -> [<<"accept-language">>|Variances]
end, end,
Variances3 = case CP of Variances3 = case CP of
undefined -> Variances2;
[] -> Variances2; [] -> Variances2;
[_] -> Variances2; [_] -> Variances2;
[_|_] -> [<<"accept-charset">>|Variances2] [_|_] -> [<<"accept-charset">>|Variances2]

View file

@ -0,0 +1,22 @@
%% This module has a media type provided with an explicit charset.
-module(charset_in_content_types_provided_h).
-export([init/2]).
-export([content_types_provided/2]).
-export([charsets_provided/2]).
-export([get_text_plain/2]).
init(Req, Opts) ->
{cowboy_rest, Req, Opts}.
content_types_provided(Req, State) ->
{[
{{<<"text">>, <<"plain">>, [{<<"charset">>, <<"utf-8">>}]}, get_text_plain}
], Req, State}.
charsets_provided(Req, State) ->
{[<<"utf-16">>, <<"iso-8861-1">>], Req, State}.
get_text_plain(Req, State) ->
{<<"This is REST!">>, Req, State}.

View file

@ -0,0 +1,24 @@
%% This module has a media type provided with a wildcard
%% and a list of charsets that is limited.
-module(charset_in_content_types_provided_implicit_h).
-export([init/2]).
-export([content_types_provided/2]).
-export([charsets_provided/2]).
-export([get_text_plain/2]).
init(Req, Opts) ->
{cowboy_rest, Req, Opts}.
content_types_provided(Req, State) ->
{[
{{<<"text">>, <<"plain">>, '*'}, get_text_plain}
], Req, State}.
charsets_provided(Req, State) ->
{[<<"utf-8">>, <<"utf-16">>], Req, State}.
get_text_plain(Req, State) ->
{<<"This is REST!">>, Req, State}.

View file

@ -0,0 +1,21 @@
%% This module has a media type provided with a wildcard
%% and lacks a charsets_provided callback.
-module(charset_in_content_types_provided_implicit_no_callback_h).
-export([init/2]).
-export([content_types_provided/2]).
-export([get_text_plain/2]).
init(Req, Opts) ->
{cowboy_rest, Req, Opts}.
content_types_provided(Req, State) ->
{[
{{<<"text">>, <<"plain">>, '*'}, get_text_plain}
], Req, State}.
get_text_plain(Req, State) ->
{<<"This is REST!">>, Req, State}.

View file

@ -39,6 +39,12 @@ end_per_group(Name, _) ->
init_dispatch(_) -> init_dispatch(_) ->
cowboy_router:compile([{'_', [ cowboy_router:compile([{'_', [
{"/", rest_hello_h, []}, {"/", rest_hello_h, []},
{"/charset_in_content_types_provided",
charset_in_content_types_provided_h, []},
{"/charset_in_content_types_provided_implicit",
charset_in_content_types_provided_implicit_h, []},
{"/charset_in_content_types_provided_implicit_no_callback",
charset_in_content_types_provided_implicit_no_callback_h, []},
{"/provide_callback_missing", provide_callback_missing_h, []}, {"/provide_callback_missing", provide_callback_missing_h, []},
{"/switch_handler", switch_handler_h, run}, {"/switch_handler", switch_handler_h, run},
{"/switch_handler_opts", switch_handler_h, hibernate} {"/switch_handler_opts", switch_handler_h, hibernate}
@ -74,6 +80,74 @@ error_on_malformed_if_none_match(Config) ->
{response, _, 400, _} = gun:await(ConnPid, Ref), {response, _, 400, _} = gun:await(ConnPid, Ref),
ok. ok.
charset_in_content_types_provided(Config) ->
doc("When a charset is matched explictly in content_types_provided, "
"that charset is used and the charsets_provided callback is ignored."),
ConnPid = gun_open(Config),
Ref = gun:get(ConnPid, "/charset_in_content_types_provided", [
{<<"accept">>, <<"text/plain;charset=utf-8">>},
{<<"accept-charset">>, <<"utf-16, utf-8;q=0.5">>},
{<<"accept-encoding">>, <<"gzip">>}
]),
{response, _, 200, Headers} = gun:await(ConnPid, Ref),
{_, <<"text/plain; charset=utf-8">>} = lists:keyfind(<<"content-type">>, 1, Headers),
ok.
charset_in_content_types_provided_implicit_match(Config) ->
doc("When a charset is matched implicitly in content_types_provided, "
"the charsets_provided callback is used to determine if the media "
"type will match."),
ConnPid = gun_open(Config),
Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit", [
{<<"accept">>, <<"text/plain;charset=utf-16">>},
{<<"accept-encoding">>, <<"gzip">>}
]),
{response, _, 200, Headers} = gun:await(ConnPid, Ref),
{_, <<"text/plain; charset=utf-16">>} = lists:keyfind(<<"content-type">>, 1, Headers),
ok.
charset_in_content_types_provided_implicit_nomatch(Config) ->
doc("When a charset is matched implicitly in content_types_provided, "
"the charsets_provided callback is used to determine if the media "
"type will match. If it doesn't, try the next media type."),
ConnPid = gun_open(Config),
Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit", [
{<<"accept">>, <<"text/plain;charset=utf-32, text/plain">>},
{<<"accept-encoding">>, <<"gzip">>}
]),
{response, _, 200, Headers} = gun:await(ConnPid, Ref),
%% We end up with the first charset listed in charsets_provided.
{_, <<"text/plain; charset=utf-8">>} = lists:keyfind(<<"content-type">>, 1, Headers),
ok.
charset_in_content_types_provided_implicit_nomatch_error(Config) ->
doc("When a charset is matched implicitly in content_types_provided, "
"the charsets_provided callback is used to determine if the media "
"type will match. If it doesn't, and there's no other media type, "
"a 406 is returned."),
ConnPid = gun_open(Config),
Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit", [
{<<"accept">>, <<"text/plain;charset=utf-32">>},
{<<"accept-encoding">>, <<"gzip">>}
]),
{response, _, 406, _} = gun:await(ConnPid, Ref),
ok.
charset_in_content_types_provided_implicit_no_callback(Config) ->
doc("When a charset is matched implicitly in content_types_provided, "
"and the charsets_provided callback is not exported, the media "
"type will match but the charset will be ignored like all other "
"parameters."),
ConnPid = gun_open(Config),
Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit_no_callback", [
{<<"accept">>, <<"text/plain;charset=utf-32">>},
{<<"accept-encoding">>, <<"gzip">>}
]),
{response, _, 200, Headers} = gun:await(ConnPid, Ref),
%% The charset is ignored as if it was any other parameter.
{_, <<"text/plain">>} = lists:keyfind(<<"content-type">>, 1, Headers),
ok.
provide_callback_missing(Config) -> provide_callback_missing(Config) ->
doc("A 500 response must be sent when the ProvideCallback can't be called."), doc("A 500 response must be sent when the ProvideCallback can't be called."),
ConnPid = gun_open(Config), ConnPid = gun_open(Config),