mirror of
https://github.com/ninenines/cowboy.git
synced 2025-07-14 20:30:23 +00:00
Websocket text frames are now checked for UTF-8 correctness
The autobahntestsuite now passes 100% of the tests. We are getting close to fully implementing the Websocket RFC.
This commit is contained in:
parent
cccc0bc475
commit
5dd09737d0
2 changed files with 94 additions and 4 deletions
|
@ -45,7 +45,8 @@
|
||||||
timeout_ref = undefined :: undefined | reference(),
|
timeout_ref = undefined :: undefined | reference(),
|
||||||
messages = undefined :: undefined | {atom(), atom(), atom()},
|
messages = undefined :: undefined | {atom(), atom(), atom()},
|
||||||
hibernate = false :: boolean(),
|
hibernate = false :: boolean(),
|
||||||
frag_state = undefined :: frag_state()
|
frag_state = undefined :: frag_state(),
|
||||||
|
utf8_state = <<>> :: binary()
|
||||||
}).
|
}).
|
||||||
|
|
||||||
%% @doc Upgrade an HTTP request to the Websocket protocol.
|
%% @doc Upgrade an HTTP request to the Websocket protocol.
|
||||||
|
@ -285,6 +286,65 @@ websocket_data(State, Req, HandlerState, Opcode, Len, MaskKey, Data, 1) ->
|
||||||
-> {ok, Req, cowboy_middleware:env()}
|
-> {ok, Req, cowboy_middleware:env()}
|
||||||
| {suspend, module(), atom(), [any()]}
|
| {suspend, module(), atom(), [any()]}
|
||||||
when Req::cowboy_req:req().
|
when Req::cowboy_req:req().
|
||||||
|
%% Text frames must have a payload that is valid UTF-8.
|
||||||
|
websocket_payload(State=#state{utf8_state=Incomplete},
|
||||||
|
Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data)
|
||||||
|
when byte_size(Data) < Len ->
|
||||||
|
Unmasked2 = websocket_unmask(Data,
|
||||||
|
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
|
||||||
|
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
|
||||||
|
false ->
|
||||||
|
websocket_close(State, Req, HandlerState, {error, badframe});
|
||||||
|
Utf8State ->
|
||||||
|
websocket_payload_loop(State#state{utf8_state=Utf8State},
|
||||||
|
Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
|
||||||
|
<< Unmasked/binary, Unmasked2/binary >>)
|
||||||
|
end;
|
||||||
|
websocket_payload(State=#state{utf8_state=Incomplete},
|
||||||
|
Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data) ->
|
||||||
|
<< End:Len/binary, Rest/bits >> = Data,
|
||||||
|
Unmasked2 = websocket_unmask(End,
|
||||||
|
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
|
||||||
|
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
|
||||||
|
<<>> ->
|
||||||
|
websocket_dispatch(State#state{utf8_state= <<>>},
|
||||||
|
Req, HandlerState, Rest, Opcode,
|
||||||
|
<< Unmasked/binary, Unmasked2/binary >>);
|
||||||
|
_ ->
|
||||||
|
websocket_close(State, Req, HandlerState, {error, badframe})
|
||||||
|
end;
|
||||||
|
%% Fragmented text frames may cut payload in the middle of UTF-8 codepoints.
|
||||||
|
websocket_payload(State=#state{frag_state={_, 1, _}, utf8_state=Incomplete},
|
||||||
|
Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data)
|
||||||
|
when byte_size(Data) < Len ->
|
||||||
|
Unmasked2 = websocket_unmask(Data,
|
||||||
|
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
|
||||||
|
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
|
||||||
|
false ->
|
||||||
|
websocket_close(State, Req, HandlerState, {error, badframe});
|
||||||
|
Utf8State ->
|
||||||
|
websocket_payload_loop(State#state{utf8_state=Utf8State},
|
||||||
|
Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
|
||||||
|
<< Unmasked/binary, Unmasked2/binary >>)
|
||||||
|
end;
|
||||||
|
websocket_payload(State=#state{frag_state={Fin, 1, _}, utf8_state=Incomplete},
|
||||||
|
Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data) ->
|
||||||
|
<< End:Len/binary, Rest/bits >> = Data,
|
||||||
|
Unmasked2 = websocket_unmask(End,
|
||||||
|
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
|
||||||
|
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
|
||||||
|
<<>> ->
|
||||||
|
websocket_dispatch(State#state{utf8_state= <<>>},
|
||||||
|
Req, HandlerState, Rest, Opcode,
|
||||||
|
<< Unmasked/binary, Unmasked2/binary >>);
|
||||||
|
Utf8State when is_binary(Utf8State), Fin =:= nofin ->
|
||||||
|
websocket_dispatch(State#state{utf8_state=Utf8State},
|
||||||
|
Req, HandlerState, Rest, Opcode,
|
||||||
|
<< Unmasked/binary, Unmasked2/binary >>);
|
||||||
|
_ ->
|
||||||
|
websocket_close(State, Req, HandlerState, {error, badframe})
|
||||||
|
end;
|
||||||
|
%% Other frames have a binary payload.
|
||||||
websocket_payload(State, Req, HandlerState,
|
websocket_payload(State, Req, HandlerState,
|
||||||
Opcode, Len, MaskKey, Unmasked, Data)
|
Opcode, Len, MaskKey, Unmasked, Data)
|
||||||
when byte_size(Data) < Len ->
|
when byte_size(Data) < Len ->
|
||||||
|
@ -325,6 +385,36 @@ rotate_mask_key(MaskKey, UnmaskedLen) ->
|
||||||
Right = 4 - Left,
|
Right = 4 - Left,
|
||||||
(MaskKey bsl (Left * 8)) + (MaskKey bsr (Right * 8)).
|
(MaskKey bsl (Left * 8)) + (MaskKey bsr (Right * 8)).
|
||||||
|
|
||||||
|
%% Validates a (possibly partial) UTF-8 byte stream.
%%
%% Returns <<>> if the argument is valid UTF-8, false if not,
%% or the incomplete part of the argument if we need more data.
%% The incomplete part is always the trailing 1 to 3 bytes of the
%% argument: the beginning of a multi-byte sequence that was cut short.
%%
%% Prefixes that can never be completed into a valid sequence (overlong
%% encodings, UTF-16 surrogates, values above 0x10FFFF) are rejected as
%% soon as they are seen rather than being reported as incomplete.
-spec is_utf8(binary()) -> false | binary().
is_utf8(Valid = <<>>) ->
    Valid;
%% A complete, valid codepoint is at the head: keep checking the tail.
is_utf8(<< _/utf8, Rest/binary >>) ->
    is_utf8(Rest);
%% 2 bytes. Lead bytes C0 and C1 can only encode overlong forms; fail early.
is_utf8(<< 2#1100000:7, _/bits >>) ->
    false;
is_utf8(Incomplete = << 2#110:3, _:5 >>) ->
    Incomplete;
%% 3 bytes. E0 followed by 80..9F is an overlong encoding and ED followed
%% by A0..BF is a UTF-16 surrogate; neither can become valid, fail early.
is_utf8(<< 16#E0, 2#100:3, _/bits >>) ->
    false;
is_utf8(<< 16#ED, 2#101:3, _/bits >>) ->
    false;
is_utf8(Incomplete = << 2#1110:4, _:4 >>) ->
    Incomplete;
is_utf8(Incomplete = << 2#1110:4, _:4, 2#10:2, _:6 >>) ->
    Incomplete;
%% 4 bytes. F0 followed by 80..8F is an overlong encoding; F4 followed by
%% 90..BF and the lead bytes F5..F7 all encode values greater than 0x10FFFF.
is_utf8(<< 16#F0, 2#1000:4, _/bits >>) ->
    false;
is_utf8(<< 2#11110100:8, 2#10:2, High:6, _/bits >>) when High >= 2#10000 ->
    false;
is_utf8(<< 2#11110:5, Low:3, _/bits >>) when Low > 2#100 ->
    false;
is_utf8(Incomplete = << 2#11110:5, _:3 >>) ->
    Incomplete;
is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6 >>) ->
    Incomplete;
is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6, 2#10:2, _:6 >>) ->
    Incomplete;
%% Invalid.
is_utf8(_) ->
    false.
|
||||||
|
|
||||||
-spec websocket_payload_loop(#state{}, Req, any(),
|
-spec websocket_payload_loop(#state{}, Req, any(),
|
||||||
opcode(), non_neg_integer(), mask_key(), binary())
|
opcode(), non_neg_integer(), mask_key(), binary())
|
||||||
-> {ok, Req, cowboy_middleware:env()}
|
-> {ok, Req, cowboy_middleware:env()}
|
||||||
|
|
|
@ -92,7 +92,7 @@ run_tests(Config) ->
|
||||||
_ -> ok
|
_ -> ok
|
||||||
end,
|
end,
|
||||||
{ok, IndexHTML} = file:read_file(IndexFile),
|
{ok, IndexHTML} = file:read_file(IndexFile),
|
||||||
case binary:match(IndexHTML, <<"Fail">>) of
|
case length(binary:matches(IndexHTML, <<"case_failed">>)) > 2 of
|
||||||
{_, _} -> erlang:error(failed);
|
true -> erlang:error(failed);
|
||||||
nomatch -> ok
|
false -> ok
|
||||||
end.
|
end.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue