mirror of
https://github.com/ninenines/cowboy.git
synced 2025-07-14 12:20:24 +00:00
Websocket text frames are now checked for UTF-8 correctness
The autobahntestsuite now passes 100% of the tests. We are getting close to fully implementing the Websocket RFC.
This commit is contained in:
parent
cccc0bc475
commit
5dd09737d0
2 changed files with 94 additions and 4 deletions
|
@ -45,7 +45,8 @@
|
|||
timeout_ref = undefined :: undefined | reference(),
|
||||
messages = undefined :: undefined | {atom(), atom(), atom()},
|
||||
hibernate = false :: boolean(),
|
||||
frag_state = undefined :: frag_state()
|
||||
frag_state = undefined :: frag_state(),
|
||||
utf8_state = <<>> :: binary()
|
||||
}).
|
||||
|
||||
%% @doc Upgrade an HTTP request to the Websocket protocol.
|
||||
|
@ -285,6 +286,65 @@ websocket_data(State, Req, HandlerState, Opcode, Len, MaskKey, Data, 1) ->
|
|||
-> {ok, Req, cowboy_middleware:env()}
|
||||
| {suspend, module(), atom(), [any()]}
|
||||
when Req::cowboy_req:req().
|
||||
%% Text frames must have a payload that is valid UTF-8.
|
||||
websocket_payload(State=#state{utf8_state=Incomplete},
|
||||
Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data)
|
||||
when byte_size(Data) < Len ->
|
||||
Unmasked2 = websocket_unmask(Data,
|
||||
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
|
||||
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
|
||||
false ->
|
||||
websocket_close(State, Req, HandlerState, {error, badframe});
|
||||
Utf8State ->
|
||||
websocket_payload_loop(State#state{utf8_state=Utf8State},
|
||||
Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
|
||||
<< Unmasked/binary, Unmasked2/binary >>)
|
||||
end;
|
||||
websocket_payload(State=#state{utf8_state=Incomplete},
|
||||
Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data) ->
|
||||
<< End:Len/binary, Rest/bits >> = Data,
|
||||
Unmasked2 = websocket_unmask(End,
|
||||
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
|
||||
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
|
||||
<<>> ->
|
||||
websocket_dispatch(State#state{utf8_state= <<>>},
|
||||
Req, HandlerState, Rest, Opcode,
|
||||
<< Unmasked/binary, Unmasked2/binary >>);
|
||||
_ ->
|
||||
websocket_close(State, Req, HandlerState, {error, badframe})
|
||||
end;
|
||||
%% Fragmented text frames may cut payload in the middle of UTF-8 codepoints.
|
||||
websocket_payload(State=#state{frag_state={_, 1, _}, utf8_state=Incomplete},
|
||||
Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data)
|
||||
when byte_size(Data) < Len ->
|
||||
Unmasked2 = websocket_unmask(Data,
|
||||
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
|
||||
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
|
||||
false ->
|
||||
websocket_close(State, Req, HandlerState, {error, badframe});
|
||||
Utf8State ->
|
||||
websocket_payload_loop(State#state{utf8_state=Utf8State},
|
||||
Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
|
||||
<< Unmasked/binary, Unmasked2/binary >>)
|
||||
end;
|
||||
websocket_payload(State=#state{frag_state={Fin, 1, _}, utf8_state=Incomplete},
|
||||
Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data) ->
|
||||
<< End:Len/binary, Rest/bits >> = Data,
|
||||
Unmasked2 = websocket_unmask(End,
|
||||
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
|
||||
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
|
||||
<<>> ->
|
||||
websocket_dispatch(State#state{utf8_state= <<>>},
|
||||
Req, HandlerState, Rest, Opcode,
|
||||
<< Unmasked/binary, Unmasked2/binary >>);
|
||||
Utf8State when is_binary(Utf8State), Fin =:= nofin ->
|
||||
websocket_dispatch(State#state{utf8_state=Utf8State},
|
||||
Req, HandlerState, Rest, Opcode,
|
||||
<< Unmasked/binary, Unmasked2/binary >>);
|
||||
_ ->
|
||||
websocket_close(State, Req, HandlerState, {error, badframe})
|
||||
end;
|
||||
%% Other frames have a binary payload.
|
||||
websocket_payload(State, Req, HandlerState,
|
||||
Opcode, Len, MaskKey, Unmasked, Data)
|
||||
when byte_size(Data) < Len ->
|
||||
|
@ -325,6 +385,36 @@ rotate_mask_key(MaskKey, UnmaskedLen) ->
|
|||
Right = 4 - Left,
|
||||
(MaskKey bsl (Left * 8)) + (MaskKey bsr (Right * 8)).
|
||||
|
||||
%% Returns <<>> if the argument is valid UTF-8, false if not,
|
||||
%% or the incomplete part of the argument if we need more data.
|
||||
-spec is_utf8(binary()) -> false | binary().
|
||||
is_utf8(Valid = <<>>) ->
|
||||
Valid;
|
||||
is_utf8(<< _/utf8, Rest/binary >>) ->
|
||||
is_utf8(Rest);
|
||||
%% 2 bytes. Codepages C0 and C1 are invalid; fail early.
|
||||
is_utf8(<< 2#1100000:7, _/bits >>) ->
|
||||
false;
|
||||
is_utf8(Incomplete = << 2#110:3, _:5 >>) ->
|
||||
Incomplete;
|
||||
%% 3 bytes.
|
||||
is_utf8(Incomplete = << 2#1110:4, _:4 >>) ->
|
||||
Incomplete;
|
||||
is_utf8(Incomplete = << 2#1110:4, _:4, 2#10:2, _:6 >>) ->
|
||||
Incomplete;
|
||||
%% 4 bytes. Codepage F4 may have invalid values greater than 0x10FFFF.
|
||||
is_utf8(<< 2#11110100:8, 2#10:2, High:6, _/bits >>) when High >= 2#10000 ->
|
||||
false;
|
||||
is_utf8(Incomplete = << 2#11110:5, _:3 >>) ->
|
||||
Incomplete;
|
||||
is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6 >>) ->
|
||||
Incomplete;
|
||||
is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6, 2#10:2, _:6 >>) ->
|
||||
Incomplete;
|
||||
%% Invalid.
|
||||
is_utf8(_) ->
|
||||
false.
|
||||
|
||||
-spec websocket_payload_loop(#state{}, Req, any(),
|
||||
opcode(), non_neg_integer(), mask_key(), binary())
|
||||
-> {ok, Req, cowboy_middleware:env()}
|
||||
|
|
|
@ -92,7 +92,7 @@ run_tests(Config) ->
|
|||
_ -> ok
|
||||
end,
|
||||
{ok, IndexHTML} = file:read_file(IndexFile),
|
||||
case binary:match(IndexHTML, <<"Fail">>) of
|
||||
{_, _} -> erlang:error(failed);
|
||||
nomatch -> ok
|
||||
case length(binary:matches(IndexHTML, <<"case_failed">>)) > 2 of
|
||||
true -> erlang:error(failed);
|
||||
false -> ok
|
||||
end.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue