0
Fork 0
mirror of https://github.com/ninenines/cowboy.git synced 2025-07-14 12:20:24 +00:00

Websocket text frames are now checked for UTF-8 correctness

The autobahntestsuite now passes 100% of the tests. We are
getting close to fully implementing the Websocket RFC.
This commit is contained in:
Loïc Hoguin 2013-01-13 00:10:32 +01:00
parent cccc0bc475
commit 5dd09737d0
2 changed files with 94 additions and 4 deletions

View file

@ -45,7 +45,8 @@
timeout_ref = undefined :: undefined | reference(),
messages = undefined :: undefined | {atom(), atom(), atom()},
hibernate = false :: boolean(),
frag_state = undefined :: frag_state()
frag_state = undefined :: frag_state(),
utf8_state = <<>> :: binary()
}).
%% @doc Upgrade an HTTP request to the Websocket protocol.
@ -285,6 +286,65 @@ websocket_data(State, Req, HandlerState, Opcode, Len, MaskKey, Data, 1) ->
-> {ok, Req, cowboy_middleware:env()}
| {suspend, module(), atom(), [any()]}
when Req::cowboy_req:req().
%% Text frames must have a payload that is valid UTF-8.
websocket_payload(State=#state{utf8_state=Incomplete},
Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data)
when byte_size(Data) < Len ->
Unmasked2 = websocket_unmask(Data,
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
false ->
websocket_close(State, Req, HandlerState, {error, badframe});
Utf8State ->
websocket_payload_loop(State#state{utf8_state=Utf8State},
Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
<< Unmasked/binary, Unmasked2/binary >>)
end;
websocket_payload(State=#state{utf8_state=Incomplete},
Req, HandlerState, Opcode=1, Len, MaskKey, Unmasked, Data) ->
<< End:Len/binary, Rest/bits >> = Data,
Unmasked2 = websocket_unmask(End,
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
<<>> ->
websocket_dispatch(State#state{utf8_state= <<>>},
Req, HandlerState, Rest, Opcode,
<< Unmasked/binary, Unmasked2/binary >>);
_ ->
websocket_close(State, Req, HandlerState, {error, badframe})
end;
%% Fragmented text frames may cut payload in the middle of UTF-8 codepoints.
websocket_payload(State=#state{frag_state={_, 1, _}, utf8_state=Incomplete},
Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data)
when byte_size(Data) < Len ->
Unmasked2 = websocket_unmask(Data,
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
false ->
websocket_close(State, Req, HandlerState, {error, badframe});
Utf8State ->
websocket_payload_loop(State#state{utf8_state=Utf8State},
Req, HandlerState, Opcode, Len - byte_size(Data), MaskKey,
<< Unmasked/binary, Unmasked2/binary >>)
end;
websocket_payload(State=#state{frag_state={Fin, 1, _}, utf8_state=Incomplete},
Req, HandlerState, Opcode=0, Len, MaskKey, Unmasked, Data) ->
<< End:Len/binary, Rest/bits >> = Data,
Unmasked2 = websocket_unmask(End,
rotate_mask_key(MaskKey, byte_size(Unmasked)), <<>>),
case is_utf8(<< Incomplete/binary, Unmasked2/binary >>) of
<<>> ->
websocket_dispatch(State#state{utf8_state= <<>>},
Req, HandlerState, Rest, Opcode,
<< Unmasked/binary, Unmasked2/binary >>);
Utf8State when is_binary(Utf8State), Fin =:= nofin ->
websocket_dispatch(State#state{utf8_state=Utf8State},
Req, HandlerState, Rest, Opcode,
<< Unmasked/binary, Unmasked2/binary >>);
_ ->
websocket_close(State, Req, HandlerState, {error, badframe})
end;
%% Other frames have a binary payload.
websocket_payload(State, Req, HandlerState,
Opcode, Len, MaskKey, Unmasked, Data)
when byte_size(Data) < Len ->
@ -325,6 +385,36 @@ rotate_mask_key(MaskKey, UnmaskedLen) ->
Right = 4 - Left,
(MaskKey bsl (Left * 8)) + (MaskKey bsr (Right * 8)).
%% Returns <<>> if the argument is valid UTF-8, false if not,
%% or the incomplete part of the argument if we need more data.
-spec is_utf8(binary()) -> false | binary().
is_utf8(Valid = <<>>) ->
Valid;
is_utf8(<< _/utf8, Rest/binary >>) ->
is_utf8(Rest);
%% 2 bytes. Codepages C0 and C1 are invalid; fail early.
is_utf8(<< 2#1100000:7, _/bits >>) ->
false;
is_utf8(Incomplete = << 2#110:3, _:5 >>) ->
Incomplete;
%% 3 bytes.
is_utf8(Incomplete = << 2#1110:4, _:4 >>) ->
Incomplete;
is_utf8(Incomplete = << 2#1110:4, _:4, 2#10:2, _:6 >>) ->
Incomplete;
%% 4 bytes. Codepage F4 may have invalid values greater than 0x10FFFF.
is_utf8(<< 2#11110100:8, 2#10:2, High:6, _/bits >>) when High >= 2#10000 ->
false;
is_utf8(Incomplete = << 2#11110:5, _:3 >>) ->
Incomplete;
is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6 >>) ->
Incomplete;
is_utf8(Incomplete = << 2#11110:5, _:3, 2#10:2, _:6, 2#10:2, _:6 >>) ->
Incomplete;
%% Invalid.
is_utf8(_) ->
false.
-spec websocket_payload_loop(#state{}, Req, any(),
opcode(), non_neg_integer(), mask_key(), binary())
-> {ok, Req, cowboy_middleware:env()}

View file

@ -92,7 +92,7 @@ run_tests(Config) ->
_ -> ok
end,
{ok, IndexHTML} = file:read_file(IndexFile),
case binary:match(IndexHTML, <<"Fail">>) of
{_, _} -> erlang:error(failed);
nomatch -> ok
case length(binary:matches(IndexHTML, <<"case_failed">>)) > 2 of
true -> erlang:error(failed);
false -> ok
end.