fixed bug related to partial utfx sequences
This commit is contained in:
parent
c287315ebf
commit
ce503823c8
4 changed files with 61 additions and 11 deletions
|
@ -36,7 +36,7 @@ decoder(Opts) ->
|
||||||
|
|
||||||
decoder({F, _} = Callbacks, OptsList) when is_list(OptsList), is_function(F) ->
|
decoder({F, _} = Callbacks, OptsList) when is_list(OptsList), is_function(F) ->
|
||||||
start(Callbacks, OptsList);
|
start(Callbacks, OptsList);
|
||||||
decoder({{Mod, Fun}, State}, OptsList) when is_list(OptsList), is_atom(Mod), is_atom(Fun) ->
|
decoder({Mod, Fun, State}, OptsList) when is_list(OptsList), is_atom(Mod), is_atom(Fun) ->
|
||||||
start({fun(E, S) -> Mod:Fun(E, S) end, State}, OptsList).
|
start({fun(E, S) -> Mod:Fun(E, S) end, State}, OptsList).
|
||||||
|
|
||||||
start(Callbacks, OptsList) ->
|
start(Callbacks, OptsList) ->
|
||||||
|
|
|
@ -193,8 +193,21 @@ string(<<?rsolidus/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
||||||
escape(Rest, Stack, Callbacks, Opts, Acc);
|
escape(Rest, Stack, Callbacks, Opts, Acc);
|
||||||
string(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
|
string(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
|
||||||
string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
|
string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
|
||||||
string(Bin, Stack, Callbacks, Opts, Acc) when byte_size(Bin) < 2 ->
|
string(Bin, Stack, Callbacks, Opts, Acc) ->
|
||||||
{incomplete, fun(Stream) -> string(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end}.
|
case partial_utf16(Bin) of
|
||||||
|
true ->
|
||||||
|
{incomplete, fun(Stream) -> string(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end}
|
||||||
|
; false ->
|
||||||
|
erlang:error(function_clause)
|
||||||
|
end.
|
||||||
|
|
||||||
|
%% partial_utf16/1 decides whether a leftover binary could still become a
%% character once more of the stream arrives. the guards inspect the first byte
%% of each two byte code unit.
%%
%% returns true when waiting for more input is worthwhile and false when the
%% bytes present can never form a character, so the caller can raise its error.

%% nothing buffered yet, anything may follow
partial_utf16(<<>>) -> true;
%% a lead byte in 16#dc..16#df can only open a low surrogate, and a low
%% surrogate may never come first, so no later byte can rescue it
partial_utf16(<<X>>) when X >= 16#dc, X =< 16#df -> false;
%% any other single byte may still be completed by the next byte
partial_utf16(<<_X>>) -> true;
%% a complete high surrogate (16#d8..16#db) waiting for its low surrogate
partial_utf16(<<X, _Y>>) when X >= 16#d8, X =< 16#db -> true;
%% a high surrogate followed by the first byte of a low surrogate (16#dc..16#df)
partial_utf16(<<X, _Y, Z>>) when X >= 16#d8, X =< 16#db, Z >= 16#dc, Z =< 16#df -> true;
partial_utf16(_) -> false.
|
||||||
|
|
||||||
|
|
||||||
%% only thing to note here is the additional accumulator passed to escaped_unicode used
|
%% only thing to note here is the additional accumulator passed to escaped_unicode used
|
||||||
|
|
|
@ -193,8 +193,21 @@ string(<<?rsolidus/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
||||||
escape(Rest, Stack, Callbacks, Opts, Acc);
|
escape(Rest, Stack, Callbacks, Opts, Acc);
|
||||||
string(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
|
string(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
|
||||||
string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
|
string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
|
||||||
string(Bin, Stack, Callbacks, Opts, Acc) when byte_size(Bin) < 2 ->
|
string(Bin, Stack, Callbacks, Opts, Acc) ->
|
||||||
{incomplete, fun(Stream) -> string(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end}.
|
case partial_utf16(Bin) of
|
||||||
|
true ->
|
||||||
|
{incomplete, fun(Stream) -> string(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end}
|
||||||
|
; false ->
|
||||||
|
erlang:error(function_clause)
|
||||||
|
end.
|
||||||
|
|
||||||
|
%% partial_utf16/1 decides whether a leftover binary could still become a
%% character once more of the stream arrives. the guards inspect the second byte
%% of each two byte code unit.
%%
%% returns true when waiting for more input is worthwhile and false when the
%% bytes present can never form a character, so the caller can raise its error.

%% nothing buffered yet, anything may follow
partial_utf16(<<>>) -> true;
%% a single byte is always accepted: the byte these guards test has not
%% arrived yet, so nothing can be ruled out
partial_utf16(<<_X>>) -> true;
%% a complete high surrogate (tag byte 16#d8..16#db) waiting for its low
%% surrogate. 16#dc..16#df would be a low surrogate, which may never come first
partial_utf16(<<_Y, X>>) when X >= 16#d8, X =< 16#db -> true;
%% a high surrogate plus one byte of the next code unit. that byte is not the
%% one carrying the surrogate tag, so it is left unchecked
partial_utf16(<<_Y, X, _Z>>) when X >= 16#d8, X =< 16#db -> true;
partial_utf16(_) -> false.
|
||||||
|
|
||||||
|
|
||||||
%% only thing to note here is the additional accumulator passed to escaped_unicode used
|
%% only thing to note here is the additional accumulator passed to escaped_unicode used
|
||||||
|
|
|
@ -183,7 +183,8 @@ key(<<>>, Stack, Callbacks, Opts) ->
|
||||||
%% converted back to lists by the user anyways.
|
%% converted back to lists by the user anyways.
|
||||||
|
|
||||||
%% the clause starting with Bin is necessary for cases where a stream is broken at a
|
%% the clause starting with Bin is necessary for cases where a stream is broken at a
|
||||||
%% point where it contains only a partial utf-8 sequence.
|
%% point where it contains only a partial utf-8 sequence. we emulate a function_clause
|
||||||
|
%% error if the partial sequence is not valid utf-8 to maintain consistency of errors
|
||||||
|
|
||||||
string(<<?quote/?encoding, Rest/binary>>, [key|_] = Stack, Callbacks, Opts, Acc) ->
|
string(<<?quote/?encoding, Rest/binary>>, [key|_] = Stack, Callbacks, Opts, Acc) ->
|
||||||
colon(Rest, Stack, fold({key, lists:reverse(Acc)}, Callbacks), Opts);
|
colon(Rest, Stack, fold({key, lists:reverse(Acc)}, Callbacks), Opts);
|
||||||
|
@ -192,9 +193,32 @@ string(<<?quote/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
||||||
string(<<?rsolidus/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
string(<<?rsolidus/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
||||||
escape(Rest, Stack, Callbacks, Opts, Acc);
|
escape(Rest, Stack, Callbacks, Opts, Acc);
|
||||||
string(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
|
string(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
|
||||||
string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
|
string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
|
||||||
string(<<>>, Stack, Callbacks, Opts, Acc) ->
|
string(Bin, Stack, Callbacks, Opts, Acc) ->
|
||||||
{incomplete, fun(Stream) -> string(Stream, Stack, Callbacks, Opts, Acc) end}.
|
case partial_utf8(Bin) of
|
||||||
|
true ->
|
||||||
|
{incomplete, fun(Stream) -> string(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end}
|
||||||
|
; false ->
|
||||||
|
erlang:error(function_clause)
|
||||||
|
end.
|
||||||
|
|
||||||
|
%% in the case of broken (as in split over two halves of a stream) utf-8 input,
%% ensure that the half present is *possibly* valid
%%
%% returns true when the bytes are a proper prefix of a two, three or four byte
%% sequence (a lead byte followed only by continuation bytes 16#80..16#bf) and
%% false for everything else. every rejection goes through the final clause, so
%% this function never raises and the caller alone decides which error to raise.

%% nothing buffered yet, anything may follow
partial_utf8(<<>>) -> true;
%% lead byte of a two byte sequence
partial_utf8(<<X>>) when X >= 16#c2, X =< 16#df -> true;
%% lead byte of a three byte sequence, optionally with one continuation byte
partial_utf8(<<X>>) when X >= 16#e0, X =< 16#ef -> true;
partial_utf8(<<X, Y>>) when X >= 16#e0, X =< 16#ef, Y >= 16#80, Y =< 16#bf -> true;
%% lead byte of a four byte sequence, optionally with up to two continuation bytes
partial_utf8(<<X>>) when X >= 16#f0, X =< 16#f4 -> true;
partial_utf8(<<X, Y>>) when X >= 16#f0, X =< 16#f4, Y >= 16#80, Y =< 16#bf -> true;
partial_utf8(<<X, Y, Z>>)
        when X >= 16#f0, X =< 16#f4, Y >= 16#80, Y =< 16#bf, Z >= 16#80, Z =< 16#bf ->
    true;
%% anything else (bad lead byte, bad continuation byte, or too many bytes)
partial_utf8(_) -> false.
|
||||||
|
|
||||||
|
|
||||||
%% only thing to note here is the additional accumulator passed to escaped_unicode used
|
%% only thing to note here is the additional accumulator passed to escaped_unicode used
|
||||||
|
@ -346,9 +370,9 @@ decimal(<<?comma/?encoding, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, A
|
||||||
decimal(<<?zero/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
decimal(<<?zero/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
||||||
decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
|
decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
|
||||||
decimal(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
decimal(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
||||||
e(Rest, Stack, Callbacks, Opts, "e0." ++ Acc);
|
e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
|
||||||
decimal(<<$E/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
decimal(<<$E/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
|
||||||
e(Rest, Stack, Callbacks, Opts, "e0." ++ Acc);
|
e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
|
||||||
decimal(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
|
decimal(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
|
||||||
maybe_done(Rest, Stack, fold({float, lists:reverse(Acc)}, Callbacks), Opts);
|
maybe_done(Rest, Stack, fold({float, lists:reverse(Acc)}, Callbacks), Opts);
|
||||||
decimal(<<?solidus/?encoding, Rest/binary>>, Stack, Callbacks, ?comments_enabled(Opts), Acc) ->
|
decimal(<<?solidus/?encoding, Rest/binary>>, Stack, Callbacks, ?comments_enabled(Opts), Acc) ->
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue