Merge branch 'feature/loose_unicode' into develop
This commit is contained in:
commit
0e66a82b29
19 changed files with 228 additions and 15 deletions
|
@ -34,6 +34,7 @@
|
||||||
|
|
||||||
-type jsx_opts() :: [jsx_opt()].
|
-type jsx_opts() :: [jsx_opt()].
|
||||||
-type jsx_opt() :: {multi_term, true | false}
|
-type jsx_opt() :: {multi_term, true | false}
|
||||||
|
| loose_unicode
|
||||||
| {encoding, auto
|
| {encoding, auto
|
||||||
| utf8
|
| utf8
|
||||||
| utf16
|
| utf16
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
%% opts record for decoder
|
%% opts record for decoder
|
||||||
-record(opts, {
|
-record(opts, {
|
||||||
multi_term = false,
|
multi_term = false,
|
||||||
|
loose_unicode = false,
|
||||||
encoding = auto
|
encoding = auto
|
||||||
}).
|
}).
|
||||||
|
|
||||||
|
@ -81,7 +82,7 @@
|
||||||
).
|
).
|
||||||
|
|
||||||
-define(is_noncontrol(Symbol),
|
-define(is_noncontrol(Symbol),
|
||||||
Symbol >= ?space
|
(Symbol >= ?space)
|
||||||
).
|
).
|
||||||
|
|
||||||
-define(is_whitespace(Symbol),
|
-define(is_whitespace(Symbol),
|
||||||
|
@ -138,6 +139,8 @@ parse_opts([{multi_term, Value}|Rest], Opts) ->
|
||||||
parse_opts(Rest, Opts#opts{multi_term=Value});
|
parse_opts(Rest, Opts#opts{multi_term=Value});
|
||||||
parse_opts([multi_term|Rest], Opts) ->
|
parse_opts([multi_term|Rest], Opts) ->
|
||||||
parse_opts(Rest, Opts#opts{multi_term=true});
|
parse_opts(Rest, Opts#opts{multi_term=true});
|
||||||
|
parse_opts([loose_unicode|Rest], Opts) ->
|
||||||
|
parse_opts(Rest, Opts#opts{loose_unicode=true});
|
||||||
parse_opts([{encoding, _}|Rest], Opts) ->
|
parse_opts([{encoding, _}|Rest], Opts) ->
|
||||||
parse_opts(Rest, Opts);
|
parse_opts(Rest, Opts);
|
||||||
parse_opts(_, _) ->
|
parse_opts(_, _) ->
|
||||||
|
@ -350,7 +353,6 @@ key(Bin, Stack, Opts) ->
|
||||||
%% states
|
%% states
|
||||||
string(Bin, Stack, Opts) -> string(Bin, Stack, Opts, <<>>).
|
string(Bin, Stack, Opts) -> string(Bin, Stack, Opts, <<>>).
|
||||||
|
|
||||||
|
|
||||||
string(<<?quote/?utfx, Rest/binary>>, [key|_] = Stack, Opts, Acc) ->
|
string(<<?quote/?utfx, Rest/binary>>, [key|_] = Stack, Opts, Acc) ->
|
||||||
{jsx, {key, Acc}, fun() -> colon(Rest, Stack, Opts) end};
|
{jsx, {key, Acc}, fun() -> colon(Rest, Stack, Opts) end};
|
||||||
string(<<?quote/?utfx, Rest/binary>>, Stack, Opts, Acc) ->
|
string(<<?quote/?utfx, Rest/binary>>, Stack, Opts, Acc) ->
|
||||||
|
@ -358,8 +360,37 @@ string(<<?quote/?utfx, Rest/binary>>, Stack, Opts, Acc) ->
|
||||||
maybe_done(Rest, Stack, Opts)
|
maybe_done(Rest, Stack, Opts)
|
||||||
end};
|
end};
|
||||||
string(<<?rsolidus/?utfx, Rest/binary>>, Stack, Opts, Acc) ->
|
string(<<?rsolidus/?utfx, Rest/binary>>, Stack, Opts, Acc) ->
|
||||||
escape(Rest, Stack, Opts, Acc);
|
escape(Rest, Stack, Opts, Acc);
|
||||||
string(<<S/?utfx, Rest/binary>>, Stack, Opts, Acc) when ?is_noncontrol(S) ->
|
%% things get dumb here. erlang doesn't properly restrict unicode non-characters
|
||||||
|
%% so you can't trust the codepoints it returns always
|
||||||
|
%% the range 32..16#fdcf is safe, so allow that
|
||||||
|
string(<<S/?utfx, Rest/binary>>, Stack, Opts, Acc)
|
||||||
|
when ?is_noncontrol(S), S < 16#fdd0 ->
|
||||||
|
string(Rest, Stack, Opts, <<Acc/binary, S/utf8>>);
|
||||||
|
%% the range 16#fdf0..16#fffd is also safe
|
||||||
|
string(<<S/?utfx, Rest/binary>>, Stack, Opts, Acc)
|
||||||
|
when S > 16#fdef, S < 16#fffe ->
|
||||||
|
string(Rest, Stack, Opts, <<Acc/binary, S/utf8>>);
|
||||||
|
%% codepoints above the basic multilingual plane. every plane ends in two
%% noncharacters (u+1fffe, u+1ffff, ... u+10fffe, u+10ffff), which are exactly
%% the values whose low sixteen bits are fffe or ffff, so a single mask check
%% rejects all of them. the previous explicit list spelled u+10fffe as
%% 16#101fffe and therefore let that noncharacter through.
%% this clause deliberately comes after the two bmp clauses; the original
%% author believed that ordering to be faster than testing this range first
string(<<S/?utfx, Rest/binary>>, Stack, Opts, Acc)
        when S > 16#ffff, (S band 16#fffe) =/= 16#fffe ->
    string(Rest, Stack, Opts, <<Acc/binary, S/utf8>>);
|
||||||
string(Bin, Stack, Opts, Acc) ->
|
string(Bin, Stack, Opts, Acc) ->
|
||||||
case partial_utf(Bin) of
|
case partial_utf(Bin) of
|
||||||
|
@ -369,7 +400,11 @@ string(Bin, Stack, Opts, Acc) ->
|
||||||
; (Stream) ->
|
; (Stream) ->
|
||||||
string(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)
|
string(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)
|
||||||
end}
|
end}
|
||||||
; false -> {error, {badjson, Bin}}
|
; false ->
|
||||||
|
case Opts#opts.loose_unicode of
|
||||||
|
true -> noncharacter(Bin, Stack, Opts, Acc)
|
||||||
|
; false -> {error, {badjson, Bin}}
|
||||||
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
@ -380,12 +415,14 @@ partial_utf(<<X, Rest/binary>>) when X >= 16#e0, X =< 16#ef ->
|
||||||
case Rest of
|
case Rest of
|
||||||
<<>> -> true
|
<<>> -> true
|
||||||
; <<Y>> when Y >= 16#80, Y =< 16#bf -> true
|
; <<Y>> when Y >= 16#80, Y =< 16#bf -> true
|
||||||
|
; _ -> false
|
||||||
end;
|
end;
|
||||||
partial_utf(<<X, Rest/binary>>) when X >= 16#f0, X =< 16#f4 ->
|
partial_utf(<<X, Rest/binary>>) when X >= 16#f0, X =< 16#f4 ->
|
||||||
case Rest of
|
case Rest of
|
||||||
<<>> -> true
|
<<>> -> true
|
||||||
; <<Y>> when Y >= 16#80, Y =< 16#bf -> true
|
; <<Y>> when Y >= 16#80, Y =< 16#bf -> true
|
||||||
; <<Y, Z>> when Y >= 16#80, Y =< 16#bf, Z >= 16#80, Z =< 16#bf -> true
|
; <<Y, Z>> when Y >= 16#80, Y =< 16#bf, Z >= 16#80, Z =< 16#bf -> true
|
||||||
|
; _ -> false
|
||||||
end;
|
end;
|
||||||
partial_utf(_) -> false.
|
partial_utf(_) -> false.
|
||||||
-endif.
|
-endif.
|
||||||
|
@ -416,13 +453,101 @@ partial_utf(_) -> false.
|
||||||
-endif.
|
-endif.
|
||||||
|
|
||||||
-ifdef(utf32).
|
-ifdef(utf32).
|
||||||
partial_utf(<<_:32>>) -> false;
|
partial_utf(<<>>) -> true;
|
||||||
partial_utf(_) -> true.
|
partial_utf(<<_>>) -> true;
|
||||||
|
partial_utf(<<_, _>>) -> true;
|
||||||
|
partial_utf(<<_, _, _>>) -> true;
|
||||||
|
partial_utf(_) -> false.
|
||||||
-endif.
|
-endif.
|
||||||
|
|
||||||
-ifdef(utf32le).
|
-ifdef(utf32le).
|
||||||
partial_utf(<<_:32>>) -> false;
|
partial_utf(<<>>) -> true;
|
||||||
partial_utf(_) -> true.
|
partial_utf(<<_>>) -> true;
|
||||||
|
partial_utf(<<_, _>>) -> true;
|
||||||
|
partial_utf(<<_, _, _>>) -> true;
|
||||||
|
partial_utf(_) -> false.
|
||||||
|
-endif.
|
||||||
|
|
||||||
|
|
||||||
|
-ifdef(utf8).
%% loose_unicode fallback for utf8 input. string/4 calls this when it could
%% not accept the next sequence and the input is not merely truncated. each
%% successful clause drops one offending sequence, appends u+fffd (the
%% replacement character) to the accumulated string and resumes string/4 on
%% the remaining input.

%% decodes as utf8 and is not a control character, but string/4 refused it
noncharacter(<<Codepoint/utf8, Tail/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(Codepoint) ->
    string(Tail, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% ef bf be and ef bf bf, the utf8 encodings of u+fffe and u+ffff
noncharacter(<<16#ef, 16#bf, Last, Tail/binary>>, Stack, Opts, Acc)
        when Last =:= 16#be orelse Last =:= 16#bf ->
    string(Tail, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% ed followed by a byte >= a0 is a utf8 encoded surrogate; all three bytes
%% are discarded
noncharacter(<<16#ed, Second, _Third, Tail/binary>>, Stack, Opts, Acc)
        when Second >= 16#a0 ->
    string(Tail, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% anything else cannot be repaired
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
-endif.
|
||||||
|
|
||||||
|
-ifdef(utf16).
%% loose_unicode fallback for big endian utf16 input. string/4 calls this when
%% it could not accept the next sequence and the input is not merely
%% truncated. each successful clause drops one offending code unit, appends
%% u+fffd (the replacement character) to the accumulated string and resumes
%% string/4 on the remaining input.

%% decodes as utf16 and is not a control character, but string/4 refused it
noncharacter(<<S/utf16, Rest/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(S) ->
    string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% u+fffe and u+ffff are the byte pairs ff fe and ff ff. the second byte was
%% previously compared against 253/254 (u+fffd/u+fffe), so u+ffff was never
%% replaced
noncharacter(<<16#ff, X, Rest/binary>>, Stack, Opts, Acc)
        when X =:= 16#fe; X =:= 16#ff ->
    string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% surrogates: a code unit whose high byte lies in d8..df
noncharacter(<<X, _, Rest/binary>>, Stack, Opts, Acc)
        when X >= 16#d8, X =< 16#df ->
    string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% anything else cannot be repaired
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
-endif.
|
||||||
|
|
||||||
|
-ifdef(utf16le).
%% loose_unicode fallback for little endian utf16 input. string/4 calls this
%% when it could not accept the next sequence and the input is not merely
%% truncated. each successful clause drops one offending code unit, appends
%% u+fffd (the replacement character) to the accumulated string and resumes
%% string/4 on the remaining input.

%% decodes as utf16 and is not a control character, but string/4 refused it
noncharacter(<<S/utf16-little, Rest/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(S) ->
    string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% u+fffe and u+ffff are the byte pairs fe ff and ff ff in little endian
%% order. the low byte was previously compared against 253/254
%% (u+fffd/u+fffe), so u+ffff was never replaced
noncharacter(<<X, 16#ff, Rest/binary>>, Stack, Opts, Acc)
        when X =:= 16#fe; X =:= 16#ff ->
    string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% surrogates: a code unit whose high (second) byte lies in d8..df
noncharacter(<<_, X, Rest/binary>>, Stack, Opts, Acc)
        when X >= 16#d8, X =< 16#df ->
    string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% anything else cannot be repaired
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
-endif.
|
||||||
|
|
||||||
|
-ifdef(utf32).
%% loose_unicode fallback for big endian utf32 input. string/4 calls this when
%% it could not accept the next sequence and the input is not merely
%% truncated. each successful clause drops one offending four byte unit,
%% appends u+fffd (the replacement character) to the accumulated string and
%% resumes string/4 on the remaining input.

%% decodes as utf32 and is not a control character, but string/4 refused it
noncharacter(<<Codepoint/utf32, Tail/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(Codepoint) ->
    string(Tail, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% 00 00 ff fe and 00 00 ff ff: u+fffe and u+ffff
noncharacter(<<0, 0, 16#ff, Low, Tail/binary>>, Stack, Opts, Acc)
        when Low =:= 16#fe orelse Low =:= 16#ff ->
    string(Tail, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% 00 00 d8..df xx: a surrogate stored as a utf32 unit
noncharacter(<<0, 0, High, _Low, Tail/binary>>, Stack, Opts, Acc)
        when High >= 16#d8 andalso High =< 16#df ->
    string(Tail, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% anything else cannot be repaired
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
-endif.
|
||||||
|
|
||||||
|
-ifdef(utf32le).
%% loose_unicode fallback for little endian utf32 input. string/4 calls this
%% when it could not accept the next sequence and the input is not merely
%% truncated. each successful clause drops one offending four byte unit,
%% appends u+fffd (the replacement character) to the accumulated string and
%% resumes string/4 on the remaining input.

%% decodes as utf32 and is not a control character, but string/4 refused it
noncharacter(<<Codepoint/utf32-little, Tail/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(Codepoint) ->
    string(Tail, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% fe ff 00 00 and ff ff 00 00: u+fffe and u+ffff
noncharacter(<<Low, 16#ff, 0, 0, Tail/binary>>, Stack, Opts, Acc)
        when Low =:= 16#fe orelse Low =:= 16#ff ->
    string(Tail, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% xx d8..df 00 00: a surrogate stored as a utf32 unit
noncharacter(<<_Low, High, 0, 0, Tail/binary>>, Stack, Opts, Acc)
        when High >= 16#d8 andalso High =< 16#df ->
    string(Tail, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% anything else cannot be repaired
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
|
||||||
-endif.
|
-endif.
|
||||||
|
|
||||||
|
|
||||||
|
@ -467,11 +592,21 @@ escaped_unicode(<<D/?utfx, Rest/binary>>, Stack, Opts, String, [C, B, A])
|
||||||
low_surrogate(Rest, Stack, Opts, String, X)
|
low_surrogate(Rest, Stack, Opts, String, X)
|
||||||
%% non-characters, you're not allowed to exchange these
|
%% non-characters, you're not allowed to exchange these
|
||||||
; X when X == 16#fffe; X == 16#ffff; X >= 16#fdd0, X =< 16#fdef ->
|
; X when X == 16#fffe; X == 16#ffff; X >= 16#fdd0, X =< 16#fdef ->
|
||||||
{error, {badjson, <<D/?utfx, Rest/binary>>}}
|
case Opts#opts.loose_unicode of
|
||||||
|
true ->
|
||||||
|
string(Rest, Stack, Opts, <<String/binary, 16#fffd/utf8>>)
|
||||||
|
; false ->
|
||||||
|
{error, {badjson, <<D/?utfx, Rest/binary>>}}
|
||||||
|
end
|
||||||
%% allowing interchange of null bytes allows attackers to forge
|
%% allowing interchange of null bytes allows attackers to forge
|
||||||
%% malicious streams
|
%% malicious streams
|
||||||
; X when X == 16#0000 ->
|
; X when X == 16#0000 ->
|
||||||
{error, {badjson, <<D/?utfx, Rest/binary>>}}
|
case Opts#opts.loose_unicode of
|
||||||
|
true ->
|
||||||
|
string(Rest, Stack, Opts, <<String/binary, 16#fffd/utf8>>)
|
||||||
|
; false ->
|
||||||
|
{error, {badjson, <<D/?utfx, Rest/binary>>}}
|
||||||
|
end
|
||||||
%% anything else
|
%% anything else
|
||||||
; X ->
|
; X ->
|
||||||
string(Rest, Stack, Opts, <<String/binary, X/utf8>>)
|
string(Rest, Stack, Opts, <<String/binary, X/utf8>>)
|
||||||
|
@ -498,6 +633,14 @@ escaped_unicode(Bin, Stack, Opts, String, Acc) ->
|
||||||
|
|
||||||
low_surrogate(<<?rsolidus/?utfx, Rest/binary>>, Stack, Opts, String, High) ->
|
low_surrogate(<<?rsolidus/?utfx, Rest/binary>>, Stack, Opts, String, High) ->
|
||||||
low_surrogate_u(Rest, Stack, Opts, String, High);
|
low_surrogate_u(Rest, Stack, Opts, String, High);
|
||||||
|
%% not an escaped codepoint, our high codepoint is illegal
|
||||||
|
low_surrogate(<<S/?utfx, Rest/binary>> = Bin, Stack, Opts, String, _) ->
|
||||||
|
case Opts#opts.loose_unicode of
|
||||||
|
true ->
|
||||||
|
string(Bin, Stack, Opts, <<String/binary, 16#fffd/utf8>>)
|
||||||
|
; false ->
|
||||||
|
{error, {badjson, <<S/?utfx, Rest/binary>>}}
|
||||||
|
end;
|
||||||
low_surrogate(Bin, Stack, Opts, String, High) ->
|
low_surrogate(Bin, Stack, Opts, String, High) ->
|
||||||
case ?partial_codepoint(Bin) of
|
case ?partial_codepoint(Bin) of
|
||||||
true ->
|
true ->
|
||||||
|
@ -518,6 +661,19 @@ low_surrogate(Bin, Stack, Opts, String, High) ->
|
||||||
|
|
||||||
low_surrogate_u(<<$u/?utfx, Rest/binary>>, Stack, Opts, String, High) ->
|
low_surrogate_u(<<$u/?utfx, Rest/binary>>, Stack, Opts, String, High) ->
|
||||||
low_surrogate(Rest, Stack, Opts, String, [], High);
|
low_surrogate(Rest, Stack, Opts, String, [], High);
|
||||||
|
%% not a low surrogate, dispatch back to string to handle, including the
|
||||||
|
%% rsolidus we parsed previously
|
||||||
|
low_surrogate_u(<<S/?utfx, Rest/binary>> = Bin, Stack, Opts, String, _) ->
|
||||||
|
case Opts#opts.loose_unicode of
|
||||||
|
true ->
|
||||||
|
string(<<?rsolidus/?utfx, Bin/binary>>,
|
||||||
|
Stack,
|
||||||
|
Opts,
|
||||||
|
<<String/binary, 16#fffd/utf8>>
|
||||||
|
)
|
||||||
|
; false ->
|
||||||
|
{error, {badjson, <<S/?utfx, Rest/binary>>}}
|
||||||
|
end;
|
||||||
low_surrogate_u(Bin, Stack, Opts, String, High) ->
|
low_surrogate_u(Bin, Stack, Opts, String, High) ->
|
||||||
case ?partial_codepoint(Bin) of
|
case ?partial_codepoint(Bin) of
|
||||||
true ->
|
true ->
|
||||||
|
@ -543,14 +699,32 @@ low_surrogate(<<D/?utfx, Rest/binary>>, Stack, Opts, String, [C, B, A], High)
|
||||||
V = surrogate_to_codepoint(High, X),
|
V = surrogate_to_codepoint(High, X),
|
||||||
case V rem 16#10000 of
|
case V rem 16#10000 of
|
||||||
Y when Y == 16#fffe; Y == 16#ffff ->
|
Y when Y == 16#fffe; Y == 16#ffff ->
|
||||||
{error, {badjson, <<D/?utfx, Rest/binary>>}}
|
case Opts#opts.loose_unicode of
|
||||||
|
true ->
|
||||||
|
string(Rest,
|
||||||
|
Stack,
|
||||||
|
Opts,
|
||||||
|
<<String/binary, 16#fffd/utf8, 16#fffd/utf8>>
|
||||||
|
)
|
||||||
|
; false ->
|
||||||
|
{error, {badjson, <<D/?utfx, Rest/binary>>}}
|
||||||
|
end
|
||||||
; Y ->
|
; Y ->
|
||||||
io:format("~p ~p~n", [V, Y]),
|
io:format("~p ~p~n", [V, Y]),
|
||||||
string(Rest, Stack, Opts, <<String/binary, V/utf8>>)
|
string(Rest, Stack, Opts, <<String/binary, V/utf8>>)
|
||||||
end
|
end
|
||||||
%% not a low surrogate, bad bad bad
|
%% not a low surrogate, bad bad bad
|
||||||
; _ ->
|
; _ ->
|
||||||
{error, {badjson, <<D/?utfx, Rest/binary>>}}
|
case Opts#opts.loose_unicode of
|
||||||
|
true ->
|
||||||
|
string(Rest,
|
||||||
|
Stack,
|
||||||
|
Opts,
|
||||||
|
<<String/binary, 16#fffd/utf8, 16#fffd/utf8>>
|
||||||
|
)
|
||||||
|
; false ->
|
||||||
|
{error, {badjson, <<D/?utfx, Rest/binary>>}}
|
||||||
|
end
|
||||||
end;
|
end;
|
||||||
low_surrogate(<<S/?utfx, Rest/binary>>, Stack, Opts, String, Acc, High)
|
low_surrogate(<<S/?utfx, Rest/binary>>, Stack, Opts, String, Acc, High)
|
||||||
when ?is_hex(S) ->
|
when ?is_hex(S) ->
|
||||||
|
|
|
@ -204,10 +204,10 @@ detect_encoding(<<X, 0, 0, 0, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
||||||
detect_encoding(<<0, 0, 0, X, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
detect_encoding(<<0, 0, 0, X, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
||||||
(jsx_utf32:decoder(Opts))(JSON);
|
(jsx_utf32:decoder(Opts))(JSON);
|
||||||
%% utf16-little null order detection
|
%% utf16-little null order detection
|
||||||
detect_encoding(<<X, 0, _, 0, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
detect_encoding(<<X, 0, _, _, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
||||||
(jsx_utf16le:decoder(Opts))(JSON);
|
(jsx_utf16le:decoder(Opts))(JSON);
|
||||||
%% utf16-big null order detection
|
%% utf16-big null order detection
|
||||||
detect_encoding(<<0, X, 0, _, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
detect_encoding(<<0, X, _, _, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
||||||
(jsx_utf16:decoder(Opts))(JSON);
|
(jsx_utf16:decoder(Opts))(JSON);
|
||||||
%% utf8 null order detection
|
%% utf8 null order detection
|
||||||
detect_encoding(<<X, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
|
detect_encoding(<<X, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
|
||||||
|
|
1
test/cases/escaped_noncharacter_ext_replaced.json
Normal file
1
test/cases/escaped_noncharacter_ext_replaced.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
"\ud83f\udfff"
|
4
test/cases/escaped_noncharacter_ext_replaced.test
Normal file
4
test/cases/escaped_noncharacter_ext_replaced.test
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{name, "escaped noncharacter (extended)"}.
|
||||||
|
{jsx, [{string, <<16#fffd/utf8, 16#fffd/utf8>>}, end_json]}.
|
||||||
|
{json, "escaped_noncharacter_ext.json"}.
|
||||||
|
{jsx_flags, [loose_unicode]}.
|
1
test/cases/escaped_noncharacter_replaced.json
Normal file
1
test/cases/escaped_noncharacter_replaced.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
"\uffff"
|
4
test/cases/escaped_noncharacter_replaced.test
Normal file
4
test/cases/escaped_noncharacter_replaced.test
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{name, "escaped noncharacter replacement"}.
|
||||||
|
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
|
||||||
|
{json, "escaped_noncharacter_replaced.json"}.
|
||||||
|
{jsx_flags, [loose_unicode]}.
|
1
test/cases/escaped_nullbyte_replaced.json
Normal file
1
test/cases/escaped_nullbyte_replaced.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
"\u0000"
|
4
test/cases/escaped_nullbyte_replaced.test
Normal file
4
test/cases/escaped_nullbyte_replaced.test
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{name, "escaped nullbyte replaced"}.
|
||||||
|
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
|
||||||
|
{json, "escaped_nullbyte_replaced.json"}.
|
||||||
|
{jsx_flags, [loose_unicode]}.
|
1
test/cases/noncharacter.json
Normal file
1
test/cases/noncharacter.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
""
|
3
test/cases/noncharacter.test
Normal file
3
test/cases/noncharacter.test
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
{name, "noncharacter"}.
|
||||||
|
{jsx, {error, badjson}}.
|
||||||
|
{json, "noncharacter.json"}.
|
1
test/cases/noncharacter_replaced.json
Normal file
1
test/cases/noncharacter_replaced.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
""
|
4
test/cases/noncharacter_replaced.test
Normal file
4
test/cases/noncharacter_replaced.test
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{name, "noncharacter replaced"}.
|
||||||
|
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
|
||||||
|
{json, "noncharacter_replaced.json"}.
|
||||||
|
{jsx_flags, [loose_unicode]}.
|
1
test/cases/nullbyte_replaced.json
Normal file
1
test/cases/nullbyte_replaced.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
"\u0000"
|
4
test/cases/nullbyte_replaced.test
Normal file
4
test/cases/nullbyte_replaced.test
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{name, "nullbyte replaced"}.
|
||||||
|
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
|
||||||
|
{json, "nullbyte_replaced.json"}.
|
||||||
|
{jsx_flags, [loose_unicode]}.
|
1
test/cases/unpaired_surrogate.json
Normal file
1
test/cases/unpaired_surrogate.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
["\ud801blah"]
|
3
test/cases/unpaired_surrogate.test
Normal file
3
test/cases/unpaired_surrogate.test
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
{name, "unpaired_surrogate"}.
|
||||||
|
{jsx, {error, badjson}}.
|
||||||
|
{json, "unpaired_surrogate.json"}.
|
1
test/cases/unpaired_surrogate_replaced.json
Normal file
1
test/cases/unpaired_surrogate_replaced.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
["\ud801blah"]
|
4
test/cases/unpaired_surrogate_replaced.test
Normal file
4
test/cases/unpaired_surrogate_replaced.test
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{name, "unpaired surrogate replaced"}.
|
||||||
|
{jsx, [start_array,{string,<<16#fffd/utf8, "blah">>},end_array,end_json]}.
|
||||||
|
{json, "unpaired_surrogate_replaced.json"}.
|
||||||
|
{jsx_flags, [loose_unicode]}.
|
Loading…
Add table
Add a link
Reference in a new issue