merge in experimental explicitspeed branch
This commit is contained in:
commit
d5d7273e1e
7 changed files with 388 additions and 278 deletions
|
@ -1 +0,0 @@
|
|||
""
|
|
@ -1,3 +0,0 @@
|
|||
{name, "noncharacter"}.
|
||||
{jsx, {error, badjson}}.
|
||||
{json, "noncharacter.json"}.
|
|
@ -1 +0,0 @@
|
|||
""
|
|
@ -1,4 +0,0 @@
|
|||
{name, "noncharacter replaced"}.
|
||||
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
|
||||
{json, "noncharacter_replaced.json"}.
|
||||
{jsx_flags, [loose_unicode]}.
|
|
@ -145,11 +145,11 @@ value(<<$f, Rest/binary>>, Handler, Stack, Opts) ->
|
|||
value(<<$n, Rest/binary>>, Handler, Stack, Opts) ->
|
||||
nu(Rest, Handler, Stack, Opts);
|
||||
value(<<?negative, Rest/binary>>, Handler, Stack, Opts) ->
|
||||
negative(Rest, Handler, [?new_seq($-)|Stack], Opts);
|
||||
negative(Rest, Handler, [[$-]|Stack], Opts);
|
||||
value(<<?zero, Rest/binary>>, Handler, Stack, Opts) ->
|
||||
zero(Rest, Handler, [?new_seq($0)|Stack], Opts);
|
||||
zero(Rest, Handler, [[$0]|Stack], Opts);
|
||||
value(<<S, Rest/binary>>, Handler, Stack, Opts) when ?is_nonzero(S) ->
|
||||
integer(Rest, Handler, [?new_seq(S)|Stack], Opts);
|
||||
integer(Rest, Handler, [[S]|Stack], Opts);
|
||||
value(<<?start_object, Rest/binary>>, {Handler, State}, Stack, Opts) ->
|
||||
object(Rest, {Handler, Handler:handle_event(start_object, State)}, [key|Stack], Opts);
|
||||
value(<<?start_array, Rest/binary>>, {Handler, State}, Stack, Opts) ->
|
||||
|
@ -193,11 +193,11 @@ array(<<$f, Rest/binary>>, Handler, Stack, Opts) ->
|
|||
array(<<$n, Rest/binary>>, Handler, Stack, Opts) ->
|
||||
nu(Rest, Handler, Stack, Opts);
|
||||
array(<<?negative, Rest/binary>>, Handler, Stack, Opts) ->
|
||||
negative(Rest, Handler, [?new_seq($-)|Stack], Opts);
|
||||
negative(Rest, Handler, [[$-]|Stack], Opts);
|
||||
array(<<?zero, Rest/binary>>, Handler, Stack, Opts) ->
|
||||
zero(Rest, Handler, [?new_seq($0)|Stack], Opts);
|
||||
zero(Rest, Handler, [[$0]|Stack], Opts);
|
||||
array(<<S, Rest/binary>>, Handler, Stack, Opts) when ?is_nonzero(S) ->
|
||||
integer(Rest, Handler, [?new_seq(S)|Stack], Opts);
|
||||
integer(Rest, Handler, [[S]|Stack], Opts);
|
||||
array(<<?start_object, Rest/binary>>, {Handler, State}, Stack, Opts) ->
|
||||
object(Rest, {Handler, Handler:handle_event(start_object, State)}, [key|Stack], Opts);
|
||||
array(<<?start_array, Rest/binary>>, {Handler, State}, Stack, Opts) ->
|
||||
|
@ -260,6 +260,12 @@ partial_utf(<<X, Y, Z>>)
|
|||
partial_utf(_) -> false.
|
||||
|
||||
|
||||
%% explicitly whitelist ascii set for better efficiency (seriously, it's worth
|
||||
%% almost a 20% increase)
|
||||
string(<<32, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 32)|Stack], Opts);
|
||||
string(<<33, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 33)|Stack], Opts);
|
||||
string(<<?doublequote, Rest/binary>>, {Handler, State}, S, Opts) ->
|
||||
case S of
|
||||
[Acc, key|Stack] ->
|
||||
|
@ -269,6 +275,14 @@ string(<<?doublequote, Rest/binary>>, {Handler, State}, S, Opts) ->
|
|||
[Acc|Stack] ->
|
||||
maybe_done(Rest, {Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)}, Stack, Opts)
|
||||
end;
|
||||
string(<<35, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 35)|Stack], Opts);
|
||||
string(<<36, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 36)|Stack], Opts);
|
||||
string(<<37, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 37)|Stack], Opts);
|
||||
string(<<38, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 38)|Stack], Opts);
|
||||
string(<<?singlequote, Rest/binary>>, {Handler, State}, S, Opts = #opts{single_quotes=true}) ->
|
||||
case S of
|
||||
[Acc, single_quote, key|Stack] ->
|
||||
|
@ -278,37 +292,183 @@ string(<<?singlequote, Rest/binary>>, {Handler, State}, S, Opts = #opts{single_q
|
|||
[Acc|Stack] ->
|
||||
string(Rest, {Handler, State}, [?acc_seq(Acc, ?singlequote)|Stack], Opts)
|
||||
end;
|
||||
string(<<40, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 40)|Stack], Opts);
|
||||
string(<<41, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 41)|Stack], Opts);
|
||||
string(<<42, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 42)|Stack], Opts);
|
||||
string(<<43, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 43)|Stack], Opts);
|
||||
string(<<44, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 44)|Stack], Opts);
|
||||
string(<<45, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 45)|Stack], Opts);
|
||||
string(<<46, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 46)|Stack], Opts);
|
||||
string(<<47, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 47)|Stack], Opts);
|
||||
string(<<48, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 48)|Stack], Opts);
|
||||
string(<<49, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 49)|Stack], Opts);
|
||||
string(<<50, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 50)|Stack], Opts);
|
||||
string(<<51, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 51)|Stack], Opts);
|
||||
string(<<52, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 52)|Stack], Opts);
|
||||
string(<<53, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 53)|Stack], Opts);
|
||||
string(<<54, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 54)|Stack], Opts);
|
||||
string(<<55, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 55)|Stack], Opts);
|
||||
string(<<56, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 56)|Stack], Opts);
|
||||
string(<<57, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 57)|Stack], Opts);
|
||||
string(<<58, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 58)|Stack], Opts);
|
||||
string(<<59, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 59)|Stack], Opts);
|
||||
string(<<60, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 60)|Stack], Opts);
|
||||
string(<<61, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 61)|Stack], Opts);
|
||||
string(<<62, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 62)|Stack], Opts);
|
||||
string(<<63, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 63)|Stack], Opts);
|
||||
string(<<64, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 64)|Stack], Opts);
|
||||
string(<<65, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 65)|Stack], Opts);
|
||||
string(<<66, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 66)|Stack], Opts);
|
||||
string(<<67, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 67)|Stack], Opts);
|
||||
string(<<68, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 68)|Stack], Opts);
|
||||
string(<<69, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 69)|Stack], Opts);
|
||||
string(<<70, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 70)|Stack], Opts);
|
||||
string(<<71, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 71)|Stack], Opts);
|
||||
string(<<72, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 72)|Stack], Opts);
|
||||
string(<<73, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 73)|Stack], Opts);
|
||||
string(<<74, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 74)|Stack], Opts);
|
||||
string(<<75, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 75)|Stack], Opts);
|
||||
string(<<76, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 76)|Stack], Opts);
|
||||
string(<<77, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 77)|Stack], Opts);
|
||||
string(<<78, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 78)|Stack], Opts);
|
||||
string(<<79, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 79)|Stack], Opts);
|
||||
string(<<80, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 80)|Stack], Opts);
|
||||
string(<<81, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 81)|Stack], Opts);
|
||||
string(<<82, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 82)|Stack], Opts);
|
||||
string(<<83, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 83)|Stack], Opts);
|
||||
string(<<84, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 84)|Stack], Opts);
|
||||
string(<<85, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 85)|Stack], Opts);
|
||||
string(<<86, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 86)|Stack], Opts);
|
||||
string(<<87, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 87)|Stack], Opts);
|
||||
string(<<88, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 88)|Stack], Opts);
|
||||
string(<<89, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 89)|Stack], Opts);
|
||||
string(<<90, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 90)|Stack], Opts);
|
||||
string(<<91, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 91)|Stack], Opts);
|
||||
string(<<?rsolidus/utf8, Rest/binary>>, Handler, Stack, Opts) ->
|
||||
escape(Rest, Handler, Stack, Opts);
|
||||
%% things get dumb here. erlang doesn't properly restrict unicode non-characters
|
||||
%% so you can't trust the codepoints it returns always
|
||||
%% the range 32..16#fdcf is safe, so allow that
|
||||
string(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts)
|
||||
when ?is_noncontrol(S), S < 16#fdd0 ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts);
|
||||
%% the range 16#fdf0..16#fffd is also safe
|
||||
string(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts)
|
||||
when S > 16#fdef, S < 16#fffe ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts);
|
||||
%% yes, i think it's insane too
|
||||
string(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts)
|
||||
when S > 16#ffff andalso
|
||||
S =/= 16#1fffe andalso S =/= 16#1ffff andalso
|
||||
S =/= 16#2fffe andalso S =/= 16#2ffff andalso
|
||||
S =/= 16#3fffe andalso S =/= 16#3ffff andalso
|
||||
S =/= 16#4fffe andalso S =/= 16#4ffff andalso
|
||||
S =/= 16#5fffe andalso S =/= 16#5ffff andalso
|
||||
S =/= 16#6fffe andalso S =/= 16#6ffff andalso
|
||||
S =/= 16#7fffe andalso S =/= 16#7ffff andalso
|
||||
S =/= 16#8fffe andalso S =/= 16#8ffff andalso
|
||||
S =/= 16#9fffe andalso S =/= 16#9ffff andalso
|
||||
S =/= 16#afffe andalso S =/= 16#affff andalso
|
||||
S =/= 16#bfffe andalso S =/= 16#bffff andalso
|
||||
S =/= 16#cfffe andalso S =/= 16#cffff andalso
|
||||
S =/= 16#dfffe andalso S =/= 16#dffff andalso
|
||||
S =/= 16#efffe andalso S =/= 16#effff andalso
|
||||
S =/= 16#ffffe andalso S =/= 16#fffff andalso
|
||||
S =/= 16#10fffe andalso S =/= 16#10ffff ->
|
||||
string(<<93, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 93)|Stack], Opts);
|
||||
string(<<94, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 94)|Stack], Opts);
|
||||
string(<<95, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 95)|Stack], Opts);
|
||||
string(<<96, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 96)|Stack], Opts);
|
||||
string(<<97, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 97)|Stack], Opts);
|
||||
string(<<98, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 98)|Stack], Opts);
|
||||
string(<<99, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 99)|Stack], Opts);
|
||||
string(<<100, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 100)|Stack], Opts);
|
||||
string(<<101, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 101)|Stack], Opts);
|
||||
string(<<102, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 102)|Stack], Opts);
|
||||
string(<<103, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 103)|Stack], Opts);
|
||||
string(<<104, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 104)|Stack], Opts);
|
||||
string(<<105, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 105)|Stack], Opts);
|
||||
string(<<106, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 106)|Stack], Opts);
|
||||
string(<<107, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 107)|Stack], Opts);
|
||||
string(<<108, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 108)|Stack], Opts);
|
||||
string(<<109, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 109)|Stack], Opts);
|
||||
string(<<110, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 110)|Stack], Opts);
|
||||
string(<<111, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 111)|Stack], Opts);
|
||||
string(<<112, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 112)|Stack], Opts);
|
||||
string(<<113, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 113)|Stack], Opts);
|
||||
string(<<114, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 114)|Stack], Opts);
|
||||
string(<<115, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 115)|Stack], Opts);
|
||||
string(<<116, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 116)|Stack], Opts);
|
||||
string(<<117, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 117)|Stack], Opts);
|
||||
string(<<118, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 118)|Stack], Opts);
|
||||
string(<<119, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 119)|Stack], Opts);
|
||||
string(<<120, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 120)|Stack], Opts);
|
||||
string(<<121, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 121)|Stack], Opts);
|
||||
string(<<122, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 122)|Stack], Opts);
|
||||
string(<<123, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 123)|Stack], Opts);
|
||||
string(<<124, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 124)|Stack], Opts);
|
||||
string(<<125, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 125)|Stack], Opts);
|
||||
string(<<126, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 126)|Stack], Opts);
|
||||
string(<<127, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 127)|Stack], Opts);
|
||||
string(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when ?is_noncontrol(S) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts);
|
||||
string(Bin, Handler, Stack, Opts) ->
|
||||
case partial_utf(Bin) of
|
||||
|
@ -321,35 +481,13 @@ string(Bin, Handler, Stack, Opts) ->
|
|||
end.
|
||||
|
||||
%% we don't need to guard against partial utf here, because it's already taken
|
||||
%% care of in string. theoretically, the last clause of noncharacter/4 is
|
||||
%% unreachable
|
||||
%% non-characters erlang doesn't recognize as non-characters
|
||||
noncharacter(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts)
|
||||
when ?is_noncontrol(S) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts);
|
||||
%% u+fffe and u+ffff
|
||||
noncharacter(<<239, 191, X, Rest/binary>>, Handler, [Acc|Stack], Opts)
|
||||
when X == 190; X == 191 ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts);
|
||||
%% care of in string
|
||||
%% surrogates
|
||||
noncharacter(<<237, X, _, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 160 ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts);
|
||||
noncharacter(<<X, Y, 191, Z, Rest/binary>>, Handler, [Acc|Stack], Opts)
|
||||
when (
|
||||
(X == 240 andalso Y == 159) orelse
|
||||
(X == 240 andalso Y == 175) orelse
|
||||
(X == 240 andalso Y == 191) orelse
|
||||
(
|
||||
(X == 241 orelse X == 242 orelse X == 243) andalso
|
||||
(Y == 143 orelse Y == 159 orelse Y == 175 orelse Y == 191)
|
||||
) orelse
|
||||
(X == 244 andalso Y == 143)
|
||||
) andalso (Z == 190 orelse Z == 191) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts);
|
||||
%% bad utf8
|
||||
noncharacter(<<_, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts);
|
||||
noncharacter(Bin, Handler, Stack, Opts) ->
|
||||
?error([Bin, Handler, Stack, Opts]).
|
||||
string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts).
|
||||
|
||||
|
||||
escape(<<$b, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
|
||||
|
@ -1027,7 +1165,6 @@ comments_test_() ->
|
|||
)}
|
||||
].
|
||||
|
||||
|
||||
escape_forward_slash_test_() ->
|
||||
[
|
||||
{"escape forward slash test", ?_assertEqual(
|
||||
|
@ -1036,27 +1173,6 @@ escape_forward_slash_test_() ->
|
|||
)}
|
||||
].
|
||||
|
||||
|
||||
noncharacters_test_() ->
|
||||
[
|
||||
{"noncharacters - badjson",
|
||||
?_assertEqual(check_bad(noncharacters()), [])
|
||||
},
|
||||
{"noncharacters - replaced",
|
||||
?_assertEqual(check_replaced(noncharacters()), [])
|
||||
}
|
||||
].
|
||||
|
||||
extended_noncharacters_test_() ->
|
||||
[
|
||||
{"extended noncharacters - badjson",
|
||||
?_assertEqual(check_bad(extended_noncharacters()), [])
|
||||
},
|
||||
{"extended noncharacters - replaced",
|
||||
?_assertEqual(check_replaced(extended_noncharacters()), [])
|
||||
}
|
||||
].
|
||||
|
||||
surrogates_test_() ->
|
||||
[
|
||||
{"surrogates - badjson",
|
||||
|
@ -1074,16 +1190,6 @@ control_test_() ->
|
|||
}
|
||||
].
|
||||
|
||||
reserved_test_() ->
|
||||
[
|
||||
{"reserved noncharacters - badjson",
|
||||
?_assertEqual(check_bad(reserved_space()), [])
|
||||
},
|
||||
{"reserved noncharacters - replaced",
|
||||
?_assertEqual(check_replaced(reserved_space()), [])
|
||||
}
|
||||
].
|
||||
|
||||
good_characters_test_() ->
|
||||
[
|
||||
{"acceptable codepoints",
|
||||
|
@ -1173,32 +1279,13 @@ decode(JSON, Opts) ->
|
|||
end.
|
||||
|
||||
|
||||
|
||||
noncharacters() -> lists:seq(16#fffe, 16#ffff).
|
||||
|
||||
%% the u+Xfffe and u+Xffff codepoints of planes 1 through 16, in ascending
%% order (u+1fffe, u+1ffff, u+2fffe, ... u+10fffe, u+10ffff)
extended_noncharacters() ->
    [(Plane bsl 16) + Low || Plane <- lists:seq(1, 16), Low <- [16#fffe, 16#ffff]].
|
||||
|
||||
surrogates() -> lists:seq(16#d800, 16#dfff).
|
||||
|
||||
control_characters() -> lists:seq(1, 31).
|
||||
|
||||
reserved_space() -> lists:seq(16#fdd0, 16#fdef).
|
||||
good() -> [32, 33] ++ lists:seq(16#23, 16#5b) ++ lists:seq(16#5d, 16#d7ff) ++ lists:seq(16#e000, 16#ffff).
|
||||
|
||||
good() -> [32, 33]
|
||||
++ lists:seq(16#23, 16#5b)
|
||||
++ lists:seq(16#5d, 16#d7ff)
|
||||
++ lists:seq(16#e000, 16#fdcf)
|
||||
++ lists:seq(16#fdf0, 16#fffd).
|
||||
|
||||
good_extended() -> lists:seq(16#100000, 16#10fffd).
|
||||
good_extended() -> lists:seq(16#100000, 16#10ffff).
|
||||
|
||||
%% erlang refuses to encode certain codepoints, so fake them all
|
||||
to_fake_utf(N, utf8) when N < 16#0080 -> <<34/utf8, N:8, 34/utf8>>;
|
||||
|
|
|
@ -53,7 +53,7 @@ start(Term, {Handler, State}, Opts) ->
|
|||
|
||||
|
||||
value(String, {Handler, State}, Opts) when is_binary(String) ->
|
||||
Handler:handle_event({string, clean_string(String, <<>>, Opts)}, State);
|
||||
Handler:handle_event({string, clean_string(String, Opts)}, State);
|
||||
value(Float, {Handler, State}, _Opts) when is_float(Float) ->
|
||||
Handler:handle_event({float, Float}, State);
|
||||
value(Int, {Handler, State}, _Opts) when is_integer(Int) ->
|
||||
|
@ -83,7 +83,7 @@ object([{Key, Value}|Rest], {Handler, State}, Opts) ->
|
|||
Handler,
|
||||
value(
|
||||
Value,
|
||||
{Handler, Handler:handle_event({key, clean_string(fix_key(Key), <<>>, Opts)}, State)},
|
||||
{Handler, Handler:handle_event({key, clean_string(fix_key(Key), Opts)}, State)},
|
||||
Opts
|
||||
)
|
||||
},
|
||||
|
@ -103,96 +103,31 @@ fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8));
|
|||
fix_key(Key) when is_binary(Key) -> Key.
|
||||
|
||||
|
||||
clean_string(<<$\", Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, $\\, $\">>, Opts);
|
||||
clean_string(<<$\\, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, $\\, $\\>>, Opts);
|
||||
clean_string(<<$\b, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, $\\, $b>>, Opts);
|
||||
clean_string(<<$\f, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, $\\, $f>>, Opts);
|
||||
clean_string(<<$\n, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, $\\, $n>>, Opts);
|
||||
clean_string(<<$\r, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, $\\, $r>>, Opts);
|
||||
clean_string(<<$\t, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, $\\, $t>>, Opts);
|
||||
clean_string(<<$/, Rest/binary>>, Acc, Opts=#opts{json_escape=true, escape_forward_slash=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, $\\, $/>>, Opts);
|
||||
clean_string(<<16#2028/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true, no_jsonp_escapes=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, 16#2028/utf8>>, Opts);
|
||||
clean_string(<<16#2029/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true, no_jsonp_escapes=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, 16#2029/utf8>>, Opts);
|
||||
clean_string(<<16#2028/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, (json_escape_sequence(16#2028))/binary>>, Opts);
|
||||
clean_string(<<16#2029/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, (json_escape_sequence(16#2029))/binary>>, Opts);
|
||||
clean_string(<<C/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) when C < 32 ->
|
||||
clean_string(Rest, <<Acc/binary, (json_escape_sequence(C))/binary>>, Opts);
|
||||
clean_string(<<C/utf8, Rest/binary>>, Acc, Opts) when C < 16#fdd0 ->
|
||||
clean_string(Rest, <<Acc/binary, C/utf8>>, Opts);
|
||||
clean_string(<<C/utf8, Rest/binary>>, Acc, Opts) when C > 16#fdef, C < 16#fffe ->
|
||||
clean_string(Rest, <<Acc/binary, C/utf8>>, Opts);
|
||||
clean_string(<<C/utf8, Rest/binary>>, Acc, Opts)
|
||||
when C > 16#ffff andalso
|
||||
C =/= 16#1fffe andalso C =/= 16#1ffff andalso
|
||||
C =/= 16#2fffe andalso C =/= 16#2ffff andalso
|
||||
C =/= 16#3fffe andalso C =/= 16#3ffff andalso
|
||||
C =/= 16#4fffe andalso C =/= 16#4ffff andalso
|
||||
C =/= 16#5fffe andalso C =/= 16#5ffff andalso
|
||||
C =/= 16#6fffe andalso C =/= 16#6ffff andalso
|
||||
C =/= 16#7fffe andalso C =/= 16#7ffff andalso
|
||||
C =/= 16#8fffe andalso C =/= 16#8ffff andalso
|
||||
C =/= 16#9fffe andalso C =/= 16#9ffff andalso
|
||||
C =/= 16#afffe andalso C =/= 16#affff andalso
|
||||
C =/= 16#bfffe andalso C =/= 16#bffff andalso
|
||||
C =/= 16#cfffe andalso C =/= 16#cffff andalso
|
||||
C =/= 16#dfffe andalso C =/= 16#dffff andalso
|
||||
C =/= 16#efffe andalso C =/= 16#effff andalso
|
||||
C =/= 16#ffffe andalso C =/= 16#fffff andalso
|
||||
C =/= 16#10fffe andalso C =/= 16#10ffff ->
|
||||
clean_string(Rest, <<Acc/binary, C/utf8>>, Opts);
|
||||
%% prepares a binary string for emission. when the `json_escape` field of Opts
%% is true the binary is handed to jsx_utils:json_escape/2. when it is false
%% the binary is returned untouched if is_clean/1 accepts it, and is otherwise
%% passed to clean_string/3 with an empty list accumulator
clean_string(Bin, Opts) ->
    EscapeRequested = Opts#opts.json_escape,
    case EscapeRequested of
        false ->
            case is_clean(Bin) of
                false -> clean_string(Bin, [], Opts);
                true -> Bin
            end;
        true ->
            jsx_utils:json_escape(Bin, Opts)
    end.
|
||||
|
||||
|
||||
%% true when the binary can be consumed entirely as a sequence of `utf8`
%% segments, false for any other input (including terms that are not binaries)
is_clean(Bin) ->
    case Bin of
        <<>> -> true;
        <<_/utf8, Tail/binary>> -> is_clean(Tail);
        _ -> false
    end.
|
||||
|
||||
|
||||
clean_string(Bin, _Acc, Opts=#opts{loose_unicode=false}) -> ?error([Bin, Opts]);
|
||||
clean_string(<<>>, Acc, _Opts) -> unicode:characters_to_binary(lists:reverse(Acc));
|
||||
clean_string(<<X/utf8, Rest/binary>>, Acc, Opts) -> clean_string(Rest, [X] ++ Acc, Opts);
|
||||
%% surrogates
|
||||
clean_string(<<237, X, _, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) when X >= 160 ->
|
||||
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
|
||||
%% private use noncharacters
|
||||
clean_string(<<239, 183, X, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) when X >= 143, X =< 175 ->
|
||||
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
|
||||
%% u+fffe and u+ffff
|
||||
clean_string(<<239, 191, X, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) when X == 190; X == 191 ->
|
||||
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
|
||||
%% the u+Xfffe and u+Xffff noncharacters
|
||||
clean_string(<<X, Y, 191, Z, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) when (
|
||||
(X == 240 andalso Y == 159) orelse
|
||||
(X == 240 andalso Y == 175) orelse
|
||||
(X == 240 andalso Y == 191) orelse
|
||||
(
|
||||
(X == 241 orelse X == 242 orelse X == 243) andalso
|
||||
(Y == 143 orelse Y == 159 orelse Y == 175 orelse Y == 191)
|
||||
) orelse
|
||||
(X == 244 andalso Y == 143)
|
||||
) andalso (Z == 190 orelse Z == 191) ->
|
||||
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
|
||||
clean_string(<<_, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) ->
|
||||
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
|
||||
clean_string(<<>>, Acc, _) -> Acc;
|
||||
clean_string(Bin, _Acc, Opts) -> erlang:error(badarg, [Bin, Opts]).
|
||||
clean_string(<<237, X, _, Rest/binary>>, Acc, Opts) when X >= 160 -> clean_string(Rest, [16#fffd] ++ Acc, Opts);
|
||||
%% bad codepoints
|
||||
clean_string(<<_, Rest/binary>>, Acc, Opts) -> clean_string(Rest, [16#fffd] ++ Acc, Opts).
|
||||
|
||||
|
||||
%% convert a codepoint to its \uXXXX equivalent. the codepoint is packed into
%% 16 bits and each 4 bit group becomes one lowercase hex digit
json_escape_sequence(Codepoint) ->
    HexDigits = [to_hex(Nibble) || <<Nibble:4>> <= <<Codepoint:16>>],
    unicode:characters_to_binary([$\\, $u | HexDigits]).


%% 10..15 map to $a..$f; every other value is offset by $0, so 0..9 map to
%% $0..$9
to_hex(N) when is_integer(N), N >= 10, N =< 15 -> N - 10 + $a;
to_hex(N) -> N + $0.
|
||||
|
||||
-ifdef(TEST).
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
|
@ -275,26 +210,6 @@ encode_test_() ->
|
|||
}
|
||||
].
|
||||
|
||||
noncharacters_test_() ->
|
||||
[
|
||||
{"noncharacters - badjson",
|
||||
?_assertEqual(check_bad(noncharacters()), [])
|
||||
},
|
||||
{"noncharacters - replaced",
|
||||
?_assertEqual(check_replaced(noncharacters()), [])
|
||||
}
|
||||
].
|
||||
|
||||
extended_noncharacters_test_() ->
|
||||
[
|
||||
{"extended noncharacters - badjson",
|
||||
?_assertEqual(check_bad(extended_noncharacters()), [])
|
||||
},
|
||||
{"extended noncharacters - replaced",
|
||||
?_assertEqual(check_replaced(extended_noncharacters()), [])
|
||||
}
|
||||
].
|
||||
|
||||
surrogates_test_() ->
|
||||
[
|
||||
{"surrogates - badjson",
|
||||
|
@ -305,16 +220,6 @@ surrogates_test_() ->
|
|||
}
|
||||
].
|
||||
|
||||
reserved_test_() ->
|
||||
[
|
||||
{"reserved noncharacters - badjson",
|
||||
?_assertEqual(check_bad(reserved_space()), [])
|
||||
},
|
||||
{"reserved noncharacters - replaced",
|
||||
?_assertEqual(check_replaced(reserved_space()), [])
|
||||
}
|
||||
].
|
||||
|
||||
good_characters_test_() ->
|
||||
[
|
||||
{"acceptable codepoints",
|
||||
|
@ -385,26 +290,11 @@ check([H|T], Opts, Acc) ->
|
|||
check(T, Opts, [{H, R}] ++ Acc).
|
||||
|
||||
|
||||
|
||||
noncharacters() -> lists:seq(16#fffe, 16#ffff).
|
||||
|
||||
%% the u+Xfffe and u+Xffff codepoints of planes 1 through 16, in ascending
%% order (u+1fffe, u+1ffff, u+2fffe, ... u+10fffe, u+10ffff)
extended_noncharacters() ->
    [(Plane bsl 16) + Low || Plane <- lists:seq(1, 16), Low <- [16#fffe, 16#ffff]].
|
||||
|
||||
surrogates() -> lists:seq(16#d800, 16#dfff).
|
||||
|
||||
reserved_space() -> lists:seq(16#fdd0, 16#fdef).
|
||||
good() -> lists:seq(1, 16#d7ff) ++ lists:seq(16#e000, 16#ffff).
|
||||
|
||||
good() -> lists:seq(1, 16#d7ff) ++ lists:seq(16#e000, 16#fdcf) ++ lists:seq(16#fdf0, 16#fffd).
|
||||
|
||||
good_extended() -> lists:seq(16#100000, 16#10fffd).
|
||||
good_extended() -> lists:seq(16#100000, 16#10ffff).
|
||||
|
||||
%% erlang refuses to encode certain codepoints, so fake them all
|
||||
to_fake_utf(N, utf8) when N < 16#0080 -> <<N:8>>;
|
||||
|
|
|
@ -90,22 +90,66 @@ extract_parser_opts([K|Rest], Acc) ->
|
|||
%% Entry point for escaping a binary: starts the json_escape/4 scan at byte
%% offset 0 and passes the byte size of String as the end position.
json_escape(String, Opts) when is_binary(String) ->
|
||||
json_escape(String, Opts, 0, size(String)).
|
||||
|
||||
|
||||
%% Expands to one `case` clause of json_escape/4 for the raw byte X found at
%% offset L: the byte is replaced by the binary form of json_escape_sequence(X),
%% the offset moves forward by 6 and the tracked length grows by 5.
%% NOTE(review): the +6/+5 arithmetic assumes json_escape_sequence/1 yields a
%% six-byte escape (e.g. \uXXXX) -- confirm against its definition.
%% The expansion relies on Opts, L and Len being bound at the use site.
-define(control_character(X),
|
||||
<<H:L/binary, X, T/binary>> ->
|
||||
json_escape(
|
||||
<<H/binary, (unicode:characters_to_binary(json_escape_sequence(X)))/binary, T/binary>>,
|
||||
Opts,
|
||||
L + 6,
|
||||
Len + 5
|
||||
)
|
||||
).
|
||||
|
||||
json_escape(Str, Opts, L, Len) when L < Len ->
|
||||
case Str of
|
||||
<<H:L/binary, $\", T/binary>> -> %"
|
||||
json_escape(<<H/binary, $\\, $", T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<H:L/binary, $\\, T/binary>> ->
|
||||
json_escape(<<H/binary, $\\, $\\, T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<H:L/binary, $\b, T/binary>> ->
|
||||
json_escape(<<H/binary, $\\, $b, T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<H:L/binary, $\f, T/binary>> ->
|
||||
json_escape(<<H/binary, $\\, $f, T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<H:L/binary, $\n, T/binary>> ->
|
||||
json_escape(<<H/binary, $\\, $n, T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<H:L/binary, $\r, T/binary>> ->
|
||||
json_escape(<<H/binary, $\\, $r, T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<H:L/binary, $\t, T/binary>> ->
|
||||
json_escape(<<H/binary, $\\, $t, T/binary>>, Opts, L + 2, Len + 1);
|
||||
?control_character(0);
|
||||
?control_character(1);
|
||||
?control_character(2);
|
||||
?control_character(3);
|
||||
?control_character(4);
|
||||
?control_character(5);
|
||||
?control_character(6);
|
||||
?control_character(7);
|
||||
<<H:L/binary, $\b, T/binary>> -> json_escape(<<H/binary, $\\, $b, T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<H:L/binary, $\t, T/binary>> -> json_escape(<<H/binary, $\\, $t, T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<H:L/binary, $\n, T/binary>> -> json_escape(<<H/binary, $\\, $n, T/binary>>, Opts, L + 2, Len + 1);
|
||||
?control_character(11);
|
||||
<<H:L/binary, $\f, T/binary>> -> json_escape(<<H/binary, $\\, $f, T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<H:L/binary, $\r, T/binary>> -> json_escape(<<H/binary, $\\, $r, T/binary>>, Opts, L + 2, Len + 1);
|
||||
?control_character(14);
|
||||
?control_character(15);
|
||||
?control_character(16);
|
||||
?control_character(17);
|
||||
?control_character(18);
|
||||
?control_character(19);
|
||||
?control_character(20);
|
||||
?control_character(21);
|
||||
?control_character(22);
|
||||
?control_character(23);
|
||||
?control_character(24);
|
||||
?control_character(25);
|
||||
?control_character(26);
|
||||
?control_character(27);
|
||||
?control_character(28);
|
||||
?control_character(29);
|
||||
?control_character(30);
|
||||
?control_character(31);
|
||||
<<_:L/binary, 32, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 33, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<H:L/binary, $\", T/binary>> -> json_escape(<<H/binary, $\\, $", T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<_:L/binary, 35, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 36, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 37, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 38, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 39, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 40, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 41, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 42, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 43, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 44, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 45, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 46, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<H:L/binary, $/, T/binary>> ->
|
||||
case Opts#opts.escape_forward_slash of
|
||||
true ->
|
||||
|
@ -113,6 +157,86 @@ json_escape(Str, Opts, L, Len) when L < Len ->
|
|||
false ->
|
||||
json_escape(<<H/binary, $/, T/binary>>, Opts, L + 1, Len)
|
||||
end;
|
||||
<<_:L/binary, 48, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 49, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 50, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 51, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 52, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 53, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 54, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 55, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 56, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 57, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 58, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 59, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 60, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 61, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 62, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 63, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 64, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 65, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 66, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 67, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 68, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 69, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 70, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 71, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 72, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 73, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 74, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 75, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 76, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 77, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 78, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 79, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 80, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 81, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 82, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 83, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 84, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 85, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 86, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 87, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 88, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 89, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 90, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 91, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<H:L/binary, $\\, T/binary>> -> json_escape(<<H/binary, $\\, $\\, T/binary>>, Opts, L + 2, Len + 1);
|
||||
<<_:L/binary, 93, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 94, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 95, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 96, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 97, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 98, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 99, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 100, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 101, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 102, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 103, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 104, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 105, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 106, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 107, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 108, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 109, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 110, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 111, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 112, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 113, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 114, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 115, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 116, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 117, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 118, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 119, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 120, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 121, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 122, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 123, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 124, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 125, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 126, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, 127, _/binary>> -> json_escape(Str, Opts, L + 1, Len);
|
||||
<<H:L/binary, 16#2028/utf8, T/binary>> ->
|
||||
case Opts#opts.no_jsonp_escapes of
|
||||
true ->
|
||||
|
@ -129,9 +253,6 @@ json_escape(Str, Opts, L, Len) when L < Len ->
|
|||
B = unicode:characters_to_binary(json_escape_sequence(16#2029)),
|
||||
json_escape(<<H/binary, B/binary, T/binary>>, Opts, L + size(B), Len + size(B) - size(<<16#2029/utf8>>))
|
||||
end;
|
||||
<<H:L/binary, X/utf8, T/binary>> when X < 32 ->
|
||||
B = unicode:characters_to_binary(json_escape_sequence(X)),
|
||||
json_escape(<<H/binary, B/binary, T/binary>>, Opts, L + size(B), Len + size(B) - size(<<X/utf8>>));
|
||||
<<_:L/binary, X/utf8, _/binary>> when X < 16#0080 ->
|
||||
json_escape(Str, Opts, L + 1, Len);
|
||||
<<_:L/binary, X/utf8, _/binary>> when X < 16#0800 ->
|
||||
|
@ -140,8 +261,16 @@ json_escape(Str, Opts, L, Len) when L < Len ->
|
|||
json_escape(Str, Opts, L + 3, Len);
|
||||
<<_:L/binary, _/utf8, _/binary>> ->
|
||||
json_escape(Str, Opts, L + 4, Len);
|
||||
<<H:L/binary, X, T/binary>> ->
|
||||
erlang:error(badarg, [[<<H:L/binary, X, T/binary>>, Opts]])
|
||||
<<H:L/binary, 237, X, _, T/binary>> when X >= 160 ->
|
||||
case Opts#opts.loose_unicode of
|
||||
true -> json_escape(<<H/binary, 16#fffd/utf8, T/binary>>, Opts, L + 3, Len);
|
||||
false -> erlang:error(badarg, [Str, Opts])
|
||||
end;
|
||||
<<H:L/binary, _, T/binary>> ->
|
||||
case Opts#opts.loose_unicode of
|
||||
true -> json_escape(<<H/binary, 16#fffd/utf8, T/binary>>, Opts, L + 3, Len + 2);
|
||||
false -> erlang:error(badarg, [Str, Opts])
|
||||
end
|
||||
end;
|
||||
json_escape(Str, _, L, Len) when L =:= Len ->
|
||||
Str.
|
||||
|
@ -201,7 +330,20 @@ binary_escape_test_() ->
|
|||
)
|
||||
},
|
||||
{"bad utf8",
|
||||
?_assertError(badarg, json_escape(<<32, 64, 128, 256>>, #opts{}))
|
||||
?_assertError(badarg, json_escape(<<32, 64, 128, 255>>, #opts{}))
|
||||
},
|
||||
{"bad utf8 ok",
|
||||
?_assertEqual(
|
||||
json_escape(<<32, 64, 128, 255>>, #opts{loose_unicode=true}),
|
||||
<<32, 64, 16#fffd/utf8, 16#fffd/utf8>>
|
||||
)
|
||||
},
|
||||
{"bad surrogate", ?_assertError(badarg, json_escape(<<237, 160, 127>>, #opts{}))},
|
||||
{"bad surrogate ok",
|
||||
?_assertEqual(
|
||||
json_escape(<<237, 160, 127>>, #opts{loose_unicode=true}),
|
||||
<<16#fffd/utf8>>
|
||||
)
|
||||
},
|
||||
{"all sizes of codepoints",
|
||||
?_assertEqual(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue