produce binaries in escaped strings in decoder/parser instead of lists

This commit is contained in:
alisdair sullivan 2014-12-09 02:19:13 -08:00
parent 5843dbc56a
commit e5e04db58e
2 changed files with 34 additions and 41 deletions

View file

@ -726,9 +726,9 @@ strip_continuations(<<Rest/binary>>, Handler, Acc, Stack, Config, _) ->
%% this all gets really gross and should probably eventually be folded into %% this all gets really gross and should probably eventually be folded into
%% but for now it fakes being part of string on incompletes and errors %% but for now it fakes being part of string on incompletes and errors
unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
string(<<?rsolidus, Rest/binary>>, Handler, [Acc, ?rsolidus], Stack, Config); string(<<?rsolidus, Rest/binary>>, Handler, [Acc, <<?rsolidus>>], Stack, Config);
unescape(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> unescape(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
string(Rest, Handler, [Acc, ?rsolidus, C], Stack, Config); string(Rest, Handler, [Acc, <<?rsolidus, C>>], Stack, Config);
unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) -> unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) ->
string(Rest, Handler, [Acc, maybe_replace($\b, Config)], Stack, Config); string(Rest, Handler, [Acc, maybe_replace($\b, Config)], Stack, Config);
unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) -> unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) ->
@ -742,7 +742,7 @@ unescape(<<$t, Rest/binary>>, Handler, Acc, Stack, Config) ->
unescape(<<?doublequote, Rest/binary>>, Handler, Acc, Stack, Config) -> unescape(<<?doublequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
string(Rest, Handler, [Acc, maybe_replace($\", Config)], Stack, Config); string(Rest, Handler, [Acc, maybe_replace($\", Config)], Stack, Config);
unescape(<<?singlequote, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_single_quotes=false}) -> unescape(<<?singlequote, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_single_quotes=false}) ->
string(Rest, Handler, [Acc, ?singlequote], Stack, Config); string(Rest, Handler, [Acc, <<?singlequote>>], Stack, Config);
unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) -> unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
string(Rest, Handler, [Acc, maybe_replace($\\, Config)], Stack, Config); string(Rest, Handler, [Acc, maybe_replace($\\, Config)], Stack, Config);
unescape(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) -> unescape(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
@ -793,7 +793,7 @@ unescape(Bin, Handler, Acc, Stack, Config) ->
true -> incomplete(string, <<?rsolidus/utf8, Bin/binary>>, Handler, Acc, Stack, Config); true -> incomplete(string, <<?rsolidus/utf8, Bin/binary>>, Handler, Acc, Stack, Config);
false -> case Config#config.strict_escapes of false -> case Config#config.strict_escapes of
true -> ?error(string, <<?rsolidus, Bin/binary>>, Handler, Acc, Stack, Config); true -> ?error(string, <<?rsolidus, Bin/binary>>, Handler, Acc, Stack, Config);
false -> string(Bin, Handler, [Acc, ?rsolidus], Stack, Config) false -> string(Bin, Handler, [Acc, <<?rsolidus>>], Stack, Config)
end end
end. end.
@ -806,19 +806,19 @@ is_partial_escape(<<>>) -> true;
is_partial_escape(_) -> false. is_partial_escape(_) -> false.
maybe_replace(C, #config{dirty_strings=true}) -> C; maybe_replace(C, #config{dirty_strings=true}) -> <<C>>;
maybe_replace($\b, #config{escaped_strings=true}) -> [$\\, $b]; maybe_replace($\b, #config{escaped_strings=true}) -> <<$\\, $b>>;
maybe_replace($\t, #config{escaped_strings=true}) -> [$\\, $t]; maybe_replace($\t, #config{escaped_strings=true}) -> <<$\\, $t>>;
maybe_replace($\n, #config{escaped_strings=true}) -> [$\\, $n]; maybe_replace($\n, #config{escaped_strings=true}) -> <<$\\, $n>>;
maybe_replace($\f, #config{escaped_strings=true}) -> [$\\, $f]; maybe_replace($\f, #config{escaped_strings=true}) -> <<$\\, $f>>;
maybe_replace($\r, #config{escaped_strings=true}) -> [$\\, $r]; maybe_replace($\r, #config{escaped_strings=true}) -> <<$\\, $r>>;
maybe_replace($\", #config{escaped_strings=true}) -> [$\\, $\"]; maybe_replace($\", #config{escaped_strings=true}) -> <<$\\, $\">>;
maybe_replace($/, Config=#config{escaped_strings=true}) -> maybe_replace($/, Config=#config{escaped_strings=true}) ->
case Config#config.escaped_forward_slashes of case Config#config.escaped_forward_slashes of
true -> [$\\, $/] true -> <<$\\, $/>>
; false -> $/ ; false -> <<$/>>
end; end;
maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\]; maybe_replace($\\, #config{escaped_strings=true}) -> <<$\\, $\\>>;
maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
case Config#config.unescaped_jsonp of case Config#config.unescaped_jsonp of
true -> <<X/utf8>> true -> <<X/utf8>>
@ -832,11 +832,11 @@ maybe_replace(X, _Config) -> <<X/utf8>>.
%% convert a codepoint to its \uXXXX equiv. %% convert a codepoint to its \uXXXX equiv.
json_escape_sequence(X) when X < 65536 -> json_escape_sequence(X) when X < 65536 ->
<<A:4, B:4, C:4, D:4>> = <<X:16>>, <<A:4, B:4, C:4, D:4>> = <<X:16>>,
[$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]; <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>;
json_escape_sequence(X) -> json_escape_sequence(X) ->
Adjusted = X - 16#10000, Adjusted = X - 16#10000,
<<A:10, B:10>> = <<Adjusted:20>>, <<A:10, B:10>> = <<Adjusted:20>>,
json_escape_sequence(A + 16#d800) ++ json_escape_sequence(B + 16#dc00). [json_escape_sequence(A + 16#d800), json_escape_sequence(B + 16#dc00)].
%% ascii "1" is [49], "2" is [50], etc... %% ascii "1" is [49], "2" is [50], etc...

View file

@ -471,50 +471,43 @@ strip_continuations(<<X, Rest/binary>>, N) when X >= 128, X =< 191 ->
strip_continuations(Bin, _) -> Bin. strip_continuations(Bin, _) -> Bin.
maybe_replace($\b, #config{escaped_strings=true}) -> maybe_replace($\b, #config{escaped_strings=true}) -> <<$\\, $b>>;
[$\\, $b]; maybe_replace($\t, #config{escaped_strings=true}) -> <<$\\, $t>>;
maybe_replace($\t, #config{escaped_strings=true}) -> maybe_replace($\n, #config{escaped_strings=true}) -> <<$\\, $n>>;
[$\\, $t]; maybe_replace($\f, #config{escaped_strings=true}) -> <<$\\, $f>>;
maybe_replace($\n, #config{escaped_strings=true}) -> maybe_replace($\r, #config{escaped_strings=true}) -> <<$\\, $r>>;
[$\\, $n]; maybe_replace($\", #config{escaped_strings=true}) -> <<$\\, $\">>;
maybe_replace($\f, #config{escaped_strings=true}) ->
[$\\, $f];
maybe_replace($\r, #config{escaped_strings=true}) ->
[$\\, $r];
maybe_replace($\", #config{escaped_strings=true}) ->
[$\\, $\"];
maybe_replace($/, Config=#config{escaped_strings=true}) -> maybe_replace($/, Config=#config{escaped_strings=true}) ->
case Config#config.escaped_forward_slashes of case Config#config.escaped_forward_slashes of
true -> [$\\, $/]; true -> <<$\\, $/>>;
false -> [$/] false -> <<$/>>
end; end;
maybe_replace($\\, #config{escaped_strings=true}) -> maybe_replace($\\, #config{escaped_strings=true}) -> <<$\\, $\\>>;
[$\\, $\\];
maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
json_escape_sequence(X); json_escape_sequence(X);
maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
case Config#config.unescaped_jsonp of case Config#config.unescaped_jsonp of
true -> [<<X/utf8>>]; true -> <<X/utf8>>;
false -> json_escape_sequence(X) false -> json_escape_sequence(X)
end; end;
maybe_replace(Atom, #config{strict_utf8=true}) when is_atom(Atom) -> maybe_replace(Atom, #config{strict_utf8=true}) when is_atom(Atom) ->
erlang:error(badarg); erlang:error(badarg);
maybe_replace(surrogate, _Config) -> maybe_replace(surrogate, _Config) ->
[<<16#fffd/utf8>>]; <<16#fffd/utf8>>;
maybe_replace(badutf, _Config) -> maybe_replace(badutf, _Config) ->
[<<16#fffd/utf8>>]; <<16#fffd/utf8>>;
maybe_replace(X, _Config) -> maybe_replace(X, _Config) ->
[<<X/utf8>>]. <<X/utf8>>.
%% convert a codepoint to its \uXXXX equiv. %% convert a codepoint to its \uXXXX equiv.
json_escape_sequence(X) when X < 65536 -> json_escape_sequence(X) when X < 65536 ->
<<A:4, B:4, C:4, D:4>> = <<X:16>>, <<A:4, B:4, C:4, D:4>> = <<X:16>>,
[$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]; <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>;
json_escape_sequence(X) -> json_escape_sequence(X) ->
Adjusted = X - 16#10000, Adjusted = X - 16#10000,
<<A:10, B:10>> = <<Adjusted:20>>, <<A:10, B:10>> = <<Adjusted:20>>,
json_escape_sequence(A + 16#d800) ++ json_escape_sequence(B + 16#dc00). [json_escape_sequence(A + 16#d800), json_escape_sequence(B + 16#dc00)].
to_hex(10) -> $a; to_hex(10) -> $a;
@ -1030,9 +1023,9 @@ bad_utf8_test_() ->
json_escape_sequence_test_() -> json_escape_sequence_test_() ->
[ [
{"json escape sequence test - 16#0000", ?_assertEqual(json_escape_sequence(16#0000), "\\u0000")}, {"json escape sequence test - 16#0000", ?_assertEqual(<<"\\u0000"/utf8>>, json_escape_sequence(16#0000))},
{"json escape sequence test - 16#abc", ?_assertEqual(json_escape_sequence(16#abc), "\\u0abc")}, {"json escape sequence test - 16#abc", ?_assertEqual(<<"\\u0abc"/utf8>>, json_escape_sequence(16#abc))},
{"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} {"json escape sequence test - 16#def", ?_assertEqual(<<"\\u0def"/utf8>>, json_escape_sequence(16#def))}
]. ].