perform uescape escaping in the decoder (for json -> whatever

transformation)
This commit is contained in:
alisdair sullivan 2014-12-03 23:02:02 -08:00
parent dc8e6a0c54
commit edee5931a4
2 changed files with 38 additions and 2 deletions

View file

@ -430,7 +430,9 @@ additional options beyond these. see
- `uescape` - `uescape`
escape all codepoints outside the ascii range for 7 bit clean output escape all codepoints outside the ascii range for 7 bit clean output. note
this escaping takes place even if no other string escaping is requested (via
`escaped_strings`)
- `unescaped_jsonp` - `unescaped_jsonp`

View file

@ -310,6 +310,8 @@ string(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
string(Rest, Handler, [Acc, maybe_replace(?solidus, Config)], Stack, Config); string(Rest, Handler, [Acc, maybe_replace(?solidus, Config)], Stack, Config);
string(<<?rsolidus/utf8, Rest/binary>>, Handler, Acc, Stack, Config) -> string(<<?rsolidus/utf8, Rest/binary>>, Handler, Acc, Stack, Config) ->
unescape(Rest, Handler, Acc, Stack, Config); unescape(Rest, Handler, Acc, Stack, Config);
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config=#config{uescape=true}) when X >= 16#80 ->
string(Rest, Handler, [Acc, maybe_replace(X, Config)], Stack, Config);
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X == 16#2028; X == 16#2029 -> string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X == 16#2028; X == 16#2029 ->
string(Rest, Handler, [Acc, maybe_replace(X, Config)], Stack, Config); string(Rest, Handler, [Acc, maybe_replace(X, Config)], Stack, Config);
string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) -> string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) ->
@ -547,6 +549,7 @@ count(<<127, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config); count(Rest, N + 1, Config);
count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) -> count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) ->
count(Rest, N + 1, Config); count(Rest, N + 1, Config);
count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N;
count(<<X/utf8, Rest/binary>>, N, Config) when X < 16#800 -> count(<<X/utf8, Rest/binary>>, N, Config) when X < 16#800 ->
count(Rest, N + 2, Config); count(Rest, N + 2, Config);
count(<<X/utf8, _/binary>>, N, _) when X == 16#2028; X == 16#2029 -> N; count(<<X/utf8, _/binary>>, N, _) when X == 16#2028; X == 16#2029 -> N;
@ -694,13 +697,20 @@ maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X ==
end; end;
maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
json_escape_sequence(X); json_escape_sequence(X);
%% escaped even if no other escaping requested!
maybe_replace(X, #config{uescape=true}) when X >= 16#80 ->
json_escape_sequence(X);
maybe_replace(X, _Config) -> <<X/utf8>>. maybe_replace(X, _Config) -> <<X/utf8>>.
%% render a codepoint as its json \uXXXX escape. codepoints below 65536 yield
%% one escape; larger codepoints are split into a utf-16 surrogate pair and
%% yield two escapes back to back
json_escape_sequence(Codepoint) when Codepoint < 65536 ->
    %% four nibbles, most significant first, one hex digit each
    <<N1:4, N2:4, N3:4, N4:4>> = <<Codepoint:16>>,
    [$\\, $u, to_hex(N1), to_hex(N2), to_hex(N3), to_hex(N4)];
json_escape_sequence(Codepoint) ->
    %% offset past 16#10000 as 20 bits: top 10 go to the high surrogate,
    %% bottom 10 to the low surrogate
    <<High:10, Low:10>> = <<(Codepoint - 16#10000):20>>,
    json_escape_sequence(High + 16#d800) ++ json_escape_sequence(Low + 16#dc00).
%% ascii "1" is [49], "2" is [50], etc... %% ascii "1" is [49], "2" is [50], etc...
@ -1524,6 +1534,30 @@ special_escape_test_() ->
]. ].
%% eunit generator for the `uescape` option: decoding a json string that
%% contains non-ascii utf8 must emit each such codepoint as a \uXXXX escape
%% (a surrogate pair for codepoints above 16#ffff).
%% each case is {Title, ExpectedEvents, JSON}
uescape_test_() ->
    Cases = [
        {"\"\\u0080\"",
            [{string, <<"\\u0080">>}, end_json],
            <<34, 128/utf8, 34>>},
        {"\"\\u8ca8\\u5481\\u3002\\u0091\\u0091\"",
            [{string, <<"\\u8ca8\\u5481\\u3002\\u0091\\u0091">>}, end_json],
            <<34,232,178,168,229,146,129,227,128,130,194,145,194,145,34>>},
        {"\"\\ud834\\udd1e\"",
            [{string, <<"\\ud834\\udd1e">>}, end_json],
            <<34, 240, 157, 132, 158, 34>>},
        {"\"\\ud83d\\ude0a\"",
            [{string, <<"\\ud83d\\ude0a">>}, end_json],
            <<34, 240, 159, 152, 138, 34>>}
    ],
    [
        {Title, ?_assertEqual(Events, decode(JSON, [uescape]))}
        || {Title, Events, JSON} <- Cases
    ].
single_quoted_string_test_() -> single_quoted_string_test_() ->
Cases = [ Cases = [
{"single quoted string", [{string, <<"hello world">>}, end_json], <<39, "hello world", 39>>}, {"single quoted string", [{string, <<"hello world">>}, end_json], <<39, "hello world", 39>>},