add uescape option for 7 bit clean output of strings

This commit is contained in:
alisdair sullivan 2014-11-24 17:59:41 -08:00
parent 3e80f475f5
commit 95a87fa0d7
3 changed files with 45 additions and 7 deletions

View file

@ -65,6 +65,8 @@ parse_config([dirty_strings|Rest], Config) ->
parse_config(Rest, Config#config{dirty_strings=true});
parse_config([repeat_keys|Rest], Config) ->
parse_config(Rest, Config#config{repeat_keys=true});
parse_config([uescape|Rest], Config) ->
parse_config(Rest, Config#config{uescape=true});
parse_config([strict|Rest], Config) ->
parse_config(Rest, Config#config{strict_comments=true,
strict_commas=true,
@ -151,6 +153,7 @@ valid_flags() ->
repeat_keys,
strict,
stream,
uescape,
error_handler,
incomplete_handler
].
@ -193,7 +196,8 @@ config_test_() ->
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true,
stream = true
stream = true,
uescape = true
},
parse_config([escaped_forward_slashes,
escaped_strings,
@ -201,7 +205,8 @@ config_test_() ->
dirty_strings,
repeat_keys,
strict,
stream
stream,
uescape
])
)
},
@ -271,6 +276,7 @@ config_to_list_test_() ->
dirty_strings,
repeat_keys,
stream,
uescape,
strict
],
config_to_list(
@ -283,7 +289,8 @@ config_to_list_test_() ->
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true,
stream = true
stream = true,
uescape = true
}
)
)},

View file

@ -10,6 +10,7 @@
strict_single_quotes = false :: boolean(),
strict_escapes = false :: boolean(),
stream = false :: boolean(),
uescape = false :: boolean(),
error_handler = false :: false | jsx_config:handler(),
incomplete_handler = false :: false | jsx_config:handler()
}).

View file

@ -351,6 +351,8 @@ clean(<<124, Rest/binary>>, Acc, Config) -> clean(Rest, [124] ++ Acc, Config);
clean(<<125, Rest/binary>>, Acc, Config) -> clean(Rest, [125] ++ Acc, Config);
clean(<<126, Rest/binary>>, Acc, Config) -> clean(Rest, [126] ++ Acc, Config);
clean(<<127, Rest/binary>>, Acc, Config) -> clean(Rest, [127] ++ Acc, Config);
clean(<<X/utf8, Rest/binary>>, Acc, Config=#config{uescape=true}) ->
maybe_replace(X, Rest, Acc, Config);
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
maybe_replace(X, Rest, Acc, Config);
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#d800 ->
@ -439,13 +441,15 @@ maybe_replace($/, Rest, Acc, Config=#config{escaped_strings=true}) ->
end;
maybe_replace($\\, Rest, Acc, Config=#config{escaped_strings=true}) ->
clean(Rest, [$\\, $\\] ++ Acc, Config);
maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X < 32 ->
clean(Rest, lists:reverse(json_escape_sequence(X)) ++ Acc, Config);
maybe_replace(X, Rest, Acc, Config=#config{uescape=true}) when is_integer(X) ->
clean(Rest, lists:reverse(json_escape_sequence(X)) ++ Acc, Config);
maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
case Config#config.unescaped_jsonp of
true -> clean(Rest, [X] ++ Acc, Config);
false -> clean(Rest, lists:reverse(json_escape_sequence(X)) ++ Acc, Config)
end;
maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X < 32 ->
clean(Rest, lists:reverse(json_escape_sequence(X)) ++ Acc, Config);
maybe_replace(Atom, _, _, #config{strict_utf8=true}) when is_atom(Atom) -> {error, badarg};
maybe_replace(noncharacter, Rest, Acc, Config) -> clean(Rest, [16#fffd] ++ Acc, Config);
maybe_replace(surrogate, Rest, Acc, Config) -> clean(Rest, [16#fffd] ++ Acc, Config);
@ -454,9 +458,13 @@ maybe_replace(X, Rest, Acc, Config) -> clean(Rest, [X] ++ Acc, Config).
%% convert a codepoint to its \uXXXX equiv.
%% returns a list of characters. codepoints below 65536 (16#10000) fit in 16
%% bits, so they are split into four 4 bit nibbles and each nibble is passed
%% through to_hex/1, giving one six character escape: backslash, `u` and four
%% digits
json_escape_sequence(X) ->
json_escape_sequence(X) when X < 65536 ->
<<A:4, B:4, C:4, D:4>> = <<X:16>>,
[$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))].
[$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))];
%% codepoints of 65536 and above cannot be written in four hex digits, so they
%% are emitted as a utf-16 surrogate pair: subtract 16#10000, treat the result
%% as a 20 bit value, add 16#d800 to the upper 10 bits and 16#dc00 to the lower
%% 10 bits, then escape each half with the clause above and concatenate them
json_escape_sequence(X) ->
Adjusted = X - 16#10000,
<<A:10, B:10>> = <<Adjusted:20>>,
json_escape_sequence(A + 16#d800) ++ json_escape_sequence(B + 16#dc00).
to_hex(10) -> $a;
@ -1033,6 +1041,28 @@ json_escape_sequence_test_() ->
{"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")}
].
%% eunit generator for the uescape option: each case feeds clean_string/2 a
%% utf8 binary of non-ascii codepoints and expects them back as \uXXXX escapes.
%% the last two cases use codepoints above 16#ffff, which are expected to come
%% back as a pair of surrogate escapes
uescaped_test_() ->
    Uescape = #config{uescape=true},
    [
        %% single codepoint just past the 7 bit range
        {"\"\\u0080\"", ?_assertEqual(
            <<"\\u0080">>,
            clean_string(<<16#80/utf8>>, Uescape)
        )},
        %% run of three byte codepoints followed by two two byte codepoints
        {"\"\\u8ca8\\u5481\\u3002\\u0091\\u0091\"", ?_assertEqual(
            <<"\\u8ca8\\u5481\\u3002\\u0091\\u0091">>,
            clean_string(
                <<16#8ca8/utf8, 16#5481/utf8, 16#3002/utf8, 16#91/utf8, 16#91/utf8>>,
                Uescape
            )
        )},
        %% 16#1d11e needs a surrogate pair
        {"\"\\ud834\\udd1e\"", ?_assertEqual(
            <<"\\ud834\\udd1e">>,
            clean_string(<<16#1d11e/utf8>>, Uescape)
        )},
        %% 16#1f60a needs a surrogate pair
        {"\"\\ud83d\\ude0a\"", ?_assertEqual(
            <<"\\ud83d\\ude0a">>,
            clean_string(<<16#1f60a/utf8>>, Uescape)
        )}
    ].
fix_key_test_() ->
[