diff --git a/include/jsx_opts.hrl b/include/jsx_opts.hrl index 9f75bd9..380de54 100644 --- a/include/jsx_opts.hrl +++ b/include/jsx_opts.hrl @@ -1,39 +1,4 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. - -record(opts, { loose_unicode = false, escape_forward_slash = false -}). - - -parse_opts(Opts) -> - parse_opts(Opts, #opts{}). - -parse_opts([], Opts) -> - Opts; -parse_opts([loose_unicode|Rest], Opts) -> - parse_opts(Rest, Opts#opts{loose_unicode=true}); -parse_opts([escape_forward_slash|Rest], Opts) -> - parse_opts(Rest, Opts#opts{escape_forward_slash=true}); -parse_opts(_, _) -> - {error, badarg}. \ No newline at end of file +}). \ No newline at end of file diff --git a/include/jsx_opts_parser.hrl b/include/jsx_opts_parser.hrl new file mode 100644 index 0000000..e25f3e4 --- /dev/null +++ b/include/jsx_opts_parser.hrl @@ -0,0 +1,11 @@ +parse_opts(Opts) -> + parse_opts(Opts, #opts{}). + +parse_opts([], Opts) -> + Opts; +parse_opts([loose_unicode|Rest], Opts) -> + parse_opts(Rest, Opts#opts{loose_unicode=true}); +parse_opts([escape_forward_slash|Rest], Opts) -> + parse_opts(Rest, Opts#opts{escape_forward_slash=true}); +parse_opts(_, _) -> + {error, badarg}. \ No newline at end of file diff --git a/include/jsx_scanner.hrl b/include/jsx_scanner.hrl index 4bfa100..b1728c9 100644 --- a/include/jsx_scanner.hrl +++ b/include/jsx_scanner.hrl @@ -1,26 +1,3 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. - - %% whitespace -define(space, 16#20). -define(tab, 16#09). diff --git a/include/jsx_tokenizer.hrl b/include/jsx_tokenizer.hrl index 051397b..1070dac 100644 --- a/include/jsx_tokenizer.hrl +++ b/include/jsx_tokenizer.hrl @@ -1,26 +1,3 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. - - -ifndef(error). -define(error(Args), erlang:error(badarg, Args) @@ -49,7 +26,7 @@ start({string, String}, [], [], Opts) when is_binary(String); is_list(String) -> {ok, - [{string, unicode:characters_to_list(json_escape(String, Opts))}, end_json], + [{string, unicode:characters_to_list(jsx_utils:json_escape(String, Opts))}, end_json], fun(X) when is_list(X) -> ?error([X, [], [], Opts]) end }; start({float, Float}, [], [], Opts) when is_float(Float) -> @@ -80,7 +57,7 @@ list_or_object(Forms, T, Stack, Opts) -> ?error([Forms, T, Stack, Opts]). key([{key, Key}|Forms], T, Stack, Opts) when is_binary(Key); is_list(Key) -> - ?event([{key, unicode:characters_to_list(json_escape(Key, Opts))}], + ?event([{key, unicode:characters_to_list(jsx_utils:json_escape(Key, Opts))}], value, Forms, T, Stack, Opts ); key([end_object|Forms], T, [object|Stack], Opts) -> @@ -90,7 +67,7 @@ key(Forms, T, Stack, Opts) -> ?error([Forms, T, Stack, Opts]). value([{string, S}|Forms], T, Stack, Opts) when is_binary(S); is_list(S) -> - ?event([{string, unicode:characters_to_list(json_escape(S, Opts))}], + ?event([{string, unicode:characters_to_list(jsx_utils:json_escape(S, Opts))}], maybe_done, Forms, T, Stack, Opts ); value([{float, F}|Forms], T, Stack, Opts) when is_float(F) -> @@ -127,102 +104,4 @@ done([], T, [], Opts) -> done(X, T, [], Opts) end }; -done(Forms, T, Stack, Opts) -> ?error([Forms, T, Stack, Opts]). - - -%% json string escaping, for utf8 binaries. escape the json control sequences to -%% their json equivalent, escape other control characters to \uXXXX sequences, -%% everything else should be a legal json string component -json_escape(String, Opts) when is_binary(String) -> - json_escape(String, Opts, <<>>); -json_escape(String, Opts) when is_list(String) -> - json_escape(String, Opts, []). - -%% double quote -json_escape(<<$\", Rest/binary>>, Opts, Acc) -> - json_escape(Rest, Opts, <>); -json_escape([$\"|Rest], Opts, Acc) -> - json_escape(Rest, Opts, [$\", $\\] ++ Acc); -%% backslash \ reverse solidus -json_escape(<<$\\, Rest/binary>>, Opts, Acc) -> - json_escape(Rest, Opts, <>); -json_escape([$\\|Rest], Opts, Acc) -> - json_escape(Rest, Opts, [$\\, $\\] ++ Acc); -%% backspace -json_escape(<<$\b, Rest/binary>>, Opts, Acc) -> - json_escape(Rest, Opts, <>); -json_escape([$\b|Rest], Opts, Acc) -> - json_escape(Rest, Opts, [$b, $\\] ++ Acc); -%% form feed -json_escape(<<$\f, Rest/binary>>, Opts, Acc) -> - json_escape(Rest, Opts, <>); -json_escape([$\f|Rest], Opts, Acc) -> - json_escape(Rest, Opts, [$f, $\\] ++ Acc); -%% newline -json_escape(<<$\n, Rest/binary>>, Opts, Acc) -> - json_escape(Rest, Opts, <>); -json_escape([$\n|Rest], Opts, Acc) -> - json_escape(Rest, Opts, [$n, $\\] ++ Acc); -%% cr -json_escape(<<$\r, Rest/binary>>, Opts, Acc) -> - json_escape(Rest, Opts, <>); -json_escape([$\r|Rest], Opts, Acc) -> - json_escape(Rest, Opts, [$r, $\\] ++ Acc); -%% tab -json_escape(<<$\t, Rest/binary>>, Opts, Acc) -> - json_escape(Rest, Opts, <>); -json_escape([$\t|Rest], Opts, Acc) -> - json_escape(Rest, Opts, [$t, $\\] ++ Acc); -%% other control characters -json_escape(<>, Opts, Acc) when C >= 0, C < $\s -> - json_escape(Rest, - Opts, - <> - ); -json_escape([C|Rest], Opts, Acc) when C >= 0, C < $\s -> - json_escape(Rest, Opts, lists:reverse(json_escape_sequence(C)) ++ Acc); -%% escape forward slashes -- optionally -- to faciliate microsoft's retarded -%% date format -json_escape(<<$/, Rest/binary>>, Opts=#opts{escape_forward_slash=true}, Acc) -> - json_escape(Rest, Opts, <>); -json_escape([$/|Rest], Opts=#opts{escape_forward_slash=true}, Acc) -> - json_escape(Rest, Opts, [$/, $\\] ++ Acc); -%% escape u+2028 and u+2029 to avoid problems with jsonp -json_escape(<>, Opts, Acc) - when C == 16#2028; C == 16#2029 -> - json_escape(Rest, - Opts, - <> - ); -json_escape([C|Rest], Opts, Acc) when C =:= 16#2028; C =:= 16#2029 -> - json_escape(Rest, Opts, lists:reverse(json_escape_sequence(C)) ++ Acc); -%% any other legal codepoint -json_escape(<>, Opts, Acc) -> - json_escape(Rest, Opts, <>); -json_escape([C|Rest], Opts, Acc) -> - json_escape(Rest, Opts, [C] ++ Acc); -json_escape(<<>>, _Opts, Acc) -> - Acc; -json_escape([], _Opts, Acc) -> - lists:reverse(Acc); -json_escape(Rest, Opts, Acc) -> - erlang:error(badarg, [Rest, Opts, Acc]). - - -%% convert a codepoint to it's \uXXXX equiv. -json_escape_sequence(X) -> - <> = <>, - [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]. - - -to_hex(15) -> $f; -to_hex(14) -> $e; -to_hex(13) -> $d; -to_hex(12) -> $c; -to_hex(11) -> $b; -to_hex(10) -> $a; -to_hex(X) -> X + $0. \ No newline at end of file +done(Forms, T, Stack, Opts) -> ?error([Forms, T, Stack, Opts]). \ No newline at end of file diff --git a/include/jsx_types.hrl b/include/jsx_types.hrl index 7f24eba..92f703d 100644 --- a/include/jsx_types.hrl +++ b/include/jsx_types.hrl @@ -1,26 +1,3 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. - - -type jsx_opts() :: [jsx_opt()]. -type jsx_opt() :: multi_term | loose_unicode diff --git a/src/jsx_scanner.erl b/src/jsx_scanner.erl index 733f6f6..8670c2d 100644 --- a/src/jsx_scanner.erl +++ b/src/jsx_scanner.erl @@ -33,6 +33,8 @@ scanner(Opts) -> -include("../include/jsx_opts.hrl"). +-include("../include/jsx_opts_parser.hrl"). + -include("../include/jsx_scanner.hrl"). -ifdef(TEST). diff --git a/src/jsx_tokenizer.erl b/src/jsx_tokenizer.erl index 7d25a3b..fafc61c 100644 --- a/src/jsx_tokenizer.erl +++ b/src/jsx_tokenizer.erl @@ -36,6 +36,8 @@ tokenizer(OptsList) -> -include("../include/jsx_opts.hrl"). +-include("../include/jsx_opts_parser.hrl"). + -include("../include/jsx_tokenizer.hrl"). -ifdef(TEST). @@ -107,62 +109,4 @@ encode_test_() -> {"naked float", ?_assert(encode({float, 1.0}))} ]. - -binary_escape_test_() -> - [ - {"json string escaping", - ?_assert(json_escape( - <<"\"\\\b\f\n\r\t">>, #opts{} - ) =:= <<"\\\"\\\\\\b\\f\\n\\r\\t">> - ) - }, - {"json string hex escape", - ?_assert(json_escape( - <<1, 2, 3, 11, 26, 30, 31>>, #opts{} - ) =:= <<"\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">> - ) - }, - {"jsonp protection", - ?_assert(json_escape( - <<226, 128, 168, 226, 128, 169>>, #opts{} - ) =:= <<"\\u2028\\u2029">> - ) - }, - {"microsoft i hate your date format", - ?_assert(json_escape(<<"/Date(1303502009425)/">>, - #opts{escape_forward_slash=true} - ) =:= <<"\\/Date(1303502009425)\\/">> - ) - } - ]. - - -string_escape_test_() -> - [ - {"json string escaping", - ?_assert(json_escape( - "\"\\\b\f\n\r\t", #opts{} - ) =:= "\\\"\\\\\\b\\f\\n\\r\\t" - ) - }, - {"json string hex escape", - ?_assert(json_escape( - [1, 2, 3, 11, 26, 30, 31], #opts{} - ) =:= "\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f" - ) - }, - {"jsonp protection", - ?_assert(json_escape( - [16#2028, 16#2029], #opts{} - ) =:= "\\u2028\\u2029" - ) - }, - {"microsoft i hate your date format", - ?_assert(json_escape("/Date(1303502009425)/", - #opts{escape_forward_slash=true} - ) =:= "\\/Date(1303502009425)\\/" - ) - } - ]. - -endif. \ No newline at end of file diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index 11cd92e..b8c1b85 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -23,7 +23,9 @@ -module(jsx_utils). --export([nice_decimal/1]). +-export([nice_decimal/1, json_escape/2]). + +-include("../include/jsx_opts.hrl"). %% conversion of floats to 'nice' decimal output. erlang's float implementation @@ -152,17 +154,108 @@ digits_to_list(Digits, 0, Acc) -> digits_to_list(Digits, ignore, "." ++ Acc); digits_to_list([Digit|Digits], Dpoint, Acc) -> digits_to_list(Digits, - case Dpoint of ignore -> ignore; X -> X - 1 end, to_ascii(Digit) ++ Acc + case Dpoint of ignore -> ignore; X -> X - 1 end, [to_hex(Digit)] ++ Acc ). -to_ascii(10) -> "a"; -to_ascii(11) -> "b"; -to_ascii(12) -> "c"; -to_ascii(13) -> "d"; -to_ascii(14) -> "e"; -to_ascii(15) -> "f"; -to_ascii(X) -> [X + 48]. %% ascii "1" is [49], "2" is [50], etc... + +%% json string escaping, for utf8 binaries. escape the json control sequences to +%% their json equivalent, escape other control characters to \uXXXX sequences, +%% everything else should be a legal json string component +json_escape(String, Opts) when is_binary(String) -> + json_escape(String, Opts, <<>>); +json_escape(String, Opts) when is_list(String) -> + json_escape(String, Opts, []). + +%% double quote +json_escape(<<$\", Rest/binary>>, Opts, Acc) -> + json_escape(Rest, Opts, <>); +json_escape([$\"|Rest], Opts, Acc) -> + json_escape(Rest, Opts, [$\", $\\] ++ Acc); +%% backslash \ reverse solidus +json_escape(<<$\\, Rest/binary>>, Opts, Acc) -> + json_escape(Rest, Opts, <>); +json_escape([$\\|Rest], Opts, Acc) -> + json_escape(Rest, Opts, [$\\, $\\] ++ Acc); +%% backspace +json_escape(<<$\b, Rest/binary>>, Opts, Acc) -> + json_escape(Rest, Opts, <>); +json_escape([$\b|Rest], Opts, Acc) -> + json_escape(Rest, Opts, [$b, $\\] ++ Acc); +%% form feed +json_escape(<<$\f, Rest/binary>>, Opts, Acc) -> + json_escape(Rest, Opts, <>); +json_escape([$\f|Rest], Opts, Acc) -> + json_escape(Rest, Opts, [$f, $\\] ++ Acc); +%% newline +json_escape(<<$\n, Rest/binary>>, Opts, Acc) -> + json_escape(Rest, Opts, <>); +json_escape([$\n|Rest], Opts, Acc) -> + json_escape(Rest, Opts, [$n, $\\] ++ Acc); +%% cr +json_escape(<<$\r, Rest/binary>>, Opts, Acc) -> + json_escape(Rest, Opts, <>); +json_escape([$\r|Rest], Opts, Acc) -> + json_escape(Rest, Opts, [$r, $\\] ++ Acc); +%% tab +json_escape(<<$\t, Rest/binary>>, Opts, Acc) -> + json_escape(Rest, Opts, <>); +json_escape([$\t|Rest], Opts, Acc) -> + json_escape(Rest, Opts, [$t, $\\] ++ Acc); +%% other control characters +json_escape(<>, Opts, Acc) when C >= 0, C < $\s -> + json_escape(Rest, + Opts, + <> + ); +json_escape([C|Rest], Opts, Acc) when C >= 0, C < $\s -> + json_escape(Rest, Opts, lists:reverse(json_escape_sequence(C)) ++ Acc); +%% escape forward slashes -- optionally -- to faciliate microsoft's retarded +%% date format +json_escape(<<$/, Rest/binary>>, Opts=#opts{escape_forward_slash=true}, Acc) -> + json_escape(Rest, Opts, <>); +json_escape([$/|Rest], Opts=#opts{escape_forward_slash=true}, Acc) -> + json_escape(Rest, Opts, [$/, $\\] ++ Acc); +%% escape u+2028 and u+2029 to avoid problems with jsonp +json_escape(<>, Opts, Acc) + when C == 16#2028; C == 16#2029 -> + json_escape(Rest, + Opts, + <> + ); +json_escape([C|Rest], Opts, Acc) when C =:= 16#2028; C =:= 16#2029 -> + json_escape(Rest, Opts, lists:reverse(json_escape_sequence(C)) ++ Acc); +%% any other legal codepoint +json_escape(<>, Opts, Acc) -> + json_escape(Rest, Opts, <>); +json_escape([C|Rest], Opts, Acc) -> + json_escape(Rest, Opts, [C] ++ Acc); +json_escape(<<>>, _Opts, Acc) -> + Acc; +json_escape([], _Opts, Acc) -> + lists:reverse(Acc); +json_escape(Rest, Opts, Acc) -> + erlang:error(badarg, [Rest, Opts, Acc]). + + +%% convert a codepoint to it's \uXXXX equiv. +json_escape_sequence(X) -> + <> = <>, + [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]. + + + +to_hex(10) -> $a; +to_hex(11) -> $b; +to_hex(12) -> $c; +to_hex(13) -> $d; +to_hex(14) -> $e; +to_hex(15) -> $f; +to_hex(X) -> X + 48. %% ascii "1" is [49], "2" is [50], etc... %% eunit tests @@ -206,4 +299,62 @@ nice_decimal_test_() -> } ]. + +binary_escape_test_() -> + [ + {"json string escaping", + ?_assert(json_escape( + <<"\"\\\b\f\n\r\t">>, #opts{} + ) =:= <<"\\\"\\\\\\b\\f\\n\\r\\t">> + ) + }, + {"json string hex escape", + ?_assert(json_escape( + <<1, 2, 3, 11, 26, 30, 31>>, #opts{} + ) =:= <<"\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">> + ) + }, + {"jsonp protection", + ?_assert(json_escape( + <<226, 128, 168, 226, 128, 169>>, #opts{} + ) =:= <<"\\u2028\\u2029">> + ) + }, + {"microsoft i hate your date format", + ?_assert(json_escape(<<"/Date(1303502009425)/">>, + #opts{escape_forward_slash=true} + ) =:= <<"\\/Date(1303502009425)\\/">> + ) + } + ]. + + +string_escape_test_() -> + [ + {"json string escaping", + ?_assert(json_escape( + "\"\\\b\f\n\r\t", #opts{} + ) =:= "\\\"\\\\\\b\\f\\n\\r\\t" + ) + }, + {"json string hex escape", + ?_assert(json_escape( + [1, 2, 3, 11, 26, 30, 31], #opts{} + ) =:= "\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f" + ) + }, + {"jsonp protection", + ?_assert(json_escape( + [16#2028, 16#2029], #opts{} + ) =:= "\\u2028\\u2029" + ) + }, + {"microsoft i hate your date format", + ?_assert(json_escape("/Date(1303502009425)/", + #opts{escape_forward_slash=true} + ) =:= "\\/Date(1303502009425)\\/" + ) + } + ]. + -endif. \ No newline at end of file