From 9763ff189f390980bdbc6fe0d233a4553e468556 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 12:33:19 -0700 Subject: [PATCH 01/38] use io_lib:format for float formatting --- src/jsx_to_json.erl | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 2f0d7e9..60a1b05 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -142,7 +142,7 @@ encode(literal, Literal, _Opts) -> encode(integer, Integer, _Opts) -> erlang:integer_to_list(Integer); encode(float, Float, _Opts) -> - nicedecimal:format(Float). + lists:flatten(io_lib:format("~p", [Float])). space(Opts) -> @@ -176,24 +176,12 @@ indent_or_space(Opts) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -setup_nicedecimal_meck(Return) -> - ok = meck:new(nicedecimal), - ok = meck:expect(nicedecimal, format, fun(1.23) -> Return end). - -teardown_nicedecimal_meck(_) -> - ?assert(meck:validate(nicedecimal)), - ok = meck:unload(nicedecimal). 
- basic_format_test_() -> [ {"empty object", ?_assertEqual(format(<<"{}">>, []), <<"{}">>)}, {"empty array", ?_assertEqual(format(<<"[]">>, []), <<"[]">>)}, {"naked integer", ?_assertEqual(format(<<"123">>, []), <<"123">>)}, - {foreach, - fun() -> setup_nicedecimal_meck(<<"1.23">>) end, - fun(R) -> teardown_nicedecimal_meck(R) end, - [{"naked float", ?_assertEqual(format(<<"1.23">>, []), <<"1.23">>)}] - }, + {"naked float", ?_assertEqual(format(<<"1.23">>, []), <<"1.23">>)}, {"naked string", ?_assertEqual(format(<<"\"hi\"">>, []), <<"\"hi\"">>)}, {"naked string with control character", ?_assertEqual( format(<<"\"hi\\n\"">>, []), <<"\"hi\\n\"">> @@ -238,11 +226,7 @@ basic_to_json_test_() -> {"empty object", ?_assertEqual(to_json([{}], []), <<"{}">>)}, {"empty array", ?_assertEqual(to_json([], []), <<"[]">>)}, {"naked integer", ?_assertEqual(to_json(123, []), <<"123">>)}, - {foreach, - fun() -> setup_nicedecimal_meck(<<"1.23">>) end, - fun(R) -> teardown_nicedecimal_meck(R) end, - [{"naked float", ?_assertEqual(to_json(1.23, []) , <<"1.23">>)}] - }, + {"naked float", ?_assertEqual(to_json(1.23, []) , <<"1.23">>)}, {"naked string", ?_assertEqual(to_json(<<"hi">>, []), <<"\"hi\"">>)}, {"naked string with control character", ?_assertEqual( to_json(<<"hi\n">>, []), <<"\"hi\\n\"">> @@ -316,14 +300,10 @@ opts_test_() -> format(<<"{\"a\":true,\"b\":true,\"c\":true}">>, [{space, 2}]), <<"{\"a\": true, \"b\": true, \"c\": true}">> )}, - {foreach, - fun() -> setup_nicedecimal_meck(<<"1.23">>) end, - fun(R) -> teardown_nicedecimal_meck(R) end, - [{"array indent", ?_assertEqual( - format(<<"[1.23, 1.23, 1.23]">>, [{indent, 2}]), - <<"[\n 1.23,\n 1.23,\n 1.23\n]">> - )}] - }, + {"array indent", ?_assertEqual( + format(<<"[1.23, 1.23, 1.23]">>, [{indent, 2}]), + <<"[\n 1.23,\n 1.23,\n 1.23\n]">> + )}, {"object indent", ?_assertEqual( format(<<"{\"a\":true,\"b\":true,\"c\":true}">>, [{indent, 2}]), <<"{\n \"a\":true,\n \"b\":true,\n \"c\":true\n}">> From 
a6aa97fff681a1599d793ad0179e41700f640b50 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 12:34:29 -0700 Subject: [PATCH 02/38] remove unesscessary lists:flatten call --- src/jsx_to_json.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 60a1b05..95e3ad7 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -142,7 +142,7 @@ encode(literal, Literal, _Opts) -> encode(integer, Integer, _Opts) -> erlang:integer_to_list(Integer); encode(float, Float, _Opts) -> - lists:flatten(io_lib:format("~p", [Float])). + [Output] = io_lib:format("~p", [Float]), Output. space(Opts) -> From d1bb294edded6404b80157a0f89c54aba19e1d08 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 12:35:51 -0700 Subject: [PATCH 03/38] remove meck and nicedecimal from deps --- rebar.config | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/rebar.config b/rebar.config index 1806929..5218e3b 100644 --- a/rebar.config +++ b/rebar.config @@ -28,16 +28,3 @@ {xref_checks, [undefined_function_calls]}. {cover_enabled, true}. - -{deps, [ - { - 'nicedecimal', - ".*", - {git, "git://github.com/talentdeficit/nicedecimal.git", {branch, "master"}} - }, - { - 'meck', - ".*", - {git, "git://github.com/eproxus/meck.git", {branch, "master"}} - } -]}. 
From 7c31e9643e4b970582517e2eb3991b7d048c76f4 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 12:36:18 -0700 Subject: [PATCH 04/38] remove mention of meck from README.markdown --- README.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index cedfe18..47199e7 100644 --- a/README.markdown +++ b/README.markdown @@ -6,7 +6,7 @@ copyright 2011, 2012 alisdair sullivan jsx is released under the terms of the [MIT][MIT] license -jsx uses [rebar][rebar] for it's build chain and [meck][meck] for it's test suite +jsx uses [rebar][rebar] for it's build chain [![Build Status](https://secure.travis-ci.org/talentdeficit/jsx.png?branch=develop)](http://travis-ci.org/talentdeficit/jsx) From 7c810cb05d9145c0bf7469c7be51adb41b99725a Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 01:24:06 -0700 Subject: [PATCH 05/38] inline json escaping into encoder --- src/jsx_encoder.erl | 492 ++++++++++++++++++++++++++++++++++++-------- src/jsx_utils.erl | 4 +- 2 files changed, 406 insertions(+), 90 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index cc43a1a..397b99d 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -104,102 +104,393 @@ fix_key(Key) when is_binary(Key) -> Key. clean_string(Bin, Opts) -> - case Opts#opts.loose_unicode of - true -> maybe_escape(clean_string(Bin, 0, size(Bin), Opts), Opts) - ; false -> - case is_clean(Bin) of - true -> maybe_escape(Bin, Opts) - ; false -> erlang:error(badarg, [Bin, Opts]) - end + case Opts#opts.loose_unicode orelse Opts#opts.json_escape of + true -> clean(Bin, [], Opts) + ; false -> ensure_clean(Bin), Bin end. -maybe_escape(Bin, Opts=#opts{json_escape=true}) -> jsx_utils:json_escape(Bin, Opts); -maybe_escape(Bin, _) -> Bin. 
+%% fast path for no escaping and no correcting, throws error if string is 'bad' +ensure_clean(<<>>) -> ok; +ensure_clean(<<0, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<1, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<2, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<3, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<4, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<5, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<6, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<7, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<8, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<9, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<10, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<11, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<12, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<13, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<14, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<15, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<16, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<17, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<18, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<19, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<20, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<21, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<22, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<23, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<24, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<25, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<26, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<27, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<28, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<29, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<30, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<31, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<32, Rest/binary>>) -> 
ensure_clean(Rest); +ensure_clean(<<33, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<34, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<35, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<36, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<37, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<38, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<39, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<40, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<41, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<42, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<43, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<44, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<45, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<46, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<47, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<48, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<49, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<50, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<51, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<52, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<53, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<54, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<55, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<56, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<57, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<58, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<59, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<60, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<61, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<62, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<63, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<64, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<65, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<66, Rest/binary>>) -> ensure_clean(Rest); 
+ensure_clean(<<67, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<68, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<69, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<70, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<71, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<72, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<73, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<74, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<75, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<76, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<77, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<78, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<79, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<80, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<81, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<82, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<83, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<84, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<85, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<86, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<87, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<88, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<89, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<90, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<91, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<92, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<93, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<94, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<95, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<96, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<97, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<98, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<99, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<100, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<101, 
Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<102, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<103, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<104, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<105, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<106, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<107, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<108, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<109, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<110, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<111, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<112, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<113, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<114, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<115, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<116, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<117, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<118, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<119, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<120, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<121, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<122, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<123, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<124, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<125, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<126, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<<127, Rest/binary>>) -> ensure_clean(Rest); +ensure_clean(<>) when X < 16#800 -> ensure_clean(Rest); +ensure_clean(<>) when X < 16#dcff -> ensure_clean(Rest); +ensure_clean(<>) when X > 16#dfff, X < 16#fdd0 -> ensure_clean(Rest); +ensure_clean(<>) when X > 16#fdef, X < 16#fffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#10000, X < 16#1fffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#20000, X < 16#2fffe -> ensure_clean(Rest); +ensure_clean(<>) when 
X >= 16#30000, X < 16#3fffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#40000, X < 16#4fffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#50000, X < 16#5fffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#60000, X < 16#6fffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#70000, X < 16#7fffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#80000, X < 16#8fffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#90000, X < 16#9fffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#a0000, X < 16#afffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#b0000, X < 16#bfffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#c0000, X < 16#cfffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#d0000, X < 16#dfffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#e0000, X < 16#efffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#f0000, X < 16#ffffe -> ensure_clean(Rest); +ensure_clean(<>) when X >= 16#100000, X < 16#10fffe -> ensure_clean(Rest); +ensure_clean(Bin) -> erlang:error(badarg, [Bin]). 
-is_clean(<<>>) -> true; -is_clean(<>) when X < 16#80 -> is_clean(Rest); -is_clean(<>) when X < 16#800 -> is_clean(Rest); -is_clean(<>) when X < 16#dcff -> is_clean(Rest); -is_clean(<>) when X > 16#dfff, X < 16#fdd0 -> is_clean(Rest); -is_clean(<>) when X > 16#fdef, X < 16#fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#10000, X < 16#1fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#20000, X < 16#2fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#30000, X < 16#3fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#40000, X < 16#4fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#50000, X < 16#5fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#60000, X < 16#6fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#70000, X < 16#7fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#80000, X < 16#8fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#90000, X < 16#9fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#a0000, X < 16#afffe -> is_clean(Rest); -is_clean(<>) when X >= 16#b0000, X < 16#bfffe -> is_clean(Rest); -is_clean(<>) when X >= 16#c0000, X < 16#cfffe -> is_clean(Rest); -is_clean(<>) when X >= 16#d0000, X < 16#dfffe -> is_clean(Rest); -is_clean(<>) when X >= 16#e0000, X < 16#efffe -> is_clean(Rest); -is_clean(<>) when X >= 16#f0000, X < 16#ffffe -> is_clean(Rest); -is_clean(<>) when X >= 16#100000, X < 16#10fffe -> is_clean(Rest); -is_clean(Bin) -> erlang:error(badarg, [Bin]). 
+%% escape and/or replace bad codepoints if requested +clean(<<>>, Acc, _Opts) -> unicode:characters_to_binary(lists:reverse(Acc)); +clean(<<0, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(0, Opts) ++ Acc, Opts); +clean(<<1, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(1, Opts) ++ Acc, Opts); +clean(<<2, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(2, Opts) ++ Acc, Opts); +clean(<<3, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(3, Opts) ++ Acc, Opts); +clean(<<4, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(4, Opts) ++ Acc, Opts); +clean(<<5, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(5, Opts) ++ Acc, Opts); +clean(<<6, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(6, Opts) ++ Acc, Opts); +clean(<<7, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(7, Opts) ++ Acc, Opts); +clean(<<8, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(8, Opts) ++ Acc, Opts); +clean(<<9, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(9, Opts) ++ Acc, Opts); +clean(<<10, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(10, Opts) ++ Acc, Opts); +clean(<<11, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(11, Opts) ++ Acc, Opts); +clean(<<12, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(12, Opts) ++ Acc, Opts); +clean(<<13, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(13, Opts) ++ Acc, Opts); +clean(<<14, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(14, Opts) ++ Acc, Opts); +clean(<<15, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(15, Opts) ++ Acc, Opts); +clean(<<16, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(16, Opts) ++ Acc, Opts); +clean(<<17, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(17, Opts) ++ Acc, Opts); +clean(<<18, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(18, Opts) ++ Acc, Opts); +clean(<<19, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(19, Opts) ++ Acc, Opts); +clean(<<20, Rest/binary>>, 
Acc, Opts) -> clean(Rest, maybe_replace(20, Opts) ++ Acc, Opts); +clean(<<21, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(21, Opts) ++ Acc, Opts); +clean(<<22, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(22, Opts) ++ Acc, Opts); +clean(<<23, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(23, Opts) ++ Acc, Opts); +clean(<<24, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(24, Opts) ++ Acc, Opts); +clean(<<25, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(25, Opts) ++ Acc, Opts); +clean(<<26, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(26, Opts) ++ Acc, Opts); +clean(<<27, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(27, Opts) ++ Acc, Opts); +clean(<<28, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(28, Opts) ++ Acc, Opts); +clean(<<29, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(29, Opts) ++ Acc, Opts); +clean(<<30, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(30, Opts) ++ Acc, Opts); +clean(<<31, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(31, Opts) ++ Acc, Opts); +clean(<<32, Rest/binary>>, Acc, Opts) -> clean(Rest, [32] ++ Acc, Opts); +clean(<<33, Rest/binary>>, Acc, Opts) -> clean(Rest, [33] ++ Acc, Opts); +clean(<<34, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(34, Opts) ++ Acc, Opts); +clean(<<35, Rest/binary>>, Acc, Opts) -> clean(Rest, [35] ++ Acc, Opts); +clean(<<36, Rest/binary>>, Acc, Opts) -> clean(Rest, [36] ++ Acc, Opts); +clean(<<37, Rest/binary>>, Acc, Opts) -> clean(Rest, [37] ++ Acc, Opts); +clean(<<38, Rest/binary>>, Acc, Opts) -> clean(Rest, [38] ++ Acc, Opts); +clean(<<39, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(39, Opts) ++ Acc, Opts); +clean(<<40, Rest/binary>>, Acc, Opts) -> clean(Rest, [40] ++ Acc, Opts); +clean(<<41, Rest/binary>>, Acc, Opts) -> clean(Rest, [41] ++ Acc, Opts); +clean(<<42, Rest/binary>>, Acc, Opts) -> clean(Rest, [42] ++ Acc, Opts); +clean(<<43, Rest/binary>>, Acc, Opts) -> clean(Rest, [43] 
++ Acc, Opts); +clean(<<44, Rest/binary>>, Acc, Opts) -> clean(Rest, [44] ++ Acc, Opts); +clean(<<45, Rest/binary>>, Acc, Opts) -> clean(Rest, [45] ++ Acc, Opts); +clean(<<46, Rest/binary>>, Acc, Opts) -> clean(Rest, [46] ++ Acc, Opts); +clean(<<47, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(47, Opts) ++ Acc, Opts); +clean(<<48, Rest/binary>>, Acc, Opts) -> clean(Rest, [48] ++ Acc, Opts); +clean(<<49, Rest/binary>>, Acc, Opts) -> clean(Rest, [49] ++ Acc, Opts); +clean(<<50, Rest/binary>>, Acc, Opts) -> clean(Rest, [50] ++ Acc, Opts); +clean(<<51, Rest/binary>>, Acc, Opts) -> clean(Rest, [51] ++ Acc, Opts); +clean(<<52, Rest/binary>>, Acc, Opts) -> clean(Rest, [52] ++ Acc, Opts); +clean(<<53, Rest/binary>>, Acc, Opts) -> clean(Rest, [53] ++ Acc, Opts); +clean(<<54, Rest/binary>>, Acc, Opts) -> clean(Rest, [54] ++ Acc, Opts); +clean(<<55, Rest/binary>>, Acc, Opts) -> clean(Rest, [55] ++ Acc, Opts); +clean(<<56, Rest/binary>>, Acc, Opts) -> clean(Rest, [56] ++ Acc, Opts); +clean(<<57, Rest/binary>>, Acc, Opts) -> clean(Rest, [57] ++ Acc, Opts); +clean(<<58, Rest/binary>>, Acc, Opts) -> clean(Rest, [58] ++ Acc, Opts); +clean(<<59, Rest/binary>>, Acc, Opts) -> clean(Rest, [59] ++ Acc, Opts); +clean(<<60, Rest/binary>>, Acc, Opts) -> clean(Rest, [60] ++ Acc, Opts); +clean(<<61, Rest/binary>>, Acc, Opts) -> clean(Rest, [61] ++ Acc, Opts); +clean(<<62, Rest/binary>>, Acc, Opts) -> clean(Rest, [62] ++ Acc, Opts); +clean(<<63, Rest/binary>>, Acc, Opts) -> clean(Rest, [63] ++ Acc, Opts); +clean(<<64, Rest/binary>>, Acc, Opts) -> clean(Rest, [64] ++ Acc, Opts); +clean(<<65, Rest/binary>>, Acc, Opts) -> clean(Rest, [65] ++ Acc, Opts); +clean(<<66, Rest/binary>>, Acc, Opts) -> clean(Rest, [66] ++ Acc, Opts); +clean(<<67, Rest/binary>>, Acc, Opts) -> clean(Rest, [67] ++ Acc, Opts); +clean(<<68, Rest/binary>>, Acc, Opts) -> clean(Rest, [68] ++ Acc, Opts); +clean(<<69, Rest/binary>>, Acc, Opts) -> clean(Rest, [69] ++ Acc, Opts); +clean(<<70, Rest/binary>>, Acc, Opts) -> 
clean(Rest, [70] ++ Acc, Opts); +clean(<<71, Rest/binary>>, Acc, Opts) -> clean(Rest, [71] ++ Acc, Opts); +clean(<<72, Rest/binary>>, Acc, Opts) -> clean(Rest, [72] ++ Acc, Opts); +clean(<<73, Rest/binary>>, Acc, Opts) -> clean(Rest, [73] ++ Acc, Opts); +clean(<<74, Rest/binary>>, Acc, Opts) -> clean(Rest, [74] ++ Acc, Opts); +clean(<<75, Rest/binary>>, Acc, Opts) -> clean(Rest, [75] ++ Acc, Opts); +clean(<<76, Rest/binary>>, Acc, Opts) -> clean(Rest, [76] ++ Acc, Opts); +clean(<<77, Rest/binary>>, Acc, Opts) -> clean(Rest, [77] ++ Acc, Opts); +clean(<<78, Rest/binary>>, Acc, Opts) -> clean(Rest, [78] ++ Acc, Opts); +clean(<<79, Rest/binary>>, Acc, Opts) -> clean(Rest, [79] ++ Acc, Opts); +clean(<<80, Rest/binary>>, Acc, Opts) -> clean(Rest, [80] ++ Acc, Opts); +clean(<<81, Rest/binary>>, Acc, Opts) -> clean(Rest, [81] ++ Acc, Opts); +clean(<<82, Rest/binary>>, Acc, Opts) -> clean(Rest, [82] ++ Acc, Opts); +clean(<<83, Rest/binary>>, Acc, Opts) -> clean(Rest, [83] ++ Acc, Opts); +clean(<<84, Rest/binary>>, Acc, Opts) -> clean(Rest, [84] ++ Acc, Opts); +clean(<<85, Rest/binary>>, Acc, Opts) -> clean(Rest, [85] ++ Acc, Opts); +clean(<<86, Rest/binary>>, Acc, Opts) -> clean(Rest, [86] ++ Acc, Opts); +clean(<<87, Rest/binary>>, Acc, Opts) -> clean(Rest, [87] ++ Acc, Opts); +clean(<<88, Rest/binary>>, Acc, Opts) -> clean(Rest, [88] ++ Acc, Opts); +clean(<<89, Rest/binary>>, Acc, Opts) -> clean(Rest, [89] ++ Acc, Opts); +clean(<<90, Rest/binary>>, Acc, Opts) -> clean(Rest, [90] ++ Acc, Opts); +clean(<<91, Rest/binary>>, Acc, Opts) -> clean(Rest, [91] ++ Acc, Opts); +clean(<<92, Rest/binary>>, Acc, Opts) -> clean(Rest, maybe_replace(92, Opts) ++ Acc, Opts); +clean(<<93, Rest/binary>>, Acc, Opts) -> clean(Rest, [93] ++ Acc, Opts); +clean(<<94, Rest/binary>>, Acc, Opts) -> clean(Rest, [94] ++ Acc, Opts); +clean(<<95, Rest/binary>>, Acc, Opts) -> clean(Rest, [95] ++ Acc, Opts); +clean(<<96, Rest/binary>>, Acc, Opts) -> clean(Rest, [96] ++ Acc, Opts); +clean(<<97, 
Rest/binary>>, Acc, Opts) -> clean(Rest, [97] ++ Acc, Opts); +clean(<<98, Rest/binary>>, Acc, Opts) -> clean(Rest, [98] ++ Acc, Opts); +clean(<<99, Rest/binary>>, Acc, Opts) -> clean(Rest, [99] ++ Acc, Opts); +clean(<<100, Rest/binary>>, Acc, Opts) -> clean(Rest, [100] ++ Acc, Opts); +clean(<<101, Rest/binary>>, Acc, Opts) -> clean(Rest, [101] ++ Acc, Opts); +clean(<<102, Rest/binary>>, Acc, Opts) -> clean(Rest, [102] ++ Acc, Opts); +clean(<<103, Rest/binary>>, Acc, Opts) -> clean(Rest, [103] ++ Acc, Opts); +clean(<<104, Rest/binary>>, Acc, Opts) -> clean(Rest, [104] ++ Acc, Opts); +clean(<<105, Rest/binary>>, Acc, Opts) -> clean(Rest, [105] ++ Acc, Opts); +clean(<<106, Rest/binary>>, Acc, Opts) -> clean(Rest, [106] ++ Acc, Opts); +clean(<<107, Rest/binary>>, Acc, Opts) -> clean(Rest, [107] ++ Acc, Opts); +clean(<<108, Rest/binary>>, Acc, Opts) -> clean(Rest, [108] ++ Acc, Opts); +clean(<<109, Rest/binary>>, Acc, Opts) -> clean(Rest, [109] ++ Acc, Opts); +clean(<<110, Rest/binary>>, Acc, Opts) -> clean(Rest, [110] ++ Acc, Opts); +clean(<<111, Rest/binary>>, Acc, Opts) -> clean(Rest, [111] ++ Acc, Opts); +clean(<<112, Rest/binary>>, Acc, Opts) -> clean(Rest, [112] ++ Acc, Opts); +clean(<<113, Rest/binary>>, Acc, Opts) -> clean(Rest, [113] ++ Acc, Opts); +clean(<<114, Rest/binary>>, Acc, Opts) -> clean(Rest, [114] ++ Acc, Opts); +clean(<<115, Rest/binary>>, Acc, Opts) -> clean(Rest, [115] ++ Acc, Opts); +clean(<<116, Rest/binary>>, Acc, Opts) -> clean(Rest, [116] ++ Acc, Opts); +clean(<<117, Rest/binary>>, Acc, Opts) -> clean(Rest, [117] ++ Acc, Opts); +clean(<<118, Rest/binary>>, Acc, Opts) -> clean(Rest, [118] ++ Acc, Opts); +clean(<<119, Rest/binary>>, Acc, Opts) -> clean(Rest, [119] ++ Acc, Opts); +clean(<<120, Rest/binary>>, Acc, Opts) -> clean(Rest, [120] ++ Acc, Opts); +clean(<<121, Rest/binary>>, Acc, Opts) -> clean(Rest, [121] ++ Acc, Opts); +clean(<<122, Rest/binary>>, Acc, Opts) -> clean(Rest, [122] ++ Acc, Opts); +clean(<<123, Rest/binary>>, Acc, Opts) -> 
clean(Rest, [123] ++ Acc, Opts); +clean(<<124, Rest/binary>>, Acc, Opts) -> clean(Rest, [124] ++ Acc, Opts); +clean(<<125, Rest/binary>>, Acc, Opts) -> clean(Rest, [125] ++ Acc, Opts); +clean(<<126, Rest/binary>>, Acc, Opts) -> clean(Rest, [126] ++ Acc, Opts); +clean(<<127, Rest/binary>>, Acc, Opts) -> clean(Rest, [127] ++ Acc, Opts); +clean(<>, Acc, Opts) when X < 16#800 -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X == 16#2028; X == 16#2029 -> + clean(Rest, maybe_replace(X, Opts) ++ Acc, Opts); +clean(<>, Acc, Opts) when X < 16#dcff -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X > 16#dfff, X < 16#fdd0 -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X > 16#fdef, X < 16#fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#10000, X < 16#1fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#20000, X < 16#2fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#30000, X < 16#3fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#40000, X < 16#4fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#50000, X < 16#5fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#60000, X < 16#6fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#70000, X < 16#7fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#80000, X < 16#8fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#90000, X < 16#9fffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#a0000, X < 16#afffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#b0000, X < 16#bfffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#c0000, X < 16#cfffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#d0000, X < 16#dfffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#e0000, X < 
16#efffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#f0000, X < 16#ffffe -> + clean(Rest, [X] ++ Acc, Opts); +clean(<>, Acc, Opts) when X >= 16#100000, X < 16#10fffe -> + clean(Rest, [X] ++ Acc, Opts); +%% noncharacters +clean(<<_/utf8, Rest/binary>>, Acc, Opts) -> + clean(Rest, maybe_replace(noncharacter, Opts) ++ Acc, Opts); +%% surrogates +clean(<<237, X, _, Rest/binary>>, Acc, Opts) when X >= 160 -> + clean(Rest, maybe_replace(surrogate, Opts) ++ Acc, Opts); +%% u+fffe and u+ffff for R14BXX +clean(<<239, 191, X, Rest/binary>>, Acc, Opts) when X == 190; X == 191 -> + clean(Rest, maybe_replace(noncharacter, Opts) ++ Acc, Opts); +%% overlong encodings and missing continuations of a 2 byte sequence +clean(<>, Acc, Opts) when X >= 192, X =< 223 -> + clean(strip_continuations(Rest, 1), maybe_replace(badutf, Opts) ++ Acc, Opts); +%% overlong encodings and missing continuations of a 3 byte sequence +clean(<>, Acc, Opts) when X >= 224, X =< 239 -> + clean(strip_continuations(Rest, 2), maybe_replace(badutf, Opts) ++ Acc, Opts); +%% overlong encodings and missing continuations of a 4 byte sequence +clean(<>, Acc, Opts) when X >= 240, X =< 247 -> + clean(strip_continuations(Rest, 3), maybe_replace(badutf, Opts) ++ Acc, Opts); +clean(<<_, Rest/binary>>, Acc, Opts) -> + clean(Rest, maybe_replace(badutf, Opts) ++ Acc, Opts). 
-clean_string(Str, Len, Len, _Opts) -> Str; -clean_string(Str, L, Len, Opts) -> - case Str of - <<_:L/binary, X/utf8, _/binary>> when X < 16#80 -> clean_string(Str, L + 1, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X < 16#800 -> clean_string(Str, L + 2, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X < 16#dcff -> clean_string(Str, L + 3, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X > 16#dfff, X < 16#fdd0 -> clean_string(Str, L + 3, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X > 16#fdef, X < 16#fffe -> clean_string(Str, L + 3, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#10000, X < 16#1fffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#20000, X < 16#2fffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#30000, X < 16#3fffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#40000, X < 16#4fffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#50000, X < 16#5fffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#60000, X < 16#6fffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#70000, X < 16#7fffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#80000, X < 16#8fffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#90000, X < 16#9fffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#a0000, X < 16#afffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#b0000, X < 16#bfffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#c0000, X < 16#cfffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#d0000, X < 16#dfffe -> clean_string(Str, L + 4, Len, Opts) - 
; <<_:L/binary, X/utf8, _/binary>> when X >= 16#e0000, X < 16#efffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#f0000, X < 16#ffffe -> clean_string(Str, L + 4, Len, Opts) - ; <<_:L/binary, X/utf8, _/binary>> when X >= 16#100000, X < 16#10fffe -> clean_string(Str, L + 4, Len, Opts) - %% noncharacters - ; <> when X < 16#10000 -> - clean_string(<>, L + 3, Len, Opts) - ; <> -> - clean_string(<>, L + 4, Len, Opts) - %% surrogates - ; <> when X >= 160 -> - clean_string(<>, L + 3, Len, Opts) - %% u+fffe and u+ffff for R14BXX - ; <> when X == 190; X == 191 -> - clean_string(<>, L + 3, Len, Opts) - %% overlong encodings and missing continuations of a 2 byte sequence - ; <> when X >= 192, X =< 223 -> - {Tail, Stripped} = strip_continuations(T, 1, 0), - clean_string(<>, L + 3, Len + 2 - Stripped, Opts) - %% overlong encodings and missing continuations of a 3 byte sequence - ; <> when X >= 224, X =< 239 -> - {Tail, Stripped} = strip_continuations(T, 2, 0), - clean_string(<>, L + 3, Len + 2 - Stripped, Opts) - %% overlong encodings and missing continuations of a 4 byte sequence - ; <> when X >= 240, X =< 247 -> - {Tail, Stripped} = strip_continuations(T, 3, 0), - clean_string(<>, L + 3, Len + 2 - Stripped, Opts) - ; <> -> - clean_string(<>, L + 3, Len + 2, Opts) - end. - - -strip_continuations(Bin, 0, N) -> {Bin, N}; -strip_continuations(<>, N, M) when X >= 128, X =< 191 -> - strip_continuations(Rest, N - 1, M + 1); +strip_continuations(Bin, 0) -> Bin; +strip_continuations(<>, N) when X >= 128, X =< 191 -> + strip_continuations(Rest, N - 1); %% not a continuation byte -strip_continuations(Bin, _, N) -> {Bin, N}. +strip_continuations(Bin, _) -> Bin. 
+ + +maybe_replace($\b, #opts{json_escape=true}) -> [$b, $\\]; +maybe_replace($\t, #opts{json_escape=true}) -> [$t, $\\]; +maybe_replace($\n, #opts{json_escape=true}) -> [$n, $\\]; +maybe_replace($\f, #opts{json_escape=true}) -> [$f, $\\]; +maybe_replace($\r, #opts{json_escape=true}) -> [$r, $\\]; +maybe_replace($\", #opts{json_escape=true}) -> [$\", $\\]; +maybe_replace($', Opts=#opts{json_escape=true}) -> + case Opts#opts.single_quotes of + true -> [$', $\\] + ; false -> [$'] + end; +maybe_replace($/, Opts=#opts{json_escape=true}) -> + case Opts#opts.escape_forward_slash of + true -> [$/, $\\] + ; false -> [$/] + end; +maybe_replace($\\, #opts{json_escape=true}) -> [$\\, $\\]; +maybe_replace(X, Opts=#opts{json_escape=true}) when X == 16#2028; X == 16#2029 -> + case Opts#opts.no_jsonp_escapes of + true -> [X] + ; false -> lists:reverse(jsx_utils:json_escape_sequence(X)) + end; +maybe_replace(X, #opts{json_escape=true}) when X < 32 -> + lists:reverse(jsx_utils:json_escape_sequence(X)); +maybe_replace(noncharacter, #opts{loose_unicode=true}) -> [16#fffd]; +maybe_replace(surrogate, #opts{loose_unicode=true}) -> [16#fffd]; +maybe_replace(badutf, #opts{loose_unicode=true}) -> [16#fffd]. -ifdef(TEST). @@ -488,6 +779,31 @@ encode_test_() -> ]. 
+escapes_test_() -> + [ + {"backspace escape", ?_assertEqual(encode(<<"\b">>, [json_escape]), [{string, <<"\\b">>}, end_json])}, + {"formfeed escape", ?_assertEqual(encode(<<"\f">>, [json_escape]), [{string, <<"\\f">>}, end_json])}, + {"newline escape", ?_assertEqual(encode(<<"\n">>, [json_escape]), [{string, <<"\\n">>}, end_json])}, + {"carriage return escape", ?_assertEqual(encode(<<"\r">>, [json_escape]), [{string, <<"\\r">>}, end_json])}, + {"tab escape", ?_assertEqual(encode(<<"\t">>, [json_escape]), [{string, <<"\\t">>}, end_json])}, + {"quote escape", ?_assertEqual(encode(<<"\"">>, [json_escape]), [{string, <<"\\\"">>}, end_json])}, + {"single quote escape", ?_assertEqual(encode(<<"'">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, + {"no single quote escape", ?_assertEqual(encode(<<"'">>, [json_escape]), [{string, <<"'">>}, end_json])}, + {"forward slash escape", ?_assertEqual(encode(<<"/">>, [json_escape, escape_forward_slash]), [{string, <<"\\/">>}, end_json])}, + {"no forward slash escape", ?_assertEqual(encode(<<"/">>, [json_escape]), [{string, <<"/">>}, end_json])}, + {"back slash escape", ?_assertEqual(encode(<<"\\">>, [json_escape]), [{string, <<"\\\\">>}, end_json])}, + {"jsonp escape", ?_assertEqual( + encode(<<16#2028/utf8, 16#2029/utf8>>, [json_escape]), + [{string, <<"\\u2028\\u2029">>}, end_json] + )}, + {"no jsonp escape", ?_assertEqual( + encode(<<16#2028/utf8, 16#2029/utf8>>, [json_escape, no_jsonp_escapes]), + [{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json] + )}, + {"control escape", ?_assertEqual(encode(<<0>>, [json_escape]), [{string, <<"\\u0000">>}, end_json])} + ]. + + surrogates_test_() -> [ {"surrogates - badjson", diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index ac1c9f3..eb6fb0f 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -25,7 +25,7 @@ -export([parse_opts/1]). -export([extract_opts/1]). --export([json_escape/2]). +-export([json_escape/2, json_escape_sequence/1]). 
-include("jsx_opts.hrl"). @@ -321,7 +321,7 @@ json_escape(Str, _, L, Len) when L =:= Len -> %% convert a codepoint to it's \uXXXX equiv. json_escape_sequence(X) -> <> = <>, - unicode:characters_to_binary([$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]). + [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]. to_hex(10) -> $a; From fa173e2d113042ee178eca0f4c8759f18cb21887 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 01:34:21 -0700 Subject: [PATCH 06/38] json escape to_json and format by default --- src/jsx_to_json.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 95e3ad7..0760349 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -39,13 +39,13 @@ -spec to_json(Source::any(), Opts::opts()) -> binary(). to_json(Source, Opts) when is_list(Opts) -> - (jsx:encoder(?MODULE, Opts, jsx_utils:extract_opts([json_escape] ++ Opts)))(Source). + (jsx:encoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [json_escape])))(Source). -spec format(Source::binary(), Opts::opts()) -> binary(). format(Source, Opts) when is_binary(Source) andalso is_list(Opts) -> - (jsx:decoder(?MODULE, Opts, jsx_utils:extract_opts([json_escape] ++ Opts)))(Source). + (jsx:decoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [json_escape])))(Source). parse_opts(Opts) -> parse_opts(Opts, #opts{}). From cb6c676775599fdc0e135161c07faeeb70daf626 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 01:40:05 -0700 Subject: [PATCH 07/38] add test for dirty_strings --- src/jsx_encoder.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 397b99d..9b5af93 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -464,6 +464,7 @@ strip_continuations(<>, N) when X >= 128, X =< 191 -> strip_continuations(Bin, _) -> Bin. 
+maybe_replace(X, #opts{dirty_strings=true}) when is_integer(X) -> [X]; maybe_replace($\b, #opts{json_escape=true}) -> [$b, $\\]; maybe_replace($\t, #opts{json_escape=true}) -> [$t, $\\]; maybe_replace($\n, #opts{json_escape=true}) -> [$n, $\\]; @@ -800,7 +801,8 @@ escapes_test_() -> encode(<<16#2028/utf8, 16#2029/utf8>>, [json_escape, no_jsonp_escapes]), [{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json] )}, - {"control escape", ?_assertEqual(encode(<<0>>, [json_escape]), [{string, <<"\\u0000">>}, end_json])} + {"control escape", ?_assertEqual(encode(<<0>>, [json_escape]), [{string, <<"\\u0000">>}, end_json])}, + {"dirty strings", ?_assertEqual(encode(<<"\n">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])} ]. From 0e4e04ee36667219aa435d5f28b81f97e21adcc4 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 07:27:34 -0700 Subject: [PATCH 08/38] couple new tests for better test coverage in jsx_encoder --- src/jsx_encoder.erl | 54 +++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 9b5af93..de51418 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -627,7 +627,7 @@ bad_utf8_test_() -> {"missing continuation byte (4 byte missing one)", ?_assert(is_bad(xcode(<<240, 144, 128, 32>>))) }, - {"missing continuation byte2 (4 byte missing one) replaced", + {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( xcode(<<240, 144, 128, 32>>, [loose_unicode]), <<16#fffd/utf8, 32>> @@ -636,7 +636,7 @@ bad_utf8_test_() -> {"missing continuation byte (4 byte missing two)", ?_assert(is_bad(xcode(<<240, 144, 32>>))) }, - {"missing continuation byte2 (4 byte missing two) replaced", + {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( xcode(<<240, 144, 32>>, [loose_unicode]), <<16#fffd/utf8, 32>> @@ -809,10 +809,10 @@ escapes_test_() -> surrogates_test_() -> [ {"surrogates - badjson", - 
?_assertEqual(check_bad(surrogates()), []) + ?_assert(check_bad(surrogates())) }, {"surrogates - replaced", - ?_assertEqual(check_replaced(surrogates()), []) + ?_assert(check_replaced(surrogates())) } ]. @@ -820,10 +820,25 @@ surrogates_test_() -> good_characters_test_() -> [ {"acceptable codepoints", - ?_assertEqual(check_good(good()), []) + ?_assert(check_good(good(), [])) + }, + {"acceptable codepoints - json_escape", + ?_assert(check_good(good(), [json_escape])) + }, + {"acceptable codepoints - loose_unicode", + ?_assert(check_good(good(), [json_escape])) + }, + {"acceptable codepoints - json_escape + loose_unicode", + ?_assert(check_good(good(), [json_escape, loose_unicode])) }, {"acceptable extended", - ?_assertEqual(check_good(good_extended()), []) + ?_assert(check_good(good_extended(), [])) + }, + {"acceptable extended - json_escape", + ?_assert(check_good(good_extended(), [json_escape])) + }, + {"acceptable extended - json_escape", + ?_assert(check_good(good_extended(), [loose_unicode])) } ]. @@ -831,10 +846,10 @@ good_characters_test_() -> reserved_test_() -> [ {"reserved noncharacters - badjson", - ?_assertEqual(check_bad(reserved_space()), []) + ?_assert(check_bad(reserved_space())) }, {"reserved noncharacters - replaced", - ?_assertEqual(check_replaced(reserved_space()), []) + ?_assert(check_replaced(reserved_space())) } ]. @@ -842,10 +857,10 @@ reserved_test_() -> noncharacters_test_() -> [ {"noncharacters - badjson", - ?_assertEqual(check_bad(noncharacters()), []) + ?_assert(check_bad(noncharacters())) }, {"noncharacters - replaced", - ?_assertEqual(check_replaced(noncharacters()), []) + ?_assert(check_replaced(noncharacters())) } ]. 
@@ -853,31 +868,31 @@ noncharacters_test_() -> extended_noncharacters_test_() -> [ {"extended noncharacters - badjson", - ?_assertEqual(check_bad(extended_noncharacters()), []) + ?_assert(check_bad(extended_noncharacters())) }, {"extended noncharacters - replaced", - ?_assertEqual(check_replaced(extended_noncharacters()), []) + ?_assert(check_replaced(extended_noncharacters())) } ]. check_bad(List) -> - lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end, + [] == lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end, check(List, [], []) ). check_replaced(List) -> - lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true + [] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true ; (_) -> false end, check(List, [loose_unicode], []) ). -check_good(List) -> - lists:dropwhile(fun({_, [{string, _}|_]}) -> true ; (_) -> false end, - check(List, [], []) +check_good(List, Opts) -> + [] == lists:dropwhile(fun({_, [{string, _}|_]}) -> true ; (_) -> false end, + check(List, Opts, []) ). @@ -889,7 +904,6 @@ check([H|T], Opts, Acc) -> noncharacters() -> lists:seq(16#fffe, 16#ffff). - extended_noncharacters() -> [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] @@ -900,16 +914,12 @@ extended_noncharacters() -> ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff]. - surrogates() -> lists:seq(16#d800, 16#dfff). - reserved_space() -> lists:seq(16#fdd0, 16#fdef). - good() -> lists:seq(16#0000, 16#d7ff) ++ lists:seq(16#e000, 16#fdcf) ++ lists:seq(16#fdf0, 16#fffd). 
- good_extended() -> [16#10000, 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, 16#c0000, 16#d0000, 16#e0000, 16#f0000 From ab49c3379b6e62ad29b1e3363cf4573013751e3f Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 08:00:34 -0700 Subject: [PATCH 09/38] style and whitespace --- src/jsx_encoder.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index de51418..2a5151b 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -820,7 +820,7 @@ surrogates_test_() -> good_characters_test_() -> [ {"acceptable codepoints", - ?_assert(check_good(good(), [])) + ?_assert(check_good(good())) }, {"acceptable codepoints - json_escape", ?_assert(check_good(good(), [json_escape])) @@ -832,7 +832,7 @@ good_characters_test_() -> ?_assert(check_good(good(), [json_escape, loose_unicode])) }, {"acceptable extended", - ?_assert(check_good(good_extended(), [])) + ?_assert(check_good(good_extended())) }, {"acceptable extended - json_escape", ?_assert(check_good(good_extended(), [json_escape])) @@ -883,13 +883,14 @@ check_bad(List) -> check_replaced(List) -> - [] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true - ; (_) -> false + [] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true ; (_) -> false end, check(List, [loose_unicode], []) ). +check_good(List) -> check_good(List, []). 
+ check_good(List, Opts) -> [] == lists:dropwhile(fun({_, [{string, _}|_]}) -> true ; (_) -> false end, check(List, Opts, []) From ae2aae1ebfbf13d4e252c4b544491cd3890c0f5c Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 08:02:27 -0700 Subject: [PATCH 10/38] style and whitespace --- src/jsx_decoder.erl | 61 +++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index b6311c8..ba4a449 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -1249,6 +1249,16 @@ bad_utf8_test_() -> ]. +decode(JSON) -> decode(JSON, []). + +decode(JSON, Opts) -> + try + (decoder(jsx, [], Opts))(JSON) + catch + error:badarg -> {error, badjson} + end. + + ignore_bad_escapes_test_() -> [ {"ignore unrecognized escape sequence", ?_assertEqual( @@ -1459,7 +1469,7 @@ json_escape_test_() -> [start_array, {string, <<"a string\\n">>}, end_array, end_json] )}, {"no json escape test", ?_assertEqual( - decode(<<"[\"a string\\n\"]">>, []), + decode(<<"[\"a string\\n\"]">>), [start_array, {string, <<"a string\n">>}, end_array, end_json] )} ]. @@ -1468,10 +1478,10 @@ json_escape_test_() -> noncharacters_test_() -> [ {"noncharacters - badjson", - ?_assertEqual(check_bad(noncharacters()), []) + ?_assert(check_bad(noncharacters())) }, {"noncharacters - replaced", - ?_assertEqual(check_replaced(noncharacters()), []) + ?_assert(check_replaced(noncharacters())) } ]. @@ -1479,10 +1489,10 @@ noncharacters_test_() -> extended_noncharacters_test_() -> [ {"extended noncharacters - badjson", - ?_assertEqual(check_bad(extended_noncharacters()), []) + ?_assert(check_bad(extended_noncharacters())) }, {"extended noncharacters - replaced", - ?_assertEqual(check_replaced(extended_noncharacters()), []) + ?_assert(check_replaced(extended_noncharacters())) } ]. 
@@ -1490,10 +1500,10 @@ extended_noncharacters_test_() -> surrogates_test_() -> [ {"surrogates - badjson", - ?_assertEqual(check_bad(surrogates()), []) + ?_assert(check_bad(surrogates())) }, {"surrogates - replaced", - ?_assertEqual(check_replaced(surrogates()), []) + ?_assert(check_replaced(surrogates())) } ]. @@ -1501,7 +1511,7 @@ surrogates_test_() -> control_test_() -> [ {"control characters - badjson", - ?_assertEqual(check_bad(control_characters()), []) + ?_assert(check_bad(control_characters())) } ]. @@ -1509,10 +1519,10 @@ control_test_() -> reserved_test_() -> [ {"reserved noncharacters - badjson", - ?_assertEqual(check_bad(reserved_space()), []) + ?_assert(check_bad(reserved_space())) }, {"reserved noncharacters - replaced", - ?_assertEqual(check_replaced(reserved_space()), []) + ?_assert(check_replaced(reserved_space())) } ]. @@ -1520,31 +1530,32 @@ reserved_test_() -> good_characters_test_() -> [ {"acceptable codepoints", - ?_assertEqual(check_good(good()), []) + ?_assert(check_good(good())) }, {"acceptable extended", - ?_assertEqual(check_good(good_extended()), []) + ?_assert(check_good(good_extended())) } ]. check_bad(List) -> - lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end, + [] == lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end, check(List, [], []) ). check_replaced(List) -> - lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true - ; (_) -> false + [] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true ; (_) -> false end, check(List, [loose_unicode], []) ). -check_good(List) -> - lists:dropwhile(fun({_, [{string, _}|_]}) -> true ; (_) -> false end, - check(List, [], []) +check_good(List) -> check_good(List, []). + +check_good(List, Opts) -> + [] == lists:dropwhile(fun({_, [{string, _}|_]}) -> true ; (_) -> false end, + check(List, Opts, []) ). @@ -1554,17 +1565,8 @@ check([H|T], Opts, Acc) -> check(T, Opts, [{H, R}] ++ Acc). 
-decode(JSON, Opts) -> - try - (decoder(jsx, [], Opts))(JSON) - catch - error:badarg -> {error, badjson} - end. - - noncharacters() -> lists:seq(16#fffe, 16#ffff). - extended_noncharacters() -> [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] @@ -1575,23 +1577,18 @@ extended_noncharacters() -> ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff]. - surrogates() -> lists:seq(16#d800, 16#dfff). - control_characters() -> lists:seq(1, 31). - reserved_space() -> lists:seq(16#fdd0, 16#fdef). - good() -> [32, 33] ++ lists:seq(16#23, 16#5b) ++ lists:seq(16#5d, 16#d7ff) ++ lists:seq(16#e000, 16#fdcf) ++ lists:seq(16#fdf0, 16#fffd). - good_extended() -> [16#10000, 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, 16#c0000, 16#d0000, 16#e0000, 16#f0000 From 60bc46e2d4c3729b37a095846f1f9ddf2abfbc4f Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 12:30:47 -0700 Subject: [PATCH 11/38] add test suite for json_escape in decoder --- src/jsx_decoder.erl | 46 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index ba4a449..9aebb31 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -1462,16 +1462,29 @@ escape_forward_slash_test_() -> ]. 
-json_escape_test_() -> +escapes_test_() -> [ - {"json escape test", ?_assertEqual( - decode(<<"[\"a string\\n\"]">>, [json_escape]), - [start_array, {string, <<"a string\\n">>}, end_array, end_json] + {"backspace escape", ?_assertEqual(decode(<<"\"\\b\"">>, [json_escape]), [{string, <<"\\b">>}, end_json])}, + {"formfeed escape", ?_assertEqual(decode(<<"\"\\f\"">>, [json_escape]), [{string, <<"\\f">>}, end_json])}, + {"newline escape", ?_assertEqual(decode(<<"\"\\n\"">>, [json_escape]), [{string, <<"\\n">>}, end_json])}, + {"carriage return escape", ?_assertEqual(decode(<<"\"\\r\"">>, [json_escape]), [{string, <<"\\r">>}, end_json])}, + {"tab escape", ?_assertEqual(decode(<<"\"\\t\"">>, [json_escape]), [{string, <<"\\t">>}, end_json])}, + {"quote escape", ?_assertEqual(decode(<<"\"\\\"\"">>, [json_escape]), [{string, <<"\\\"">>}, end_json])}, + {"single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, + {"no single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [json_escape]), [{string, <<"'">>}, end_json])}, + {"forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [json_escape, escape_forward_slash]), [{string, <<"\\/">>}, end_json])}, + {"no forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [json_escape]), [{string, <<"/">>}, end_json])}, + {"back slash escape", ?_assertEqual(decode(<<"\"\\\\\"">>, [json_escape]), [{string, <<"\\\\">>}, end_json])}, + {"jsonp escape", ?_assertEqual( + decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [json_escape]), + [{string, <<"\\u2028\\u2029">>}, end_json] )}, - {"no json escape test", ?_assertEqual( - decode(<<"[\"a string\\n\"]">>), - [start_array, {string, <<"a string\n">>}, end_array, end_json] - )} + {"no jsonp escape", ?_assertEqual( + decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [json_escape, no_jsonp_escapes]), + [{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json] + )}, + {"control escape", ?_assertEqual(decode(<<$\", "\\u0000"/utf8, 
$\">>, [json_escape]), [{string, <<"\\u0000">>}, end_json])}, + {"dirty strings", ?_assertEqual(decode(<<"\"\\n\"">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])} ]. @@ -1532,8 +1545,23 @@ good_characters_test_() -> {"acceptable codepoints", ?_assert(check_good(good())) }, + {"acceptable codepoints - json_escape", + ?_assert(check_good(good(), [json_escape])) + }, + {"acceptable codepoints - loose_unicode", + ?_assert(check_good(good(), [json_escape])) + }, + {"acceptable codepoints - json_escape + loose_unicode", + ?_assert(check_good(good(), [json_escape, loose_unicode])) + }, {"acceptable extended", ?_assert(check_good(good_extended())) + }, + {"acceptable extended - json_escape", + ?_assert(check_good(good_extended(), [json_escape])) + }, + {"acceptable extended - json_escape", + ?_assert(check_good(good_extended(), [loose_unicode])) } ]. @@ -1595,7 +1623,7 @@ good_extended() -> [16#10000, 16#20000, 16#30000, 16#40000, 16#50000, ] ++ lists:seq(16#100000, 16#10fffd). -%% erlang refuses to encode certain codepoints, so fake them all +%% erlang refuses to decode certain codepoints, so fake them all to_fake_utf(N, utf8) when N < 16#0080 -> <<34/utf8, N:8, 34/utf8>>; to_fake_utf(N, utf8) when N < 16#0800 -> <<0:5, Y:5, X:6>> = <>, From 66cb67148f61d0e09452a1b8adc407714083620d Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 17:51:20 -0700 Subject: [PATCH 12/38] whitespace changes in jsx_decoder.erl --- src/jsx_decoder.erl | 89 +++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 55 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 9aebb31..9b0f45c 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -29,15 +29,7 @@ -spec decoder(Handler::module(), State::any(), Opts::jsx:opts()) -> jsx:decoder(). decoder(Handler, State, Opts) -> - fun(JSON) -> - value( - JSON, - {Handler, Handler:init(State)}, - [], - jsx_utils:parse_opts(Opts) - ) - end. 
- + fun(JSON) -> value(JSON, {Handler, Handler:init(State)}, [], jsx_utils:parse_opts(Opts)) end. -include("jsx_opts.hrl"). @@ -83,8 +75,9 @@ decoder(Handler, State, Opts) -> %% some useful guards -define(is_hex(Symbol), - (Symbol >= $a andalso Symbol =< $z); (Symbol >= $A andalso Symbol =< $Z); - (Symbol >= $0 andalso Symbol =< $9) + (Symbol >= $a andalso Symbol =< $z); + (Symbol >= $A andalso Symbol =< $Z); + (Symbol >= $0 andalso Symbol =< $9) ). -define(is_nonzero(Symbol), @@ -695,18 +688,16 @@ negative(Bin, Handler, Stack, Opts) -> zero(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - maybe_done(Rest, - {Handler, Handler:handle_event(end_object, - Handler:handle_event(format_number(Acc), State) - )}, + maybe_done( + Rest, + {Handler, Handler:handle_event(end_object, Handler:handle_event(format_number(Acc), State))}, Stack, Opts ); zero(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - maybe_done(Rest, - {Handler, Handler:handle_event(end_array, - Handler:handle_event(format_number(Acc), State) - )}, + maybe_done( + Rest, + {Handler, Handler:handle_event(end_array, Handler:handle_event(format_number(Acc), State))}, Stack, Opts ); @@ -732,18 +723,16 @@ zero(Bin, Handler, Stack, Opts) -> integer(<>, Handler, [Acc|Stack], Opts) when ?is_nonzero(S) -> integer(Rest, Handler, [[S] ++ Acc|Stack], Opts); integer(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - maybe_done(Rest, - {Handler, Handler:handle_event(end_object, - Handler:handle_event(format_number(Acc), State) - )}, + maybe_done( + Rest, + {Handler, Handler:handle_event(end_object, Handler:handle_event(format_number(Acc), State))}, Stack, Opts ); integer(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - maybe_done(Rest, - {Handler, Handler:handle_event(end_array, - Handler:handle_event(format_number(Acc), State) - )}, + maybe_done( + Rest, + {Handler, Handler:handle_event(end_array, Handler:handle_event(format_number(Acc), State))}, Stack, Opts ); @@ -770,8 +759,7 @@ integer(Bin, Handler, 
Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). -initial_decimal(<>, Handler, [{Int, Frac}|Stack], Opts) - when S =:= ?zero; ?is_nonzero(S) -> +initial_decimal(<>, Handler, [{Int, Frac}|Stack], Opts) when S =:= ?zero; ?is_nonzero(S) -> decimal(Rest, Handler, [{Int, [S] ++ Frac}|Stack], Opts); initial_decimal(<<>>, Handler, Stack, Opts) -> ?incomplete(initial_decimal, <<>>, Handler, Stack, Opts); @@ -783,18 +771,16 @@ decimal(<>, Handler, [{Int, Frac}|Stack], Opts) when S=:= ?zero; ?is_nonzero(S) -> decimal(Rest, Handler, [{Int, [S] ++ Frac}|Stack], Opts); decimal(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - maybe_done(Rest, - {Handler, Handler:handle_event(end_object, - Handler:handle_event(format_number(Acc), State) - )}, + maybe_done( + Rest, + {Handler, Handler:handle_event(end_object, Handler:handle_event(format_number(Acc), State))}, Stack, Opts ); decimal(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - maybe_done(Rest, - {Handler, Handler:handle_event(end_array, - Handler:handle_event(format_number(Acc), State) - )}, + maybe_done( + Rest, + {Handler, Handler:handle_event(end_array, Handler:handle_event(format_number(Acc), State))}, Stack, Opts ); @@ -802,8 +788,7 @@ decimal(<>, {Handler, State}, [Acc, object|Stack], Opts) -> key(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [key|Stack], Opts); decimal(<>, {Handler, State}, [Acc, array|Stack], Opts) -> value(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [array|Stack], Opts); -decimal(<>, Handler, [{Int, Frac}|Stack], Opts) - when S =:= $e; S =:= $E -> +decimal(<>, Handler, [{Int, Frac}|Stack], Opts) when S =:= $e; S =:= $E -> e(Rest, Handler, [{Int, Frac, []}|Stack], Opts); decimal(<>, {Handler, State}, [Acc|Stack], Opts) when ?is_whitespace(S) -> maybe_done(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); @@ -818,11 +803,9 @@ decimal(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). 
-e(<>, Handler, [{Int, Frac, Exp}|Stack], Opts) - when S =:= ?zero; ?is_nonzero(S) -> +e(<>, Handler, [{Int, Frac, Exp}|Stack], Opts) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Handler, [{Int, Frac, [S] ++ Exp}|Stack], Opts); -e(<>, Handler, [{Int, Frac, Exp}|Stack], Opts) - when S =:= ?positive; S =:= ?negative -> +e(<>, Handler, [{Int, Frac, Exp}|Stack], Opts) when S =:= ?positive; S =:= ?negative -> ex(Rest, Handler, [{Int, Frac, [S] ++ Exp}|Stack], Opts); e(<<>>, Handler, Stack, Opts) -> ?incomplete(e, <<>>, Handler, Stack, Opts); @@ -830,8 +813,7 @@ e(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). -ex(<>, Handler, [{Int, Frac, Exp}|Stack], Opts) - when S =:= ?zero; ?is_nonzero(S) -> +ex(<>, Handler, [{Int, Frac, Exp}|Stack], Opts) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Handler, [{Int, Frac, [S] ++ Exp}|Stack], Opts); ex(<<>>, Handler, Stack, Opts) -> ?incomplete(ex, <<>>, Handler, Stack, Opts); @@ -839,22 +821,19 @@ ex(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). 
-exp(<>, Handler, [{Int, Frac, Exp}|Stack], Opts) - when S =:= ?zero; ?is_nonzero(S) -> +exp(<>, Handler, [{Int, Frac, Exp}|Stack], Opts) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Handler, [{Int, Frac, [S] ++ Exp}|Stack], Opts); exp(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - maybe_done(Rest, - {Handler, Handler:handle_event(end_object, - Handler:handle_event(format_number(Acc), State) - )}, + maybe_done( + Rest, + {Handler, Handler:handle_event(end_object, Handler:handle_event(format_number(Acc), State))}, Stack, Opts ); exp(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - maybe_done(Rest, - {Handler, Handler:handle_event(end_array, - Handler:handle_event(format_number(Acc), State) - )}, + maybe_done( + Rest, + {Handler, Handler:handle_event(end_array, Handler:handle_event(format_number(Acc), State))}, Stack, Opts ); From ff42a04cdfcb1878f41bc1b1bfaffa9c1b454528 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 20:31:28 -0700 Subject: [PATCH 13/38] remove old escaping code path --- src/jsx_utils.erl | 266 +--------------------------------------------- 1 file changed, 5 insertions(+), 261 deletions(-) diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index eb6fb0f..bfe1900 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -25,13 +25,12 @@ -export([parse_opts/1]). -export([extract_opts/1]). --export([json_escape/2, json_escape_sequence/1]). +-export([json_escape_sequence/1]). -include("jsx_opts.hrl"). %% parsing of jsx opts - parse_opts(Opts) -> parse_opts(Opts, #opts{}). @@ -97,227 +96,6 @@ extract_parser_opts([K|Rest], Acc) -> end. -%% json string escaping, for utf8 binaries. escape the json control sequences to -%% their json equivalent, escape other control characters to \uXXXX sequences, -%% everything else should be a legal json string component - -json_escape(String, Opts) when is_binary(String) -> - case Opts#opts.dirty_strings of - true -> String - ; false -> json_escape(String, Opts, 0, size(String)) - end. 
- - --define(control_character(X), - <> -> - json_escape( - <>, - Opts, - L + 6, - Len + 5 - ) -). - -json_escape(Str, Opts, L, Len) when L < Len -> - case Str of - ?control_character(0); - ?control_character(1); - ?control_character(2); - ?control_character(3); - ?control_character(4); - ?control_character(5); - ?control_character(6); - ?control_character(7); - <> -> json_escape(<>, Opts, L + 2, Len + 1); - <> -> json_escape(<>, Opts, L + 2, Len + 1); - <> -> json_escape(<>, Opts, L + 2, Len + 1); - ?control_character(11); - <> -> json_escape(<>, Opts, L + 2, Len + 1); - <> -> json_escape(<>, Opts, L + 2, Len + 1); - ?control_character(14); - ?control_character(15); - ?control_character(16); - ?control_character(17); - ?control_character(18); - ?control_character(19); - ?control_character(20); - ?control_character(21); - ?control_character(22); - ?control_character(23); - ?control_character(24); - ?control_character(25); - ?control_character(26); - ?control_character(27); - ?control_character(28); - ?control_character(29); - ?control_character(30); - ?control_character(31); - <<_:L/binary, 32, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 33, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <> -> json_escape(<>, Opts, L + 2, Len + 1); - <<_:L/binary, 35, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 36, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 37, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 38, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 39, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 40, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 41, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 42, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 43, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 44, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 45, 
_/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 46, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <> -> - case Opts#opts.escape_forward_slash of - true -> - json_escape(<>, Opts, L + 2, Len + 1); - false -> - json_escape(<>, Opts, L + 1, Len) - end; - <<_:L/binary, 48, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 49, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 50, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 51, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 52, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 53, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 54, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 55, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 56, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 57, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 58, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 59, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 60, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 61, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 62, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 63, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 64, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 65, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 66, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 67, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 68, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 69, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 70, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 71, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 72, _/binary>> -> 
json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 73, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 74, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 75, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 76, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 77, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 78, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 79, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 80, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 81, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 82, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 83, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 84, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 85, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 86, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 87, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 88, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 89, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 90, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 91, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <> -> json_escape(<>, Opts, L + 2, Len + 1); - <<_:L/binary, 93, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 94, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 95, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 96, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 97, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 98, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 99, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 100, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 101, 
_/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 102, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 103, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 104, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 105, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 106, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 107, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 108, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 109, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 110, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 111, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 112, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 113, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 114, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 115, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 116, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 117, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 118, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 119, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 120, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 121, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 122, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 123, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 124, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 125, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 126, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, 127, _/binary>> -> json_escape(Str, Opts, L + 1, Len); - <> -> - case Opts#opts.no_jsonp_escapes of - true -> - json_escape(<>, Opts, L + 3, Len); - false -> 
- B = unicode:characters_to_binary(json_escape_sequence(16#2028)), - json_escape(<>, Opts, L + 6, Len + 3) - end; - <> -> - case Opts#opts.no_jsonp_escapes of - true -> - json_escape(<>, Opts, L + 3, Len); - false -> - B = unicode:characters_to_binary(json_escape_sequence(16#2029)), - json_escape(<>, Opts, L + 6, Len + 3) - end; - <<_:L/binary, X/utf8, _/binary>> when X < 16#0080 -> - json_escape(Str, Opts, L + 1, Len); - <<_:L/binary, X/utf8, _/binary>> when X < 16#0800 -> - json_escape(Str, Opts, L + 2, Len); - <<_:L/binary, X/utf8, _/binary>> when X < 16#dcff -> - json_escape(Str, Opts, L + 3, Len); - <<_:L/binary, X/utf8, _/binary>> when X > 16#dfff, X < 16#fdd0 -> - json_escape(Str, Opts, L + 3, Len); - <<_:L/binary, X/utf8, _/binary>> when X > 16#fdef, X < 16#fffe -> - json_escape(Str, Opts, L + 3, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#10000, X < 16#1fffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#20000, X < 16#2fffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#30000, X < 16#3fffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#40000, X < 16#4fffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#50000, X < 16#5fffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#60000, X < 16#6fffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#70000, X < 16#7fffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#80000, X < 16#8fffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#90000, X < 16#9fffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#a0000, X < 16#afffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#b0000, X < 16#bfffe -> - 
json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#c0000, X < 16#cfffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#d0000, X < 16#dfffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#e0000, X < 16#efffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#f0000, X < 16#ffffe -> - json_escape(Str, Opts, L + 4, Len); - <<_:L/binary, X/utf8, _/binary>> when X >= 16#100000, X < 16#10fffe -> - json_escape(Str, Opts, L + 4, Len); - _ -> erlang:error(badarg, [Str, Opts]) - end; -json_escape(Str, _, L, Len) when L =:= Len -> - Str. - - %% convert a codepoint to it's \uXXXX equiv. json_escape_sequence(X) -> <> = <>, @@ -338,47 +116,13 @@ to_hex(X) -> X + 48. %% ascii "1" is [49], "2" is [50], etc... -include_lib("eunit/include/eunit.hrl"). -binary_escape_test_() -> +json_escape_sequence_test_() -> [ - {"json string escaping", - ?_assertEqual( - json_escape(<<"\"\\\b\f\n\r\t">>, #opts{}), - <<"\\\"\\\\\\b\\f\\n\\r\\t">> - ) - }, - {"json string hex escape", - ?_assertEqual( - json_escape(<<0, 1, 2, 3, 11, 26, 30, 31>>, #opts{}), - <<"\\u0000\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">> - ) - }, - {"jsonp protection", - ?_assertEqual( - json_escape(<<226, 128, 168, 226, 128, 169>>, #opts{}), - <<"\\u2028\\u2029">> - ) - }, - {"no jsonp escapes", - ?_assertEqual( - json_escape(<<226, 128, 168, 226, 128, 169>>, #opts{no_jsonp_escapes=true}), - <<226, 128, 168, 226, 128, 169>> - ) - }, - {"microsoft i hate your date format", - ?_assertEqual( - json_escape(<<"/Date(1303502009425)/">>, #opts{escape_forward_slash=true}), - <<"\\/Date(1303502009425)\\/">> - ) - }, - {"dirty strings", - ?_assertEqual( - json_escape(<<"\\x25\\uffff">>, #opts{dirty_strings=true}), - <<"\\x25\\uffff">> - ) - } + {"json escape sequence test - 16#0000", ?_assertEqual(json_escape_sequence(16#0000), "\\u0000")}, + {"json escape sequence test - 
16#abc", ?_assertEqual(json_escape_sequence(16#abc), "\\u0abc")}, + {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} ]. - opts_test_() -> [ {"all flags", From 068af5621a037773f465440dc99b51968eaf1b1a Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 20:38:51 -0700 Subject: [PATCH 14/38] add tests for ignore_bad_escapes --- src/jsx_decoder.erl | 78 +++++++++++++++++++++++++++++---------------- src/jsx_encoder.erl | 4 ++- 2 files changed, 53 insertions(+), 29 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 9b0f45c..8279745 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -263,11 +263,11 @@ string(<<33, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(<>, {Handler, State}, S, Opts) -> case S of [Acc, key|Stack] -> - colon(Rest, {Handler, Handler:handle_event({key, maybe_escape(?end_seq(Acc), Opts)}, State)}, [key|Stack], Opts); + colon(Rest, {Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)}, [key|Stack], Opts); [_Acc, single_quote|_Stack] -> ?error([<>, {Handler, State}, S, Opts]); [Acc|Stack] -> - maybe_done(Rest, {Handler, Handler:handle_event({string, maybe_escape(?end_seq(Acc), Opts)}, State)}, Stack, Opts) + maybe_done(Rest, {Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)}, Stack, Opts) end; string(<<35, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 35)|Stack], Opts); @@ -282,11 +282,11 @@ string(<>, {Handler, State}, [Acc|Stack], Opts) -> true -> case Stack of [single_quote, key|S] -> - colon(Rest, {Handler, Handler:handle_event({key, maybe_escape(?end_seq(Acc), Opts)}, State)}, [key|S], Opts) + colon(Rest, {Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)}, [key|S], Opts) ; [single_quote|S] -> - maybe_done(Rest, {Handler, Handler:handle_event({string, maybe_escape(?end_seq(Acc), Opts)}, State)}, S, Opts) + maybe_done(Rest, {Handler, Handler:handle_event({string, ?end_seq(Acc)}, 
State)}, S, Opts) ; _ -> - string(Rest, {Handler, State}, [?acc_seq(Acc, ?singlequote)|Stack], Opts) + string(Rest, {Handler, State}, [?acc_seq(Acc, maybe_replace(?singlequote, Opts))|Stack], Opts) end ; false -> string(Rest, {Handler, State}, [?acc_seq(Acc, ?singlequote)|Stack], Opts) @@ -305,8 +305,8 @@ string(<<45, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 45)|Stack], Opts); string(<<46, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 46)|Stack], Opts); -string(<<47, Rest/binary>>, Handler, [Acc|Stack], Opts) -> - string(Rest, Handler, [?acc_seq(Acc, 47)|Stack], Opts); +string(<<$/, Rest/binary>>, Handler, [Acc|Stack], Opts) -> + string(Rest, Handler, [?acc_seq(Acc, maybe_replace($/, Opts))|Stack], Opts); string(<<48, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 48)|Stack], Opts); string(<<49, Rest/binary>>, Handler, [Acc|Stack], Opts) -> @@ -467,6 +467,8 @@ string(<<126, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 126)|Stack], Opts); string(<<127, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 127)|Stack], Opts); +string(<>, Handler, [Acc|Stack], Opts) when X == 16#2028; X == 16#2029 -> + string(Rest, Handler, [?acc_seq(Acc, maybe_replace(X, Opts))|Stack], Opts); string(<>, Handler, [Acc|Stack], Opts) -> case S of %% not strictly true, but exceptions are already taken care of in preceding clauses @@ -524,10 +526,6 @@ string(Bin, Handler, Stack, Opts) -> end end. - -maybe_escape(Str, Opts=#opts{json_escape=true}) -> jsx_utils:json_escape(Str, Opts); -maybe_escape(Str, _Opts) -> Str. 
- %% we don't need to guard against partial utf here, because it's already taken %% care of in string @@ -571,23 +569,23 @@ strip_continuations(Rest, Handler, [_, Acc|Stack], Opts) -> escape(<<$b, Rest/binary>>, Handler, [Acc|Stack], Opts) -> - string(Rest, Handler, [?acc_seq(Acc, $\b)|Stack], Opts); + string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\b, Opts))|Stack], Opts); escape(<<$f, Rest/binary>>, Handler, [Acc|Stack], Opts) -> - string(Rest, Handler, [?acc_seq(Acc, $\f)|Stack], Opts); + string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\f, Opts))|Stack], Opts); escape(<<$n, Rest/binary>>, Handler, [Acc|Stack], Opts) -> - string(Rest, Handler, [?acc_seq(Acc, $\n)|Stack], Opts); + string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\n, Opts))|Stack], Opts); escape(<<$r, Rest/binary>>, Handler, [Acc|Stack], Opts) -> - string(Rest, Handler, [?acc_seq(Acc, $\r)|Stack], Opts); + string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\r, Opts))|Stack], Opts); escape(<<$t, Rest/binary>>, Handler, [Acc|Stack], Opts) -> - string(Rest, Handler, [?acc_seq(Acc, $\t)|Stack], Opts); + string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\t, Opts))|Stack], Opts); escape(<>, Handler, [Acc|Stack], Opts) -> - string(Rest, Handler, [?acc_seq(Acc, $\\)|Stack], Opts); + string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\\, Opts))|Stack], Opts); escape(<>, Handler, [Acc|Stack], Opts=#opts{escape_forward_slash=true}) -> - string(Rest, Handler, [?acc_seq(Acc, $/)|Stack], Opts); + string(Rest, Handler, [?acc_seq(Acc, maybe_replace($/, Opts))|Stack], Opts); escape(<>, Handler, [Acc|Stack], Opts) -> - string(Rest, Handler, [?acc_seq(Acc, $\")|Stack], Opts); + string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\", Opts))|Stack], Opts); escape(<>, Handler, [Acc|Stack], Opts = #opts{single_quotes=true}) -> - string(Rest, Handler, [?acc_seq(Acc, ?singlequote)|Stack], Opts); + string(Rest, Handler, [?acc_seq(Acc, maybe_replace(?singlequote, Opts))|Stack], Opts); escape(<<$u, 
Rest/binary>>, Handler, Stack, Opts) -> escaped_unicode(Rest, Handler, Stack, Opts); escape(<<>>, Handler, Stack, Opts) -> @@ -613,7 +611,7 @@ escaped_unicode(<>, Handler, [Acc|Stack], Opts) ; false -> ?error([<>, Handler, [Acc|Stack], Opts]) end %% anything else - ; X -> string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts) + ; X -> string(Rest, Handler, [?acc_seq(Acc, maybe_replace(X, Opts))|Stack], Opts) end; escaped_unicode(Bin, Handler, Stack, Opts) -> case is_partial_escape(Bin) of @@ -675,6 +673,35 @@ surrogate_to_codepoint(High, Low) -> (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000. +maybe_replace(X, #opts{dirty_strings=true}) when is_integer(X) -> [X]; +maybe_replace($\b, #opts{json_escape=true}) -> [$\\, $b]; +maybe_replace($\t, #opts{json_escape=true}) -> [$\\, $t]; +maybe_replace($\n, #opts{json_escape=true}) -> [$\\, $n]; +maybe_replace($\f, #opts{json_escape=true}) -> [$\\, $f]; +maybe_replace($\r, #opts{json_escape=true}) -> [$\\, $r]; +maybe_replace($\", #opts{json_escape=true}) -> [$\\, $\"]; +maybe_replace($', Opts=#opts{json_escape=true}) -> + case Opts#opts.single_quotes of + true -> [$\\, $'] + ; false -> [$'] + end; +maybe_replace($/, Opts=#opts{json_escape=true}) -> + case Opts#opts.escape_forward_slash of + true -> [$\\, $/] + ; false -> [$/] + end; +maybe_replace($\\, #opts{json_escape=true}) -> [$\\, $\\]; +maybe_replace(X, Opts=#opts{json_escape=true}) when X == 16#2028; X == 16#2029 -> + case Opts#opts.no_jsonp_escapes of + true -> [X] + ; false -> jsx_utils:json_escape_sequence(X) + end; +maybe_replace(X, #opts{json_escape=true}) when X < 32 -> + jsx_utils:json_escape_sequence(X); +maybe_replace(X, _Opts) -> [X]. 
+ + + %% like strings, numbers are collected in an intermediate accumulator before %% being emitted to the callback handler negative(<<$0, Rest/binary>>, Handler, [Acc|Stack], Opts) -> @@ -1463,7 +1490,8 @@ escapes_test_() -> [{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json] )}, {"control escape", ?_assertEqual(decode(<<$\", "\\u0000"/utf8, $\">>, [json_escape]), [{string, <<"\\u0000">>}, end_json])}, - {"dirty strings", ?_assertEqual(decode(<<"\"\\n\"">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])} + {"dirty strings", ?_assertEqual(decode(<<"\"\\n\"">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])}, + {"ignore bad escapes", ?_assertEqual(decode(<<"\"\\x25\"">>, [json_escape, ignore_bad_escapes]), [{string, <<"\\x25">>}, end_json])} ]. @@ -1535,12 +1563,6 @@ good_characters_test_() -> }, {"acceptable extended", ?_assert(check_good(good_extended())) - }, - {"acceptable extended - json_escape", - ?_assert(check_good(good_extended(), [json_escape])) - }, - {"acceptable extended - json_escape", - ?_assert(check_good(good_extended(), [loose_unicode])) } ]. 
diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 2a5151b..d6b9787 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -481,6 +481,7 @@ maybe_replace($/, Opts=#opts{json_escape=true}) -> true -> [$/, $\\] ; false -> [$/] end; +maybe_replace($\\, #opts{ignore_bad_escapes=true}) -> [$\\]; maybe_replace($\\, #opts{json_escape=true}) -> [$\\, $\\]; maybe_replace(X, Opts=#opts{json_escape=true}) when X == 16#2028; X == 16#2029 -> case Opts#opts.no_jsonp_escapes of @@ -802,7 +803,8 @@ escapes_test_() -> [{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json] )}, {"control escape", ?_assertEqual(encode(<<0>>, [json_escape]), [{string, <<"\\u0000">>}, end_json])}, - {"dirty strings", ?_assertEqual(encode(<<"\n">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])} + {"dirty strings", ?_assertEqual(encode(<<"\n">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])}, + {"ignore bad escapes", ?_assertEqual(encode(<<"\\x25">>, [json_escape, ignore_bad_escapes]), [{string, <<"\\x25">>}, end_json])} ]. From 1c9da676b79b5f4a67289b45bc23e80a8d36a5ac Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 20:39:15 -0700 Subject: [PATCH 15/38] remove decode/1 from jsx_decoder test suite --- src/jsx_decoder.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 8279745..87251ab 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -1255,8 +1255,6 @@ bad_utf8_test_() -> ]. -decode(JSON) -> decode(JSON, []). 
- decode(JSON, Opts) -> try (decoder(jsx, [], Opts))(JSON) From 95feb74d1b7fa55913ffa159388dbc113e3d985b Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 20:51:47 -0700 Subject: [PATCH 16/38] correct operation of ignore_bad_escapes --- src/jsx_encoder.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index d6b9787..74fa097 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -481,7 +481,6 @@ maybe_replace($/, Opts=#opts{json_escape=true}) -> true -> [$/, $\\] ; false -> [$/] end; -maybe_replace($\\, #opts{ignore_bad_escapes=true}) -> [$\\]; maybe_replace($\\, #opts{json_escape=true}) -> [$\\, $\\]; maybe_replace(X, Opts=#opts{json_escape=true}) when X == 16#2028; X == 16#2029 -> case Opts#opts.no_jsonp_escapes of @@ -804,7 +803,7 @@ escapes_test_() -> )}, {"control escape", ?_assertEqual(encode(<<0>>, [json_escape]), [{string, <<"\\u0000">>}, end_json])}, {"dirty strings", ?_assertEqual(encode(<<"\n">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])}, - {"ignore bad escapes", ?_assertEqual(encode(<<"\\x25">>, [json_escape, ignore_bad_escapes]), [{string, <<"\\x25">>}, end_json])} + {"ignore bad escapes", ?_assertEqual(encode(<<"\\x25">>, [json_escape, ignore_bad_escapes]), [{string, <<"\\\\x25">>}, end_json])} ]. 
From 357e3e0aebe65a0555c67f908a1ce8aebb778648 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 21:19:26 -0700 Subject: [PATCH 17/38] add naked single quoted json escape test --- src/jsx_decoder.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 87251ab..48dc82a 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -1475,6 +1475,7 @@ escapes_test_() -> {"tab escape", ?_assertEqual(decode(<<"\"\\t\"">>, [json_escape]), [{string, <<"\\t">>}, end_json])}, {"quote escape", ?_assertEqual(decode(<<"\"\\\"\"">>, [json_escape]), [{string, <<"\\\"">>}, end_json])}, {"single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, + {"naked single quote escape", ?_assertEqual(decode(<<"'\\''">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, {"no single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [json_escape]), [{string, <<"'">>}, end_json])}, {"forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [json_escape, escape_forward_slash]), [{string, <<"\\/">>}, end_json])}, {"no forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [json_escape]), [{string, <<"/">>}, end_json])}, From fdea98ebbf208de7ba9df66995ee85ed96e18d28 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 21:19:37 -0700 Subject: [PATCH 18/38] add escape equiv test --- src/jsx.erl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/jsx.erl b/src/jsx.erl index 11630d6..3d1c08e 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -130,6 +130,16 @@ encoder_decoder_equiv_test_() -> <<"[\"a\", 17, 3.14, true, {\"k\":false}, []]">> ) =:= (jsx_encoder:encoder(?MODULE, [], []))([<<"a">>, 17, 3.14, true, [{<<"k">>, false}], []]) ) + }, + {"string escape equivalency", + ?_assertEqual( + (jsx_decoder:decoder(?MODULE, [], [json_escape, loose_unicode]))( + <<"\"\\u0000\\b\\t\\n\\f\\r\\\\\\\"'/", 16#2028/utf8, 16#2029/utf8, 239, 
191, 191, "\"">> + ), + (jsx_encoder:encoder(?MODULE, [], [json_escape, loose_unicode]))( + <<0, $\b, $\t, $\n, $\f, $\r, $\\, $\", $', $/, 16#2028/utf8, 16#2029/utf8, 239, 191, 191>> + ) + ) } ]. From 3bc2c68ea3e315980fb735bac88a891dddddf54e Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 21:58:18 -0700 Subject: [PATCH 19/38] fix the tenses and forms of option flags --- .../bad_low_surrogate_replaced.test | 2 +- .../unpaired_surrogate_replaced.test | 2 +- src/jsx.erl | 26 +-- src/jsx_decoder.erl | 154 +++++++++--------- src/jsx_encoder.erl | 136 ++++++++-------- src/jsx_opts.hrl | 14 +- src/jsx_to_json.erl | 4 +- src/jsx_utils.erl | 94 ++++++----- 8 files changed, 221 insertions(+), 211 deletions(-) diff --git a/priv/test_cases/bad_low_surrogate_replaced.test b/priv/test_cases/bad_low_surrogate_replaced.test index 7f13f9e..5762c33 100644 --- a/priv/test_cases/bad_low_surrogate_replaced.test +++ b/priv/test_cases/bad_low_surrogate_replaced.test @@ -1,4 +1,4 @@ {name, "bad_low_surrogate_replaced"}. {jsx, [start_array,{string, <<16#fffd/utf8, 16#fffd/utf8>>},end_array,end_json]}. {json, "bad_low_surrogate_replaced.json"}. -{jsx_flags, [loose_unicode]}. +{jsx_flags, [replaced_bad_utf8]}. diff --git a/priv/test_cases/unpaired_surrogate_replaced.test b/priv/test_cases/unpaired_surrogate_replaced.test index 7d3bbbd..e31290d 100644 --- a/priv/test_cases/unpaired_surrogate_replaced.test +++ b/priv/test_cases/unpaired_surrogate_replaced.test @@ -1,4 +1,4 @@ {name, "unpaired surrogate replaced"}. {jsx, [start_array,{string,<<65533/utf8,$b,$l,$a,$h>>},end_array,end_json]}. {json, "unpaired_surrogate_replaced.json"}. -{jsx_flags, [loose_unicode]}. +{jsx_flags, [replaced_bad_utf8]}. 
diff --git a/src/jsx.erl b/src/jsx.erl index 3d1c08e..78dc409 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -130,55 +130,45 @@ encoder_decoder_equiv_test_() -> <<"[\"a\", 17, 3.14, true, {\"k\":false}, []]">> ) =:= (jsx_encoder:encoder(?MODULE, [], []))([<<"a">>, 17, 3.14, true, [{<<"k">>, false}], []]) ) - }, - {"string escape equivalency", - ?_assertEqual( - (jsx_decoder:decoder(?MODULE, [], [json_escape, loose_unicode]))( - <<"\"\\u0000\\b\\t\\n\\f\\r\\\\\\\"'/", 16#2028/utf8, 16#2029/utf8, 239, 191, 191, "\"">> - ), - (jsx_encoder:encoder(?MODULE, [], [json_escape, loose_unicode]))( - <<0, $\b, $\t, $\n, $\f, $\r, $\\, $\", $', $/, 16#2028/utf8, 16#2029/utf8, 239, 191, 191>> - ) - ) } ]. -single_quotes_test_() -> +single_quoted_strings_test_() -> [ {"single quoted keys", ?_assertEqual( - to_term(<<"{'key':true}">>, [single_quotes]), + to_term(<<"{'key':true}">>, [single_quoted_strings]), [{<<"key">>, true}] ) }, {"multiple single quoted keys", ?_assertEqual( - to_term(<<"{'key':true, 'another key':true}">>, [single_quotes]), + to_term(<<"{'key':true, 'another key':true}">>, [single_quoted_strings]), [{<<"key">>, true}, {<<"another key">>, true}] ) }, {"nested single quoted keys", ?_assertEqual( - to_term(<<"{'key': {'key':true, 'another key':true}}">>, [single_quotes]), + to_term(<<"{'key': {'key':true, 'another key':true}}">>, [single_quoted_strings]), [{<<"key">>, [{<<"key">>, true}, {<<"another key">>, true}]}] ) }, {"single quoted string", ?_assertEqual( - to_term(<<"['string']">>, [single_quotes]), + to_term(<<"['string']">>, [single_quoted_strings]), [<<"string">>] ) }, {"single quote in double quoted string", ?_assertEqual( - to_term(<<"[\"a single quote: '\"]">>, [single_quotes]), + to_term(<<"[\"a single quote: '\"]">>, [single_quoted_strings]), [<<"a single quote: '">>] ) }, {"escaped single quote in single quoted string", ?_assertEqual( - to_term(<<"['a single quote: \\'']">>, [single_quotes]), + to_term(<<"['a single quote: \\'']">>, 
[single_quoted_strings]), [<<"a single quote: '">>] ) }, @@ -191,7 +181,7 @@ single_quotes_test_() -> {"mismatched quotes", ?_assertError( badarg, - to_term(<<"['mismatched\"]">>, [single_quotes]) + to_term(<<"['mismatched\"]">>, [single_quoted_strings]) ) } ]. diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 48dc82a..59f6ab7 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -130,7 +130,7 @@ decoder(Handler, State, Opts) -> value(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); -value(<>, Handler, Stack, Opts = #opts{single_quotes=true}) -> +value(<>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) -> string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); value(<<$t, Rest/binary>>, Handler, Stack, Opts) -> tr(Rest, Handler, Stack, Opts); @@ -161,7 +161,7 @@ value(Bin, Handler, Stack, Opts) -> object(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); -object(<>, Handler, Stack, Opts = #opts{single_quotes=true}) -> +object(<>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) -> string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); object(<>, {Handler, State}, [key|Stack], Opts) -> maybe_done(Rest, {Handler, Handler:handle_event(end_object, State)}, Stack, Opts); @@ -178,7 +178,7 @@ object(Bin, Handler, Stack, Opts) -> array(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); -array(<>, Handler, Stack, Opts = #opts{single_quotes=true}) -> +array(<>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) -> string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); array(<<$t, Rest/binary>>, Handler, Stack, Opts) -> tr(Rest, Handler, Stack, Opts); @@ -224,7 +224,7 @@ colon(Bin, Handler, Stack, Opts) -> key(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); -key(<>, Handler, Stack, Opts = #opts{single_quotes=true}) -> +key(<>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) -> 
string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); key(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> key(Rest, Handler, Stack, Opts); @@ -278,7 +278,7 @@ string(<<37, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(<<38, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 38)|Stack], Opts); string(<>, {Handler, State}, [Acc|Stack], Opts) -> - case Opts#opts.single_quotes of + case Opts#opts.single_quoted_strings of true -> case Stack of [single_quote, key|S] -> @@ -511,7 +511,7 @@ string(<>, Handler, [Acc|Stack], Opts) -> ; S when S >= 16#100000, S < 16#10fffe -> string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) ; _ -> - case Opts#opts.loose_unicode of + case Opts#opts.replaced_bad_utf8 of true -> noncharacter(<>, Handler, [Acc|Stack], Opts) ; false -> ?error([<>, Handler, [Acc|Stack], Opts]) end @@ -520,7 +520,7 @@ string(Bin, Handler, Stack, Opts) -> case partial_utf(Bin) of true -> ?incomplete(string, Bin, Handler, Stack, Opts) ; false -> - case Opts#opts.loose_unicode of + case Opts#opts.replaced_bad_utf8 of true -> noncharacter(Bin, Handler, Stack, Opts) ; false -> ?error([Bin, Handler, Stack, Opts]) end @@ -580,17 +580,17 @@ escape(<<$t, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\t, Opts))|Stack], Opts); escape(<>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\\, Opts))|Stack], Opts); -escape(<>, Handler, [Acc|Stack], Opts=#opts{escape_forward_slash=true}) -> +escape(<>, Handler, [Acc|Stack], Opts=#opts{escaped_forward_slashes=true}) -> string(Rest, Handler, [?acc_seq(Acc, maybe_replace($/, Opts))|Stack], Opts); escape(<>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\", Opts))|Stack], Opts); -escape(<>, Handler, [Acc|Stack], Opts = #opts{single_quotes=true}) -> +escape(<>, Handler, [Acc|Stack], Opts = #opts{single_quoted_strings=true}) -> string(Rest, Handler, 
[?acc_seq(Acc, maybe_replace(?singlequote, Opts))|Stack], Opts); escape(<<$u, Rest/binary>>, Handler, Stack, Opts) -> escaped_unicode(Rest, Handler, Stack, Opts); escape(<<>>, Handler, Stack, Opts) -> ?incomplete(escape, <<>>, Handler, Stack, Opts); -escape(Bin, Handler, [Acc|Stack], Opts=#opts{ignore_bad_escapes=true}) -> +escape(Bin, Handler, [Acc|Stack], Opts=#opts{ignored_bad_escapes=true}) -> string(Bin, Handler, [?acc_seq(Acc, ?rsolidus)|Stack], Opts); escape(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). @@ -606,7 +606,7 @@ escaped_unicode(<>, Handler, [Acc|Stack], Opts) low_surrogate(Rest, Handler, [X, Acc|Stack], Opts) %% low surrogate, illegal in this position ; X when X >= 16#dc00, X =< 16#dfff -> - case Opts#opts.loose_unicode of + case Opts#opts.replaced_bad_utf8 of true -> string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts) ; false -> ?error([<>, Handler, [Acc|Stack], Opts]) end @@ -635,7 +635,7 @@ low_surrogate(<>, Handler, [High, Acc|St case (Y =< 16#d800 orelse Y >= 16#e000) of true -> string(Rest, Handler, [?acc_seq(Acc, Y)|Stack], Opts) ; false -> - case Opts#opts.loose_unicode of + case Opts#opts.replaced_bad_utf8 of true -> string(Rest, Handler, [?acc_seq(Acc, 16#fffd, 16#fffd)|Stack], Opts) ; false -> @@ -643,7 +643,7 @@ low_surrogate(<>, Handler, [High, Acc|St end end ; _ -> - case Opts#opts.loose_unicode of + case Opts#opts.replaced_bad_utf8 of true -> string(Rest, Handler, [?acc_seq(Acc, 16#fffd, 16#fffd)|Stack], Opts) ; false -> ?error([<>, Handler, [High, Acc|Stack], Opts]) end @@ -652,7 +652,7 @@ low_surrogate(Bin, Handler, [High, Acc|Stack], Opts) -> case is_partial_low(Bin) of true -> ?incomplete(low_surrogate, Bin, Handler, [High, Acc|Stack], Opts) ; false -> - case Opts#opts.loose_unicode of + case Opts#opts.replaced_bad_utf8 of true -> string(Bin, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts) ; false -> ?error([Bin, Handler, [High, Acc|Stack], Opts]) end @@ -674,29 +674,29 @@ surrogate_to_codepoint(High, 
Low) -> maybe_replace(X, #opts{dirty_strings=true}) when is_integer(X) -> [X]; -maybe_replace($\b, #opts{json_escape=true}) -> [$\\, $b]; -maybe_replace($\t, #opts{json_escape=true}) -> [$\\, $t]; -maybe_replace($\n, #opts{json_escape=true}) -> [$\\, $n]; -maybe_replace($\f, #opts{json_escape=true}) -> [$\\, $f]; -maybe_replace($\r, #opts{json_escape=true}) -> [$\\, $r]; -maybe_replace($\", #opts{json_escape=true}) -> [$\\, $\"]; -maybe_replace($', Opts=#opts{json_escape=true}) -> - case Opts#opts.single_quotes of +maybe_replace($\b, #opts{escaped_strings=true}) -> [$\\, $b]; +maybe_replace($\t, #opts{escaped_strings=true}) -> [$\\, $t]; +maybe_replace($\n, #opts{escaped_strings=true}) -> [$\\, $n]; +maybe_replace($\f, #opts{escaped_strings=true}) -> [$\\, $f]; +maybe_replace($\r, #opts{escaped_strings=true}) -> [$\\, $r]; +maybe_replace($\", #opts{escaped_strings=true}) -> [$\\, $\"]; +maybe_replace($', Opts=#opts{escaped_strings=true}) -> + case Opts#opts.single_quoted_strings of true -> [$\\, $'] ; false -> [$'] end; -maybe_replace($/, Opts=#opts{json_escape=true}) -> - case Opts#opts.escape_forward_slash of +maybe_replace($/, Opts=#opts{escaped_strings=true}) -> + case Opts#opts.escaped_forward_slashes of true -> [$\\, $/] ; false -> [$/] end; -maybe_replace($\\, #opts{json_escape=true}) -> [$\\, $\\]; -maybe_replace(X, Opts=#opts{json_escape=true}) when X == 16#2028; X == 16#2029 -> - case Opts#opts.no_jsonp_escapes of +maybe_replace($\\, #opts{escaped_strings=true}) -> [$\\, $\\]; +maybe_replace(X, Opts=#opts{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> + case Opts#opts.unescaped_jsonp of true -> [X] ; false -> jsx_utils:json_escape_sequence(X) end; -maybe_replace(X, #opts{json_escape=true}) when X < 32 -> +maybe_replace(X, #opts{escaped_strings=true}) when X < 32 -> jsx_utils:json_escape_sequence(X); maybe_replace(X, _Opts) -> [X]. 
@@ -1073,20 +1073,20 @@ bad_utf8_test_() -> ?_assert(is_bad(xcode(<<16#0080>>))) }, {"orphan continuation byte u+0080 replaced", - ?_assertEqual(xcode(<<16#0080>>, [loose_unicode]), <<16#fffd/utf8>>) + ?_assertEqual(xcode(<<16#0080>>, [replaced_bad_utf8]), <<16#fffd/utf8>>) }, {"orphan continuation byte u+00bf", ?_assert(is_bad(xcode(<<16#00bf>>))) }, {"orphan continuation byte u+00bf replaced", - ?_assertEqual(xcode(<<16#00bf>>, [loose_unicode]), <<16#fffd/utf8>>) + ?_assertEqual(xcode(<<16#00bf>>, [replaced_bad_utf8]), <<16#fffd/utf8>>) }, {"2 continuation bytes", ?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>))) }, {"2 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 2) ) }, @@ -1095,7 +1095,7 @@ bad_utf8_test_() -> }, {"3 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 3) ) }, @@ -1104,7 +1104,7 @@ bad_utf8_test_() -> }, {"4 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 4) ) }, @@ -1113,7 +1113,7 @@ bad_utf8_test_() -> }, {"5 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 5) ) }, @@ -1122,7 +1122,7 @@ bad_utf8_test_() -> }, {"6 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 6) ) }, @@ -1131,7 +1131,7 @@ bad_utf8_test_() -> 
}, {"all continuation bytes replaced", ?_assertEqual( - xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [loose_unicode]), + xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))) ) }, @@ -1140,7 +1140,7 @@ bad_utf8_test_() -> }, {"lonely start byte replaced", ?_assertEqual( - xcode(<<16#00c0>>, [loose_unicode]), + xcode(<<16#00c0>>, [replaced_bad_utf8]), <<16#fffd/utf8>> ) }, @@ -1149,7 +1149,7 @@ bad_utf8_test_() -> }, {"lonely start bytes (2 byte) replaced", ?_assertEqual( - xcode(<<16#00c0, 32, 16#00df>>, [loose_unicode]), + xcode(<<16#00c0, 32, 16#00df>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32, 16#fffd/utf8>> ) }, @@ -1158,7 +1158,7 @@ bad_utf8_test_() -> }, {"lonely start bytes (3 byte) replaced", ?_assertEqual( - xcode(<<16#00e0, 32, 16#00ef>>, [loose_unicode]), + xcode(<<16#00e0, 32, 16#00ef>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32, 16#fffd/utf8>> ) }, @@ -1167,7 +1167,7 @@ bad_utf8_test_() -> }, {"lonely start bytes (4 byte) replaced", ?_assertEqual( - xcode(<<16#00f0, 32, 16#00f7>>, [loose_unicode]), + xcode(<<16#00f0, 32, 16#00f7>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32, 16#fffd/utf8>> ) }, @@ -1176,7 +1176,7 @@ bad_utf8_test_() -> }, {"missing continuation byte (3 byte) replaced", ?_assertEqual( - xcode(<<224, 160, 32>>, [loose_unicode]), + xcode(<<224, 160, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -1185,7 +1185,7 @@ bad_utf8_test_() -> }, {"missing continuation byte2 (4 byte missing one) replaced", ?_assertEqual( - xcode(<<240, 144, 128, 32>>, [loose_unicode]), + xcode(<<240, 144, 128, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -1194,7 +1194,7 @@ bad_utf8_test_() -> }, {"missing continuation byte2 (4 byte missing two) replaced", ?_assertEqual( - xcode(<<240, 144, 32>>, [loose_unicode]), + xcode(<<240, 144, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -1203,7 +1203,7 @@ bad_utf8_test_() -> }, 
{"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( - xcode(<<16#c0, 16#af, 32>>, [loose_unicode]), + xcode(<<16#c0, 16#af, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -1212,7 +1212,7 @@ bad_utf8_test_() -> }, {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( - xcode(<<16#e0, 16#80, 16#af, 32>>, [loose_unicode]), + xcode(<<16#e0, 16#80, 16#af, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -1221,7 +1221,7 @@ bad_utf8_test_() -> }, {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( - xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [loose_unicode]), + xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -1230,7 +1230,7 @@ bad_utf8_test_() -> }, {"highest overlong 2 byte sequence replaced", ?_assertEqual( - xcode(<<16#c1, 16#bf, 32>>, [loose_unicode]), + xcode(<<16#c1, 16#bf, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -1239,7 +1239,7 @@ bad_utf8_test_() -> }, {"highest overlong 3 byte sequence replaced", ?_assertEqual( - xcode(<<16#e0, 16#9f, 16#bf, 32>>, [loose_unicode]), + xcode(<<16#e0, 16#9f, 16#bf, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -1248,7 +1248,7 @@ bad_utf8_test_() -> }, {"highest overlong 4 byte sequence replaced", ?_assertEqual( - xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [loose_unicode]), + xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) } @@ -1263,10 +1263,10 @@ decode(JSON, Opts) -> end. -ignore_bad_escapes_test_() -> +ignored_bad_escapes_test_() -> [ {"ignore unrecognized escape sequence", ?_assertEqual( - decode(<<"[\"\\x25\"]">>, [ignore_bad_escapes]), + decode(<<"[\"\\x25\"]">>, [ignored_bad_escapes]), [start_array, {string, <<"\\x25">>}, end_array, end_json] )} ]. @@ -1457,10 +1457,10 @@ comments_test_() -> ]. 
-escape_forward_slash_test_() -> +escaped_forward_slashes_test_() -> [ {"escape forward slash test", ?_assertEqual( - decode(<<"[ \" \/ \" ]">>, [escape_forward_slash]), + decode(<<"[ \" \/ \" ]">>, [escaped_forward_slashes]), [start_array, {string, <<" / ">>}, end_array, end_json] )} ]. @@ -1468,29 +1468,29 @@ escape_forward_slash_test_() -> escapes_test_() -> [ - {"backspace escape", ?_assertEqual(decode(<<"\"\\b\"">>, [json_escape]), [{string, <<"\\b">>}, end_json])}, - {"formfeed escape", ?_assertEqual(decode(<<"\"\\f\"">>, [json_escape]), [{string, <<"\\f">>}, end_json])}, - {"newline escape", ?_assertEqual(decode(<<"\"\\n\"">>, [json_escape]), [{string, <<"\\n">>}, end_json])}, - {"carriage return escape", ?_assertEqual(decode(<<"\"\\r\"">>, [json_escape]), [{string, <<"\\r">>}, end_json])}, - {"tab escape", ?_assertEqual(decode(<<"\"\\t\"">>, [json_escape]), [{string, <<"\\t">>}, end_json])}, - {"quote escape", ?_assertEqual(decode(<<"\"\\\"\"">>, [json_escape]), [{string, <<"\\\"">>}, end_json])}, - {"single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, - {"naked single quote escape", ?_assertEqual(decode(<<"'\\''">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, - {"no single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [json_escape]), [{string, <<"'">>}, end_json])}, - {"forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [json_escape, escape_forward_slash]), [{string, <<"\\/">>}, end_json])}, - {"no forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [json_escape]), [{string, <<"/">>}, end_json])}, - {"back slash escape", ?_assertEqual(decode(<<"\"\\\\\"">>, [json_escape]), [{string, <<"\\\\">>}, end_json])}, + {"backspace escape", ?_assertEqual(decode(<<"\"\\b\"">>, [escaped_strings]), [{string, <<"\\b">>}, end_json])}, + {"formfeed escape", ?_assertEqual(decode(<<"\"\\f\"">>, [escaped_strings]), [{string, <<"\\f">>}, end_json])}, + {"newline escape", 
?_assertEqual(decode(<<"\"\\n\"">>, [escaped_strings]), [{string, <<"\\n">>}, end_json])}, + {"carriage return escape", ?_assertEqual(decode(<<"\"\\r\"">>, [escaped_strings]), [{string, <<"\\r">>}, end_json])}, + {"tab escape", ?_assertEqual(decode(<<"\"\\t\"">>, [escaped_strings]), [{string, <<"\\t">>}, end_json])}, + {"quote escape", ?_assertEqual(decode(<<"\"\\\"\"">>, [escaped_strings]), [{string, <<"\\\"">>}, end_json])}, + {"single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [escaped_strings, single_quoted_strings]), [{string, <<"\\'">>}, end_json])}, + {"naked single quote escape", ?_assertEqual(decode(<<"'\\''">>, [escaped_strings, single_quoted_strings]), [{string, <<"\\'">>}, end_json])}, + {"no single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [escaped_strings]), [{string, <<"'">>}, end_json])}, + {"forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [escaped_strings, escaped_forward_slashes]), [{string, <<"\\/">>}, end_json])}, + {"no forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [escaped_strings]), [{string, <<"/">>}, end_json])}, + {"back slash escape", ?_assertEqual(decode(<<"\"\\\\\"">>, [escaped_strings]), [{string, <<"\\\\">>}, end_json])}, {"jsonp escape", ?_assertEqual( - decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [json_escape]), + decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [escaped_strings]), [{string, <<"\\u2028\\u2029">>}, end_json] )}, {"no jsonp escape", ?_assertEqual( - decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [json_escape, no_jsonp_escapes]), + decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [escaped_strings, unescaped_jsonp]), [{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json] )}, - {"control escape", ?_assertEqual(decode(<<$\", "\\u0000"/utf8, $\">>, [json_escape]), [{string, <<"\\u0000">>}, end_json])}, - {"dirty strings", ?_assertEqual(decode(<<"\"\\n\"">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])}, - {"ignore bad escapes", ?_assertEqual(decode(<<"\"\\x25\"">>, 
[json_escape, ignore_bad_escapes]), [{string, <<"\\x25">>}, end_json])} + {"control escape", ?_assertEqual(decode(<<$\", "\\u0000"/utf8, $\">>, [escaped_strings]), [{string, <<"\\u0000">>}, end_json])}, + {"dirty strings", ?_assertEqual(decode(<<"\"\\n\"">>, [escaped_strings, dirty_strings]), [{string, <<"\n">>}, end_json])}, + {"ignore bad escapes", ?_assertEqual(decode(<<"\"\\x25\"">>, [escaped_strings, ignored_bad_escapes]), [{string, <<"\\x25">>}, end_json])} ]. @@ -1551,14 +1551,14 @@ good_characters_test_() -> {"acceptable codepoints", ?_assert(check_good(good())) }, - {"acceptable codepoints - json_escape", - ?_assert(check_good(good(), [json_escape])) + {"acceptable codepoints - escaped_strings", + ?_assert(check_good(good(), [escaped_strings])) }, - {"acceptable codepoints - loose_unicode", - ?_assert(check_good(good(), [json_escape])) + {"acceptable codepoints - replaced_bad_utf8", + ?_assert(check_good(good(), [escaped_strings])) }, - {"acceptable codepoints - json_escape + loose_unicode", - ?_assert(check_good(good(), [json_escape, loose_unicode])) + {"acceptable codepoints - escaped_strings + replaced_bad_utf8", + ?_assert(check_good(good(), [escaped_strings, replaced_bad_utf8])) }, {"acceptable extended", ?_assert(check_good(good_extended())) @@ -1575,7 +1575,7 @@ check_bad(List) -> check_replaced(List) -> [] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true ; (_) -> false end, - check(List, [loose_unicode], []) + check(List, [replaced_bad_utf8], []) ). diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 74fa097..59c0343 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -104,7 +104,7 @@ fix_key(Key) when is_binary(Key) -> Key. clean_string(Bin, Opts) -> - case Opts#opts.loose_unicode orelse Opts#opts.json_escape of + case Opts#opts.replaced_bad_utf8 orelse Opts#opts.escaped_strings of true -> clean(Bin, [], Opts) ; false -> ensure_clean(Bin), Bin end. @@ -465,33 +465,33 @@ strip_continuations(Bin, _) -> Bin. 
maybe_replace(X, #opts{dirty_strings=true}) when is_integer(X) -> [X]; -maybe_replace($\b, #opts{json_escape=true}) -> [$b, $\\]; -maybe_replace($\t, #opts{json_escape=true}) -> [$t, $\\]; -maybe_replace($\n, #opts{json_escape=true}) -> [$n, $\\]; -maybe_replace($\f, #opts{json_escape=true}) -> [$f, $\\]; -maybe_replace($\r, #opts{json_escape=true}) -> [$r, $\\]; -maybe_replace($\", #opts{json_escape=true}) -> [$\", $\\]; -maybe_replace($', Opts=#opts{json_escape=true}) -> - case Opts#opts.single_quotes of +maybe_replace($\b, #opts{escaped_strings=true}) -> [$b, $\\]; +maybe_replace($\t, #opts{escaped_strings=true}) -> [$t, $\\]; +maybe_replace($\n, #opts{escaped_strings=true}) -> [$n, $\\]; +maybe_replace($\f, #opts{escaped_strings=true}) -> [$f, $\\]; +maybe_replace($\r, #opts{escaped_strings=true}) -> [$r, $\\]; +maybe_replace($\", #opts{escaped_strings=true}) -> [$\", $\\]; +maybe_replace($', Opts=#opts{escaped_strings=true}) -> + case Opts#opts.single_quoted_strings of true -> [$', $\\] ; false -> [$'] end; -maybe_replace($/, Opts=#opts{json_escape=true}) -> - case Opts#opts.escape_forward_slash of +maybe_replace($/, Opts=#opts{escaped_strings=true}) -> + case Opts#opts.escaped_forward_slashes of true -> [$/, $\\] ; false -> [$/] end; -maybe_replace($\\, #opts{json_escape=true}) -> [$\\, $\\]; -maybe_replace(X, Opts=#opts{json_escape=true}) when X == 16#2028; X == 16#2029 -> - case Opts#opts.no_jsonp_escapes of +maybe_replace($\\, #opts{escaped_strings=true}) -> [$\\, $\\]; +maybe_replace(X, Opts=#opts{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> + case Opts#opts.unescaped_jsonp of true -> [X] ; false -> lists:reverse(jsx_utils:json_escape_sequence(X)) end; -maybe_replace(X, #opts{json_escape=true}) when X < 32 -> +maybe_replace(X, #opts{escaped_strings=true}) when X < 32 -> lists:reverse(jsx_utils:json_escape_sequence(X)); -maybe_replace(noncharacter, #opts{loose_unicode=true}) -> [16#fffd]; -maybe_replace(surrogate, #opts{loose_unicode=true}) -> 
[16#fffd]; -maybe_replace(badutf, #opts{loose_unicode=true}) -> [16#fffd]. +maybe_replace(noncharacter, #opts{replaced_bad_utf8=true}) -> [16#fffd]; +maybe_replace(surrogate, #opts{replaced_bad_utf8=true}) -> [16#fffd]; +maybe_replace(badutf, #opts{replaced_bad_utf8=true}) -> [16#fffd]. -ifdef(TEST). @@ -500,7 +500,7 @@ maybe_replace(badutf, #opts{loose_unicode=true}) -> [16#fffd]. xcode(Bin) -> xcode(Bin, #opts{}). -xcode(Bin, [loose_unicode]) -> xcode(Bin, #opts{loose_unicode=true}); +xcode(Bin, [replaced_bad_utf8]) -> xcode(Bin, #opts{replaced_bad_utf8=true}); xcode(Bin, Opts) -> try clean_string(Bin, Opts) catch error:badarg -> {error, badarg} @@ -517,20 +517,20 @@ bad_utf8_test_() -> ?_assert(is_bad(xcode(<<16#0080>>))) }, {"orphan continuation byte u+0080 replaced", - ?_assertEqual(xcode(<<16#0080>>, [loose_unicode]), <<16#fffd/utf8>>) + ?_assertEqual(xcode(<<16#0080>>, [replaced_bad_utf8]), <<16#fffd/utf8>>) }, {"orphan continuation byte u+00bf", ?_assert(is_bad(xcode(<<16#00bf>>))) }, {"orphan continuation byte u+00bf replaced", - ?_assertEqual(xcode(<<16#00bf>>, [loose_unicode]), <<16#fffd/utf8>>) + ?_assertEqual(xcode(<<16#00bf>>, [replaced_bad_utf8]), <<16#fffd/utf8>>) }, {"2 continuation bytes", ?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>))) }, {"2 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 2) ) }, @@ -539,7 +539,7 @@ bad_utf8_test_() -> }, {"3 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 3) ) }, @@ -548,7 +548,7 @@ bad_utf8_test_() -> }, {"4 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 
4))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 4) ) }, @@ -557,7 +557,7 @@ bad_utf8_test_() -> }, {"5 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 5) ) }, @@ -566,7 +566,7 @@ bad_utf8_test_() -> }, {"6 continuation bytes replaced", ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [loose_unicode]), + xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, 6) ) }, @@ -575,7 +575,7 @@ bad_utf8_test_() -> }, {"all continuation bytes replaced", ?_assertEqual( - xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [loose_unicode]), + xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [replaced_bad_utf8]), binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))) ) }, @@ -584,7 +584,7 @@ bad_utf8_test_() -> }, {"lonely start byte replaced", ?_assertEqual( - xcode(<<16#00c0>>, [loose_unicode]), + xcode(<<16#00c0>>, [replaced_bad_utf8]), <<16#fffd/utf8>> ) }, @@ -593,7 +593,7 @@ bad_utf8_test_() -> }, {"lonely start bytes (2 byte) replaced", ?_assertEqual( - xcode(<<16#00c0, 32, 16#00df>>, [loose_unicode]), + xcode(<<16#00c0, 32, 16#00df>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32, 16#fffd/utf8>> ) }, @@ -602,7 +602,7 @@ bad_utf8_test_() -> }, {"lonely start bytes (3 byte) replaced", ?_assertEqual( - xcode(<<16#00e0, 32, 16#00ef>>, [loose_unicode]), + xcode(<<16#00e0, 32, 16#00ef>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32, 16#fffd/utf8>> ) }, @@ -611,7 +611,7 @@ bad_utf8_test_() -> }, {"lonely start bytes (4 byte) replaced", ?_assertEqual( - xcode(<<16#00f0, 32, 16#00f7>>, [loose_unicode]), + xcode(<<16#00f0, 32, 16#00f7>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32, 16#fffd/utf8>> ) }, @@ -620,7 +620,7 @@ bad_utf8_test_() -> }, {"missing continuation byte (3 byte) replaced", ?_assertEqual( - 
xcode(<<224, 160, 32>>, [loose_unicode]), + xcode(<<224, 160, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -629,7 +629,7 @@ bad_utf8_test_() -> }, {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( - xcode(<<240, 144, 128, 32>>, [loose_unicode]), + xcode(<<240, 144, 128, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -638,7 +638,7 @@ bad_utf8_test_() -> }, {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( - xcode(<<240, 144, 32>>, [loose_unicode]), + xcode(<<240, 144, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -647,7 +647,7 @@ bad_utf8_test_() -> }, {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( - xcode(<<16#c0, 16#af, 32>>, [loose_unicode]), + xcode(<<16#c0, 16#af, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -656,7 +656,7 @@ bad_utf8_test_() -> }, {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( - xcode(<<16#e0, 16#80, 16#af, 32>>, [loose_unicode]), + xcode(<<16#e0, 16#80, 16#af, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -665,7 +665,7 @@ bad_utf8_test_() -> }, {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( - xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [loose_unicode]), + xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -674,7 +674,7 @@ bad_utf8_test_() -> }, {"highest overlong 2 byte sequence replaced", ?_assertEqual( - xcode(<<16#c1, 16#bf, 32>>, [loose_unicode]), + xcode(<<16#c1, 16#bf, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -683,7 +683,7 @@ bad_utf8_test_() -> }, {"highest overlong 3 byte sequence replaced", ?_assertEqual( - xcode(<<16#e0, 16#9f, 16#bf, 32>>, [loose_unicode]), + xcode(<<16#e0, 16#9f, 16#bf, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) }, @@ -692,7 +692,7 @@ bad_utf8_test_() -> }, {"highest overlong 4 byte sequence replaced", ?_assertEqual( - xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [loose_unicode]), + xcode(<<16#f0, 
16#8f, 16#bf, 16#bf, 32>>, [replaced_bad_utf8]), <<16#fffd/utf8, 32>> ) } @@ -710,7 +710,7 @@ encode(Term, Opts) -> encode_test_() -> [ {"naked string", ?_assertEqual(encode(<<"a string\n">>), [{string, <<"a string\n">>}, end_json])}, - {"escaped naked string", ?_assertEqual(encode(<<"a string\n">>, [json_escape]), [{string, <<"a string\\n">>}, end_json])}, + {"escaped naked string", ?_assertEqual(encode(<<"a string\n">>, [escaped_strings]), [{string, <<"a string\\n">>}, end_json])}, {"naked integer", ?_assertEqual(encode(123), [{integer, 123}, end_json])}, {"naked float", ?_assertEqual(encode(1.23), [{float, 1.23}, end_json])}, {"naked literal", ?_assertEqual(encode(null), [{literal, null}, end_json])}, @@ -782,28 +782,28 @@ encode_test_() -> escapes_test_() -> [ - {"backspace escape", ?_assertEqual(encode(<<"\b">>, [json_escape]), [{string, <<"\\b">>}, end_json])}, - {"formfeed escape", ?_assertEqual(encode(<<"\f">>, [json_escape]), [{string, <<"\\f">>}, end_json])}, - {"newline escape", ?_assertEqual(encode(<<"\n">>, [json_escape]), [{string, <<"\\n">>}, end_json])}, - {"carriage return escape", ?_assertEqual(encode(<<"\r">>, [json_escape]), [{string, <<"\\r">>}, end_json])}, - {"tab escape", ?_assertEqual(encode(<<"\t">>, [json_escape]), [{string, <<"\\t">>}, end_json])}, - {"quote escape", ?_assertEqual(encode(<<"\"">>, [json_escape]), [{string, <<"\\\"">>}, end_json])}, - {"single quote escape", ?_assertEqual(encode(<<"'">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, - {"no single quote escape", ?_assertEqual(encode(<<"'">>, [json_escape]), [{string, <<"'">>}, end_json])}, - {"forward slash escape", ?_assertEqual(encode(<<"/">>, [json_escape, escape_forward_slash]), [{string, <<"\\/">>}, end_json])}, - {"no forward slash escape", ?_assertEqual(encode(<<"/">>, [json_escape]), [{string, <<"/">>}, end_json])}, - {"back slash escape", ?_assertEqual(encode(<<"\\">>, [json_escape]), [{string, <<"\\\\">>}, end_json])}, + {"backspace escape", 
?_assertEqual(encode(<<"\b">>, [escaped_strings]), [{string, <<"\\b">>}, end_json])}, + {"formfeed escape", ?_assertEqual(encode(<<"\f">>, [escaped_strings]), [{string, <<"\\f">>}, end_json])}, + {"newline escape", ?_assertEqual(encode(<<"\n">>, [escaped_strings]), [{string, <<"\\n">>}, end_json])}, + {"carriage return escape", ?_assertEqual(encode(<<"\r">>, [escaped_strings]), [{string, <<"\\r">>}, end_json])}, + {"tab escape", ?_assertEqual(encode(<<"\t">>, [escaped_strings]), [{string, <<"\\t">>}, end_json])}, + {"quote escape", ?_assertEqual(encode(<<"\"">>, [escaped_strings]), [{string, <<"\\\"">>}, end_json])}, + {"single quote escape", ?_assertEqual(encode(<<"'">>, [escaped_strings, single_quoted_strings]), [{string, <<"\\'">>}, end_json])}, + {"no single quote escape", ?_assertEqual(encode(<<"'">>, [escaped_strings]), [{string, <<"'">>}, end_json])}, + {"forward slash escape", ?_assertEqual(encode(<<"/">>, [escaped_strings, escaped_forward_slashes]), [{string, <<"\\/">>}, end_json])}, + {"no forward slash escape", ?_assertEqual(encode(<<"/">>, [escaped_strings]), [{string, <<"/">>}, end_json])}, + {"back slash escape", ?_assertEqual(encode(<<"\\">>, [escaped_strings]), [{string, <<"\\\\">>}, end_json])}, {"jsonp escape", ?_assertEqual( - encode(<<16#2028/utf8, 16#2029/utf8>>, [json_escape]), + encode(<<16#2028/utf8, 16#2029/utf8>>, [escaped_strings]), [{string, <<"\\u2028\\u2029">>}, end_json] )}, {"no jsonp escape", ?_assertEqual( - encode(<<16#2028/utf8, 16#2029/utf8>>, [json_escape, no_jsonp_escapes]), + encode(<<16#2028/utf8, 16#2029/utf8>>, [escaped_strings, unescaped_jsonp]), [{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json] )}, - {"control escape", ?_assertEqual(encode(<<0>>, [json_escape]), [{string, <<"\\u0000">>}, end_json])}, - {"dirty strings", ?_assertEqual(encode(<<"\n">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])}, - {"ignore bad escapes", ?_assertEqual(encode(<<"\\x25">>, [json_escape, ignore_bad_escapes]), [{string, 
<<"\\\\x25">>}, end_json])} + {"control escape", ?_assertEqual(encode(<<0>>, [escaped_strings]), [{string, <<"\\u0000">>}, end_json])}, + {"dirty strings", ?_assertEqual(encode(<<"\n">>, [escaped_strings, dirty_strings]), [{string, <<"\n">>}, end_json])}, + {"ignore bad escapes", ?_assertEqual(encode(<<"\\x25">>, [escaped_strings, ignored_bad_escapes]), [{string, <<"\\\\x25">>}, end_json])} ]. @@ -823,23 +823,23 @@ good_characters_test_() -> {"acceptable codepoints", ?_assert(check_good(good())) }, - {"acceptable codepoints - json_escape", - ?_assert(check_good(good(), [json_escape])) + {"acceptable codepoints - escaped_strings", + ?_assert(check_good(good(), [escaped_strings])) }, - {"acceptable codepoints - loose_unicode", - ?_assert(check_good(good(), [json_escape])) + {"acceptable codepoints - replaced_bad_utf8", + ?_assert(check_good(good(), [escaped_strings])) }, - {"acceptable codepoints - json_escape + loose_unicode", - ?_assert(check_good(good(), [json_escape, loose_unicode])) + {"acceptable codepoints - escaped_strings + replaced_bad_utf8", + ?_assert(check_good(good(), [escaped_strings, replaced_bad_utf8])) }, {"acceptable extended", ?_assert(check_good(good_extended())) }, - {"acceptable extended - json_escape", - ?_assert(check_good(good_extended(), [json_escape])) + {"acceptable extended - escaped_strings", + ?_assert(check_good(good_extended(), [escaped_strings])) }, - {"acceptable extended - json_escape", - ?_assert(check_good(good_extended(), [loose_unicode])) + {"acceptable extended - escaped_strings", + ?_assert(check_good(good_extended(), [replaced_bad_utf8])) } ]. @@ -886,7 +886,7 @@ check_bad(List) -> check_replaced(List) -> [] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true ; (_) -> false end, - check(List, [loose_unicode], []) + check(List, [replaced_bad_utf8], []) ). 
diff --git a/src/jsx_opts.hrl b/src/jsx_opts.hrl index 3db2dcb..d62a790 100644 --- a/src/jsx_opts.hrl +++ b/src/jsx_opts.hrl @@ -1,11 +1,11 @@ -record(opts, { - loose_unicode = false, - escape_forward_slash = false, - explicit_end = false, - single_quotes = false, - no_jsonp_escapes = false, + replaced_bad_utf8 = false, + escaped_forward_slashes = false, + single_quoted_strings = false, + unescaped_jsonp = false, comments = false, - json_escape = false, + escaped_strings = false, dirty_strings = false, - ignore_bad_escapes = false + ignored_bad_escapes = false, + explicit_end = false }). \ No newline at end of file diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 0760349..f593c96 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -39,13 +39,13 @@ -spec to_json(Source::any(), Opts::opts()) -> binary(). to_json(Source, Opts) when is_list(Opts) -> - (jsx:encoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [json_escape])))(Source). + (jsx:encoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [escaped_strings])))(Source). -spec format(Source::binary(), Opts::opts()) -> binary(). format(Source, Opts) when is_binary(Source) andalso is_list(Opts) -> - (jsx:decoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [json_escape])))(Source). + (jsx:decoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [escaped_strings])))(Source). parse_opts(Opts) -> parse_opts(Opts, #opts{}). 
diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index bfe1900..c160c40 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -36,49 +36,69 @@ parse_opts(Opts) -> parse_opts([], Opts) -> Opts; -parse_opts([loose_unicode|Rest], Opts) -> - parse_opts(Rest, Opts#opts{loose_unicode=true}); -parse_opts([escape_forward_slash|Rest], Opts) -> - parse_opts(Rest, Opts#opts{escape_forward_slash=true}); +parse_opts([replaced_bad_utf8|Rest], Opts) -> + parse_opts(Rest, Opts#opts{replaced_bad_utf8=true}); +parse_opts([escaped_forward_slashes|Rest], Opts) -> + parse_opts(Rest, Opts#opts{escaped_forward_slashes=true}); parse_opts([explicit_end|Rest], Opts) -> parse_opts(Rest, Opts#opts{explicit_end=true}); -parse_opts([single_quotes|Rest], Opts) -> - parse_opts(Rest, Opts#opts{single_quotes=true}); -parse_opts([no_jsonp_escapes|Rest], Opts) -> - parse_opts(Rest, Opts#opts{no_jsonp_escapes=true}); +parse_opts([single_quoted_strings|Rest], Opts) -> + parse_opts(Rest, Opts#opts{single_quoted_strings=true}); +parse_opts([unescaped_jsonp|Rest], Opts) -> + parse_opts(Rest, Opts#opts{unescaped_jsonp=true}); parse_opts([comments|Rest], Opts) -> parse_opts(Rest, Opts#opts{comments=true}); -parse_opts([json_escape|Rest], Opts) -> - parse_opts(Rest, Opts#opts{json_escape=true}); +parse_opts([escaped_strings|Rest], Opts) -> + parse_opts(Rest, Opts#opts{escaped_strings=true}); parse_opts([dirty_strings|Rest], Opts) -> parse_opts(Rest, Opts#opts{dirty_strings=true}); -parse_opts([ignore_bad_escapes|Rest], Opts) -> - parse_opts(Rest, Opts#opts{ignore_bad_escapes=true}); +parse_opts([ignored_bad_escapes|Rest], Opts) -> + parse_opts(Rest, Opts#opts{ignored_bad_escapes=true}); parse_opts([relax|Rest], Opts) -> parse_opts(Rest, Opts#opts{ - loose_unicode = true, - single_quotes = true, + replaced_bad_utf8 = true, + single_quoted_strings = true, comments = true, - ignore_bad_escapes = true + ignored_bad_escapes = true }); +%% deprecated flags +parse_opts([loose_unicode|Rest], Opts) -> + 
parse_opts(Rest, Opts#opts{replaced_bad_utf8=true}); +parse_opts([escape_forward_slash|Rest], Opts) -> + parse_opts(Rest, Opts#opts{escaped_forward_slashes=true}); +parse_opts([single_quotes|Rest], Opts) -> + parse_opts(Rest, Opts#opts{single_quoted_strings=true}); +parse_opts([no_jsonp_escapes|Rest], Opts) -> + parse_opts(Rest, Opts#opts{unescaped_jsonp=true}); +parse_opts([json_escape|Rest], Opts) -> + parse_opts(Rest, Opts#opts{escaped_strings=true}); +parse_opts([ignore_bad_escapes|Rest], Opts) -> + parse_opts(Rest, Opts#opts{ignored_bad_escapes=true}); parse_opts(_, _) -> {error, badarg}. valid_flags() -> [ - loose_unicode, - escape_forward_slash, - explicit_end, - single_quotes, - no_jsonp_escapes, + replaced_bad_utf8, + escaped_forward_slashes, + single_quoted_strings, + unescaped_jsonp, comments, - json_escape, + escaped_strings, dirty_strings, - ignore_bad_escapes, - relax + ignored_bad_escapes, + explicit_end, + relax, + %% deprecated flags + loose_unicode, %% replaced_bad_utf8 + escape_forward_slash, %% escaped_forward_slashes + single_quotes, %% single_quoted_strings + no_jsonp_escapes, %% unescaped_jsonp + json_escape, %% escaped_strings + ignore_bad_escapes %% ignored_bad_escapes ]. - + extract_opts(Opts) -> extract_parser_opts(Opts, []). 
@@ -128,24 +148,24 @@ opts_test_() -> {"all flags", ?_assertEqual( parse_opts([ - loose_unicode, - escape_forward_slash, + replaced_bad_utf8, + escaped_forward_slashes, explicit_end, - single_quotes, - no_jsonp_escapes, + single_quoted_strings, + unescaped_jsonp, comments, dirty_strings, - ignore_bad_escapes + ignored_bad_escapes ]), #opts{ - loose_unicode=true, - escape_forward_slash=true, + replaced_bad_utf8=true, + escaped_forward_slashes=true, explicit_end=true, - single_quotes=true, - no_jsonp_escapes=true, + single_quoted_strings=true, + unescaped_jsonp=true, comments=true, dirty_strings=true, - ignore_bad_escapes=true + ignored_bad_escapes=true } ) }, @@ -153,10 +173,10 @@ opts_test_() -> ?_assertEqual( parse_opts([relax]), #opts{ - loose_unicode=true, - single_quotes=true, + replaced_bad_utf8=true, + single_quoted_strings=true, comments=true, - ignore_bad_escapes=true + ignored_bad_escapes=true } ) } From 1a791f2a782def44d9bdae34bde12ff64155ffb3 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 31 Mar 2012 21:58:45 -0700 Subject: [PATCH 20/38] replace old option flag names with new option flag names, and clarify a couple of them --- README.markdown | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/README.markdown b/README.markdown index 47199e7..b77cace 100644 --- a/README.markdown +++ b/README.markdown @@ -103,23 +103,23 @@ json objects are represented by erlang proplists. the empty object has the speci jsx functions all take a common set of options. not all flags have meaning in all contexts, but they are always valid options. flags are always atoms and have no value. functions may have additional options beyond these, see individual function documentation for details -#### `loose_unicode` #### +#### `replaced_bad_utf8` #### json text input and json strings SHOULD be utf8 encoded binaries, appropriately escaped as per the json spec. 
if this option is present attempts are made to replace invalid codepoints with `u+FFFD` as per the unicode spec. this applies both to malformed unicode and disallowed codepoints -#### `escape_forward_slash` #### +#### `escaped_forward_slashes` #### json strings are escaped according to the json spec. this means forward slashes are never escaped. unfortunately, a microsoft implementation of json uses escaped forward slashes in json formatted date strings. without this option it is impossible to get date strings that some microsoft tools understand -#### `explicit_end` #### - -this option treats all exhausted inputs as incomplete, as explained below. the parser will not attempt to return a final state until the function is called with the value `end_stream` - -#### `single_quotes` #### +#### `single_quoted_strings` #### -some parsers allow double quotes (`u+0022`) to be replaced by single quotes (`u+0027`) to deliminate keys and strings. this option allows json containing single quotes as structural (deliminator) characters to be parsed without errors. note that the parser expects strings to be terminated by the same quote type that opened it and that single quotes must, obviously, be escaped within strings deliminated by single quotes. the parser will never emit json with keys or strings deliminated by single quotes +some parsers allow double quotes (`u+0022`) to be replaced by single quotes (`u+0027`) to deliminate keys and strings. this option allows json containing single quotes as structural (deliminator) characters to be parsed without errors. 
note that the parser expects strings to be terminated by the same quote type that opened it and that single quotes must, obviously, be escaped within strings deliminated by single quotes -#### `no_jsonp_escapes` #### +double quotes must ALWAYS be escaped, regardless of what kind of quotes deliminate the string they are found in + +the parser will never emit json with keys or strings deliminated by single quotes + +#### `unescaped_jsonp` #### javascript interpreters treat the codepoints `u+2028` and `u+2029` as significant whitespace. json strings that contain either of these codepoints will be parsed incorrectly by some javascript interpreters. by default, these codepoints are escaped (to `"\u2028"` and `\u2029`, respectively) to retain compatibility. this option simply removes that escaping if, for some reason, you object to this @@ -127,21 +127,25 @@ javascript interpreters treat the codepoints `u+2028` and `u+2029` as significan json has no official comments but some parsers allow c style comments. this flag allows comments (both `// ...` and `/* ... */` style) anywhere whitespace is allowed -#### `json_escape` #### +#### `escaped_strings` #### -by default, both the encoder and decoder return strings as utf8 binaries appropriate for use in erlang. escape sequences that were present in decoded terms are converted into the appropriate codepoint and encoded terms are unaltered. this flag escapes strings for output in json, removing control codes and replacing them with the appropriate escapes +by default, both the encoder and decoder return strings as utf8 binaries appropriate for use in erlang. escape sequences that were present in decoded terms are converted into the appropriate codepoint and encoded terms are unaltered. 
this flag escapes strings as if for output in json, removing control codes and problematic codepoints and replacing them with the appropriate escapes #### `dirty_strings` #### json escaping is lossy, it mutates the json string and repeated application can result in unwanted behaviour. if your strings are already escaped (or you'd like to force invalid strings into "json") use this flag to bypass escaping -#### `ignore_bad_escapes` #### +#### `ignored_bad_escapes` #### -during decoding, ignore unrecognized escape sequences and leave them as is in the stream +during decoding, ignore unrecognized escape sequences and leave them as is in the stream. note that if you combine this option with `escaped_strings` the escape character itself will be escaped + +#### `explicit_end` #### + +this option treats all exhausted inputs as incomplete, as explained below. the parser will not attempt to return a final state until the function is called with the value `end_stream` #### `relax` #### -relax is a synonym for `[loose_unicode, single_quotes, comments, ignore_bad_escapes]` +relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, ignored_bad_escapes]` for what you don't care how janky and awful your json input is, you just want the parser to do the best it can ### incomplete input ### From 3e56b7ff3d14f6f21d3895a60c29375d23c7d490 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 1 Apr 2012 17:16:22 -0700 Subject: [PATCH 21/38] more explicit handling of comments --- src/jsx_decoder.erl | 65 +++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 59f6ab7..2779718 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -151,8 +151,7 @@ value(<>, {Handler, State}, Stack, Opts) -> value(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> value(Rest, Handler, Stack, Opts); value(<>, Handler, Stack, Opts=#opts{comments=true}) -> - Resume = fun(R, H, 
S, O) -> value(R, H, S, O) end, - comment(Rest, Handler, [Resume|Stack], Opts); + comment(Rest, Handler, [value|Stack], Opts); value(<<>>, Handler, Stack, Opts) -> ?incomplete(value, <<>>, Handler, Stack, Opts); value(Bin, Handler, Stack, Opts) -> @@ -168,8 +167,7 @@ object(<>, {Handler, State}, [key|Stack], Opts) -> object(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> object(Rest, Handler, Stack, Opts); object(<>, Handler, Stack, Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> object(R, H, S, O) end, - comment(Rest, Handler, [Resume|Stack], Opts); + comment(Rest, Handler, [object|Stack], Opts); object(<<>>, Handler, Stack, Opts) -> ?incomplete(object, <<>>, Handler, Stack, Opts); object(Bin, Handler, Stack, Opts) -> @@ -201,8 +199,7 @@ array(<>, {Handler, State}, [array|Stack], Opts) -> array(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> array(Rest, Handler, Stack, Opts); array(<>, Handler, Stack, Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> array(R, H, S, O) end, - comment(Rest, Handler, [Resume|Stack], Opts); + comment(Rest, Handler, [array|Stack], Opts); array(<<>>, Handler, Stack, Opts) -> ?incomplete(array, <<>>, Handler, Stack, Opts); array(Bin, Handler, Stack, Opts) -> @@ -214,8 +211,7 @@ colon(<>, Handler, [key|Stack], Opts) -> colon(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> colon(Rest, Handler, Stack, Opts); colon(<>, Handler, Stack, Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> colon(R, H, S, O) end, - comment(Rest, Handler, [Resume|Stack], Opts); + comment(Rest, Handler, [colon|Stack], Opts); colon(<<>>, Handler, Stack, Opts) -> ?incomplete(colon, <<>>, Handler, Stack, Opts); colon(Bin, Handler, Stack, Opts) -> @@ -229,8 +225,7 @@ key(<>, Handler, Stack, Opts = #opts{single_quoted_st key(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> key(Rest, Handler, Stack, Opts); key(<>, Handler, Stack, Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> key(R, H, S, O) end, - comment(Rest, 
Handler, [Resume|Stack], Opts); + comment(Rest, Handler, [key|Stack], Opts); key(<<>>, Handler, Stack, Opts) -> ?incomplete(key, <<>>, Handler, Stack, Opts); key(Bin, Handler, Stack, Opts) -> @@ -737,8 +732,7 @@ zero(<>, Handler, [Acc|Stack], Opts) -> zero(<>, {Handler, State}, [Acc|Stack], Opts) when ?is_whitespace(S) -> maybe_done(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); zero(<>, {Handler, State}, [Acc|Stack], Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> maybe_done(R, H, S, O) end, - comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [Resume|Stack], Opts); + comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [maybe_done|Stack], Opts); zero(<<>>, {Handler, State}, [Acc|Stack], Opts = #opts{explicit_end=false}) -> maybe_done(<<>>, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); zero(<<>>, Handler, Stack, Opts) -> @@ -776,8 +770,7 @@ integer(<>, Handler, [Acc|Stack], Opts) when S =:= $e; S =:= $E integer(<>, {Handler, State}, [Acc|Stack], Opts) when ?is_whitespace(S) -> maybe_done(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); integer(<>, {Handler, State}, [Acc|Stack], Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> maybe_done(R, H, S, O) end, - comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [Resume|Stack], Opts); + comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [maybe_done|Stack], Opts); integer(<<>>, {Handler, State}, [Acc|Stack], Opts = #opts{explicit_end=false}) -> maybe_done(<<>>, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); integer(<<>>, Handler, Stack, Opts) -> @@ -820,8 +813,7 @@ decimal(<>, Handler, [{Int, Frac}|Stack], Opts) when S =:= $e; S decimal(<>, {Handler, State}, [Acc|Stack], Opts) when ?is_whitespace(S) -> maybe_done(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); 
decimal(<>, {Handler, State}, [Acc|Stack], Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> maybe_done(R, H, S, O) end, - comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [Resume|Stack], Opts); + comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [maybe_done|Stack], Opts); decimal(<<>>, {Handler, State}, [Acc|Stack], Opts = #opts{explicit_end=false}) -> maybe_done(<<>>, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); decimal(<<>>, Handler, Stack, Opts) -> @@ -871,8 +863,7 @@ exp(<>, {Handler, State}, [Acc, array|Stack], Opts) -> exp(<>, {Handler, State}, [Acc|Stack], Opts) when ?is_whitespace(S) -> maybe_done(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); exp(<>, {Handler, State}, [Acc|Stack], Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> maybe_done(R, H, S, O) end, - comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [Resume|Stack], Opts); + comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [maybe_done|Stack], Opts); exp(<<>>, {Handler, State}, [Acc|Stack], Opts = #opts{explicit_end=false}) -> maybe_done(<<>>, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); exp(<<>>, Handler, Stack, Opts) -> @@ -974,19 +965,19 @@ null(Bin, Handler, Stack, Opts) -> comment(<>, Handler, Stack, Opts) -> single_comment(Rest, Handler, Stack, Opts); comment(<>, Handler, Stack, Opts) -> - multi_comment(Rest, Handler, Stack, Opts); + multi_comment(Rest, Handler, Stack, Opts); comment(<<>>, Handler, Stack, Opts) -> ?incomplete(comment, <<>>, Handler, Stack, Opts); comment(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). 
-single_comment(<>, Handler, [Resume|Stack], Opts) -> - Resume(Rest, Handler, Stack, Opts); -single_comment(<<>>, Handler, [Resume|Stack], Opts) -> - Resume(<<>>, Handler, Stack, Opts); -single_comment(<<_S/utf8, Rest/binary>>, Handler, Stack, Opts) -> +single_comment(<>, Handler, Stack, Opts) -> + end_comment(Rest, Handler, Stack, Opts); +single_comment(<<_/utf8, Rest/binary>>, Handler, Stack, Opts) -> single_comment(Rest, Handler, Stack, Opts); +single_comment(<<>>, Handler, [done], Opts) -> + end_comment(<<>>, Handler, [done], Opts); single_comment(<<>>, Handler, Stack, Opts) -> ?incomplete(single_comment, <<>>, Handler, Stack, Opts); single_comment(Bin, Handler, Stack, Opts) -> @@ -1003,8 +994,8 @@ multi_comment(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). -end_multi_comment(<>, Handler, [Resume|Stack], Opts) -> - Resume(Rest, Handler, Stack, Opts); +end_multi_comment(<>, Handler, Stack, Opts) -> + end_comment(Rest, Handler, Stack, Opts); end_multi_comment(<<_S/utf8, Rest/binary>>, Handler, Stack, Opts) -> multi_comment(Rest, Handler, Stack, Opts); end_multi_comment(<<>>, Handler, Stack, Opts) -> @@ -1013,6 +1004,20 @@ end_multi_comment(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). +end_comment(Rest, Handler, [Resume|Stack], Opts) -> + case Resume of + value -> value(Rest, Handler, Stack, Opts) + ; object -> object(Rest, Handler, Stack, Opts) + ; array -> array(Rest, Handler, Stack, Opts) + ; colon -> colon(Rest, Handler, Stack, Opts) + ; key -> key(Rest, Handler, Stack, Opts) + ; maybe_done -> maybe_done(Rest, Handler, Stack, Opts) + ; done -> done(Rest, Handler, Stack, Opts) + end. 
+ + +maybe_done(Rest, {Handler, State}, [], Opts) -> + done(Rest, {Handler, Handler:handle_event(end_json, State)}, [], Opts); maybe_done(<>, {Handler, State}, [object|Stack], Opts) -> maybe_done(Rest, {Handler, Handler:handle_event(end_object, State)}, Stack, Opts); maybe_done(<>, {Handler, State}, [array|Stack], Opts) -> @@ -1024,12 +1029,9 @@ maybe_done(<>, Handler, [array|_] = Stack, Opts) -> maybe_done(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> maybe_done(Rest, Handler, Stack, Opts); maybe_done(<>, Handler, Stack, Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> maybe_done(R, H, S, O) end, - comment(Rest, Handler, [Resume|Stack], Opts); + comment(Rest, Handler, [maybe_done|Stack], Opts); maybe_done(<<>>, Handler, Stack, Opts) when length(Stack) > 0 -> ?incomplete(maybe_done, <<>>, Handler, Stack, Opts); -maybe_done(Rest, {Handler, State}, [], Opts) -> - done(Rest, {Handler, Handler:handle_event(end_json, State)}, [], Opts); maybe_done(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). 
@@ -1037,8 +1039,7 @@ maybe_done(Bin, Handler, Stack, Opts) -> done(<>, Handler, [], Opts) when ?is_whitespace(S) -> done(Rest, Handler, [], Opts); done(<>, Handler, [], Opts=#opts{comments=true}) -> - Resume = fun(R, H, S, O) -> done(R, H, S, O) end, - comment(Rest, Handler, [Resume], Opts); + comment(Rest, Handler, [done], Opts); done(<<>>, {Handler, State}, [], Opts = #opts{explicit_end=true}) -> {incomplete, fun(Stream) when is_binary(Stream) -> done(<>, {Handler, State}, [], Opts) From 7b5f56a4be41758ed2f21e56c913dca5b85a8d97 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 1 Apr 2012 17:27:55 -0700 Subject: [PATCH 22/38] minor refactoring in jsx_decoder --- src/jsx_decoder.erl | 88 ++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 2779718..2abd30c 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -464,52 +464,48 @@ string(<<127, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 127)|Stack], Opts); string(<>, Handler, [Acc|Stack], Opts) when X == 16#2028; X == 16#2029 -> string(Rest, Handler, [?acc_seq(Acc, maybe_replace(X, Opts))|Stack], Opts); -string(<>, Handler, [Acc|Stack], Opts) -> - case S of - %% not strictly true, but exceptions are already taken care of in preceding clauses - S when S >= 16#20, S < 16#d800 -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S > 16#dfff, S < 16#fdd0 -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S > 16#fdef, S < 16#fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#10000, S < 16#1fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#20000, S < 16#2fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#30000, S < 16#3fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#40000, S < 16#4fffe -> - 
string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#50000, S < 16#5fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#60000, S < 16#6fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#70000, S < 16#7fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#80000, S < 16#8fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#90000, S < 16#9fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#a0000, S < 16#afffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#b0000, S < 16#bfffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#c0000, S < 16#cfffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#d0000, S < 16#dfffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#e0000, S < 16#efffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#f0000, S < 16#ffffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; S when S >= 16#100000, S < 16#10fffe -> - string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) - ; _ -> - case Opts#opts.replaced_bad_utf8 of - true -> noncharacter(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) - ; false -> ?error([<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts]) - end +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#20, X < 16#d800 -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X > 16#dfff, X < 16#fdd0 -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X > 16#fdef, X < 16#fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#10000, X < 16#1fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#20000, X < 16#2fffe -> + 
string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#30000, X < 16#3fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#40000, X < 16#4fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#50000, X < 16#5fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#60000, X < 16#6fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#70000, X < 16#7fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#80000, X < 16#8fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#90000, X < 16#9fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#a0000, X < 16#afffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#b0000, X < 16#bfffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#c0000, X < 16#cfffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#d0000, X < 16#dfffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#e0000, X < 16#efffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#f0000, X < 16#ffffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) when X >= 16#100000, X < 16#10fffe -> + string(Rest, Handler, [?acc_seq(Acc, X)|Stack], Opts); +string(<<X/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) -> + case Opts#opts.replaced_bad_utf8 of 
+ true -> noncharacter(<>, Handler, [Acc|Stack], Opts) + ; false -> ?error([<>, Handler, [Acc|Stack], Opts]) end; string(Bin, Handler, Stack, Opts) -> case partial_utf(Bin) of From 447e0b1356bb21b5c164df11445217e5154f3fb7 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 1 Apr 2012 17:40:47 -0700 Subject: [PATCH 23/38] README fixes and clarifications --- README.markdown | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.markdown b/README.markdown index b77cace..9a8ffde 100644 --- a/README.markdown +++ b/README.markdown @@ -60,7 +60,7 @@ to minify a json string: `jsx:format(JSON)` json must be a binary encoded in `utf8`. if it's invalid `utf8` or invalid json, it probably won't parse without errors. there are a few non-standard extensions to the parser available that may change that, they are detailed in the options section below -jsx also supports json fragments; valid json values that are not complete json. that means jsx will parse things like `<<"1">`, `<<"true">>` and `<<"\"hello world\"">>` without problems +jsx also supports json fragments; valid json values that are not complete json. that means jsx will parse things like `<<"1">>`, `<<"true">>` and `<<"\"hello world\"">>` without complaint #### erlang #### @@ -74,7 +74,7 @@ when converting from erlang to json, numbers are represented with their shortest #### strings #### -the [json spec][rfc4627] is frustratingly vague on the exact details of json strings. json must be unicode, but no encoding is specified. javascript explicitly allows strings containing codepoints explicitly disallowed by unicode. json allows implementations to set limits on the content of strings and other implementations attempt to resolve this in various ways. 
this implementation, in default operation, only accepts strings that meet the constraints set out in the json spec (properly escaped control characters, `"` and the escape character, `\`) and that are encoded in `utf8` +the [json spec][rfc4627] is frustratingly vague on the exact details of json strings. json must be unicode, but no encoding is specified. javascript explicitly allows strings containing codepoints explicitly disallowed by unicode. json allows implementations to set limits on the content of strings and other implementations attempt to resolve this in various ways. this implementation, in default operation, only accepts strings that meet the constraints set out in the json spec (strings are sequences of unicode codepoints deliminated by `"` (`u+0022`) that may not contain control codes unless properly escaped with `\` (`u+005c`)) and that are encoded in `utf8` the utf8 restriction means improperly paired surrogates are explicitly disallowed. `u+d800` to `u+dfff` are allowed, but only when they form valid surrogate pairs. surrogates that appear otherwise are an error @@ -82,7 +82,7 @@ json string escapes of the form `\uXXXX` will be converted to their equivalent c in the interests of pragmatism, there is an option for looser parsing, see options below -all erlang strings are represented by *valid* `utf8` encoded binaries. the encoder will check strings for conformance. the same restrictions apply as for strings encountered within json texts. that means no unpaired surrogates +all erlang strings are represented by *valid* `utf8` encoded binaries. the encoder will check strings for conformance. noncharacters (like `u+ffff`) are allowed in erlang utf8 encoded binaries, but not in strings passed to the encoder (although see options below) this implementation performs no normalization on strings beyond that detailed here. 
be careful when comparing strings as equivalent strings may have different `utf8` encodings @@ -121,7 +121,7 @@ the parser will never emit json with keys or strings deliminated by single quote #### `unescaped_jsonp` #### -javascript interpreters treat the codepoints `u+2028` and `u+2029` as significant whitespace. json strings that contain either of these codepoints will be parsed incorrectly by some javascript interpreters. by default, these codepoints are escaped (to `"\u2028"` and `\u2029`, respectively) to retain compatibility. this option simply removes that escaping if, for some reason, you object to this +javascript interpreters treat the codepoints `u+2028` and `u+2029` as significant whitespace. json strings that contain either of these codepoints will be parsed incorrectly by some javascript interpreters. by default, these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to retain compatibility. this option simply removes that escaping if, for some reason, you object to this #### `comments` #### @@ -145,7 +145,7 @@ this option treats all exhausted inputs as incomplete, as explained below. 
the p #### `relax` #### -relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, ignored_bad_escapes]` for what you don't care how janky and awful your json input is, you just want the parser to do the best it can +relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, ignored_bad_escapes]` for when you don't care how janky and awful your json input is, you just want the parser to do the best it can ### incomplete input ### From 19402fbee47b61053a62722fb024af691fce4ae4 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 1 Apr 2012 17:53:59 -0700 Subject: [PATCH 24/38] two new tests for comments and one (tiny) fix for trailing comments --- priv/test_cases/comment_style_a.json | 10 ++++++++++ priv/test_cases/comment_style_a.test | 4 ++++ priv/test_cases/comment_style_b.json | 1 + priv/test_cases/comment_style_b.test | 4 ++++ src/jsx_decoder.erl | 2 +- 5 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 priv/test_cases/comment_style_a.json create mode 100644 priv/test_cases/comment_style_a.test create mode 100644 priv/test_cases/comment_style_b.json create mode 100644 priv/test_cases/comment_style_b.test diff --git a/priv/test_cases/comment_style_a.json b/priv/test_cases/comment_style_a.json new file mode 100644 index 0000000..5bdc1f7 --- /dev/null +++ b/priv/test_cases/comment_style_a.json @@ -0,0 +1,10 @@ +// comment +{ // comment + "key" // comment + : // comment + [ // comment + true // comment + , // comment + false // comment + ] // comment +} // comment \ No newline at end of file diff --git a/priv/test_cases/comment_style_a.test b/priv/test_cases/comment_style_a.test new file mode 100644 index 0000000..60d9900 --- /dev/null +++ b/priv/test_cases/comment_style_a.test @@ -0,0 +1,4 @@ +{name, "comment_style_a"}. +{jsx, [start_object,{key, <<"key">>}, start_array, {literal, true}, {literal, false}, end_array, end_object,end_json]}. +{json, "comment_style_a.json"}. +{jsx_flags, [comments]}. 
\ No newline at end of file diff --git a/priv/test_cases/comment_style_b.json b/priv/test_cases/comment_style_b.json new file mode 100644 index 0000000..c515fee --- /dev/null +++ b/priv/test_cases/comment_style_b.json @@ -0,0 +1 @@ +/* comment */ { /* comment */ "key" /* comment */ : /* comment */ [ /* comment */ true /* comment */ , /* comment */ false /* comment */ ] /* comment */ } /* comment */ \ No newline at end of file diff --git a/priv/test_cases/comment_style_b.test b/priv/test_cases/comment_style_b.test new file mode 100644 index 0000000..60d9900 --- /dev/null +++ b/priv/test_cases/comment_style_b.test @@ -0,0 +1,4 @@ +{name, "comment_style_b"}. +{jsx, [start_object,{key, <<"key">>}, start_array, {literal, true}, {literal, false}, end_array, end_object,end_json]}. +{json, "comment_style_b.json"}. +{jsx_flags, [comments]}. \ No newline at end of file diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 2abd30c..4e64bec 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -972,7 +972,7 @@ single_comment(<>, Handler, Stack, Opts) -> end_comment(Rest, Handler, Stack, Opts); single_comment(<<_/utf8, Rest/binary>>, Handler, Stack, Opts) -> single_comment(Rest, Handler, Stack, Opts); -single_comment(<<>>, Handler, [done], Opts) -> +single_comment(<<>>, Handler, [done], Opts=#opts{explicit_end=false}) -> end_comment(<<>>, Handler, [done], Opts); single_comment(<<>>, Handler, Stack, Opts) -> ?incomplete(single_comment, <<>>, Handler, Stack, Opts); single_comment(Bin, Handler, Stack, Opts) -> From 941620129da34bd954043a9920343b0649ea6918 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 4 Apr 2012 18:41:55 -0700 Subject: [PATCH 25/38] remove nicedecimal from jsx.app.src --- src/jsx.app.src | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/jsx.app.src b/src/jsx.app.src index 5875359..72e3fe1 100644 --- a/src/jsx.app.src +++ b/src/jsx.app.src @@ -14,8 +14,7 @@ {registered, []}, {applications, [ kernel, - stdlib, - nicedecimal + stdlib ]}, {env, []} ]}. 
From 89292c940d9d4040b07fde95cd8dcfd731313dbf Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 4 Apr 2012 20:04:17 -0700 Subject: [PATCH 26/38] pre_encoders for preprocessing input to encoder --- src/jsx_encoder.erl | 121 ++++++++++++++++++++++++++++++++++++++++++-- src/jsx_opts.hrl | 3 +- src/jsx_utils.erl | 5 ++ 3 files changed, 125 insertions(+), 4 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 59c0343..7bf2b3c 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -49,7 +49,7 @@ encoder(Handler, State, Opts) -> start(Term, {Handler, State}, Opts) -> - Handler:handle_event(end_json, value(Term, {Handler, State}, Opts)). + Handler:handle_event(end_json, value(pre_encode(Term, Opts), {Handler, State}, Opts)). value(String, {Handler, State}, Opts) when is_binary(String) -> @@ -82,7 +82,7 @@ object([{Key, Value}|Rest], {Handler, State}, Opts) -> { Handler, value( - Value, + pre_encode(Value, Opts), {Handler, Handler:handle_event({key, clean_string(fix_key(Key), Opts)}, State)}, Opts ) @@ -94,11 +94,14 @@ object(Term, Handler, Opts) -> ?error([Term, Handler, Opts]). list([Value|Rest], {Handler, State}, Opts) -> - list(Rest, {Handler, value(Value, {Handler, State}, Opts)}, Opts); + list(Rest, {Handler, value(pre_encode(Value, Opts), {Handler, State}, Opts)}, Opts); list([], {Handler, State}, _Opts) -> Handler:handle_event(end_array, State); list(Term, Handler, Opts) -> ?error([Term, Handler, Opts]). +pre_encode(Value, Opts) -> lists:foldl(fun(F, V) -> F(V) end, Value, Opts#opts.pre_encoders). + + fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8)); fix_key(Key) when is_binary(Key) -> Key. @@ -780,6 +783,118 @@ encode_test_() -> ]. 
+pre_encoders_test_() -> + Term = [ + {<<"object">>, [ + {<<"literals">>, [true, false, null]}, + {<<"strings">>, [<<"foo">>, <<"bar">>, <<"baz">>]}, + {<<"numbers">>, [1, 1.0, 1.0e0]} + ]} + ], + [ + {"no pre encode", ?_assertEqual( + encode(Term, []), + [ + start_object, + {key, <<"object">>}, start_object, + {key, <<"literals">>}, start_array, + {literal, true}, {literal, false}, {literal, null}, + end_array, + {key, <<"strings">>}, start_array, + {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>}, + end_array, + {key, <<"numbers">>}, start_array, + {integer, 1}, {float, 1.0}, {float, 1.0}, + end_array, + end_object, + end_object, + end_json + ] + )}, + {"replace lists with empty lists", ?_assertEqual( + encode(Term, [{pre_encoders, [fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end]}]), + [ + start_object, + {key, <<"object">>}, start_object, + {key, <<"literals">>}, start_array, end_array, + {key, <<"strings">>}, start_array, end_array, + {key, <<"numbers">>}, start_array, end_array, + end_object, + end_object, + end_json + ] + )}, + {"replace objects with empty objects", ?_assertEqual( + encode(Term, [{pre_encoders, [fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end]}]), + [ + start_object, + end_object, + end_json + ] + )}, + {"replace all non-list values with false", ?_assertEqual( + encode(Term, [{pre_encoders, [fun(V) when is_list(V) -> V; (_) -> false end]}]), + [ + start_object, + {key, <<"object">>}, start_object, + {key, <<"literals">>}, start_array, + {literal, false}, {literal, false}, {literal, false}, + end_array, + {key, <<"strings">>}, start_array, + {literal, false}, {literal, false}, {literal, false}, + end_array, + {key, <<"numbers">>}, start_array, + {literal, false}, {literal, false}, {literal, false}, + end_array, + end_object, + end_object, + end_json + ] + )}, + {"replace all atoms with atom_to_list", ?_assertEqual( + encode(Term, [{pre_encoders, [fun(V) when is_atom(V) -> 
unicode:characters_to_binary(atom_to_list(V)); (V) -> V end]}]), + [ + start_object, + {key, <<"object">>}, start_object, + {key, <<"literals">>}, start_array, + {string, <<"true">>}, {string, <<"false">>}, {string, <<"null">>}, + end_array, + {key, <<"strings">>}, start_array, + {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>}, + end_array, + {key, <<"numbers">>}, start_array, + {integer, 1}, {float, 1.0}, {float, 1.0}, + end_array, + end_object, + end_object, + end_json + ] + )}, + {"replace all atoms to strings and back", ?_assertEqual( + encode(Term, [{pre_encoders, [ + fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end, + fun(<<"true">>) -> true; (<<"false">>) -> false; (<<"null">>) -> null; (V) -> V end + ]}]), + [ + start_object, + {key, <<"object">>}, start_object, + {key, <<"literals">>}, start_array, + {literal, true}, {literal, false}, {literal, null}, + end_array, + {key, <<"strings">>}, start_array, + {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>}, + end_array, + {key, <<"numbers">>}, start_array, + {integer, 1}, {float, 1.0}, {float, 1.0}, + end_array, + end_object, + end_object, + end_json + ] + )} + ]. + + escapes_test_() -> [ {"backspace escape", ?_assertEqual(encode(<<"\b">>, [escaped_strings]), [{string, <<"\\b">>}, end_json])}, diff --git a/src/jsx_opts.hrl b/src/jsx_opts.hrl index d62a790..625c737 100644 --- a/src/jsx_opts.hrl +++ b/src/jsx_opts.hrl @@ -7,5 +7,6 @@ escaped_strings = false, dirty_strings = false, ignored_bad_escapes = false, - explicit_end = false + explicit_end = false, + pre_encoders = [] }). 
\ No newline at end of file diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index c160c40..2afd8db 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -61,6 +61,10 @@ parse_opts([relax|Rest], Opts) -> comments = true, ignored_bad_escapes = true }); +parse_opts([{pre_encoders, Encoders}|Rest], Opts) when is_list(Encoders) -> + lists:foreach(fun(F) when is_function(F, 1) -> ok end, Encoders), + AllEncoders = Opts#opts.pre_encoders ++ Encoders, + parse_opts(Rest, Opts#opts{pre_encoders=AllEncoders}); %% deprecated flags parse_opts([loose_unicode|Rest], Opts) -> parse_opts(Rest, Opts#opts{replaced_bad_utf8=true}); @@ -90,6 +94,7 @@ valid_flags() -> ignored_bad_escapes, explicit_end, relax, + pre_encoders, %% deprecated flags loose_unicode, %% replaced_bad_utf8 escape_forward_slash, %% escaped_forward_slashes From ae13b934c6bc5d7bbedfcae47fa6018bace3089a Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 4 Apr 2012 20:13:27 -0700 Subject: [PATCH 27/38] add {pre_encoder, F} variant of {pre_encoders, [F, G,...]} --- src/jsx_encoder.erl | 8 ++++---- src/jsx_utils.erl | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 7bf2b3c..cd2a08c 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -812,7 +812,7 @@ pre_encoders_test_() -> ] )}, {"replace lists with empty lists", ?_assertEqual( - encode(Term, [{pre_encoders, [fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end]}]), + encode(Term, [{pre_encoder, fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end}]), [ start_object, {key, <<"object">>}, start_object, @@ -825,7 +825,7 @@ pre_encoders_test_() -> ] )}, {"replace objects with empty objects", ?_assertEqual( - encode(Term, [{pre_encoders, [fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end]}]), + encode(Term, [{pre_encoder, fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end}]), [ start_object, end_object, @@ 
-833,7 +833,7 @@ pre_encoders_test_() -> ] )}, {"replace all non-list values with false", ?_assertEqual( - encode(Term, [{pre_encoders, [fun(V) when is_list(V) -> V; (_) -> false end]}]), + encode(Term, [{pre_encoder, fun(V) when is_list(V) -> V; (_) -> false end}]), [ start_object, {key, <<"object">>}, start_object, @@ -852,7 +852,7 @@ pre_encoders_test_() -> ] )}, {"replace all atoms with atom_to_list", ?_assertEqual( - encode(Term, [{pre_encoders, [fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end]}]), + encode(Term, [{pre_encoder, fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end}]), [ start_object, {key, <<"object">>}, start_object, diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index 2afd8db..a889cb5 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -61,6 +61,9 @@ parse_opts([relax|Rest], Opts) -> comments = true, ignored_bad_escapes = true }); +parse_opts([{pre_encoder, Encoder}|Rest], Opts) when is_function(Encoder, 1) -> + AllEncoders = Opts#opts.pre_encoders ++ [Encoder], + parse_opts(Rest, Opts#opts{pre_encoders=AllEncoders}); parse_opts([{pre_encoders, Encoders}|Rest], Opts) when is_list(Encoders) -> lists:foreach(fun(F) when is_function(F, 1) -> ok end, Encoders), AllEncoders = Opts#opts.pre_encoders ++ Encoders, @@ -94,6 +97,7 @@ valid_flags() -> ignored_bad_escapes, explicit_end, relax, + pre_encoder, pre_encoders, %% deprecated flags loose_unicode, %% replaced_bad_utf8 From 5e87f02fc5ef329d0b77c7a9e9e8ab431c90e6e4 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 4 Apr 2012 20:13:46 -0700 Subject: [PATCH 28/38] update README to include info on pre_encoders --- README.markdown | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index 9a8ffde..a90a1a6 100644 --- a/README.markdown +++ b/README.markdown @@ -101,7 +101,7 @@ json objects are represented by erlang proplists. 
the empty object has the speci ### options ### -jsx functions all take a common set of options. not all flags have meaning in all contexts, but they are always valid options. flags are always atoms and have no value. functions may have additional options beyond these, see individual function documentation for details +jsx functions all take a common set of options. not all flags have meaning in all contexts, but they are always valid options. flags are always atoms or {atom, Term} tuples. functions may have additional options beyond these, see individual function documentation for details #### `replaced_bad_utf8` #### @@ -147,6 +147,12 @@ this option treats all exhausted inputs as incomplete, as explained below. the p relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, ignored_bad_escapes]` for when you don't care how janky and awful your json input is, you just want the parser to do the best it can +#### `{pre_encoder, F}` or `{pre_encoders, [F, G,...]}` #### + +pre encoders are functions of arity 1 that pre-process input to the encoder. only input evaluated in a *value* context is pre-processed in this manner (so keys are not pre-processed, but objects and lists are). 
if more than one pre encoder is declared, the input will be passed to each of them in the order they are declared + +input can be any term, but final output from the chain should be otherwise recognized input to the encoder + ### incomplete input ### From e69ac5f3714c9284e9b162b4560942a29d7c8107 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 4 Apr 2012 20:46:41 -0700 Subject: [PATCH 29/38] detect utf8 bom and ignore if present --- priv/test_cases/bom.json | 1 + priv/test_cases/bom.test | 3 +++ src/jsx_decoder.erl | 26 +++++++++++++++++++++++++- 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 priv/test_cases/bom.json create mode 100644 priv/test_cases/bom.test diff --git a/priv/test_cases/bom.json b/priv/test_cases/bom.json new file mode 100644 index 0000000..ad47dbb --- /dev/null +++ b/priv/test_cases/bom.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/priv/test_cases/bom.test b/priv/test_cases/bom.test new file mode 100644 index 0000000..75b9d3e --- /dev/null +++ b/priv/test_cases/bom.test @@ -0,0 +1,3 @@ +{name, "byte order mark"}. +{jsx, [start_array, end_array, end_json]}. +{json, "bom.json"}. diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 4e64bec..a362638 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -29,7 +29,7 @@ -spec decoder(Handler::module(), State::any(), Opts::jsx:opts()) -> jsx:decoder(). decoder(Handler, State, Opts) -> - fun(JSON) -> value(JSON, {Handler, Handler:init(State)}, [], jsx_utils:parse_opts(Opts)) end. + fun(JSON) -> start(JSON, {Handler, Handler:init(State)}, [], jsx_utils:parse_opts(Opts)) end. -include("jsx_opts.hrl"). @@ -128,6 +128,30 @@ decoder(Handler, State, Opts) -> -define(end_seq(Seq), unicode:characters_to_binary(lists:reverse(Seq))). 
+start(<<16#ef, Rest/binary>>, Handler, Stack, Opts) -> + maybe_bom(Rest, Handler, Stack, Opts); +start(<<>>, Handler, Stack, Opts) -> + ?incomplete(start, <<>>, Handler, Stack, Opts); +start(Bin, Handler, Stack, Opts) -> + value(Bin, Handler, Stack, Opts). + + +maybe_bom(<<16#bb, Rest/binary>>, Handler, Stack, Opts) -> + definitely_bom(Rest, Handler, Stack, Opts); +maybe_bom(<<>>, Handler, Stack, Opts) -> + ?incomplete(maybe_bom, <<>>, Handler, Stack, Opts); +maybe_bom(Bin, Handler, Stack, Opts) -> + ?error([Bin, Handler, Stack, Opts]). + + +definitely_bom(<<16#bf, Rest/binary>>, Handler, Stack, Opts) -> + value(Rest, Handler, Stack, Opts); +definitely_bom(<<>>, Handler, Stack, Opts) -> + ?incomplete(definitely_bom, <<>>, Handler, Stack, Opts); +definitely_bom(Bin, Handler, Stack, Opts) -> + ?error([Bin, Handler, Stack, Opts]). + + value(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); value(<>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) -> From 78ca4e4bd9f91b4e863f3a1af686b20476bca7c1 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 4 Apr 2012 20:57:39 -0700 Subject: [PATCH 30/38] change lists to array for clarity --- README.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index a90a1a6..3feb320 100644 --- a/README.markdown +++ b/README.markdown @@ -149,7 +149,7 @@ relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, ign #### `{pre_encoder, F}` or `{pre_encoders, [F, G,...]}` #### -pre encoders are functions of arity 1 that pre-process input to the encoder. only input evaluated in a *value* context is pre-processed in this manner (so keys are not pre-processed, but objects and lists are). if more than one pre encoder is declared, the input will be passed to each of them in the order they are declared +pre encoders are functions of arity 1 that pre-process input to the encoder. 
only input evaluated in a *value* context is pre-processed in this manner (so keys are not pre-processed, but objects and arrays are). if more than one pre encoder is declared, the input will be passed to each of them in the order they are declared input can be any term, but final output from the chain should be otherwise recognized input to the encoder From 76723ce73675342c79457ca6725b587f47dfdd6f Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Fri, 6 Apr 2012 08:09:52 -0700 Subject: [PATCH 31/38] semantic change to way pre_encode works --- README.markdown | 6 +++--- src/jsx_encoder.erl | 33 ++++++--------------------------- src/jsx_opts.hrl | 2 +- src/jsx_utils.erl | 21 ++++++++++++--------- 4 files changed, 22 insertions(+), 40 deletions(-) diff --git a/README.markdown b/README.markdown index a90a1a6..2900d5d 100644 --- a/README.markdown +++ b/README.markdown @@ -147,11 +147,11 @@ this option treats all exhausted inputs as incomplete, as explained below. the p relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, ignored_bad_escapes]` for when you don't care how janky and awful your json input is, you just want the parser to do the best it can -#### `{pre_encoder, F}` or `{pre_encoders, [F, G,...]}` #### +#### `{pre_encode, F}` #### -pre encoders are functions of arity 1 that pre-process input to the encoder. only input evaluated in a *value* context is pre-processed in this manner (so keys are not pre-processed, but objects and lists are). if more than one pre encoder is declared, the input will be passed to each of them in the order they are declared +`F` is a function of arity 1 that pre-processes input to the encoder. only input evaluated in a *value* context is pre-processed in this manner (so keys are not pre-processed, but objects and arrays are). 
if more than one pre encoder is declared, a `badarg` exception will occur -input can be any term, but final output from the chain should be otherwise recognized input to the encoder +input can be any term, but output from the function must be a valid type for input ### incomplete input ### diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index cd2a08c..5302712 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -99,7 +99,8 @@ list([], {Handler, State}, _Opts) -> Handler:handle_event(end_array, State); list(Term, Handler, Opts) -> ?error([Term, Handler, Opts]). -pre_encode(Value, Opts) -> lists:foldl(fun(F, V) -> F(V) end, Value, Opts#opts.pre_encoders). +pre_encode(Value, #opts{pre_encode=false}) -> Value; +pre_encode(Value, Opts) -> (Opts#opts.pre_encode)(Value). fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8)); @@ -812,7 +813,7 @@ pre_encoders_test_() -> ] )}, {"replace lists with empty lists", ?_assertEqual( - encode(Term, [{pre_encoder, fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end}]), + encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end}]), [ start_object, {key, <<"object">>}, start_object, @@ -825,7 +826,7 @@ pre_encoders_test_() -> ] )}, {"replace objects with empty objects", ?_assertEqual( - encode(Term, [{pre_encoder, fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end}]), + encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end}]), [ start_object, end_object, @@ -833,7 +834,7 @@ pre_encoders_test_() -> ] )}, {"replace all non-list values with false", ?_assertEqual( - encode(Term, [{pre_encoder, fun(V) when is_list(V) -> V; (_) -> false end}]), + encode(Term, [{pre_encode, fun(V) when is_list(V) -> V; (_) -> false end}]), [ start_object, {key, <<"object">>}, start_object, @@ -852,7 +853,7 @@ pre_encoders_test_() -> ] )}, {"replace all atoms with atom_to_list", ?_assertEqual( - encode(Term, 
[{pre_encoder, fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end}]), + encode(Term, [{pre_encode, fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end}]), [ start_object, {key, <<"object">>}, start_object, @@ -869,28 +870,6 @@ pre_encoders_test_() -> end_object, end_json ] - )}, - {"replace all atoms to strings and back", ?_assertEqual( - encode(Term, [{pre_encoders, [ - fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end, - fun(<<"true">>) -> true; (<<"false">>) -> false; (<<"null">>) -> null; (V) -> V end - ]}]), - [ - start_object, - {key, <<"object">>}, start_object, - {key, <<"literals">>}, start_array, - {literal, true}, {literal, false}, {literal, null}, - end_array, - {key, <<"strings">>}, start_array, - {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>}, - end_array, - {key, <<"numbers">>}, start_array, - {integer, 1}, {float, 1.0}, {float, 1.0}, - end_array, - end_object, - end_object, - end_json - ] )} ]. diff --git a/src/jsx_opts.hrl b/src/jsx_opts.hrl index 625c737..6b3de90 100644 --- a/src/jsx_opts.hrl +++ b/src/jsx_opts.hrl @@ -8,5 +8,5 @@ dirty_strings = false, ignored_bad_escapes = false, explicit_end = false, - pre_encoders = [] + pre_encode = false }). 
\ No newline at end of file diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index a889cb5..d4dfd41 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -61,14 +61,17 @@ parse_opts([relax|Rest], Opts) -> comments = true, ignored_bad_escapes = true }); -parse_opts([{pre_encoder, Encoder}|Rest], Opts) when is_function(Encoder, 1) -> - AllEncoders = Opts#opts.pre_encoders ++ [Encoder], - parse_opts(Rest, Opts#opts{pre_encoders=AllEncoders}); -parse_opts([{pre_encoders, Encoders}|Rest], Opts) when is_list(Encoders) -> - lists:foreach(fun(F) when is_function(F, 1) -> ok end, Encoders), - AllEncoders = Opts#opts.pre_encoders ++ Encoders, - parse_opts(Rest, Opts#opts{pre_encoders=AllEncoders}); +parse_opts([{pre_encode, Encoder}|Rest] = Options, Opts) when is_function(Encoder, 1) -> + case Opts#opts.pre_encode of + false -> parse_opts(Rest, Opts#opts{pre_encode=Encoder}) + ; _ -> erlang:error(badarg, [Options, Opts]) + end; %% deprecated flags +parse_opts([{pre_encoder, Encoder}|Rest] = Options, Opts) when is_function(Encoder, 1) -> + case Opts#opts.pre_encode of + false -> parse_opts(Rest, Opts#opts{pre_encode=Encoder}) + ; _ -> erlang:error(badarg, [Options, Opts]) + end; parse_opts([loose_unicode|Rest], Opts) -> parse_opts(Rest, Opts#opts{replaced_bad_utf8=true}); parse_opts([escape_forward_slash|Rest], Opts) -> @@ -97,9 +100,9 @@ valid_flags() -> ignored_bad_escapes, explicit_end, relax, - pre_encoder, - pre_encoders, + pre_encode, %% deprecated flags + pre_encoder, %% pre_encode loose_unicode, %% replaced_bad_utf8 escape_forward_slash, %% escaped_forward_slashes single_quotes, %% single_quotes_strings From 7e242b7e1e59f61c059646c81e2a25e9475e880a Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Fri, 6 Apr 2012 08:12:55 -0700 Subject: [PATCH 32/38] throw exception on bad options, rather than returning error tuple --- src/jsx_utils.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index 
d4dfd41..58ffed7 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -84,8 +84,8 @@ parse_opts([json_escape|Rest], Opts) -> parse_opts(Rest, Opts#opts{escaped_strings=true}); parse_opts([ignore_bad_escapes|Rest], Opts) -> parse_opts(Rest, Opts#opts{ignored_bad_escapes=true}); -parse_opts(_, _) -> - {error, badarg}. +parse_opts(Options, Opts) -> + erlang:error(badarg, [Options, Opts]). valid_flags() -> From ded212c397163b571842745460eb2a890ef5a3be Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Fri, 6 Apr 2012 08:35:40 -0700 Subject: [PATCH 33/38] minor refactoring of jsx_decoder in prep for post_decode hook --- src/jsx_decoder.erl | 223 +++++++++++++++++++------------------------- 1 file changed, 94 insertions(+), 129 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index a362638..06239ec 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -128,6 +128,11 @@ decoder(Handler, State, Opts) -> -define(end_seq(Seq), unicode:characters_to_binary(lists:reverse(Seq))). +handle_event([], Handler, _Opts) -> Handler; +handle_event([Event|Rest], Handler, Opts) -> handle_event(Rest, handle_event(Event, Handler, Opts), Opts); +handle_event(Event, {Handler, State}, _Opts) -> {Handler, Handler:handle_event(Event, State)}. 
+ + start(<<16#ef, Rest/binary>>, Handler, Stack, Opts) -> maybe_bom(Rest, Handler, Stack, Opts); start(<<>>, Handler, Stack, Opts) -> @@ -168,10 +173,10 @@ value(<>, Handler, Stack, Opts) -> zero(Rest, Handler, [[$0]|Stack], Opts); value(<>, Handler, Stack, Opts) when ?is_nonzero(S) -> integer(Rest, Handler, [[S]|Stack], Opts); -value(<>, {Handler, State}, Stack, Opts) -> - object(Rest, {Handler, Handler:handle_event(start_object, State)}, [key|Stack], Opts); -value(<>, {Handler, State}, Stack, Opts) -> - array(Rest, {Handler, Handler:handle_event(start_array, State)}, [array|Stack], Opts); +value(<>, Handler, Stack, Opts) -> + object(Rest, handle_event(start_object, Handler, Opts), [key|Stack], Opts); +value(<>, Handler, Stack, Opts) -> + array(Rest, handle_event(start_array, Handler, Opts), [array|Stack], Opts); value(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> value(Rest, Handler, Stack, Opts); value(<>, Handler, Stack, Opts=#opts{comments=true}) -> @@ -186,8 +191,8 @@ object(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); object(<>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) -> string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); -object(<>, {Handler, State}, [key|Stack], Opts) -> - maybe_done(Rest, {Handler, Handler:handle_event(end_object, State)}, Stack, Opts); +object(<>, Handler, [key|Stack], Opts) -> + maybe_done(Rest, handle_event(end_object, Handler, Opts), Stack, Opts); object(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> object(Rest, Handler, Stack, Opts); object(<>, Handler, Stack, Opts=#opts{comments=true}) -> @@ -214,12 +219,12 @@ array(<>, Handler, Stack, Opts) -> zero(Rest, Handler, [[$0]|Stack], Opts); array(<>, Handler, Stack, Opts) when ?is_nonzero(S) -> integer(Rest, Handler, [[S]|Stack], Opts); -array(<>, {Handler, State}, Stack, Opts) -> - object(Rest, {Handler, Handler:handle_event(start_object, State)}, [key|Stack], Opts); -array(<>, {Handler, State}, Stack, Opts) -> - 
array(Rest, {Handler, Handler:handle_event(start_array, State)}, [array|Stack], Opts); -array(<>, {Handler, State}, [array|Stack], Opts) -> - maybe_done(Rest, {Handler, Handler:handle_event(end_array, State)}, Stack, Opts); +array(<>, Handler, Stack, Opts) -> + object(Rest, handle_event(start_object, Handler, Opts), [key|Stack], Opts); +array(<>, Handler, Stack, Opts) -> + array(Rest, handle_event(start_array, Handler, Opts), [array|Stack], Opts); +array(<>, Handler, [array|Stack], Opts) -> + maybe_done(Rest, handle_event(end_array, Handler, Opts), Stack, Opts); array(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> array(Rest, Handler, Stack, Opts); array(<>, Handler, Stack, Opts=#opts{comments=true}) -> @@ -279,14 +284,14 @@ string(<<32, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 32)|Stack], Opts); string(<<33, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 33)|Stack], Opts); -string(<>, {Handler, State}, S, Opts) -> +string(<>, Handler, S, Opts) -> case S of [Acc, key|Stack] -> - colon(Rest, {Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)}, [key|Stack], Opts); + colon(Rest, handle_event({key, ?end_seq(Acc)}, Handler, Opts), [key|Stack], Opts); [_Acc, single_quote|_Stack] -> - ?error([<>, {Handler, State}, S, Opts]); + ?error([<>, Handler, S, Opts]); [Acc|Stack] -> - maybe_done(Rest, {Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)}, Stack, Opts) + maybe_done(Rest, handle_event({string, ?end_seq(Acc)}, Handler, Opts), Stack, Opts) end; string(<<35, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 35)|Stack], Opts); @@ -296,19 +301,19 @@ string(<<37, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 37)|Stack], Opts); string(<<38, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 38)|Stack], Opts); -string(<>, {Handler, State}, [Acc|Stack], Opts) -> +string(<>, 
Handler, [Acc|Stack], Opts) -> case Opts#opts.single_quoted_strings of true -> case Stack of [single_quote, key|S] -> - colon(Rest, {Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)}, [key|S], Opts) + colon(Rest, handle_event({key, ?end_seq(Acc)}, Handler, Opts), [key|S], Opts) ; [single_quote|S] -> - maybe_done(Rest, {Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)}, S, Opts) + maybe_done(Rest, handle_event({string, ?end_seq(Acc)}, Handler, Opts), S, Opts) ; _ -> - string(Rest, {Handler, State}, [?acc_seq(Acc, maybe_replace(?singlequote, Opts))|Stack], Opts) + string(Rest, Handler, [?acc_seq(Acc, maybe_replace(?singlequote, Opts))|Stack], Opts) end ; false -> - string(Rest, {Handler, State}, [?acc_seq(Acc, ?singlequote)|Stack], Opts) + string(Rest, Handler, [?acc_seq(Acc, ?singlequote)|Stack], Opts) end; string(<<40, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 40)|Stack], Opts); @@ -729,32 +734,22 @@ negative(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). 
-zero(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - maybe_done( - Rest, - {Handler, Handler:handle_event(end_object, Handler:handle_event(format_number(Acc), State))}, - Stack, - Opts - ); -zero(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - maybe_done( - Rest, - {Handler, Handler:handle_event(end_array, Handler:handle_event(format_number(Acc), State))}, - Stack, - Opts - ); -zero(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - key(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [key|Stack], Opts); -zero(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - value(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [array|Stack], Opts); +zero(<>, Handler, [Acc, object|Stack], Opts) -> + maybe_done(Rest, handle_event([format_number(Acc), end_object], Handler, Opts), Stack, Opts); +zero(<>, Handler, [Acc, array|Stack], Opts) -> + maybe_done(Rest, handle_event(end_array, handle_event(format_number(Acc), Handler, Opts), Opts), Stack, Opts); +zero(<>, Handler, [Acc, object|Stack], Opts) -> + key(Rest, handle_event(format_number(Acc), Handler, Opts), [key|Stack], Opts); +zero(<>, Handler, [Acc, array|Stack], Opts) -> + value(Rest, handle_event(format_number(Acc), Handler, Opts), [array|Stack], Opts); zero(<>, Handler, [Acc|Stack], Opts) -> initial_decimal(Rest, Handler, [{Acc, []}|Stack], Opts); -zero(<>, {Handler, State}, [Acc|Stack], Opts) when ?is_whitespace(S) -> - maybe_done(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); -zero(<>, {Handler, State}, [Acc|Stack], Opts=#opts{comments=true}) -> - comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [maybe_done|Stack], Opts); -zero(<<>>, {Handler, State}, [Acc|Stack], Opts = #opts{explicit_end=false}) -> - maybe_done(<<>>, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); +zero(<>, Handler, [Acc|Stack], Opts) when ?is_whitespace(S) -> + maybe_done(Rest, handle_event(format_number(Acc), Handler, 
Opts), Stack, Opts); +zero(<>, Handler, [Acc|Stack], Opts=#opts{comments=true}) -> + comment(Rest, handle_event(format_number(Acc), Handler, Opts), [maybe_done|Stack], Opts); +zero(<<>>, Handler, [Acc|Stack], Opts = #opts{explicit_end=false}) -> + maybe_done(<<>>, handle_event(format_number(Acc), Handler, Opts), Stack, Opts); zero(<<>>, Handler, Stack, Opts) -> ?incomplete(zero, <<>>, Handler, Stack, Opts); zero(Bin, Handler, Stack, Opts) -> @@ -763,36 +758,26 @@ zero(Bin, Handler, Stack, Opts) -> integer(<>, Handler, [Acc|Stack], Opts) when ?is_nonzero(S) -> integer(Rest, Handler, [[S] ++ Acc|Stack], Opts); -integer(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - maybe_done( - Rest, - {Handler, Handler:handle_event(end_object, Handler:handle_event(format_number(Acc), State))}, - Stack, - Opts - ); -integer(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - maybe_done( - Rest, - {Handler, Handler:handle_event(end_array, Handler:handle_event(format_number(Acc), State))}, - Stack, - Opts - ); -integer(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - key(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [key|Stack], Opts); -integer(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - value(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [array|Stack], Opts); +integer(<>, Handler, [Acc, object|Stack], Opts) -> + maybe_done(Rest, handle_event([format_number(Acc), end_object], Handler, Opts), Stack, Opts); +integer(<>, Handler, [Acc, array|Stack], Opts) -> + maybe_done(Rest, handle_event([format_number(Acc), end_array], Handler, Opts), Stack, Opts); +integer(<>, Handler, [Acc, object|Stack], Opts) -> + key(Rest, handle_event(format_number(Acc), Handler, Opts), [key|Stack], Opts); +integer(<>, Handler, [Acc, array|Stack], Opts) -> + value(Rest, handle_event(format_number(Acc), Handler, Opts), [array|Stack], Opts); integer(<>, Handler, [Acc|Stack], Opts) -> initial_decimal(Rest, Handler, [{Acc, []}|Stack], Opts); integer(<>, 
Handler, [Acc|Stack], Opts) -> integer(Rest, Handler, [[?zero] ++ Acc|Stack], Opts); integer(<>, Handler, [Acc|Stack], Opts) when S =:= $e; S =:= $E -> e(Rest, Handler, [{Acc, [], []}|Stack], Opts); -integer(<>, {Handler, State}, [Acc|Stack], Opts) when ?is_whitespace(S) -> - maybe_done(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); -integer(<>, {Handler, State}, [Acc|Stack], Opts=#opts{comments=true}) -> - comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [maybe_done|Stack], Opts); -integer(<<>>, {Handler, State}, [Acc|Stack], Opts = #opts{explicit_end=false}) -> - maybe_done(<<>>, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); +integer(<>, Handler, [Acc|Stack], Opts) when ?is_whitespace(S) -> + maybe_done(Rest, handle_event(format_number(Acc), Handler, Opts), Stack, Opts); +integer(<>, Handler, [Acc|Stack], Opts=#opts{comments=true}) -> + comment(Rest, handle_event(format_number(Acc), Handler, Opts), [maybe_done|Stack], Opts); +integer(<<>>, Handler, [Acc|Stack], Opts = #opts{explicit_end=false}) -> + maybe_done(<<>>, handle_event(format_number(Acc), Handler, Opts), Stack, Opts); integer(<<>>, Handler, Stack, Opts) -> ?incomplete(integer, <<>>, Handler, Stack, Opts); integer(Bin, Handler, Stack, Opts) -> @@ -810,32 +795,22 @@ initial_decimal(Bin, Handler, Stack, Opts) -> decimal(<>, Handler, [{Int, Frac}|Stack], Opts) when S=:= ?zero; ?is_nonzero(S) -> decimal(Rest, Handler, [{Int, [S] ++ Frac}|Stack], Opts); -decimal(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - maybe_done( - Rest, - {Handler, Handler:handle_event(end_object, Handler:handle_event(format_number(Acc), State))}, - Stack, - Opts - ); -decimal(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - maybe_done( - Rest, - {Handler, Handler:handle_event(end_array, Handler:handle_event(format_number(Acc), State))}, - Stack, - Opts - ); -decimal(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - key(Rest, {Handler, 
Handler:handle_event(format_number(Acc), State)}, [key|Stack], Opts); -decimal(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - value(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [array|Stack], Opts); +decimal(<>, Handler, [Acc, object|Stack], Opts) -> + maybe_done(Rest, handle_event([format_number(Acc), end_object], Handler, Opts), Stack, Opts); +decimal(<>, Handler, [Acc, array|Stack], Opts) -> + maybe_done(Rest, handle_event([format_number(Acc), end_array], Handler, Opts), Stack, Opts); +decimal(<>, Handler, [Acc, object|Stack], Opts) -> + key(Rest, handle_event(format_number(Acc), Handler, Opts), [key|Stack], Opts); +decimal(<>, Handler, [Acc, array|Stack], Opts) -> + value(Rest, handle_event(format_number(Acc), Handler, Opts), [array|Stack], Opts); decimal(<>, Handler, [{Int, Frac}|Stack], Opts) when S =:= $e; S =:= $E -> e(Rest, Handler, [{Int, Frac, []}|Stack], Opts); -decimal(<>, {Handler, State}, [Acc|Stack], Opts) when ?is_whitespace(S) -> - maybe_done(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); -decimal(<>, {Handler, State}, [Acc|Stack], Opts=#opts{comments=true}) -> - comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [maybe_done|Stack], Opts); -decimal(<<>>, {Handler, State}, [Acc|Stack], Opts = #opts{explicit_end=false}) -> - maybe_done(<<>>, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); +decimal(<>, Handler, [Acc|Stack], Opts) when ?is_whitespace(S) -> + maybe_done(Rest, handle_event(format_number(Acc), Handler, Opts), Stack, Opts); +decimal(<>, Handler, [Acc|Stack], Opts=#opts{comments=true}) -> + comment(Rest, handle_event(format_number(Acc), Handler, Opts), [maybe_done|Stack], Opts); +decimal(<<>>, Handler, [Acc|Stack], Opts = #opts{explicit_end=false}) -> + maybe_done(<<>>, handle_event(format_number(Acc), Handler, Opts), Stack, Opts); decimal(<<>>, Handler, Stack, Opts) -> ?incomplete(decimal, <<>>, Handler, Stack, Opts); decimal(Bin, 
Handler, Stack, Opts) -> @@ -862,30 +837,20 @@ ex(Bin, Handler, Stack, Opts) -> exp(<>, Handler, [{Int, Frac, Exp}|Stack], Opts) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Handler, [{Int, Frac, [S] ++ Exp}|Stack], Opts); -exp(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - maybe_done( - Rest, - {Handler, Handler:handle_event(end_object, Handler:handle_event(format_number(Acc), State))}, - Stack, - Opts - ); -exp(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - maybe_done( - Rest, - {Handler, Handler:handle_event(end_array, Handler:handle_event(format_number(Acc), State))}, - Stack, - Opts - ); -exp(<>, {Handler, State}, [Acc, object|Stack], Opts) -> - key(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [key|Stack], Opts); -exp(<>, {Handler, State}, [Acc, array|Stack], Opts) -> - value(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [array|Stack], Opts); -exp(<>, {Handler, State}, [Acc|Stack], Opts) when ?is_whitespace(S) -> - maybe_done(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); -exp(<>, {Handler, State}, [Acc|Stack], Opts=#opts{comments=true}) -> - comment(Rest, {Handler, Handler:handle_event(format_number(Acc), State)}, [maybe_done|Stack], Opts); -exp(<<>>, {Handler, State}, [Acc|Stack], Opts = #opts{explicit_end=false}) -> - maybe_done(<<>>, {Handler, Handler:handle_event(format_number(Acc), State)}, Stack, Opts); +exp(<>, Handler, [Acc, object|Stack], Opts) -> + maybe_done(Rest, handle_event([format_number(Acc), end_object], Handler, Opts), Stack, Opts); +exp(<>, Handler, [Acc, array|Stack], Opts) -> + maybe_done(Rest, handle_event([format_number(Acc), end_array], Handler, Opts), Stack, Opts); +exp(<>, Handler, [Acc, object|Stack], Opts) -> + key(Rest, handle_event(format_number(Acc), Handler, Opts), [key|Stack], Opts); +exp(<>, Handler, [Acc, array|Stack], Opts) -> + value(Rest, handle_event(format_number(Acc), Handler, Opts), [array|Stack], Opts); +exp(<>, Handler, 
[Acc|Stack], Opts) when ?is_whitespace(S) -> + maybe_done(Rest, handle_event(format_number(Acc), Handler, Opts), Stack, Opts); +exp(<>, Handler, [Acc|Stack], Opts=#opts{comments=true}) -> + comment(Rest, handle_event(format_number(Acc), Handler, Opts), [maybe_done|Stack], Opts); +exp(<<>>, Handler, [Acc|Stack], Opts = #opts{explicit_end=false}) -> + maybe_done(<<>>, handle_event(format_number(Acc), Handler, Opts), Stack, Opts); exp(<<>>, Handler, Stack, Opts) -> ?incomplete(exp, <<>>, Handler, Stack, Opts); exp(Bin, Handler, Stack, Opts) -> @@ -918,8 +883,8 @@ tru(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). -true(<<$e, Rest/binary>>, {Handler, State}, Stack, Opts) -> - maybe_done(Rest, {Handler, Handler:handle_event({literal, true}, State)}, Stack, Opts); +true(<<$e, Rest/binary>>, Handler, Stack, Opts) -> + maybe_done(Rest, handle_event({literal, true}, Handler, Opts), Stack, Opts); true(<<>>, Handler, Stack, Opts) -> ?incomplete(true, <<>>, Handler, Stack, Opts); true(Bin, Handler, Stack, Opts) -> @@ -950,8 +915,8 @@ fals(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). -false(<<$e, Rest/binary>>, {Handler, State}, Stack, Opts) -> - maybe_done(Rest, {Handler, Handler:handle_event({literal, false}, State)}, Stack, Opts); +false(<<$e, Rest/binary>>, Handler, Stack, Opts) -> + maybe_done(Rest, handle_event({literal, false}, Handler, Opts), Stack, Opts); false(<<>>, Handler, Stack, Opts) -> ?incomplete(false, <<>>, Handler, Stack, Opts); false(Bin, Handler, Stack, Opts) -> @@ -974,8 +939,8 @@ nul(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). 
-null(<<$l, Rest/binary>>, {Handler, State}, Stack, Opts) -> - maybe_done(Rest, {Handler, Handler:handle_event({literal, null}, State)}, Stack, Opts); +null(<<$l, Rest/binary>>, Handler, Stack, Opts) -> + maybe_done(Rest, handle_event({literal, null}, Handler, Opts), Stack, Opts); null(<<>>, Handler, Stack, Opts) -> ?incomplete(null, <<>>, Handler, Stack, Opts); null(Bin, Handler, Stack, Opts) -> @@ -1036,12 +1001,12 @@ end_comment(Rest, Handler, [Resume|Stack], Opts) -> end. -maybe_done(Rest, {Handler, State}, [], Opts) -> - done(Rest, {Handler, Handler:handle_event(end_json, State)}, [], Opts); -maybe_done(<>, {Handler, State}, [object|Stack], Opts) -> - maybe_done(Rest, {Handler, Handler:handle_event(end_object, State)}, Stack, Opts); -maybe_done(<>, {Handler, State}, [array|Stack], Opts) -> - maybe_done(Rest, {Handler, Handler:handle_event(end_array, State)}, Stack, Opts); +maybe_done(Rest, Handler, [], Opts) -> + done(Rest, handle_event(end_json, Handler, Opts), [], Opts); +maybe_done(<>, Handler, [object|Stack], Opts) -> + maybe_done(Rest, handle_event(end_object, Handler, Opts), Stack, Opts); +maybe_done(<>, Handler, [array|Stack], Opts) -> + maybe_done(Rest, handle_event(end_array, Handler, Opts), Stack, Opts); maybe_done(<>, Handler, [object|Stack], Opts) -> key(Rest, Handler, [key|Stack], Opts); maybe_done(<>, Handler, [array|_] = Stack, Opts) -> From d9bb9ee9ac8d209ee120e4168e13c9a4fe47f708 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Fri, 6 Apr 2012 08:51:50 -0700 Subject: [PATCH 34/38] machinery for post_decode (still does nothing) --- src/jsx_to_term.erl | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 00785d0..8bdcdba 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -28,7 +28,8 @@ -record(opts, { - labels = binary + labels = binary, + post_decode = false }). -type opts() :: list(). 
@@ -49,7 +50,6 @@ to_term(Source, Opts) when is_list(Opts) -> (jsx:decoder(?MODULE, Opts, jsx_utils:extract_opts(Opts)))(Source). - parse_opts(Opts) -> parse_opts(Opts, #opts{}). parse_opts([{labels, Val}|Rest], Opts) @@ -57,42 +57,43 @@ parse_opts([{labels, Val}|Rest], Opts) parse_opts(Rest, Opts#opts{labels = Val}); parse_opts([labels|Rest], Opts) -> parse_opts(Rest, Opts#opts{labels = binary}); +parse_opts([{post_decode, F}|Rest], Opts=#opts{post_decode=false}) when is_function(F, 1) -> + parse_opts(Rest, Opts#opts{post_decode=F}); +parse_opts([{post_decode, _}|_] = Options, Opts) -> + erlang:error(badarg, [Options, Opts]); parse_opts([_|Rest], Opts) -> parse_opts(Rest, Opts); parse_opts([], Opts) -> Opts. - init(Opts) -> {[[]], parse_opts(Opts)}. - handle_event(end_json, {[[Terms]], _Opts}) -> Terms; handle_event(start_object, {Terms, Opts}) -> {[[]|Terms], Opts}; handle_event(end_object, {[[], {key, Key}, Last|Terms], Opts}) -> - {[[{Key, [{}]}] ++ Last] ++ Terms, Opts}; + {[[{Key, post_decode([{}], Opts)}] ++ Last] ++ Terms, Opts}; handle_event(end_object, {[Object, {key, Key}, Last|Terms], Opts}) -> - {[[{Key, lists:reverse(Object)}] ++ Last] ++ Terms, Opts}; + {[[{Key, post_decode(lists:reverse(Object), Opts)}] ++ Last] ++ Terms, Opts}; handle_event(end_object, {[[], Last|Terms], Opts}) -> - {[[[{}]] ++ Last] ++ Terms, Opts}; + {[[post_decode([{}], Opts)] ++ Last] ++ Terms, Opts}; handle_event(end_object, {[Object, Last|Terms], Opts}) -> - {[[lists:reverse(Object)] ++ Last] ++ Terms, Opts}; + {[[post_decode(lists:reverse(Object), Opts)] ++ Last] ++ Terms, Opts}; handle_event(start_array, {Terms, Opts}) -> {[[]|Terms], Opts}; handle_event(end_array, {[List, {key, Key}, Last|Terms], Opts}) -> - {[[{Key, lists:reverse(List)}] ++ Last] ++ Terms, Opts}; + {[[{Key, post_decode(lists:reverse(List), Opts)}] ++ Last] ++ Terms, Opts}; handle_event(end_array, {[Current, Last|Terms], Opts}) -> - {[[lists:reverse(Current)] ++ Last] ++ Terms, Opts}; + 
{[[post_decode(lists:reverse(Current), Opts)] ++ Last] ++ Terms, Opts}; handle_event({key, Key}, {Terms, Opts}) -> {[{key, format_key(Key, Opts)}] ++ Terms, Opts}; handle_event({_, Event}, {[{key, Key}, Last|Terms], Opts}) -> - {[[{Key, Event}] ++ Last] ++ Terms, Opts}; + {[[{Key, post_decode(Event, Opts)}] ++ Last] ++ Terms, Opts}; handle_event({_, Event}, {[Last|Terms], Opts}) -> - {[[Event] ++ Last] ++ Terms, Opts}. - + {[[post_decode(Event, Opts)] ++ Last] ++ Terms, Opts}. format_key(Key, Opts) -> @@ -103,6 +104,9 @@ format_key(Key, Opts) -> end. +post_decode(Value, #opts{post_decode=false}) -> Value; +post_decode(Value, Opts) -> (Opts#opts.post_decode)(Value). + %% eunit tests From 79b8740da82edeb1dd431c927889382c16f4a43c Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Fri, 6 Apr 2012 09:05:08 -0700 Subject: [PATCH 35/38] post_decode working --- src/jsx_to_term.erl | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 8bdcdba..d124cc1 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -185,5 +185,46 @@ naked_test_() -> {"naked literal", ?_assertEqual(to_term(<<"true">>, []), true)}, {"naked string", ?_assertEqual(to_term(<<"\"string\"">>, []), <<"string">>)} ]. 
+ +post_decoders_test_() -> + JSON = <<"{\"object\": { + \"literals\": [true, false, null], + \"strings\": [\"foo\", \"bar\", \"baz\"], + \"numbers\": [1, 1.0, 1e0] + }}">>, + [ + {"no post_decode", ?_assertEqual( + to_term(JSON, []), + [{<<"object">>, [ + {<<"literals">>, [true, false, null]}, + {<<"strings">>, [<<"foo">>, <<"bar">>, <<"baz">>]}, + {<<"numbers">>, [1, 1.0, 1.0]} + ]}] + )}, + {"replace arrays with empty arrays", ?_assertEqual( + to_term(JSON, [{post_decode, fun([T|_] = V) when is_tuple(T) -> V; (V) when is_list(V) -> []; (V) -> V end}]), + [{<<"object">>, [{<<"literals">>, []}, {<<"strings">>, []}, {<<"numbers">>, []}]}] + )}, + {"replace objects with empty objects", ?_assertEqual( + to_term(JSON, [{post_decode, fun(V) when is_list(V) -> [{}]; (V) -> V end}]), + [{}] + )}, + {"replace all non-list values with false", ?_assertEqual( + to_term(JSON, [{post_decode, fun(V) when is_list(V) -> V; (_) -> false end}]), + [{<<"object">>, [ + {<<"literals">>, [false, false, false]}, + {<<"strings">>, [false, false, false]}, + {<<"numbers">>, [false, false, false]} + ]}] + )}, + {"atoms_to_strings", ?_assertEqual( + to_term(JSON, [{post_decode, fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end}]), + [{<<"object">>, [ + {<<"literals">>, [<<"true">>, <<"false">>, <<"null">>]}, + {<<"strings">>, [<<"foo">>, <<"bar">>, <<"baz">>]}, + {<<"numbers">>, [1, 1.0, 1.0]} + ]}] + )} + ]. -endif. 
From a9b36522bd1ab05b841114d82eecd3d791494288 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Fri, 6 Apr 2012 09:12:00 -0700 Subject: [PATCH 36/38] post decode documented in readme --- README.markdown | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index c5cad9f..06fb879 100644 --- a/README.markdown +++ b/README.markdown @@ -149,7 +149,6 @@ relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, ign #### `{pre_encode, F}` #### - `F` is a function of arity 1 that pre-process input to the encoder. only input evaluated in a *value* context is pre-processed in this manner (so keys are not pre-processed, but objects and arrays are). if more than one pre encoder is declared, a `badarg` exception will occur input can be any term, but output from the function must be a valid type for input @@ -240,9 +239,20 @@ types: * `binary` * `atom` * `existing_atom` + - `{post_decode, F}` the option `labels` controls how keys are converted from json to erlang terms. `binary` does no conversion beyond normal escaping. `atom` converts keys to erlang atoms, and results in a badarg error if keys fall outside the range of erlang atoms. `existing_atom` is identical to `atom`, except it will not add new atoms to the atom table +`{post_decode, F}` is a user defined function of arity 1 that is called on each output value (objects, arrays, strings, numbers and literals). it may return any value to be substituted in the returned term. for example: + +```erlang + 1> F = fun(V) when is_list(V) -> V; (V) -> false end. + 2> jsx:to_term(<<"{"a list": [true, "a string", 1]}">>, [{post_decode, F}]). 
+ [{<<"a list">>, [false, false, false]}] +``` + +if more than one post decoder is declared a `badarg` exception will result + #### converting erlang terms to json #### `to_json` parses an erlang term and produces a JSON text (see json <-> erlang mapping details below) From 439f934ebb5991cdc85475b7c9ea99a4057e9917 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Fri, 6 Apr 2012 09:13:42 -0700 Subject: [PATCH 37/38] readme typo --- README.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index 06fb879..51fb723 100644 --- a/README.markdown +++ b/README.markdown @@ -247,7 +247,7 @@ the option `labels` controls how keys are converted from json to erlang terms. ` ```erlang 1> F = fun(V) when is_list(V) -> V; (V) -> false end. - 2> jsx:to_term(<<"{"a list": [true, "a string", 1]}">>, [{post_decode, F}]). + 2> jsx:to_term(<<"{\"a list\": [true, \"a string\", 1]}">>, [{post_decode, F}]). [{<<"a list">>, [false, false, false]}] ``` From b893aeeda845011e83b083f165b1aedf157930ce Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 22 Apr 2012 21:28:01 -0700 Subject: [PATCH 38/38] add simple benchmarking script in /bin --- bin/jsx_bench.escript | 185 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100755 bin/jsx_bench.escript diff --git a/bin/jsx_bench.escript b/bin/jsx_bench.escript new file mode 100755 index 0000000..8e52dc1 --- /dev/null +++ b/bin/jsx_bench.escript @@ -0,0 +1,185 @@ +#!/usr/bin/env escript + +%% The MIT License + +%% Copyright (c) 2012 Alisdair Sullivan + +%% Permission is hereby granted, free of charge, to any person obtaining a copy +%% of this software and associated documentation files (the "Software"), to deal +%% in the Software without restriction, including without limitation the rights +%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the Software is +%%
furnished to do so, subject to the following conditions: + +%% The above copyright notice and this permission notice shall be included in +%% all copies or substantial portions of the Software. + +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +%% THE SOFTWARE. + +-mode(compile). + + +-define(averageN(Test, N), + {average, {repeat, N, Test}} +). + + +main([]) -> + %% preload jsx mods + jsx:to_term(<<"{}">>), + format(frequency:profile({"empty object to term", ?averageN({jsx, to_term, [<<"{}">>]}, 1000)})), + format(frequency:profile({"empty object to json", ?averageN({jsx, to_json, [[{}]]}, 1000)})), + format(frequency:profile({"empty array to term", ?averageN({jsx, to_term, [<<"[]">>]}, 1000)})), + format(frequency:profile({"empty array to json", ?averageN({jsx, to_json, [[]]}, 1000)})), + format(frequency:profile({"sample tweet to term", ?averageN({jsx, to_term, [sample_tweet()]}, 1000)})), + format(frequency:profile({"sample tweet to json", ?averageN({jsx, to_json, [jsx:to_term(sample_tweet())]}, 1000)})), + format(frequency:profile({"sample github user to term", ?averageN({jsx, to_term, [sample_github_user()]}, 1000)})), + format(frequency:profile({"sample github user to json", ?averageN({jsx, to_json, [jsx:to_term(sample_github_user())]}, 1000)})). 
+ + +format([]) -> ok; +format([{name, Name}|Rest]) -> + io:format("name : ~p~n", [Name]), + format(Rest); +format([{time, Time}|Rest]) -> + io:format("time : ~p~n", [Time]), + format(Rest); +format([{error, Error}|Rest]) -> + io:format("error : ~p~n", [Error]), + format(Rest); +format([Result|Rest]) when is_list(Result) -> format(Result), format(Rest); +format([_|Rest]) -> format(Rest). + + +sample_tweet() -> + <<"{ + \"coordinates\": null, + \"created_at\": \"Sat Sep 10 22:23:38 +0000 2011\", + \"truncated\": false, + \"favorited\": false, + \"id_str\": \"112652479837110273\", + \"entities\": { + \"urls\": [{ + \"expanded_url\": \"http://instagr.am/p/MuW67/\", + \"url\": \"http://t.co/6J2EgYM\", + \"indices\": [67, 86], + \"display_url\": \"instagr.am/p/MuW67/\" + }], + \"hashtags\": [{ + \"text\": \"tcdisrupt\", + \"indices\": [32,42] + }], + \"user_mentions\": [ + { + \"name\": \"Twitter\", + \"id_str\": \"783214\", + \"id\": 783214, + \"indices\": [0, 8], + \"screen_name\": \"twitter\" + }, + { + \"name\": \"Picture.ly\", + \"id_str\": \"334715534\", + \"id\": 334715534, + \"indices\": [15, 28], + \"screen_name\": \"SeePicturely\" + }, + { + \"name\": \"Bosco So\", + \"id_str\": \"14792670\", + \"id\": 14792670, + \"indices\": [46, 58], + \"screen_name\": \"boscomonkey\" + }, + { + \"name\": \"Taylor Singletary\", + \"id_str\": \"819797\", + \"id\": 819797, + \"indices\": [59, 66], + \"screen_name\": \"episod\" + } + ] + }, + \"in_reply_to_user_id_str\": \"783214\", + \"text\": \"@twitter meets @seepicturely at #tcdisrupt cc.@boscomonkey @episod http://t.co/6J2EgYM\", + \"contributors\": null, + \"id\": 112652479837110273, + \"retweet_count\": 0, + \"in_reply_to_status_id_str\": null, + \"geo\": null, + \"retweeted\": false, + \"possibly_sensitive\": false, + \"in_reply_to_user_id\": 783214, + \"place\": null, + \"source\": \"Instagram\", + \"user\": { + \"profile_sidebar_border_color\": \"eeeeee\", + \"profile_background_tile\": true, + 
\"profile_sidebar_fill_color\": \"efefef\", + \"name\": \"Eoin McMillan \", + \"profile_image_url\": \"http://a1.twimg.com/profile_images/1380912173/Screen_shot_2011-06-03_at_7.35.36_PM_normal.png\", + \"created_at\": \"Mon May 16 20:07:59 +0000 2011\", + \"location\": \"Twitter\", + \"profile_link_color\": \"009999\", + \"follow_request_sent\": null, + \"is_translator\": false, + \"id_str\": \"299862462\", + \"favourites_count\": 0, + \"default_profile\": false, + \"url\": \"http://www.eoin.me\", + \"contributors_enabled\": false, + \"id\": 299862462, + \"utc_offset\": null, + \"profile_image_url_https\": \"https://si0.twimg.com/profile_images/1380912173/Screen_shot_2011-06-03_at_7.35.36_PM_normal.png\", + \"profile_use_background_image\": true, + \"listed_count\": 0, + \"followers_count\": 9, + \"lang\": \"en\", + \"profile_text_color\": \"333333\", + \"protected\": false, + \"profile_background_image_url_https\": \"https://si0.twimg.com/images/themes/theme14/bg.gif\", + \"description\": \"Eoin's photography account. See @mceoin for tweets.\", + \"geo_enabled\": false, + \"verified\": false, + \"profile_background_color\": \"131516\", + \"time_zone\": null, + \"notifications\": null, + \"statuses_count\": 255, + \"friends_count\": 0, + \"default_profile_image\": false, + \"profile_background_image_url\": \"http://a1.twimg.com/images/themes/theme14/bg.gif\", + \"screen_name\": \"imeoin\", + \"following\": null, + \"show_all_inline_media\": false + }, + \"in_reply_to_screen_name\": \"twitter\", + \"in_reply_to_status_id\": null + }">>. 
+ + +sample_github_user() -> + <<"{ + \"user\": { + \"gravatar_id\": \"b8dbb1987e8e5318584865f880036796\", + \"company\": \"GitHub\", + \"name\": \"Chris Wanstrath\", + \"created_at\": \"2007/10/19 22:24:19 -0700\", + \"location\": \"San Francisco, CA\", + \"public_repo_count\": 98, + \"public_gist_count\": 270, + \"blog\": \"http://chriswanstrath.com/\", + \"following_count\": 196, + \"id\": 2, + \"type\": \"User\", + \"permission\": null, + \"followers_count\": 1692, + \"login\": \"defunkt\", + \"email\": \"chris@wanstrath.com\" + } + }">>.