From 02389164721caf868ae105592d316a264d7e05d6 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Mon, 11 Mar 2013 01:01:49 -0700 Subject: [PATCH 01/40] remove post_decode support --- src/jsx_to_term.erl | 144 +++----------------------------------------- 1 file changed, 9 insertions(+), 135 deletions(-) diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 52bc724..b7cec2a 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -28,8 +28,7 @@ -record(config, { - labels = binary, - post_decode = false + labels = binary }). -type config() :: list(). @@ -57,8 +56,6 @@ parse_config([{labels, Val}|Rest], Config) parse_config(Rest, Config#config{labels = Val}); parse_config([labels|Rest], Config) -> parse_config(Rest, Config#config{labels = binary}); -parse_config([{post_decode, F}|Rest], Config=#config{post_decode=false}) when is_function(F, 1) -> - parse_config(Rest, Config#config{post_decode=F}); parse_config([{K, _}|Rest] = Options, Config) -> case lists:member(K, jsx_config:valid_flags()) of true -> parse_config(Rest, Config) @@ -80,26 +77,26 @@ handle_event(end_json, {[[Terms]], _Config}) -> Terms; handle_event(start_object, {Terms, Config}) -> {[[]|Terms], Config}; handle_event(end_object, {[[], {key, Key}, Last|Terms], Config}) -> - {[[{Key, post_decode([{}], Config)}] ++ Last] ++ Terms, Config}; + {[[{Key, [{}]}] ++ Last] ++ Terms, Config}; handle_event(end_object, {[Object, {key, Key}, Last|Terms], Config}) -> - {[[{Key, post_decode(lists:reverse(Object), Config)}] ++ Last] ++ Terms, Config}; + {[[{Key, lists:reverse(Object)}] ++ Last] ++ Terms, Config}; handle_event(end_object, {[[], Last|Terms], Config}) -> - {[[post_decode([{}], Config)] ++ Last] ++ Terms, Config}; + {[[[{}]] ++ Last] ++ Terms, Config}; handle_event(end_object, {[Object, Last|Terms], Config}) -> - {[[post_decode(lists:reverse(Object), Config)] ++ Last] ++ Terms, Config}; + {[[lists:reverse(Object)] ++ Last] ++ Terms, Config}; handle_event(start_array, {Terms, Config}) -> 
{[[]|Terms], Config}; handle_event(end_array, {[List, {key, Key}, Last|Terms], Config}) -> - {[[{Key, post_decode(lists:reverse(List), Config)}] ++ Last] ++ Terms, Config}; + {[[{Key, lists:reverse(List)}] ++ Last] ++ Terms, Config}; handle_event(end_array, {[List, Last|Terms], Config}) -> - {[[post_decode(lists:reverse(List), Config)] ++ Last] ++ Terms, Config}; + {[[lists:reverse(List)] ++ Last] ++ Terms, Config}; handle_event({key, Key}, {Terms, Config}) -> {[{key, format_key(Key, Config)}] ++ Terms, Config}; handle_event({_, Event}, {[{key, Key}, Last|Terms], Config}) -> - {[[{Key, post_decode(Event, Config)}] ++ Last] ++ Terms, Config}; + {[[{Key, Event}] ++ Last] ++ Terms, Config}; handle_event({_, Event}, {[Last|Terms], Config}) -> - {[[post_decode(Event, Config)] ++ Last] ++ Terms, Config}. + {[[Event] ++ Last] ++ Terms, Config}. format_key(Key, Config) -> @@ -116,9 +113,6 @@ format_key(Key, Config) -> end. -post_decode(Value, #config{post_decode=false}) -> Value; -post_decode(Value, Config) -> (Config#config.post_decode)(Value). - %% eunit tests @@ -127,9 +121,6 @@ post_decode(Value, Config) -> (Config#config.post_decode)(Value). config_test_() -> - %% for post_decode tests - F = fun(X) -> X end, - G = fun(X, Y) -> {X, Y} end, [ {"empty config", ?_assertEqual(#config{}, parse_config([]))}, {"implicit binary labels", ?_assertEqual(#config{}, parse_config([labels]))}, @@ -139,15 +130,6 @@ config_test_() -> #config{labels=existing_atom}, parse_config([{labels, existing_atom}]) )}, - {"sloppy existing atom labels", ?_assertEqual( - #config{labels=attempt_atom}, - parse_config([{labels, attempt_atom}]) - )}, - {"post decode", ?_assertEqual( - #config{post_decode=F}, - parse_config([{post_decode, F}]) - )}, - {"post decode wrong arity", ?_assertError(badarg, parse_config([{post_decode, G}]))}, {"invalid opt flag", ?_assertError(badarg, parse_config([error]))}, {"invalid opt tuple", ?_assertError(badarg, parse_config([{error, true}]))} ]. 
@@ -176,114 +158,6 @@ format_key_test_() -> ]. -post_decoders_test_() -> - Events = [ - [{}], - [{<<"key">>, <<"value">>}], - [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}], - [], - [<<"string">>], - [true, false, null], - true, - false, - null, - <<"hello">>, - <<"world">>, - 1, - 1.0 - ], - [ - {"no post_decode", ?_assertEqual( - Events, - [ post_decode(Event, #config{}) || Event <- Events ] - )}, - {"replace arrays with empty arrays", ?_assertEqual( - [ - [{}], - [{<<"key">>, <<"value">>}], - [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}], - [], - [], - [], - true, - false, - null, - <<"hello">>, - <<"world">>, - 1, - 1.0 - ], - [ post_decode(Event, #config{ - post_decode=fun([T|_] = V) when is_tuple(T) -> V; (V) when is_list(V) -> []; (V) -> V end - }) || Event <- Events - ] - )}, - {"replace objects with empty objects", ?_assertEqual( - [ - [{}], - [{}], - [{}], - [], - [<<"string">>], - [true, false, null], - true, - false, - null, - <<"hello">>, - <<"world">>, - 1, - 1.0 - ], - [ post_decode(Event, #config{ - post_decode=fun([T|_]) when is_tuple(T) -> [{}]; (V) -> V end - }) || Event <- Events - ] - )}, - {"replace all non-array/non-object values with false", ?_assertEqual( - [ - [{}], - [{<<"key">>, <<"value">>}], - [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}], - [], - [<<"string">>], - [true, false, null], - false, - false, - false, - false, - false, - false, - false - ], - [ post_decode(Event, #config{ - post_decode=fun(V) when is_list(V) -> V; (_) -> false end - }) || Event <- Events - ] - )}, - {"atoms_to_strings", ?_assertEqual( - [ - [{}], - [{<<"key">>, <<"value">>}], - [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}], - [], - [<<"string">>], - [true, false, null], - <<"true">>, - <<"false">>, - <<"null">>, - <<"hello">>, - <<"world">>, - 1, - 1.0 - ], - [ post_decode(Event, #config{ - post_decode=fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end - }) 
|| Event <- Events - ] - )} - ]. - - handle_event_test_() -> Data = jsx:test_cases(), [ From 07d84e25d06c139a49267e08432bc39002c1ffc8 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Mon, 11 Mar 2013 01:13:01 -0700 Subject: [PATCH 02/40] remove pre_encode support --- src/jsx_config.erl | 32 +--------- src/jsx_config.hrl | 1 - src/jsx_encoder.erl | 152 +++----------------------------------------- 3 files changed, 10 insertions(+), 175 deletions(-) diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 9c90aec..183dbbf 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -65,11 +65,6 @@ parse_config([relax|Rest], Config) -> comments = true, ignored_bad_escapes = true }); -parse_config([{pre_encode, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) -> - case Config#config.pre_encode of - false -> parse_config(Rest, Config#config{pre_encode=Encoder}) - ; _ -> erlang:error(badarg, [Options, Config]) - end; parse_config([{error_handler, ErrorHandler}|Rest] = Options, Config) when is_function(ErrorHandler, 3) -> case Config#config.error_handler of false -> parse_config(Rest, Config#config{error_handler=ErrorHandler}) @@ -81,11 +76,6 @@ parse_config([{incomplete_handler, IncompleteHandler}|Rest] = Options, Config) w ; _ -> erlang:error(badarg, [Options, Config]) end; %% deprecated flags -parse_config([{pre_encoder, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) -> - case Config#config.pre_encode of - false -> parse_config(Rest, Config#config{pre_encode=Encoder}) - ; _ -> erlang:error(badarg, [Options, Config]) - end; parse_config([loose_unicode|Rest], Config) -> parse_config(Rest, Config#config{replaced_bad_utf8=true}); parse_config([escape_forward_slash|Rest], Config) -> @@ -104,8 +94,7 @@ parse_config(Options, Config) -> config_to_list(Config) -> lists:map( - fun ({pre_encode, F}) -> {pre_encode, F}; - ({error_handler, F}) -> {error_handler, F}; + fun ({error_handler, F}) -> {error_handler, F}; ({incomplete_handler, F}) -> 
{incomplete_handler, F}; ({Key, true}) -> Key end, @@ -128,11 +117,9 @@ valid_flags() -> ignored_bad_escapes, explicit_end, relax, - pre_encode, error_handler, incomplete_handler, %% deprecated flags - pre_encoder, %% pre_encode loose_unicode, %% replaced_bad_utf8 escape_forward_slash, %% escaped_forward_slashes single_quotes, %% single_quoted_strings @@ -202,7 +189,6 @@ config_test_() -> }, {"deprecated flags", ?_assertEqual( #config{ - pre_encode=fun lists:length/1, replaced_bad_utf8=true, escaped_forward_slashes=true, single_quoted_strings=true, @@ -211,7 +197,6 @@ config_test_() -> ignored_bad_escapes=true }, parse_config([ - {pre_encoder, fun lists:length/1}, loose_unicode, escape_forward_slash, single_quotes, @@ -220,17 +205,6 @@ config_test_() -> ignore_bad_escapes ]) )}, - {"pre_encode flag", ?_assertEqual( - #config{pre_encode=fun lists:length/1}, - parse_config([{pre_encode, fun lists:length/1}]) - )}, - {"two pre_encoders defined", ?_assertError( - badarg, - parse_config([ - {pre_encode, fun(_) -> true end}, - {pre_encode, fun(_) -> false end} - ]) - )}, {"error_handler flag", ?_assertEqual( #config{error_handler=fun ?MODULE:fake_error_handler/3}, parse_config([{error_handler, fun ?MODULE:fake_error_handler/3}]) @@ -287,10 +261,6 @@ config_to_list_test_() -> } ) )}, - {"pre_encode", ?_assertEqual( - [{pre_encode, fun lists:length/1}], - config_to_list(#config{pre_encode=fun lists:length/1}) - )}, {"error handler", ?_assertEqual( [{error_handler, fun ?MODULE:fake_error_handler/3}], config_to_list(#config{error_handler=fun ?MODULE:fake_error_handler/3}) diff --git a/src/jsx_config.hrl b/src/jsx_config.hrl index 642dcf5..502f103 100644 --- a/src/jsx_config.hrl +++ b/src/jsx_config.hrl @@ -8,7 +8,6 @@ dirty_strings = false, ignored_bad_escapes = false, explicit_end = false, - pre_encode = false, error_handler = false, incomplete_handler = false }). 
\ No newline at end of file diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 05403d3..a68a957 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -52,7 +52,7 @@ encoder(Handler, State, Config) -> start(Term, {Handler, State}, Config) -> - try Handler:handle_event(end_json, value(pre_encode(Term, Config), {Handler, State}, Config)) + try Handler:handle_event(end_json, value(Term, {Handler, State}, Config)) catch throw:Error -> Error; Type:Value -> erlang:Type(Value) @@ -77,23 +77,16 @@ value(List, Handler, Config) when is_list(List) -> value(Term, Handler, Config) -> ?error(value, Term, Handler, Config). -list_or_object([Term|Rest], {Handler, State}, Config) -> - case pre_encode(Term, Config) of - {K, V} when is_atom(K); is_binary(K) -> - object([{K, V}|Rest], {Handler, Handler:handle_event(start_object, State)}, Config) - ; T -> - list([T|Rest], {Handler, Handler:handle_event(start_array, State)}, Config) - end. +list_or_object([{K, V}|Rest], {Handler, State}, Config) when is_atom(K); is_binary(K) -> + object([{K, V}|Rest], {Handler, Handler:handle_event(start_object, State)}, Config); +list_or_object(Terms, {Handler, State}, Config) when is_list(Terms) -> + list(Terms, {Handler, Handler:handle_event(start_array, State)}, Config). object([{Key, Value}, Next|Rest], {Handler, State}, Config) when is_atom(Key); is_binary(Key) -> - V = pre_encode(Value, Config), - object( - [pre_encode(Next, Config)|Rest], - { - Handler, + object([Next|Rest], {Handler, value( - V, + Value, {Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)}, Config ) @@ -106,7 +99,7 @@ object([{Key, Value}], {Handler, State}, Config) when is_atom(Key); is_binary(Ke { Handler, value( - pre_encode(Value, Config), + Value, {Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)}, Config ) @@ -118,14 +111,11 @@ object(Term, Handler, Config) -> ?error(object, Term, Handler, Config). 
list([Value, Next|Rest], {Handler, State}, Config) -> - list([pre_encode(Next, Config)|Rest], {Handler, value(Value, {Handler, State}, Config)}, Config); + list([Next|Rest], {Handler, value(Value, {Handler, State}, Config)}, Config); list([Value], {Handler, State}, Config) -> list([], {Handler, value(Value, {Handler, State}, Config)}, Config); list([], {Handler, State}, _Config) -> Handler:handle_event(end_array, State). -pre_encode(Value, #config{pre_encode=false}) -> Value; -pre_encode(Value, Config) -> (Config#config.pre_encode)(Value). - fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8)); fix_key(Key) when is_binary(Key) -> Key. @@ -160,130 +150,6 @@ encode_test_() -> encode(Term, Config) -> start(Term, {jsx, []}, jsx_config:parse_config(Config)). -pre_encoders_test_() -> - Term = [ - {<<"object">>, [ - {<<"literals">>, [true, false, null]}, - {<<"strings">>, [<<"foo">>, <<"bar">>, <<"baz">>]}, - {<<"numbers">>, [1, 1.0, 1.0e0]} - ]} - ], - [ - {"no pre encode", ?_assertEqual( - [ - start_object, - {key, <<"object">>}, start_object, - {key, <<"literals">>}, start_array, - {literal, true}, {literal, false}, {literal, null}, - end_array, - {key, <<"strings">>}, start_array, - {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>}, - end_array, - {key, <<"numbers">>}, start_array, - {integer, 1}, {float, 1.0}, {float, 1.0}, - end_array, - end_object, - end_object, - end_json - ], - encode(Term, []) - )}, - {"replace lists with empty lists", ?_assertEqual( - [ - start_object, - {key, <<"object">>}, start_object, - {key, <<"literals">>}, start_array, end_array, - {key, <<"strings">>}, start_array, end_array, - {key, <<"numbers">>}, start_array, end_array, - end_object, - end_object, - end_json - ], - encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end}]) - )}, - {"replace objects with empty objects", ?_assertEqual( - [ - start_object, - end_object, - end_json - ], - encode(Term, 
[{pre_encode, fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end}]) - )}, - {"replace all non-list and non_tuple values with false", ?_assertEqual( - [ - start_object, - {key, <<"object">>}, start_object, - {key, <<"literals">>}, start_array, - {literal, false}, {literal, false}, {literal, false}, - end_array, - {key, <<"strings">>}, start_array, - {literal, false}, {literal, false}, {literal, false}, - end_array, - {key, <<"numbers">>}, start_array, - {literal, false}, {literal, false}, {literal, false}, - end_array, - end_object, - end_object, - end_json - ], - encode(Term, [{pre_encode, fun(V) when is_list(V); is_tuple(V) -> V; (_) -> false end}]) - )}, - {"replace all atoms with atom_to_list", ?_assertEqual( - [ - start_object, - {key, <<"object">>}, start_object, - {key, <<"literals">>}, start_array, - {string, <<"true">>}, {string, <<"false">>}, {string, <<"null">>}, - end_array, - {key, <<"strings">>}, start_array, - {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>}, - end_array, - {key, <<"numbers">>}, start_array, - {integer, 1}, {float, 1.0}, {float, 1.0}, - end_array, - end_object, - end_object, - end_json - ], - encode(Term, [{pre_encode, fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end}]) - )}, - {"pre_encode tuple", ?_assertEqual( - [ - start_array, - {integer, 1}, {integer, 2}, {integer, 3}, - end_array, - end_json - ], - encode({1, 2, 3}, [{pre_encode, fun(Tuple) when is_tuple(Tuple) -> tuple_to_list(Tuple); (V) -> V end}]) - )}, - {"pre_encode 2-tuples", ?_assertEqual( - [ - start_object, - {key, <<"two">>}, {integer, 2}, {key, <<"three">>}, {integer, 3}, - end_object, - end_json - ], - encode([{two, 1}, {three, 2}], [{pre_encode, fun({K, V}) -> {K, V + 1}; (V) -> V end}]) - )}, - {"pre_encode one field record", ?_assertEqual( - [ - start_object, - {key, <<"bar">>}, {literal, false}, - end_object, - end_json - ], - encode([{foo, bar}], [{pre_encode, fun({foo, V}) -> {V, undefined}; (undefined) -> 
false; (V) -> V end}]) - )}, - {"pre_encode list", ?_assertEqual( - [ - start_array, - {integer, 2}, {integer, 3}, {integer, 4}, - end_array, - end_json - ], - encode([1,2,3], [{pre_encode, fun(X) when is_integer(X) -> X + 1; (V) -> V end}]) - )} - ]. error_test_() -> [ From 73b9032c99b8e9d89cce2f16e80815c87b7aa698 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 2 Jun 2013 22:54:11 +0000 Subject: [PATCH 03/40] remove superfluous parser states --- src/jsx_parser.erl | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 85492a6..0616200 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -91,12 +91,6 @@ value([start_object|Tokens], Handler, Stack, Config) -> object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config); value([start_array|Tokens], Handler, Stack, Config) -> array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config); -value([{literal, true}|Tokens], Handler, [], Config) -> - done(Tokens, handle_event({literal, true}, Handler, Config), [], Config); -value([{literal, false}|Tokens], Handler, [], Config) -> - done(Tokens, handle_event({literal, false}, Handler, Config), [], Config); -value([{literal, null}|Tokens], Handler, [], Config) -> - done(Tokens, handle_event({literal, null}, Handler, Config), [], Config); value([{literal, true}|Tokens], Handler, Stack, Config) -> maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config); value([{literal, false}|Tokens], Handler, Stack, Config) -> @@ -105,10 +99,6 @@ value([{literal, null}|Tokens], Handler, Stack, Config) -> maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config); value([Literal|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null -> value([{literal, Literal}] ++ Tokens, Handler, Stack, Config); -value([{integer, Number}|Tokens], Handler, [], Config) when is_integer(Number) 
-> - done(Tokens, handle_event({integer, Number}, Handler, Config), [], Config); -value([{float, Number}|Tokens], Handler, [], Config) when is_float(Number) -> - done(Tokens, handle_event({float, Number}, Handler, Config), [], Config); value([{integer, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) -> maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config); value([{float, Number}|Tokens], Handler, Stack, Config) when is_float(Number) -> @@ -121,12 +111,6 @@ value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) -> value([{integer, Number}] ++ Tokens, Handler, Stack, Config); value([Number|Tokens], Handler, Stack, Config) when is_float(Number) -> value([{float, Number}] ++ Tokens, Handler, Stack, Config); -value([{string, String}|Tokens], Handler, [], Config) when is_binary(String) -> - case clean_string(String, Tokens, Handler, [], Config) of - Clean when is_binary(Clean) -> - done(Tokens, handle_event({string, Clean}, Handler, Config), [], Config); - Error -> Error - end; value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) -> case clean_string(String, Tokens, Handler, Stack, Config) of Clean when is_binary(Clean) -> @@ -277,7 +261,7 @@ custom_error_handler_test_() -> parse_error([start_array, end_array, start_array, end_json], [{error_handler, Error}]) )}, {"done error", ?_assertEqual( - {done, [{literal, true}, end_json]}, + {maybe_done, [{literal, true}, end_json]}, parse_error([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}]) )}, {"string error", ?_assertEqual( From 3d1096f8f5cbc9356c912486c4e466337bb560ee Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 4 Jun 2013 01:12:25 +0000 Subject: [PATCH 04/40] remove deprecated functions --- src/jsx.erl | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/src/jsx.erl b/src/jsx.erl index dbf072b..8c45bfe 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -28,10 
+28,6 @@ -export([format/1, format/2, minify/1, prettify/1]). -export([encoder/3, decoder/3, parser/3]). -export([resume/3]). -%% old api --export([term_to_json/1, term_to_json/2, json_to_term/1, json_to_term/2]). --export([to_json/1, to_json/2]). --export([to_term/1, to_term/2]). -export_type([json_term/0, json_text/0, token/0]). -export_type([encoder/0, decoder/0, parser/0, internal_state/0]). @@ -61,12 +57,12 @@ encode(Source) -> encode(Source, []). encode(Source, Config) -> jsx_to_json:to_json(Source, Config). -%% old api, alias for encode/x -to_json(Source) -> encode(Source, []). -to_json(Source, Config) -> encode(Source, Config). -term_to_json(Source) -> encode(Source, []). -term_to_json(Source, Config) -> encode(Source, Config). +-spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}. +-spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}. + +decode(Source) -> decode(Source, []). +decode(Source, Config) -> jsx_to_term:to_term(Source, Config). -spec format(Source::json_text()) -> json_text() | {incomplete, decoder()}. @@ -86,20 +82,6 @@ minify(Source) -> format(Source, []). prettify(Source) -> format(Source, [space, {indent, 2}]). --spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}. --spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}. - -decode(Source) -> decode(Source, []). -decode(Source, Config) -> jsx_to_term:to_term(Source, Config). - -%% old api, alias for to_term/x - -to_term(Source) -> decode(Source, []). -to_term(Source, Config) -> decode(Source, Config). -json_to_term(Source) -> decode(Source, []). -json_to_term(Source, Config) -> decode(Source, Config). - - -spec is_json(Source::any()) -> true | false. -spec is_json(Source::any(), Config::jsx_verify:config()) -> true | false. 
From 2d385c33424056d1fcb420ad1ec557c7d6e43918 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 27 Aug 2013 03:40:16 +0000 Subject: [PATCH 05/40] delegate almost all responsibility of encoder to parser to simplify internals --- src/jsx_encoder.erl | 136 ++++++++++---------------------------------- src/jsx_parser.erl | 8 +-- 2 files changed, 32 insertions(+), 112 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index a68a957..6c04d27 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -23,113 +23,38 @@ -module(jsx_encoder). --export([encoder/3]). +-export([encoder/3, encode/1, encode/2, unzip/1]). -spec encoder(Handler::module(), State::any(), Config::jsx:config()) -> jsx:encoder(). encoder(Handler, State, Config) -> - fun(JSON) -> - start( - JSON, - {Handler, Handler:init(State)}, - jsx_config:parse_config(Config) - ) - end. + Parser = jsx:parser(Handler, State, Config), + fun(Term) -> Parser(encode(Term) ++ [end_json]) end. +-spec encode(Term::any()) -> any(). --include("jsx_config.hrl"). +encode(Term) -> encode(Term, ?MODULE). --ifndef(error). --define(error(State, Term, Handler, Config), - case Config#config.error_handler of - false -> erlang:error(badarg); - F -> erlang:throw(F(Term, {encoder, State, Handler}, jsx_config:config_to_list(Config))) - end -). --endif. +-spec encode(Term::any(), EntryPoint::module()) -> any(). + +encode([], _EntryPoint) -> [start_array, end_array]; +encode([{}], _EntryPoint) -> [start_object, end_object]; + +encode([{_, _}|_] = Term, EntryPoint) -> + lists:flatten([start_object] ++ [ EntryPoint:encode(T) || T <- unzip(Term) ] ++ [end_object]); +encode(Term, EntryPoint) when is_list(Term) -> + lists:flatten([start_array] ++ [ EntryPoint:encode(T) || T <- Term ] ++ [end_array]); + +encode(Else, _EntryPoint) -> [Else]. 
-start(Term, {Handler, State}, Config) -> - try Handler:handle_event(end_json, value(Term, {Handler, State}, Config)) - catch - throw:Error -> Error; - Type:Value -> erlang:Type(Value) - end. +unzip(List) -> unzip(List, []). +unzip([], Acc) -> lists:reverse(Acc); +unzip([{K, V}|Rest], Acc) when is_binary(K); is_atom(K) -> unzip(Rest, [V, K] ++ Acc). -value(String, {Handler, State}, Config) when is_binary(String) -> - Handler:handle_event({string, clean_string(String, {Handler, State}, Config)}, State); -value(Float, {Handler, State}, _Config) when is_float(Float) -> - Handler:handle_event({float, Float}, State); -value(Int, {Handler, State}, _Config) when is_integer(Int) -> - Handler:handle_event({integer, Int}, State); -value(Literal, {Handler, State}, _Config) - when Literal == true; Literal == false; Literal == null -> - Handler:handle_event({literal, Literal}, State); -value([{}], {Handler, State}, _Config) -> - Handler:handle_event(end_object, Handler:handle_event(start_object, State)); -value([], {Handler, State}, _Config) -> - Handler:handle_event(end_array, Handler:handle_event(start_array, State)); -value(List, Handler, Config) when is_list(List) -> - list_or_object(List, Handler, Config); -value(Term, Handler, Config) -> ?error(value, Term, Handler, Config). - - -list_or_object([{K, V}|Rest], {Handler, State}, Config) when is_atom(K); is_binary(K) -> - object([{K, V}|Rest], {Handler, Handler:handle_event(start_object, State)}, Config); -list_or_object(Terms, {Handler, State}, Config) when is_list(Terms) -> - list(Terms, {Handler, Handler:handle_event(start_array, State)}, Config). 
- - -object([{Key, Value}, Next|Rest], {Handler, State}, Config) when is_atom(Key); is_binary(Key) -> - object([Next|Rest], {Handler, - value( - Value, - {Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)}, - Config - ) - }, - Config - ); -object([{Key, Value}], {Handler, State}, Config) when is_atom(Key); is_binary(Key) -> - object( - [], - { - Handler, - value( - Value, - {Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)}, - Config - ) - }, - Config - ); -object([], {Handler, State}, _Config) -> Handler:handle_event(end_object, State); -object(Term, Handler, Config) -> ?error(object, Term, Handler, Config). - - -list([Value, Next|Rest], {Handler, State}, Config) -> - list([Next|Rest], {Handler, value(Value, {Handler, State}, Config)}, Config); -list([Value], {Handler, State}, Config) -> - list([], {Handler, value(Value, {Handler, State}, Config)}, Config); -list([], {Handler, State}, _Config) -> Handler:handle_event(end_array, State). - - -fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8)); -fix_key(Key) when is_binary(Key) -> Key. - - -clean_string(Bin, Handler, Config) -> - case clean_string(Bin, Config) of - {error, badarg} -> ?error(string, Bin, Handler, Config); - String -> String - end. - - - --include("jsx_strings.hrl"). -ifdef(TEST). @@ -138,35 +63,34 @@ clean_string(Bin, Handler, Config) -> encode_test_() -> Data = jsx:test_cases(), + Encode = encoder(jsx, [], []), [ { Title, ?_assertEqual( - Events ++ [end_json], - start(Term, {jsx, []}, #config{}) + Events, + Encode(Term) -- [end_json] ) } || {Title, _, Term, Events} <- Data ]. - -encode(Term, Config) -> start(Term, {jsx, []}, jsx_config:parse_config(Config)). - +err(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term). 
error_test_() -> [ - {"value error", ?_assertError(badarg, encode(self(), []))}, - {"string error", ?_assertError(badarg, encode(<<239, 191, 191>>, []))} + {"value error", ?_assertError(badarg, err(self(), []))}, + {"string error", ?_assertError(badarg, err(<<239, 191, 191>>, []))} ]. custom_error_handler_test_() -> - Error = fun(Term, {_, State, _}, _) -> {State, Term} end, + Error = fun(Term, {_, State, _, _}, _) -> {State, Term} end, [ {"value error", ?_assertEqual( - {value, self()}, - encode(self(), [{error_handler, Error}]) + {value, [self()]}, + err(self(), [{error_handler, Error}]) )}, {"string error", ?_assertEqual( - {string, <<239, 191, 191>>}, - encode(<<239, 191, 191>>, [{error_handler, Error}]) + {string, [{string, <<239, 191, 191>>}]}, + err(<<239, 191, 191>>, [{error_handler, Error}]) )} ]. diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 0616200..11738b5 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -91,12 +91,8 @@ value([start_object|Tokens], Handler, Stack, Config) -> object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config); value([start_array|Tokens], Handler, Stack, Config) -> array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config); -value([{literal, true}|Tokens], Handler, Stack, Config) -> - maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config); -value([{literal, false}|Tokens], Handler, Stack, Config) -> - maybe_done(Tokens, handle_event({literal, false}, Handler, Config), Stack, Config); -value([{literal, null}|Tokens], Handler, Stack, Config) -> - maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config); +value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null -> + maybe_done(Tokens, handle_event({literal, Literal}, Handler, Config), Stack, Config); value([Literal|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null -> 
value([{literal, Literal}] ++ Tokens, Handler, Stack, Config); value([{integer, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) -> From d77f23ec9ab637c00c8769380b00f86c96f4e786 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 27 Aug 2013 03:45:40 +0000 Subject: [PATCH 06/40] merge jsx_strings and jsx_parser --- src/jsx_parser.erl | 411 +++++++++++++++++++++++++++++++++++++++++++- src/jsx_strings.hrl | 403 ------------------------------------------- src/jsx_tests.hrl | 125 +++++++------- 3 files changed, 471 insertions(+), 468 deletions(-) delete mode 100644 src/jsx_strings.hrl diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 11738b5..f327bab 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -26,6 +26,10 @@ -export([parser/3, resume/5]). -export([init/1, handle_event/2]). +-ifdef(TEST). +-export([clean_string/2, json_escape_sequence/1]). +-endif. + -spec parser(Handler::module(), State::any(), Config::jsx:config()) -> jsx:parser(). @@ -186,6 +190,411 @@ clean_string(Bin, Tokens, Handler, Stack, Config) -> String -> String end. +clean_string(Bin, #config{dirty_strings=true}) -> Bin; +clean_string(Bin, Config) -> + case Config#config.replaced_bad_utf8 orelse Config#config.escaped_strings of + true -> clean(Bin, [], Config); + false -> ensure_clean(Bin) + end. + + +ensure_clean(Bin) -> + case is_clean(Bin) of + ok -> Bin; + {error, badarg} -> {error, badarg} + end. 
+ + +%% fast path for no escaping and no correcting, throws error if string is 'bad' +is_clean(<<>>) -> ok; +is_clean(<<0, Rest/binary>>) -> is_clean(Rest); +is_clean(<<1, Rest/binary>>) -> is_clean(Rest); +is_clean(<<2, Rest/binary>>) -> is_clean(Rest); +is_clean(<<3, Rest/binary>>) -> is_clean(Rest); +is_clean(<<4, Rest/binary>>) -> is_clean(Rest); +is_clean(<<5, Rest/binary>>) -> is_clean(Rest); +is_clean(<<6, Rest/binary>>) -> is_clean(Rest); +is_clean(<<7, Rest/binary>>) -> is_clean(Rest); +is_clean(<<8, Rest/binary>>) -> is_clean(Rest); +is_clean(<<9, Rest/binary>>) -> is_clean(Rest); +is_clean(<<10, Rest/binary>>) -> is_clean(Rest); +is_clean(<<11, Rest/binary>>) -> is_clean(Rest); +is_clean(<<12, Rest/binary>>) -> is_clean(Rest); +is_clean(<<13, Rest/binary>>) -> is_clean(Rest); +is_clean(<<14, Rest/binary>>) -> is_clean(Rest); +is_clean(<<15, Rest/binary>>) -> is_clean(Rest); +is_clean(<<16, Rest/binary>>) -> is_clean(Rest); +is_clean(<<17, Rest/binary>>) -> is_clean(Rest); +is_clean(<<18, Rest/binary>>) -> is_clean(Rest); +is_clean(<<19, Rest/binary>>) -> is_clean(Rest); +is_clean(<<20, Rest/binary>>) -> is_clean(Rest); +is_clean(<<21, Rest/binary>>) -> is_clean(Rest); +is_clean(<<22, Rest/binary>>) -> is_clean(Rest); +is_clean(<<23, Rest/binary>>) -> is_clean(Rest); +is_clean(<<24, Rest/binary>>) -> is_clean(Rest); +is_clean(<<25, Rest/binary>>) -> is_clean(Rest); +is_clean(<<26, Rest/binary>>) -> is_clean(Rest); +is_clean(<<27, Rest/binary>>) -> is_clean(Rest); +is_clean(<<28, Rest/binary>>) -> is_clean(Rest); +is_clean(<<29, Rest/binary>>) -> is_clean(Rest); +is_clean(<<30, Rest/binary>>) -> is_clean(Rest); +is_clean(<<31, Rest/binary>>) -> is_clean(Rest); +is_clean(<<32, Rest/binary>>) -> is_clean(Rest); +is_clean(<<33, Rest/binary>>) -> is_clean(Rest); +is_clean(<<34, Rest/binary>>) -> is_clean(Rest); +is_clean(<<35, Rest/binary>>) -> is_clean(Rest); +is_clean(<<36, Rest/binary>>) -> is_clean(Rest); +is_clean(<<37, Rest/binary>>) -> is_clean(Rest); 
+is_clean(<<38, Rest/binary>>) -> is_clean(Rest); +is_clean(<<39, Rest/binary>>) -> is_clean(Rest); +is_clean(<<40, Rest/binary>>) -> is_clean(Rest); +is_clean(<<41, Rest/binary>>) -> is_clean(Rest); +is_clean(<<42, Rest/binary>>) -> is_clean(Rest); +is_clean(<<43, Rest/binary>>) -> is_clean(Rest); +is_clean(<<44, Rest/binary>>) -> is_clean(Rest); +is_clean(<<45, Rest/binary>>) -> is_clean(Rest); +is_clean(<<46, Rest/binary>>) -> is_clean(Rest); +is_clean(<<47, Rest/binary>>) -> is_clean(Rest); +is_clean(<<48, Rest/binary>>) -> is_clean(Rest); +is_clean(<<49, Rest/binary>>) -> is_clean(Rest); +is_clean(<<50, Rest/binary>>) -> is_clean(Rest); +is_clean(<<51, Rest/binary>>) -> is_clean(Rest); +is_clean(<<52, Rest/binary>>) -> is_clean(Rest); +is_clean(<<53, Rest/binary>>) -> is_clean(Rest); +is_clean(<<54, Rest/binary>>) -> is_clean(Rest); +is_clean(<<55, Rest/binary>>) -> is_clean(Rest); +is_clean(<<56, Rest/binary>>) -> is_clean(Rest); +is_clean(<<57, Rest/binary>>) -> is_clean(Rest); +is_clean(<<58, Rest/binary>>) -> is_clean(Rest); +is_clean(<<59, Rest/binary>>) -> is_clean(Rest); +is_clean(<<60, Rest/binary>>) -> is_clean(Rest); +is_clean(<<61, Rest/binary>>) -> is_clean(Rest); +is_clean(<<62, Rest/binary>>) -> is_clean(Rest); +is_clean(<<63, Rest/binary>>) -> is_clean(Rest); +is_clean(<<64, Rest/binary>>) -> is_clean(Rest); +is_clean(<<65, Rest/binary>>) -> is_clean(Rest); +is_clean(<<66, Rest/binary>>) -> is_clean(Rest); +is_clean(<<67, Rest/binary>>) -> is_clean(Rest); +is_clean(<<68, Rest/binary>>) -> is_clean(Rest); +is_clean(<<69, Rest/binary>>) -> is_clean(Rest); +is_clean(<<70, Rest/binary>>) -> is_clean(Rest); +is_clean(<<71, Rest/binary>>) -> is_clean(Rest); +is_clean(<<72, Rest/binary>>) -> is_clean(Rest); +is_clean(<<73, Rest/binary>>) -> is_clean(Rest); +is_clean(<<74, Rest/binary>>) -> is_clean(Rest); +is_clean(<<75, Rest/binary>>) -> is_clean(Rest); +is_clean(<<76, Rest/binary>>) -> is_clean(Rest); +is_clean(<<77, Rest/binary>>) -> is_clean(Rest); 
+is_clean(<<78, Rest/binary>>) -> is_clean(Rest); +is_clean(<<79, Rest/binary>>) -> is_clean(Rest); +is_clean(<<80, Rest/binary>>) -> is_clean(Rest); +is_clean(<<81, Rest/binary>>) -> is_clean(Rest); +is_clean(<<82, Rest/binary>>) -> is_clean(Rest); +is_clean(<<83, Rest/binary>>) -> is_clean(Rest); +is_clean(<<84, Rest/binary>>) -> is_clean(Rest); +is_clean(<<85, Rest/binary>>) -> is_clean(Rest); +is_clean(<<86, Rest/binary>>) -> is_clean(Rest); +is_clean(<<87, Rest/binary>>) -> is_clean(Rest); +is_clean(<<88, Rest/binary>>) -> is_clean(Rest); +is_clean(<<89, Rest/binary>>) -> is_clean(Rest); +is_clean(<<90, Rest/binary>>) -> is_clean(Rest); +is_clean(<<91, Rest/binary>>) -> is_clean(Rest); +is_clean(<<92, Rest/binary>>) -> is_clean(Rest); +is_clean(<<93, Rest/binary>>) -> is_clean(Rest); +is_clean(<<94, Rest/binary>>) -> is_clean(Rest); +is_clean(<<95, Rest/binary>>) -> is_clean(Rest); +is_clean(<<96, Rest/binary>>) -> is_clean(Rest); +is_clean(<<97, Rest/binary>>) -> is_clean(Rest); +is_clean(<<98, Rest/binary>>) -> is_clean(Rest); +is_clean(<<99, Rest/binary>>) -> is_clean(Rest); +is_clean(<<100, Rest/binary>>) -> is_clean(Rest); +is_clean(<<101, Rest/binary>>) -> is_clean(Rest); +is_clean(<<102, Rest/binary>>) -> is_clean(Rest); +is_clean(<<103, Rest/binary>>) -> is_clean(Rest); +is_clean(<<104, Rest/binary>>) -> is_clean(Rest); +is_clean(<<105, Rest/binary>>) -> is_clean(Rest); +is_clean(<<106, Rest/binary>>) -> is_clean(Rest); +is_clean(<<107, Rest/binary>>) -> is_clean(Rest); +is_clean(<<108, Rest/binary>>) -> is_clean(Rest); +is_clean(<<109, Rest/binary>>) -> is_clean(Rest); +is_clean(<<110, Rest/binary>>) -> is_clean(Rest); +is_clean(<<111, Rest/binary>>) -> is_clean(Rest); +is_clean(<<112, Rest/binary>>) -> is_clean(Rest); +is_clean(<<113, Rest/binary>>) -> is_clean(Rest); +is_clean(<<114, Rest/binary>>) -> is_clean(Rest); +is_clean(<<115, Rest/binary>>) -> is_clean(Rest); +is_clean(<<116, Rest/binary>>) -> is_clean(Rest); +is_clean(<<117, Rest/binary>>) 
-> is_clean(Rest); +is_clean(<<118, Rest/binary>>) -> is_clean(Rest); +is_clean(<<119, Rest/binary>>) -> is_clean(Rest); +is_clean(<<120, Rest/binary>>) -> is_clean(Rest); +is_clean(<<121, Rest/binary>>) -> is_clean(Rest); +is_clean(<<122, Rest/binary>>) -> is_clean(Rest); +is_clean(<<123, Rest/binary>>) -> is_clean(Rest); +is_clean(<<124, Rest/binary>>) -> is_clean(Rest); +is_clean(<<125, Rest/binary>>) -> is_clean(Rest); +is_clean(<<126, Rest/binary>>) -> is_clean(Rest); +is_clean(<<127, Rest/binary>>) -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X < 16#d800 -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X > 16#dfff, X < 16#fdd0 -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X > 16#fdef, X < 16#fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#10000, X < 16#1fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#20000, X < 16#2fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#30000, X < 16#3fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#40000, X < 16#4fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#50000, X < 16#5fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#60000, X < 16#6fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#70000, X < 16#7fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#80000, X < 16#8fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#90000, X < 16#9fffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#a0000, X < 16#afffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#b0000, X < 16#bfffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#c0000, X < 16#cfffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#d0000, X < 16#dfffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#e0000, X < 16#efffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#f0000, X < 16#ffffe -> is_clean(Rest); +is_clean(<<X/utf8, Rest/binary>>) when X >= 16#100000, X < 16#10fffe -> is_clean(Rest); +is_clean(_Bin) -> {error, badarg}.
+ + +%% escape and/or replace bad codepoints if requested +clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc)); +clean(<<0, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(0, Config) ++ Acc, Config); +clean(<<1, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(1, Config) ++ Acc, Config); +clean(<<2, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(2, Config) ++ Acc, Config); +clean(<<3, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(3, Config) ++ Acc, Config); +clean(<<4, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(4, Config) ++ Acc, Config); +clean(<<5, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(5, Config) ++ Acc, Config); +clean(<<6, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(6, Config) ++ Acc, Config); +clean(<<7, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(7, Config) ++ Acc, Config); +clean(<<8, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(8, Config) ++ Acc, Config); +clean(<<9, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(9, Config) ++ Acc, Config); +clean(<<10, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(10, Config) ++ Acc, Config); +clean(<<11, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(11, Config) ++ Acc, Config); +clean(<<12, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(12, Config) ++ Acc, Config); +clean(<<13, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(13, Config) ++ Acc, Config); +clean(<<14, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(14, Config) ++ Acc, Config); +clean(<<15, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(15, Config) ++ Acc, Config); +clean(<<16, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(16, Config) ++ Acc, Config); +clean(<<17, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(17, Config) ++ Acc, Config); +clean(<<18, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(18, Config) ++ Acc, Config); 
+clean(<<19, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(19, Config) ++ Acc, Config); +clean(<<20, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(20, Config) ++ Acc, Config); +clean(<<21, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(21, Config) ++ Acc, Config); +clean(<<22, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(22, Config) ++ Acc, Config); +clean(<<23, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(23, Config) ++ Acc, Config); +clean(<<24, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(24, Config) ++ Acc, Config); +clean(<<25, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(25, Config) ++ Acc, Config); +clean(<<26, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(26, Config) ++ Acc, Config); +clean(<<27, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(27, Config) ++ Acc, Config); +clean(<<28, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(28, Config) ++ Acc, Config); +clean(<<29, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(29, Config) ++ Acc, Config); +clean(<<30, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(30, Config) ++ Acc, Config); +clean(<<31, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(31, Config) ++ Acc, Config); +clean(<<32, Rest/binary>>, Acc, Config) -> clean(Rest, [32] ++ Acc, Config); +clean(<<33, Rest/binary>>, Acc, Config) -> clean(Rest, [33] ++ Acc, Config); +clean(<<34, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(34, Config) ++ Acc, Config); +clean(<<35, Rest/binary>>, Acc, Config) -> clean(Rest, [35] ++ Acc, Config); +clean(<<36, Rest/binary>>, Acc, Config) -> clean(Rest, [36] ++ Acc, Config); +clean(<<37, Rest/binary>>, Acc, Config) -> clean(Rest, [37] ++ Acc, Config); +clean(<<38, Rest/binary>>, Acc, Config) -> clean(Rest, [38] ++ Acc, Config); +clean(<<39, Rest/binary>>, Acc, Config) -> clean(Rest, [39] ++ Acc, Config); +clean(<<40, Rest/binary>>, Acc, Config) -> clean(Rest, [40] ++ 
Acc, Config); +clean(<<41, Rest/binary>>, Acc, Config) -> clean(Rest, [41] ++ Acc, Config); +clean(<<42, Rest/binary>>, Acc, Config) -> clean(Rest, [42] ++ Acc, Config); +clean(<<43, Rest/binary>>, Acc, Config) -> clean(Rest, [43] ++ Acc, Config); +clean(<<44, Rest/binary>>, Acc, Config) -> clean(Rest, [44] ++ Acc, Config); +clean(<<45, Rest/binary>>, Acc, Config) -> clean(Rest, [45] ++ Acc, Config); +clean(<<46, Rest/binary>>, Acc, Config) -> clean(Rest, [46] ++ Acc, Config); +clean(<<47, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(47, Config) ++ Acc, Config); +clean(<<48, Rest/binary>>, Acc, Config) -> clean(Rest, [48] ++ Acc, Config); +clean(<<49, Rest/binary>>, Acc, Config) -> clean(Rest, [49] ++ Acc, Config); +clean(<<50, Rest/binary>>, Acc, Config) -> clean(Rest, [50] ++ Acc, Config); +clean(<<51, Rest/binary>>, Acc, Config) -> clean(Rest, [51] ++ Acc, Config); +clean(<<52, Rest/binary>>, Acc, Config) -> clean(Rest, [52] ++ Acc, Config); +clean(<<53, Rest/binary>>, Acc, Config) -> clean(Rest, [53] ++ Acc, Config); +clean(<<54, Rest/binary>>, Acc, Config) -> clean(Rest, [54] ++ Acc, Config); +clean(<<55, Rest/binary>>, Acc, Config) -> clean(Rest, [55] ++ Acc, Config); +clean(<<56, Rest/binary>>, Acc, Config) -> clean(Rest, [56] ++ Acc, Config); +clean(<<57, Rest/binary>>, Acc, Config) -> clean(Rest, [57] ++ Acc, Config); +clean(<<58, Rest/binary>>, Acc, Config) -> clean(Rest, [58] ++ Acc, Config); +clean(<<59, Rest/binary>>, Acc, Config) -> clean(Rest, [59] ++ Acc, Config); +clean(<<60, Rest/binary>>, Acc, Config) -> clean(Rest, [60] ++ Acc, Config); +clean(<<61, Rest/binary>>, Acc, Config) -> clean(Rest, [61] ++ Acc, Config); +clean(<<62, Rest/binary>>, Acc, Config) -> clean(Rest, [62] ++ Acc, Config); +clean(<<63, Rest/binary>>, Acc, Config) -> clean(Rest, [63] ++ Acc, Config); +clean(<<64, Rest/binary>>, Acc, Config) -> clean(Rest, [64] ++ Acc, Config); +clean(<<65, Rest/binary>>, Acc, Config) -> clean(Rest, [65] ++ Acc, Config); +clean(<<66, 
Rest/binary>>, Acc, Config) -> clean(Rest, [66] ++ Acc, Config); +clean(<<67, Rest/binary>>, Acc, Config) -> clean(Rest, [67] ++ Acc, Config); +clean(<<68, Rest/binary>>, Acc, Config) -> clean(Rest, [68] ++ Acc, Config); +clean(<<69, Rest/binary>>, Acc, Config) -> clean(Rest, [69] ++ Acc, Config); +clean(<<70, Rest/binary>>, Acc, Config) -> clean(Rest, [70] ++ Acc, Config); +clean(<<71, Rest/binary>>, Acc, Config) -> clean(Rest, [71] ++ Acc, Config); +clean(<<72, Rest/binary>>, Acc, Config) -> clean(Rest, [72] ++ Acc, Config); +clean(<<73, Rest/binary>>, Acc, Config) -> clean(Rest, [73] ++ Acc, Config); +clean(<<74, Rest/binary>>, Acc, Config) -> clean(Rest, [74] ++ Acc, Config); +clean(<<75, Rest/binary>>, Acc, Config) -> clean(Rest, [75] ++ Acc, Config); +clean(<<76, Rest/binary>>, Acc, Config) -> clean(Rest, [76] ++ Acc, Config); +clean(<<77, Rest/binary>>, Acc, Config) -> clean(Rest, [77] ++ Acc, Config); +clean(<<78, Rest/binary>>, Acc, Config) -> clean(Rest, [78] ++ Acc, Config); +clean(<<79, Rest/binary>>, Acc, Config) -> clean(Rest, [79] ++ Acc, Config); +clean(<<80, Rest/binary>>, Acc, Config) -> clean(Rest, [80] ++ Acc, Config); +clean(<<81, Rest/binary>>, Acc, Config) -> clean(Rest, [81] ++ Acc, Config); +clean(<<82, Rest/binary>>, Acc, Config) -> clean(Rest, [82] ++ Acc, Config); +clean(<<83, Rest/binary>>, Acc, Config) -> clean(Rest, [83] ++ Acc, Config); +clean(<<84, Rest/binary>>, Acc, Config) -> clean(Rest, [84] ++ Acc, Config); +clean(<<85, Rest/binary>>, Acc, Config) -> clean(Rest, [85] ++ Acc, Config); +clean(<<86, Rest/binary>>, Acc, Config) -> clean(Rest, [86] ++ Acc, Config); +clean(<<87, Rest/binary>>, Acc, Config) -> clean(Rest, [87] ++ Acc, Config); +clean(<<88, Rest/binary>>, Acc, Config) -> clean(Rest, [88] ++ Acc, Config); +clean(<<89, Rest/binary>>, Acc, Config) -> clean(Rest, [89] ++ Acc, Config); +clean(<<90, Rest/binary>>, Acc, Config) -> clean(Rest, [90] ++ Acc, Config); +clean(<<91, Rest/binary>>, Acc, Config) -> clean(Rest, [91] 
++ Acc, Config); +clean(<<92, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(92, Config) ++ Acc, Config); +clean(<<93, Rest/binary>>, Acc, Config) -> clean(Rest, [93] ++ Acc, Config); +clean(<<94, Rest/binary>>, Acc, Config) -> clean(Rest, [94] ++ Acc, Config); +clean(<<95, Rest/binary>>, Acc, Config) -> clean(Rest, [95] ++ Acc, Config); +clean(<<96, Rest/binary>>, Acc, Config) -> clean(Rest, [96] ++ Acc, Config); +clean(<<97, Rest/binary>>, Acc, Config) -> clean(Rest, [97] ++ Acc, Config); +clean(<<98, Rest/binary>>, Acc, Config) -> clean(Rest, [98] ++ Acc, Config); +clean(<<99, Rest/binary>>, Acc, Config) -> clean(Rest, [99] ++ Acc, Config); +clean(<<100, Rest/binary>>, Acc, Config) -> clean(Rest, [100] ++ Acc, Config); +clean(<<101, Rest/binary>>, Acc, Config) -> clean(Rest, [101] ++ Acc, Config); +clean(<<102, Rest/binary>>, Acc, Config) -> clean(Rest, [102] ++ Acc, Config); +clean(<<103, Rest/binary>>, Acc, Config) -> clean(Rest, [103] ++ Acc, Config); +clean(<<104, Rest/binary>>, Acc, Config) -> clean(Rest, [104] ++ Acc, Config); +clean(<<105, Rest/binary>>, Acc, Config) -> clean(Rest, [105] ++ Acc, Config); +clean(<<106, Rest/binary>>, Acc, Config) -> clean(Rest, [106] ++ Acc, Config); +clean(<<107, Rest/binary>>, Acc, Config) -> clean(Rest, [107] ++ Acc, Config); +clean(<<108, Rest/binary>>, Acc, Config) -> clean(Rest, [108] ++ Acc, Config); +clean(<<109, Rest/binary>>, Acc, Config) -> clean(Rest, [109] ++ Acc, Config); +clean(<<110, Rest/binary>>, Acc, Config) -> clean(Rest, [110] ++ Acc, Config); +clean(<<111, Rest/binary>>, Acc, Config) -> clean(Rest, [111] ++ Acc, Config); +clean(<<112, Rest/binary>>, Acc, Config) -> clean(Rest, [112] ++ Acc, Config); +clean(<<113, Rest/binary>>, Acc, Config) -> clean(Rest, [113] ++ Acc, Config); +clean(<<114, Rest/binary>>, Acc, Config) -> clean(Rest, [114] ++ Acc, Config); +clean(<<115, Rest/binary>>, Acc, Config) -> clean(Rest, [115] ++ Acc, Config); +clean(<<116, Rest/binary>>, Acc, Config) -> clean(Rest, 
[116] ++ Acc, Config); +clean(<<117, Rest/binary>>, Acc, Config) -> clean(Rest, [117] ++ Acc, Config); +clean(<<118, Rest/binary>>, Acc, Config) -> clean(Rest, [118] ++ Acc, Config); +clean(<<119, Rest/binary>>, Acc, Config) -> clean(Rest, [119] ++ Acc, Config); +clean(<<120, Rest/binary>>, Acc, Config) -> clean(Rest, [120] ++ Acc, Config); +clean(<<121, Rest/binary>>, Acc, Config) -> clean(Rest, [121] ++ Acc, Config); +clean(<<122, Rest/binary>>, Acc, Config) -> clean(Rest, [122] ++ Acc, Config); +clean(<<123, Rest/binary>>, Acc, Config) -> clean(Rest, [123] ++ Acc, Config); +clean(<<124, Rest/binary>>, Acc, Config) -> clean(Rest, [124] ++ Acc, Config); +clean(<<125, Rest/binary>>, Acc, Config) -> clean(Rest, [125] ++ Acc, Config); +clean(<<126, Rest/binary>>, Acc, Config) -> clean(Rest, [126] ++ Acc, Config); +clean(<<127, Rest/binary>>, Acc, Config) -> clean(Rest, [127] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 -> + clean(Rest, maybe_replace(X, Config) ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#d800 -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#dfff, X < 16#fdd0 -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#fdef, X < 16#fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#10000, X < 16#1fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#20000, X < 16#2fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#30000, X < 16#3fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#40000, X < 16#4fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#50000, X < 16#5fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#60000, X < 16#6fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#70000, X < 16#7fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#80000, X < 
16#8fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#90000, X < 16#9fffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#a0000, X < 16#afffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#b0000, X < 16#bfffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#c0000, X < 16#cfffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#d0000, X < 16#dfffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#e0000, X < 16#efffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#f0000, X < 16#ffffe -> + clean(Rest, [X] ++ Acc, Config); +clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#100000, X < 16#10fffe -> + clean(Rest, [X] ++ Acc, Config); +%% surrogates +clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 -> + clean(Rest, maybe_replace(surrogate, Config) ++ Acc, Config); +%% noncharacters +clean(<<_/utf8, Rest/binary>>, Acc, Config) -> + clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config); +%% u+fffe and u+ffff for R14BXX +clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X == 190; X == 191 -> + clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config); +%% overlong encodings and missing continuations of a 2 byte sequence +clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 -> + clean(strip_continuations(Rest, 1), maybe_replace(badutf, Config) ++ Acc, Config); +%% overlong encodings and missing continuations of a 3 byte sequence +clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 -> + clean(strip_continuations(Rest, 2), maybe_replace(badutf, Config) ++ Acc, Config); +%% overlong encodings and missing continuations of a 4 byte sequence +clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 -> + clean(strip_continuations(Rest, 3), maybe_replace(badutf, Config) ++ Acc, Config); +clean(<<_, Rest/binary>>, Acc, Config) -> + clean(Rest, maybe_replace(badutf, Config) ++ Acc, Config).
+ + +strip_continuations(Bin, 0) -> Bin; +strip_continuations(<<X, Rest/binary>>, N) when X >= 128, X =< 191 -> + strip_continuations(Rest, N - 1); +%% not a continuation byte +strip_continuations(Bin, _) -> Bin. + + +maybe_replace($\b, #config{escaped_strings=true}) -> [$b, $\\]; +maybe_replace($\t, #config{escaped_strings=true}) -> [$t, $\\]; +maybe_replace($\n, #config{escaped_strings=true}) -> [$n, $\\]; +maybe_replace($\f, #config{escaped_strings=true}) -> [$f, $\\]; +maybe_replace($\r, #config{escaped_strings=true}) -> [$r, $\\]; +maybe_replace($\", #config{escaped_strings=true}) -> [$\", $\\]; +maybe_replace($/, Config=#config{escaped_strings=true}) -> + case Config#config.escaped_forward_slashes of + true -> [$/, $\\]; + false -> [$/] + end; +maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\]; +maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> + case Config#config.unescaped_jsonp of + true -> [X]; + false -> lists:reverse(json_escape_sequence(X)) + end; +maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> + lists:reverse(json_escape_sequence(X)); +maybe_replace(noncharacter, #config{replaced_bad_utf8=true}) -> [16#fffd]; +maybe_replace(surrogate, #config{replaced_bad_utf8=true}) -> [16#fffd]; +maybe_replace(badutf, #config{replaced_bad_utf8=true}) -> [16#fffd]; +maybe_replace(_, _) -> {error, badarg}. + + +%% convert a codepoint to it's \uXXXX equiv. +json_escape_sequence(X) -> + <<A:4, B:4, C:4, D:4>> = <<X:16>>, + [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]. + + +to_hex(10) -> $a; +to_hex(11) -> $b; +to_hex(12) -> $c; +to_hex(13) -> $d; +to_hex(14) -> $e; +to_hex(15) -> $f; +to_hex(X) -> X + 48. %% ascii "1" is [49], "2" is [50], etc... + %% for raw input init([]) -> []. @@ -194,8 +603,6 @@ handle_event(end_json, State) -> lists:reverse(State); handle_event(Event, State) -> [Event] ++ State. --include("jsx_strings.hrl"). - -ifdef(TEST). -include_lib("eunit/include/eunit.hrl").
diff --git a/src/jsx_strings.hrl b/src/jsx_strings.hrl deleted file mode 100644 index b572480..0000000 --- a/src/jsx_strings.hrl +++ /dev/null @@ -1,403 +0,0 @@ -clean_string(Bin, #config{dirty_strings=true}) -> Bin; -clean_string(Bin, Config) -> - case Config#config.replaced_bad_utf8 orelse Config#config.escaped_strings of - true -> clean(Bin, [], Config); - false -> ensure_clean(Bin) - end. - - -ensure_clean(Bin) -> - case is_clean(Bin) of - ok -> Bin; - {error, badarg} -> {error, badarg} - end. - -%% fast path for no escaping and no correcting, throws error if string is 'bad' -is_clean(<<>>) -> ok; -is_clean(<<0, Rest/binary>>) -> is_clean(Rest); -is_clean(<<1, Rest/binary>>) -> is_clean(Rest); -is_clean(<<2, Rest/binary>>) -> is_clean(Rest); -is_clean(<<3, Rest/binary>>) -> is_clean(Rest); -is_clean(<<4, Rest/binary>>) -> is_clean(Rest); -is_clean(<<5, Rest/binary>>) -> is_clean(Rest); -is_clean(<<6, Rest/binary>>) -> is_clean(Rest); -is_clean(<<7, Rest/binary>>) -> is_clean(Rest); -is_clean(<<8, Rest/binary>>) -> is_clean(Rest); -is_clean(<<9, Rest/binary>>) -> is_clean(Rest); -is_clean(<<10, Rest/binary>>) -> is_clean(Rest); -is_clean(<<11, Rest/binary>>) -> is_clean(Rest); -is_clean(<<12, Rest/binary>>) -> is_clean(Rest); -is_clean(<<13, Rest/binary>>) -> is_clean(Rest); -is_clean(<<14, Rest/binary>>) -> is_clean(Rest); -is_clean(<<15, Rest/binary>>) -> is_clean(Rest); -is_clean(<<16, Rest/binary>>) -> is_clean(Rest); -is_clean(<<17, Rest/binary>>) -> is_clean(Rest); -is_clean(<<18, Rest/binary>>) -> is_clean(Rest); -is_clean(<<19, Rest/binary>>) -> is_clean(Rest); -is_clean(<<20, Rest/binary>>) -> is_clean(Rest); -is_clean(<<21, Rest/binary>>) -> is_clean(Rest); -is_clean(<<22, Rest/binary>>) -> is_clean(Rest); -is_clean(<<23, Rest/binary>>) -> is_clean(Rest); -is_clean(<<24, Rest/binary>>) -> is_clean(Rest); -is_clean(<<25, Rest/binary>>) -> is_clean(Rest); -is_clean(<<26, Rest/binary>>) -> is_clean(Rest); -is_clean(<<27, Rest/binary>>) -> is_clean(Rest); 
-is_clean(<<28, Rest/binary>>) -> is_clean(Rest); -is_clean(<<29, Rest/binary>>) -> is_clean(Rest); -is_clean(<<30, Rest/binary>>) -> is_clean(Rest); -is_clean(<<31, Rest/binary>>) -> is_clean(Rest); -is_clean(<<32, Rest/binary>>) -> is_clean(Rest); -is_clean(<<33, Rest/binary>>) -> is_clean(Rest); -is_clean(<<34, Rest/binary>>) -> is_clean(Rest); -is_clean(<<35, Rest/binary>>) -> is_clean(Rest); -is_clean(<<36, Rest/binary>>) -> is_clean(Rest); -is_clean(<<37, Rest/binary>>) -> is_clean(Rest); -is_clean(<<38, Rest/binary>>) -> is_clean(Rest); -is_clean(<<39, Rest/binary>>) -> is_clean(Rest); -is_clean(<<40, Rest/binary>>) -> is_clean(Rest); -is_clean(<<41, Rest/binary>>) -> is_clean(Rest); -is_clean(<<42, Rest/binary>>) -> is_clean(Rest); -is_clean(<<43, Rest/binary>>) -> is_clean(Rest); -is_clean(<<44, Rest/binary>>) -> is_clean(Rest); -is_clean(<<45, Rest/binary>>) -> is_clean(Rest); -is_clean(<<46, Rest/binary>>) -> is_clean(Rest); -is_clean(<<47, Rest/binary>>) -> is_clean(Rest); -is_clean(<<48, Rest/binary>>) -> is_clean(Rest); -is_clean(<<49, Rest/binary>>) -> is_clean(Rest); -is_clean(<<50, Rest/binary>>) -> is_clean(Rest); -is_clean(<<51, Rest/binary>>) -> is_clean(Rest); -is_clean(<<52, Rest/binary>>) -> is_clean(Rest); -is_clean(<<53, Rest/binary>>) -> is_clean(Rest); -is_clean(<<54, Rest/binary>>) -> is_clean(Rest); -is_clean(<<55, Rest/binary>>) -> is_clean(Rest); -is_clean(<<56, Rest/binary>>) -> is_clean(Rest); -is_clean(<<57, Rest/binary>>) -> is_clean(Rest); -is_clean(<<58, Rest/binary>>) -> is_clean(Rest); -is_clean(<<59, Rest/binary>>) -> is_clean(Rest); -is_clean(<<60, Rest/binary>>) -> is_clean(Rest); -is_clean(<<61, Rest/binary>>) -> is_clean(Rest); -is_clean(<<62, Rest/binary>>) -> is_clean(Rest); -is_clean(<<63, Rest/binary>>) -> is_clean(Rest); -is_clean(<<64, Rest/binary>>) -> is_clean(Rest); -is_clean(<<65, Rest/binary>>) -> is_clean(Rest); -is_clean(<<66, Rest/binary>>) -> is_clean(Rest); -is_clean(<<67, Rest/binary>>) -> is_clean(Rest); 
-is_clean(<<68, Rest/binary>>) -> is_clean(Rest); -is_clean(<<69, Rest/binary>>) -> is_clean(Rest); -is_clean(<<70, Rest/binary>>) -> is_clean(Rest); -is_clean(<<71, Rest/binary>>) -> is_clean(Rest); -is_clean(<<72, Rest/binary>>) -> is_clean(Rest); -is_clean(<<73, Rest/binary>>) -> is_clean(Rest); -is_clean(<<74, Rest/binary>>) -> is_clean(Rest); -is_clean(<<75, Rest/binary>>) -> is_clean(Rest); -is_clean(<<76, Rest/binary>>) -> is_clean(Rest); -is_clean(<<77, Rest/binary>>) -> is_clean(Rest); -is_clean(<<78, Rest/binary>>) -> is_clean(Rest); -is_clean(<<79, Rest/binary>>) -> is_clean(Rest); -is_clean(<<80, Rest/binary>>) -> is_clean(Rest); -is_clean(<<81, Rest/binary>>) -> is_clean(Rest); -is_clean(<<82, Rest/binary>>) -> is_clean(Rest); -is_clean(<<83, Rest/binary>>) -> is_clean(Rest); -is_clean(<<84, Rest/binary>>) -> is_clean(Rest); -is_clean(<<85, Rest/binary>>) -> is_clean(Rest); -is_clean(<<86, Rest/binary>>) -> is_clean(Rest); -is_clean(<<87, Rest/binary>>) -> is_clean(Rest); -is_clean(<<88, Rest/binary>>) -> is_clean(Rest); -is_clean(<<89, Rest/binary>>) -> is_clean(Rest); -is_clean(<<90, Rest/binary>>) -> is_clean(Rest); -is_clean(<<91, Rest/binary>>) -> is_clean(Rest); -is_clean(<<92, Rest/binary>>) -> is_clean(Rest); -is_clean(<<93, Rest/binary>>) -> is_clean(Rest); -is_clean(<<94, Rest/binary>>) -> is_clean(Rest); -is_clean(<<95, Rest/binary>>) -> is_clean(Rest); -is_clean(<<96, Rest/binary>>) -> is_clean(Rest); -is_clean(<<97, Rest/binary>>) -> is_clean(Rest); -is_clean(<<98, Rest/binary>>) -> is_clean(Rest); -is_clean(<<99, Rest/binary>>) -> is_clean(Rest); -is_clean(<<100, Rest/binary>>) -> is_clean(Rest); -is_clean(<<101, Rest/binary>>) -> is_clean(Rest); -is_clean(<<102, Rest/binary>>) -> is_clean(Rest); -is_clean(<<103, Rest/binary>>) -> is_clean(Rest); -is_clean(<<104, Rest/binary>>) -> is_clean(Rest); -is_clean(<<105, Rest/binary>>) -> is_clean(Rest); -is_clean(<<106, Rest/binary>>) -> is_clean(Rest); -is_clean(<<107, Rest/binary>>) -> 
is_clean(Rest); -is_clean(<<108, Rest/binary>>) -> is_clean(Rest); -is_clean(<<109, Rest/binary>>) -> is_clean(Rest); -is_clean(<<110, Rest/binary>>) -> is_clean(Rest); -is_clean(<<111, Rest/binary>>) -> is_clean(Rest); -is_clean(<<112, Rest/binary>>) -> is_clean(Rest); -is_clean(<<113, Rest/binary>>) -> is_clean(Rest); -is_clean(<<114, Rest/binary>>) -> is_clean(Rest); -is_clean(<<115, Rest/binary>>) -> is_clean(Rest); -is_clean(<<116, Rest/binary>>) -> is_clean(Rest); -is_clean(<<117, Rest/binary>>) -> is_clean(Rest); -is_clean(<<118, Rest/binary>>) -> is_clean(Rest); -is_clean(<<119, Rest/binary>>) -> is_clean(Rest); -is_clean(<<120, Rest/binary>>) -> is_clean(Rest); -is_clean(<<121, Rest/binary>>) -> is_clean(Rest); -is_clean(<<122, Rest/binary>>) -> is_clean(Rest); -is_clean(<<123, Rest/binary>>) -> is_clean(Rest); -is_clean(<<124, Rest/binary>>) -> is_clean(Rest); -is_clean(<<125, Rest/binary>>) -> is_clean(Rest); -is_clean(<<126, Rest/binary>>) -> is_clean(Rest); -is_clean(<<127, Rest/binary>>) -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X < 16#d800 -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X > 16#dfff, X < 16#fdd0 -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X > 16#fdef, X < 16#fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#10000, X < 16#1fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#20000, X < 16#2fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#30000, X < 16#3fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#40000, X < 16#4fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#50000, X < 16#5fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#60000, X < 16#6fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#70000, X < 16#7fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#80000, X < 16#8fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#90000, X < 16#9fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#a0000, X < 16#afffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#b0000, X < 16#bfffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#c0000, X < 16#cfffe -> 
is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#d0000, X < 16#dfffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#e0000, X < 16#efffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#f0000, X < 16#ffffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#100000, X < 16#10fffe -> is_clean(Rest); -is_clean(_Bin) -> {error, badarg}. - - -%% escape and/or replace bad codepoints if requested -clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc)); -clean(<<0, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(0, Config) ++ Acc, Config); -clean(<<1, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(1, Config) ++ Acc, Config); -clean(<<2, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(2, Config) ++ Acc, Config); -clean(<<3, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(3, Config) ++ Acc, Config); -clean(<<4, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(4, Config) ++ Acc, Config); -clean(<<5, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(5, Config) ++ Acc, Config); -clean(<<6, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(6, Config) ++ Acc, Config); -clean(<<7, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(7, Config) ++ Acc, Config); -clean(<<8, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(8, Config) ++ Acc, Config); -clean(<<9, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(9, Config) ++ Acc, Config); -clean(<<10, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(10, Config) ++ Acc, Config); -clean(<<11, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(11, Config) ++ Acc, Config); -clean(<<12, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(12, Config) ++ Acc, Config); -clean(<<13, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(13, Config) ++ Acc, Config); -clean(<<14, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(14, Config) ++ Acc, Config); -clean(<<15, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(15, 
Config) ++ Acc, Config); -clean(<<16, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(16, Config) ++ Acc, Config); -clean(<<17, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(17, Config) ++ Acc, Config); -clean(<<18, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(18, Config) ++ Acc, Config); -clean(<<19, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(19, Config) ++ Acc, Config); -clean(<<20, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(20, Config) ++ Acc, Config); -clean(<<21, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(21, Config) ++ Acc, Config); -clean(<<22, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(22, Config) ++ Acc, Config); -clean(<<23, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(23, Config) ++ Acc, Config); -clean(<<24, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(24, Config) ++ Acc, Config); -clean(<<25, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(25, Config) ++ Acc, Config); -clean(<<26, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(26, Config) ++ Acc, Config); -clean(<<27, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(27, Config) ++ Acc, Config); -clean(<<28, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(28, Config) ++ Acc, Config); -clean(<<29, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(29, Config) ++ Acc, Config); -clean(<<30, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(30, Config) ++ Acc, Config); -clean(<<31, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(31, Config) ++ Acc, Config); -clean(<<32, Rest/binary>>, Acc, Config) -> clean(Rest, [32] ++ Acc, Config); -clean(<<33, Rest/binary>>, Acc, Config) -> clean(Rest, [33] ++ Acc, Config); -clean(<<34, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(34, Config) ++ Acc, Config); -clean(<<35, Rest/binary>>, Acc, Config) -> clean(Rest, [35] ++ Acc, Config); -clean(<<36, Rest/binary>>, Acc, Config) -> clean(Rest, 
[36] ++ Acc, Config); -clean(<<37, Rest/binary>>, Acc, Config) -> clean(Rest, [37] ++ Acc, Config); -clean(<<38, Rest/binary>>, Acc, Config) -> clean(Rest, [38] ++ Acc, Config); -clean(<<39, Rest/binary>>, Acc, Config) -> clean(Rest, [39] ++ Acc, Config); -clean(<<40, Rest/binary>>, Acc, Config) -> clean(Rest, [40] ++ Acc, Config); -clean(<<41, Rest/binary>>, Acc, Config) -> clean(Rest, [41] ++ Acc, Config); -clean(<<42, Rest/binary>>, Acc, Config) -> clean(Rest, [42] ++ Acc, Config); -clean(<<43, Rest/binary>>, Acc, Config) -> clean(Rest, [43] ++ Acc, Config); -clean(<<44, Rest/binary>>, Acc, Config) -> clean(Rest, [44] ++ Acc, Config); -clean(<<45, Rest/binary>>, Acc, Config) -> clean(Rest, [45] ++ Acc, Config); -clean(<<46, Rest/binary>>, Acc, Config) -> clean(Rest, [46] ++ Acc, Config); -clean(<<47, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(47, Config) ++ Acc, Config); -clean(<<48, Rest/binary>>, Acc, Config) -> clean(Rest, [48] ++ Acc, Config); -clean(<<49, Rest/binary>>, Acc, Config) -> clean(Rest, [49] ++ Acc, Config); -clean(<<50, Rest/binary>>, Acc, Config) -> clean(Rest, [50] ++ Acc, Config); -clean(<<51, Rest/binary>>, Acc, Config) -> clean(Rest, [51] ++ Acc, Config); -clean(<<52, Rest/binary>>, Acc, Config) -> clean(Rest, [52] ++ Acc, Config); -clean(<<53, Rest/binary>>, Acc, Config) -> clean(Rest, [53] ++ Acc, Config); -clean(<<54, Rest/binary>>, Acc, Config) -> clean(Rest, [54] ++ Acc, Config); -clean(<<55, Rest/binary>>, Acc, Config) -> clean(Rest, [55] ++ Acc, Config); -clean(<<56, Rest/binary>>, Acc, Config) -> clean(Rest, [56] ++ Acc, Config); -clean(<<57, Rest/binary>>, Acc, Config) -> clean(Rest, [57] ++ Acc, Config); -clean(<<58, Rest/binary>>, Acc, Config) -> clean(Rest, [58] ++ Acc, Config); -clean(<<59, Rest/binary>>, Acc, Config) -> clean(Rest, [59] ++ Acc, Config); -clean(<<60, Rest/binary>>, Acc, Config) -> clean(Rest, [60] ++ Acc, Config); -clean(<<61, Rest/binary>>, Acc, Config) -> clean(Rest, [61] ++ Acc, Config); 
-clean(<<62, Rest/binary>>, Acc, Config) -> clean(Rest, [62] ++ Acc, Config); -clean(<<63, Rest/binary>>, Acc, Config) -> clean(Rest, [63] ++ Acc, Config); -clean(<<64, Rest/binary>>, Acc, Config) -> clean(Rest, [64] ++ Acc, Config); -clean(<<65, Rest/binary>>, Acc, Config) -> clean(Rest, [65] ++ Acc, Config); -clean(<<66, Rest/binary>>, Acc, Config) -> clean(Rest, [66] ++ Acc, Config); -clean(<<67, Rest/binary>>, Acc, Config) -> clean(Rest, [67] ++ Acc, Config); -clean(<<68, Rest/binary>>, Acc, Config) -> clean(Rest, [68] ++ Acc, Config); -clean(<<69, Rest/binary>>, Acc, Config) -> clean(Rest, [69] ++ Acc, Config); -clean(<<70, Rest/binary>>, Acc, Config) -> clean(Rest, [70] ++ Acc, Config); -clean(<<71, Rest/binary>>, Acc, Config) -> clean(Rest, [71] ++ Acc, Config); -clean(<<72, Rest/binary>>, Acc, Config) -> clean(Rest, [72] ++ Acc, Config); -clean(<<73, Rest/binary>>, Acc, Config) -> clean(Rest, [73] ++ Acc, Config); -clean(<<74, Rest/binary>>, Acc, Config) -> clean(Rest, [74] ++ Acc, Config); -clean(<<75, Rest/binary>>, Acc, Config) -> clean(Rest, [75] ++ Acc, Config); -clean(<<76, Rest/binary>>, Acc, Config) -> clean(Rest, [76] ++ Acc, Config); -clean(<<77, Rest/binary>>, Acc, Config) -> clean(Rest, [77] ++ Acc, Config); -clean(<<78, Rest/binary>>, Acc, Config) -> clean(Rest, [78] ++ Acc, Config); -clean(<<79, Rest/binary>>, Acc, Config) -> clean(Rest, [79] ++ Acc, Config); -clean(<<80, Rest/binary>>, Acc, Config) -> clean(Rest, [80] ++ Acc, Config); -clean(<<81, Rest/binary>>, Acc, Config) -> clean(Rest, [81] ++ Acc, Config); -clean(<<82, Rest/binary>>, Acc, Config) -> clean(Rest, [82] ++ Acc, Config); -clean(<<83, Rest/binary>>, Acc, Config) -> clean(Rest, [83] ++ Acc, Config); -clean(<<84, Rest/binary>>, Acc, Config) -> clean(Rest, [84] ++ Acc, Config); -clean(<<85, Rest/binary>>, Acc, Config) -> clean(Rest, [85] ++ Acc, Config); -clean(<<86, Rest/binary>>, Acc, Config) -> clean(Rest, [86] ++ Acc, Config); -clean(<<87, Rest/binary>>, Acc, Config) -> 
clean(Rest, [87] ++ Acc, Config); -clean(<<88, Rest/binary>>, Acc, Config) -> clean(Rest, [88] ++ Acc, Config); -clean(<<89, Rest/binary>>, Acc, Config) -> clean(Rest, [89] ++ Acc, Config); -clean(<<90, Rest/binary>>, Acc, Config) -> clean(Rest, [90] ++ Acc, Config); -clean(<<91, Rest/binary>>, Acc, Config) -> clean(Rest, [91] ++ Acc, Config); -clean(<<92, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(92, Config) ++ Acc, Config); -clean(<<93, Rest/binary>>, Acc, Config) -> clean(Rest, [93] ++ Acc, Config); -clean(<<94, Rest/binary>>, Acc, Config) -> clean(Rest, [94] ++ Acc, Config); -clean(<<95, Rest/binary>>, Acc, Config) -> clean(Rest, [95] ++ Acc, Config); -clean(<<96, Rest/binary>>, Acc, Config) -> clean(Rest, [96] ++ Acc, Config); -clean(<<97, Rest/binary>>, Acc, Config) -> clean(Rest, [97] ++ Acc, Config); -clean(<<98, Rest/binary>>, Acc, Config) -> clean(Rest, [98] ++ Acc, Config); -clean(<<99, Rest/binary>>, Acc, Config) -> clean(Rest, [99] ++ Acc, Config); -clean(<<100, Rest/binary>>, Acc, Config) -> clean(Rest, [100] ++ Acc, Config); -clean(<<101, Rest/binary>>, Acc, Config) -> clean(Rest, [101] ++ Acc, Config); -clean(<<102, Rest/binary>>, Acc, Config) -> clean(Rest, [102] ++ Acc, Config); -clean(<<103, Rest/binary>>, Acc, Config) -> clean(Rest, [103] ++ Acc, Config); -clean(<<104, Rest/binary>>, Acc, Config) -> clean(Rest, [104] ++ Acc, Config); -clean(<<105, Rest/binary>>, Acc, Config) -> clean(Rest, [105] ++ Acc, Config); -clean(<<106, Rest/binary>>, Acc, Config) -> clean(Rest, [106] ++ Acc, Config); -clean(<<107, Rest/binary>>, Acc, Config) -> clean(Rest, [107] ++ Acc, Config); -clean(<<108, Rest/binary>>, Acc, Config) -> clean(Rest, [108] ++ Acc, Config); -clean(<<109, Rest/binary>>, Acc, Config) -> clean(Rest, [109] ++ Acc, Config); -clean(<<110, Rest/binary>>, Acc, Config) -> clean(Rest, [110] ++ Acc, Config); -clean(<<111, Rest/binary>>, Acc, Config) -> clean(Rest, [111] ++ Acc, Config); -clean(<<112, Rest/binary>>, Acc, Config) -> 
clean(Rest, [112] ++ Acc, Config); -clean(<<113, Rest/binary>>, Acc, Config) -> clean(Rest, [113] ++ Acc, Config); -clean(<<114, Rest/binary>>, Acc, Config) -> clean(Rest, [114] ++ Acc, Config); -clean(<<115, Rest/binary>>, Acc, Config) -> clean(Rest, [115] ++ Acc, Config); -clean(<<116, Rest/binary>>, Acc, Config) -> clean(Rest, [116] ++ Acc, Config); -clean(<<117, Rest/binary>>, Acc, Config) -> clean(Rest, [117] ++ Acc, Config); -clean(<<118, Rest/binary>>, Acc, Config) -> clean(Rest, [118] ++ Acc, Config); -clean(<<119, Rest/binary>>, Acc, Config) -> clean(Rest, [119] ++ Acc, Config); -clean(<<120, Rest/binary>>, Acc, Config) -> clean(Rest, [120] ++ Acc, Config); -clean(<<121, Rest/binary>>, Acc, Config) -> clean(Rest, [121] ++ Acc, Config); -clean(<<122, Rest/binary>>, Acc, Config) -> clean(Rest, [122] ++ Acc, Config); -clean(<<123, Rest/binary>>, Acc, Config) -> clean(Rest, [123] ++ Acc, Config); -clean(<<124, Rest/binary>>, Acc, Config) -> clean(Rest, [124] ++ Acc, Config); -clean(<<125, Rest/binary>>, Acc, Config) -> clean(Rest, [125] ++ Acc, Config); -clean(<<126, Rest/binary>>, Acc, Config) -> clean(Rest, [126] ++ Acc, Config); -clean(<<127, Rest/binary>>, Acc, Config) -> clean(Rest, [127] ++ Acc, Config); -clean(<>, Acc, Config) when X == 16#2028; X == 16#2029 -> - clean(Rest, maybe_replace(X, Config) ++ Acc, Config); -clean(<>, Acc, Config) when X < 16#d800 -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X > 16#dfff, X < 16#fdd0 -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X > 16#fdef, X < 16#fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#10000, X < 16#1fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#20000, X < 16#2fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#30000, X < 16#3fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#40000, X < 16#4fffe -> - clean(Rest, [X] ++ Acc, Config); 
-clean(<>, Acc, Config) when X >= 16#50000, X < 16#5fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#60000, X < 16#6fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#70000, X < 16#7fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#80000, X < 16#8fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#90000, X < 16#9fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#a0000, X < 16#afffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#b0000, X < 16#bfffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#c0000, X < 16#cfffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#d0000, X < 16#dfffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#e0000, X < 16#efffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#f0000, X < 16#ffffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<>, Acc, Config) when X >= 16#100000, X < 16#10fffe -> - clean(Rest, [X] ++ Acc, Config); -%% surrogates -clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 -> - clean(Rest, maybe_replace(surrogate, Config) ++ Acc, Config); -%% noncharacters -clean(<<_/utf8, Rest/binary>>, Acc, Config) -> - clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config); -%% u+fffe and u+ffff for R14BXX -clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X == 190; X == 191 -> - clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config); -%% overlong encodings and missing continuations of a 2 byte sequence -clean(<>, Acc, Config) when X >= 192, X =< 223 -> - clean(strip_continuations(Rest, 1), maybe_replace(badutf, Config) ++ Acc, Config); -%% overlong encodings and missing continuations of a 3 byte sequence -clean(<>, Acc, Config) when X >= 224, X =< 239 -> - clean(strip_continuations(Rest, 2), maybe_replace(badutf, Config) ++ Acc, 
Config); -%% overlong encodings and missing continuations of a 4 byte sequence -clean(<>, Acc, Config) when X >= 240, X =< 247 -> - clean(strip_continuations(Rest, 3), maybe_replace(badutf, Config) ++ Acc, Config); -clean(<<_, Rest/binary>>, Acc, Config) -> - clean(Rest, maybe_replace(badutf, Config) ++ Acc, Config). - - -strip_continuations(Bin, 0) -> Bin; -strip_continuations(<>, N) when X >= 128, X =< 191 -> - strip_continuations(Rest, N - 1); -%% not a continuation byte -strip_continuations(Bin, _) -> Bin. - - -maybe_replace($\b, #config{escaped_strings=true}) -> [$b, $\\]; -maybe_replace($\t, #config{escaped_strings=true}) -> [$t, $\\]; -maybe_replace($\n, #config{escaped_strings=true}) -> [$n, $\\]; -maybe_replace($\f, #config{escaped_strings=true}) -> [$f, $\\]; -maybe_replace($\r, #config{escaped_strings=true}) -> [$r, $\\]; -maybe_replace($\", #config{escaped_strings=true}) -> [$\", $\\]; -maybe_replace($/, Config=#config{escaped_strings=true}) -> - case Config#config.escaped_forward_slashes of - true -> [$/, $\\]; - false -> [$/] - end; -maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\]; -maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> - case Config#config.unescaped_jsonp of - true -> [X]; - false -> lists:reverse(json_escape_sequence(X)) - end; -maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> - lists:reverse(json_escape_sequence(X)); -maybe_replace(noncharacter, #config{replaced_bad_utf8=true}) -> [16#fffd]; -maybe_replace(surrogate, #config{replaced_bad_utf8=true}) -> [16#fffd]; -maybe_replace(badutf, #config{replaced_bad_utf8=true}) -> [16#fffd]; -maybe_replace(_, _) -> {error, badarg}. - - -%% convert a codepoint to it's \uXXXX equiv. -json_escape_sequence(X) -> - <> = <>, - [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]. - - -to_hex(10) -> $a; -to_hex(11) -> $b; -to_hex(12) -> $c; -to_hex(13) -> $d; -to_hex(14) -> $e; -to_hex(15) -> $f; -to_hex(X) -> X + 48. 
%% ascii "1" is [49], "2" is [50], etc... \ No newline at end of file diff --git a/src/jsx_tests.hrl b/src/jsx_tests.hrl index 3e8c6e6..fb4bf30 100644 --- a/src/jsx_tests.hrl +++ b/src/jsx_tests.hrl @@ -211,7 +211,6 @@ sane_float_to_list(X) -> Output. -include("jsx_config.hrl"). --include("jsx_strings.hrl"). %% erlang refuses to encode certain codepoints, so fake them @@ -269,56 +268,56 @@ clean_string_test_() -> [ {"clean codepoints", ?_assertEqual( codepoints(), - clean_string(codepoints(), #config{}) + jsx_parser:clean_string(codepoints(), #config{}) )}, {"clean extended codepoints", ?_assertEqual( extended_codepoints(), - clean_string(extended_codepoints(), #config{}) + jsx_parser:clean_string(extended_codepoints(), #config{}) )}, {"escape path codepoints", ?_assertEqual( codepoints(), - clean_string(codepoints(), #config{escaped_strings=true}) + jsx_parser:clean_string(codepoints(), #config{escaped_strings=true}) )}, {"escape path extended codepoints", ?_assertEqual( extended_codepoints(), - clean_string(extended_codepoints(), #config{escaped_strings=true}) + jsx_parser:clean_string(extended_codepoints(), #config{escaped_strings=true}) )}, {"error reserved space", ?_assertEqual( lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, reserved_space()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, reserved_space()) )}, {"error surrogates", ?_assertEqual( lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, surrogates()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, surrogates()) )}, {"error noncharacters", ?_assertEqual( lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, noncharacters()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, 
noncharacters()) )}, {"error extended noncharacters", ?_assertEqual( lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, extended_noncharacters()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, extended_noncharacters()) )}, {"clean reserved space", ?_assertEqual( lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, reserved_space()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, reserved_space()) )}, {"clean surrogates", ?_assertEqual( lists:duplicate(length(surrogates()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, surrogates()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, surrogates()) )}, {"clean noncharacters", ?_assertEqual( lists:duplicate(length(noncharacters()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, noncharacters()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, noncharacters()) )}, {"clean extended noncharacters", ?_assertEqual( lists:duplicate(length(extended_noncharacters()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, extended_noncharacters()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, extended_noncharacters()) )} ]. -maybe_escape(Bin, Config) -> clean_string(Bin, Config). +maybe_escape(Bin, Config) -> jsx_parser:clean_string(Bin, Config). 
escape_test_() -> [ @@ -493,197 +492,197 @@ bad_utf8_test_() -> [ {"noncharacter u+fffe", ?_assertEqual( {error, badarg}, - clean_string(to_fake_utf8(16#fffe), #config{}) + jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{}) )}, {"noncharacter u+fffe replaced", ?_assertEqual( <<16#fffd/utf8>>, - clean_string(to_fake_utf8(16#fffe), #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{replaced_bad_utf8=true}) )}, {"noncharacter u+ffff", ?_assertEqual( {error, badarg}, - clean_string(to_fake_utf8(16#ffff), #config{}) + jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{}) )}, {"noncharacter u+ffff replaced", ?_assertEqual( <<16#fffd/utf8>>, - clean_string(to_fake_utf8(16#ffff), #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{replaced_bad_utf8=true}) )}, {"orphan continuation byte u+0080", ?_assertEqual( {error, badarg}, - clean_string(<<16#0080>>, #config{}) + jsx_parser:clean_string(<<16#0080>>, #config{}) )}, {"orphan continuation byte u+0080 replaced", ?_assertEqual( <<16#fffd/utf8>>, - clean_string(<<16#0080>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#0080>>, #config{replaced_bad_utf8=true}) )}, {"orphan continuation byte u+00bf", ?_assertEqual( {error, badarg}, - clean_string(<<16#00bf>>, #config{}) + jsx_parser:clean_string(<<16#00bf>>, #config{}) )}, {"orphan continuation byte u+00bf replaced", ?_assertEqual( <<16#fffd/utf8>>, - clean_string(<<16#00bf>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00bf>>, #config{replaced_bad_utf8=true}) )}, {"2 continuation bytes", ?_assertEqual( {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) )}, {"2 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 2), - clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{replaced_bad_utf8=true}) + 
jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{replaced_bad_utf8=true}) )}, {"3 continuation bytes", ?_assertEqual( {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) )}, {"3 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 3), - clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{replaced_bad_utf8=true}) )}, {"4 continuation bytes", ?_assertEqual( {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) )}, {"4 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 4), - clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{replaced_bad_utf8=true}) )}, {"5 continuation bytes", ?_assertEqual( {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) )}, {"5 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 5), - clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{replaced_bad_utf8=true}) )}, {"6 continuation bytes", ?_assertEqual( {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) )}, {"6 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 6), - clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 
6))/binary>>, #config{replaced_bad_utf8=true}) )}, {"all continuation bytes", ?_assertEqual( {error, badarg}, - clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{}) + jsx_parser:clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{}) )}, {"all continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), - clean_string( + jsx_parser:clean_string( <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{replaced_bad_utf8=true} ) )}, {"lonely start byte", ?_assertEqual( {error, badarg}, - clean_string(<<16#00c0>>, #config{}) + jsx_parser:clean_string(<<16#00c0>>, #config{}) )}, {"lonely start byte replaced", ?_assertEqual( <<16#fffd/utf8>>, - clean_string(<<16#00c0>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00c0>>, #config{replaced_bad_utf8=true}) )}, {"lonely start bytes (2 byte)", ?_assertEqual( {error, badarg}, - clean_string(<<16#00c0, 32, 16#00df>>, #config{}) + jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{}) )}, {"lonely start bytes (2 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, - clean_string(<<16#00c0, 32, 16#00df>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{replaced_bad_utf8=true}) )}, {"lonely start bytes (3 byte)", ?_assertEqual( {error, badarg}, - clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) + jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) )}, {"lonely start bytes (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, - clean_string(<<16#00e0, 32, 16#00ef>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, #config{replaced_bad_utf8=true}) )}, {"lonely start bytes (4 byte)", ?_assertEqual( {error, badarg}, - clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) + jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) )}, {"lonely start bytes (4 byte) replaced", 
?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, - clean_string(<<16#00f0, 32, 16#00f7>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{replaced_bad_utf8=true}) )}, {"missing continuation byte (3 byte)", ?_assertEqual( {error, badarg}, - clean_string(<<224, 160, 32>>, #config{}) + jsx_parser:clean_string(<<224, 160, 32>>, #config{}) )}, {"missing continuation byte (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - clean_string(<<224, 160, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<224, 160, 32>>, #config{replaced_bad_utf8=true}) )}, {"missing continuation byte (4 byte missing one)", ?_assertEqual( {error, badarg}, - clean_string(<<240, 144, 128, 32>>, #config{}) + jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{}) )}, {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - clean_string(<<240, 144, 128, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{replaced_bad_utf8=true}) )}, {"missing continuation byte (4 byte missing two)", ?_assertEqual( {error, badarg}, - clean_string(<<240, 144, 32>>, #config{}) + jsx_parser:clean_string(<<240, 144, 32>>, #config{}) )}, {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - clean_string(<<240, 144, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<240, 144, 32>>, #config{replaced_bad_utf8=true}) )}, {"overlong encoding of u+002f (2 byte)", ?_assertEqual( {error, badarg}, - clean_string(<<16#c0, 16#af, 32>>, #config{}) + jsx_parser:clean_string(<<16#c0, 16#af, 32>>, #config{}) )}, {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - clean_string(<<16#c0, 16#af, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#c0, 16#af, 32>>, #config{replaced_bad_utf8=true}) )}, {"overlong encoding of u+002f (3 byte)", ?_assertEqual( {error, badarg}, 
- clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) + jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) )}, {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true}) )}, {"overlong encoding of u+002f (4 byte)", ?_assertEqual( {error, badarg}, - clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) + jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) )}, {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true}) )}, {"highest overlong 2 byte sequence", ?_assertEqual( {error, badarg}, - clean_string(<<16#c1, 16#bf, 32>>, #config{}) + jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{}) )}, {"highest overlong 2 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - clean_string(<<16#c1, 16#bf, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{replaced_bad_utf8=true}) )}, {"highest overlong 3 byte sequence", ?_assertEqual( {error, badarg}, - clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) + jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) )}, {"highest overlong 3 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{replaced_bad_utf8=true}) )}, {"highest overlong 4 byte sequence", ?_assertEqual( {error, badarg}, - clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) + jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) )}, {"highest overlong 4 byte sequence replaced", ?_assertEqual( 
<<16#fffd/utf8, 32>>, - clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{replaced_bad_utf8=true}) )} ]. json_escape_sequence_test_() -> [ - {"json escape sequence test - 16#0000", ?_assertEqual(json_escape_sequence(16#0000), "\\u0000")}, - {"json escape sequence test - 16#abc", ?_assertEqual(json_escape_sequence(16#abc), "\\u0abc")}, - {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} + {"json escape sequence test - 16#0000", ?_assertEqual(jsx_parser:json_escape_sequence(16#0000), "\\u0000")}, + {"json escape sequence test - 16#abc", ?_assertEqual(jsx_parser:json_escape_sequence(16#abc), "\\u0abc")}, + {"json escape sequence test - 16#def", ?_assertEqual(jsx_parser:json_escape_sequence(16#def), "\\u0def")} ]. \ No newline at end of file From 6797bf3ed7bbf77f6fd41bc00c8f378a032ce74e Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 10 Jul 2013 05:41:19 +0000 Subject: [PATCH 07/40] rename `explicit_end` to `stream` in prep for behavior change --- README.md | 6 +++--- src/jsx_config.erl | 14 +++++++------- src/jsx_config.hrl | 2 +- src/jsx_decoder.erl | 28 ++++++++++++++-------------- src/jsx_parser.erl | 4 ++-- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index ff4d670..81f8dda 100644 --- a/README.md +++ b/README.md @@ -231,7 +231,7 @@ consider the parsing complete if input is exhausted and the json text is not unambiguously incomplete. this is mostly relevant when parsing bare numbers like `<<"1234">>`. this could be a complete json integer or just the beginning of a json integer that is being parsed incrementally. jsx will treat it as a whole -integer. calling jsx with the [option](#options) `explicit_end` reverses this +integer. 
calling jsx with the [option](#options) `stream` reverses this behavior and never considers parsing complete until the `incomplete` function is called with the argument `end_stream` @@ -308,7 +308,7 @@ option() = replaced_bad_utf8 | dirty_strings | ignored_bad_escapes | relax - | explicit_end + | stream ``` jsx functions all take a common set of options. not all flags have meaning @@ -384,7 +384,7 @@ additional options beyond these. see string term. note that this overrides `ignored_bad_escapes`, `unescaped_jsonp` and `escaped_strings` -- `explicit_end` +- `stream` see [incomplete input](#incomplete-input) diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 183dbbf..1cb4d33 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -44,8 +44,8 @@ parse_config([replaced_bad_utf8|Rest], Config) -> parse_config(Rest, Config#config{replaced_bad_utf8=true}); parse_config([escaped_forward_slashes|Rest], Config) -> parse_config(Rest, Config#config{escaped_forward_slashes=true}); -parse_config([explicit_end|Rest], Config) -> - parse_config(Rest, Config#config{explicit_end=true}); +parse_config([stream|Rest], Config) -> + parse_config(Rest, Config#config{stream=true}); parse_config([single_quoted_strings|Rest], Config) -> parse_config(Rest, Config#config{single_quoted_strings=true}); parse_config([unescaped_jsonp|Rest], Config) -> @@ -115,7 +115,7 @@ valid_flags() -> escaped_strings, dirty_strings, ignored_bad_escapes, - explicit_end, + stream, relax, error_handler, incomplete_handler, @@ -157,7 +157,7 @@ config_test_() -> #config{ replaced_bad_utf8=true, escaped_forward_slashes=true, - explicit_end=true, + stream=true, single_quoted_strings=true, unescaped_jsonp=true, comments=true, @@ -167,7 +167,7 @@ config_test_() -> parse_config([ replaced_bad_utf8, escaped_forward_slashes, - explicit_end, + stream, single_quoted_strings, unescaped_jsonp, comments, @@ -246,13 +246,13 @@ config_to_list_test_() -> comments, dirty_strings, ignored_bad_escapes, - explicit_end + 
stream ], config_to_list( #config{ replaced_bad_utf8=true, escaped_forward_slashes=true, - explicit_end=true, + stream=true, single_quoted_strings=true, unescaped_jsonp=true, comments=true, diff --git a/src/jsx_config.hrl b/src/jsx_config.hrl index 502f103..09ebce6 100644 --- a/src/jsx_config.hrl +++ b/src/jsx_config.hrl @@ -7,7 +7,7 @@ escaped_strings = false, dirty_strings = false, ignored_bad_escapes = false, - explicit_end = false, + stream = false, error_handler = false, incomplete_handler = false }). \ No newline at end of file diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 18ce26a..f5d09c7 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -145,7 +145,7 @@ incomplete(State, Rest, Handler, Acc, Stack, Config=#config{incomplete_handler=f {incomplete, fun(Stream) when is_binary(Stream) -> resume(<>, State, Handler, Acc, Stack, Config); (end_stream) -> - case resume(<>, State, Handler, Acc, Stack, Config#config{explicit_end=false}) of + case resume(<>, State, Handler, Acc, Stack, Config#config{stream=false}) of {incomplete, _} -> ?error(State, Rest, Handler, Acc, Stack, Config); Else -> Else end @@ -748,7 +748,7 @@ zero(<>, Handler, Acc, Stack, Config) -> decimal(Rest, Handler, acc_seq(Acc, ?decimalpoint), Stack, Config); zero(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> e(Rest, Handler, acc_seq(Acc, ".0e"), Stack, Config); -zero(<<>>, Handler, Acc, [], Config=#config{explicit_end=false}) -> +zero(<<>>, Handler, Acc, [], Config=#config{stream=false}) -> finish_number(<<>>, Handler, {zero, Acc}, [], Config); zero(<<>>, Handler, Acc, Stack, Config) -> incomplete(value, (end_seq(Acc)), Handler, Stack, Config); @@ -806,7 +806,7 @@ exp(Bin, Handler, Acc, Stack, Config) -> finish_number(Bin, Handler, {exp, Acc}, Stack, Config). 
-finish_number(Rest, Handler, Acc, [], Config=#config{explicit_end=false}) -> +finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), [], Config); finish_number(<>, Handler, Acc, [object|Stack], Config) -> maybe_done(Rest, handle_event([format_number(Acc), end_object], Handler, Config), Stack, Config); @@ -901,7 +901,7 @@ comment(<<_/utf8, Rest/binary>>, Handler, Resume, Stack, Config) -> comment(Rest, Handler, Resume, Stack, Config); comment(<<_, Rest/binary>>, Handler, Resume, Stack, Config=#config{replaced_bad_utf8=true}) -> comment(Rest, Handler, Resume, Stack, Config); -comment(<<>>, Handler, done, [Comment], Config=#config{explicit_end=false}) +comment(<<>>, Handler, done, [Comment], Config=#config{stream=false}) when Comment == comment; Comment == multicomment -> resume(<<>>, done, Handler, unused, [], Config); comment(<<>>, Handler, Resume, Stack, Config) -> @@ -942,7 +942,7 @@ done(<>, Handler, Stack, Config=#config{comments=t comment(Rest, Handler, done, [multicomment|Stack], Config); done(<>, Handler, Stack, Config=#config{comments=true}) -> incomplete(done, <>, Handler, Stack, Config); -done(<<>>, {Handler, State}, [], Config=#config{explicit_end=true}) -> +done(<<>>, {Handler, State}, [], Config=#config{stream=true}) -> incomplete(done, <<>>, {Handler, State}, [], Config); done(<<>>, {_Handler, State}, [], _Config) -> State; done(Bin, Handler, Stack, Config) -> ?error(done, Bin, Handler, Stack, Config). 
@@ -968,7 +968,7 @@ decode(JSON, Config) -> Incremental = try Final = lists:foldl( fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, - decoder(jsx, [], [explicit_end] ++ Config), + decoder(jsx, [], [stream] ++ Config), json_to_bytes(JSON) ), Final(end_stream) @@ -1931,19 +1931,19 @@ error_test_() -> )}, {"zero error", ?_assertError( badarg, - Decode(<<"0"/utf8, 0>>, [explicit_end]) + Decode(<<"0"/utf8, 0>>, [stream]) )}, {"integer error", ?_assertError( badarg, - Decode(<<"1"/utf8, 0>>, [explicit_end]) + Decode(<<"1"/utf8, 0>>, [stream]) )}, {"decimal error", ?_assertError( badarg, - Decode(<<"1.0"/utf8, 0>>, [explicit_end]) + Decode(<<"1.0"/utf8, 0>>, [stream]) )}, {"exp error", ?_assertError( badarg, - Decode(<<"1.0e1"/utf8, 0>>, [explicit_end]) + Decode(<<"1.0e1"/utf8, 0>>, [stream]) )}, {"e error", ?_assertError( badarg, @@ -2026,19 +2026,19 @@ custom_error_handler_test_() -> )}, {"zero error", ?_assertEqual( {value, <<"0"/utf8, 0>>}, - Decode(<<"0"/utf8, 0>>, [explicit_end, {error_handler, Error}]) + Decode(<<"0"/utf8, 0>>, [stream, {error_handler, Error}]) )}, {"integer error", ?_assertEqual( {integer, <<0>>}, - Decode(<<"1"/utf8, 0>>, [explicit_end, {error_handler, Error}]) + Decode(<<"1"/utf8, 0>>, [stream, {error_handler, Error}]) )}, {"decimal error", ?_assertEqual( {decimal, <<0>>}, - Decode(<<"1.0"/utf8, 0>>, [explicit_end, {error_handler, Error}]) + Decode(<<"1.0"/utf8, 0>>, [stream, {error_handler, Error}]) )}, {"exp error", ?_assertEqual( {exp, <<0>>}, - Decode(<<"1.0e1"/utf8, 0>>, [explicit_end, {error_handler, Error}]) + Decode(<<"1.0e1"/utf8, 0>>, [stream, {error_handler, Error}]) )}, {"e error", ?_assertEqual( {decimal, <<$e, 0>>}, diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index f327bab..099b71e 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -169,7 +169,7 @@ maybe_done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) -> maybe_done(Token, Handler, Stack, Config) -> maybe_done([Token], Handler, Stack, 
Config). -done([], Handler, [], Config=#config{explicit_end=true}) -> +done([], Handler, [], Config=#config{stream=true}) -> incomplete(done, Handler, [], Config); done(Tokens, Handler, [], Config) when Tokens == [end_json]; Tokens == [] -> {_, State} = handle_event(end_json, Handler, Config), @@ -617,7 +617,7 @@ parse(Events, Config) -> Incremental = try Final = lists:foldl( fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end, - parser(jsx, [], [explicit_end] ++ Config), + parser(jsx, [], [stream] ++ Config), lists:map(fun(X) -> [X] end, Events) ), Final(end_stream) From e4a401a3a65a4bbdc56a737ef6c53d160251b848 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 13 Jul 2013 02:05:16 +0000 Subject: [PATCH 08/40] incomplete input now results in an error when parsing json or jsx internal form streaming decoding is now only available when forced with the `stream` option and no longer returns until forced to via the token `end_stream`. this api is still subject to being replaced with an even more explicit streaming mode --- src/jsx_decoder.erl | 30 ++++++++++++++++++++++++++++-- src/jsx_parser.erl | 2 ++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index f5d09c7..26e5aa6 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -138,10 +138,15 @@ resume(Rest, State, Handler, Acc, Stack, Config) -> -endif. +incomplete(State, Rest, Handler, Stack, Config = #config{stream=false}) -> + ?error(State, Rest, Handler, Stack, Config); incomplete(State, Rest, Handler, Stack, Config) -> incomplete(State, Rest, Handler, unused, Stack, Config). 
-incomplete(State, Rest, Handler, Acc, Stack, Config=#config{incomplete_handler=false}) -> + +incomplete(State, Rest, Handler, Acc, Stack, Config = #config{stream=false}) -> + ?error(State, Rest, Handler, Acc, Stack, Config); +incomplete(State, Rest, Handler, Acc, Stack, Config = #config{incomplete_handler=false}) -> {incomplete, fun(Stream) when is_binary(Stream) -> resume(<>, State, Handler, Acc, Stack, Config); (end_stream) -> @@ -151,7 +156,7 @@ incomplete(State, Rest, Handler, Acc, Stack, Config=#config{incomplete_handler=f end end }; -incomplete(State, Rest, Handler, Acc, Stack, Config=#config{incomplete_handler=F}) -> +incomplete(State, Rest, Handler, Acc, Stack, Config = #config{incomplete_handler=F}) -> F(Rest, {decoder, State, Handler, Acc, Stack}, jsx_config:config_to_list(Config)). @@ -1898,6 +1903,27 @@ bom_test_() -> ]. +incomplete_test_() -> + [ + {"stream false", ?_assertError( + badarg, + start(<<"{">>, {jsx, []}, [], jsx_config:parse_config([])) + )}, + {"stream true", ?_assert( + case start(<<"{">>, {jsx, []}, [], jsx_config:parse_config([stream])) of + {incomplete, _} -> true; + _ -> false + end + )}, + {"complete input", ?_assert( + case start(<<"{}">>, {jsx, []}, [], jsx_config:parse_config([stream])) of + {incomplete, _} -> true; + _ -> false + end + )} + ]. + + error_test_() -> Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, [ diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 099b71e..6b27e35 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -72,6 +72,8 @@ resume(Rest, State, Handler, Stack, Config) -> -endif. 
+incomplete(State, Handler, Stack, Config=#config{stream=false}) -> + ?error(State, [], Handler, Stack, Config); incomplete(State, Handler, Stack, Config=#config{incomplete_handler=false}) -> {incomplete, fun(end_stream) -> case resume([end_json], State, Handler, Stack, Config) of From 7b05d0e1b7c09c7dc58559bb4c1392b191275024 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Thu, 22 Aug 2013 06:58:43 +0000 Subject: [PATCH 09/40] remove deprecated option flags handling --- src/jsx_config.erl | 39 +-------------------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 1cb4d33..2aa9107 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -76,18 +76,6 @@ parse_config([{incomplete_handler, IncompleteHandler}|Rest] = Options, Config) w ; _ -> erlang:error(badarg, [Options, Config]) end; %% deprecated flags -parse_config([loose_unicode|Rest], Config) -> - parse_config(Rest, Config#config{replaced_bad_utf8=true}); -parse_config([escape_forward_slash|Rest], Config) -> - parse_config(Rest, Config#config{escaped_forward_slashes=true}); -parse_config([single_quotes|Rest], Config) -> - parse_config(Rest, Config#config{single_quoted_strings=true}); -parse_config([no_jsonp_escapes|Rest], Config) -> - parse_config(Rest, Config#config{unescaped_jsonp=true}); -parse_config([json_escape|Rest], Config) -> - parse_config(Rest, Config#config{escaped_strings=true}); -parse_config([ignore_bad_escapes|Rest], Config) -> - parse_config(Rest, Config#config{ignored_bad_escapes=true}); parse_config(Options, Config) -> erlang:error(badarg, [Options, Config]). 
@@ -118,14 +106,7 @@ valid_flags() -> stream, relax, error_handler, - incomplete_handler, - %% deprecated flags - loose_unicode, %% replaced_bad_utf8 - escape_forward_slash, %% escaped_forward_slashes - single_quotes, %% single_quoted_strings - no_jsonp_escapes, %% unescaped_jsonp - json_escape, %% escaped_strings - ignore_bad_escapes %% ignored_bad_escapes + incomplete_handler ]. @@ -187,24 +168,6 @@ config_test_() -> parse_config([relax]) ) }, - {"deprecated flags", ?_assertEqual( - #config{ - replaced_bad_utf8=true, - escaped_forward_slashes=true, - single_quoted_strings=true, - unescaped_jsonp=true, - escaped_strings=true, - ignored_bad_escapes=true - }, - parse_config([ - loose_unicode, - escape_forward_slash, - single_quotes, - no_jsonp_escapes, - json_escape, - ignore_bad_escapes - ]) - )}, {"error_handler flag", ?_assertEqual( #config{error_handler=fun ?MODULE:fake_error_handler/3}, parse_config([{error_handler, fun ?MODULE:fake_error_handler/3}]) From f137ff484bd5440eadad9a92243b3a0f0119e472 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 20 Oct 2013 03:16:56 +0000 Subject: [PATCH 10/40] fix bad utf8 encodings by default (convert to u+FFFD) this (temporarily, i hope) removes the fast path for handling strings internally without escaping or replacing bad utf8 --- src/jsx_config.erl | 16 ++- src/jsx_config.hrl | 2 +- src/jsx_decoder.erl | 167 ++++++++++++------------ src/jsx_encoder.erl | 4 +- src/jsx_parser.erl | 301 +++++++++++--------------------------------- src/jsx_tests.hrl | 108 ++++++++-------- 6 files changed, 220 insertions(+), 378 deletions(-) diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 2aa9107..0427ed5 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -40,8 +40,8 @@ parse_config(Config) -> parse_config([], Config) -> Config; -parse_config([replaced_bad_utf8|Rest], Config) -> - parse_config(Rest, Config#config{replaced_bad_utf8=true}); +parse_config([strict_utf8|Rest], Config) -> + parse_config(Rest, 
Config#config{strict_utf8=true}); parse_config([escaped_forward_slashes|Rest], Config) -> parse_config(Rest, Config#config{escaped_forward_slashes=true}); parse_config([stream|Rest], Config) -> @@ -60,7 +60,6 @@ parse_config([ignored_bad_escapes|Rest], Config) -> parse_config(Rest, Config#config{ignored_bad_escapes=true}); parse_config([relax|Rest], Config) -> parse_config(Rest, Config#config{ - replaced_bad_utf8 = true, single_quoted_strings = true, comments = true, ignored_bad_escapes = true @@ -95,7 +94,7 @@ config_to_list(Config) -> valid_flags() -> [ - replaced_bad_utf8, + strict_utf8, escaped_forward_slashes, single_quoted_strings, unescaped_jsonp, @@ -136,7 +135,7 @@ config_test_() -> {"all flags", ?_assertEqual( #config{ - replaced_bad_utf8=true, + strict_utf8=true, escaped_forward_slashes=true, stream=true, single_quoted_strings=true, @@ -146,7 +145,7 @@ config_test_() -> ignored_bad_escapes=true }, parse_config([ - replaced_bad_utf8, + strict_utf8, escaped_forward_slashes, stream, single_quoted_strings, @@ -160,7 +159,6 @@ config_test_() -> {"relax flag", ?_assertEqual( #config{ - replaced_bad_utf8=true, single_quoted_strings=true, comments=true, ignored_bad_escapes=true @@ -202,7 +200,7 @@ config_to_list_test_() -> )}, {"all flags", ?_assertEqual( [ - replaced_bad_utf8, + strict_utf8, escaped_forward_slashes, single_quoted_strings, unescaped_jsonp, @@ -213,7 +211,7 @@ config_to_list_test_() -> ], config_to_list( #config{ - replaced_bad_utf8=true, + strict_utf8=true, escaped_forward_slashes=true, stream=true, single_quoted_strings=true, diff --git a/src/jsx_config.hrl b/src/jsx_config.hrl index 09ebce6..5b0f21e 100644 --- a/src/jsx_config.hrl +++ b/src/jsx_config.hrl @@ -1,5 +1,5 @@ -record(config, { - replaced_bad_utf8 = false, + strict_utf8 = false, escaped_forward_slashes = false, single_quoted_strings = false, unescaped_jsonp = false, diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 26e5aa6..f521a04 100644 --- a/src/jsx_decoder.erl +++ 
b/src/jsx_decoder.erl @@ -548,36 +548,35 @@ string(<>, Handler, Acc, Stack, Config) when X >= 16#f0, X =< 16#f4, Y >= 16#80, Y =< 16#bf, Z >= 16#80, Z =< 16#bf -> - incomplete(string, <>, Handler, Acc, Stack, Config); + incomplete(string, <>, Handler, Acc, Stack, Config); %% surrogates -string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X >= 160 -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); %% u+xfffe, u+xffff, control codes and other noncharacters -string(<<_/utf8, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) -> +string(<<_/utf8, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); %% u+fffe and u+ffff for R14BXX (subsequent runtimes will happily match the %% preceeding clause -string(<<239, 191, X, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<<239, 191, X, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X == 190; X == 191 -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); %% overlong encodings and missing continuations of a 2 byte sequence -string(<>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X >= 192, X =< 223 -> strip_continuations(Rest, Handler, Acc, Stack, Config, 1); %% overlong encodings and missing continuations of a 3 byte sequence -string(<>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X >= 224, X =< 239 -> strip_continuations(Rest, Handler, Acc, Stack, Config, 2); %% overlong encodings and missing continuations of a 4 byte sequence -string(<>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<>, Handler, 
Acc, Stack, Config=#config{strict_utf8=false}) when X >= 240, X =< 247 -> strip_continuations(Rest, Handler, Acc, Stack, Config, 3); %% incompletes and unexpected bytes, including orphan continuations -string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) -> +string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); -string(Bin, Handler, Acc, Stack, Config) -> - ?error(string, Bin, Handler, Acc, Stack, Config). +string(Bin, Handler, Acc, Stack, Config) -> ?error(string, Bin, Handler, Acc, Stack, Config). doublequote(<>, Handler, Acc, [key|_] = Stack, Config) -> @@ -658,9 +657,9 @@ unescape(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, A when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), ?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z) -> - case Config#config.replaced_bad_utf8 of - true -> string(Rest, Handler, acc_seq(Acc, [16#fffd, 16#fffd]), Stack, Config); - false -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) + case Config#config.strict_utf8 of + true -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config); + false -> string(Rest, Handler, acc_seq(Acc, [16#fffd, 16#fffd]), Stack, Config) end; unescape(<<$u, $d, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) when (A == $8 orelse A == $9 orelse A == $a orelse A == $b) andalso @@ -677,9 +676,9 @@ unescape(<<$u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config) case erlang:list_to_integer([A, B, C, D], 16) of Codepoint when Codepoint < 16#d800; Codepoint > 16#dfff -> string(Rest, Handler, acc_seq(Acc, maybe_replace(Codepoint, Config)), Stack, Config); - _ when Config#config.replaced_bad_utf8 -> - string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); - _ -> ?error(string, <>, Handler, Acc, Stack, Config) + _ when 
Config#config.strict_utf8 -> + ?error(string, <>, Handler, Acc, Stack, Config); + _ -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config) end; unescape(Bin, Handler, Acc, Stack, Config=#config{ignored_bad_escapes=true}) -> string(Bin, Handler, acc_seq(Acc, ?rsolidus), Stack, Config); @@ -904,7 +903,7 @@ comment(<>, Handler, Resume, [multicomment|_] = Stack, Config) -> incomplete(comment, <>, Handler, Resume, Stack, Config); comment(<<_/utf8, Rest/binary>>, Handler, Resume, Stack, Config) -> comment(Rest, Handler, Resume, Stack, Config); -comment(<<_, Rest/binary>>, Handler, Resume, Stack, Config=#config{replaced_bad_utf8=true}) -> +comment(<<_, Rest/binary>>, Handler, Resume, Stack, Config=#config{strict_utf8=false}) -> comment(Rest, Handler, Resume, Stack, Config); comment(<<>>, Handler, done, [Comment], Config=#config{stream=false}) when Comment == comment; Comment == multicomment -> @@ -1250,15 +1249,15 @@ comments_test_() -> )}, {"// comment with badutf", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, [comments, replaced_bad_utf8]) + decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, [comments]) )}, {"/**/ comment with badutf", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment ", 16#00c0, " */ true]">>, [comments, replaced_bad_utf8]) + decode(<<"[ /* comment ", 16#00c0, " */ true]">>, [comments]) )}, {"/**/ comment with badutf preceeded by /", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment /", 16#00c0, " */ true]">>, [comments, replaced_bad_utf8]) + decode(<<"[ /* comment /", 16#00c0, " */ true]">>, [comments]) )} ]. 
@@ -1326,35 +1325,35 @@ clean_string_test_() -> )}, {"error reserved space", ?_assertEqual( lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, reserved_space()) + lists:map(fun(Codepoint) -> decode(Codepoint, [strict_utf8]) end, reserved_space()) )}, {"error surrogates", ?_assertEqual( lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, surrogates()) + lists:map(fun(Codepoint) -> decode(Codepoint, [strict_utf8]) end, surrogates()) )}, {"error noncharacters", ?_assertEqual( lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, noncharacters()) + lists:map(fun(Codepoint) -> decode(Codepoint, [strict_utf8]) end, noncharacters()) )}, {"error extended noncharacters", ?_assertEqual( lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, extended_noncharacters()) + lists:map(fun(Codepoint) -> decode(Codepoint, [strict_utf8]) end, extended_noncharacters()) )}, {"clean reserved space", ?_assertEqual( lists:duplicate(length(reserved_space()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, reserved_space()) + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, reserved_space()) )}, {"clean surrogates", ?_assertEqual( lists:duplicate(length(surrogates()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, surrogates()) + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, surrogates()) )}, {"clean noncharacters", ?_assertEqual( lists:duplicate(length(noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, noncharacters()) + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, noncharacters()) )}, {"clean 
extended noncharacters", ?_assertEqual( lists:duplicate(length(extended_noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, extended_noncharacters()) + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, extended_noncharacters()) )}, {"dirty \\uwxyz", ?_assertEqual( [{string, <<"\\uwxyz">>}, end_json], @@ -1405,190 +1404,190 @@ bad_utf8_test_() -> [ {"noncharacter u+fffe", ?_assertError( badarg, - decode_bad_utf(<<239, 191, 190>>, []) + decode_bad_utf(<<239, 191, 190>>, [strict_utf8]) )}, {"noncharacter u+fffe replaced", ?_assertEqual( <<16#fffd/utf8>>, - decode_bad_utf(<<239, 191, 190>>, [replaced_bad_utf8]) + decode_bad_utf(<<239, 191, 190>>, []) )}, {"noncharacter u+ffff", ?_assertError( badarg, - decode_bad_utf(<<239, 191, 191>>, []) + decode_bad_utf(<<239, 191, 191>>, [strict_utf8]) )}, {"noncharacter u+ffff replaced", ?_assertEqual( <<16#fffd/utf8>>, - decode_bad_utf(<<239, 191, 191>>, [replaced_bad_utf8]) + decode_bad_utf(<<239, 191, 191>>, []) )}, {"orphan continuation byte u+0080", ?_assertError( badarg, - decode_bad_utf(<<16#0080>>, []) + decode_bad_utf(<<16#0080>>, [strict_utf8]) )}, {"orphan continuation byte u+0080 replaced", ?_assertEqual( <<16#fffd/utf8>>, - decode_bad_utf(<<16#0080>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#0080>>, []) )}, {"orphan continuation byte u+00bf", ?_assertError( badarg, - decode_bad_utf(<<16#00bf>>, []) + decode_bad_utf(<<16#00bf>>, [strict_utf8]) )}, {"orphan continuation byte u+00bf replaced", ?_assertEqual( <<16#fffd/utf8>>, - decode_bad_utf(<<16#00bf>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#00bf>>, []) )}, {"2 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, []) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, [strict_utf8]) )}, {"2 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 2), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 
2))/binary>>, [replaced_bad_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, []) )}, {"3 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, []) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, [strict_utf8]) )}, {"3 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 3), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, [replaced_bad_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, []) )}, {"4 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, []) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, [strict_utf8]) )}, {"4 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 4), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, [replaced_bad_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, []) )}, {"5 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, []) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, [strict_utf8]) )}, {"5 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 5), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, [replaced_bad_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, []) )}, {"6 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, []) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, [strict_utf8]) )}, {"6 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 6), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, [replaced_bad_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, []) )}, {"all continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, []) + decode_bad_utf(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [strict_utf8]) 
)}, {"all continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), decode_bad_utf( <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, - [replaced_bad_utf8] + [] ) )}, {"lonely start byte", ?_assertError( badarg, - decode_bad_utf(<<16#00c0>>, []) + decode_bad_utf(<<16#00c0>>, [strict_utf8]) )}, {"lonely start byte replaced", ?_assertEqual( <<16#fffd/utf8>>, - decode_bad_utf(<<16#00c0>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#00c0>>, []) )}, {"lonely start bytes (2 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#00c0, 32, 16#00df>>, []) + decode_bad_utf(<<16#00c0, 32, 16#00df>>, [strict_utf8]) )}, {"lonely start bytes (2 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00c0, 32, 16#00df>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#00c0, 32, 16#00df>>, []) )}, {"lonely start bytes (3 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#00e0, 32, 16#00ef>>, []) + decode_bad_utf(<<16#00e0, 32, 16#00ef>>, [strict_utf8]) )}, {"lonely start bytes (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00e0, 32, 16#00ef>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#00e0, 32, 16#00ef>>, []) )}, {"lonely start bytes (4 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#00f0, 32, 16#00f7>>, []) + decode_bad_utf(<<16#00f0, 32, 16#00f7>>, [strict_utf8]) )}, {"lonely start bytes (4 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00f0, 32, 16#00f7>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#00f0, 32, 16#00f7>>, []) )}, {"missing continuation byte (3 byte)", ?_assertError( badarg, - decode_bad_utf(<<224, 160, 32>>, []) + decode_bad_utf(<<224, 160, 32>>, [strict_utf8]) )}, {"missing continuation byte (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - decode_bad_utf(<<224, 160, 32>>, [replaced_bad_utf8]) + decode_bad_utf(<<224, 160, 32>>, []) )}, {"missing continuation byte (4 byte 
missing one)", ?_assertError( badarg, - decode_bad_utf(<<240, 144, 128, 32>>, []) + decode_bad_utf(<<240, 144, 128, 32>>, [strict_utf8]) )}, {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - decode_bad_utf(<<240, 144, 128, 32>>, [replaced_bad_utf8]) + decode_bad_utf(<<240, 144, 128, 32>>, []) )}, {"missing continuation byte (4 byte missing two)", ?_assertError( badarg, - decode_bad_utf(<<240, 144, 32>>, []) + decode_bad_utf(<<240, 144, 32>>, [strict_utf8]) )}, {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - decode_bad_utf(<<240, 144, 32>>, [replaced_bad_utf8]) + decode_bad_utf(<<240, 144, 32>>, []) )}, {"overlong encoding of u+002f (2 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#c0, 16#af, 32>>, []) + decode_bad_utf(<<16#c0, 16#af, 32>>, [strict_utf8]) )}, {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#c0, 16#af, 32>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#c0, 16#af, 32>>, []) )}, {"overlong encoding of u+002f (3 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, []) + decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, [strict_utf8]) )}, {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, []) )}, {"overlong encoding of u+002f (4 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, []) + decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, [strict_utf8]) )}, {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, []) )}, {"highest overlong 2 byte sequence", ?_assertError( badarg, - decode_bad_utf(<<16#c1, 16#bf, 32>>, []) + decode_bad_utf(<<16#c1, 
16#bf, 32>>, [strict_utf8]) )}, {"highest overlong 2 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#c1, 16#bf, 32>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#c1, 16#bf, 32>>, []) )}, {"highest overlong 3 byte sequence", ?_assertError( badarg, - decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, []) + decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, [strict_utf8]) )}, {"highest overlong 3 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, []) )}, {"highest overlong 4 byte sequence", ?_assertError( badarg, - decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, []) + decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [strict_utf8]) )}, {"highest overlong 4 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [replaced_bad_utf8]) + decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, []) )} ]. 
@@ -1647,35 +1646,35 @@ unescape_test_() -> )}, {"replace bad high surrogate", ?_assertEqual( <<16#fffd/utf8>>, - unescape(<<"\\udc00"/utf8>>, [replaced_bad_utf8]) + unescape(<<"\\udc00"/utf8>>, []) )}, {"do not unescape bad high surrogate", ?_assertError( badarg, - unescape(<<"\\udc00"/utf8>>, []) + unescape(<<"\\udc00"/utf8>>, [strict_utf8]) )}, {"replace naked high surrogate", ?_assertEqual( <<16#fffd/utf8, "hello world">>, - unescape(<<"\\ud800hello world"/utf8>>, [replaced_bad_utf8]) + unescape(<<"\\ud800hello world"/utf8>>, []) )}, {"do not unescape naked high surrogate", ?_assertError( badarg, - unescape(<<"\\ud800hello world"/utf8>>, []) + unescape(<<"\\ud800hello world"/utf8>>, [strict_utf8]) )}, {"replace naked low surrogate", ?_assertEqual( <<16#fffd/utf8, "hello world">>, - unescape(<<"\\udc00hello world"/utf8>>, [replaced_bad_utf8]) + unescape(<<"\\udc00hello world"/utf8>>, []) )}, {"do not unescape naked low surrogate", ?_assertError( badarg, - unescape(<<"\\udc00hello world"/utf8>>, []) + unescape(<<"\\udc00hello world"/utf8>>, [strict_utf8]) )}, {"replace bad surrogate pair", ?_assertEqual( <<16#fffd/utf8, 16#fffd/utf8>>, - unescape(<<"\\ud800\\u0000">>, [replaced_bad_utf8]) + unescape(<<"\\ud800\\u0000">>, []) )}, {"do not unescape bad surrogate pair", ?_assertError( badarg, - unescape(<<"\\ud800\\u0000">>, []) + unescape(<<"\\ud800\\u0000">>, [strict_utf8]) )}, {"bad pseudo escape sequence", ?_assertError( badarg, @@ -2104,11 +2103,11 @@ custom_error_handler_test_() -> )}, {"single_comment error", ?_assertEqual( {comment, <<192>>}, - Decode(<<"[ //"/utf8, 192>>, [{error_handler, Error}, comments]) + Decode(<<"[ //"/utf8, 192>>, [{error_handler, Error}, comments, strict_utf8]) )}, {"multi_comment error", ?_assertEqual( {comment, <<192>>}, - Decode(<<"[ /*"/utf8, 192>>, [{error_handler, Error}, comments]) + Decode(<<"[ /*"/utf8, 192>>, [{error_handler, Error}, comments, strict_utf8]) )} ]. 
diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 6c04d27..517be4c 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -78,7 +78,7 @@ err(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term). error_test_() -> [ {"value error", ?_assertError(badarg, err(self(), []))}, - {"string error", ?_assertError(badarg, err(<<239, 191, 191>>, []))} + {"string error", ?_assertError(badarg, err(<<239, 191, 191>>, [strict_utf8]))} ]. custom_error_handler_test_() -> @@ -90,7 +90,7 @@ custom_error_handler_test_() -> )}, {"string error", ?_assertEqual( {string, [{string, <<239, 191, 191>>}]}, - err(<<239, 191, 191>>, [{error_handler, Error}]) + err(<<239, 191, 191>>, [{error_handler, Error}, strict_utf8]) )} ]. diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 6b27e35..350624f 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -193,209 +193,46 @@ clean_string(Bin, Tokens, Handler, Stack, Config) -> end. clean_string(Bin, #config{dirty_strings=true}) -> Bin; -clean_string(Bin, Config) -> - case Config#config.replaced_bad_utf8 orelse Config#config.escaped_strings of - true -> clean(Bin, [], Config); - false -> ensure_clean(Bin) - end. - - -ensure_clean(Bin) -> - case is_clean(Bin) of - ok -> Bin; - {error, badarg} -> {error, badarg} - end. 
- - -%% fast path for no escaping and no correcting, throws error if string is 'bad' -is_clean(<<>>) -> ok; -is_clean(<<0, Rest/binary>>) -> is_clean(Rest); -is_clean(<<1, Rest/binary>>) -> is_clean(Rest); -is_clean(<<2, Rest/binary>>) -> is_clean(Rest); -is_clean(<<3, Rest/binary>>) -> is_clean(Rest); -is_clean(<<4, Rest/binary>>) -> is_clean(Rest); -is_clean(<<5, Rest/binary>>) -> is_clean(Rest); -is_clean(<<6, Rest/binary>>) -> is_clean(Rest); -is_clean(<<7, Rest/binary>>) -> is_clean(Rest); -is_clean(<<8, Rest/binary>>) -> is_clean(Rest); -is_clean(<<9, Rest/binary>>) -> is_clean(Rest); -is_clean(<<10, Rest/binary>>) -> is_clean(Rest); -is_clean(<<11, Rest/binary>>) -> is_clean(Rest); -is_clean(<<12, Rest/binary>>) -> is_clean(Rest); -is_clean(<<13, Rest/binary>>) -> is_clean(Rest); -is_clean(<<14, Rest/binary>>) -> is_clean(Rest); -is_clean(<<15, Rest/binary>>) -> is_clean(Rest); -is_clean(<<16, Rest/binary>>) -> is_clean(Rest); -is_clean(<<17, Rest/binary>>) -> is_clean(Rest); -is_clean(<<18, Rest/binary>>) -> is_clean(Rest); -is_clean(<<19, Rest/binary>>) -> is_clean(Rest); -is_clean(<<20, Rest/binary>>) -> is_clean(Rest); -is_clean(<<21, Rest/binary>>) -> is_clean(Rest); -is_clean(<<22, Rest/binary>>) -> is_clean(Rest); -is_clean(<<23, Rest/binary>>) -> is_clean(Rest); -is_clean(<<24, Rest/binary>>) -> is_clean(Rest); -is_clean(<<25, Rest/binary>>) -> is_clean(Rest); -is_clean(<<26, Rest/binary>>) -> is_clean(Rest); -is_clean(<<27, Rest/binary>>) -> is_clean(Rest); -is_clean(<<28, Rest/binary>>) -> is_clean(Rest); -is_clean(<<29, Rest/binary>>) -> is_clean(Rest); -is_clean(<<30, Rest/binary>>) -> is_clean(Rest); -is_clean(<<31, Rest/binary>>) -> is_clean(Rest); -is_clean(<<32, Rest/binary>>) -> is_clean(Rest); -is_clean(<<33, Rest/binary>>) -> is_clean(Rest); -is_clean(<<34, Rest/binary>>) -> is_clean(Rest); -is_clean(<<35, Rest/binary>>) -> is_clean(Rest); -is_clean(<<36, Rest/binary>>) -> is_clean(Rest); -is_clean(<<37, Rest/binary>>) -> is_clean(Rest); 
-is_clean(<<38, Rest/binary>>) -> is_clean(Rest); -is_clean(<<39, Rest/binary>>) -> is_clean(Rest); -is_clean(<<40, Rest/binary>>) -> is_clean(Rest); -is_clean(<<41, Rest/binary>>) -> is_clean(Rest); -is_clean(<<42, Rest/binary>>) -> is_clean(Rest); -is_clean(<<43, Rest/binary>>) -> is_clean(Rest); -is_clean(<<44, Rest/binary>>) -> is_clean(Rest); -is_clean(<<45, Rest/binary>>) -> is_clean(Rest); -is_clean(<<46, Rest/binary>>) -> is_clean(Rest); -is_clean(<<47, Rest/binary>>) -> is_clean(Rest); -is_clean(<<48, Rest/binary>>) -> is_clean(Rest); -is_clean(<<49, Rest/binary>>) -> is_clean(Rest); -is_clean(<<50, Rest/binary>>) -> is_clean(Rest); -is_clean(<<51, Rest/binary>>) -> is_clean(Rest); -is_clean(<<52, Rest/binary>>) -> is_clean(Rest); -is_clean(<<53, Rest/binary>>) -> is_clean(Rest); -is_clean(<<54, Rest/binary>>) -> is_clean(Rest); -is_clean(<<55, Rest/binary>>) -> is_clean(Rest); -is_clean(<<56, Rest/binary>>) -> is_clean(Rest); -is_clean(<<57, Rest/binary>>) -> is_clean(Rest); -is_clean(<<58, Rest/binary>>) -> is_clean(Rest); -is_clean(<<59, Rest/binary>>) -> is_clean(Rest); -is_clean(<<60, Rest/binary>>) -> is_clean(Rest); -is_clean(<<61, Rest/binary>>) -> is_clean(Rest); -is_clean(<<62, Rest/binary>>) -> is_clean(Rest); -is_clean(<<63, Rest/binary>>) -> is_clean(Rest); -is_clean(<<64, Rest/binary>>) -> is_clean(Rest); -is_clean(<<65, Rest/binary>>) -> is_clean(Rest); -is_clean(<<66, Rest/binary>>) -> is_clean(Rest); -is_clean(<<67, Rest/binary>>) -> is_clean(Rest); -is_clean(<<68, Rest/binary>>) -> is_clean(Rest); -is_clean(<<69, Rest/binary>>) -> is_clean(Rest); -is_clean(<<70, Rest/binary>>) -> is_clean(Rest); -is_clean(<<71, Rest/binary>>) -> is_clean(Rest); -is_clean(<<72, Rest/binary>>) -> is_clean(Rest); -is_clean(<<73, Rest/binary>>) -> is_clean(Rest); -is_clean(<<74, Rest/binary>>) -> is_clean(Rest); -is_clean(<<75, Rest/binary>>) -> is_clean(Rest); -is_clean(<<76, Rest/binary>>) -> is_clean(Rest); -is_clean(<<77, Rest/binary>>) -> is_clean(Rest); 
-is_clean(<<78, Rest/binary>>) -> is_clean(Rest); -is_clean(<<79, Rest/binary>>) -> is_clean(Rest); -is_clean(<<80, Rest/binary>>) -> is_clean(Rest); -is_clean(<<81, Rest/binary>>) -> is_clean(Rest); -is_clean(<<82, Rest/binary>>) -> is_clean(Rest); -is_clean(<<83, Rest/binary>>) -> is_clean(Rest); -is_clean(<<84, Rest/binary>>) -> is_clean(Rest); -is_clean(<<85, Rest/binary>>) -> is_clean(Rest); -is_clean(<<86, Rest/binary>>) -> is_clean(Rest); -is_clean(<<87, Rest/binary>>) -> is_clean(Rest); -is_clean(<<88, Rest/binary>>) -> is_clean(Rest); -is_clean(<<89, Rest/binary>>) -> is_clean(Rest); -is_clean(<<90, Rest/binary>>) -> is_clean(Rest); -is_clean(<<91, Rest/binary>>) -> is_clean(Rest); -is_clean(<<92, Rest/binary>>) -> is_clean(Rest); -is_clean(<<93, Rest/binary>>) -> is_clean(Rest); -is_clean(<<94, Rest/binary>>) -> is_clean(Rest); -is_clean(<<95, Rest/binary>>) -> is_clean(Rest); -is_clean(<<96, Rest/binary>>) -> is_clean(Rest); -is_clean(<<97, Rest/binary>>) -> is_clean(Rest); -is_clean(<<98, Rest/binary>>) -> is_clean(Rest); -is_clean(<<99, Rest/binary>>) -> is_clean(Rest); -is_clean(<<100, Rest/binary>>) -> is_clean(Rest); -is_clean(<<101, Rest/binary>>) -> is_clean(Rest); -is_clean(<<102, Rest/binary>>) -> is_clean(Rest); -is_clean(<<103, Rest/binary>>) -> is_clean(Rest); -is_clean(<<104, Rest/binary>>) -> is_clean(Rest); -is_clean(<<105, Rest/binary>>) -> is_clean(Rest); -is_clean(<<106, Rest/binary>>) -> is_clean(Rest); -is_clean(<<107, Rest/binary>>) -> is_clean(Rest); -is_clean(<<108, Rest/binary>>) -> is_clean(Rest); -is_clean(<<109, Rest/binary>>) -> is_clean(Rest); -is_clean(<<110, Rest/binary>>) -> is_clean(Rest); -is_clean(<<111, Rest/binary>>) -> is_clean(Rest); -is_clean(<<112, Rest/binary>>) -> is_clean(Rest); -is_clean(<<113, Rest/binary>>) -> is_clean(Rest); -is_clean(<<114, Rest/binary>>) -> is_clean(Rest); -is_clean(<<115, Rest/binary>>) -> is_clean(Rest); -is_clean(<<116, Rest/binary>>) -> is_clean(Rest); -is_clean(<<117, Rest/binary>>) 
-> is_clean(Rest); -is_clean(<<118, Rest/binary>>) -> is_clean(Rest); -is_clean(<<119, Rest/binary>>) -> is_clean(Rest); -is_clean(<<120, Rest/binary>>) -> is_clean(Rest); -is_clean(<<121, Rest/binary>>) -> is_clean(Rest); -is_clean(<<122, Rest/binary>>) -> is_clean(Rest); -is_clean(<<123, Rest/binary>>) -> is_clean(Rest); -is_clean(<<124, Rest/binary>>) -> is_clean(Rest); -is_clean(<<125, Rest/binary>>) -> is_clean(Rest); -is_clean(<<126, Rest/binary>>) -> is_clean(Rest); -is_clean(<<127, Rest/binary>>) -> is_clean(Rest); -is_clean(<>) when X < 16#d800 -> is_clean(Rest); -is_clean(<>) when X > 16#dfff, X < 16#fdd0 -> is_clean(Rest); -is_clean(<>) when X > 16#fdef, X < 16#fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#10000, X < 16#1fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#20000, X < 16#2fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#30000, X < 16#3fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#40000, X < 16#4fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#50000, X < 16#5fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#60000, X < 16#6fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#70000, X < 16#7fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#80000, X < 16#8fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#90000, X < 16#9fffe -> is_clean(Rest); -is_clean(<>) when X >= 16#a0000, X < 16#afffe -> is_clean(Rest); -is_clean(<>) when X >= 16#b0000, X < 16#bfffe -> is_clean(Rest); -is_clean(<>) when X >= 16#c0000, X < 16#cfffe -> is_clean(Rest); -is_clean(<>) when X >= 16#d0000, X < 16#dfffe -> is_clean(Rest); -is_clean(<>) when X >= 16#e0000, X < 16#efffe -> is_clean(Rest); -is_clean(<>) when X >= 16#f0000, X < 16#ffffe -> is_clean(Rest); -is_clean(<>) when X >= 16#100000, X < 16#10fffe -> is_clean(Rest); -is_clean(_Bin) -> {error, badarg}. +clean_string(Bin, Config) -> clean(Bin, [], Config). 
%% escape and/or replace bad codepoints if requested clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc)); -clean(<<0, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(0, Config) ++ Acc, Config); -clean(<<1, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(1, Config) ++ Acc, Config); -clean(<<2, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(2, Config) ++ Acc, Config); -clean(<<3, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(3, Config) ++ Acc, Config); -clean(<<4, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(4, Config) ++ Acc, Config); -clean(<<5, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(5, Config) ++ Acc, Config); -clean(<<6, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(6, Config) ++ Acc, Config); -clean(<<7, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(7, Config) ++ Acc, Config); -clean(<<8, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(8, Config) ++ Acc, Config); -clean(<<9, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(9, Config) ++ Acc, Config); -clean(<<10, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(10, Config) ++ Acc, Config); -clean(<<11, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(11, Config) ++ Acc, Config); -clean(<<12, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(12, Config) ++ Acc, Config); -clean(<<13, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(13, Config) ++ Acc, Config); -clean(<<14, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(14, Config) ++ Acc, Config); -clean(<<15, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(15, Config) ++ Acc, Config); -clean(<<16, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(16, Config) ++ Acc, Config); -clean(<<17, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(17, Config) ++ Acc, Config); -clean(<<18, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(18, Config) ++ Acc, Config); 
-clean(<<19, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(19, Config) ++ Acc, Config); -clean(<<20, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(20, Config) ++ Acc, Config); -clean(<<21, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(21, Config) ++ Acc, Config); -clean(<<22, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(22, Config) ++ Acc, Config); -clean(<<23, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(23, Config) ++ Acc, Config); -clean(<<24, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(24, Config) ++ Acc, Config); -clean(<<25, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(25, Config) ++ Acc, Config); -clean(<<26, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(26, Config) ++ Acc, Config); -clean(<<27, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(27, Config) ++ Acc, Config); -clean(<<28, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(28, Config) ++ Acc, Config); -clean(<<29, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(29, Config) ++ Acc, Config); -clean(<<30, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(30, Config) ++ Acc, Config); -clean(<<31, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(31, Config) ++ Acc, Config); +clean(<<0, Rest/binary>>, Acc, Config) -> maybe_replace(0, Rest, Acc, Config); +clean(<<1, Rest/binary>>, Acc, Config) -> maybe_replace(1, Rest, Acc, Config); +clean(<<2, Rest/binary>>, Acc, Config) -> maybe_replace(2, Rest, Acc, Config); +clean(<<3, Rest/binary>>, Acc, Config) -> maybe_replace(3, Rest, Acc, Config); +clean(<<4, Rest/binary>>, Acc, Config) -> maybe_replace(4, Rest, Acc, Config); +clean(<<5, Rest/binary>>, Acc, Config) -> maybe_replace(5, Rest, Acc, Config); +clean(<<6, Rest/binary>>, Acc, Config) -> maybe_replace(6, Rest, Acc, Config); +clean(<<7, Rest/binary>>, Acc, Config) -> maybe_replace(7, Rest, Acc, Config); +clean(<<8, Rest/binary>>, Acc, Config) -> maybe_replace(8, Rest, Acc, 
Config); +clean(<<9, Rest/binary>>, Acc, Config) -> maybe_replace(9, Rest, Acc, Config); +clean(<<10, Rest/binary>>, Acc, Config) -> maybe_replace(10, Rest, Acc, Config); +clean(<<11, Rest/binary>>, Acc, Config) -> maybe_replace(11, Rest, Acc, Config); +clean(<<12, Rest/binary>>, Acc, Config) -> maybe_replace(12, Rest, Acc, Config); +clean(<<13, Rest/binary>>, Acc, Config) -> maybe_replace(13, Rest, Acc, Config); +clean(<<14, Rest/binary>>, Acc, Config) -> maybe_replace(14, Rest, Acc, Config); +clean(<<15, Rest/binary>>, Acc, Config) -> maybe_replace(15, Rest, Acc, Config); +clean(<<16, Rest/binary>>, Acc, Config) -> maybe_replace(16, Rest, Acc, Config); +clean(<<17, Rest/binary>>, Acc, Config) -> maybe_replace(17, Rest, Acc, Config); +clean(<<18, Rest/binary>>, Acc, Config) -> maybe_replace(18, Rest, Acc, Config); +clean(<<19, Rest/binary>>, Acc, Config) -> maybe_replace(19, Rest, Acc, Config); +clean(<<20, Rest/binary>>, Acc, Config) -> maybe_replace(20, Rest, Acc, Config); +clean(<<21, Rest/binary>>, Acc, Config) -> maybe_replace(21, Rest, Acc, Config); +clean(<<22, Rest/binary>>, Acc, Config) -> maybe_replace(22, Rest, Acc, Config); +clean(<<23, Rest/binary>>, Acc, Config) -> maybe_replace(23, Rest, Acc, Config); +clean(<<24, Rest/binary>>, Acc, Config) -> maybe_replace(24, Rest, Acc, Config); +clean(<<25, Rest/binary>>, Acc, Config) -> maybe_replace(25, Rest, Acc, Config); +clean(<<26, Rest/binary>>, Acc, Config) -> maybe_replace(26, Rest, Acc, Config); +clean(<<27, Rest/binary>>, Acc, Config) -> maybe_replace(27, Rest, Acc, Config); +clean(<<28, Rest/binary>>, Acc, Config) -> maybe_replace(28, Rest, Acc, Config); +clean(<<29, Rest/binary>>, Acc, Config) -> maybe_replace(29, Rest, Acc, Config); +clean(<<30, Rest/binary>>, Acc, Config) -> maybe_replace(30, Rest, Acc, Config); +clean(<<31, Rest/binary>>, Acc, Config) -> maybe_replace(31, Rest, Acc, Config); clean(<<32, Rest/binary>>, Acc, Config) -> clean(Rest, [32] ++ Acc, Config); clean(<<33, Rest/binary>>, 
Acc, Config) -> clean(Rest, [33] ++ Acc, Config); -clean(<<34, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(34, Config) ++ Acc, Config); +clean(<<34, Rest/binary>>, Acc, Config) -> maybe_replace(34, Rest, Acc, Config); clean(<<35, Rest/binary>>, Acc, Config) -> clean(Rest, [35] ++ Acc, Config); clean(<<36, Rest/binary>>, Acc, Config) -> clean(Rest, [36] ++ Acc, Config); clean(<<37, Rest/binary>>, Acc, Config) -> clean(Rest, [37] ++ Acc, Config); @@ -408,7 +245,7 @@ clean(<<43, Rest/binary>>, Acc, Config) -> clean(Rest, [43] ++ Acc, Config); clean(<<44, Rest/binary>>, Acc, Config) -> clean(Rest, [44] ++ Acc, Config); clean(<<45, Rest/binary>>, Acc, Config) -> clean(Rest, [45] ++ Acc, Config); clean(<<46, Rest/binary>>, Acc, Config) -> clean(Rest, [46] ++ Acc, Config); -clean(<<47, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(47, Config) ++ Acc, Config); +clean(<<47, Rest/binary>>, Acc, Config) -> maybe_replace(47, Rest, Acc, Config); clean(<<48, Rest/binary>>, Acc, Config) -> clean(Rest, [48] ++ Acc, Config); clean(<<49, Rest/binary>>, Acc, Config) -> clean(Rest, [49] ++ Acc, Config); clean(<<50, Rest/binary>>, Acc, Config) -> clean(Rest, [50] ++ Acc, Config); @@ -453,7 +290,7 @@ clean(<<88, Rest/binary>>, Acc, Config) -> clean(Rest, [88] ++ Acc, Config); clean(<<89, Rest/binary>>, Acc, Config) -> clean(Rest, [89] ++ Acc, Config); clean(<<90, Rest/binary>>, Acc, Config) -> clean(Rest, [90] ++ Acc, Config); clean(<<91, Rest/binary>>, Acc, Config) -> clean(Rest, [91] ++ Acc, Config); -clean(<<92, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(92, Config) ++ Acc, Config); +clean(<<92, Rest/binary>>, Acc, Config) -> maybe_replace(92, Rest, Acc, Config); clean(<<93, Rest/binary>>, Acc, Config) -> clean(Rest, [93] ++ Acc, Config); clean(<<94, Rest/binary>>, Acc, Config) -> clean(Rest, [94] ++ Acc, Config); clean(<<95, Rest/binary>>, Acc, Config) -> clean(Rest, [95] ++ Acc, Config); @@ -490,7 +327,7 @@ clean(<<125, Rest/binary>>, Acc, 
Config) -> clean(Rest, [125] ++ Acc, Config); clean(<<126, Rest/binary>>, Acc, Config) -> clean(Rest, [126] ++ Acc, Config); clean(<<127, Rest/binary>>, Acc, Config) -> clean(Rest, [127] ++ Acc, Config); clean(<>, Acc, Config) when X == 16#2028; X == 16#2029 -> - clean(Rest, maybe_replace(X, Config) ++ Acc, Config); + maybe_replace(X, Rest, Acc, Config); clean(<>, Acc, Config) when X < 16#d800 -> clean(Rest, [X] ++ Acc, Config); clean(<>, Acc, Config) when X > 16#dfff, X < 16#fdd0 -> @@ -531,24 +368,24 @@ clean(<>, Acc, Config) when X >= 16#100000, X < 16#10fffe - clean(Rest, [X] ++ Acc, Config); %% surrogates clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 -> - clean(Rest, maybe_replace(surrogate, Config) ++ Acc, Config); + maybe_replace(surrogate, Rest, Acc, Config); %% noncharacters clean(<<_/utf8, Rest/binary>>, Acc, Config) -> - clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config); + maybe_replace(noncharacter, Rest, Acc, Config); %% u+fffe and u+ffff for R14BXX clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X == 190; X == 191 -> - clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config); + maybe_replace(noncharacter, Rest, Acc, Config); %% overlong encodings and missing continuations of a 2 byte sequence clean(<>, Acc, Config) when X >= 192, X =< 223 -> - clean(strip_continuations(Rest, 1), maybe_replace(badutf, Config) ++ Acc, Config); + maybe_replace(badutf, strip_continuations(Rest, 1), Acc, Config); %% overlong encodings and missing continuations of a 3 byte sequence clean(<>, Acc, Config) when X >= 224, X =< 239 -> - clean(strip_continuations(Rest, 2), maybe_replace(badutf, Config) ++ Acc, Config); + maybe_replace(badutf, strip_continuations(Rest, 2), Acc, Config); %% overlong encodings and missing continuations of a 4 byte sequence clean(<>, Acc, Config) when X >= 240, X =< 247 -> - clean(strip_continuations(Rest, 3), maybe_replace(badutf, Config) ++ Acc, Config); + maybe_replace(badutf, strip_continuations(Rest, 
3), Acc, Config); clean(<<_, Rest/binary>>, Acc, Config) -> - clean(Rest, maybe_replace(badutf, Config) ++ Acc, Config). + maybe_replace(badutf, Rest, Acc, Config). strip_continuations(Bin, 0) -> Bin; @@ -558,29 +395,37 @@ strip_continuations(<>, N) when X >= 128, X =< 191 -> strip_continuations(Bin, _) -> Bin. -maybe_replace($\b, #config{escaped_strings=true}) -> [$b, $\\]; -maybe_replace($\t, #config{escaped_strings=true}) -> [$t, $\\]; -maybe_replace($\n, #config{escaped_strings=true}) -> [$n, $\\]; -maybe_replace($\f, #config{escaped_strings=true}) -> [$f, $\\]; -maybe_replace($\r, #config{escaped_strings=true}) -> [$r, $\\]; -maybe_replace($\", #config{escaped_strings=true}) -> [$\", $\\]; -maybe_replace($/, Config=#config{escaped_strings=true}) -> +maybe_replace($\b, Rest, Acc, Config=#config{escaped_strings=true}) -> + clean(Rest, [$b, $\\] ++ Acc, Config); +maybe_replace($\t, Rest, Acc, Config=#config{escaped_strings=true}) -> + clean(Rest, [$t, $\\] ++ Acc, Config); +maybe_replace($\n, Rest, Acc, Config=#config{escaped_strings=true}) -> + clean(Rest, [$n, $\\] ++ Acc, Config); +maybe_replace($\f, Rest, Acc, Config=#config{escaped_strings=true}) -> + clean(Rest, [$f, $\\] ++ Acc, Config); +maybe_replace($\r, Rest, Acc, Config=#config{escaped_strings=true}) -> + clean(Rest, [$r, $\\] ++ Acc, Config); +maybe_replace($\", Rest, Acc, Config=#config{escaped_strings=true}) -> + clean(Rest, [$\", $\\] ++ Acc, Config); +maybe_replace($/, Rest, Acc, Config=#config{escaped_strings=true}) -> case Config#config.escaped_forward_slashes of - true -> [$/, $\\]; - false -> [$/] + true -> clean(Rest, [$/, $\\] ++ Acc, Config); + false -> clean(Rest, [$/] ++ Acc, Config) end; -maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\]; -maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> +maybe_replace($\\, Rest, Acc, Config=#config{escaped_strings=true}) -> + clean(Rest, [$\\, $\\] ++ Acc, Config); +maybe_replace(X, Rest, Acc, 
Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> case Config#config.unescaped_jsonp of - true -> [X]; - false -> lists:reverse(json_escape_sequence(X)) + true -> clean(Rest, [X] ++ Acc, Config); + false -> clean(Rest, lists:reverse(json_escape_sequence(X)) ++ Acc, Config) end; -maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> - lists:reverse(json_escape_sequence(X)); -maybe_replace(noncharacter, #config{replaced_bad_utf8=true}) -> [16#fffd]; -maybe_replace(surrogate, #config{replaced_bad_utf8=true}) -> [16#fffd]; -maybe_replace(badutf, #config{replaced_bad_utf8=true}) -> [16#fffd]; -maybe_replace(_, _) -> {error, badarg}. +maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X < 32 -> + clean(Rest, lists:reverse(json_escape_sequence(X)) ++ Acc, Config); +maybe_replace(Atom, _, _, #config{strict_utf8=true}) when is_atom(Atom) -> {error, badarg}; +maybe_replace(noncharacter, Rest, Acc, Config) -> clean(Rest, [16#fffd] ++ Acc, Config); +maybe_replace(surrogate, Rest, Acc, Config) -> clean(Rest, [16#fffd] ++ Acc, Config); +maybe_replace(badutf, Rest, Acc, Config) -> clean(Rest, [16#fffd] ++ Acc, Config); +maybe_replace(X, Rest, Acc, Config) -> clean(Rest, [X] ++ Acc, Config). %% convert a codepoint to it's \uXXXX equiv. @@ -650,7 +495,7 @@ error_test_() -> {"value error", ?_assertError(badarg, parse_error([self()], []))}, {"maybe_done error", ?_assertError(badarg, parse_error([start_array, end_array, start_array, end_json], []))}, {"done error", ?_assertError(badarg, parse_error([{string, <<"">>}, {literal, true}, end_json], []))}, - {"string error", ?_assertError(badarg, parse_error([{string, <<239, 191, 191>>}, end_json], []))} + {"string error", ?_assertError(badarg, parse_error([{string, <<239, 191, 191>>}, end_json], [strict_utf8]))} ]. 
@@ -671,7 +516,7 @@ custom_error_handler_test_() -> )}, {"string error", ?_assertEqual( {string, [{string, <<239, 191, 191>>}, end_json]}, - parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}]) + parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict_utf8]) )} ]. diff --git a/src/jsx_tests.hrl b/src/jsx_tests.hrl index fb4bf30..e6ec7aa 100644 --- a/src/jsx_tests.hrl +++ b/src/jsx_tests.hrl @@ -284,35 +284,35 @@ clean_string_test_() -> )}, {"error reserved space", ?_assertEqual( lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, reserved_space()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, reserved_space()) )}, {"error surrogates", ?_assertEqual( lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, surrogates()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, surrogates()) )}, {"error noncharacters", ?_assertEqual( lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, noncharacters()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, noncharacters()) )}, {"error extended noncharacters", ?_assertEqual( lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, extended_noncharacters()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, extended_noncharacters()) )}, {"clean reserved space", ?_assertEqual( lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, 
reserved_space()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, reserved_space()) )}, {"clean surrogates", ?_assertEqual( lists:duplicate(length(surrogates()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, surrogates()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, surrogates()) )}, {"clean noncharacters", ?_assertEqual( lists:duplicate(length(noncharacters()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, noncharacters()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, noncharacters()) )}, {"clean extended noncharacters", ?_assertEqual( lists:duplicate(length(extended_noncharacters()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, extended_noncharacters()) + lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, extended_noncharacters()) )} ]. 
@@ -492,190 +492,190 @@ bad_utf8_test_() -> [ {"noncharacter u+fffe", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{}) + jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{strict_utf8=true}) )}, {"noncharacter u+fffe replaced", ?_assertEqual( <<16#fffd/utf8>>, - jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{}) )}, {"noncharacter u+ffff", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{}) + jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{strict_utf8=true}) )}, {"noncharacter u+ffff replaced", ?_assertEqual( <<16#fffd/utf8>>, - jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{}) )}, {"orphan continuation byte u+0080", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#0080>>, #config{}) + jsx_parser:clean_string(<<16#0080>>, #config{strict_utf8=true}) )}, {"orphan continuation byte u+0080 replaced", ?_assertEqual( <<16#fffd/utf8>>, - jsx_parser:clean_string(<<16#0080>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#0080>>, #config{}) )}, {"orphan continuation byte u+00bf", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#00bf>>, #config{}) + jsx_parser:clean_string(<<16#00bf>>, #config{strict_utf8=true}) )}, {"orphan continuation byte u+00bf replaced", ?_assertEqual( <<16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00bf>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00bf>>, #config{}) )}, {"2 continuation bytes", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{strict_utf8=true}) )}, {"2 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 2), - 
jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) )}, {"3 continuation bytes", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{strict_utf8=true}) )}, {"3 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 3), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) )}, {"4 continuation bytes", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{strict_utf8=true}) )}, {"4 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 4), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) )}, {"5 continuation bytes", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{strict_utf8=true}) )}, {"5 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 5), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) )}, {"6 continuation bytes", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{strict_utf8=true}) )}, {"6 continuation bytes replaced", ?_assertEqual( 
binary:copy(<<16#fffd/utf8>>, 6), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) )}, {"all continuation bytes", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{}) + jsx_parser:clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{strict_utf8=true}) )}, {"all continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), jsx_parser:clean_string( <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, - #config{replaced_bad_utf8=true} + #config{} ) )}, {"lonely start byte", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#00c0>>, #config{}) + jsx_parser:clean_string(<<16#00c0>>, #config{strict_utf8=true}) )}, {"lonely start byte replaced", ?_assertEqual( <<16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00c0>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00c0>>, #config{}) )}, {"lonely start bytes (2 byte)", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{}) + jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{strict_utf8=true}) )}, {"lonely start bytes (2 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{}) )}, {"lonely start bytes (3 byte)", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) + jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, #config{strict_utf8=true}) )}, {"lonely start bytes (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, 
#config{}) )}, {"lonely start bytes (4 byte)", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) + jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{strict_utf8=true}) )}, {"lonely start bytes (4 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) )}, {"missing continuation byte (3 byte)", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<224, 160, 32>>, #config{}) + jsx_parser:clean_string(<<224, 160, 32>>, #config{strict_utf8=true}) )}, {"missing continuation byte (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<224, 160, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<224, 160, 32>>, #config{}) )}, {"missing continuation byte (4 byte missing one)", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{}) + jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{strict_utf8=true}) )}, {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{}) )}, {"missing continuation byte (4 byte missing two)", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<240, 144, 32>>, #config{}) + jsx_parser:clean_string(<<240, 144, 32>>, #config{strict_utf8=true}) )}, {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<240, 144, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<240, 144, 32>>, #config{}) )}, {"overlong encoding of u+002f (2 byte)", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#c0, 16#af, 32>>, #config{}) + jsx_parser:clean_string(<<16#c0, 16#af, 32>>, 
#config{strict_utf8=true}) )}, {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#c0, 16#af, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#c0, 16#af, 32>>, #config{}) )}, {"overlong encoding of u+002f (3 byte)", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) + jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{strict_utf8=true}) )}, {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) )}, {"overlong encoding of u+002f (4 byte)", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) + jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{strict_utf8=true}) )}, {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) )}, {"highest overlong 2 byte sequence", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{}) + jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{strict_utf8=true}) )}, {"highest overlong 2 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{}) )}, {"highest overlong 3 byte sequence", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) + jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{strict_utf8=true}) )}, {"highest overlong 3 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - 
jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) )}, {"highest overlong 4 byte sequence", ?_assertEqual( {error, badarg}, - jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) + jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{strict_utf8=true}) )}, {"highest overlong 4 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{replaced_bad_utf8=true}) + jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) )} ]. From a83fa3598d4da35e8541e74039082ca905da0b32 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 20 Oct 2013 03:20:36 +0000 Subject: [PATCH 11/40] comments allowed by default. pragmatism 1 literalists 0 --- src/jsx_config.erl | 16 +- src/jsx_config.hrl | 2 +- src/jsx_decoder.erl | 386 +++++++++++++++++++++++++++++++++----------- 3 files changed, 299 insertions(+), 105 deletions(-) diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 0427ed5..1b5ff79 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -50,8 +50,8 @@ parse_config([single_quoted_strings|Rest], Config) -> parse_config(Rest, Config#config{single_quoted_strings=true}); parse_config([unescaped_jsonp|Rest], Config) -> parse_config(Rest, Config#config{unescaped_jsonp=true}); -parse_config([comments|Rest], Config) -> - parse_config(Rest, Config#config{comments=true}); +parse_config([no_comments|Rest], Config) -> + parse_config(Rest, Config#config{no_comments=true}); parse_config([escaped_strings|Rest], Config) -> parse_config(Rest, Config#config{escaped_strings=true}); parse_config([dirty_strings|Rest], Config) -> @@ -61,7 +61,6 @@ parse_config([ignored_bad_escapes|Rest], Config) -> parse_config([relax|Rest], Config) -> parse_config(Rest, Config#config{ single_quoted_strings = true, - comments = true, ignored_bad_escapes = true }); 
parse_config([{error_handler, ErrorHandler}|Rest] = Options, Config) when is_function(ErrorHandler, 3) -> @@ -98,7 +97,7 @@ valid_flags() -> escaped_forward_slashes, single_quoted_strings, unescaped_jsonp, - comments, + no_comments, escaped_strings, dirty_strings, ignored_bad_escapes, @@ -140,7 +139,7 @@ config_test_() -> stream=true, single_quoted_strings=true, unescaped_jsonp=true, - comments=true, + no_comments=true, dirty_strings=true, ignored_bad_escapes=true }, @@ -150,7 +149,7 @@ config_test_() -> stream, single_quoted_strings, unescaped_jsonp, - comments, + no_comments, dirty_strings, ignored_bad_escapes ]) @@ -160,7 +159,6 @@ config_test_() -> ?_assertEqual( #config{ single_quoted_strings=true, - comments=true, ignored_bad_escapes=true }, parse_config([relax]) @@ -204,7 +202,7 @@ config_to_list_test_() -> escaped_forward_slashes, single_quoted_strings, unescaped_jsonp, - comments, + no_comments, dirty_strings, ignored_bad_escapes, stream @@ -216,7 +214,7 @@ config_to_list_test_() -> stream=true, single_quoted_strings=true, unescaped_jsonp=true, - comments=true, + no_comments=true, dirty_strings=true, ignored_bad_escapes=true } diff --git a/src/jsx_config.hrl b/src/jsx_config.hrl index 5b0f21e..5060254 100644 --- a/src/jsx_config.hrl +++ b/src/jsx_config.hrl @@ -3,7 +3,7 @@ escaped_forward_slashes = false, single_quoted_strings = false, unescaped_jsonp = false, - comments = false, + no_comments = false, escaped_strings = false, dirty_strings = false, ignored_bad_escapes = false, diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index f521a04..e3587b4 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -62,6 +62,7 @@ resume(Rest, State, Handler, Acc, Stack, Config) -> integer -> integer(Rest, Handler, Acc, Stack, Config); decimal -> decimal(Rest, Handler, Acc, Stack, Config); exp -> exp(Rest, Handler, Acc, Stack, Config); + zero -> zero(Rest, Handler, Acc, Stack, Config); true -> true(Rest, Handler, Stack, Config); false -> false(Rest, 
Handler, Stack, Config); null -> null(Rest, Handler, Stack, Config); @@ -214,11 +215,13 @@ value(<>, Handler, Stack, Config) -> array(Rest, handle_event(start_array, Handler, Config), [array|Stack], Config); value(<>, Handler, Stack, Config) when ?is_whitespace(S) -> value(Rest, Handler, Stack, Config); -value(<>, Handler, Stack, Config=#config{comments=true}) -> +value(<>, Handler, Stack, Config=#config{no_comments=true}) -> + ?error(value, <>, Handler, Stack, Config); +value(<>, Handler, Stack, Config) -> comment(Rest, Handler, value, [comment|Stack], Config); -value(<>, Handler, Stack, Config=#config{comments=true}) -> +value(<>, Handler, Stack, Config) -> comment(Rest, Handler, value, [multicomment|Stack], Config); -value(<>, Handler, Stack, Config=#config{comments=true}) -> +value(<>, Handler, Stack, Config) -> incomplete(value, <>, Handler, Stack, Config); value(<<>>, Handler, Stack, Config) -> incomplete(value, <<>>, Handler, Stack, Config); @@ -234,11 +237,13 @@ object(<>, Handler, [key|Stack], Config) -> maybe_done(Rest, handle_event(end_object, Handler, Config), Stack, Config); object(<>, Handler, Stack, Config) when ?is_whitespace(S) -> object(Rest, Handler, Stack, Config); -object(<>, Handler, Stack, Config=#config{comments=true}) -> +object(<>, Handler, Stack, Config=#config{no_comments=true}) -> + ?error(object, <>, Handler, Stack, Config); +object(<>, Handler, Stack, Config) -> comment(Rest, Handler, object, [comment|Stack], Config); -object(<>, Handler, Stack, Config=#config{comments=true}) -> +object(<>, Handler, Stack, Config) -> comment(Rest, Handler, object, [multicomment|Stack], Config); -object(<>, Handler, Stack, Config=#config{comments=true}) -> +object(<>, Handler, Stack, Config) -> incomplete(object, <>, Handler, Stack, Config); object(<<>>, Handler, Stack, Config) -> incomplete(object, <<>>, Handler, Stack, Config); @@ -250,11 +255,13 @@ array(<>, Handler, [array|Stack], Config) -> maybe_done(Rest, handle_event(end_array, Handler, 
Config), Stack, Config); array(<>, Handler, Stack, Config) when ?is_whitespace(S) -> array(Rest, Handler, Stack, Config); -array(<>, Handler, Stack, Config=#config{comments=true}) -> +array(<>, Handler, Stack, Config=#config{no_comments=true}) -> + value(<>, Handler, Stack, Config); +array(<>, Handler, Stack, Config) -> comment(Rest, Handler, array, [comment|Stack], Config); -array(<>, Handler, Stack, Config=#config{comments=true}) -> +array(<>, Handler, Stack, Config) -> comment(Rest, Handler, array, [multicomment|Stack], Config); -array(<>, Handler, Stack, Config=#config{comments=true}) -> +array(<>, Handler, Stack, Config) -> incomplete(array, <>, Handler, Stack, Config); array(<<>>, Handler, Stack, Config) -> incomplete(array, <<>>, Handler, Stack, Config); @@ -266,11 +273,13 @@ colon(<>, Handler, [key|Stack], Config) -> value(Rest, Handler, [object|Stack], Config); colon(<>, Handler, Stack, Config) when ?is_whitespace(S) -> colon(Rest, Handler, Stack, Config); -colon(<>, Handler, Stack, Config=#config{comments=true}) -> +colon(<>, Handler, Stack, Config=#config{no_comments=true}) -> + ?error(colon, <>, Handler, Stack, Config); +colon(<>, Handler, Stack, Config) -> comment(Rest, Handler, colon, [comment|Stack], Config); -colon(<>, Handler, Stack, Config=#config{comments=true}) -> +colon(<>, Handler, Stack, Config) -> comment(Rest, Handler, colon, [multicomment|Stack], Config); -colon(<>, Handler, Stack, Config=#config{comments=true}) -> +colon(<>, Handler, Stack, Config) -> incomplete(colon, <>, Handler, Stack, Config); colon(<<>>, Handler, Stack, Config) -> incomplete(colon, <<>>, Handler, Stack, Config); @@ -284,11 +293,13 @@ key(<>, Handler, Stack, Config=#config{single_quoted_ string(Rest, Handler, new_seq(), [singlequote|Stack], Config); key(<>, Handler, Stack, Config) when ?is_whitespace(S) -> key(Rest, Handler, Stack, Config); -key(<>, Handler, Stack, Config=#config{comments=true}) -> +key(<>, Handler, Stack, Config=#config{no_comments=true}) -> + 
?error(key, <>, Handler, Stack, Config); +key(<>, Handler, Stack, Config) -> comment(Rest, Handler, key, [comment|Stack], Config); -key(<>, Handler, Stack, Config=#config{comments=true}) -> +key(<>, Handler, Stack, Config) -> comment(Rest, Handler, key, [multicomment|Stack], Config); -key(<>, Handler, Stack, Config=#config{comments=true}) -> +key(<>, Handler, Stack, Config) -> incomplete(key, <>, Handler, Stack, Config); key(<<>>, Handler, Stack, Config) -> incomplete(key, <<>>, Handler, Stack, Config); @@ -752,10 +763,6 @@ zero(<>, Handler, Acc, Stack, Config) -> decimal(Rest, Handler, acc_seq(Acc, ?decimalpoint), Stack, Config); zero(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> e(Rest, Handler, acc_seq(Acc, ".0e"), Stack, Config); -zero(<<>>, Handler, Acc, [], Config=#config{stream=false}) -> - finish_number(<<>>, Handler, {zero, Acc}, [], Config); -zero(<<>>, Handler, Acc, Stack, Config) -> - incomplete(value, (end_seq(Acc)), Handler, Stack, Config); zero(Bin, Handler, Acc, Stack, Config) -> finish_number(Bin, Handler, {zero, Acc}, Stack, Config). 
@@ -822,27 +829,18 @@ finish_number(<>, Handler, Acc, [array|Stack], Config) -> value(Rest, handle_event(format_number(Acc), Handler, Config), [array|Stack], Config); finish_number(<>, Handler, Acc, Stack, Config) when ?is_whitespace(S) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config); -finish_number(<>, Handler, Acc, Stack, Config=#config{comments=true}) -> +finish_number(<>, Handler, {NumType, Acc}, Stack, Config=#config{no_comments=true}) -> + ?error(NumType, <>, Handler, Acc, Stack, Config); +finish_number(<>, Handler, Acc, Stack, Config) -> comment(Rest, handle_event(format_number(Acc), Handler, Config), maybe_done, [comment|Stack], Config); -finish_number(<>, Handler, Acc, Stack, Config=#config{comments=true}) -> +finish_number(<>, Handler, Acc, Stack, Config) -> comment(Rest, handle_event(format_number(Acc), Handler, Config), maybe_done, [multicomment|Stack], Config); -finish_number(<>, Handler, Acc, Stack, Config=#config{comments=true}) -> +finish_number(<>, Handler, Acc, Stack, Config) -> incomplete(maybe_done, <>, handle_event(format_number(Acc), Handler, Config), Stack, Config); finish_number(<<>>, Handler, {NumType, Acc}, Stack, Config) -> - case NumType of - integer -> incomplete(integer, <<>>, Handler, Acc, Stack, Config); - decimal -> incomplete(decimal, <<>>, Handler, Acc, Stack, Config); - exp -> incomplete(exp, <<>>, Handler, Acc, Stack, Config) - end; + incomplete(NumType, <<>>, Handler, Acc, Stack, Config); finish_number(Bin, Handler, {NumType, Acc}, Stack, Config) -> - case NumType of - integer -> ?error(integer, Bin, Handler, Acc, Stack, Config); - decimal -> ?error(decimal, Bin, Handler, Acc, Stack, Config); - exp -> ?error(exp, Bin, Handler, Acc, Stack, Config); - zero -> - [$0|OldAcc] = Acc, - ?error(value, <<$0, Bin/binary>>, Handler, OldAcc, Stack, Config) - end. + ?error(NumType, Bin, Handler, Acc, Stack, Config). 
format_number({zero, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))}; format_number({integer, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))}; @@ -926,11 +924,13 @@ maybe_done(<>, Handler, [array|_] = Stack, Config) -> value(Rest, Handler, Stack, Config); maybe_done(<>, Handler, Stack, Config) when ?is_whitespace(S) -> maybe_done(Rest, Handler, Stack, Config); -maybe_done(<>, Handler, Stack, Config=#config{comments=true}) -> +maybe_done(<>, Handler, Stack, Config=#config{no_comments=true}) -> + ?error(maybe_done, <>, Handler, Stack, Config); +maybe_done(<>, Handler, Stack, Config) -> comment(Rest, Handler, maybe_done, [comment|Stack], Config); -maybe_done(<>, Handler, Stack, Config=#config{comments=true}) -> +maybe_done(<>, Handler, Stack, Config) -> comment(Rest, Handler, maybe_done, [multicomment|Stack], Config); -maybe_done(<>, Handler, Stack, Config=#config{comments=true}) -> +maybe_done(<>, Handler, Stack, Config) -> incomplete(maybe_done, <>, Handler, Stack, Config); maybe_done(<<>>, Handler, Stack, Config) when length(Stack) > 0 -> incomplete(maybe_done, <<>>, Handler, Stack, Config); @@ -940,11 +940,13 @@ maybe_done(Bin, Handler, Stack, Config) -> done(<>, Handler, [], Config) when ?is_whitespace(S) -> done(Rest, Handler, [], Config); -done(<>, Handler, Stack, Config=#config{comments=true}) -> +done(<>, Handler, Stack, Config=#config{no_comments=true}) -> + ?error(done, <>, Handler, Stack, Config); +done(<>, Handler, Stack, Config) -> comment(Rest, Handler, done, [comment|Stack], Config); -done(<>, Handler, Stack, Config=#config{comments=true}) -> +done(<>, Handler, Stack, Config) -> comment(Rest, Handler, done, [multicomment|Stack], Config); -done(<>, Handler, Stack, Config=#config{comments=true}) -> +done(<>, Handler, Stack, Config) -> incomplete(done, <>, Handler, Stack, Config); done(<<>>, {Handler, State}, [], Config=#config{stream=true}) -> incomplete(done, <<>>, {Handler, State}, [], Config); @@ -1041,87 +1043,87 @@ comments_test_() 
-> [ {"preceeding // comment", ?_assertEqual( [start_array, end_array, end_json], - decode(<<"// comment ", ?newline, "[]">>, [comments]) + decode(<<"// comment ", ?newline, "[]">>, []) )}, {"preceeding /**/ comment", ?_assertEqual( [start_array, end_array, end_json], - decode(<<"/* comment */[]">>, [comments]) + decode(<<"/* comment */[]">>, []) )}, {"trailing // comment", ?_assertEqual( [start_array, end_array, end_json], - decode(<<"[]// comment", ?newline>>, [comments]) + decode(<<"[]// comment", ?newline>>, []) )}, {"trailing // comment (no newline)", ?_assertEqual( [start_array, end_array, end_json], - decode(<<"[]// comment">>, [comments]) + decode(<<"[]// comment">>, []) )}, {"trailing /**/ comment", ?_assertEqual( [start_array, end_array, end_json], - decode(<<"[] /* comment */">>, [comments]) + decode(<<"[] /* comment */">>, []) )}, {"// comment inside array", ?_assertEqual( [start_array, end_array, end_json], - decode(<<"[ // comment", ?newline, "]">>, [comments]) + decode(<<"[ // comment", ?newline, "]">>, []) )}, {"/**/ comment inside array", ?_assertEqual( [start_array, end_array, end_json], - decode(<<"[ /* comment */ ]">>, [comments]) + decode(<<"[ /* comment */ ]">>, []) )}, {"// comment at beginning of array", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, [comments]) + decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, []) )}, {"/**/ comment at beginning of array", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment */ true ]">>, [comments]) + decode(<<"[ /* comment */ true ]">>, []) )}, {"// comment at end of array", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ true // comment", ?newline, "]">>, [comments]) + decode(<<"[ true // comment", ?newline, "]">>, []) )}, {"/**/ comment at end of array", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ 
true /* comment */ ]">>, [comments]) + decode(<<"[ true /* comment */ ]">>, []) )}, {"// comment midarray (post comma)", ?_assertEqual( [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true, // comment", ?newline, "false ]">>, [comments]) + decode(<<"[ true, // comment", ?newline, "false ]">>, []) )}, {"/**/ comment midarray (post comma)", ?_assertEqual( [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true, /* comment */ false ]">>, [comments]) + decode(<<"[ true, /* comment */ false ]">>, []) )}, {"// comment midarray (pre comma)", ?_assertEqual( [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true// comment", ?newline, ", false ]">>, [comments]) + decode(<<"[ true// comment", ?newline, ", false ]">>, []) )}, {"/**/ comment midarray (pre comma)", ?_assertEqual( [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true/* comment */, false ]">>, [comments]) + decode(<<"[ true/* comment */, false ]">>, []) )}, {"// comment inside object", ?_assertEqual( [start_object, end_object, end_json], - decode(<<"{ // comment", ?newline, "}">>, [comments]) + decode(<<"{ // comment", ?newline, "}">>, []) )}, {"/**/ comment inside object", ?_assertEqual( [start_object, end_object, end_json], - decode(<<"{ /* comment */ }">>, [comments]) + decode(<<"{ /* comment */ }">>, []) )}, {"// comment at beginning of object", ?_assertEqual( [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, [comments]) + decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, []) )}, {"/**/ comment at beginning of object", ?_assertEqual( [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ /* comment */ \"key\": true }">>, [comments]) + decode(<<"{ /* comment */ \"key\": true }">>, []) )}, {"// comment at end of object", 
?_assertEqual( [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": true // comment", ?newline, "}">>, [comments]) + decode(<<"{ \"key\": true // comment", ?newline, "}">>, []) )}, {"/**/ comment at end of object", ?_assertEqual( [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": true /* comment */ }">>, [comments]) + decode(<<"{ \"key\": true /* comment */ }">>, []) )}, {"// comment midobject (post comma)", ?_assertEqual( [ @@ -1133,7 +1135,7 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, [comments]) + decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, []) )}, {"/**/ comment midobject (post comma)", ?_assertEqual( [ @@ -1145,7 +1147,7 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">>, [comments]) + decode(<<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">>, []) )}, {"// comment midobject (pre comma)", ?_assertEqual( [ @@ -1157,7 +1159,7 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, [comments]) + decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, []) )}, {"/**/ comment midobject (pre comma)", ?_assertEqual( [ @@ -1169,95 +1171,289 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, [comments]) + decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, []) )}, {"// comment precolon", ?_assertEqual( [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\" // comment", ?newline, ": true }">>, [comments]) + decode(<<"{ \"key\" // comment", ?newline, ": true }">>, []) )}, {"/**/ comment precolon", ?_assertEqual( [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\"/* comment 
*/: true }">>, [comments]) + decode(<<"{ \"key\"/* comment */: true }">>, []) )}, {"// comment postcolon", ?_assertEqual( [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": // comment", ?newline, " true }">>, [comments]) + decode(<<"{ \"key\": // comment", ?newline, " true }">>, []) )}, {"/**/ comment postcolon", ?_assertEqual( [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\":/* comment */ true }">>, [comments]) + decode(<<"{ \"key\":/* comment */ true }">>, []) )}, {"// comment terminating zero", ?_assertEqual( [start_array, {integer, 0}, end_array, end_json], - decode(<<"[ 0// comment", ?newline, "]">>, [comments]) + decode(<<"[ 0// comment", ?newline, "]">>, []) )}, {"// comment terminating integer", ?_assertEqual( [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1// comment", ?newline, "]">>, [comments]) + decode(<<"[ 1// comment", ?newline, "]">>, []) )}, {"// comment terminating float", ?_assertEqual( [start_array, {float, 1.0}, end_array, end_json], - decode(<<"[ 1.0// comment", ?newline, "]">>, [comments]) + decode(<<"[ 1.0// comment", ?newline, "]">>, []) )}, {"// comment terminating exp", ?_assertEqual( [start_array, {float, 1.0e1}, end_array, end_json], - decode(<<"[ 1e1// comment", ?newline, "]">>, [comments]) + decode(<<"[ 1e1// comment", ?newline, "]">>, []) )}, {"/**/ comment terminating zero", ?_assertEqual( [start_array, {integer, 0}, end_array, end_json], - decode(<<"[ 0/* comment */ ]">>, [comments]) + decode(<<"[ 0/* comment */ ]">>, []) )}, {"/**/ comment terminating integer", ?_assertEqual( [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1/* comment */ ]">>, [comments]) + decode(<<"[ 1/* comment */ ]">>, []) )}, {"/**/ comment terminating float", ?_assertEqual( [start_array, {float, 1.0}, end_array, end_json], - decode(<<"[ 1.0/* comment */ ]">>, [comments]) + decode(<<"[ 1.0/* comment */ ]">>, []) )}, {"/**/ comment 
terminating exp", ?_assertEqual( [start_array, {float, 1.0e1}, end_array, end_json], - decode(<<"[ 1e1/* comment */ ]">>, [comments]) + decode(<<"[ 1e1/* comment */ ]">>, []) )}, {"/**/ comment following /**/ comment", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[/* comment *//* comment */true]">>, [comments]) + decode(<<"[/* comment *//* comment */true]">>, []) )}, {"/**/ comment following // comment", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[// comment", ?newline, "/* comment */true]">>, [comments]) + decode(<<"[// comment", ?newline, "/* comment */true]">>, []) )}, {"// comment following /**/ comment", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[/* comment */// comment", ?newline, "true]">>, [comments]) + decode(<<"[/* comment */// comment", ?newline, "true]">>, []) )}, {"// comment following // comment", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, [comments]) + decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, []) )}, {"/**/ comment inside /**/ comment", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* /* comment */ */ true ]">>, [comments]) + decode(<<"[ /* /* comment */ */ true ]">>, []) )}, {"/**/ comment with /", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* / */ true ]">>, [comments]) + decode(<<"[ /* / */ true ]">>, []) )}, {"/**/ comment with *", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* * */ true ]">>, [comments]) + decode(<<"[ /* * */ true ]">>, []) )}, {"// comment with badutf", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, [comments]) + decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, []) )}, {"/**/ comment with 
badutf", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment ", 16#00c0, " */ true]">>, [comments]) + decode(<<"[ /* comment ", 16#00c0, " */ true]">>, []) )}, {"/**/ comment with badutf preceeded by /", ?_assertEqual( [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment /", 16#00c0, " */ true]">>, [comments]) + decode(<<"[ /* comment /", 16#00c0, " */ true]">>, []) + )} + ]. + + +no_comments_test_() -> + Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, + [ + {"preceeding // comment", ?_assertError( + badarg, + Decode(<<"// comment ", ?newline, "[]">>, [no_comments]) + )}, + {"preceeding /**/ comment", ?_assertError( + badarg, + Decode(<<"/* comment */[]">>, [no_comments]) + )}, + {"trailing // comment", ?_assertError( + badarg, + Decode(<<"[]// comment", ?newline>>, [no_comments]) + )}, + {"trailing // comment (no newline)", ?_assertError( + badarg, + Decode(<<"[]// comment">>, [no_comments]) + )}, + {"trailing /**/ comment", ?_assertError( + badarg, + Decode(<<"[] /* comment */">>, [no_comments]) + )}, + {"// comment inside array", ?_assertError( + badarg, + Decode(<<"[ // comment", ?newline, "]">>, [no_comments]) + )}, + {"/**/ comment inside array", ?_assertError( + badarg, + Decode(<<"[ /* comment */ ]">>, [no_comments]) + )}, + {"// comment at beginning of array", ?_assertError( + badarg, + Decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, [no_comments]) + )}, + {"/**/ comment at beginning of array", ?_assertError( + badarg, + Decode(<<"[ /* comment */ true ]">>, [no_comments]) + )}, + {"// comment at end of array", ?_assertError( + badarg, + Decode(<<"[ true // comment", ?newline, "]">>, [no_comments]) + )}, + {"/**/ comment at end of array", ?_assertError( + badarg, + Decode(<<"[ true /* comment */ ]">>, [no_comments]) + )}, + {"// comment midarray (post comma)", ?_assertError( + badarg, + Decode(<<"[ true, // comment", ?newline, "false 
]">>, [no_comments]) + )}, + {"/**/ comment midarray (post comma)", ?_assertError( + badarg, + Decode(<<"[ true, /* comment */ false ]">>, [no_comments]) + )}, + {"// comment midarray (pre comma)", ?_assertError( + badarg, + Decode(<<"[ true// comment", ?newline, ", false ]">>, [no_comments]) + )}, + {"/**/ comment midarray (pre comma)", ?_assertError( + badarg, + Decode(<<"[ true/* comment */, false ]">>, [no_comments]) + )}, + {"// comment inside object", ?_assertError( + badarg, + Decode(<<"{ // comment", ?newline, "}">>, [no_comments]) + )}, + {"/**/ comment inside object", ?_assertError( + badarg, + Decode(<<"{ /* comment */ }">>, [no_comments]) + )}, + {"// comment at beginning of object", ?_assertError( + badarg, + Decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, [no_comments]) + )}, + {"/**/ comment at beginning of object", ?_assertError( + badarg, + Decode(<<"{ /* comment */ \"key\": true }">>, [no_comments]) + )}, + {"// comment at end of object", ?_assertError( + badarg, + Decode(<<"{ \"key\": true // comment", ?newline, "}">>, [no_comments]) + )}, + {"/**/ comment at end of object", ?_assertError( + badarg, + Decode(<<"{ \"key\": true /* comment */ }">>, [no_comments]) + )}, + {"// comment midobject (post comma)", ?_assertError( + badarg, + Decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, [no_comments]) + )}, + {"/**/ comment midobject (post comma)", ?_assertError( + badarg, + Decode(<<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">>, [no_comments]) + )}, + {"// comment midobject (pre comma)", ?_assertError( + badarg, + Decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, [no_comments]) + )}, + {"/**/ comment midobject (pre comma)", ?_assertError( + badarg, + Decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, [no_comments]) + )}, + {"// comment precolon", ?_assertError( + badarg, + Decode(<<"{ \"key\" // comment", ?newline, ": true }">>, [no_comments]) + )}, + {"/**/ 
comment precolon", ?_assertError( + badarg, + Decode(<<"{ \"key\"/* comment */: true }">>, [no_comments]) + )}, + {"// comment postcolon", ?_assertError( + badarg, + Decode(<<"{ \"key\": // comment", ?newline, " true }">>, [no_comments]) + )}, + {"/**/ comment postcolon", ?_assertError( + badarg, + Decode(<<"{ \"key\":/* comment */ true }">>, [no_comments]) + )}, + {"// comment terminating zero", ?_assertError( + badarg, + Decode(<<"[ 0// comment", ?newline, "]">>, [no_comments]) + )}, + {"// comment terminating integer", ?_assertError( + badarg, + Decode(<<"[ 1// comment", ?newline, "]">>, [no_comments]) + )}, + {"// comment terminating float", ?_assertError( + badarg, + Decode(<<"[ 1.0// comment", ?newline, "]">>, [no_comments]) + )}, + {"// comment terminating exp", ?_assertError( + badarg, + Decode(<<"[ 1e1// comment", ?newline, "]">>, [no_comments]) + )}, + {"/**/ comment terminating zero", ?_assertError( + badarg, + Decode(<<"[ 0/* comment */ ]">>, [no_comments]) + )}, + {"/**/ comment terminating integer", ?_assertError( + badarg, + Decode(<<"[ 1/* comment */ ]">>, [no_comments]) + )}, + {"/**/ comment terminating float", ?_assertError( + badarg, + Decode(<<"[ 1.0/* comment */ ]">>, [no_comments]) + )}, + {"/**/ comment terminating exp", ?_assertError( + badarg, + Decode(<<"[ 1e1/* comment */ ]">>, [no_comments]) + )}, + {"/**/ comment following /**/ comment", ?_assertError( + badarg, + Decode(<<"[/* comment *//* comment */true]">>, [no_comments]) + )}, + {"/**/ comment following // comment", ?_assertError( + badarg, + Decode(<<"[// comment", ?newline, "/* comment */true]">>, [no_comments]) + )}, + {"// comment following /**/ comment", ?_assertError( + badarg, + Decode(<<"[/* comment */// comment", ?newline, "true]">>, [no_comments]) + )}, + {"// comment following // comment", ?_assertError( + badarg, + Decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, [no_comments]) + )}, + {"/**/ comment inside /**/ comment", ?_assertError( + badarg, + 
Decode(<<"[ /* /* comment */ */ true ]">>, [no_comments]) + )}, + {"/**/ comment with /", ?_assertError( + badarg, + Decode(<<"[ /* / */ true ]">>, [no_comments]) + )}, + {"/**/ comment with *", ?_assertError( + badarg, + Decode(<<"[ /* * */ true ]">>, [no_comments]) + )}, + {"// comment with badutf", ?_assertError( + badarg, + Decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, [no_comments]) + )}, + {"/**/ comment with badutf", ?_assertError( + badarg, + Decode(<<"[ /* comment ", 16#00c0, " */ true]">>, [no_comments]) + )}, + {"/**/ comment with badutf preceeded by /", ?_assertError( + badarg, + Decode(<<"[ /* comment /", 16#00c0, " */ true]">>, [no_comments]) )} ]. @@ -2004,15 +2200,15 @@ error_test_() -> )}, {"comment error", ?_assertError( badarg, - Decode(<<"[ / ]">>, [comments]) + Decode(<<"[ / ]">>, []) )}, {"single_comment error", ?_assertError( badarg, - Decode(<<"[ //"/utf8, 192>>, [comments]) + Decode(<<"[ //"/utf8, 192>>, []) )}, {"multi_comment error", ?_assertError( badarg, - Decode(<<"[ /*"/utf8, 192>>, [comments]) + Decode(<<"[ /*"/utf8, 192>>, []) )} ]. 
@@ -2050,7 +2246,7 @@ custom_error_handler_test_() -> Decode(<<"-"/utf8, 0>>, [{error_handler, Error}]) )}, {"zero error", ?_assertEqual( - {value, <<"0"/utf8, 0>>}, + {zero, <<0>>}, Decode(<<"0"/utf8, 0>>, [stream, {error_handler, Error}]) )}, {"integer error", ?_assertEqual( @@ -2099,15 +2295,15 @@ custom_error_handler_test_() -> )}, {"comment error", ?_assertEqual( {value, <<"/ ]"/utf8>>}, - Decode(<<"[ / ]">>, [{error_handler, Error}, comments]) + Decode(<<"[ / ]">>, [{error_handler, Error}]) )}, {"single_comment error", ?_assertEqual( {comment, <<192>>}, - Decode(<<"[ //"/utf8, 192>>, [{error_handler, Error}, comments, strict_utf8]) + Decode(<<"[ //"/utf8, 192>>, [{error_handler, Error}, strict_utf8]) )}, {"multi_comment error", ?_assertEqual( {comment, <<192>>}, - Decode(<<"[ /*"/utf8, 192>>, [{error_handler, Error}, comments, strict_utf8]) + Decode(<<"[ /*"/utf8, 192>>, [{error_handler, Error}, strict_utf8]) )} ]. From 99484ce4f266f079e64e9a20930ecd46cbaf456d Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 20 Oct 2013 03:23:38 +0000 Subject: [PATCH 12/40] minor refactor of jsx_decoder --- src/jsx_decoder.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index e3587b4..4d29b96 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -590,21 +590,21 @@ string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false string(Bin, Handler, Acc, Stack, Config) -> ?error(string, Bin, Handler, Acc, Stack, Config). 
-doublequote(<>, Handler, Acc, [key|_] = Stack, Config) -> +doublequote(Rest, Handler, Acc, [key|_] = Stack, Config) -> colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), Stack, Config); -doublequote(<>, Handler, Acc, [singlequote|_] = Stack, Config) -> +doublequote(Rest, Handler, Acc, [singlequote|_] = Stack, Config) -> string(Rest, Handler,acc_seq(Acc, maybe_replace(?doublequote, Config)), Stack, Config); doublequote(<<>>, Handler, Acc, [singlequote|_] = Stack, Config) -> incomplete(string, <>, Handler, Acc, Stack, Config); -doublequote(<>, Handler, Acc, Stack, Config) -> +doublequote(Rest, Handler, Acc, Stack, Config) -> maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config). -singlequote(<>, Handler, Acc, [singlequote, key|Stack], Config) -> +singlequote(Rest, Handler, Acc, [singlequote, key|Stack], Config) -> colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), [key|Stack], Config); -singlequote(<>, Handler, Acc, [singlequote|Stack], Config) -> +singlequote(Rest, Handler, Acc, [singlequote|Stack], Config) -> maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config); -singlequote(<>, Handler, Acc, Stack, Config) -> +singlequote(Rest, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config). 
From f340e8dc220f8923c655739fe0170432e228595b Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 20 Oct 2013 03:28:22 +0000 Subject: [PATCH 13/40] refactor options to complete mission: more pragmatism --- src/jsx_config.erl | 179 ++++++++++++++++++++++-------------- src/jsx_config.hrl | 10 +- src/jsx_decoder.erl | 217 ++++++++++++++++++++++---------------------- src/jsx_encoder.erl | 4 +- src/jsx_parser.erl | 2 +- 5 files changed, 229 insertions(+), 183 deletions(-) diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 1b5ff79..2816b0e 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -35,34 +35,27 @@ %% parsing of jsx config -parse_config(Config) -> - parse_config(Config, #config{}). +parse_config(Config) -> parse_config(Config, #config{}). -parse_config([], Config) -> - Config; -parse_config([strict_utf8|Rest], Config) -> - parse_config(Rest, Config#config{strict_utf8=true}); +parse_config([], Config) -> Config; parse_config([escaped_forward_slashes|Rest], Config) -> parse_config(Rest, Config#config{escaped_forward_slashes=true}); -parse_config([stream|Rest], Config) -> - parse_config(Rest, Config#config{stream=true}); -parse_config([single_quoted_strings|Rest], Config) -> - parse_config(Rest, Config#config{single_quoted_strings=true}); -parse_config([unescaped_jsonp|Rest], Config) -> - parse_config(Rest, Config#config{unescaped_jsonp=true}); -parse_config([no_comments|Rest], Config) -> - parse_config(Rest, Config#config{no_comments=true}); parse_config([escaped_strings|Rest], Config) -> parse_config(Rest, Config#config{escaped_strings=true}); +parse_config([unescaped_jsonp|Rest], Config) -> + parse_config(Rest, Config#config{unescaped_jsonp=true}); parse_config([dirty_strings|Rest], Config) -> parse_config(Rest, Config#config{dirty_strings=true}); -parse_config([ignored_bad_escapes|Rest], Config) -> - parse_config(Rest, Config#config{ignored_bad_escapes=true}); -parse_config([relax|Rest], Config) -> - parse_config(Rest, Config#config{ - 
single_quoted_strings = true, - ignored_bad_escapes = true +parse_config([strict|Rest], Config) -> + parse_config(Rest, Config#config{strict_comments=true, + strict_utf8=true, + strict_single_quotes=true, + strict_escapes=true }); +parse_config([{strict, Strict}|Rest], Config) -> + parse_strict(Strict, Rest, Config); +parse_config([stream|Rest], Config) -> + parse_config(Rest, Config#config{stream=true}); parse_config([{error_handler, ErrorHandler}|Rest] = Options, Config) when is_function(ErrorHandler, 3) -> case Config#config.error_handler of false -> parse_config(Rest, Config#config{error_handler=ErrorHandler}) @@ -73,13 +66,25 @@ parse_config([{incomplete_handler, IncompleteHandler}|Rest] = Options, Config) w false -> parse_config(Rest, Config#config{incomplete_handler=IncompleteHandler}) ; _ -> erlang:error(badarg, [Options, Config]) end; -%% deprecated flags -parse_config(Options, Config) -> - erlang:error(badarg, [Options, Config]). +parse_config(_Options, _Config) -> erlang:error(badarg). + + +parse_strict([], Rest, Config) -> parse_config(Rest, Config); +parse_strict([comments|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_comments=true}); +parse_strict([utf8|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_utf8=true}); +parse_strict([single_quotes|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_single_quotes=true}); +parse_strict([escapes|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_escapes=true}); +parse_strict(_Strict, _Rest, _Config) -> + erlang:error(badarg). + config_to_list(Config) -> - lists:map( + reduce_config(lists:map( fun ({error_handler, F}) -> {error_handler, F}; ({incomplete_handler, F}) -> {incomplete_handler, F}; ({Key, true}) -> Key @@ -88,21 +93,37 @@ config_to_list(Config) -> fun({_, false}) -> false; (_) -> true end, lists:zip(record_info(fields, config), tl(tuple_to_list(Config))) ) - ). + )). 
+ + +reduce_config(Input) -> reduce_config(Input, [], []). + +reduce_config([], Output, Strict) -> + case length(Strict) of + 0 -> lists:reverse(Output); + 4 -> lists:reverse(Output) ++ [strict]; + _ -> lists:reverse(Output) ++ [{strict, lists:reverse(Strict)}] + end; +reduce_config([strict_comments|Input], Output, Strict) -> + reduce_config(Input, Output, [comments] ++ Strict); +reduce_config([strict_utf8|Input], Output, Strict) -> + reduce_config(Input, Output, [utf8] ++ Strict); +reduce_config([strict_single_quotes|Input], Output, Strict) -> + reduce_config(Input, Output, [single_quotes] ++ Strict); +reduce_config([strict_escapes|Input], Output, Strict) -> + reduce_config(Input, Output, [escapes] ++ Strict); +reduce_config([Else|Input], Output, Strict) -> + reduce_config(Input, [Else] ++ Output, Strict). valid_flags() -> [ - strict_utf8, escaped_forward_slashes, - single_quoted_strings, - unescaped_jsonp, - no_comments, escaped_strings, + unescaped_jsonp, dirty_strings, - ignored_bad_escapes, + strict, stream, - relax, error_handler, incomplete_handler ]. 
@@ -133,35 +154,43 @@ config_test_() -> [ {"all flags", ?_assertEqual( - #config{ - strict_utf8=true, - escaped_forward_slashes=true, - stream=true, - single_quoted_strings=true, - unescaped_jsonp=true, - no_comments=true, - dirty_strings=true, - ignored_bad_escapes=true + #config{escaped_forward_slashes = true, + escaped_strings = true, + unescaped_jsonp = true, + dirty_strings = true, + strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = true, + stream = true }, - parse_config([ - strict_utf8, - escaped_forward_slashes, - stream, - single_quoted_strings, + parse_config([escaped_forward_slashes, + escaped_strings, unescaped_jsonp, - no_comments, dirty_strings, - ignored_bad_escapes + strict, + stream ]) ) }, - {"relax flag", + {"strict flag", ?_assertEqual( - #config{ - single_quoted_strings=true, - ignored_bad_escapes=true + #config{strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = true }, - parse_config([relax]) + parse_config([strict]) + ) + }, + {"strict expanded", + ?_assertEqual( + #config{strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = true + }, + parse_config([{strict, [comments, utf8, single_quotes, escapes]}]) ) }, {"error_handler flag", ?_assertEqual( @@ -186,7 +215,7 @@ config_test_() -> {incomplete_handler, fun(_) -> false end} ]) )}, - {"bad option flag", ?_assertError(badarg, parse_config([error]))} + {"bad option flag", ?_assertError(badarg, parse_config([this_flag_does_not_exist]))} ]. 
@@ -197,29 +226,41 @@ config_to_list_test_() -> config_to_list(#config{}) )}, {"all flags", ?_assertEqual( - [ - strict_utf8, - escaped_forward_slashes, - single_quoted_strings, + [escaped_forward_slashes, + escaped_strings, unescaped_jsonp, - no_comments, dirty_strings, - ignored_bad_escapes, - stream + stream, + strict ], config_to_list( - #config{ - strict_utf8=true, - escaped_forward_slashes=true, - stream=true, - single_quoted_strings=true, - unescaped_jsonp=true, - no_comments=true, - dirty_strings=true, - ignored_bad_escapes=true + #config{escaped_forward_slashes = true, + escaped_strings = true, + unescaped_jsonp = true, + dirty_strings = true, + strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = true, + stream = true } ) )}, + {"single strict", ?_assertEqual( + [{strict, [comments]}], + config_to_list(#config{strict_comments = true}) + )}, + {"multiple strict", ?_assertEqual( + [{strict, [utf8, single_quotes, escapes]}], + config_to_list(#config{strict_utf8 = true, strict_single_quotes = true, strict_escapes = true}) + )}, + {"all strict", ?_assertEqual( + [strict], + config_to_list(#config{strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = true}) + )}, {"error handler", ?_assertEqual( [{error_handler, fun ?MODULE:fake_error_handler/3}], config_to_list(#config{error_handler=fun ?MODULE:fake_error_handler/3}) diff --git a/src/jsx_config.hrl b/src/jsx_config.hrl index 5060254..ae333fd 100644 --- a/src/jsx_config.hrl +++ b/src/jsx_config.hrl @@ -1,12 +1,12 @@ -record(config, { - strict_utf8 = false, escaped_forward_slashes = false, - single_quoted_strings = false, - unescaped_jsonp = false, - no_comments = false, escaped_strings = false, + unescaped_jsonp = false, dirty_strings = false, - ignored_bad_escapes = false, + strict_comments = false, + strict_utf8 = false, + strict_single_quotes = false, + strict_escapes = false, stream = false, error_handler = false, 
incomplete_handler = false diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 4d29b96..58ba3d8 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -195,7 +195,7 @@ start(Bin, Handler, Stack, Config) -> value(<>, Handler, Stack, Config) -> string(Rest, Handler, new_seq(), Stack, Config); -value(<>, Handler, Stack, Config=#config{single_quoted_strings=true}) -> +value(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> string(Rest, Handler, new_seq(), [singlequote|Stack], Config); value(<<$t, Rest/binary>>, Handler, Stack, Config) -> true(Rest, Handler, Stack, Config); @@ -215,7 +215,7 @@ value(<>, Handler, Stack, Config) -> array(Rest, handle_event(start_array, Handler, Config), [array|Stack], Config); value(<>, Handler, Stack, Config) when ?is_whitespace(S) -> value(Rest, Handler, Stack, Config); -value(<>, Handler, Stack, Config=#config{no_comments=true}) -> +value(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(value, <>, Handler, Stack, Config); value(<>, Handler, Stack, Config) -> comment(Rest, Handler, value, [comment|Stack], Config); @@ -231,13 +231,13 @@ value(Bin, Handler, Stack, Config) -> object(<>, Handler, Stack, Config) -> string(Rest, Handler, new_seq(), Stack, Config); -object(<>, Handler, Stack, Config=#config{single_quoted_strings=true}) -> +object(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> string(Rest, Handler, new_seq(), [singlequote|Stack], Config); object(<>, Handler, [key|Stack], Config) -> maybe_done(Rest, handle_event(end_object, Handler, Config), Stack, Config); object(<>, Handler, Stack, Config) when ?is_whitespace(S) -> object(Rest, Handler, Stack, Config); -object(<>, Handler, Stack, Config=#config{no_comments=true}) -> +object(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(object, <>, Handler, Stack, Config); object(<>, Handler, Stack, Config) -> comment(Rest, Handler, object, [comment|Stack], Config); @@ -255,7 +255,7 @@ array(<>, 
Handler, [array|Stack], Config) -> maybe_done(Rest, handle_event(end_array, Handler, Config), Stack, Config); array(<>, Handler, Stack, Config) when ?is_whitespace(S) -> array(Rest, Handler, Stack, Config); -array(<>, Handler, Stack, Config=#config{no_comments=true}) -> +array(<>, Handler, Stack, Config=#config{strict_comments=true}) -> value(<>, Handler, Stack, Config); array(<>, Handler, Stack, Config) -> comment(Rest, Handler, array, [comment|Stack], Config); @@ -273,7 +273,7 @@ colon(<>, Handler, [key|Stack], Config) -> value(Rest, Handler, [object|Stack], Config); colon(<>, Handler, Stack, Config) when ?is_whitespace(S) -> colon(Rest, Handler, Stack, Config); -colon(<>, Handler, Stack, Config=#config{no_comments=true}) -> +colon(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(colon, <>, Handler, Stack, Config); colon(<>, Handler, Stack, Config) -> comment(Rest, Handler, colon, [comment|Stack], Config); @@ -289,11 +289,11 @@ colon(Bin, Handler, Stack, Config) -> key(<>, Handler, Stack, Config) -> string(Rest, Handler, new_seq(), Stack, Config); -key(<>, Handler, Stack, Config=#config{single_quoted_strings=true}) -> +key(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> string(Rest, Handler, new_seq(), [singlequote|Stack], Config); key(<>, Handler, Stack, Config) when ?is_whitespace(S) -> key(Rest, Handler, Stack, Config); -key(<>, Handler, Stack, Config=#config{no_comments=true}) -> +key(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(key, <>, Handler, Stack, Config); key(<>, Handler, Stack, Config) -> comment(Rest, Handler, key, [comment|Stack], Config); @@ -649,8 +649,8 @@ unescape(<<$t, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, maybe_replace($\t, Config)), Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, maybe_replace($\", Config)), Stack, Config); -unescape(<>, Handler, Acc, Stack, 
Config=#config{single_quoted_strings=true}) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace(?singlequote, Config)), Stack, Config); +unescape(<>, Handler, Acc, Stack, Config=#config{strict_single_quotes=false}) -> + string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, maybe_replace($\\, Config)), Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> @@ -691,12 +691,13 @@ unescape(<<$u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config) ?error(string, <>, Handler, Acc, Stack, Config); _ -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config) end; -unescape(Bin, Handler, Acc, Stack, Config=#config{ignored_bad_escapes=true}) -> - string(Bin, Handler, acc_seq(Acc, ?rsolidus), Stack, Config); unescape(Bin, Handler, Acc, Stack, Config) -> case is_partial_escape(Bin) of true -> incomplete(string, <>, Handler, Acc, Stack, Config); - false -> ?error(string, <>, Handler, Acc, Stack, Config) + false -> case Config#config.strict_escapes of + true -> ?error(string, <>, Handler, Acc, Stack, Config); + false -> string(Bin, Handler, acc_seq(Acc, ?rsolidus), Stack, Config) + end end. 
@@ -829,7 +830,7 @@ finish_number(<>, Handler, Acc, [array|Stack], Config) -> value(Rest, handle_event(format_number(Acc), Handler, Config), [array|Stack], Config); finish_number(<>, Handler, Acc, Stack, Config) when ?is_whitespace(S) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config); -finish_number(<>, Handler, {NumType, Acc}, Stack, Config=#config{no_comments=true}) -> +finish_number(<>, Handler, {NumType, Acc}, Stack, Config=#config{strict_comments=true}) -> ?error(NumType, <>, Handler, Acc, Stack, Config); finish_number(<>, Handler, Acc, Stack, Config) -> comment(Rest, handle_event(format_number(Acc), Handler, Config), maybe_done, [comment|Stack], Config); @@ -924,7 +925,7 @@ maybe_done(<>, Handler, [array|_] = Stack, Config) -> value(Rest, Handler, Stack, Config); maybe_done(<>, Handler, Stack, Config) when ?is_whitespace(S) -> maybe_done(Rest, Handler, Stack, Config); -maybe_done(<>, Handler, Stack, Config=#config{no_comments=true}) -> +maybe_done(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(maybe_done, <>, Handler, Stack, Config); maybe_done(<>, Handler, Stack, Config) -> comment(Rest, Handler, maybe_done, [comment|Stack], Config); @@ -940,7 +941,7 @@ maybe_done(Bin, Handler, Stack, Config) -> done(<>, Handler, [], Config) when ?is_whitespace(S) -> done(Rest, Handler, [], Config); -done(<>, Handler, Stack, Config=#config{no_comments=true}) -> +done(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(done, <>, Handler, Stack, Config); done(<>, Handler, Stack, Config) -> comment(Rest, Handler, done, [comment|Stack], Config); @@ -1269,191 +1270,191 @@ no_comments_test_() -> [ {"preceeding // comment", ?_assertError( badarg, - Decode(<<"// comment ", ?newline, "[]">>, [no_comments]) + Decode(<<"// comment ", ?newline, "[]">>, [{strict, [comments]}]) )}, {"preceeding /**/ comment", ?_assertError( badarg, - Decode(<<"/* comment */[]">>, [no_comments]) + Decode(<<"/* comment */[]">>, 
[{strict, [comments]}]) )}, {"trailing // comment", ?_assertError( badarg, - Decode(<<"[]// comment", ?newline>>, [no_comments]) + Decode(<<"[]// comment", ?newline>>, [{strict, [comments]}]) )}, {"trailing // comment (no newline)", ?_assertError( badarg, - Decode(<<"[]// comment">>, [no_comments]) + Decode(<<"[]// comment">>, [{strict, [comments]}]) )}, {"trailing /**/ comment", ?_assertError( badarg, - Decode(<<"[] /* comment */">>, [no_comments]) + Decode(<<"[] /* comment */">>, [{strict, [comments]}]) )}, {"// comment inside array", ?_assertError( badarg, - Decode(<<"[ // comment", ?newline, "]">>, [no_comments]) + Decode(<<"[ // comment", ?newline, "]">>, [{strict, [comments]}]) )}, {"/**/ comment inside array", ?_assertError( badarg, - Decode(<<"[ /* comment */ ]">>, [no_comments]) + Decode(<<"[ /* comment */ ]">>, [{strict, [comments]}]) )}, {"// comment at beginning of array", ?_assertError( badarg, - Decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, [no_comments]) + Decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, [{strict, [comments]}]) )}, {"/**/ comment at beginning of array", ?_assertError( badarg, - Decode(<<"[ /* comment */ true ]">>, [no_comments]) + Decode(<<"[ /* comment */ true ]">>, [{strict, [comments]}]) )}, {"// comment at end of array", ?_assertError( badarg, - Decode(<<"[ true // comment", ?newline, "]">>, [no_comments]) + Decode(<<"[ true // comment", ?newline, "]">>, [{strict, [comments]}]) )}, {"/**/ comment at end of array", ?_assertError( badarg, - Decode(<<"[ true /* comment */ ]">>, [no_comments]) + Decode(<<"[ true /* comment */ ]">>, [{strict, [comments]}]) )}, {"// comment midarray (post comma)", ?_assertError( badarg, - Decode(<<"[ true, // comment", ?newline, "false ]">>, [no_comments]) + Decode(<<"[ true, // comment", ?newline, "false ]">>, [{strict, [comments]}]) )}, {"/**/ comment midarray (post comma)", ?_assertError( badarg, - Decode(<<"[ true, /* comment */ false ]">>, [no_comments]) + Decode(<<"[ 
true, /* comment */ false ]">>, [{strict, [comments]}]) )}, {"// comment midarray (pre comma)", ?_assertError( badarg, - Decode(<<"[ true// comment", ?newline, ", false ]">>, [no_comments]) + Decode(<<"[ true// comment", ?newline, ", false ]">>, [{strict, [comments]}]) )}, {"/**/ comment midarray (pre comma)", ?_assertError( badarg, - Decode(<<"[ true/* comment */, false ]">>, [no_comments]) + Decode(<<"[ true/* comment */, false ]">>, [{strict, [comments]}]) )}, {"// comment inside object", ?_assertError( badarg, - Decode(<<"{ // comment", ?newline, "}">>, [no_comments]) + Decode(<<"{ // comment", ?newline, "}">>, [{strict, [comments]}]) )}, {"/**/ comment inside object", ?_assertError( badarg, - Decode(<<"{ /* comment */ }">>, [no_comments]) + Decode(<<"{ /* comment */ }">>, [{strict, [comments]}]) )}, {"// comment at beginning of object", ?_assertError( badarg, - Decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, [no_comments]) + Decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, [{strict, [comments]}]) )}, {"/**/ comment at beginning of object", ?_assertError( badarg, - Decode(<<"{ /* comment */ \"key\": true }">>, [no_comments]) + Decode(<<"{ /* comment */ \"key\": true }">>, [{strict, [comments]}]) )}, {"// comment at end of object", ?_assertError( badarg, - Decode(<<"{ \"key\": true // comment", ?newline, "}">>, [no_comments]) + Decode(<<"{ \"key\": true // comment", ?newline, "}">>, [{strict, [comments]}]) )}, {"/**/ comment at end of object", ?_assertError( badarg, - Decode(<<"{ \"key\": true /* comment */ }">>, [no_comments]) + Decode(<<"{ \"key\": true /* comment */ }">>, [{strict, [comments]}]) )}, {"// comment midobject (post comma)", ?_assertError( badarg, - Decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, [no_comments]) + Decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, [{strict, [comments]}]) )}, {"/**/ comment midobject (post comma)", ?_assertError( badarg, - Decode(<<"{ 
\"x\": true, /* comment */", ?newline, "\"y\": false }">>, [no_comments]) + Decode(<<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">>, [{strict, [comments]}]) )}, {"// comment midobject (pre comma)", ?_assertError( badarg, - Decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, [no_comments]) + Decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, [{strict, [comments]}]) )}, {"/**/ comment midobject (pre comma)", ?_assertError( badarg, - Decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, [no_comments]) + Decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, [{strict, [comments]}]) )}, {"// comment precolon", ?_assertError( badarg, - Decode(<<"{ \"key\" // comment", ?newline, ": true }">>, [no_comments]) + Decode(<<"{ \"key\" // comment", ?newline, ": true }">>, [{strict, [comments]}]) )}, {"/**/ comment precolon", ?_assertError( badarg, - Decode(<<"{ \"key\"/* comment */: true }">>, [no_comments]) + Decode(<<"{ \"key\"/* comment */: true }">>, [{strict, [comments]}]) )}, {"// comment postcolon", ?_assertError( badarg, - Decode(<<"{ \"key\": // comment", ?newline, " true }">>, [no_comments]) + Decode(<<"{ \"key\": // comment", ?newline, " true }">>, [{strict, [comments]}]) )}, {"/**/ comment postcolon", ?_assertError( badarg, - Decode(<<"{ \"key\":/* comment */ true }">>, [no_comments]) + Decode(<<"{ \"key\":/* comment */ true }">>, [{strict, [comments]}]) )}, {"// comment terminating zero", ?_assertError( badarg, - Decode(<<"[ 0// comment", ?newline, "]">>, [no_comments]) + Decode(<<"[ 0// comment", ?newline, "]">>, [{strict, [comments]}]) )}, {"// comment terminating integer", ?_assertError( badarg, - Decode(<<"[ 1// comment", ?newline, "]">>, [no_comments]) + Decode(<<"[ 1// comment", ?newline, "]">>, [{strict, [comments]}]) )}, {"// comment terminating float", ?_assertError( badarg, - Decode(<<"[ 1.0// comment", ?newline, "]">>, [no_comments]) + Decode(<<"[ 1.0// comment", ?newline, 
"]">>, [{strict, [comments]}]) )}, {"// comment terminating exp", ?_assertError( badarg, - Decode(<<"[ 1e1// comment", ?newline, "]">>, [no_comments]) + Decode(<<"[ 1e1// comment", ?newline, "]">>, [{strict, [comments]}]) )}, {"/**/ comment terminating zero", ?_assertError( badarg, - Decode(<<"[ 0/* comment */ ]">>, [no_comments]) + Decode(<<"[ 0/* comment */ ]">>, [{strict, [comments]}]) )}, {"/**/ comment terminating integer", ?_assertError( badarg, - Decode(<<"[ 1/* comment */ ]">>, [no_comments]) + Decode(<<"[ 1/* comment */ ]">>, [{strict, [comments]}]) )}, {"/**/ comment terminating float", ?_assertError( badarg, - Decode(<<"[ 1.0/* comment */ ]">>, [no_comments]) + Decode(<<"[ 1.0/* comment */ ]">>, [{strict, [comments]}]) )}, {"/**/ comment terminating exp", ?_assertError( badarg, - Decode(<<"[ 1e1/* comment */ ]">>, [no_comments]) + Decode(<<"[ 1e1/* comment */ ]">>, [{strict, [comments]}]) )}, {"/**/ comment following /**/ comment", ?_assertError( badarg, - Decode(<<"[/* comment *//* comment */true]">>, [no_comments]) + Decode(<<"[/* comment *//* comment */true]">>, [{strict, [comments]}]) )}, {"/**/ comment following // comment", ?_assertError( badarg, - Decode(<<"[// comment", ?newline, "/* comment */true]">>, [no_comments]) + Decode(<<"[// comment", ?newline, "/* comment */true]">>, [{strict, [comments]}]) )}, {"// comment following /**/ comment", ?_assertError( badarg, - Decode(<<"[/* comment */// comment", ?newline, "true]">>, [no_comments]) + Decode(<<"[/* comment */// comment", ?newline, "true]">>, [{strict, [comments]}]) )}, {"// comment following // comment", ?_assertError( badarg, - Decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, [no_comments]) + Decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, [{strict, [comments]}]) )}, {"/**/ comment inside /**/ comment", ?_assertError( badarg, - Decode(<<"[ /* /* comment */ */ true ]">>, [no_comments]) + Decode(<<"[ /* /* comment */ */ true ]">>, [{strict, 
[comments]}]) )}, {"/**/ comment with /", ?_assertError( badarg, - Decode(<<"[ /* / */ true ]">>, [no_comments]) + Decode(<<"[ /* / */ true ]">>, [{strict, [comments]}]) )}, {"/**/ comment with *", ?_assertError( badarg, - Decode(<<"[ /* * */ true ]">>, [no_comments]) + Decode(<<"[ /* * */ true ]">>, [{strict, [comments]}]) )}, {"// comment with badutf", ?_assertError( badarg, - Decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, [no_comments]) + Decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, [{strict, [comments]}]) )}, {"/**/ comment with badutf", ?_assertError( badarg, - Decode(<<"[ /* comment ", 16#00c0, " */ true]">>, [no_comments]) + Decode(<<"[ /* comment ", 16#00c0, " */ true]">>, [{strict, [comments]}]) )}, {"/**/ comment with badutf preceeded by /", ?_assertError( badarg, - Decode(<<"[ /* comment /", 16#00c0, " */ true]">>, [no_comments]) + Decode(<<"[ /* comment /", 16#00c0, " */ true]">>, [{strict, [comments]}]) )} ]. @@ -1521,19 +1522,19 @@ clean_string_test_() -> )}, {"error reserved space", ?_assertEqual( lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [strict_utf8]) end, reserved_space()) + lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, reserved_space()) )}, {"error surrogates", ?_assertEqual( lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [strict_utf8]) end, surrogates()) + lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, surrogates()) )}, {"error noncharacters", ?_assertEqual( lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [strict_utf8]) end, noncharacters()) + lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, noncharacters()) )}, {"error extended noncharacters", ?_assertEqual( lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> 
decode(Codepoint, [strict_utf8]) end, extended_noncharacters()) + lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, extended_noncharacters()) )}, {"clean reserved space", ?_assertEqual( lists:duplicate(length(reserved_space()), [{string, <<16#fffd/utf8>>}, end_json]), @@ -1600,7 +1601,7 @@ bad_utf8_test_() -> [ {"noncharacter u+fffe", ?_assertError( badarg, - decode_bad_utf(<<239, 191, 190>>, [strict_utf8]) + decode_bad_utf(<<239, 191, 190>>, [{strict, [utf8]}]) )}, {"noncharacter u+fffe replaced", ?_assertEqual( <<16#fffd/utf8>>, @@ -1608,7 +1609,7 @@ bad_utf8_test_() -> )}, {"noncharacter u+ffff", ?_assertError( badarg, - decode_bad_utf(<<239, 191, 191>>, [strict_utf8]) + decode_bad_utf(<<239, 191, 191>>, [{strict, [utf8]}]) )}, {"noncharacter u+ffff replaced", ?_assertEqual( <<16#fffd/utf8>>, @@ -1616,7 +1617,7 @@ bad_utf8_test_() -> )}, {"orphan continuation byte u+0080", ?_assertError( badarg, - decode_bad_utf(<<16#0080>>, [strict_utf8]) + decode_bad_utf(<<16#0080>>, [{strict, [utf8]}]) )}, {"orphan continuation byte u+0080 replaced", ?_assertEqual( <<16#fffd/utf8>>, @@ -1624,7 +1625,7 @@ bad_utf8_test_() -> )}, {"orphan continuation byte u+00bf", ?_assertError( badarg, - decode_bad_utf(<<16#00bf>>, [strict_utf8]) + decode_bad_utf(<<16#00bf>>, [{strict, [utf8]}]) )}, {"orphan continuation byte u+00bf replaced", ?_assertEqual( <<16#fffd/utf8>>, @@ -1632,7 +1633,7 @@ bad_utf8_test_() -> )}, {"2 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, [strict_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, [{strict, [utf8]}]) )}, {"2 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 2), @@ -1640,7 +1641,7 @@ bad_utf8_test_() -> )}, {"3 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, [strict_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, [{strict, [utf8]}]) )}, {"3 continuation bytes 
replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 3), @@ -1648,7 +1649,7 @@ bad_utf8_test_() -> )}, {"4 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, [strict_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, [{strict, [utf8]}]) )}, {"4 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 4), @@ -1656,7 +1657,7 @@ bad_utf8_test_() -> )}, {"5 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, [strict_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, [{strict, [utf8]}]) )}, {"5 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 5), @@ -1664,7 +1665,7 @@ bad_utf8_test_() -> )}, {"6 continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, [strict_utf8]) + decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, [{strict, [utf8]}]) )}, {"6 continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, 6), @@ -1672,7 +1673,7 @@ bad_utf8_test_() -> )}, {"all continuation bytes", ?_assertError( badarg, - decode_bad_utf(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [strict_utf8]) + decode_bad_utf(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [{strict, [utf8]}]) )}, {"all continuation bytes replaced", ?_assertEqual( binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), @@ -1683,7 +1684,7 @@ bad_utf8_test_() -> )}, {"lonely start byte", ?_assertError( badarg, - decode_bad_utf(<<16#00c0>>, [strict_utf8]) + decode_bad_utf(<<16#00c0>>, [{strict, [utf8]}]) )}, {"lonely start byte replaced", ?_assertEqual( <<16#fffd/utf8>>, @@ -1691,7 +1692,7 @@ bad_utf8_test_() -> )}, {"lonely start bytes (2 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#00c0, 32, 16#00df>>, [strict_utf8]) + decode_bad_utf(<<16#00c0, 32, 16#00df>>, [{strict, [utf8]}]) )}, {"lonely start bytes (2 byte) replaced", 
?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, @@ -1699,7 +1700,7 @@ bad_utf8_test_() -> )}, {"lonely start bytes (3 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#00e0, 32, 16#00ef>>, [strict_utf8]) + decode_bad_utf(<<16#00e0, 32, 16#00ef>>, [{strict, [utf8]}]) )}, {"lonely start bytes (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, @@ -1707,7 +1708,7 @@ bad_utf8_test_() -> )}, {"lonely start bytes (4 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#00f0, 32, 16#00f7>>, [strict_utf8]) + decode_bad_utf(<<16#00f0, 32, 16#00f7>>, [{strict, [utf8]}]) )}, {"lonely start bytes (4 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32, 16#fffd/utf8>>, @@ -1715,7 +1716,7 @@ bad_utf8_test_() -> )}, {"missing continuation byte (3 byte)", ?_assertError( badarg, - decode_bad_utf(<<224, 160, 32>>, [strict_utf8]) + decode_bad_utf(<<224, 160, 32>>, [{strict, [utf8]}]) )}, {"missing continuation byte (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, @@ -1723,7 +1724,7 @@ bad_utf8_test_() -> )}, {"missing continuation byte (4 byte missing one)", ?_assertError( badarg, - decode_bad_utf(<<240, 144, 128, 32>>, [strict_utf8]) + decode_bad_utf(<<240, 144, 128, 32>>, [{strict, [utf8]}]) )}, {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, @@ -1731,7 +1732,7 @@ bad_utf8_test_() -> )}, {"missing continuation byte (4 byte missing two)", ?_assertError( badarg, - decode_bad_utf(<<240, 144, 32>>, [strict_utf8]) + decode_bad_utf(<<240, 144, 32>>, [{strict, [utf8]}]) )}, {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, @@ -1739,7 +1740,7 @@ bad_utf8_test_() -> )}, {"overlong encoding of u+002f (2 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#c0, 16#af, 32>>, [strict_utf8]) + decode_bad_utf(<<16#c0, 16#af, 32>>, [{strict, [utf8]}]) )}, {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, @@ -1747,7 +1748,7 @@ 
bad_utf8_test_() -> )}, {"overlong encoding of u+002f (3 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, [strict_utf8]) + decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, [{strict, [utf8]}]) )}, {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, @@ -1755,7 +1756,7 @@ bad_utf8_test_() -> )}, {"overlong encoding of u+002f (4 byte)", ?_assertError( badarg, - decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, [strict_utf8]) + decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, [{strict, [utf8]}]) )}, {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, @@ -1763,7 +1764,7 @@ bad_utf8_test_() -> )}, {"highest overlong 2 byte sequence", ?_assertError( badarg, - decode_bad_utf(<<16#c1, 16#bf, 32>>, [strict_utf8]) + decode_bad_utf(<<16#c1, 16#bf, 32>>, [{strict, [utf8]}]) )}, {"highest overlong 2 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, @@ -1771,7 +1772,7 @@ bad_utf8_test_() -> )}, {"highest overlong 3 byte sequence", ?_assertError( badarg, - decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, [strict_utf8]) + decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, [{strict, [utf8]}]) )}, {"highest overlong 3 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, @@ -1779,7 +1780,7 @@ bad_utf8_test_() -> )}, {"highest overlong 4 byte sequence", ?_assertError( badarg, - decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [strict_utf8]) + decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [{strict, [utf8]}]) )}, {"highest overlong 4 byte sequence replaced", ?_assertEqual( <<16#fffd/utf8, 32>>, @@ -1820,10 +1821,6 @@ unescape_test_() -> <<"\"">>, unescape(<<"\\\""/utf8>>, []) )}, - {"unescape single quote", ?_assertEqual( - <<"'">>, - unescape(<<"\\'"/utf8>>, [single_quoted_strings]) - )}, {"unescape solidus", ?_assertEqual( <<"/">>, unescape(<<"\\/"/utf8>>, []) @@ -1846,7 +1843,7 @@ unescape_test_() -> )}, {"do not unescape bad high surrogate", ?_assertError( badarg, - 
unescape(<<"\\udc00"/utf8>>, [strict_utf8]) + unescape(<<"\\udc00"/utf8>>, [{strict, [utf8]}]) )}, {"replace naked high surrogate", ?_assertEqual( <<16#fffd/utf8, "hello world">>, @@ -1854,7 +1851,7 @@ unescape_test_() -> )}, {"do not unescape naked high surrogate", ?_assertError( badarg, - unescape(<<"\\ud800hello world"/utf8>>, [strict_utf8]) + unescape(<<"\\ud800hello world"/utf8>>, [{strict, [utf8]}]) )}, {"replace naked low surrogate", ?_assertEqual( <<16#fffd/utf8, "hello world">>, @@ -1862,7 +1859,7 @@ unescape_test_() -> )}, {"do not unescape naked low surrogate", ?_assertError( badarg, - unescape(<<"\\udc00hello world"/utf8>>, [strict_utf8]) + unescape(<<"\\udc00hello world"/utf8>>, [{strict, [utf8]}]) )}, {"replace bad surrogate pair", ?_assertEqual( <<16#fffd/utf8, 16#fffd/utf8>>, @@ -1870,11 +1867,11 @@ unescape_test_() -> )}, {"do not unescape bad surrogate pair", ?_assertError( badarg, - unescape(<<"\\ud800\\u0000">>, [strict_utf8]) + unescape(<<"\\ud800\\u0000">>, [{strict, [utf8]}]) )}, {"bad pseudo escape sequence", ?_assertError( badarg, - unescape(<<"\\uabcg">>, []) + unescape(<<"\\uabcg">>, [strict]) )} ]. 
@@ -2056,11 +2053,15 @@ single_quoted_string_test_() -> [ {"single quoted string", ?_assertEqual( [{string, <<"hello world">>}, end_json], - decode(<<39, "hello world", 39>>, [single_quoted_strings]) + decode(<<39, "hello world", 39>>, []) )}, + {"single quoted string error", ?_assertEqual( + {error, badarg}, + decode(<<39, "hello world", 39>>, [{strict, [single_quotes]}]) + )}, {"single quoted string with embedded double quotes", ?_assertEqual( [{string, <<"quoth the raven, \"nevermore\"">>}, end_json], - decode(<<39, "quoth the raven, \"nevermore\"", 39>>, [single_quoted_strings]) + decode(<<39, "quoth the raven, \"nevermore\"", 39>>, []) )}, {"string with embedded single quotes", ?_assertEqual( [{string, <<"quoth the raven, 'nevermore'">>}, end_json], @@ -2068,14 +2069,18 @@ single_quoted_string_test_() -> )}, {"escaped single quote", ?_assertEqual( [{string, <<"quoth the raven, 'nevermore'">>}, end_json], - decode(<<39, "quoth the raven, \\'nevermore\\'", 39>>, [single_quoted_strings]) + decode(<<39, "quoth the raven, \\'nevermore\\'", 39>>, []) )}, {"single quoted key", ?_assertEqual( [start_object, {key, <<"key">>}, {string, <<"value">>}, {key, <<"another key">>}, {string, <<"another value">>}, end_object, end_json], - decode(<<"{'key':'value','another key':'another value'}">>, [single_quoted_strings]) + decode(<<"{'key':'value','another key':'another value'}">>, []) + )}, + {"single quoted key error", ?_assertEqual( + {error, badarg}, + decode(<<"{'key':'value','another key':'another value'}">>, [{strict, [single_quotes]}]) )} ]. @@ -2084,7 +2089,7 @@ ignored_bad_escapes_test_() -> [ {"ignore unrecognized escape sequence", ?_assertEqual( [{string, <<"\\x25">>}, end_json], - decode(<<"\"\\x25\"">>, [ignored_bad_escapes]) + decode(<<"\"\\x25\"">>, []) )} ]. 
@@ -2299,11 +2304,11 @@ custom_error_handler_test_() -> )}, {"single_comment error", ?_assertEqual( {comment, <<192>>}, - Decode(<<"[ //"/utf8, 192>>, [{error_handler, Error}, strict_utf8]) + Decode(<<"[ //"/utf8, 192>>, [{error_handler, Error}, {strict, [utf8]}]) )}, {"multi_comment error", ?_assertEqual( {comment, <<192>>}, - Decode(<<"[ /*"/utf8, 192>>, [{error_handler, Error}, strict_utf8]) + Decode(<<"[ /*"/utf8, 192>>, [{error_handler, Error}, {strict, [utf8]}]) )} ]. diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 517be4c..1c22e46 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -78,7 +78,7 @@ err(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term). error_test_() -> [ {"value error", ?_assertError(badarg, err(self(), []))}, - {"string error", ?_assertError(badarg, err(<<239, 191, 191>>, [strict_utf8]))} + {"string error", ?_assertError(badarg, err(<<239, 191, 191>>, [strict]))} ]. custom_error_handler_test_() -> @@ -90,7 +90,7 @@ custom_error_handler_test_() -> )}, {"string error", ?_assertEqual( {string, [{string, <<239, 191, 191>>}]}, - err(<<239, 191, 191>>, [{error_handler, Error}, strict_utf8]) + err(<<239, 191, 191>>, [{error_handler, Error}, strict]) )} ]. diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 350624f..e59ff1e 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -516,7 +516,7 @@ custom_error_handler_test_() -> )}, {"string error", ?_assertEqual( {string, [{string, <<239, 191, 191>>}, end_json]}, - parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict_utf8]) + parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict]) )} ]. 
From 486fda2a74a1f14a934b6c8989c810e879e8eceb Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 20 Oct 2013 21:06:51 +0000 Subject: [PATCH 14/40] refactor test suite for additional completeness and clarity --- src/jsx.erl | 276 ++++++++- src/jsx_config.erl | 6 + src/jsx_decoder.erl | 1383 +++++++++++++++---------------------------- src/jsx_encoder.erl | 22 +- src/jsx_parser.erl | 554 +++++++++++++++-- src/jsx_tests.hrl | 688 --------------------- 6 files changed, 1249 insertions(+), 1680 deletions(-) delete mode 100644 src/jsx_tests.hrl diff --git a/src/jsx.erl b/src/jsx.erl index 8c45bfe..b791699 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -34,7 +34,9 @@ -ifdef(TEST). --include("jsx_tests.hrl"). +%% data and helper functions for tests +-export([test_cases/0]). +-export([init/1, handle_event/2]). -endif. @@ -146,3 +148,275 @@ resume(Term, {decoder, State, Handler, Acc, Stack}, Config) -> jsx_decoder:resume(Term, State, Handler, Acc, Stack, jsx_config:parse_config(Config)); resume(Term, {parser, State, Handler, Stack}, Config) -> jsx_parser:resume(Term, State, Handler, Stack, jsx_config:parse_config(Config)). + + + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + + +%% test handler +init([]) -> []. + +handle_event(end_json, State) -> lists:reverse([end_json] ++ State); +handle_event(Event, State) -> [Event] ++ State. + + +test_cases() -> + empty_array() + ++ nested_array() + ++ empty_object() + ++ nested_object() + ++ strings() + ++ literals() + ++ integers() + ++ floats() + ++ compound_object(). + + +empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}]. + +nested_array() -> + [{ + "[[[]]]", + <<"[[[]]]">>, + [[[]]], + [start_array, start_array, start_array, end_array, end_array, end_array] + }]. + + +empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}]. 
+ +nested_object() -> + [{ + "{\"key\":{\"key\":{}}}", + <<"{\"key\":{\"key\":{}}}">>, + [{<<"key">>, [{<<"key">>, [{}]}]}], + [ + start_object, + {key, <<"key">>}, + start_object, + {key, <<"key">>}, + start_object, + end_object, + end_object, + end_object + ] + }]. + + +naked_strings() -> + Raw = [ + "", + "hello world" + ], + [ + { + String, + <<"\"", (list_to_binary(String))/binary, "\"">>, + list_to_binary(String), + [{string, list_to_binary(String)}] + } + || String <- Raw + ]. + +strings() -> + naked_strings() + ++ [ wrap_with_array(Test) || Test <- naked_strings() ] + ++ [ wrap_with_object(Test) || Test <- naked_strings() ]. + + +naked_integers() -> + Raw = [ + 1, 2, 3, + 127, 128, 129, + 255, 256, 257, + 65534, 65535, 65536, + 18446744073709551616, + 18446744073709551617 + ], + [ + { + integer_to_list(X), + list_to_binary(integer_to_list(X)), + X, + [{integer, X}] + } + || X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0] + ]. + +integers() -> + naked_integers() + ++ [ wrap_with_array(Test) || Test <- naked_integers() ] + ++ [ wrap_with_object(Test) || Test <- naked_integers() ]. + + +naked_floats() -> + Raw = [ + 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, + 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, + 1234567890.0987654321, + 0.0e0, + 1234567890.0987654321e16, + 0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308, + 1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308, + 2.2250738585072014e-308, %% min normalized float + 1.7976931348623157e308, %% max normalized float + 5.0e-324, %% min denormalized float + 2.225073858507201e-308 %% max denormalized float + ], + [ + { + sane_float_to_list(X), + list_to_binary(sane_float_to_list(X)), + X, + [{float, X}] + } + || X <- Raw ++ [ -1 * Y || Y <- Raw ] + ]. + +floats() -> + naked_floats() + ++ [ wrap_with_array(Test) || Test <- naked_floats() ] + ++ [ wrap_with_object(Test) || Test <- naked_floats() ]. 
+ + +naked_literals() -> + [ + { + atom_to_list(Literal), + atom_to_binary(Literal, unicode), + Literal, + [{literal, Literal}] + } + || Literal <- [true, false, null] + ]. + +literals() -> + naked_literals() + ++ [ wrap_with_array(Test) || Test <- naked_literals() ] + ++ [ wrap_with_object(Test) || Test <- naked_literals() ]. + + +compound_object() -> + [{ + "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]", + <<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>, + [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]], + [ + start_array, + start_object, + {key, <<"alpha">>}, + start_array, + {integer, 1}, + {integer, 2}, + {integer, 3}, + end_array, + {key, <<"beta">>}, + start_object, + {key, <<"alpha">>}, + start_array, + {float, 1.0}, + {float, 2.0}, + {float, 3.0}, + end_array, + {key, <<"beta">>}, + start_array, + {literal, true}, + {literal, false}, + end_array, + end_object, + end_object, + start_array, + start_object, + end_object, + end_array, + end_array + ] + }]. + + +wrap_with_array({Title, JSON, Term, Events}) -> + { + "[" ++ Title ++ "]", + <<"[", JSON/binary, "]">>, + [Term], + [start_array] ++ Events ++ [end_array] + }. + + +wrap_with_object({Title, JSON, Term, Events}) -> + { + "{\"key\":" ++ Title ++ "}", + <<"{\"key\":", JSON/binary, "}">>, + [{<<"key">>, Term}], + [start_object, {key, <<"key">>}] ++ Events ++ [end_object] + }. + + +sane_float_to_list(X) -> + [Output] = io_lib:format("~p", [X]), + Output. + + +incremental_decode(JSON) -> + Final = lists:foldl( + fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, + decoder(jsx, [], [stream]), + json_to_bytes(JSON) + ), + Final(end_stream). + + +incremental_parse(Events) -> + Final = lists:foldl( + fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end, + parser(?MODULE, [], [stream]), + lists:map(fun(X) -> [X] end, Events) + ), + Final(end_stream). 
+ + +%% used to convert a json text into a list of codepoints to be incrementally +%% parsed +json_to_bytes(JSON) -> json_to_bytes(JSON, []). + +json_to_bytes(<<>>, Acc) -> [<<>>] ++ lists:reverse(Acc); +json_to_bytes(<<X, Rest/binary>>, Acc) -> json_to_bytes(Rest, [<<X>>] ++ Acc). + + +%% actual tests! +decode_test_() -> + Data = test_cases(), + [{Title, ?_assertEqual(Events ++ [end_json], (decoder(?MODULE, [], []))(JSON))} + || {Title, JSON, _, Events} <- Data + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_decode(JSON))} + || {Title, JSON, _, Events} <- Data + ]. + + +parse_test_() -> + Data = test_cases(), + [{Title, ?_assertEqual(Events ++ [end_json], (parser(?MODULE, [], []))(Events ++ [end_json]))} + || {Title, _, _, Events} <- Data + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_parse(Events))} + || {Title, _, _, Events} <- Data + ]. + + +encode_test_() -> + Data = test_cases(), + [ + { + Title, ?_assertEqual( + Events ++ [end_json], + (jsx:encoder(jsx, [], []))(Term) + ) + } || {Title, _, Term, Events} <- Data + ]. + + +-endif. diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 2816b0e..9039d3f 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -183,6 +183,12 @@ config_test_() -> parse_config([strict]) ) }, + {"strict selective", + ?_assertEqual( + #config{strict_comments = true}, + parse_config([{strict, [comments]}]) + ) + }, {"strict expanded", ?_assertEqual( #config{strict_comments = true, diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 58ba3d8..a858257 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -966,167 +966,141 @@ json_to_bytes(<<>>, Acc) -> [<<>>] ++ lists:reverse(Acc); json_to_bytes(<<X, Rest/binary>>, Acc) -> json_to_bytes(Rest, [<<X>>] ++ Acc). 
-decode(JSON, Config) -> - Chunk = try - start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) - catch - error:badarg -> {error, badarg} - end, - Incremental = try - Final = lists:foldl( - fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, - decoder(jsx, [], [stream] ++ Config), - json_to_bytes(JSON) - ), - Final(end_stream) - catch - error:badarg -> {error, badarg} - end, - ?assert(Chunk == Incremental), - Chunk. +decode(JSON) -> decode(JSON, []). +decode(JSON, Config) -> (decoder(jsx, [], Config))(JSON). -decode_test_() -> - Data = jsx:test_cases(), - [{Title, ?_assertEqual(Events ++ [end_json], decode(JSON, []))} - || {Title, JSON, _, Events} <- Data - ]. +incremental_decode(JSON) -> incremental_decode(JSON, []). +incremental_decode(JSON, Config) -> + Final = lists:foldl( + fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, + decoder(jsx, [], [stream] ++ Config), + json_to_bytes(JSON) + ), + Final(end_stream). %% all these numbers have different representation in erlang than in javascript and %% do not roundtrip like most integers/floats special_number_test_() -> - [ - {"-0", ?_assertEqual( - [{integer, 0}, end_json], - decode(<<"-0">>, []) - )}, - {"-0.0", ?_assertEqual( - [{float, 0.0}, end_json], - decode(<<"-0.0">>, []) - )}, - {"0e0", ?_assertEqual( - [{float, 0.0}, end_json], - decode(<<"0e0">>, []) - )}, - {"0e4", ?_assertEqual( - [{float, 0.0}, end_json], - decode(<<"0e4">>, []) - )}, - {"1e0", ?_assertEqual( - [{float, 1.0}, end_json], - decode(<<"1e0">>, []) - )}, - {"-1e0", ?_assertEqual( - [{float, -1.0}, end_json], - decode(<<"-1e0">>, []) - )}, - {"1e4", ?_assertEqual( - [{float, 1.0e4}, end_json], - decode(<<"1e4">>, []) - )}, - {"number terminated by whitespace", ?_assertEqual( + Cases = [ + % {title, test form, json, opt flags} + {"-0", [{integer, 0}, end_json], <<"-0">>}, + {"-0.0", [{float, 0.0}, end_json], <<"-0.0">>}, + {"0e0", [{float, 0.0}, end_json], <<"0e0">>}, + {"0e4", [{float, 0.0}, end_json], <<"0e4">>}, + 
{"1e0", [{float, 1.0}, end_json], <<"1e0">>}, + {"-1e0", [{float, -1.0}, end_json], <<"-1e0">>}, + {"1e4", [{float, 1.0e4}, end_json], <<"1e4">>}, + {"number terminated by whitespace", [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1 ]">>, []) - )}, - {"number terminated by comma", ?_assertEqual( + <<"[ 1 ]">> + }, + {"number terminated by comma", [start_array, {integer, 1}, {integer, 1}, end_array, end_json], - decode(<<"[ 1, 1 ]">>, []) - )}, - {"number terminated by comma in object", ?_assertEqual( + <<"[ 1, 1 ]">> + }, + {"number terminated by comma in object", [start_object, {key, <<"x">>}, {integer, 1}, {key, <<"y">>}, {integer, 1}, end_object, end_json], - decode(<<"{\"x\": 1, \"y\": 1}">>, []) - )} - ]. + <<"{\"x\": 1, \"y\": 1}">> + } + ], + [{Title, ?_assertEqual(Events, decode(JSON))} + || {Title, Events, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events, incremental_decode(JSON))} + || {Title, Events, JSON} <- Cases + ]. comments_test_() -> - [ - {"preceeding // comment", ?_assertEqual( + Cases = [ + % {title, test form, json, opt flags} + {"preceeding // comment", [start_array, end_array, end_json], - decode(<<"// comment ", ?newline, "[]">>, []) - )}, - {"preceeding /**/ comment", ?_assertEqual( + <<"// comment ", ?newline, "[]">> + }, + {"preceeding /**/ comment", [start_array, end_array, end_json], - decode(<<"/* comment */[]">>, []) - )}, - {"trailing // comment", ?_assertEqual( + <<"/* comment */[]">> + }, + {"trailing // comment", [start_array, end_array, end_json], - decode(<<"[]// comment", ?newline>>, []) - )}, - {"trailing // comment (no newline)", ?_assertEqual( + <<"[]// comment", ?newline>> + }, + {"trailing // comment (no newline)", [start_array, end_array, end_json], - decode(<<"[]// comment">>, []) - )}, - {"trailing /**/ comment", ?_assertEqual( + <<"[]// comment">> + }, + {"trailing /**/ comment", [start_array, end_array, end_json], - decode(<<"[] /* comment */">>, []) - )}, - {"// comment 
inside array", ?_assertEqual( + <<"[] /* comment */">> + }, + {"// comment inside array", [start_array, end_array, end_json], - decode(<<"[ // comment", ?newline, "]">>, []) - )}, - {"/**/ comment inside array", ?_assertEqual( + <<"[ // comment", ?newline, "]">> + }, + {"/**/ comment inside array", [start_array, end_array, end_json], - decode(<<"[ /* comment */ ]">>, []) - )}, - {"// comment at beginning of array", ?_assertEqual( + <<"[ /* comment */ ]">> + }, + {"// comment at beginning of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, []) - )}, - {"/**/ comment at beginning of array", ?_assertEqual( + <<"[ // comment", ?newline, "true", ?newline, "]">> + }, + {"/**/ comment at beginning of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment */ true ]">>, []) - )}, - {"// comment at end of array", ?_assertEqual( + <<"[ /* comment */ true ]">> + }, + {"// comment at end of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ true // comment", ?newline, "]">>, []) - )}, - {"/**/ comment at end of array", ?_assertEqual( + <<"[ true // comment", ?newline, "]">> + }, + {"/**/ comment at end of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ true /* comment */ ]">>, []) - )}, - {"// comment midarray (post comma)", ?_assertEqual( + <<"[ true /* comment */ ]">> + }, + {"// comment midarray (post comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true, // comment", ?newline, "false ]">>, []) - )}, - {"/**/ comment midarray (post comma)", ?_assertEqual( + <<"[ true, // comment", ?newline, "false ]">> + }, + {"/**/ comment midarray (post comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true, /* comment */ false ]">>, []) - )}, - {"// comment midarray (pre comma)", ?_assertEqual( + <<"[ true, /* comment */ false ]">> + }, + {"// 
comment midarray (pre comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true// comment", ?newline, ", false ]">>, []) - )}, - {"/**/ comment midarray (pre comma)", ?_assertEqual( + <<"[ true// comment", ?newline, ", false ]">> + }, + {"/**/ comment midarray (pre comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true/* comment */, false ]">>, []) - )}, - {"// comment inside object", ?_assertEqual( + <<"[ true/* comment */, false ]">> + }, + {"// comment inside object", [start_object, end_object, end_json], - decode(<<"{ // comment", ?newline, "}">>, []) - )}, - {"/**/ comment inside object", ?_assertEqual( + <<"{ // comment", ?newline, "}">> + }, + {"/**/ comment inside object", [start_object, end_object, end_json], - decode(<<"{ /* comment */ }">>, []) - )}, - {"// comment at beginning of object", ?_assertEqual( + <<"{ /* comment */ }">> + }, + {"// comment at beginning of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, []) - )}, - {"/**/ comment at beginning of object", ?_assertEqual( + <<"{ // comment", ?newline, " \"key\": true", ?newline, "}">> + }, + {"/**/ comment at beginning of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ /* comment */ \"key\": true }">>, []) - )}, - {"// comment at end of object", ?_assertEqual( + <<"{ /* comment */ \"key\": true }">> + }, + {"// comment at end of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": true // comment", ?newline, "}">>, []) - )}, - {"/**/ comment at end of object", ?_assertEqual( + <<"{ \"key\": true // comment", ?newline, "}">> + }, + {"/**/ comment at end of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": true /* comment */ }">>, []) - )}, - {"// comment 
midobject (post comma)", ?_assertEqual( + <<"{ \"key\": true /* comment */ }">> + }, + {"// comment midobject (post comma)", [ start_object, {key, <<"x">>}, @@ -1136,9 +1110,9 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, []) - )}, - {"/**/ comment midobject (post comma)", ?_assertEqual( + <<"{ \"x\": true, // comment", ?newline, "\"y\": false }">> + }, + {"/**/ comment midobject (post comma)", [ start_object, {key, <<"x">>}, @@ -1148,9 +1122,9 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">>, []) - )}, - {"// comment midobject (pre comma)", ?_assertEqual( + <<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">> + }, + {"// comment midobject (pre comma)", [ start_object, {key, <<"x">>}, @@ -1160,9 +1134,9 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, []) - )}, - {"/**/ comment midobject (pre comma)", ?_assertEqual( + <<"{ \"x\": true// comment", ?newline, ", \"y\": false }">> + }, + {"/**/ comment midobject (pre comma)", [ start_object, {key, <<"x">>}, @@ -1172,330 +1146,168 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, []) - )}, - {"// comment precolon", ?_assertEqual( + <<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">> + }, + {"// comment precolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\" // comment", ?newline, ": true }">>, []) - )}, - {"/**/ comment precolon", ?_assertEqual( + <<"{ \"key\" // comment", ?newline, ": true }">> + }, + {"/**/ comment precolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\"/* comment */: true }">>, []) - )}, - {"// comment postcolon", ?_assertEqual( + <<"{ \"key\"/* comment */: true }">> + }, + {"// comment postcolon", 
[start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": // comment", ?newline, " true }">>, []) - )}, - {"/**/ comment postcolon", ?_assertEqual( + <<"{ \"key\": // comment", ?newline, " true }">> + }, + {"/**/ comment postcolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\":/* comment */ true }">>, []) - )}, - {"// comment terminating zero", ?_assertEqual( + <<"{ \"key\":/* comment */ true }">> + }, + {"// comment terminating zero", [start_array, {integer, 0}, end_array, end_json], - decode(<<"[ 0// comment", ?newline, "]">>, []) - )}, - {"// comment terminating integer", ?_assertEqual( + <<"[ 0// comment", ?newline, "]">> + }, + {"// comment terminating integer", [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1// comment", ?newline, "]">>, []) - )}, - {"// comment terminating float", ?_assertEqual( + <<"[ 1// comment", ?newline, "]">> + }, + {"// comment terminating float", [start_array, {float, 1.0}, end_array, end_json], - decode(<<"[ 1.0// comment", ?newline, "]">>, []) - )}, - {"// comment terminating exp", ?_assertEqual( + <<"[ 1.0// comment", ?newline, "]">> + }, + {"// comment terminating exp", [start_array, {float, 1.0e1}, end_array, end_json], - decode(<<"[ 1e1// comment", ?newline, "]">>, []) - )}, - {"/**/ comment terminating zero", ?_assertEqual( + <<"[ 1e1// comment", ?newline, "]">> + }, + {"/**/ comment terminating zero", [start_array, {integer, 0}, end_array, end_json], - decode(<<"[ 0/* comment */ ]">>, []) - )}, - {"/**/ comment terminating integer", ?_assertEqual( + <<"[ 0/* comment */ ]">> + }, + {"/**/ comment terminating integer", [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1/* comment */ ]">>, []) - )}, - {"/**/ comment terminating float", ?_assertEqual( + <<"[ 1/* comment */ ]">> + }, + {"/**/ comment terminating float", [start_array, {float, 1.0}, end_array, end_json], - decode(<<"[ 1.0/* comment */ ]">>, []) - 
)}, - {"/**/ comment terminating exp", ?_assertEqual( + <<"[ 1.0/* comment */ ]">> + }, + {"/**/ comment terminating exp", [start_array, {float, 1.0e1}, end_array, end_json], - decode(<<"[ 1e1/* comment */ ]">>, []) - )}, - {"/**/ comment following /**/ comment", ?_assertEqual( + <<"[ 1e1/* comment */ ]">> + }, + {"/**/ comment following /**/ comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[/* comment *//* comment */true]">>, []) - )}, - {"/**/ comment following // comment", ?_assertEqual( + <<"[/* comment *//* comment */true]">> + }, + {"/**/ comment following // comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[// comment", ?newline, "/* comment */true]">>, []) - )}, - {"// comment following /**/ comment", ?_assertEqual( + <<"[// comment", ?newline, "/* comment */true]">> + }, + {"// comment following /**/ comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[/* comment */// comment", ?newline, "true]">>, []) - )}, - {"// comment following // comment", ?_assertEqual( + <<"[/* comment */// comment", ?newline, "true]">> + }, + {"// comment following // comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, []) - )}, - {"/**/ comment inside /**/ comment", ?_assertEqual( + <<"[// comment", ?newline, "// comment", ?newline, "true]">> + }, + {"/**/ comment inside /**/ comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* /* comment */ */ true ]">>, []) - )}, - {"/**/ comment with /", ?_assertEqual( + <<"[ /* /* comment */ */ true ]">> + }, + {"/**/ comment with /", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* / */ true ]">>, []) - )}, - {"/**/ comment with *", ?_assertEqual( + <<"[ /* / */ true ]">> + }, + {"/**/ comment with *", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* * */ true ]">>, []) - )}, - {"// comment with badutf", ?_assertEqual( 
+ <<"[ /* * */ true ]">> + }, + {"// comment with badutf", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, []) - )}, - {"/**/ comment with badutf", ?_assertEqual( + <<"[ // comment ", 16#00c0, " ", ?newline, "true]">> + }, + {"/**/ comment with badutf", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment ", 16#00c0, " */ true]">>, []) - )}, - {"/**/ comment with badutf preceeded by /", ?_assertEqual( + <<"[ /* comment ", 16#00c0, " */ true]">> + }, + {"/**/ comment with badutf preceeded by /", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment /", 16#00c0, " */ true]">>, []) - )} + <<"[ /* comment /", 16#00c0, " */ true]">> + } + ], + [{Title, ?_assertEqual(Events, decode(JSON))} + || {Title, Events, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events, incremental_decode(JSON))} + || {Title, Events, JSON} <- Cases + ] ++ + % error when `{strict, [comments]}` is present + [{Title, ?_assertError(badarg, decode(JSON, [{strict, [comments]}]))} + || {Title, _Events, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertError( + badarg, + incremental_decode(JSON, [{strict, [comments]}]) + )} || {Title, _Events, JSON} <- Cases ]. 
no_comments_test_() -> - Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, - [ - {"preceeding // comment", ?_assertError( + Cases = [ + {"// comment with badutf", badarg, - Decode(<<"// comment ", ?newline, "[]">>, [{strict, [comments]}]) - )}, - {"preceeding /**/ comment", ?_assertError( + <<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, + [{strict, [utf8]}] + }, + {"/**/ comment with badutf", badarg, - Decode(<<"/* comment */[]">>, [{strict, [comments]}]) - )}, - {"trailing // comment", ?_assertError( + <<"[ /* comment ", 16#00c0, " */ true]">>, + [{strict, [utf8]}] + }, + {"/**/ comment with badutf preceeded by /", badarg, - Decode(<<"[]// comment", ?newline>>, [{strict, [comments]}]) - )}, - {"trailing // comment (no newline)", ?_assertError( - badarg, - Decode(<<"[]// comment">>, [{strict, [comments]}]) - )}, - {"trailing /**/ comment", ?_assertError( - badarg, - Decode(<<"[] /* comment */">>, [{strict, [comments]}]) - )}, - {"// comment inside array", ?_assertError( - badarg, - Decode(<<"[ // comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"/**/ comment inside array", ?_assertError( - badarg, - Decode(<<"[ /* comment */ ]">>, [{strict, [comments]}]) - )}, - {"// comment at beginning of array", ?_assertError( - badarg, - Decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"/**/ comment at beginning of array", ?_assertError( - badarg, - Decode(<<"[ /* comment */ true ]">>, [{strict, [comments]}]) - )}, - {"// comment at end of array", ?_assertError( - badarg, - Decode(<<"[ true // comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"/**/ comment at end of array", ?_assertError( - badarg, - Decode(<<"[ true /* comment */ ]">>, [{strict, [comments]}]) - )}, - {"// comment midarray (post comma)", ?_assertError( - badarg, - Decode(<<"[ true, // comment", ?newline, "false ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment midarray (post comma)", 
?_assertError( - badarg, - Decode(<<"[ true, /* comment */ false ]">>, [{strict, [comments]}]) - )}, - {"// comment midarray (pre comma)", ?_assertError( - badarg, - Decode(<<"[ true// comment", ?newline, ", false ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment midarray (pre comma)", ?_assertError( - badarg, - Decode(<<"[ true/* comment */, false ]">>, [{strict, [comments]}]) - )}, - {"// comment inside object", ?_assertError( - badarg, - Decode(<<"{ // comment", ?newline, "}">>, [{strict, [comments]}]) - )}, - {"/**/ comment inside object", ?_assertError( - badarg, - Decode(<<"{ /* comment */ }">>, [{strict, [comments]}]) - )}, - {"// comment at beginning of object", ?_assertError( - badarg, - Decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, [{strict, [comments]}]) - )}, - {"/**/ comment at beginning of object", ?_assertError( - badarg, - Decode(<<"{ /* comment */ \"key\": true }">>, [{strict, [comments]}]) - )}, - {"// comment at end of object", ?_assertError( - badarg, - Decode(<<"{ \"key\": true // comment", ?newline, "}">>, [{strict, [comments]}]) - )}, - {"/**/ comment at end of object", ?_assertError( - badarg, - Decode(<<"{ \"key\": true /* comment */ }">>, [{strict, [comments]}]) - )}, - {"// comment midobject (post comma)", ?_assertError( - badarg, - Decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, [{strict, [comments]}]) - )}, - {"/**/ comment midobject (post comma)", ?_assertError( - badarg, - Decode(<<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">>, [{strict, [comments]}]) - )}, - {"// comment midobject (pre comma)", ?_assertError( - badarg, - Decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, [{strict, [comments]}]) - )}, - {"/**/ comment midobject (pre comma)", ?_assertError( - badarg, - Decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, [{strict, [comments]}]) - )}, - {"// comment precolon", ?_assertError( - badarg, - Decode(<<"{ \"key\" // comment", 
?newline, ": true }">>, [{strict, [comments]}]) - )}, - {"/**/ comment precolon", ?_assertError( - badarg, - Decode(<<"{ \"key\"/* comment */: true }">>, [{strict, [comments]}]) - )}, - {"// comment postcolon", ?_assertError( - badarg, - Decode(<<"{ \"key\": // comment", ?newline, " true }">>, [{strict, [comments]}]) - )}, - {"/**/ comment postcolon", ?_assertError( - badarg, - Decode(<<"{ \"key\":/* comment */ true }">>, [{strict, [comments]}]) - )}, - {"// comment terminating zero", ?_assertError( - badarg, - Decode(<<"[ 0// comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"// comment terminating integer", ?_assertError( - badarg, - Decode(<<"[ 1// comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"// comment terminating float", ?_assertError( - badarg, - Decode(<<"[ 1.0// comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"// comment terminating exp", ?_assertError( - badarg, - Decode(<<"[ 1e1// comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"/**/ comment terminating zero", ?_assertError( - badarg, - Decode(<<"[ 0/* comment */ ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment terminating integer", ?_assertError( - badarg, - Decode(<<"[ 1/* comment */ ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment terminating float", ?_assertError( - badarg, - Decode(<<"[ 1.0/* comment */ ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment terminating exp", ?_assertError( - badarg, - Decode(<<"[ 1e1/* comment */ ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment following /**/ comment", ?_assertError( - badarg, - Decode(<<"[/* comment *//* comment */true]">>, [{strict, [comments]}]) - )}, - {"/**/ comment following // comment", ?_assertError( - badarg, - Decode(<<"[// comment", ?newline, "/* comment */true]">>, [{strict, [comments]}]) - )}, - {"// comment following /**/ comment", ?_assertError( - badarg, - Decode(<<"[/* comment */// comment", ?newline, "true]">>, [{strict, [comments]}]) - )}, - {"// comment following // 
comment", ?_assertError( - badarg, - Decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, [{strict, [comments]}]) - )}, - {"/**/ comment inside /**/ comment", ?_assertError( - badarg, - Decode(<<"[ /* /* comment */ */ true ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment with /", ?_assertError( - badarg, - Decode(<<"[ /* / */ true ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment with *", ?_assertError( - badarg, - Decode(<<"[ /* * */ true ]">>, [{strict, [comments]}]) - )}, - {"// comment with badutf", ?_assertError( - badarg, - Decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, [{strict, [comments]}]) - )}, - {"/**/ comment with badutf", ?_assertError( - badarg, - Decode(<<"[ /* comment ", 16#00c0, " */ true]">>, [{strict, [comments]}]) - )}, - {"/**/ comment with badutf preceeded by /", ?_assertError( - badarg, - Decode(<<"[ /* comment /", 16#00c0, " */ true]">>, [{strict, [comments]}]) - )} + <<"[ /* comment /", 16#00c0, " */ true]">>, + [{strict, [utf8]}] + } + ], + [{Title, ?_assertError(Error, decode(JSON, Config))} + || {Title, Error, JSON, Config} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertError(Error, incremental_decode(JSON, Config))} + || {Title, Error, JSON, Config} <- Cases ]. +% doing the full unicode range takes foreverrrrrrr so just do boundaries +% excludes characters that may need escaping codepoints() -> - unicode:characters_to_binary( - [32, 33] - ++ lists:seq(35, 38) - ++ lists:seq(40, 46) - ++ lists:seq(48, 91) - ++ lists:seq(93, 16#2027) - ++ lists:seq(16#202a, 16#d7ff) - ++ lists:seq(16#e000, 16#fdcf) - ++ lists:seq(16#fdf0, 16#fffd) - ). + [32, 33] ++ + lists:seq(35, 46) ++ + lists:seq(48, 91) ++ + lists:seq(93, 127) ++ + [16#2027, 16#202a, 16#d7ff, 16#e000, 16#fdcf, 16#fdf0, 16#fffd] ++ + [16#10000, 16#1fffd, 16#20000, 16#30000, 16#40000, 16#50000] ++ + [16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000] ++ + [16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000]. 
-extended_codepoints() -> - unicode:characters_to_binary( - lists:seq(16#10000, 16#1fffd) ++ [ - 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, - 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, - 16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000 - ] - ). +reserved_space() -> lists:seq(16#fdd0, 16#fdef). -reserved_space() -> [ to_fake_utf8(N) || N <- lists:seq(16#fdd0, 16#fdef) ]. +surrogates() -> lists:seq(16#d800, 16#dfff). -surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ]. - -noncharacters() -> [ to_fake_utf8(N) || N <- lists:seq(16#fffe, 16#ffff) ]. +noncharacters() -> lists:seq(16#fffe, 16#ffff). extended_noncharacters() -> - [ to_fake_utf8(N) || N <- [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] - ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] - ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] - ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] - ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff] - ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] - ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] - ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff] - ]. + [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] ++ + [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] ++ + [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] ++ + [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] ++ + [16#9fffe, 16#9ffff, 16#afffe, 16#affff] ++ + [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] ++ + [16#dfffe, 16#dffff, 16#efffe, 16#effff] ++ + [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff]. 
+ %% erlang refuses to decode certain codepoints, so fake them all to_fake_utf8(N) when N < 16#0080 -> <<34/utf8, N:8, 34/utf8>>; @@ -1511,540 +1323,267 @@ to_fake_utf8(N) -> clean_string_test_() -> - [ - {"clean codepoints", ?_assertEqual( - [{string, codepoints()}, end_json], - decode(<<34, (codepoints())/binary, 34>>, []) - )}, - {"clean extended codepoints", ?_assertEqual( - [{string, extended_codepoints()}, end_json], - decode(<<34, (extended_codepoints())/binary, 34>>, []) - )}, - {"error reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, reserved_space()) - )}, - {"error surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, surrogates()) - )}, - {"error noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, noncharacters()) - )}, - {"error extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, extended_noncharacters()) - )}, - {"clean reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, reserved_space()) - )}, - {"clean surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, surrogates()) - )}, - {"clean noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, noncharacters()) - )}, - {"clean extended noncharacters", ?_assertEqual( - 
lists:duplicate(length(extended_noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, extended_noncharacters()) - )}, - {"dirty \\uwxyz", ?_assertEqual( - [{string, <<"\\uwxyz">>}, end_json], - decode(<<34, "\\uwxyz", 34>>, [dirty_strings]) - )}, - {"dirty \\x23", ?_assertEqual( - [{string, <<"\\x23">>}, end_json], - decode(<<34, "\\x23", 34>>, [dirty_strings]) - )}, - {"dirty 0", ?_assertEqual( - [{string, <<0>>}, end_json], - decode(<<34, 0, 34>>, [dirty_strings]) - )}, - {"dirty 0\"0", ?_assertEqual( - [{string, <<0, ?doublequote, 0>>}, end_json], - decode(<<34, 0, ?rsolidus, ?doublequote, 0, 34>>, [dirty_strings]) - )}, - {"dirty 0\"0", ?_assertEqual( - [{string, <<0, ?rsolidus, ?doublequote, 0>>}, end_json], - decode(<<34, 0, ?rsolidus, ?rsolidus, ?doublequote, 0, 34>>, [dirty_strings]) - )}, - {"dirty 16#d800", ?_assertEqual( - [{string, <<237, 160, 128>>}, end_json], - decode(<<34, 237, 160, 128, 34>>, [dirty_strings]) - )}, - {"dirty 16#10ffff", ?_assertEqual( - [{string, <<244, 143, 191, 191>>}, end_json], - decode(<<34, 244, 143, 191, 191, 34>>, [dirty_strings]) - )}, - {"dirty /", ?_assertEqual( - [{string, <<$/>>}, end_json], - decode(<<34, $/, 34>>, [dirty_strings, escaped_forward_slashes]) - )}, - {"dirty <<194, 129>>", ?_assertEqual( - [{string, <<194, 129>>}, end_json], - decode(<<34, 194, 129, 34>>, [dirty_strings]) - )} + Clean = codepoints(), + Dirty = reserved_space() ++ surrogates() ++ noncharacters() ++ extended_noncharacters(), + % clean codepoints + [{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual( + [{string, <<Codepoint/utf8>>}, end_json], + decode(<<34/utf8, Codepoint/utf8, 34/utf8>>) + )} || Codepoint <- Clean + ] ++ + % bad codepoints replaced by u+FFFD + [{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual( + [{string, <<16#fffd/utf8>>}, end_json], + decode(to_fake_utf8(Codepoint)) + )} || Codepoint <- Dirty + ] ++ + % bad codepoints that cause errors + [{"dirty u+" ++
integer_to_list(Codepoint, 16), ?_assertError( + badarg, + decode(to_fake_utf8(Codepoint), [{strict, [utf8]}]) + )} || Codepoint <- Dirty ]. -decode_bad_utf(String, Config) -> - case decode(<<34, String/binary, 34>>, Config) of - {error, badarg} -> erlang:error(badarg); - [{string, S}, end_json] -> S - end. +dirty_string_test_() -> + Cases = [ + {"dirty \\n", + [start_array, {string, <<"\\n">>}, end_array, end_json], + <<"[\"\\n\"]">>, + [dirty_strings] + }, + {"dirty \\uwxyz", + [start_array, {string, <<"\\uwxyz">>}, end_array, end_json], + <<"[\"\\uwxyz\"]">>, + [dirty_strings] + }, + {"dirty \\x23", + [start_array, {string, <<"\\x23">>}, end_array, end_json], + <<"[\"\\x23\"]">>, + [dirty_strings] + }, + {"dirty 0", + [start_array, {string, <<0>>}, end_array, end_json], + <<"[\"", 0, "\"]">>, + [dirty_strings] + }, + {"dirty 16#d800", + [start_array, {string, <<237, 160, 128>>}, end_array, end_json], + <<"[\"", 237, 160, 128, "\"]">>, + [dirty_strings] + }, + {"dirty 16#10ffff", + [start_array, {string, <<244, 143, 191, 191>>}, end_array, end_json], + <<"[\"", 244, 143, 191, 191, "\"]">>, + [dirty_strings] + }, + {"dirty /", + [start_array, {string, <<$/>>}, end_array, end_json], + <<"[\"", $/, "\"]">>, + [dirty_strings, escaped_forward_slashes] + }, + {"dirty <<194, 129>>", + [start_array, {string, <<194, 129>>}, end_array, end_json], + <<"[\"", 194, 129, "\"]">>, + [dirty_strings] + } + ], + [{Title, ?_assertEqual(Events, decode(JSON, Config))} + || {Title, Events, JSON, Config} <- Cases + ] ++ + % ensure `dirty_strings` and `strict` interact properly + [{Title, ?_assertEqual(Events, decode(JSON, Config ++ [strict]))} + || {Title, Events, JSON, Config} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events, incremental_decode(JSON, Config))} + || {Title, Events, JSON, Config} <- Cases + ]. 
+ bad_utf8_test_() -> - [ - {"noncharacter u+fffe", ?_assertError( - badarg, - decode_bad_utf(<<239, 191, 190>>, [{strict, [utf8]}]) - )}, - {"noncharacter u+fffe replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<239, 191, 190>>, []) - )}, - {"noncharacter u+ffff", ?_assertError( - badarg, - decode_bad_utf(<<239, 191, 191>>, [{strict, [utf8]}]) - )}, - {"noncharacter u+ffff replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<239, 191, 191>>, []) - )}, - {"orphan continuation byte u+0080", ?_assertError( - badarg, - decode_bad_utf(<<16#0080>>, [{strict, [utf8]}]) - )}, - {"orphan continuation byte u+0080 replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<16#0080>>, []) - )}, - {"orphan continuation byte u+00bf", ?_assertError( - badarg, - decode_bad_utf(<<16#00bf>>, [{strict, [utf8]}]) - )}, - {"orphan continuation byte u+00bf replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<16#00bf>>, []) - )}, - {"2 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, [{strict, [utf8]}]) - )}, - {"2 continuation bytes replaced", ?_assertEqual( + Cases = [ + {"noncharacter u+fffe", <<16#fffd/utf8>>, <<239, 191, 190>>}, + {"noncharacter u+ffff", <<16#fffd/utf8>>, <<239, 191, 191>>}, + {"orphan continuation byte u+0080", <<16#fffd/utf8>>, <<16#0080>>}, + {"orphan continuation byte u+00bf", <<16#fffd/utf8>>, <<16#00bf>>}, + {"2 continuation bytes", binary:copy(<<16#fffd/utf8>>, 2), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, []) - )}, - {"3 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, [{strict, [utf8]}]) - )}, - {"3 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 2))/binary>> + }, + {"3 continuation bytes", binary:copy(<<16#fffd/utf8>>, 3), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, []) - )}, - {"4 continuation bytes", ?_assertError( - badarg, - 
decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, [{strict, [utf8]}]) - )}, - {"4 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 3))/binary>> + }, + {"4 continuation bytes", binary:copy(<<16#fffd/utf8>>, 4), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, []) - )}, - {"5 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, [{strict, [utf8]}]) - )}, - {"5 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 4))/binary>> + }, + {"5 continuation bytes", binary:copy(<<16#fffd/utf8>>, 5), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, []) - )}, - {"6 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, [{strict, [utf8]}]) - )}, - {"6 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 5))/binary>> + }, + {"6 continuation bytes", binary:copy(<<16#fffd/utf8>>, 6), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, []) - )}, - {"all continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [{strict, [utf8]}]) - )}, - {"all continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 6))/binary>> + }, + {"all continuation bytes", binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), - decode_bad_utf( - <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, - [] - ) - )}, - {"lonely start byte", ?_assertError( - badarg, - decode_bad_utf(<<16#00c0>>, [{strict, [utf8]}]) - )}, - {"lonely start byte replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<16#00c0>>, []) - )}, - {"lonely start bytes (2 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#00c0, 32, 16#00df>>, [{strict, [utf8]}]) - )}, - {"lonely start bytes (2 byte) replaced", ?_assertEqual( + <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>> + }, + {"lonely start byte", <<16#fffd/utf8>>, 
<<16#00c0>>}, + {"lonely start bytes (2 byte)", <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00c0, 32, 16#00df>>, []) - )}, - {"lonely start bytes (3 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#00e0, 32, 16#00ef>>, [{strict, [utf8]}]) - )}, - {"lonely start bytes (3 byte) replaced", ?_assertEqual( + <<16#00c0, 32, 16#00df>> + }, + {"lonely start bytes (3 byte)", <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00e0, 32, 16#00ef>>, []) - )}, - {"lonely start bytes (4 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#00f0, 32, 16#00f7>>, [{strict, [utf8]}]) - )}, - {"lonely start bytes (4 byte) replaced", ?_assertEqual( + <<16#00e0, 32, 16#00ef>> + }, + {"lonely start bytes (4 byte)", <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00f0, 32, 16#00f7>>, []) - )}, - {"missing continuation byte (3 byte)", ?_assertError( - badarg, - decode_bad_utf(<<224, 160, 32>>, [{strict, [utf8]}]) - )}, - {"missing continuation byte (3 byte) replaced", ?_assertEqual( + <<16#00f0, 32, 16#00f7>> + }, + {"missing continuation byte (3 byte)", <<16#fffd/utf8, 32>>, <<224, 160, 32>>}, + {"missing continuation byte (4 byte missing one)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<224, 160, 32>>, []) - )}, - {"missing continuation byte (4 byte missing one)", ?_assertError( - badarg, - decode_bad_utf(<<240, 144, 128, 32>>, [{strict, [utf8]}]) - )}, - {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( + <<240, 144, 128, 32>> + }, + {"missing continuation byte (4 byte missing two)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<240, 144, 128, 32>>, []) - )}, - {"missing continuation byte (4 byte missing two)", ?_assertError( - badarg, - decode_bad_utf(<<240, 144, 32>>, [{strict, [utf8]}]) - )}, - {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( + <<240, 144, 32>> + }, + {"overlong encoding of u+002f (2 byte)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<240, 144, 32>>, []) - )}, - {"overlong encoding of 
u+002f (2 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#c0, 16#af, 32>>, [{strict, [utf8]}]) - )}, - {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( + <<16#c0, 16#af, 32>> + }, + {"overlong encoding of u+002f (3 byte)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#c0, 16#af, 32>>, []) - )}, - {"overlong encoding of u+002f (3 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, [{strict, [utf8]}]) - )}, - {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( + <<16#e0, 16#80, 16#af, 32>> + }, + {"overlong encoding of u+002f (4 byte)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, []) - )}, - {"overlong encoding of u+002f (4 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, [{strict, [utf8]}]) - )}, - {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( + <<16#f0, 16#80, 16#80, 16#af, 32>> + }, + {"highest overlong 2 byte sequence", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, []) - )}, - {"highest overlong 2 byte sequence", ?_assertError( - badarg, - decode_bad_utf(<<16#c1, 16#bf, 32>>, [{strict, [utf8]}]) - )}, - {"highest overlong 2 byte sequence replaced", ?_assertEqual( + <<16#c1, 16#bf, 32>> + }, + {"highest overlong 3 byte sequence", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#c1, 16#bf, 32>>, []) - )}, - {"highest overlong 3 byte sequence", ?_assertError( - badarg, - decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, [{strict, [utf8]}]) - )}, - {"highest overlong 3 byte sequence replaced", ?_assertEqual( + <<16#e0, 16#9f, 16#bf, 32>> + }, + {"highest overlong 4 byte sequence", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, []) - )}, - {"highest overlong 4 byte sequence", ?_assertError( + <<16#f0, 16#8f, 16#bf, 16#bf, 32>> + } + ], + [{Title, ?_assertError( badarg, - decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [{strict, [utf8]}]) - )}, - {"highest overlong 4 byte sequence 
replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, []) - )} + decode(<<34, JSON/binary, 34>>, [{strict, [utf8]}]) + )} || {Title, _, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertError( + badarg, + incremental_decode(<<34, JSON/binary, 34>>, [{strict, [utf8]}]) + )} || {Title, _, JSON} <- Cases + ] ++ + [{Title ++ " replaced", ?_assertEqual( + [{string, Replacement}, end_json], + decode(<<34, JSON/binary, 34>>) + )} || {Title, Replacement, JSON} <- Cases + ] ++ + [{Title ++ " replaced (incremental)", ?_assertEqual( + [{string, Replacement}, end_json], + incremental_decode(<<34, JSON/binary, 34>>) + )} || {Title, Replacement, JSON} <- Cases ]. -unescape(Bin, Config) -> - case decode(<<34, Bin/binary, 34>>, Config) of - [{string, String}, end_json] -> String; - {error, badarg} -> erlang:error(badarg) - end. - unescape_test_() -> - [ - {"unescape backspace", ?_assertEqual( - <<"\b">>, - unescape(<<"\\b"/utf8>>, []) - )}, - {"unescape tab", ?_assertEqual( - <<"\t">>, - unescape(<<"\\t"/utf8>>, []) - )}, - {"unescape newline", ?_assertEqual( - <<"\n">>, - unescape(<<"\\n"/utf8>>, []) - )}, - {"unescape formfeed", ?_assertEqual( - <<"\f">>, - unescape(<<"\\f"/utf8>>, []) - )}, - {"unescape carriage return", ?_assertEqual( - <<"\r">>, - unescape(<<"\\r"/utf8>>, []) - )}, - {"unescape quote", ?_assertEqual( - <<"\"">>, - unescape(<<"\\\""/utf8>>, []) - )}, - {"unescape solidus", ?_assertEqual( - <<"/">>, - unescape(<<"\\/"/utf8>>, []) - )}, - {"unescape reverse solidus", ?_assertEqual( - <<"\\">>, - unescape(<<"\\\\"/utf8>>, []) - )}, - {"unescape control", ?_assertEqual( - <<0>>, - unescape(<<"\\u0000"/utf8>>, []) - )}, - {"unescape surrogate pair", ?_assertEqual( - <<16#10000/utf8>>, - unescape(<<"\\ud800\\udc00"/utf8>>, []) - )}, - {"replace bad high surrogate", ?_assertEqual( - <<16#fffd/utf8>>, - unescape(<<"\\udc00"/utf8>>, []) - )}, - {"do not unescape bad high surrogate", ?_assertError( - badarg, - 
unescape(<<"\\udc00"/utf8>>, [{strict, [utf8]}]) - )}, - {"replace naked high surrogate", ?_assertEqual( + Cases = [ + {"unescape backspace", <<"\b">>, <<"\\b"/utf8>>}, + {"unescape tab", <<"\t">>, <<"\\t"/utf8>>}, + {"unescape newline", <<"\n">>, <<"\\n"/utf8>>}, + {"unescape formfeed", <<"\f">>, <<"\\f"/utf8>>}, + {"unescape carriage return", <<"\r">>, <<"\\r"/utf8>>}, + {"unescape quote", <<"\"">>, <<"\\\""/utf8>>}, + {"unescape solidus", <<"/">>, <<"\\/"/utf8>>}, + {"unescape reverse solidus", <<"\\">>, <<"\\\\"/utf8>>}, + {"unescape control", <<0>>, <<"\\u0000"/utf8>>}, + {"unescape surrogate pair", <<16#10000/utf8>>, <<"\\ud800\\udc00"/utf8>>}, + {"replace bad high surrogate", <<16#fffd/utf8>>, <<"\\udc00"/utf8>>}, + {"replace naked high surrogate", <<16#fffd/utf8, "hello world">>, - unescape(<<"\\ud800hello world"/utf8>>, []) - )}, - {"do not unescape naked high surrogate", ?_assertError( - badarg, - unescape(<<"\\ud800hello world"/utf8>>, [{strict, [utf8]}]) - )}, - {"replace naked low surrogate", ?_assertEqual( + <<"\\ud800hello world"/utf8>> + }, + {"replace naked low surrogate", <<16#fffd/utf8, "hello world">>, - unescape(<<"\\udc00hello world"/utf8>>, []) - )}, - {"do not unescape naked low surrogate", ?_assertError( - badarg, - unescape(<<"\\udc00hello world"/utf8>>, [{strict, [utf8]}]) - )}, - {"replace bad surrogate pair", ?_assertEqual( - <<16#fffd/utf8, 16#fffd/utf8>>, - unescape(<<"\\ud800\\u0000">>, []) - )}, - {"do not unescape bad surrogate pair", ?_assertError( - badarg, - unescape(<<"\\ud800\\u0000">>, [{strict, [utf8]}]) - )}, - {"bad pseudo escape sequence", ?_assertError( - badarg, - unescape(<<"\\uabcg">>, [strict]) - )} + <<"\\udc00hello world"/utf8>> + }, + {"replace bad surrogate pair", <<16#fffd/utf8, 16#fffd/utf8>>, <<"\\ud800\\u0000">>} + ], + [{Title, ?_assertEqual([{string, Escaped}, end_json], decode(<<34, JSON/binary, 34>>))} + || {Title, Escaped, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual( + [{string, 
Escaped}, end_json], + incremental_decode(<<34, JSON/binary, 34>>) + )} || {Title, Escaped, JSON} <- Cases ]. -maybe_escape(Bin, Config) -> - [{string, String}, end_json] = decode(Bin, Config), - String. +bad_escaped_surrogate_test_() -> + Cases = [ + {"do not unescape bad high surrogate", <<"\\udc00">>}, + {"do not unescape naked high surrogate", <<"\\ud800hello world">>}, + {"do not unescape naked low surrogate", <<"\\udc00hello world">>}, + {"do not unescape bad surrogate pair", <<"\\ud800\\u0000">>} + ], + [{Title, ?_assertError(badarg, decode(<<34, JSON/binary, 34>>, [{strict, [utf8]}]))} + || {Title, JSON} <- Cases + ]. + escape_test_() -> + Cases = [ + {"backspace", <<"\b">>, <<"\\b">>}, + {"tab", <<"\t">>, <<"\\t">>}, + {"newline", <<"\n">>, <<"\\n">>}, + {"formfeed", <<"\f">>, <<"\\f">>}, + {"carriage return", <<"\r">>, <<"\\r">>}, + {"quote", <<"\"">>, <<"\\\"">>}, + {"backslash", <<"\\">>, <<"\\\\">>}, + {"control", <<0>>, <<"\\u0000">>} + ], + [{"escape " ++ Title, ?_assertEqual( + [{string, Escaped}, end_json], + decode(<<34, Escaped/binary, 34>>, [escaped_strings]) + )} || {Title, _Unescaped, Escaped} <- Cases + ] ++ + [{"do not escape " ++ Title, ?_assertEqual( + [{string, Unescaped}, end_json], + decode(<<34, Escaped/binary, 34>>) + )} || {Title, Unescaped, Escaped} <- Cases + ]. 
+ + +special_escape_test_() -> [ - {"maybe_escape backspace", ?_assertEqual( - <<"\\b">>, - maybe_escape(<<34, "\\b"/utf8, 34>>, [escaped_strings]) + {"escape forward slash", ?_assertEqual( + [{string, <<"\\/">>}, end_json], + decode(<<34, "/"/utf8, 34>>, [escaped_strings, escaped_forward_slashes]) )}, - {"don't escape backspace", ?_assertEqual( - <<"\b">>, - maybe_escape(<<34, "\\b"/utf8, 34>>, []) + {"do not escape forward slash", ?_assertEqual( + [{string, <<"/">>}, end_json], + decode(<<34, "/"/utf8, 34>>, [escaped_strings]) )}, - {"maybe_escape tab", ?_assertEqual( - <<"\\t">>, - maybe_escape(<<34, "\\t"/utf8, 34>>, [escaped_strings]) + {"escape jsonp", ?_assertEqual( + [{string, <<"\\u2028">>}, end_json], + decode(<<34, 16#2028/utf8, 34>>, [escaped_strings]) )}, - {"maybe_escape newline", ?_assertEqual( - <<"\\n">>, - maybe_escape(<<34, "\\n"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape formfeed", ?_assertEqual( - <<"\\f">>, - maybe_escape(<<34, "\\f"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape carriage return", ?_assertEqual( - <<"\\r">>, - maybe_escape(<<34, "\\r"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape quote", ?_assertEqual( - <<"\\\"">>, - maybe_escape(<<34, "\\\""/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape forward slash", ?_assertEqual( - <<"\\/">>, - maybe_escape(<<34, "/"/utf8, 34>>, [escaped_strings, escaped_forward_slashes]) - )}, - {"do not maybe_escape forward slash", ?_assertEqual( - <<"/">>, - maybe_escape(<<34, "/"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape backslash", ?_assertEqual( - <<"\\\\">>, - maybe_escape(<<34, "\\\\"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape jsonp (u2028)", ?_assertEqual( - <<"\\u2028">>, - maybe_escape(<<34, 16#2028/utf8, 34>>, [escaped_strings]) - )}, - {"do not maybe_escape jsonp (u2028)", ?_assertEqual( - <<16#2028/utf8>>, - maybe_escape(<<34, 16#2028/utf8, 34>>, [escaped_strings, unescaped_jsonp]) - )}, - {"maybe_escape jsonp (u2029)", ?_assertEqual( 
- <<"\\u2029">>, - maybe_escape(<<34, 16#2029/utf8, 34>>, [escaped_strings]) - )}, - {"do not maybe_escape jsonp (u2029)", ?_assertEqual( - <<16#2029/utf8>>, - maybe_escape(<<34, 16#2029/utf8, 34>>, [escaped_strings, unescaped_jsonp]) - )}, - {"maybe_escape u0000", ?_assertEqual( - <<"\\u0000">>, - maybe_escape(<<34, "\\u0000"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0001", ?_assertEqual( - <<"\\u0001">>, - maybe_escape(<<34, "\\u0001"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0002", ?_assertEqual( - <<"\\u0002">>, - maybe_escape(<<34, "\\u0002"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0003", ?_assertEqual( - <<"\\u0003">>, - maybe_escape(<<34, "\\u0003"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0004", ?_assertEqual( - <<"\\u0004">>, - maybe_escape(<<34, "\\u0004"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0005", ?_assertEqual( - <<"\\u0005">>, - maybe_escape(<<34, "\\u0005"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0006", ?_assertEqual( - <<"\\u0006">>, - maybe_escape(<<34, "\\u0006"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0007", ?_assertEqual( - <<"\\u0007">>, - maybe_escape(<<34, "\\u0007"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u000b", ?_assertEqual( - <<"\\u000b">>, - maybe_escape(<<34, "\\u000b"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u000e", ?_assertEqual( - <<"\\u000e">>, - maybe_escape(<<34, "\\u000e"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u000f", ?_assertEqual( - <<"\\u000f">>, - maybe_escape(<<34, "\\u000f"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0010", ?_assertEqual( - <<"\\u0010">>, - maybe_escape(<<34, "\\u0010"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0011", ?_assertEqual( - <<"\\u0011">>, - maybe_escape(<<34, "\\u0011"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0012", ?_assertEqual( - <<"\\u0012">>, - maybe_escape(<<34, "\\u0012"/utf8, 34>>, [escaped_strings]) - )}, - 
{"maybe_escape u0013", ?_assertEqual( - <<"\\u0013">>, - maybe_escape(<<34, "\\u0013"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0014", ?_assertEqual( - <<"\\u0014">>, - maybe_escape(<<34, "\\u0014"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0015", ?_assertEqual( - <<"\\u0015">>, - maybe_escape(<<34, "\\u0015"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0016", ?_assertEqual( - <<"\\u0016">>, - maybe_escape(<<34, "\\u0016"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0017", ?_assertEqual( - <<"\\u0017">>, - maybe_escape(<<34, "\\u0017"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0018", ?_assertEqual( - <<"\\u0018">>, - maybe_escape(<<34, "\\u0018"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0019", ?_assertEqual( - <<"\\u0019">>, - maybe_escape(<<34, "\\u0019"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001a", ?_assertEqual( - <<"\\u001a">>, - maybe_escape(<<34, "\\u001a"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001b", ?_assertEqual( - <<"\\u001b">>, - maybe_escape(<<34, "\\u001b"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001c", ?_assertEqual( - <<"\\u001c">>, - maybe_escape(<<34, "\\u001c"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001d", ?_assertEqual( - <<"\\u001d">>, - maybe_escape(<<34, "\\u001d"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001e", ?_assertEqual( - <<"\\u001e">>, - maybe_escape(<<34, "\\u001e"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001f", ?_assertEqual( - <<"\\u001f">>, - maybe_escape(<<34, "\\u001f"/utf8, 34>>, [escaped_strings]) + {"do not escape jsonp", ?_assertEqual( + [{string, <<16#2028/utf8>>}, end_json], + decode(<<34, 16#2028/utf8, 34>>, [escaped_strings, unescaped_jsonp]) )} ]. 
@@ -2055,8 +1594,8 @@ single_quoted_string_test_() -> [{string, <<"hello world">>}, end_json], decode(<<39, "hello world", 39>>, []) )}, - {"single quoted string error", ?_assertEqual( - {error, badarg}, + {"single quoted string error", ?_assertError( + badarg, decode(<<39, "hello world", 39>>, [{strict, [single_quotes]}]) )}, {"single quoted string with embedded double quotes", ?_assertEqual( @@ -2078,8 +1617,8 @@ single_quoted_string_test_() -> end_object, end_json], decode(<<"{'key':'value','another key':'another value'}">>, []) )}, - {"single quoted key error", ?_assertEqual( - {error, badarg}, + {"single quoted key error", ?_assertError( + badarg, decode(<<"{'key':'value','another key':'another value'}">>, [{strict, [single_quotes]}]) )} ]. diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 1c22e46..f4e8699 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -61,24 +61,12 @@ unzip([{K, V}|Rest], Acc) when is_binary(K); is_atom(K) -> unzip(Rest, [V, K] ++ -include_lib("eunit/include/eunit.hrl"). -encode_test_() -> - Data = jsx:test_cases(), - Encode = encoder(jsx, [], []), - [ - { - Title, ?_assertEqual( - Events, - Encode(Term) -- [end_json] - ) - } || {Title, _, Term, Events} <- Data - ]. - -err(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term). +parser(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term). error_test_() -> [ - {"value error", ?_assertError(badarg, err(self(), []))}, - {"string error", ?_assertError(badarg, err(<<239, 191, 191>>, [strict]))} + {"value error", ?_assertError(badarg, parser(self(), []))}, + {"string error", ?_assertError(badarg, parser(<<239, 191, 191>>, [strict]))} ]. 
custom_error_handler_test_() -> @@ -86,11 +74,11 @@ custom_error_handler_test_() -> [ {"value error", ?_assertEqual( {value, [self()]}, - err(self(), [{error_handler, Error}]) + parser(self(), [{error_handler, Error}]) )}, {"string error", ?_assertEqual( {string, [{string, <<239, 191, 191>>}]}, - err(<<239, 191, 191>>, [{error_handler, Error}, strict]) + parser(<<239, 191, 191>>, [{error_handler, Error}, strict]) )} ]. diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index e59ff1e..dc2d436 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -26,10 +26,6 @@ -export([parser/3, resume/5]). -export([init/1, handle_event/2]). --ifdef(TEST). --export([clean_string/2, json_escape_sequence/1]). --endif. - -spec parser(Handler::module(), State::any(), Config::jsx:config()) -> jsx:parser(). @@ -455,47 +451,15 @@ handle_event(Event, State) -> [Event] ++ State. -include_lib("eunit/include/eunit.hrl"). -parse(Events, Config) -> - Chunk = try - value(Events ++ [end_json], {jsx, []}, [], jsx_config:parse_config(Config)) - catch - error:badarg -> {error, badarg} - end, - Incremental = try - Final = lists:foldl( - fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end, - parser(jsx, [], [stream] ++ Config), - lists:map(fun(X) -> [X] end, Events) - ), - Final(end_stream) - catch - error:badarg -> {error, badarg} - end, - ?assert(Chunk == Incremental), - Chunk. - - -parse_test_() -> - Data = jsx:test_cases(), - [ - { - Title, ?_assertEqual( - Events ++ [end_json], - parse(Events, []) - ) - } || {Title, _, _, Events} <- Data - ]. - - -parse_error(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)). +parse(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)). 
error_test_() -> [ - {"value error", ?_assertError(badarg, parse_error([self()], []))}, - {"maybe_done error", ?_assertError(badarg, parse_error([start_array, end_array, start_array, end_json], []))}, - {"done error", ?_assertError(badarg, parse_error([{string, <<"">>}, {literal, true}, end_json], []))}, - {"string error", ?_assertError(badarg, parse_error([{string, <<239, 191, 191>>}, end_json], [strict_utf8]))} + {"value error", ?_assertError(badarg, parse([self()], []))}, + {"maybe_done error", ?_assertError(badarg, parse([start_array, end_array, start_array, end_json], []))}, + {"done error", ?_assertError(badarg, parse([{string, <<"">>}, {literal, true}, end_json], []))}, + {"string error", ?_assertError(badarg, parse([{string, <<239, 191, 191>>}, end_json], [strict_utf8]))} ]. @@ -504,47 +468,533 @@ custom_error_handler_test_() -> [ {"value error", ?_assertEqual( {value, [self()]}, - parse_error([self()], [{error_handler, Error}]) + parse([self()], [{error_handler, Error}]) )}, {"maybe_done error", ?_assertEqual( {maybe_done, [start_array, end_json]}, - parse_error([start_array, end_array, start_array, end_json], [{error_handler, Error}]) + parse([start_array, end_array, start_array, end_json], [{error_handler, Error}]) )}, {"done error", ?_assertEqual( {maybe_done, [{literal, true}, end_json]}, - parse_error([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}]) + parse([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}]) )}, {"string error", ?_assertEqual( {string, [{string, <<239, 191, 191>>}, end_json]}, - parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict]) + parse([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict]) )} ]. 
+incomplete_test_() -> + Cases = [ + {"incomplete value", []}, + {"incomplete object", [start_object]}, + {"incomplete array", [start_array]}, + {"incomplete maybe_done", [start_array, end_array]} + ], + [{Title, ?_assertError(badarg, parse(Events, []))} + || {Title, Events} <- Cases + ]. + + custom_incomplete_handler_test_() -> [ {"custom incomplete handler", ?_assertError( badarg, - parse_error([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}]) + parse([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}]) )} ]. raw_test_() -> + Parse = fun(Events, Config) -> (parser(?MODULE, [], Config))(Events ++ [end_json]) end, [ {"raw empty list", ?_assertEqual( - [start_array, end_array, end_json], - parse([{raw, <<"[]">>}], []) + [start_array, end_array], + Parse([{raw, <<"[]">>}], []) )}, {"raw empty object", ?_assertEqual( - [start_object, end_object, end_json], - parse([{raw, <<"{}">>}], []) + [start_object, end_object], + Parse([{raw, <<"{}">>}], []) )}, {"raw chunk inside stream", ?_assertEqual( - [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object, end_json], - parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], []) + [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object], + Parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], []) )} ]. +%% erlang refuses to encode certain codepoints, so fake them +to_fake_utf8(N) when N < 16#0080 -> <<N:8>>; +to_fake_utf8(N) when N < 16#0800 -> + <<0:5, Y:5, X:6>> = <<N:16>>, + <<2#110:3, Y:5, 2#10:2, X:6>>; +to_fake_utf8(N) when N < 16#10000 -> + <<Z:4, Y:6, X:6>> = <<N:16>>, + <<2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6>>; +to_fake_utf8(N) -> + <<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>, + <<2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6>>. 
+ + +codepoints() -> + unicode:characters_to_binary( + [32, 33] + ++ lists:seq(35, 46) + ++ lists:seq(48, 91) + ++ lists:seq(93, 16#2027) + ++ lists:seq(16#202a, 16#d7ff) + ++ lists:seq(16#e000, 16#fdcf) + ++ lists:seq(16#fdf0, 16#fffd) + ). + +extended_codepoints() -> + unicode:characters_to_binary( + lists:seq(16#10000, 16#1fffd) ++ [ + 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, + 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, + 16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000 + ] + ). + +reserved_space() -> [ to_fake_utf8(N) || N <- lists:seq(16#fdd0, 16#fdef) ]. + +surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ]. + +noncharacters() -> [ to_fake_utf8(N) || N <- lists:seq(16#fffe, 16#ffff) ]. + +extended_noncharacters() -> + [ to_fake_utf8(N) || N <- [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] + ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] + ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] + ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] + ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff] + ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] + ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] + ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff] + ]. 
+ + +clean_string_test_() -> + [ + {"clean codepoints", ?_assertEqual( + codepoints(), + clean_string(codepoints(), #config{}) + )}, + {"clean extended codepoints", ?_assertEqual( + extended_codepoints(), + clean_string(extended_codepoints(), #config{}) + )}, + {"escape path codepoints", ?_assertEqual( + codepoints(), + clean_string(codepoints(), #config{escaped_strings=true}) + )}, + {"escape path extended codepoints", ?_assertEqual( + extended_codepoints(), + clean_string(extended_codepoints(), #config{escaped_strings=true}) + )}, + {"error reserved space", ?_assertEqual( + lists:duplicate(length(reserved_space()), {error, badarg}), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, reserved_space()) + )}, + {"error surrogates", ?_assertEqual( + lists:duplicate(length(surrogates()), {error, badarg}), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, surrogates()) + )}, + {"error noncharacters", ?_assertEqual( + lists:duplicate(length(noncharacters()), {error, badarg}), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, noncharacters()) + )}, + {"error extended noncharacters", ?_assertEqual( + lists:duplicate(length(extended_noncharacters()), {error, badarg}), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, extended_noncharacters()) + )}, + {"clean reserved space", ?_assertEqual( + lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, reserved_space()) + )}, + {"clean surrogates", ?_assertEqual( + lists:duplicate(length(surrogates()), <<16#fffd/utf8>>), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, surrogates()) + )}, + {"clean noncharacters", ?_assertEqual( + lists:duplicate(length(noncharacters()), <<16#fffd/utf8>>), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, noncharacters()) + )}, + {"clean 
extended noncharacters", ?_assertEqual( + lists:duplicate(length(extended_noncharacters()), <<16#fffd/utf8>>), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, extended_noncharacters()) + )} + ]. + + +escape_test_() -> + [ + {"maybe_escape backspace", ?_assertEqual( + <<"\\b">>, + clean_string(<<16#0008/utf8>>, #config{escaped_strings=true}) + )}, + {"don't escape backspace", ?_assertEqual( + <<"\b">>, + clean_string(<<16#0008/utf8>>, #config{}) + )}, + {"maybe_escape tab", ?_assertEqual( + <<"\\t">>, + clean_string(<<16#0009/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape newline", ?_assertEqual( + <<"\\n">>, + clean_string(<<16#000a/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape formfeed", ?_assertEqual( + <<"\\f">>, + clean_string(<<16#000c/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape carriage return", ?_assertEqual( + <<"\\r">>, + clean_string(<<16#000d/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape quote", ?_assertEqual( + <<"\\\"">>, + clean_string(<<16#0022/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape forward slash", ?_assertEqual( + <<"\\/">>, + clean_string(<<16#002f/utf8>>, #config{escaped_strings=true, escaped_forward_slashes=true}) + )}, + {"do not maybe_escape forward slash", ?_assertEqual( + <<"/">>, + clean_string(<<16#002f/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape backslash", ?_assertEqual( + <<"\\\\">>, + clean_string(<<16#005c/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape jsonp (u2028)", ?_assertEqual( + <<"\\u2028">>, + clean_string(<<16#2028/utf8>>, #config{escaped_strings=true}) + )}, + {"do not maybe_escape jsonp (u2028)", ?_assertEqual( + <<16#2028/utf8>>, + clean_string(<<16#2028/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true}) + )}, + {"maybe_escape jsonp (u2029)", ?_assertEqual( + <<"\\u2029">>, + clean_string(<<16#2029/utf8>>, #config{escaped_strings=true}) + )}, + {"do not maybe_escape jsonp 
(u2029)", ?_assertEqual( + <<16#2029/utf8>>, + clean_string(<<16#2029/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true}) + )}, + {"maybe_escape u0000", ?_assertEqual( + <<"\\u0000">>, + clean_string(<<16#0000/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0001", ?_assertEqual( + <<"\\u0001">>, + clean_string(<<16#0001/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0002", ?_assertEqual( + <<"\\u0002">>, + clean_string(<<16#0002/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0003", ?_assertEqual( + <<"\\u0003">>, + clean_string(<<16#0003/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0004", ?_assertEqual( + <<"\\u0004">>, + clean_string(<<16#0004/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0005", ?_assertEqual( + <<"\\u0005">>, + clean_string(<<16#0005/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0006", ?_assertEqual( + <<"\\u0006">>, + clean_string(<<16#0006/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0007", ?_assertEqual( + <<"\\u0007">>, + clean_string(<<16#0007/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u000b", ?_assertEqual( + <<"\\u000b">>, + clean_string(<<16#000b/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u000e", ?_assertEqual( + <<"\\u000e">>, + clean_string(<<16#000e/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u000f", ?_assertEqual( + <<"\\u000f">>, + clean_string(<<16#000f/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0010", ?_assertEqual( + <<"\\u0010">>, + clean_string(<<16#0010/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0011", ?_assertEqual( + <<"\\u0011">>, + clean_string(<<16#0011/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0012", ?_assertEqual( + <<"\\u0012">>, + clean_string(<<16#0012/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0013", ?_assertEqual( + <<"\\u0013">>, + 
clean_string(<<16#0013/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0014", ?_assertEqual( + <<"\\u0014">>, + clean_string(<<16#0014/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0015", ?_assertEqual( + <<"\\u0015">>, + clean_string(<<16#0015/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0016", ?_assertEqual( + <<"\\u0016">>, + clean_string(<<16#0016/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0017", ?_assertEqual( + <<"\\u0017">>, + clean_string(<<16#0017/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0018", ?_assertEqual( + <<"\\u0018">>, + clean_string(<<16#0018/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0019", ?_assertEqual( + <<"\\u0019">>, + clean_string(<<16#0019/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001a", ?_assertEqual( + <<"\\u001a">>, + clean_string(<<16#001a/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001b", ?_assertEqual( + <<"\\u001b">>, + clean_string(<<16#001b/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001c", ?_assertEqual( + <<"\\u001c">>, + clean_string(<<16#001c/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001d", ?_assertEqual( + <<"\\u001d">>, + clean_string(<<16#001d/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001e", ?_assertEqual( + <<"\\u001e">>, + clean_string(<<16#001e/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001f", ?_assertEqual( + <<"\\u001f">>, + clean_string(<<16#001f/utf8>>, #config{escaped_strings=true}) + )} + ]. 
+ + +bad_utf8_test_() -> + [ + {"noncharacter u+fffe", ?_assertEqual( + {error, badarg}, + clean_string(to_fake_utf8(16#fffe), #config{strict_utf8=true}) + )}, + {"noncharacter u+fffe replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(to_fake_utf8(16#fffe), #config{}) + )}, + {"noncharacter u+ffff", ?_assertEqual( + {error, badarg}, + clean_string(to_fake_utf8(16#ffff), #config{strict_utf8=true}) + )}, + {"noncharacter u+ffff replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(to_fake_utf8(16#ffff), #config{}) + )}, + {"orphan continuation byte u+0080", ?_assertEqual( + {error, badarg}, + clean_string(<<16#0080>>, #config{strict_utf8=true}) + )}, + {"orphan continuation byte u+0080 replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#0080>>, #config{}) + )}, + {"orphan continuation byte u+00bf", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00bf>>, #config{strict_utf8=true}) + )}, + {"orphan continuation byte u+00bf replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#00bf>>, #config{}) + )}, + {"2 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{strict_utf8=true}) + )}, + {"2 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 2), + clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) + )}, + {"3 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{strict_utf8=true}) + )}, + {"3 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 3), + clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) + )}, + {"4 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{strict_utf8=true}) + )}, + {"4 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 4), + clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) + )}, + 
{"5 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{strict_utf8=true}) + )}, + {"5 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 5), + clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) + )}, + {"6 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{strict_utf8=true}) + )}, + {"6 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 6), + clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) + )}, + {"all continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{strict_utf8=true}) + )}, + {"all continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), + clean_string( + <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, + #config{} + ) + )}, + {"lonely start byte", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00c0>>, #config{strict_utf8=true}) + )}, + {"lonely start byte replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#00c0>>, #config{}) + )}, + {"lonely start bytes (2 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00c0, 32, 16#00df>>, #config{strict_utf8=true}) + )}, + {"lonely start bytes (2 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00c0, 32, 16#00df>>, #config{}) + )}, + {"lonely start bytes (3 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00e0, 32, 16#00ef>>, #config{strict_utf8=true}) + )}, + {"lonely start bytes (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) + )}, + {"lonely start bytes (4 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00f0, 32, 16#00f7>>, #config{strict_utf8=true}) + )}, + {"lonely start 
bytes (4 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) + )}, + {"missing continuation byte (3 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<224, 160, 32>>, #config{strict_utf8=true}) + )}, + {"missing continuation byte (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<224, 160, 32>>, #config{}) + )}, + {"missing continuation byte (4 byte missing one)", ?_assertEqual( + {error, badarg}, + clean_string(<<240, 144, 128, 32>>, #config{strict_utf8=true}) + )}, + {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<240, 144, 128, 32>>, #config{}) + )}, + {"missing continuation byte (4 byte missing two)", ?_assertEqual( + {error, badarg}, + clean_string(<<240, 144, 32>>, #config{strict_utf8=true}) + )}, + {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<240, 144, 32>>, #config{}) + )}, + {"overlong encoding of u+002f (2 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#c0, 16#af, 32>>, #config{strict_utf8=true}) + )}, + {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#c0, 16#af, 32>>, #config{}) + )}, + {"overlong encoding of u+002f (3 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{strict_utf8=true}) + )}, + {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) + )}, + {"overlong encoding of u+002f (4 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{strict_utf8=true}) + )}, + {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) + )}, + {"highest overlong 2 byte sequence", 
?_assertEqual( + {error, badarg}, + clean_string(<<16#c1, 16#bf, 32>>, #config{strict_utf8=true}) + )}, + {"highest overlong 2 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#c1, 16#bf, 32>>, #config{}) + )}, + {"highest overlong 3 byte sequence", ?_assertEqual( + {error, badarg}, + clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{strict_utf8=true}) + )}, + {"highest overlong 3 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) + )}, + {"highest overlong 4 byte sequence", ?_assertEqual( + {error, badarg}, + clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{strict_utf8=true}) + )}, + {"highest overlong 4 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) + )} + ]. + + +json_escape_sequence_test_() -> + [ + {"json escape sequence test - 16#0000", ?_assertEqual(json_escape_sequence(16#0000), "\\u0000")}, + {"json escape sequence test - 16#abc", ?_assertEqual(json_escape_sequence(16#abc), "\\u0abc")}, + {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} + ]. + + -endif. \ No newline at end of file diff --git a/src/jsx_tests.hrl b/src/jsx_tests.hrl deleted file mode 100644 index e6ec7aa..0000000 --- a/src/jsx_tests.hrl +++ /dev/null @@ -1,688 +0,0 @@ -%% data and helper functions for tests - --export([init/1, handle_event/2]). --export([test_cases/0]). - - --include_lib("eunit/include/eunit.hrl"). - - -%% test handler -init([]) -> []. - -handle_event(end_json, State) -> lists:reverse([end_json] ++ State); -handle_event(Event, State) -> [Event] ++ State. - - -test_cases() -> - empty_array() - ++ nested_array() - ++ empty_object() - ++ nested_object() - ++ strings() - ++ literals() - ++ integers() - ++ floats() - ++ compound_object(). - - -empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}]. 
- -nested_array() -> - [{ - "[[[]]]", - <<"[[[]]]">>, - [[[]]], - [start_array, start_array, start_array, end_array, end_array, end_array] - }]. - - -empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}]. - -nested_object() -> - [{ - "{\"key\":{\"key\":{}}}", - <<"{\"key\":{\"key\":{}}}">>, - [{<<"key">>, [{<<"key">>, [{}]}]}], - [ - start_object, - {key, <<"key">>}, - start_object, - {key, <<"key">>}, - start_object, - end_object, - end_object, - end_object - ] - }]. - - -naked_strings() -> - Raw = [ - "", - "hello world" - ], - [ - { - String, - <<"\"", (list_to_binary(String))/binary, "\"">>, - list_to_binary(String), - [{string, list_to_binary(String)}] - } - || String <- Raw - ]. - -strings() -> - naked_strings() - ++ [ wrap_with_array(Test) || Test <- naked_strings() ] - ++ [ wrap_with_object(Test) || Test <- naked_strings() ]. - - -naked_integers() -> - Raw = [ - 1, 2, 3, - 127, 128, 129, - 255, 256, 257, - 65534, 65535, 65536, - 18446744073709551616, - 18446744073709551617 - ], - [ - { - integer_to_list(X), - list_to_binary(integer_to_list(X)), - X, - [{integer, X}] - } - || X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0] - ]. - -integers() -> - naked_integers() - ++ [ wrap_with_array(Test) || Test <- naked_integers() ] - ++ [ wrap_with_object(Test) || Test <- naked_integers() ]. - - -naked_floats() -> - Raw = [ - 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, - 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, - 1234567890.0987654321, - 0.0e0, - 1234567890.0987654321e16, - 0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308, - 1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308, - 2.2250738585072014e-308, %% min normalized float - 1.7976931348623157e308, %% max normalized float - 5.0e-324, %% min denormalized float - 2.225073858507201e-308 %% max denormalized float - ], - [ - { - sane_float_to_list(X), - list_to_binary(sane_float_to_list(X)), - X, - [{float, X}] - } - || X <- Raw ++ [ -1 * Y || Y <- Raw ] - ]. 
- -floats() -> - naked_floats() - ++ [ wrap_with_array(Test) || Test <- naked_floats() ] - ++ [ wrap_with_object(Test) || Test <- naked_floats() ]. - - -naked_literals() -> - [ - { - atom_to_list(Literal), - atom_to_binary(Literal, unicode), - Literal, - [{literal, Literal}] - } - || Literal <- [true, false, null] - ]. - -literals() -> - naked_literals() - ++ [ wrap_with_array(Test) || Test <- naked_literals() ] - ++ [ wrap_with_object(Test) || Test <- naked_literals() ]. - - -compound_object() -> - [{ - "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]", - <<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>, - [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]], - [ - start_array, - start_object, - {key, <<"alpha">>}, - start_array, - {integer, 1}, - {integer, 2}, - {integer, 3}, - end_array, - {key, <<"beta">>}, - start_object, - {key, <<"alpha">>}, - start_array, - {float, 1.0}, - {float, 2.0}, - {float, 3.0}, - end_array, - {key, <<"beta">>}, - start_array, - {literal, true}, - {literal, false}, - end_array, - end_object, - end_object, - start_array, - start_object, - end_object, - end_array, - end_array - ] - }]. - - -wrap_with_array({Title, JSON, Term, Events}) -> - { - "[" ++ Title ++ "]", - <<"[", JSON/binary, "]">>, - [Term], - [start_array] ++ Events ++ [end_array] - }. - - -wrap_with_object({Title, JSON, Term, Events}) -> - { - "{\"key\":" ++ Title ++ "}", - <<"{\"key\":", JSON/binary, "}">>, - [{<<"key">>, Term}], - [start_object, {key, <<"key">>}] ++ Events ++ [end_object] - }. - - -sane_float_to_list(X) -> - [Output] = io_lib:format("~p", [X]), - Output. - --include("jsx_config.hrl"). 
- - -%% erlang refuses to encode certain codepoints, so fake them -to_fake_utf8(N) when N < 16#0080 -> <<N:8>>; -to_fake_utf8(N) when N < 16#0800 -> - <<0:5, Y:5, X:6>> = <<N:16>>, - <<2#110:3, Y:5, 2#10:2, X:6>>; -to_fake_utf8(N) when N < 16#10000 -> - <<Z:4, Y:6, X:6>> = <<N:16>>, - <<2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6>>; -to_fake_utf8(N) -> - <<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>, - <<2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6>>. - - -codepoints() -> - unicode:characters_to_binary( - [32, 33] - ++ lists:seq(35, 46) - ++ lists:seq(48, 91) - ++ lists:seq(93, 16#2027) - ++ lists:seq(16#202a, 16#d7ff) - ++ lists:seq(16#e000, 16#fdcf) - ++ lists:seq(16#fdf0, 16#fffd) - ). - -extended_codepoints() -> - unicode:characters_to_binary( - lists:seq(16#10000, 16#1fffd) ++ [ - 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, - 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, - 16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000 - ] - ). - -reserved_space() -> [ to_fake_utf8(N) || N <- lists:seq(16#fdd0, 16#fdef) ]. - -surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ]. - -noncharacters() -> [ to_fake_utf8(N) || N <- lists:seq(16#fffe, 16#ffff) ]. - -extended_noncharacters() -> - [ to_fake_utf8(N) || N <- [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] - ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] - ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] - ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] - ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff] - ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] - ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] - ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff] - ]. 
- - -clean_string_test_() -> - [ - {"clean codepoints", ?_assertEqual( - codepoints(), - jsx_parser:clean_string(codepoints(), #config{}) - )}, - {"clean extended codepoints", ?_assertEqual( - extended_codepoints(), - jsx_parser:clean_string(extended_codepoints(), #config{}) - )}, - {"escape path codepoints", ?_assertEqual( - codepoints(), - jsx_parser:clean_string(codepoints(), #config{escaped_strings=true}) - )}, - {"escape path extended codepoints", ?_assertEqual( - extended_codepoints(), - jsx_parser:clean_string(extended_codepoints(), #config{escaped_strings=true}) - )}, - {"error reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, reserved_space()) - )}, - {"error surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, surrogates()) - )}, - {"error noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, noncharacters()) - )}, - {"error extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, extended_noncharacters()) - )}, - {"clean reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, reserved_space()) - )}, - {"clean surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, surrogates()) - )}, - {"clean noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), 
<<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, noncharacters()) - )}, - {"clean extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, extended_noncharacters()) - )} - ]. - - -maybe_escape(Bin, Config) -> jsx_parser:clean_string(Bin, Config). - -escape_test_() -> - [ - {"maybe_escape backspace", ?_assertEqual( - <<"\\b">>, - maybe_escape(<<16#0008/utf8>>, #config{escaped_strings=true}) - )}, - {"don't escape backspace", ?_assertEqual( - <<"\b">>, - maybe_escape(<<16#0008/utf8>>, #config{}) - )}, - {"maybe_escape tab", ?_assertEqual( - <<"\\t">>, - maybe_escape(<<16#0009/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape newline", ?_assertEqual( - <<"\\n">>, - maybe_escape(<<16#000a/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape formfeed", ?_assertEqual( - <<"\\f">>, - maybe_escape(<<16#000c/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape carriage return", ?_assertEqual( - <<"\\r">>, - maybe_escape(<<16#000d/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape quote", ?_assertEqual( - <<"\\\"">>, - maybe_escape(<<16#0022/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape forward slash", ?_assertEqual( - <<"\\/">>, - maybe_escape(<<16#002f/utf8>>, #config{escaped_strings=true, escaped_forward_slashes=true}) - )}, - {"do not maybe_escape forward slash", ?_assertEqual( - <<"/">>, - maybe_escape(<<16#002f/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape backslash", ?_assertEqual( - <<"\\\\">>, - maybe_escape(<<16#005c/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape jsonp (u2028)", ?_assertEqual( - <<"\\u2028">>, - maybe_escape(<<16#2028/utf8>>, #config{escaped_strings=true}) - )}, - {"do not maybe_escape jsonp (u2028)", ?_assertEqual( - <<16#2028/utf8>>, - maybe_escape(<<16#2028/utf8>>, 
#config{escaped_strings=true, unescaped_jsonp=true}) - )}, - {"maybe_escape jsonp (u2029)", ?_assertEqual( - <<"\\u2029">>, - maybe_escape(<<16#2029/utf8>>, #config{escaped_strings=true}) - )}, - {"do not maybe_escape jsonp (u2029)", ?_assertEqual( - <<16#2029/utf8>>, - maybe_escape(<<16#2029/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true}) - )}, - {"maybe_escape u0000", ?_assertEqual( - <<"\\u0000">>, - maybe_escape(<<16#0000/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0001", ?_assertEqual( - <<"\\u0001">>, - maybe_escape(<<16#0001/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0002", ?_assertEqual( - <<"\\u0002">>, - maybe_escape(<<16#0002/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0003", ?_assertEqual( - <<"\\u0003">>, - maybe_escape(<<16#0003/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0004", ?_assertEqual( - <<"\\u0004">>, - maybe_escape(<<16#0004/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0005", ?_assertEqual( - <<"\\u0005">>, - maybe_escape(<<16#0005/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0006", ?_assertEqual( - <<"\\u0006">>, - maybe_escape(<<16#0006/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0007", ?_assertEqual( - <<"\\u0007">>, - maybe_escape(<<16#0007/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u000b", ?_assertEqual( - <<"\\u000b">>, - maybe_escape(<<16#000b/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u000e", ?_assertEqual( - <<"\\u000e">>, - maybe_escape(<<16#000e/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u000f", ?_assertEqual( - <<"\\u000f">>, - maybe_escape(<<16#000f/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0010", ?_assertEqual( - <<"\\u0010">>, - maybe_escape(<<16#0010/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0011", ?_assertEqual( - <<"\\u0011">>, - maybe_escape(<<16#0011/utf8>>, 
#config{escaped_strings=true}) - )}, - {"maybe_escape u0012", ?_assertEqual( - <<"\\u0012">>, - maybe_escape(<<16#0012/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0013", ?_assertEqual( - <<"\\u0013">>, - maybe_escape(<<16#0013/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0014", ?_assertEqual( - <<"\\u0014">>, - maybe_escape(<<16#0014/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0015", ?_assertEqual( - <<"\\u0015">>, - maybe_escape(<<16#0015/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0016", ?_assertEqual( - <<"\\u0016">>, - maybe_escape(<<16#0016/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0017", ?_assertEqual( - <<"\\u0017">>, - maybe_escape(<<16#0017/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0018", ?_assertEqual( - <<"\\u0018">>, - maybe_escape(<<16#0018/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0019", ?_assertEqual( - <<"\\u0019">>, - maybe_escape(<<16#0019/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001a", ?_assertEqual( - <<"\\u001a">>, - maybe_escape(<<16#001a/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001b", ?_assertEqual( - <<"\\u001b">>, - maybe_escape(<<16#001b/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001c", ?_assertEqual( - <<"\\u001c">>, - maybe_escape(<<16#001c/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001d", ?_assertEqual( - <<"\\u001d">>, - maybe_escape(<<16#001d/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001e", ?_assertEqual( - <<"\\u001e">>, - maybe_escape(<<16#001e/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001f", ?_assertEqual( - <<"\\u001f">>, - maybe_escape(<<16#001f/utf8>>, #config{escaped_strings=true}) - )} - ]. 
- - -bad_utf8_test_() -> - [ - {"noncharacter u+fffe", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{strict_utf8=true}) - )}, - {"noncharacter u+fffe replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{}) - )}, - {"noncharacter u+ffff", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{strict_utf8=true}) - )}, - {"noncharacter u+ffff replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{}) - )}, - {"orphan continuation byte u+0080", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#0080>>, #config{strict_utf8=true}) - )}, - {"orphan continuation byte u+0080 replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(<<16#0080>>, #config{}) - )}, - {"orphan continuation byte u+00bf", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00bf>>, #config{strict_utf8=true}) - )}, - {"orphan continuation byte u+00bf replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00bf>>, #config{}) - )}, - {"2 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{strict_utf8=true}) - )}, - {"2 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 2), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) - )}, - {"3 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{strict_utf8=true}) - )}, - {"3 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 3), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) - )}, - {"4 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{strict_utf8=true}) - )}, - {"4 continuation 
bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 4), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) - )}, - {"5 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{strict_utf8=true}) - )}, - {"5 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 5), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) - )}, - {"6 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{strict_utf8=true}) - )}, - {"6 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 6), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) - )}, - {"all continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{strict_utf8=true}) - )}, - {"all continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), - jsx_parser:clean_string( - <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, - #config{} - ) - )}, - {"lonely start byte", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00c0>>, #config{strict_utf8=true}) - )}, - {"lonely start byte replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00c0>>, #config{}) - )}, - {"lonely start bytes (2 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{strict_utf8=true}) - )}, - {"lonely start bytes (2 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32, 16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{}) - )}, - {"lonely start bytes (3 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, #config{strict_utf8=true}) - )}, - {"lonely start bytes (3 byte) 
replaced", ?_assertEqual( - <<16#fffd/utf8, 32, 16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) - )}, - {"lonely start bytes (4 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{strict_utf8=true}) - )}, - {"lonely start bytes (4 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32, 16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) - )}, - {"missing continuation byte (3 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<224, 160, 32>>, #config{strict_utf8=true}) - )}, - {"missing continuation byte (3 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<224, 160, 32>>, #config{}) - )}, - {"missing continuation byte (4 byte missing one)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{strict_utf8=true}) - )}, - {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{}) - )}, - {"missing continuation byte (4 byte missing two)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<240, 144, 32>>, #config{strict_utf8=true}) - )}, - {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<240, 144, 32>>, #config{}) - )}, - {"overlong encoding of u+002f (2 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#c0, 16#af, 32>>, #config{strict_utf8=true}) - )}, - {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#c0, 16#af, 32>>, #config{}) - )}, - {"overlong encoding of u+002f (3 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{strict_utf8=true}) - )}, - {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - 
jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) - )}, - {"overlong encoding of u+002f (4 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{strict_utf8=true}) - )}, - {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) - )}, - {"highest overlong 2 byte sequence", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{strict_utf8=true}) - )}, - {"highest overlong 2 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{}) - )}, - {"highest overlong 3 byte sequence", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{strict_utf8=true}) - )}, - {"highest overlong 3 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) - )}, - {"highest overlong 4 byte sequence", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{strict_utf8=true}) - )}, - {"highest overlong 4 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) - )} - ]. - - -json_escape_sequence_test_() -> - [ - {"json escape sequence test - 16#0000", ?_assertEqual(jsx_parser:json_escape_sequence(16#0000), "\\u0000")}, - {"json escape sequence test - 16#abc", ?_assertEqual(jsx_parser:json_escape_sequence(16#abc), "\\u0abc")}, - {"json escape sequence test - 16#def", ?_assertEqual(jsx_parser:json_escape_sequence(16#def), "\\u0def")} - ]. 
\ No newline at end of file From 4f08d5355fb9bbe5c007c2534bf5b80983c9e128 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 20 Oct 2013 21:16:03 +0000 Subject: [PATCH 15/40] fix bug where escape characters were getting dropped when operating under `dirty_strings' --- src/jsx_decoder.erl | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index a858257..03b1d5a 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -631,12 +631,10 @@ strip_continuations(<>, Handler, Acc, Stack, Config, _) -> %% this all gets really gross and should probably eventually be folded into %% but for now it fakes being part of string on incompletes and errors +unescape(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> + string(<>, Handler, acc_seq(Acc, ?rsolidus), Stack, Config); unescape(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> - case C of - ?doublequote -> string(Rest, Handler, acc_seq(Acc, C), Stack, Config); - ?rsolidus -> string(<>, Handler, acc_seq(Acc, ?rsolidus), Stack, Config); - _ -> string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config) - end; + string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config); unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, maybe_replace($\b, Config)), Stack, Config); unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) -> @@ -1367,6 +1365,16 @@ dirty_string_test_() -> <<"[\"", 0, "\"]">>, [dirty_strings] }, + {"dirty 0\\\"0", + [start_array, {string, <<0, ?rsolidus, ?doublequote, 0>>}, end_array, end_json], + <<"[\"", 0, ?rsolidus, ?doublequote, 0, "\"]">>, + [dirty_strings] + }, + {"dirty 0\\\\\"0", + [start_array, {string, <<0, ?rsolidus, ?rsolidus, ?doublequote, 0>>}, end_array, end_json], + <<"[\"", 0, ?rsolidus, ?rsolidus, ?doublequote, 0, "\"]">>, + [dirty_strings] + }, {"dirty 16#d800", [start_array, {string, <<237, 160, 128>>}, end_array, 
end_json], <<"[\"", 237, 160, 128, "\"]">>, From 96f76b617844d394191fa05d6d8e9bb5e08351ff Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Mon, 21 Oct 2013 05:46:53 +0000 Subject: [PATCH 16/40] further refactoring of test suite --- src/jsx_decoder.erl | 318 ++++++++++---------------------------------- 1 file changed, 70 insertions(+), 248 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 03b1d5a..a7866df 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -818,28 +818,11 @@ exp(Bin, Handler, Acc, Stack, Config) -> finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), [], Config); -finish_number(<>, Handler, Acc, [object|Stack], Config) -> - maybe_done(Rest, handle_event([format_number(Acc), end_object], Handler, Config), Stack, Config); -finish_number(<>, Handler, Acc, [array|Stack], Config) -> - maybe_done(Rest, handle_event([format_number(Acc), end_array], Handler, Config), Stack, Config); -finish_number(<>, Handler, Acc, [object|Stack], Config) -> - key(Rest, handle_event(format_number(Acc), Handler, Config), [key|Stack], Config); -finish_number(<>, Handler, Acc, [array|Stack], Config) -> - value(Rest, handle_event(format_number(Acc), Handler, Config), [array|Stack], Config); -finish_number(<>, Handler, Acc, Stack, Config) when ?is_whitespace(S) -> - maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config); -finish_number(<>, Handler, {NumType, Acc}, Stack, Config=#config{strict_comments=true}) -> - ?error(NumType, <>, Handler, Acc, Stack, Config); -finish_number(<>, Handler, Acc, Stack, Config) -> - comment(Rest, handle_event(format_number(Acc), Handler, Config), maybe_done, [comment|Stack], Config); -finish_number(<>, Handler, Acc, Stack, Config) -> - comment(Rest, handle_event(format_number(Acc), Handler, Config), maybe_done, [multicomment|Stack], Config); -finish_number(<>, Handler, Acc, Stack, Config) -> - 
incomplete(maybe_done, <>, handle_event(format_number(Acc), Handler, Config), Stack, Config); finish_number(<<>>, Handler, {NumType, Acc}, Stack, Config) -> incomplete(NumType, <<>>, Handler, Acc, Stack, Config); -finish_number(Bin, Handler, {NumType, Acc}, Stack, Config) -> - ?error(NumType, Bin, Handler, Acc, Stack, Config). +finish_number(Rest, Handler, Acc, Stack, Config) -> + maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config). + format_number({zero, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))}; format_number({integer, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))}; @@ -1576,60 +1559,58 @@ escape_test_() -> special_escape_test_() -> - [ - {"escape forward slash", ?_assertEqual( - [{string, <<"\\/">>}, end_json], - decode(<<34, "/"/utf8, 34>>, [escaped_strings, escaped_forward_slashes]) - )}, - {"do not escape forward slash", ?_assertEqual( - [{string, <<"/">>}, end_json], - decode(<<34, "/"/utf8, 34>>, [escaped_strings]) - )}, - {"escape jsonp", ?_assertEqual( - [{string, <<"\\u2028">>}, end_json], - decode(<<34, 16#2028/utf8, 34>>, [escaped_strings]) - )}, - {"do not escape jsonp", ?_assertEqual( - [{string, <<16#2028/utf8>>}, end_json], - decode(<<34, 16#2028/utf8, 34>>, [escaped_strings, unescaped_jsonp]) - )} + Cases = [ + {"escape forward slash", <<"\\/">>, <<"/"/utf8>>, [escaped_forward_slashes]}, + {"do not escape forward slash", <<"/">>, <<"/"/utf8>>, []}, + {"escape jsonp", <<"\\u2028">>, <<16#2028/utf8>>, []}, + {"do not escape jsonp", <<16#2028/utf8>>, <<16#2028/utf8>>, [unescaped_jsonp]} + ], + [{Title, ?_assertEqual( + [{string, Expect}, end_json], + decode(<<34, Raw/binary, 34>>, [escaped_strings] ++ Config) + )} || {Title, Expect, Raw, Config} <- Cases ]. 
single_quoted_string_test_() -> - [ - {"single quoted string", ?_assertEqual( - [{string, <<"hello world">>}, end_json], - decode(<<39, "hello world", 39>>, []) - )}, - {"single quoted string error", ?_assertError( - badarg, - decode(<<39, "hello world", 39>>, [{strict, [single_quotes]}]) - )}, - {"single quoted string with embedded double quotes", ?_assertEqual( + Cases = [ + {"single quoted string", [{string, <<"hello world">>}, end_json], <<39, "hello world", 39>>}, + {"single quoted string with embedded double quotes", [{string, <<"quoth the raven, \"nevermore\"">>}, end_json], - decode(<<39, "quoth the raven, \"nevermore\"", 39>>, []) - )}, - {"string with embedded single quotes", ?_assertEqual( + <<39, "quoth the raven, \"nevermore\"", 39>> + }, + {"escaped single quote", [{string, <<"quoth the raven, 'nevermore'">>}, end_json], - decode(<<34, "quoth the raven, 'nevermore'", 34>>, []) - )}, - {"escaped single quote", ?_assertEqual( - [{string, <<"quoth the raven, 'nevermore'">>}, end_json], - decode(<<39, "quoth the raven, \\'nevermore\\'", 39>>, []) - )}, - {"single quoted key", ?_assertEqual( + <<39, "quoth the raven, \\'nevermore\\'", 39>> + }, + {"single quoted key", [start_object, {key, <<"key">>}, {string, <<"value">>}, {key, <<"another key">>}, {string, <<"another value">>}, end_object, end_json], - decode(<<"{'key':'value','another key':'another value'}">>, []) - )}, - {"single quoted key error", ?_assertError( + <<"{'key':'value','another key':'another value'}">> + } + ], + [{Title, ?_assertEqual(Expect, decode(Raw, []))} || {Title, Expect, Raw} <- Cases] ++ + [{Title, ?_assertError( badarg, - decode(<<"{'key':'value','another key':'another value'}">>, [{strict, [single_quotes]}]) + decode(Raw, [{strict, [single_quotes]}]) + )} || {Title, _Expect, Raw} <- Cases + ]. 
+ + +embedded_single_quoted_string_test_() -> + [ + {"string with embedded single quotes", ?_assertEqual( + [{string, <<"quoth the raven, 'nevermore'">>}, end_json], + decode(<<34, "quoth the raven, 'nevermore'", 34>>, []) + )}, + {"string with embedded single quotes", ?_assertEqual( + [{string, <<"quoth the raven, 'nevermore'">>}, end_json], + decode(<<34, "quoth the raven, 'nevermore'", 34>>, [{strict, [single_quotes]}]) )} ]. + ignored_bad_escapes_test_() -> @@ -1672,200 +1653,41 @@ incomplete_test_() -> error_test_() -> - Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, - [ - {"maybe_bom error", ?_assertError( - badarg, - Decode(<<16#ef, 0>>, []) - )}, - {"definitely_bom error", ?_assertError( - badarg, - Decode(<<16#ef, 16#bb, 0>>, []) - )}, - {"value error", ?_assertError( - badarg, - Decode(<<0>>, []) - )}, - {"object error", ?_assertError( - badarg, - Decode(<<"{"/utf8, 0>>, []) - )}, - {"colon error", ?_assertError( - badarg, - Decode(<<"{\"\""/utf8, 0>>, []) - )}, - {"key error", ?_assertError( - badarg, - Decode(<<"{\"\":1,"/utf8, 0>>, []) - )}, - {"negative error", ?_assertError( - badarg, - Decode(<<"-"/utf8, 0>>, []) - )}, - {"zero error", ?_assertError( - badarg, - Decode(<<"0"/utf8, 0>>, [stream]) - )}, - {"integer error", ?_assertError( - badarg, - Decode(<<"1"/utf8, 0>>, [stream]) - )}, - {"decimal error", ?_assertError( - badarg, - Decode(<<"1.0"/utf8, 0>>, [stream]) - )}, - {"exp error", ?_assertError( - badarg, - Decode(<<"1.0e1"/utf8, 0>>, [stream]) - )}, - {"e error", ?_assertError( - badarg, - Decode(<<"1e"/utf8, 0>>, []) - )}, - {"ex error", ?_assertError( - badarg, - Decode(<<"1e+"/utf8, 0>>, []) - )}, - {"exp error", ?_assertError( - badarg, - Decode(<<"1.e"/utf8>>, []) - )}, - {"true error", ?_assertError( - badarg, - Decode(<<"tru"/utf8, 0>>, []) - )}, - {"false error", ?_assertError( - badarg, - Decode(<<"fals"/utf8, 0>>, []) - )}, - {"null error", ?_assertError( - badarg, - 
Decode(<<"nul"/utf8, 0>>, []) - )}, - {"maybe_done error", ?_assertError( - badarg, - Decode(<<"[[]"/utf8, 0>>, []) - )}, - {"done error", ?_assertError( - badarg, - Decode(<<"[]"/utf8, 0>>, []) - )}, - {"comment error", ?_assertError( - badarg, - Decode(<<"[ / ]">>, []) - )}, - {"single_comment error", ?_assertError( - badarg, - Decode(<<"[ //"/utf8, 192>>, []) - )}, - {"multi_comment error", ?_assertError( - badarg, - Decode(<<"[ /*"/utf8, 192>>, []) - )} - ]. - - -custom_error_handler_test_() -> - Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, Error = fun(Rest, {_, State, _, _, _}, _) -> {State, Rest} end, - [ - {"maybe_bom error", ?_assertEqual( - {value, <<16#ef, 0>>}, - Decode(<<16#ef, 0>>, [{error_handler, Error}]) - )}, - {"definitely_bom error", ?_assertEqual( - {value, <<16#ef, 16#bb, 0>>}, - Decode(<<16#ef, 16#bb, 0>>, [{error_handler, Error}]) - )}, - {"value error", ?_assertEqual( - {value, <<0>>}, - Decode(<<0>>, [{error_handler, Error}]) - )}, - {"object error", ?_assertEqual( - {object, <<0>>}, - Decode(<<"{"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"colon error", ?_assertEqual( - {colon, <<0>>}, - Decode(<<"{\"\""/utf8, 0>>, [{error_handler, Error}]) - )}, - {"key error", ?_assertEqual( - {key, <<0>>}, - Decode(<<"{\"\":1,"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"negative error", ?_assertEqual( - {value, <<"-"/utf8, 0>>}, - Decode(<<"-"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"zero error", ?_assertEqual( - {zero, <<0>>}, - Decode(<<"0"/utf8, 0>>, [stream, {error_handler, Error}]) - )}, - {"integer error", ?_assertEqual( - {integer, <<0>>}, - Decode(<<"1"/utf8, 0>>, [stream, {error_handler, Error}]) - )}, - {"decimal error", ?_assertEqual( - {decimal, <<0>>}, - Decode(<<"1.0"/utf8, 0>>, [stream, {error_handler, Error}]) - )}, - {"exp error", ?_assertEqual( - {exp, <<0>>}, - Decode(<<"1.0e1"/utf8, 0>>, [stream, {error_handler, Error}]) - )}, - {"e error", ?_assertEqual( - {decimal, 
<<$e, 0>>}, - Decode(<<"1e"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"ex error", ?_assertEqual( - {decimal, <<$e, ?positive, 0>>}, - Decode(<<"1e+"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"exp error", ?_assertEqual( - {decimal, <<$e>>}, - Decode(<<"1.e"/utf8>>, [{error_handler, Error}]) - )}, - {"true error", ?_assertEqual( - {true, <<"ru"/utf8, 0>>}, - Decode(<<"tru"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"false error", ?_assertEqual( - {false, <<"als"/utf8, 0>>}, - Decode(<<"fals"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"null error", ?_assertEqual( - {null, <<"ul"/utf8, 0>>}, - Decode(<<"nul"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"maybe_done error", ?_assertEqual( - {maybe_done, <<0>>}, - Decode(<<"[[]"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"done error", ?_assertEqual( - {done, <<0>>}, - Decode(<<"[]"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"comment error", ?_assertEqual( - {value, <<"/ ]"/utf8>>}, - Decode(<<"[ / ]">>, [{error_handler, Error}]) - )}, - {"single_comment error", ?_assertEqual( - {comment, <<192>>}, - Decode(<<"[ //"/utf8, 192>>, [{error_handler, Error}, {strict, [utf8]}]) - )}, - {"multi_comment error", ?_assertEqual( - {comment, <<192>>}, - Decode(<<"[ /*"/utf8, 192>>, [{error_handler, Error}, {strict, [utf8]}]) - )} + Cases = [ + {"maybe_bom error", <<16#ef, 0>>, {value, <<16#ef, 0>>}}, + {"definitely_bom error", <<16#ef, 16#bb, 0>>, {value, <<16#ef, 16#bb, 0>>}}, + {"value error", <<0>>, {value, <<0>>}}, + {"object error", <<"{"/utf8, 0>>, {object, <<0>>}}, + {"colon error", <<"{\"\""/utf8, 0>>, {colon, <<0>>}}, + {"key error", <<"{\"\":1,"/utf8, 0>>, {key, <<0>>}}, + {"negative error", <<"-"/utf8, 0>>, {value, <<"-"/utf8, 0>>}}, + {"zero error", <<"0"/utf8, 0>>, {done, <<0>>}}, + {"integer error", <<"1"/utf8, 0>>, {done, <<0>>}}, + {"decimal error", <<"1.0"/utf8, 0>>, {done, <<0>>}}, + {"exp error", <<"1.0e1"/utf8, 0>>, {done, <<0>>}}, + {"e error", <<"1e"/utf8, 0>>, {decimal, <<$e, 0>>}}, + {"ex 
error", <<"1e+"/utf8, 0>>, {decimal, <<$e, ?positive, 0>>}}, + {"exp error", <<"1.e"/utf8>>, {decimal, <<$e>>}}, + {"true error", <<"tru"/utf8, 0>>, {true, <<"ru"/utf8, 0>>}}, + {"false error", <<"fals"/utf8, 0>>, {false, <<"als"/utf8, 0>>}}, + {"null error", <<"nul"/utf8, 0>>, {null, <<"ul"/utf8, 0>>}}, + {"maybe_done error", <<"[[]"/utf8, 0>>, {maybe_done, <<0>>}}, + {"done error", <<"[]"/utf8, 0>>, {done, <<0>>}} + ], + [{Title, ?_assertError(badarg, decode(State))} || {Title, State, _} <- Cases] ++ + [{Title ++ " (custom handler)", ?_assertEqual( + Err, + decode(State, [{error_handler, Error}]) + )} || {Title, State, Err} <- Cases ]. custom_incomplete_handler_test_() -> - Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, [ {"custom incomplete handler", ?_assertError( - badarg, - Decode(<<>>, [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}]) + incomplete, + decode(<<>>, [{incomplete_handler, fun(_, _, _) -> erlang:error(incomplete) end}, stream]) )} ]. 
From f6ebd9a1c427ac8d4ae005cbaa2a9a111d0d7d8e Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Mon, 21 Oct 2013 06:22:49 +0000 Subject: [PATCH 17/40] remove inconsistent custom `error_handler' and `incomplete_handler' tests pending reimplementation --- src/jsx_decoder.erl | 47 ++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index a7866df..9dbc321 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -1653,34 +1653,29 @@ incomplete_test_() -> error_test_() -> - Error = fun(Rest, {_, State, _, _, _}, _) -> {State, Rest} end, Cases = [ - {"maybe_bom error", <<16#ef, 0>>, {value, <<16#ef, 0>>}}, - {"definitely_bom error", <<16#ef, 16#bb, 0>>, {value, <<16#ef, 16#bb, 0>>}}, - {"value error", <<0>>, {value, <<0>>}}, - {"object error", <<"{"/utf8, 0>>, {object, <<0>>}}, - {"colon error", <<"{\"\""/utf8, 0>>, {colon, <<0>>}}, - {"key error", <<"{\"\":1,"/utf8, 0>>, {key, <<0>>}}, - {"negative error", <<"-"/utf8, 0>>, {value, <<"-"/utf8, 0>>}}, - {"zero error", <<"0"/utf8, 0>>, {done, <<0>>}}, - {"integer error", <<"1"/utf8, 0>>, {done, <<0>>}}, - {"decimal error", <<"1.0"/utf8, 0>>, {done, <<0>>}}, - {"exp error", <<"1.0e1"/utf8, 0>>, {done, <<0>>}}, - {"e error", <<"1e"/utf8, 0>>, {decimal, <<$e, 0>>}}, - {"ex error", <<"1e+"/utf8, 0>>, {decimal, <<$e, ?positive, 0>>}}, - {"exp error", <<"1.e"/utf8>>, {decimal, <<$e>>}}, - {"true error", <<"tru"/utf8, 0>>, {true, <<"ru"/utf8, 0>>}}, - {"false error", <<"fals"/utf8, 0>>, {false, <<"als"/utf8, 0>>}}, - {"null error", <<"nul"/utf8, 0>>, {null, <<"ul"/utf8, 0>>}}, - {"maybe_done error", <<"[[]"/utf8, 0>>, {maybe_done, <<0>>}}, - {"done error", <<"[]"/utf8, 0>>, {done, <<0>>}} + {"maybe_bom error", <<16#ef, 0>>}, + {"definitely_bom error", <<16#ef, 16#bb, 0>>}, + {"object error", <<"{"/utf8, 0>>}, + {"colon error", <<"{\"\""/utf8, 0>>}, + {"key error", <<"{\"\":1,"/utf8, 0>>}, + {"value error", <<0>>}, + 
{"negative error", <<"-"/utf8, 0>>}, + {"zero error", <<"0"/utf8, 0>>}, + {"integer error", <<"1"/utf8, 0>>}, + {"decimal error", <<"1.0"/utf8, 0>>}, + {"e error", <<"1e"/utf8, 0>>}, + {"ex error", <<"1e+"/utf8, 0>>}, + {"exp error", <<"1e1"/utf8, 0>>}, + {"exp error", <<"1.0e1"/utf8, 0>>}, + {"exp error", <<"1.e"/utf8>>}, + {"true error", <<"tru"/utf8, 0>>}, + {"false error", <<"fals"/utf8, 0>>}, + {"null error", <<"nul"/utf8, 0>>}, + {"maybe_done error", <<"[[]"/utf8, 0>>}, + {"done error", <<"[]"/utf8, 0>>} ], - [{Title, ?_assertError(badarg, decode(State))} || {Title, State, _} <- Cases] ++ - [{Title ++ " (custom handler)", ?_assertEqual( - Err, - decode(State, [{error_handler, Error}]) - )} || {Title, State, Err} <- Cases - ]. + [{Title, ?_assertError(badarg, decode(State))} || {Title, State} <- Cases]. custom_incomplete_handler_test_() -> From 1909687affa3ff7ae48bcdceb51d4ee00b119515 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Mon, 21 Oct 2013 06:25:45 +0000 Subject: [PATCH 18/40] fix format of incomplete tests in jsx_decoder --- src/jsx_decoder.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 9dbc321..71ac4b7 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -1635,16 +1635,16 @@ incomplete_test_() -> [ {"stream false", ?_assertError( badarg, - start(<<"{">>, {jsx, []}, [], jsx_config:parse_config([])) + decode(<<"{">>) )}, {"stream true", ?_assert( - case start(<<"{">>, {jsx, []}, [], jsx_config:parse_config([stream])) of + case decode(<<"{">>, [stream]) of {incomplete, _} -> true; _ -> false end )}, {"complete input", ?_assert( - case start(<<"{}">>, {jsx, []}, [], jsx_config:parse_config([stream])) of + case decode(<<"{}">>, [stream]) of {incomplete, _} -> true; _ -> false end From 62914f422a8144b260126ed146459616dee9b312 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Thu, 24 Oct 2013 05:01:20 +0000 Subject: [PATCH 19/40] abstracted internal state of 
the `jsx_to_term' handler --- src/jsx_to_term.erl | 127 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 105 insertions(+), 22 deletions(-) diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index b7cec2a..d9a50f0 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -25,6 +25,7 @@ -export([to_term/2]). -export([init/1, handle_event/2]). +-export([start_object/1, start_array/1, finish/1, insert/2, insert/3]). -record(config, { @@ -70,33 +71,20 @@ parse_config([], Config) -> Config. -init(Config) -> {[[]], parse_config(Config)}. +init(Config) -> {[], parse_config(Config)}. -handle_event(end_json, {[[Terms]], _Config}) -> Terms; +handle_event(end_json, {Term, _Config}) -> Term; -handle_event(start_object, {Terms, Config}) -> {[[]|Terms], Config}; -handle_event(end_object, {[[], {key, Key}, Last|Terms], Config}) -> - {[[{Key, [{}]}] ++ Last] ++ Terms, Config}; -handle_event(end_object, {[Object, {key, Key}, Last|Terms], Config}) -> - {[[{Key, lists:reverse(Object)}] ++ Last] ++ Terms, Config}; -handle_event(end_object, {[[], Last|Terms], Config}) -> - {[[[{}]] ++ Last] ++ Terms, Config}; -handle_event(end_object, {[Object, Last|Terms], Config}) -> - {[[lists:reverse(Object)] ++ Last] ++ Terms, Config}; +handle_event(start_object, {Stack, Config}) -> {start_object(Stack), Config}; +handle_event(end_object, {Stack, Config}) -> {finish(Stack), Config}; -handle_event(start_array, {Terms, Config}) -> {[[]|Terms], Config}; -handle_event(end_array, {[List, {key, Key}, Last|Terms], Config}) -> - {[[{Key, lists:reverse(List)}] ++ Last] ++ Terms, Config}; -handle_event(end_array, {[List, Last|Terms], Config}) -> - {[[lists:reverse(List)] ++ Last] ++ Terms, Config}; +handle_event(start_array, {Stack, Config}) -> {start_array(Stack), Config}; +handle_event(end_array, {Stack, Config}) -> {finish(Stack), Config}; -handle_event({key, Key}, {Terms, Config}) -> {[{key, format_key(Key, Config)}] ++ Terms, Config}; +handle_event({key, Key}, {Stack, Config}) -> 
{insert(format_key(Key, Config), Stack), Config}; -handle_event({_, Event}, {[{key, Key}, Last|Terms], Config}) -> - {[[{Key, Event}] ++ Last] ++ Terms, Config}; -handle_event({_, Event}, {[Last|Terms], Config}) -> - {[[Event] ++ Last] ++ Terms, Config}. +handle_event({_, Event}, {Stack, Config}) -> {insert(Event, Stack), Config}. format_key(Key, Config) -> @@ -113,6 +101,44 @@ format_key(Key, Config) -> end. +%% internal state is a stack of in progress objects/arrays +%% `[Current, Parent, Grandparent,...OriginalAncestor]` +%% an object has the representation on the stack of +%% `{object, [{NthKey, NthValue}, {NMinus1Key, NthMinus1Value},...{FirstKey, FirstValue}]}` +%% of if there's a key with a yet to be matched value +%% `{object, Key, [{NthKey, NthValue},...]}` +%% an array looks like +%% `{array, [NthValue, NthMinus1Value,...FirstValue]}` + +%% allocate a new object on top of the stack +start_object(Stack) -> [{object, []}] ++ Stack. + +%% allocate a new array on top of the stack +start_array(Stack) -> [{array, []}] ++ Stack. + +%% finish an object or array and insert it into the parent object if it exists +finish([{object, []}]) -> [{}]; +finish([{object, []}|Rest]) -> insert([{}], Rest); +finish([{object, Pairs}]) -> lists:reverse(Pairs); +finish([{object, Pairs}|Rest]) -> insert(lists:reverse(Pairs), Rest); +finish([{array, Values}]) -> lists:reverse(Values); +finish([{array, Values}|Rest]) -> insert(lists:reverse(Values), Rest); +finish(_) -> erlang:error(badarg). + +%% insert a value when there's no parent object or array +insert(Value, []) -> Value; +%% insert a key or value into an object or array, autodetects the 'right' thing +insert(Key, [{object, Pairs}|Rest]) -> [{object, Key, Pairs}] ++ Rest; +insert(Value, [{object, Key, Pairs}|Rest]) -> [{object, [{Key, Value}] ++ Pairs}] ++ Rest; +insert(Value, [{array, Values}|Rest]) -> [{array, [Value] ++ Values}] ++ Rest; +insert(_, _) -> erlang:error(badarg). 
+ +%% insert a key/value pair into an object +insert(Key, Value, [{object, Pairs}|Rest]) -> [{object, [{Key, Value}] ++ Pairs}] ++ Rest; +insert(_, _, _) -> erlang:error(badarg). + + + %% eunit tests @@ -158,13 +184,70 @@ format_key_test_() -> ]. +rep_manipulation_test_() -> + [ + {"allocate a new object on an empty stack", ?_assertEqual( + [{object, []}], + start_object([]) + )}, + {"allocate a new object on a stack", ?_assertEqual( + [{object, []}, {object, []}], + start_object([{object, []}]) + )}, + {"allocate a new array on an empty stack", ?_assertEqual( + [{array, []}], + start_array([]) + )}, + {"allocate a new array on a stack", ?_assertEqual( + [{array, []}, {object, []}], + start_array([{object, []}]) + )}, + {"insert a key into an object", ?_assertEqual( + [{object, key, []}, junk], + insert(key, [{object, []}, junk]) + )}, + {"insert a value into an object", ?_assertEqual( + [{object, [{key, value}]}, junk], + insert(value, [{object, key, []}, junk]) + )}, + {"insert a value into an array", ?_assertEqual( + [{array, [value]}, junk], + insert(value, [{array, []}, junk]) + )}, + {"insert a key/value pair into an object", ?_assertEqual( + [{object, [{key, value}, {x, y}]}, junk], + insert(key, value, [{object, [{x, y}]}, junk]) + )}, + {"finish an object with no ancestor", ?_assertEqual( + [{a, b}, {x, y}], + finish([{object, [{x, y}, {a, b}]}]) + )}, + {"finish an empty object", ?_assertEqual( + [{}], + finish([{object, []}]) + )}, + {"finish an object with an ancestor", ?_assertEqual( + [{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], + finish([{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}]) + )}, + {"finish an array with no ancestor", ?_assertEqual( + [a, b, c], + finish([{array, [c, b, a]}]) + )}, + {"finish an array with an ancestor", ?_assertEqual( + [{array, [[a, b, c], d, e, f]}], + finish([{array, [c, b, a]}, {array, [d, e, f]}]) + )} + ]. 
+ + handle_event_test_() -> Data = jsx:test_cases(), [ { Title, ?_assertEqual( Term, - lists:foldl(fun handle_event/2, {[[]], #config{}}, Events ++ [end_json]) + lists:foldl(fun handle_event/2, {[], #config{}}, Events ++ [end_json]) ) } || {Title, _, Term, Events} <- Data ]. From 625f912e7be958275e6f6ccfb088041eefcf290c Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Fri, 25 Oct 2013 01:35:39 +0000 Subject: [PATCH 20/40] generate test state from `init/1' in `jsx_to_term' and `jsx_to_json' instead of doing it by hand --- src/jsx_to_json.erl | 2 +- src/jsx_to_term.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 177a32c..342a5da 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -298,7 +298,7 @@ handle_event_test_() -> { Title, ?_assertEqual( JSON, - lists:foldl(fun handle_event/2, {start, [], #config{}}, Events ++ [end_json]) + lists:foldl(fun handle_event/2, init([]), Events ++ [end_json]) ) } || {Title, JSON, _, Events} <- Data ]. diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index d9a50f0..88d4c83 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -247,7 +247,7 @@ handle_event_test_() -> { Title, ?_assertEqual( Term, - lists:foldl(fun handle_event/2, {[], #config{}}, Events ++ [end_json]) + lists:foldl(fun handle_event/2, init([]), Events ++ [end_json]) ) } || {Title, _, Term, Events} <- Data ]. From 5409668cf4a329596bbc96b159be279f203663a9 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sat, 26 Oct 2013 14:45:36 +0000 Subject: [PATCH 21/40] abstracted internal state of `jsx_to_json'. uses same interface as `jsx_to_term' --- src/jsx_to_json.erl | 107 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 342a5da..35ec437 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -25,6 +25,7 @@ -export([to_json/2, format/2]). 
-export([init/1, handle_event/2]). +-export([start_object/1, start_array/1, insert/2, insert/3, finish/1]). -record(config, { @@ -73,7 +74,6 @@ parse_config([], Config) -> Config. - -define(start_object, <<"{">>). -define(start_array, <<"[">>). -define(end_object, <<"}">>). @@ -85,7 +85,6 @@ parse_config([], Config) -> -define(newline, <<"\n">>). - init(Config) -> {start, [], parse_config(Config)}. @@ -146,6 +145,8 @@ handle_event(end_json, {[], Acc, _Config}) -> unicode:characters_to_binary(Acc, encode(string, String, _Config) -> [?quote, String, ?quote]; +encode(key, Key, _Config) -> + [?quote, Key, ?quote]; encode(literal, Literal, _Config) -> erlang:atom_to_list(Literal); encode(integer, Integer, _Config) -> @@ -180,6 +181,51 @@ indent_or_space(Config) -> end. +%% internal state is a stack of in progress objects/arrays +%% `[Current, Parent, Grandparent,...OriginalAncestor]` +%% an object has the representation on the stack of +%% `{object, Object}` +%% of if there's a key with a yet to be matched value +%% `{object, Key, Object}` +%% an array looks like +%% `{array, Array}` +%% `Object` and `Array` are utf8 encoded binaries + +%% allocate a new object on top of the stack +start_object(Stack) -> [{object, ?start_object}] ++ Stack. + +%% allocate a new array on top of the stack +start_array(Stack) -> [{array, ?start_array}] ++ Stack. + +%% finish an object or array and insert it into the parent object if it exists +finish([{object, Object}]) -> <>; +finish([{object, Object}|Rest]) -> insert(<>, Rest); +finish([{array, Array}]) -> <>; +finish([{array, Array}|Rest]) -> insert(<>, Rest); +finish(_) -> erlang:error(badarg). 
+ +%% insert a value when there's no parent object or array +insert(Value, []) when is_binary(Value) -> Value; +%% insert a key or value into an object or array, autodetects the 'right' thing +insert(Key, [{object, Object}|Rest]) when is_binary(Key) -> [{object, Key, Object}] ++ Rest; +insert(Value, [{object, Key, ?start_object}|Rest]) when is_binary(Value) -> + [{object, <>}] ++ Rest; +insert(Value, [{object, Key, Object}|Rest]) when is_binary(Value) -> + [{object, <>}] ++ Rest; +insert(Value, [{array, ?start_array}|Rest]) when is_binary(Value) -> + [{array, <>}] ++ Rest; +insert(Value, [{array, Array}|Rest]) when is_binary(Value) -> + [{array, <>}] ++ Rest; +insert(_, _) -> erlang:error(badarg). + +%% insert a key/value pair into an object +insert(Key, Value, [{object, ?start_object}|Rest]) when is_binary(Key), is_binary(Value) -> + [{object, <>}] ++ Rest; +insert(Key, Value, [{object, Object}|Rest]) when is_binary(Key), is_binary(Value) -> + [{object, <>}] ++ Rest; +insert(_, _, _) -> erlang:error(badarg). + + %% eunit tests -ifdef(TEST). @@ -292,6 +338,63 @@ format_test_() -> ]. 
+rep_manipulation_test_() -> + [ + {"allocate a new object on an empty stack", ?_assertEqual( + [{object, <<"{">>}], + start_object([]) + )}, + {"allocate a new object on a stack", ?_assertEqual( + [{object, <<"{">>}, {object, <<"{">>}], + start_object([{object, <<"{">>}]) + )}, + {"allocate a new array on an empty stack", ?_assertEqual( + [{array, <<"[">>}], + start_array([]) + )}, + {"allocate a new array on a stack", ?_assertEqual( + [{array, <<"[">>}, {object, <<"{">>}], + start_array([{object, <<"{">>}]) + )}, + {"insert a key into an object", ?_assertEqual( + [{object, <<"\"key\"">>, <<"{">>}], + insert(<<"\"key\"">>, [{object, <<"{">>}]) + )}, + {"insert a value into an object", ?_assertEqual( + [{object, <<"{\"key\":true">>}], + insert(<<"true">>, [{object, <<"\"key\"">>, <<"{">>}]) + )}, + {"insert a value into an array", ?_assertEqual( + [{array, <<"[true">>}], + insert(<<"true">>, [{array, <<"[">>}]) + )}, + {"insert a key/value pair into an object", ?_assertEqual( + [{object, <<"{\"x\":true,\"y\":false">>}], + insert(<<"\"y\"">>, <<"false">>, [{object, <<"{\"x\":true">>}]) + )}, + {"finish an object with no ancestor", ?_assertEqual( + <<"{\"x\":true,\"y\":false}">>, + finish([{object, <<"{\"x\":true,\"y\":false">>}]) + )}, + {"finish an empty object", ?_assertEqual( + <<"{}">>, + finish([{object, <<"{">>}]) + )}, + {"finish an object with an ancestor", ?_assertEqual( + [{object, <<"{\"a\":[],\"b\":{\"x\":true,\"y\":false}">>}], + finish([{object, <<"{\"x\":true,\"y\":false">>}, {object, <<"\"b\"">>, <<"{\"a\":[]">>}]) + )}, + {"finish an array with no ancestor", ?_assertEqual( + <<"[true,false,null]">>, + finish([{array, <<"[true,false,null">>}]) + )}, + {"finish an array with an ancestor", ?_assertEqual( + [{array, <<"[1,2,3,[true,false,null]">>}], + finish([{array, <<"[true,false,null">>}, {array, <<"[1,2,3">>}]) + )} + ]. 
+ + handle_event_test_() -> Data = jsx:test_cases(), [ From 7296f790b21e1e77ff77010569f01469a8f19075 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 27 Oct 2013 01:28:49 +0000 Subject: [PATCH 22/40] convert all space/newlines in json formatter to emit binaries --- src/jsx_to_json.erl | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 35ec437..98fa072 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -157,22 +157,17 @@ encode(float, Float, _Config) -> space(Config) -> case Config#config.space of - 0 -> [] + 0 -> <<>> ; X when X > 0 -> binary:copy(?space, X) end. indent(Config) -> case Config#config.indent of - 0 -> [] - ; X when X > 0 -> - Indent = binary:copy(?space, X), - indent(Indent, Config#config.depth, [?newline]) + 0 -> <<>> + ; X when X > 0 -> <> end. -indent(_Indent, 0, Acc) -> Acc; -indent(Indent, N, Acc) -> indent(Indent, N - 1, [Acc, Indent]). - indent_or_space(Config) -> case Config#config.indent > 0 of @@ -258,7 +253,7 @@ config_test_() -> space_test_() -> [ - {"no space", ?_assertEqual([], space(#config{space=0}))}, + {"no space", ?_assertEqual(<<>>, space(#config{space=0}))}, {"one space", ?_assertEqual(<<" ">>, space(#config{space=1}))}, {"four spaces", ?_assertEqual(<<" ">>, space(#config{space=4}))} ]. 
@@ -266,21 +261,21 @@ space_test_() -> indent_test_() -> [ - {"no indent", ?_assertEqual([], indent(#config{indent=0, depth=1}))}, + {"no indent", ?_assertEqual(<<>>, indent(#config{indent=0, depth=1}))}, {"indent 1 depth 1", ?_assertEqual( - [[?newline], ?space], + <>/binary>>, indent(#config{indent=1, depth=1}) )}, {"indent 1 depth 2", ?_assertEqual( - [[[?newline], ?space], ?space], + <>/binary>>, indent(#config{indent=1, depth=2}) )}, {"indent 4 depth 1", ?_assertEqual( - [[?newline], <<" ">>], + <>/binary>>, indent(#config{indent=4, depth=1}) )}, {"indent 4 depth 2", ?_assertEqual( - [[[?newline], <<" ">>], <<" ">>], + <>/binary, <<" ">>/binary>>, indent(#config{indent=4, depth=2}) )} ]. @@ -293,7 +288,7 @@ indent_or_space_test_() -> indent_or_space(#config{space=1, indent=0, depth=1}) )}, {"indent so no space", ?_assertEqual( - [[?newline], ?space], + <>/binary>>, indent_or_space(#config{space=1, indent=1, depth=1}) )} ]. From 7b9170b32d2d2a849ca01a45696d5680b7e4e429 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 29 Oct 2013 19:15:16 +0000 Subject: [PATCH 23/40] add `Config' arg to rep manipulation --- src/jsx_to_json.erl | 98 ++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 98fa072..ed28a63 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -187,37 +187,43 @@ indent_or_space(Config) -> %% `Object` and `Array` are utf8 encoded binaries %% allocate a new object on top of the stack -start_object(Stack) -> [{object, ?start_object}] ++ Stack. +start_object({Stack, Config}) -> {[{object, ?start_object}] ++ Stack, Config}. %% allocate a new array on top of the stack -start_array(Stack) -> [{array, ?start_array}] ++ Stack. +start_array({Stack, Config}) -> {[{array, ?start_array}] ++ Stack, Config}. 
%% finish an object or array and insert it into the parent object if it exists -finish([{object, Object}]) -> <>; -finish([{object, Object}|Rest]) -> insert(<>, Rest); -finish([{array, Array}]) -> <>; -finish([{array, Array}|Rest]) -> insert(<>, Rest); +finish({[{object, Object}], Config}) -> + {<>, Config}; +finish({[{object, Object}|Rest], Config}) -> + insert(<>, {Rest, Config}); +finish({[{array, Array}], Config}) -> + {<>, Config}; +finish({[{array, Array}|Rest], Config}) -> + insert(<>, {Rest, Config}); finish(_) -> erlang:error(badarg). %% insert a value when there's no parent object or array -insert(Value, []) when is_binary(Value) -> Value; +insert(Value, {[], Config}) when is_binary(Value) -> + {Value, Config}; %% insert a key or value into an object or array, autodetects the 'right' thing -insert(Key, [{object, Object}|Rest]) when is_binary(Key) -> [{object, Key, Object}] ++ Rest; -insert(Value, [{object, Key, ?start_object}|Rest]) when is_binary(Value) -> - [{object, <>}] ++ Rest; -insert(Value, [{object, Key, Object}|Rest]) when is_binary(Value) -> - [{object, <>}] ++ Rest; -insert(Value, [{array, ?start_array}|Rest]) when is_binary(Value) -> - [{array, <>}] ++ Rest; -insert(Value, [{array, Array}|Rest]) when is_binary(Value) -> - [{array, <>}] ++ Rest; +insert(Key, {[{object, Object}|Rest], Config}) when is_binary(Key) -> + {[{object, Key, Object}] ++ Rest, Config}; +insert(Value, {[{object, Key, ?start_object}|Rest], Config}) when is_binary(Value) -> + {[{object, <>}] ++ Rest, Config}; +insert(Value, {[{object, Key, Object}|Rest], Config}) when is_binary(Value) -> + {[{object, <>}] ++ Rest, Config}; +insert(Value, {[{array, ?start_array}|Rest], Config}) when is_binary(Value) -> + {[{array, <>}] ++ Rest, Config}; +insert(Value, {[{array, Array}|Rest], Config}) when is_binary(Value) -> + {[{array, <>}] ++ Rest, Config}; insert(_, _) -> erlang:error(badarg). 
%% insert a key/value pair into an object -insert(Key, Value, [{object, ?start_object}|Rest]) when is_binary(Key), is_binary(Value) -> - [{object, <>}] ++ Rest; -insert(Key, Value, [{object, Object}|Rest]) when is_binary(Key), is_binary(Value) -> - [{object, <>}] ++ Rest; +insert(Key, Value, {[{object, ?start_object}|Rest], Config}) when is_binary(Key), is_binary(Value) -> + {[{object, <>}] ++ Rest, Config}; +insert(Key, Value, {[{object, Object}|Rest], Config}) when is_binary(Key), is_binary(Value) -> + {[{object, <>}] ++ Rest, Config}; insert(_, _, _) -> erlang:error(badarg). @@ -336,56 +342,56 @@ format_test_() -> rep_manipulation_test_() -> [ {"allocate a new object on an empty stack", ?_assertEqual( - [{object, <<"{">>}], - start_object([]) + {[{object, <<"{">>}], []}, + start_object({[], []}) )}, {"allocate a new object on a stack", ?_assertEqual( - [{object, <<"{">>}, {object, <<"{">>}], - start_object([{object, <<"{">>}]) + {[{object, <<"{">>}, {object, <<"{">>}], []}, + start_object({[{object, <<"{">>}], []}) )}, {"allocate a new array on an empty stack", ?_assertEqual( - [{array, <<"[">>}], - start_array([]) + {[{array, <<"[">>}], []}, + start_array({[], []}) )}, {"allocate a new array on a stack", ?_assertEqual( - [{array, <<"[">>}, {object, <<"{">>}], - start_array([{object, <<"{">>}]) + {[{array, <<"[">>}, {object, <<"{">>}], []}, + start_array({[{object, <<"{">>}], []}) )}, {"insert a key into an object", ?_assertEqual( - [{object, <<"\"key\"">>, <<"{">>}], - insert(<<"\"key\"">>, [{object, <<"{">>}]) + {[{object, <<"\"key\"">>, <<"{">>}], []}, + insert(<<"\"key\"">>, {[{object, <<"{">>}], []}) )}, {"insert a value into an object", ?_assertEqual( - [{object, <<"{\"key\":true">>}], - insert(<<"true">>, [{object, <<"\"key\"">>, <<"{">>}]) + {[{object, <<"{\"key\":true">>}], []}, + insert(<<"true">>, {[{object, <<"\"key\"">>, <<"{">>}], []}) )}, {"insert a value into an array", ?_assertEqual( - [{array, <<"[true">>}], - insert(<<"true">>, [{array, 
<<"[">>}]) + {[{array, <<"[true">>}], []}, + insert(<<"true">>, {[{array, <<"[">>}], []}) )}, {"insert a key/value pair into an object", ?_assertEqual( - [{object, <<"{\"x\":true,\"y\":false">>}], - insert(<<"\"y\"">>, <<"false">>, [{object, <<"{\"x\":true">>}]) + {[{object, <<"{\"x\":true,\"y\":false">>}], []}, + insert(<<"\"y\"">>, <<"false">>, {[{object, <<"{\"x\":true">>}], []}) )}, {"finish an object with no ancestor", ?_assertEqual( - <<"{\"x\":true,\"y\":false}">>, - finish([{object, <<"{\"x\":true,\"y\":false">>}]) + {<<"{\"x\":true,\"y\":false}">>, []}, + finish({[{object, <<"{\"x\":true,\"y\":false">>}], []}) )}, {"finish an empty object", ?_assertEqual( - <<"{}">>, - finish([{object, <<"{">>}]) + {<<"{}">>, []}, + finish({[{object, <<"{">>}], []}) )}, {"finish an object with an ancestor", ?_assertEqual( - [{object, <<"{\"a\":[],\"b\":{\"x\":true,\"y\":false}">>}], - finish([{object, <<"{\"x\":true,\"y\":false">>}, {object, <<"\"b\"">>, <<"{\"a\":[]">>}]) + {[{object, <<"{\"a\":[],\"b\":{\"x\":true,\"y\":false}">>}], []}, + finish({[{object, <<"{\"x\":true,\"y\":false">>}, {object, <<"\"b\"">>, <<"{\"a\":[]">>}], []}) )}, {"finish an array with no ancestor", ?_assertEqual( - <<"[true,false,null]">>, - finish([{array, <<"[true,false,null">>}]) + {<<"[true,false,null]">>, []}, + finish({[{array, <<"[true,false,null">>}], []}) )}, {"finish an array with an ancestor", ?_assertEqual( - [{array, <<"[1,2,3,[true,false,null]">>}], - finish([{array, <<"[true,false,null">>}, {array, <<"[1,2,3">>}]) + {[{array, <<"[1,2,3,[true,false,null]">>}], []}, + finish({[{array, <<"[true,false,null">>}, {array, <<"[1,2,3">>}], []}) )} ]. 
From f58ac5566e11f07d2dd4b21b69251e48444606dc Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 30 Oct 2013 00:21:17 +0000 Subject: [PATCH 24/40] add whitespace formatting to rep manipulation functions --- src/jsx_to_json.erl | 108 +++++++++++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 31 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index ed28a63..e56c5de 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -210,20 +210,63 @@ insert(Value, {[], Config}) when is_binary(Value) -> insert(Key, {[{object, Object}|Rest], Config}) when is_binary(Key) -> {[{object, Key, Object}] ++ Rest, Config}; insert(Value, {[{object, Key, ?start_object}|Rest], Config}) when is_binary(Value) -> - {[{object, <>}] ++ Rest, Config}; + { + [{object, <>}] ++ Rest, + Config + }; insert(Value, {[{object, Key, Object}|Rest], Config}) when is_binary(Value) -> - {[{object, <>}] ++ Rest, Config}; + { + [{object, <>}] ++ Rest, + Config + }; insert(Value, {[{array, ?start_array}|Rest], Config}) when is_binary(Value) -> {[{array, <>}] ++ Rest, Config}; insert(Value, {[{array, Array}|Rest], Config}) when is_binary(Value) -> - {[{array, <>}] ++ Rest, Config}; + { + [{array, <>}] ++ Rest, + Config + }; insert(_, _) -> erlang:error(badarg). %% insert a key/value pair into an object insert(Key, Value, {[{object, ?start_object}|Rest], Config}) when is_binary(Key), is_binary(Value) -> - {[{object, <>}] ++ Rest, Config}; + { + [{object, <>}] ++ Rest, + Config + }; insert(Key, Value, {[{object, Object}|Rest], Config}) when is_binary(Key), is_binary(Value) -> - {[{object, <>}] ++ Rest, Config}; + { + [{object, <>}] ++ Rest, + Config + }; insert(_, _, _) -> erlang:error(badarg). 
@@ -342,56 +385,59 @@ format_test_() -> rep_manipulation_test_() -> [ {"allocate a new object on an empty stack", ?_assertEqual( - {[{object, <<"{">>}], []}, - start_object({[], []}) + {[{object, <<"{">>}], #config{}}, + start_object({[], #config{}}) )}, {"allocate a new object on a stack", ?_assertEqual( - {[{object, <<"{">>}, {object, <<"{">>}], []}, - start_object({[{object, <<"{">>}], []}) + {[{object, <<"{">>}, {object, <<"{">>}], #config{}}, + start_object({[{object, <<"{">>}], #config{}}) )}, {"allocate a new array on an empty stack", ?_assertEqual( - {[{array, <<"[">>}], []}, - start_array({[], []}) + {[{array, <<"[">>}], #config{}}, + start_array({[], #config{}}) )}, {"allocate a new array on a stack", ?_assertEqual( - {[{array, <<"[">>}, {object, <<"{">>}], []}, - start_array({[{object, <<"{">>}], []}) + {[{array, <<"[">>}, {object, <<"{">>}], #config{}}, + start_array({[{object, <<"{">>}], #config{}}) )}, {"insert a key into an object", ?_assertEqual( - {[{object, <<"\"key\"">>, <<"{">>}], []}, - insert(<<"\"key\"">>, {[{object, <<"{">>}], []}) + {[{object, <<"\"key\"">>, <<"{">>}], #config{}}, + insert(<<"\"key\"">>, {[{object, <<"{">>}], #config{}}) )}, {"insert a value into an object", ?_assertEqual( - {[{object, <<"{\"key\":true">>}], []}, - insert(<<"true">>, {[{object, <<"\"key\"">>, <<"{">>}], []}) + {[{object, <<"{\"key\":true">>}], #config{}}, + insert(<<"true">>, {[{object, <<"\"key\"">>, <<"{">>}], #config{}}) )}, {"insert a value into an array", ?_assertEqual( - {[{array, <<"[true">>}], []}, - insert(<<"true">>, {[{array, <<"[">>}], []}) + {[{array, <<"[true">>}], #config{}}, + insert(<<"true">>, {[{array, <<"[">>}], #config{}}) )}, {"insert a key/value pair into an object", ?_assertEqual( - {[{object, <<"{\"x\":true,\"y\":false">>}], []}, - insert(<<"\"y\"">>, <<"false">>, {[{object, <<"{\"x\":true">>}], []}) + {[{object, <<"{\"x\":true,\"y\":false">>}], #config{}}, + insert(<<"\"y\"">>, <<"false">>, {[{object, <<"{\"x\":true">>}], 
#config{}}) )}, {"finish an object with no ancestor", ?_assertEqual( - {<<"{\"x\":true,\"y\":false}">>, []}, - finish({[{object, <<"{\"x\":true,\"y\":false">>}], []}) + {<<"{\"x\":true,\"y\":false}">>, #config{}}, + finish({[{object, <<"{\"x\":true,\"y\":false">>}], #config{}}) )}, {"finish an empty object", ?_assertEqual( - {<<"{}">>, []}, - finish({[{object, <<"{">>}], []}) + {<<"{}">>, #config{}}, + finish({[{object, <<"{">>}], #config{}}) )}, {"finish an object with an ancestor", ?_assertEqual( - {[{object, <<"{\"a\":[],\"b\":{\"x\":true,\"y\":false}">>}], []}, - finish({[{object, <<"{\"x\":true,\"y\":false">>}, {object, <<"\"b\"">>, <<"{\"a\":[]">>}], []}) + {[{object, <<"{\"a\":[],\"b\":{\"x\":true,\"y\":false}">>}], #config{}}, + finish({ + [{object, <<"{\"x\":true,\"y\":false">>}, {object, <<"\"b\"">>, <<"{\"a\":[]">>}], + #config{} + }) )}, {"finish an array with no ancestor", ?_assertEqual( - {<<"[true,false,null]">>, []}, - finish({[{array, <<"[true,false,null">>}], []}) + {<<"[true,false,null]">>, #config{}}, + finish({[{array, <<"[true,false,null">>}], #config{}}) )}, {"finish an array with an ancestor", ?_assertEqual( - {[{array, <<"[1,2,3,[true,false,null]">>}], []}, - finish({[{array, <<"[true,false,null">>}, {array, <<"[1,2,3">>}], []}) + {[{array, <<"[1,2,3,[true,false,null]">>}], #config{}}, + finish({[{array, <<"[true,false,null">>}, {array, <<"[1,2,3">>}], #config{}}) )} ]. From 5753baefafb1ad7fa91f425a9d934e7b77d71bb9 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 30 Oct 2013 00:34:45 +0000 Subject: [PATCH 25/40] convert jsx_to_json over to new internal interface --- src/jsx_to_json.erl | 112 +++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 74 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index e56c5de..c907e98 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -85,74 +85,31 @@ parse_config([], Config) -> -define(newline, <<"\n">>). 
-init(Config) -> {start, [], parse_config(Config)}. +init(Config) -> {[], parse_config(Config)}. -handle_event(Event, {start, Acc, Config}) -> - case Event of - {Type, Value} -> {[], [Acc, encode(Type, Value, Config)], Config} - ; start_object -> {[object_start], [Acc, ?start_object], Config} - ; start_array -> {[array_start], [Acc, ?start_array], Config} - end; -handle_event(Event, {[object_start|Stack], Acc, OldConfig = #config{depth = Depth}}) -> - Config = OldConfig#config{depth = Depth + 1}, - case Event of - {key, Key} -> - {[object_value|Stack], [Acc, indent(Config), encode(string, Key, Config), ?colon, space(Config)], Config} - ; end_object -> - {Stack, [Acc, ?end_object], OldConfig} - end; -handle_event(Event, {[object_value|Stack], Acc, Config}) -> - case Event of - {Type, Value} when Type == string; Type == literal; - Type == integer; Type == float -> - {[key|Stack], [Acc, encode(Type, Value, Config)], Config} - ; start_object -> {[object_start, key|Stack], [Acc, ?start_object], Config} - ; start_array -> {[array_start, key|Stack], [Acc, ?start_array], Config} - end; -handle_event(Event, {[key|Stack], Acc, Config = #config{depth = Depth}}) -> - case Event of - {key, Key} -> - {[object_value|Stack], [Acc, ?comma, indent_or_space(Config), encode(string, Key, Config), ?colon, space(Config)], Config} - ; end_object -> - NewConfig = Config#config{depth = Depth - 1}, - {Stack, [Acc, indent(NewConfig), ?end_object], NewConfig} - end; -handle_event(Event, {[array_start|Stack], Acc, OldConfig = #config{depth = Depth}}) -> - Config = OldConfig#config{depth = Depth + 1}, - case Event of - {Type, Value} when Type == string; Type == literal; - Type == integer; Type == float -> - {[array|Stack], [Acc, indent(Config), encode(Type, Value, Config)], Config} - ; start_object -> {[object_start, array|Stack], [Acc, indent(Config), ?start_object], Config} - ; start_array -> {[array_start, array|Stack], [Acc, indent(Config), ?start_array], Config} - ; end_array -> {Stack, 
[Acc, ?end_array], OldConfig} - end; -handle_event(Event, {[array|Stack], Acc, Config = #config{depth = Depth}}) -> - case Event of - {Type, Value} when Type == string; Type == literal; - Type == integer; Type == float -> - {[array|Stack], [Acc, ?comma, indent_or_space(Config), encode(Type, Value, Config)], Config} - ; end_array -> - NewConfig = Config#config{depth = Depth - 1}, - {Stack, [Acc, indent(NewConfig), ?end_array], NewConfig} - ; start_object -> {[object_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_object], Config} - ; start_array -> {[array_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_array], Config} - end; -handle_event(end_json, {[], Acc, _Config}) -> unicode:characters_to_binary(Acc, utf8). +handle_event(end_json, {Term, _Config}) -> Term; + +handle_event(start_object, State) -> start_object(State); +handle_event(end_object, State) -> finish(State); + +handle_event(start_array, State) -> start_array(State); +handle_event(end_array, State) -> finish(State); + +handle_event({Type, Event}, {_, Config} = State) -> insert(encode(Type, Event, Config), State). encode(string, String, _Config) -> - [?quote, String, ?quote]; + <>; encode(key, Key, _Config) -> - [?quote, Key, ?quote]; + <>; encode(literal, Literal, _Config) -> - erlang:atom_to_list(Literal); + unicode:characters_to_binary(erlang:atom_to_list(Literal)); encode(integer, Integer, _Config) -> - erlang:integer_to_list(Integer); + unicode:characters_to_binary(erlang:integer_to_list(Integer)); encode(float, Float, _Config) -> - [Output] = io_lib:format("~p", [Float]), Output. + [Output] = io_lib:format("~p", [Float]), unicode:characters_to_binary(Output). 
space(Config) -> @@ -345,40 +302,47 @@ indent_or_space_test_() -> format_test_() -> [ - {"0.0", ?_assert(encode(float, 0.0, #config{}) =:= "0.0")}, - {"1.0", ?_assert(encode(float, 1.0, #config{}) =:= "1.0")}, - {"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= "-1.0")}, + {"0.0", ?_assert(encode(float, 0.0, #config{}) =:= <<"0.0">>)}, + {"1.0", ?_assert(encode(float, 1.0, #config{}) =:= <<"1.0">>)}, + {"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= <<"-1.0">>)}, {"3.1234567890987654321", ?_assert( - encode(float, 3.1234567890987654321, #config{}) =:= "3.1234567890987655") + encode(float, 3.1234567890987654321, #config{}) =:= <<"3.1234567890987655">>) }, - {"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= "1.0e23")}, - {"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= "0.3")}, - {"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= "0.0001")}, - {"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= "1.0e-5")}, - {"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= "1.0e-8")}, - {"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= "1.0e-323")}, - {"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= "1.0e308")}, + {"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= <<"1.0e23">>)}, + {"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= <<"0.3">>)}, + {"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= <<"0.0001">>)}, + {"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= <<"1.0e-5">>)}, + {"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= <<"1.0e-8">>)}, + {"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= <<"1.0e-323">>)}, + {"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= <<"1.0e308">>)}, {"min normalized float", ?_assert( - encode(float, math:pow(2, -1022), #config{}) =:= "2.2250738585072014e-308" + encode(float, math:pow(2, -1022), #config{}) =:= <<"2.2250738585072014e-308">> ) }, {"max normalized float", ?_assert( encode(float, (2 - math:pow(2, -52)) * 
math:pow(2, 1023), #config{}) - =:= "1.7976931348623157e308" + =:= <<"1.7976931348623157e308">> ) }, {"min denormalized float", - ?_assert(encode(float, math:pow(2, -1074), #config{}) =:= "5.0e-324") + ?_assert(encode(float, math:pow(2, -1074), #config{}) =:= <<"5.0e-324">>) }, {"max denormalized float", ?_assert( encode(float, (1 - math:pow(2, -52)) * math:pow(2, -1022), #config{}) - =:= "2.225073858507201e-308" + =:= <<"2.225073858507201e-308">> ) - } + }, + {"hello world", ?_assert(encode(string, <<"hello world">>, #config{}) =:= <<"\"hello world\"">>)}, + {"key", ?_assert(encode(key, <<"key">>, #config{}) =:= <<"\"key\"">>)}, + {"1", ?_assert(encode(integer, 1, #config{}) =:= <<"1">>)}, + {"-1", ?_assert(encode(integer, -1, #config{}) =:= <<"-1">>)}, + {"true", ?_assert(encode(literal, true, #config{}) =:= <<"true">>)}, + {"false", ?_assert(encode(literal, false, #config{}) =:= <<"false">>)}, + {"null", ?_assert(encode(literal, null, #config{}) =:= <<"null">>)} ]. From 3f7f0f1f9ce7273bec98d1325aacbe998e9e0205 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 30 Oct 2013 00:51:44 +0000 Subject: [PATCH 26/40] introduce `Config' object to internal rep in `jsx_to_json' to match `jsx_to_term' --- src/jsx_to_term.erl | 94 +++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 88d4c83..36fdfd0 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -76,15 +76,15 @@ init(Config) -> {[], parse_config(Config)}. 
handle_event(end_json, {Term, _Config}) -> Term; -handle_event(start_object, {Stack, Config}) -> {start_object(Stack), Config}; -handle_event(end_object, {Stack, Config}) -> {finish(Stack), Config}; +handle_event(start_object, State) -> start_object(State); +handle_event(end_object, State) -> finish(State); -handle_event(start_array, {Stack, Config}) -> {start_array(Stack), Config}; -handle_event(end_array, {Stack, Config}) -> {finish(Stack), Config}; +handle_event(start_array, State) -> start_array(State); +handle_event(end_array, State) -> finish(State); -handle_event({key, Key}, {Stack, Config}) -> {insert(format_key(Key, Config), Stack), Config}; +handle_event({key, Key}, {_, Config} = State) -> insert(format_key(Key, Config), State); -handle_event({_, Event}, {Stack, Config}) -> {insert(Event, Stack), Config}. +handle_event({_, Event}, State) -> insert(Event, State). format_key(Key, Config) -> @@ -111,30 +111,34 @@ format_key(Key, Config) -> %% `{array, [NthValue, NthMinus1Value,...FirstValue]}` %% allocate a new object on top of the stack -start_object(Stack) -> [{object, []}] ++ Stack. +start_object({Stack, Config}) -> {[{object, []}] ++ Stack, Config}. %% allocate a new array on top of the stack -start_array(Stack) -> [{array, []}] ++ Stack. +start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}. 
%% finish an object or array and insert it into the parent object if it exists -finish([{object, []}]) -> [{}]; -finish([{object, []}|Rest]) -> insert([{}], Rest); -finish([{object, Pairs}]) -> lists:reverse(Pairs); -finish([{object, Pairs}|Rest]) -> insert(lists:reverse(Pairs), Rest); -finish([{array, Values}]) -> lists:reverse(Values); -finish([{array, Values}|Rest]) -> insert(lists:reverse(Values), Rest); +finish({[{object, []}], Config}) -> {[{}], Config}; +finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config}); +finish({[{array, Values}], Config}) -> {lists:reverse(Values), Config}; +finish({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config}); finish(_) -> erlang:error(badarg). %% insert a value when there's no parent object or array -insert(Value, []) -> Value; +insert(Value, {[], Config}) -> {Value, Config}; %% insert a key or value into an object or array, autodetects the 'right' thing -insert(Key, [{object, Pairs}|Rest]) -> [{object, Key, Pairs}] ++ Rest; -insert(Value, [{object, Key, Pairs}|Rest]) -> [{object, [{Key, Value}] ++ Pairs}] ++ Rest; -insert(Value, [{array, Values}|Rest]) -> [{array, [Value] ++ Values}] ++ Rest; +insert(Key, {[{object, Pairs}|Rest], Config}) -> + {[{object, Key, Pairs}] ++ Rest, Config}; +insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> + {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config}; +insert(Value, {[{array, Values}|Rest], Config}) -> + {[{array, [Value] ++ Values}] ++ Rest, Config}; insert(_, _) -> erlang:error(badarg). %% insert a key/value pair into an object -insert(Key, Value, [{object, Pairs}|Rest]) -> [{object, [{Key, Value}] ++ Pairs}] ++ Rest; +insert(Key, Value, {[{object, Pairs}|Rest], Config}) -> + {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config}; insert(_, _, _) -> erlang:error(badarg). 
@@ -187,56 +191,56 @@ format_key_test_() -> rep_manipulation_test_() -> [ {"allocate a new object on an empty stack", ?_assertEqual( - [{object, []}], - start_object([]) + {[{object, []}], #config{}}, + start_object({[], #config{}}) )}, {"allocate a new object on a stack", ?_assertEqual( - [{object, []}, {object, []}], - start_object([{object, []}]) + {[{object, []}, {object, []}], #config{}}, + start_object({[{object, []}], #config{}}) )}, {"allocate a new array on an empty stack", ?_assertEqual( - [{array, []}], - start_array([]) + {[{array, []}], #config{}}, + start_array({[], #config{}}) )}, {"allocate a new array on a stack", ?_assertEqual( - [{array, []}, {object, []}], - start_array([{object, []}]) + {[{array, []}, {object, []}], #config{}}, + start_array({[{object, []}], #config{}}) )}, {"insert a key into an object", ?_assertEqual( - [{object, key, []}, junk], - insert(key, [{object, []}, junk]) + {[{object, key, []}, junk], #config{}}, + insert(key, {[{object, []}, junk], #config{}}) )}, {"insert a value into an object", ?_assertEqual( - [{object, [{key, value}]}, junk], - insert(value, [{object, key, []}, junk]) + {[{object, [{key, value}]}, junk], #config{}}, + insert(value, {[{object, key, []}, junk], #config{}}) )}, {"insert a value into an array", ?_assertEqual( - [{array, [value]}, junk], - insert(value, [{array, []}, junk]) + {[{array, [value]}, junk], #config{}}, + insert(value, {[{array, []}, junk], #config{}}) )}, {"insert a key/value pair into an object", ?_assertEqual( - [{object, [{key, value}, {x, y}]}, junk], - insert(key, value, [{object, [{x, y}]}, junk]) + {[{object, [{key, value}, {x, y}]}, junk], #config{}}, + insert(key, value, {[{object, [{x, y}]}, junk], #config{}}) )}, {"finish an object with no ancestor", ?_assertEqual( - [{a, b}, {x, y}], - finish([{object, [{x, y}, {a, b}]}]) + {[{a, b}, {x, y}], #config{}}, + finish({[{object, [{x, y}, {a, b}]}], #config{}}) )}, {"finish an empty object", ?_assertEqual( - [{}], - 
finish([{object, []}]) + {[{}], #config{}}, + finish({[{object, []}], #config{}}) )}, {"finish an object with an ancestor", ?_assertEqual( - [{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], - finish([{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}]) + {[{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], #config{}}, + finish({[{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}], #config{}}) )}, {"finish an array with no ancestor", ?_assertEqual( - [a, b, c], - finish([{array, [c, b, a]}]) + {[a, b, c], #config{}}, + finish({[{array, [c, b, a]}], #config{}}) )}, {"finish an array with an ancestor", ?_assertEqual( - [{array, [[a, b, c], d, e, f]}], - finish([{array, [c, b, a]}, {array, [d, e, f]}]) + {[{array, [[a, b, c], d, e, f]}], #config{}}, + finish({[{array, [c, b, a]}, {array, [d, e, f]}], #config{}}) )} ]. From eb7527e9a33daafb51d32a003ef4562922cdf0e8 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 30 Oct 2013 00:52:44 +0000 Subject: [PATCH 27/40] whitespace --- src/jsx_to_json.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index c907e98..6d8e2b8 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -88,7 +88,6 @@ parse_config([], Config) -> init(Config) -> {[], parse_config(Config)}. - handle_event(end_json, {Term, _Config}) -> Term; handle_event(start_object, State) -> start_object(State); From 08673b144e86d256948ecfe73d05ec67ba8ca2b4 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 30 Oct 2013 00:57:50 +0000 Subject: [PATCH 28/40] update comments about internal rep to match reality --- src/jsx_to_json.erl | 4 +++- src/jsx_to_term.erl | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 6d8e2b8..73209eb 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -132,7 +132,9 @@ indent_or_space(Config) -> end. 
-%% internal state is a stack of in progress objects/arrays +%% internal state is a stack and a config object +%% `{Stack, Config}` +%% the stack is a list of in progress objects/arrays %% `[Current, Parent, Grandparent,...OriginalAncestor]` %% an object has the representation on the stack of %% `{object, Object}` diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 36fdfd0..815da4b 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -101,7 +101,9 @@ format_key(Key, Config) -> end. -%% internal state is a stack of in progress objects/arrays +%% internal state is a stack and a config object +%% `{Stack, Config}` +%% the stack is a list of in progress objects/arrays %% `[Current, Parent, Grandparent,...OriginalAncestor]` %% an object has the representation on the stack of %% `{object, [{NthKey, NthValue}, {NMinus1Key, NthMinus1Value},...{FirstKey, FirstValue}]}` From 70aebfc6ad2e97f92dc076924e7425dc9da22ec7 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Thu, 31 Oct 2013 02:47:45 +0000 Subject: [PATCH 29/40] add `get_key/1' to `jsx_to_json' and `jsx_to_term' --- src/jsx_to_json.erl | 21 +++++++++++++++++++-- src/jsx_to_term.erl | 17 ++++++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 73209eb..21fea69 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -25,7 +25,7 @@ -export([to_json/2, format/2]). -export([init/1, handle_event/2]). --export([start_object/1, start_array/1, insert/2, insert/3, finish/1]). +-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1]). -record(config, { @@ -228,6 +228,11 @@ insert(Key, Value, {[{object, Object}|Rest], Config}) when is_binary(Key), is_bi insert(_, _, _) -> erlang:error(badarg). +get_key({[{object, Key, _}|_], _}) -> {ok, Key}; +get_key(_) -> {error, nokey}. + + + %% eunit tests -ifdef(TEST). 
@@ -369,6 +374,18 @@ rep_manipulation_test_() -> {[{object, <<"\"key\"">>, <<"{">>}], #config{}}, insert(<<"\"key\"">>, {[{object, <<"{">>}], #config{}}) )}, + {"get current key", ?_assertEqual( + {ok, key}, + get_key({[{object, key, <<"{">>}], #config{}}) + )}, + {"try to get non-key from object", ?_assertEqual( + {error, nokey}, + get_key({[{object, <<"{">>}], #config{}}) + )}, + {"try to get key from array", ?_assertEqual( + {error, nokey}, + get_key({[{array, <<"[">>}], #config{}}) + )}, {"insert a value into an object", ?_assertEqual( {[{object, <<"{\"key\":true">>}], #config{}}, insert(<<"true">>, {[{object, <<"\"key\"">>, <<"{">>}], #config{}}) @@ -419,4 +436,4 @@ handle_event_test_() -> ]. --endif. \ No newline at end of file +-endif. diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 815da4b..173d190 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -25,7 +25,7 @@ -export([to_term/2]). -export([init/1, handle_event/2]). --export([start_object/1, start_array/1, finish/1, insert/2, insert/3]). +-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1]). -record(config, { @@ -144,6 +144,9 @@ insert(Key, Value, {[{object, Pairs}|Rest], Config}) -> insert(_, _, _) -> erlang:error(badarg). +get_key({[{object, Key, _}|_], _}) -> {ok, Key}; +get_key(_) -> {error, nokey}. 
+ %% eunit tests @@ -212,6 +215,18 @@ rep_manipulation_test_() -> {[{object, key, []}, junk], #config{}}, insert(key, {[{object, []}, junk], #config{}}) )}, + {"get current key", ?_assertEqual( + {ok, key}, + get_key({[{object, key, []}], #config{}}) + )}, + {"try to get non-key from object", ?_assertEqual( + {error, nokey}, + get_key({[{object, []}], #config{}}) + )}, + {"try to get key from array", ?_assertEqual( + {error, nokey}, + get_key({[{array, []}], #config{}}) + )}, {"insert a value into an object", ?_assertEqual( {[{object, [{key, value}]}, junk], #config{}}, insert(value, {[{object, key, []}, junk], #config{}}) From 04bd9dd827f25efab259fb2528a38fb7e6ccdbb8 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 29 Oct 2013 01:43:46 +0000 Subject: [PATCH 30/40] interstitial README --- README.md | 266 ++++++++++++++++++++++++++---------------------------- 1 file changed, 126 insertions(+), 140 deletions(-) diff --git a/README.md b/README.md index 81f8dda..75e9960 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# jsx (v1.4.3) # +# jsx (v2.0) # an erlang application for consuming, producing and manipulating [json][json]. inspired by [yajl][yajl] @@ -116,24 +116,29 @@ false jsx is an erlang application for consuming, producing and manipulating [json][json] -json has a [spec][rfc4627] but common usage differs subtly. it's common -usage jsx attempts to address, with guidance from the spec +jsx follows the json [spec][rfc4627] as closely as possible with allowances for +real world usage -all json produced and consumed by jsx should be `utf8` encoded text or a -reasonable approximation thereof. ascii works too, but anything beyond that -i'm not going to make any promises. **especially** not latin1 +jsx is pragmatic. the json spec allows extensions so jsx extends the spec in a +number of ways. 
see the section on `strict` in [options](#option) below though -the [spec][rfc4627] thinks json values must be wrapped in a json array or -object but everyone else disagrees so jsx allows naked json values by default. -if you're a curmudgeon who's offended by this deviation here is a wrapper for -you: +there's not supposed to be any comments in json but when did comments ever hurt +anyone? json has no official comments but this parser allows c/c++ style comments. +anywhere whitespace is allowed you can insert comments (both `// ...` and `/* ... */`) + +all jsx decoder input should be `utf8` encoded binaries. sometimes you get binaries +that are almost but not quite valid utf8 whether due to improper escaping or poor +encoding. jsx replaces invalid codepoints and poorly formed sequences with the +unicode replacement character (`u+FFFD`) + +json only allows keys and strings to be delimited by double quotes (`u+0022`) but +javascript allows them to be delimited by single quotes (`u+0027`) as well. jsx +follows javascript in this. strings that start with single quotes can contain double +quotes but must end with single quotes and must escape any single quotes they contain + +json and jsx only recognize escape sequences as outlined in the json spec. it just +ignores bad escape sequences -```erlang -%% usage: `real_json(jsx:decode(JSON))` -real_json(Result) when is_list(Result) -> Result; -real_json(Result) when is_tuple(Result, 2) -> Result; -real_json(_) -> erlang:error(badarg). -``` ### json <-> erlang mapping ### @@ -218,22 +223,30 @@ real_json(_) -> erlang:error(badarg). ### incomplete input ### -jsx handles incomplete json texts. if a partial json text is parsed, rather than -returning a term from your callback handler, jsx returns `{incomplete, F}` where -`F` is a function with an identical API to the anonymous fun returned from -`decoder/3`, `encoder/3` or `parser/3`. it retains the internal state of the -parser at the point where input was exhausted. 
this allows you to parse as you -stream json over a socket or file descriptor, or to parse large json texts -without needing to keep them entirely in memory +jsx can handle incomplete json texts. if the option `stream` is passed to the decoder +or parser and if a partial json text is parsed, rather than returning a term from +your callback handler, jsx returns `{incomplete, F}` where `F` is a function with +an identical API to the anonymous fun returned from `decoder/3`, `encoder/3` or +`parser/3`. it retains the internal state of the parser at the point where input +was exhausted. this allows you to parse as you stream json over a socket or file +descriptor, or to parse large json texts without needing to keep them entirely in +memory -however, it is important to recognize that jsx is greedy by default. jsx will -consider the parsing complete if input is exhausted and the json text is not -unambiguously incomplete. this is mostly relevant when parsing bare numbers like -`<<"1234">>`. this could be a complete json integer or just the beginning of a -json integer that is being parsed incrementally. jsx will treat it as a whole -integer. calling jsx with the [option](#options) `stream` reverses this -behavior and never considers parsing complete until the `incomplete` function is -called with the argument `end_stream` +however, it is important to recognize that jsx is conservative by default. jsx will +not consider the parsing complete even when input is exhausted and the json text is +unambiguously incomplete. to end parsing call the `incomplete` function with the +argument `end_stream` like: + +```erlang +1> {incomplete, F} = jsx:decode(<<"[">>, [stream]). +{incomplete,#Fun} +2> F(end_stream). +** exception error: bad argument +3> {incomplete, G} = F(<<"]">>). +{incomplete,#Fun} +4> G(end_stream). 
+[] +``` ## data types ## @@ -262,6 +275,34 @@ json_text() = binary() a utf8 encoded binary containing a json string + +#### `token()` #### + +```erlang +event() = start_object + | end_object + | start_array + | end_array + | {key, binary()} + | {string, binary()} + | binary() + | {integer, integer()} + | integer() + | {float, float()} + | float() + | {literal, true} + | true + | {literal, false} + | false + | {literal, null} + | null + | end_json +``` + +the representation used during syntactic analysis. you can generate this +yourself and feed it to `jsx:parser/3` if you'd like to define your own +representations + #### `event()` #### ```erlang @@ -279,50 +320,32 @@ event() = start_object | end_json ``` -#### `token()` #### - -```erlang -token() = event() - | binary() - | {number, integer() | float()} - | integer() - | float() - | true - | false - | null -``` - -the representation used during syntactic analysis. you can generate this -yourself and feed it to `jsx:parser/3` if you'd like to define your own -representations +the subset of [`token()`](#token) emitted by the decoder and encoder to handlers #### `option()` #### ```erlang -option() = replaced_bad_utf8 - | escaped_forward_slashes - | single_quoted_strings - | unescaped_jsonp - | comments +option() = escaped_forward_slashes | escaped_strings + | unescaped_jsonp | dirty_strings - | ignored_bad_escapes - | relax + | strict + | {strict, [strict_option()]} | stream -``` + | {incomplete_handler, fun()} + | {error_handler, fun()} + +strict_option() = comments + | utf8 + | single_quotes + | escapes +``` jsx functions all take a common set of options. not all flags have meaning in all contexts, but they are always valid options. functions may have additional options beyond these. see [individual function documentation](#exports) for details -- `replaced_bad_utf8` - - json text input and json strings SHOULD be utf8 encoded binaries, - appropriately escaped as per the json spec. 
attempts are made to replace - invalid codepoints with `u+FFFD` as per the unicode spec when this option is - present. this applies both to malformed unicode and disallowed codepoints - - `escaped_forward_slashes` json strings are escaped according to the json spec. this means forward @@ -330,35 +353,6 @@ additional options beyond these. see are left unescaped. you may want to use this if you are embedding json directly into a html or xml document -- `single_quoted_strings` - - some parsers allow double quotes (`u+0022`) to be replaced by single quotes - (`u+0027`) to delimit keys and strings. this option allows json containing - single quotes as structural characters to be parsed without errors. note - that the parser expects strings to be terminated by the same quote type that - opened it and that single quotes must, obviously, be escaped within strings - delimited by single quotes - - double quotes must **always** be escaped, regardless of what kind of quotes - delimit the string they are found in - - the parser will never emit json with keys or strings delimited by single - quotes - -- `unescaped_jsonp` - - javascript interpreters treat the codepoints `u+2028` and `u+2029` as - significant whitespace. json strings that contain either of these codepoints - will be parsed incorrectly by some javascript interpreters. by default, - these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to - retain compatibility. this option simply removes that escaping - -- `comments` - - json has no official comments but some parsers allow c/c++ style comments. - anywhere whitespace is allowed this flag allows comments (both `// ...` and - `/* ... */`) - - `escaped_strings` by default both the encoder and decoder return strings as utf8 binaries @@ -367,12 +361,14 @@ additional options beyond these. see unaltered. 
this flag escapes strings as if for output in json, removing control codes and problematic codepoints and replacing them with the appropriate escapes + +- `unescaped_jsonp` -- `ignored_bad_escapes` - - during decoding ignore unrecognized escape sequences and leave them as is in - the stream. note that combining this option with `escaped_strings` will - result in the escape character itself being escaped + javascript interpreters treat the codepoints `u+2028` and `u+2029` as + significant whitespace. json strings that contain either of these codepoints + will be parsed incorrectly by some javascript interpreters. by default, + these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to + retain compatibility. this option simply removes that escaping - `dirty_strings` @@ -380,20 +376,39 @@ additional options beyond these. see can result in unwanted behaviour. if your strings are already escaped (or you'd like to force invalid strings into "json" you monster) use this flag to bypass escaping. this can also be used to read in **really** invalid json - strings. everything but escaped quotes are passed as is to the resulting - string term. note that this overrides `ignored_bad_escapes`, - `unescaped_jsonp` and `escaped_strings` + strings. everything between unescaped quotes are passed as is to the resulting + string term. note that this takes precedence over any other options + +- `strict` + + as mentioned [earlier](#description), jsx is pragmatic. if you're more of a + json purist or you're really into bdsm stricter adherence to the spec is + possible. the following restrictions are available + + * `comments` + + comments are disabled and result in a `badarg` error + + * `utf8` + + invalid codepoints and malformed unicode result in `badarg` errors + + * `single_quotes` + + only keys and strings delimited by double quotes (`u+0022`) are allowed. 
the + single quote (`u+0027`) results in a `badarg` error + + * `escapes` + + escape sequences not adhering to the json spec result in a `badarg` error + + any combination of these can be passed to jsx by using `{strict, [strict_option()]}`. + `strict` is equivalent to `{strict, [comments, bad_utf8, single_quotes, escapes]}` - `stream` see [incomplete input](#incomplete-input) -- `relax` - - relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, - ignored_bad_escapes]` for when you don't care how absolutely terrible your - json input is, you just want the parser to do the best it can - - `incomplete_handler` & `error_handler` the default incomplete and error handlers can be replaced with user defined @@ -432,7 +447,7 @@ parser(Module, Args, Opts) -> Fun((Tokens) -> any()) Opts = [option()] JSONText = json_text() JSONTerm = json_term() - Tokens = token() | [token()] + Tokens = event() | [event()] ``` jsx is a json compiler with interleaved tokenizing, syntactic analysis and @@ -465,7 +480,7 @@ decode(JSON, Opts) -> Term JSON = json_text() Term = json_term() - Opts = [option() | labels | {labels, Label} | {post_decode, F}] + Opts = [option() | labels | {labels, Label}] Label = binary | atom | existing_atom | attempt_atom F = fun((any()) -> any()) ``` @@ -482,18 +497,6 @@ new atoms to the atom table and will result in a `badarg` error if the atom does not exist. `attempt_atom` will convert keys to atoms when they exist, and leave them as binary otherwise -`{post_decode, F}` is a user defined function of arity 1 that is called on each -output value (objects, arrays, strings, numbers and literals). it may return any -value to be substituted in the returned term. for example: - -```erlang -1> F = fun(V) when is_list(V) -> V; (V) -> false end. -2> jsx:decode(<<"{\"a list\": [true, \"a string\", 1]}">>, [{post_decode, F}]). 
-[{<<"a list">>, [false, false, false]}] -``` - -declaring more than one post-decoder will result in a `badarg` error exception - raises a `badarg` error exception if input is not valid json @@ -505,7 +508,7 @@ encode(Term, Opts) -> JSON Term = json_term() JSON = json_text() - Opts = [option() | {pre_encode, F} | space | {space, N} | indent | {indent, N}] + Opts = [option() | space | {space, N} | indent | {indent, N}] F = fun((any()) -> any()) N = pos_integer() ``` @@ -519,18 +522,6 @@ the option `{indent, N}` inserts a newline and `N` spaces for each level of indentation in your json output. note that this overrides spaces inserted after a comma. `indent` is an alias for `{indent, 1}`. the default is `{indent, 0}` -`{pre_encode, F}` is a user defined function of arity 1 that is called on each -input value. it may return any valid json value to be substituted in the -returned json. for example: - -```erlang -1> F = fun(V) when is_list(V) -> V; (V) -> false end. -2> jsx:encode([{<<"a list">>, [true, <<"a string">>, 1]}], [{pre_encode, F}]). -<<"{\"a list\": [false, false, false]}">> -``` - -declaring more than one pre-encoder will result in a `badarg` error exception - raises a `badarg` error exception if input is not a valid [erlang representation of json](#json---erlang-mapping) @@ -664,16 +655,11 @@ following events must be handled: the end of a json array -- `{key, binary()}` - - a key in a json object. this is guaranteed to follow either `start_object` - or a json value. it will usually be a `utf8` encoded binary. see the - [options](#option) for possible exceptions - - `{string, binary()}` a json string. it will usually be a `utf8` encoded binary. see the - [options](#option) for possible exceptions + [options](#option) for possible exceptions. 
note that keys are also + json strings - `{integer, integer()}` @@ -711,4 +697,4 @@ jsx wouldn't be what it is without the contributions of [paul davis](https://git [rebar]: https://github.com/rebar/rebar [meck]: https://github.com/eproxus/meck [rfc4627]: http://tools.ietf.org/html/rfc4627 -[travis]: https://travis-ci.org/ +[travis]: https://travis-ci.org/ \ No newline at end of file From 5db1d9cc05cb7769fe7e64f0a0a3b95e5244b1ef Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 3 Nov 2013 23:39:52 +0000 Subject: [PATCH 31/40] cleanup README for v2.0 --- README.md | 103 ++++++++++++------------------------------------------ 1 file changed, 22 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index 75e9960..c7e0b45 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,6 @@ copyright 2010-2013 alisdair sullivan - [`json_term()`](#json_term) - [`json_text()`](#json_text) - [`event()`](#event) - - [`token()`](#token) - [`option()`](#option) * [exports](#exports) - [`encoder/3`, `decoder/3` & `parser/3`](#encoder3-decoder3--parser3) @@ -122,8 +121,7 @@ real world usage jsx is pragmatic. the json spec allows extensions so jsx extends the spec in a number of ways. see the section on `strict` in [options](#option) below though -there's not supposed to be any comments in json but when did comments ever hurt -anyone? json has no official comments but this parser allows c/c++ style comments. +json has no official comments but this parser allows c/c++ style comments. anywhere whitespace is allowed you can insert comments (both `// ...` and `/* ... */`) all jsx decoder input should be `utf8` encoded binaries. 
sometimes you get binaries @@ -145,7 +143,7 @@ ignores bad escape sequences **json** | **erlang** --------------------------------|-------------------------------- -`number` | `integer()` and `float()` +`number` | `integer()` if possible, `float()` otherwise `string` | `binary()` `true`, `false` and `null` | `true`, `false` and `null` `array` | `[]` and `[JSON]` @@ -153,17 +151,18 @@ ignores bad escape sequences * numbers - javascript and thus json represent all numeric values with floats. as - this is woefully insufficient for many uses, **jsx**, just like erlang, - supports bigints. whenever possible, this library will interpret json - numbers that look like integers as integers. other numbers will be converted - to erlang's floating point type, which is nearly but not quite iee754. - negative zero is not representable in erlang (zero is unsigned in erlang and - `0` is equivalent to `-0`) and will be interpreted as regular zero. numbers - not representable are beyond the concern of this implementation, and will - result in parsing errors + javascript and thus json represent all numeric values with floats. there's no + reason for erlang -- a language that supports arbitrarily large integers -- to + restrict all numbers to the ieee754 range + + whenever possible, **jsx** will interpret json numbers that look like integers as + integers. other numbers will be converted to erlang's floating point type, which + is nearly but not quite iee754. negative zero is not representable in erlang (zero + is unsigned in erlang and `0` is equivalent to `-0`) and will be interpreted as + regular zero. numbers not representable are beyond the concern of this implementation, + and will result in parsing errors - when converting from erlang to json, numbers are represented with their + when converting from erlang to json, floats are represented with their shortest representation that will round trip without loss of precision. 
this means that some floats may be superficially dissimilar (although functionally equivalent). for example, `1.0000000000000001` will be @@ -171,30 +170,22 @@ ignores bad escape sequences * strings - the json [spec][rfc4627] is frustratingly vague on the exact details of json - strings. json must be unicode, but no encoding is specified. javascript - explicitly allows strings containing codepoints explicitly disallowed by - unicode. json allows implementations to set limits on the content of - strings. other implementations attempt to resolve this in various ways. this - implementation, in default operation, only accepts strings that meet the - constraints set out in the json spec (strings are sequences of unicode - codepoints deliminated by `"` (`u+0022`) that may not contain control codes - unless properly escaped with `\` (`u+005c`)) and that are encoded in `utf8` - - the utf8 restriction means improperly paired surrogates are explicitly - disallowed. `u+d800` to `u+dfff` are allowed, but only when they form valid - surrogate pairs. surrogates encountered otherwise result in errors + json strings must be unicode. in practice, because **jsx** only accepts + `utf8` all strings must be `utf8`. in addition to being unicode json strings + restrict a number of codepoints and define a number of escape sequences json string escapes of the form `\uXXXX` will be converted to their equivalent codepoints during parsing. this means control characters and other codepoints disallowed by the json spec may be encountered in resulting - strings, but codepoints disallowed by the unicode spec will not be. in the - interest of pragmatism there is an [option](#option) for looser parsing + strings. the utf8 restriction means the surrogates are explicitly disallowed. + if a string contains escaped surrogates (`u+d800` to `u+dfff`) they are + interpreted but only when they form valid surrogate pairs. 
surrogates + encountered otherwise are replaced with the replacement codepoint (`u+fffd`) all erlang strings are represented by **valid** `utf8` encoded binaries. the encoder will check strings for conformance. noncharacters (like `u+ffff`) - are allowed in erlang utf8 encoded binaries, but not in strings passed to - the encoder (although, again, see [options](#option)) + are allowed in erlang utf8 encoded binaries, but will be replaced in strings + passed to the encoder (although, again, see [options](#option)) this implementation performs no normalization on strings beyond that detailed here. be careful when comparing strings as equivalent strings @@ -275,34 +266,6 @@ json_text() = binary() a utf8 encoded binary containing a json string - -#### `token()` #### - -```erlang -event() = start_object - | end_object - | start_array - | end_array - | {key, binary()} - | {string, binary()} - | binary() - | {integer, integer()} - | integer() - | {float, float()} - | float() - | {literal, true} - | true - | {literal, false} - | false - | {literal, null} - | null - | end_json -``` - -the representation used during syntactic analysis. you can generate this -yourself and feed it to `jsx:parser/3` if you'd like to define your own -representations - #### `event()` #### ```erlang @@ -409,28 +372,6 @@ additional options beyond these. see see [incomplete input](#incomplete-input) -- `incomplete_handler` & `error_handler` - - the default incomplete and error handlers can be replaced with user defined - handlers. if options include `{error_handler, F}` and/or - `{incomplete_handler, F}` where `F` is a function of arity 3 they will be - called instead of the default handler. 
the spec for `F` is as follows - ```erlang - F(Remaining, InternalState, Config) -> any() - - Remaining = binary() | term() - InternalState = opaque() - Config = list() - ``` - `Remaining` is the binary fragment or term that caused the error - - `InternalState` is an opaque structure containing the internal state of the - parser/decoder/encoder - - `Config` is a list of options/flags in use by the parser/decoder/encoder - - these functions should be considered experimental for now - ## exports ## From 5fcd4644bf1ade87ac12d487275ee33b3e724ff6 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 13 Nov 2013 02:29:44 +0000 Subject: [PATCH 32/40] make sure `jsx_encoder:encode/2' dispatches recursive calls correctly --- src/jsx_encoder.erl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index f4e8699..35acffe 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -43,9 +43,13 @@ encode([], _EntryPoint) -> [start_array, end_array]; encode([{}], _EntryPoint) -> [start_object, end_object]; encode([{_, _}|_] = Term, EntryPoint) -> - lists:flatten([start_object] ++ [ EntryPoint:encode(T) || T <- unzip(Term) ] ++ [end_object]); + lists:flatten( + [start_object] ++ [ EntryPoint:encode(T, EntryPoint) || T <- unzip(Term) ] ++ [end_object] + ); encode(Term, EntryPoint) when is_list(Term) -> - lists:flatten([start_array] ++ [ EntryPoint:encode(T) || T <- Term ] ++ [end_array]); + lists:flatten( + [start_array] ++ [ EntryPoint:encode(T, EntryPoint) || T <- Term ] ++ [end_array] + ); encode(Else, _EntryPoint) -> [Else]. 
From 9e701a5f8b8cb3b1563c41e6f144933b0000eb1b Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Mon, 16 Dec 2013 22:32:18 +0000 Subject: [PATCH 33/40] minor README edits --- README.md | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index c7e0b45..5537d00 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,11 @@ an erlang application for consuming, producing and manipulating [json][json]. inspired by [yajl][yajl] -jsx is built via [rebar][rebar] and continuous integration testing provided courtesy [travis][travis] +**jsx** is built via [rebar][rebar] and continuous integration testing provided courtesy [travis][travis] current status: [![Build Status](https://secure.travis-ci.org/talentdeficit/jsx.png?branch=develop)](http://travis-ci.org/talentdeficit/jsx) -jsx is released under the terms of the [MIT][MIT] license +**jsx** is released under the terms of the [MIT][MIT] license copyright 2010-2013 alisdair sullivan @@ -16,6 +16,7 @@ copyright 2010-2013 alisdair sullivan * [quickstart](#quickstart) * [description](#description) - [json <-> erlang mapping](#json---erlang-mapping) + - [extending jsx](#extending-jsx) - [incomplete input](#incomplete-input) * [data types](#data-types) - [`json_term()`](#json_term) @@ -112,29 +113,29 @@ false ## description ## -jsx is an erlang application for consuming, producing and manipulating +**jsx** is an erlang application for consuming, producing and manipulating [json][json] -jsx follows the json [spec][rfc4627] as closely as possible with allowances for +**jsx** follows the json [spec][rfc4627] as closely as possible with allowances for real world usage -jsx is pragmatic. the json spec allows extensions so jsx extends the spec in a +**jsx** is pragmatic. the json spec allows extensions so **jsx** extends the spec in a number of ways. 
see the section on `strict` in [options](#option) below though json has no official comments but this parser allows c/c++ style comments. anywhere whitespace is allowed you can insert comments (both `// ...` and `/* ... */`) -all jsx decoder input should be `utf8` encoded binaries. sometimes you get binaries +all **jsx** decoder input should be `utf8` encoded binaries. sometimes you get binaries that are almost but not quite valid utf8 whether due to improper escaping or poor -encoding. jsx replaces invalid codepoints and poorly formed sequences with the +encoding. **jsx** replaces invalid codepoints and poorly formed sequences with the unicode replacement character (`u+FFFD`) json only allows keys and strings to be delimited by double quotes (`u+0022`) but -javascript allows them to be delimited by single quotes (`u+0027`) as well. jsx +javascript allows them to be delimited by single quotes (`u+0027`) as well. **jsx** follows javascript in this. strings that start with single quotes can contain double quotes but must end with single quotes and must escape any single quotes they contain -json and jsx only recognize escape sequences as outlined in the json spec. it just +json and **jsx** only recognize escape sequences as outlined in the json spec. it just ignores bad escape sequences @@ -214,16 +215,16 @@ ignores bad escape sequences ### incomplete input ### -jsx can handle incomplete json texts. if the option `stream` is passed to the decoder +**jsx** can handle incomplete json texts. if the option `stream` is passed to the decoder or parser and if a partial json text is parsed, rather than returning a term from -your callback handler, jsx returns `{incomplete, F}` where `F` is a function with +your callback handler, **jsx** returns `{incomplete, F}` where `F` is a function with an identical API to the anonymous fun returned from `decoder/3`, `encoder/3` or `parser/3`. it retains the internal state of the parser at the point where input was exhausted. 
this allows you to parse as you stream json over a socket or file descriptor, or to parse large json texts without needing to keep them entirely in memory -however, it is important to recognize that jsx is conservative by default. jsx will +however, it is important to recognize that **jsx** is conservative by default. **jsx** will not consider the parsing complete even when input is exhausted and the json text is unambiguously incomplete. to end parsing call the `incomplete` function with the argument `end_stream` like: @@ -304,7 +305,7 @@ strict_option() = comments | escapes ``` -jsx functions all take a common set of options. not all flags have meaning +**jsx** functions all take a common set of options. not all flags have meaning in all contexts, but they are always valid options. functions may have additional options beyond these. see [individual function documentation](#exports) for details @@ -344,7 +345,7 @@ additional options beyond these. see - `strict` - as mentioned [earlier](#description), jsx is pragmatic. if you're more of a + as mentioned [earlier](#description), **jsx** is pragmatic. if you're more of a json purist or you're really into bdsm stricter adherence to the spec is possible. the following restrictions are available @@ -365,7 +366,7 @@ additional options beyond these. see escape sequences not adhering to the json spec result in a `badarg` error - any combination of these can be passed to jsx by using `{strict, [strict_option()]}`. + any combination of these can be passed to **jsx** by using `{strict, [strict_option()]}`. `strict` is equivalent to `{strict, [comments, bad_utf8, single_quotes, escapes]}` - `stream` @@ -391,7 +392,7 @@ parser(Module, Args, Opts) -> Fun((Tokens) -> any()) Tokens = event() | [event()] ``` -jsx is a json compiler with interleaved tokenizing, syntactic analysis and +**jsx** is a json compiler with interleaved tokenizing, syntactic analysis and semantic analysis stages. 
included are two tokenizers; one that handles json texts (`decoder/3`) and one that handles erlang terms (`encoder/3`). there is also an entry point to the syntactic analysis stage for use with user-defined @@ -550,7 +551,7 @@ what exactly constitutes valid json may be altered via [options](#option) ## callback exports ## -the following functions should be exported from a jsx callback module +the following functions should be exported from a **jsx** callback module #### `Module:init/1` #### @@ -630,7 +631,7 @@ following events must be handled: ## acknowledgements ## -jsx wouldn't be what it is without the contributions of [paul davis](https://github.com/davisp), [lloyd hilaiel](https://github.com/lloyd), [john engelhart](https://github.com/johnezang), [bob ippolito](https://github.com/etrepum), [fernando benavides](https://github.com/elbrujohalcon), [alex kropivny](https://github.com/amtal), [steve strong](https://github.com/srstrong), [michael truog](https://github.com/okeuday), [dmitry kolesnikov](https://github.com/fogfish) and [emptytea](https://github.com/emptytea) +**jsx** wouldn't be what it is without the contributions of [paul davis](https://github.com/davisp), [lloyd hilaiel](https://github.com/lloyd), [john engelhart](https://github.com/johnezang), [bob ippolito](https://github.com/etrepum), [fernando benavides](https://github.com/elbrujohalcon), [alex kropivny](https://github.com/amtal), [steve strong](https://github.com/srstrong), [michael truog](https://github.com/okeuday), [dmitry kolesnikov](https://github.com/fogfish) and [emptytea](https://github.com/emptytea) [json]: http://json.org [yajl]: http://lloyd.github.com/yajl From 95b2d9b628f6083b95b5c79a1d676576c3b13834 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 18 Dec 2013 02:13:50 +0000 Subject: [PATCH 34/40] allow atoms in erlang terms to be converted to strings in json objects and arrays. 
`true`, `false` and `null` excluded --- src/jsx.erl | 39 ++++++++++++++++++++++++++++++++++++++- src/jsx_parser.erl | 2 ++ src/jsx_to_json.erl | 2 +- src/jsx_verify.erl | 2 +- 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/jsx.erl b/src/jsx.erl index 57a1d45..3dac4f8 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -35,7 +35,7 @@ -ifdef(TEST). %% data and helper functions for tests --export([test_cases/0]). +-export([test_cases/0, special_test_cases/0]). -export([init/1, handle_event/2]). -endif. @@ -174,9 +174,13 @@ test_cases() -> ++ floats() ++ compound_object(). +%% segregate these so we can skip them in `jsx_to_term` +special_test_cases() -> special_objects() ++ special_array(). + empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}]. + nested_array() -> [{ "[[[]]]", @@ -188,6 +192,7 @@ nested_array() -> empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}]. + nested_object() -> [{ "{\"key\":{\"key\":{}}}", @@ -221,6 +226,7 @@ naked_strings() -> || String <- Raw ]. + strings() -> naked_strings() ++ [ wrap_with_array(Test) || Test <- naked_strings() ] @@ -246,6 +252,7 @@ naked_integers() -> || X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0] ]. + integers() -> naked_integers() ++ [ wrap_with_array(Test) || Test <- naked_integers() ] @@ -276,6 +283,7 @@ naked_floats() -> || X <- Raw ++ [ -1 * Y || Y <- Raw ] ]. + floats() -> naked_floats() ++ [ wrap_with_array(Test) || Test <- naked_floats() ] @@ -293,6 +301,7 @@ naked_literals() -> || Literal <- [true, false, null] ]. + literals() -> naked_literals() ++ [ wrap_with_array(Test) || Test <- naked_literals() ] @@ -337,6 +346,34 @@ compound_object() -> }]. +special_objects() -> + [ + { + "[{key, atom}]", + <<"{\"key\":\"atom\"}">>, + [{key, atom}], + [start_object, {key, <<"key">>}, {string, <<"atom">>}, end_object] + }, + { + "[{1, true}]", + <<"{\"1\":true}">>, + [{1, true}], + [start_object, {key, <<"1">>}, {literal, true}, end_object] + } + ]. 
+ + +special_array() -> + [ + { + "[foo, bar]", + <<"[\"foo\",\"bar\"]">>, + [foo, bar], + [start_array, {string, <<"foo">>}, {string, <<"bar">>}, end_array] + } + ]. + + wrap_with_array({Title, JSON, Term, Events}) -> { "[" ++ Title ++ "]", diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 769ffd7..c5c23a2 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -115,6 +115,8 @@ value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) end; value([String|Tokens], Handler, Stack, Config) when is_binary(String) -> value([{string, String}] ++ Tokens, Handler, Stack, Config); +value([String|Tokens], Handler, Stack, Config) when is_atom(String) -> + value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config); value([{raw, Raw}|Tokens], Handler, Stack, Config) when is_binary(Raw) -> value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config); value([], Handler, Stack, Config) -> diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 6f4114e..5bd862e 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -431,7 +431,7 @@ rep_manipulation_test_() -> handle_event_test_() -> - Data = jsx:test_cases(), + Data = jsx:test_cases() ++ jsx:special_test_cases(), [ { Title, ?_assertEqual( diff --git a/src/jsx_verify.erl b/src/jsx_verify.erl index 34c50e5..e923691 100644 --- a/src/jsx_verify.erl +++ b/src/jsx_verify.erl @@ -159,7 +159,7 @@ repeated_keys_test_() -> handle_event_test_() -> - Data = jsx:test_cases(), + Data = jsx:test_cases() ++ jsx:special_test_cases(), [ { Title, ?_assertEqual( From 0c7517468cd747337261fa77e0d9fe874c5c8d0f Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 18 Dec 2013 02:39:43 +0000 Subject: [PATCH 35/40] update CHANGES --- CHANGES.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 438b7dd..e08a0bb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,14 @@ +v2.0 + +* jsx is much more pragmatic by default; common 
json errors are silently + ignored (and fixed). stricter parsing must be enabled with options +* removed `pre_encode` and `post_decode` options in favour of making jsx + functions easier to wrap and customize +* streaming behavior is now disabled by default and must be requested explicitly +* removed deprecated function names (`to_json`, `to_term`, `term_to_json`, etc) +* expanded test coverage + + v1.4.5 * various fixes to typespecs uncovered by dialyzer From 00469ba9c6fd93d688624eda0f42e5797b07c46f Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 12 Jan 2014 20:36:19 +0000 Subject: [PATCH 36/40] change `get_key/1' functions to return bare value or throw error add `start_term' and `start_json' functions --- src/jsx_to_json.erl | 27 ++++++++++++++++++++------- src/jsx_to_term.erl | 27 ++++++++++++++++++++------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 5bd862e..6b271ca 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -25,6 +25,7 @@ -export([to_json/2, format/2]). -export([init/1, handle_event/2]). +-export([start_json/0, start_json/1]). -export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1]). @@ -150,6 +151,10 @@ indent_or_space(Config) -> %% `{array, Array}` %% `Object` and `Array` are utf8 encoded binaries +start_json() -> {[], #config{}}. + +start_json(Config) when is_list(Config) -> {[], parse_config(Config)}. + %% allocate a new object on top of the stack start_object({Stack, Config}) -> {[{object, ?start_object}] ++ Stack, Config}. @@ -234,8 +239,8 @@ insert(Key, Value, {[{object, Object}|Rest], Config}) when is_binary(Key), is_bi insert(_, _, _) -> erlang:error(badarg). -get_key({[{object, Key, _}|_], _}) -> {ok, Key}; -get_key(_) -> {error, nokey}. +get_key({[{object, Key, _}|_], _}) -> Key; +get_key(_) -> erlang:error(badarg). 
@@ -360,6 +365,14 @@ format_test_() -> rep_manipulation_test_() -> [ + {"allocate a new context", ?_assertEqual( + {[], #config{}}, + start_json() + )}, + {"allocate a new context with config", ?_assertEqual( + {[], #config{space=1, indent=2}}, + start_json([{space, 1}, {indent, 2}]) + )}, {"allocate a new object on an empty stack", ?_assertEqual( {[{object, <<"{">>}], #config{}}, start_object({[], #config{}}) @@ -381,15 +394,15 @@ rep_manipulation_test_() -> insert(<<"\"key\"">>, {[{object, <<"{">>}], #config{}}) )}, {"get current key", ?_assertEqual( - {ok, key}, + key, get_key({[{object, key, <<"{">>}], #config{}}) )}, - {"try to get non-key from object", ?_assertEqual( - {error, nokey}, + {"try to get non-key from object", ?_assertError( + badarg, get_key({[{object, <<"{">>}], #config{}}) )}, - {"try to get key from array", ?_assertEqual( - {error, nokey}, + {"try to get key from array", ?_assertError( + badarg, get_key({[{array, <<"[">>}], #config{}}) )}, {"insert a value into an object", ?_assertEqual( diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index fd90d10..9e3589c 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -25,6 +25,7 @@ -export([to_term/2]). -export([init/1, handle_event/2]). +-export([start_term/0, start_term/1]). -export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1]). @@ -117,6 +118,10 @@ format_key(Key, Config) -> %% an array looks like %% `{array, [NthValue, NthMinus1Value,...FirstValue]}` +start_term() -> {[], #config{}}. + +start_term(Config) when is_list(Config) -> {[], parse_config(Config)}. + %% allocate a new object on top of the stack start_object({Stack, Config}) -> {[{object, []}] ++ Stack, Config}. @@ -149,8 +154,8 @@ insert(Key, Value, {[{object, Pairs}|Rest], Config}) -> insert(_, _, _) -> erlang:error(badarg). -get_key({[{object, Key, _}|_], _}) -> {ok, Key}; -get_key(_) -> {error, nokey}. +get_key({[{object, Key, _}|_], _}) -> Key; +get_key(_) -> erlang:error(badarg). 
@@ -200,6 +205,14 @@ format_key_test_() -> rep_manipulation_test_() -> [ + {"allocate a new context", ?_assertEqual( + {[], #config{}}, + start_term() + )}, + {"allocate a new context with option", ?_assertEqual( + {[], #config{labels=atom}}, + start_term([{labels, atom}]) + )}, {"allocate a new object on an empty stack", ?_assertEqual( {[{object, []}], #config{}}, start_object({[], #config{}}) @@ -221,15 +234,15 @@ rep_manipulation_test_() -> insert(key, {[{object, []}, junk], #config{}}) )}, {"get current key", ?_assertEqual( - {ok, key}, + key, get_key({[{object, key, []}], #config{}}) )}, - {"try to get non-key from object", ?_assertEqual( - {error, nokey}, + {"try to get non-key from object", ?_assertError( + badarg, get_key({[{object, []}], #config{}}) )}, - {"try to get key from array", ?_assertEqual( - {error, nokey}, + {"try to get key from array", ?_assertError( + badarg, get_key({[{array, []}], #config{}}) )}, {"insert a value into an object", ?_assertEqual( From c81e2108dd47f5a7915580b41f782dc5aed9a493 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 12 Jan 2014 21:38:02 +0000 Subject: [PATCH 37/40] add `get_value/1' function for retrieving the current (final?) value of the internal representations from `jsx_to_json' and `jsx_to_term' add all internal representation functions to `jsx' module and export them --- src/jsx.erl | 99 +++++++++++++++++++++++++++++++++++++++++++++ src/jsx_to_json.erl | 12 +++++- src/jsx_to_term.erl | 10 +++-- 3 files changed, 116 insertions(+), 5 deletions(-) diff --git a/src/jsx.erl b/src/jsx.erl index 3dac4f8..79ffb06 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -28,9 +28,12 @@ -export([format/1, format/2, minify/1, prettify/1]). -export([encoder/3, decoder/3, parser/3]). -export([resume/3]). +-export([start_json/0, start_json/1, start_term/0, start_term/1]). +-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]). -export_type([json_term/0, json_text/0, token/0]). 
-export_type([encoder/0, decoder/0, parser/0, internal_state/0]). +-export_type([internal_json/0, internal_term/0, internal_thing/0]). -ifdef(TEST). @@ -150,12 +153,108 @@ resume(Term, {parser, State, Handler, Stack}, Config) -> jsx_parser:resume(Term, State, Handler, Stack, jsx_config:parse_config(Config)). +-opaque internal_json() :: tuple(). + +-spec start_json() -> internal_json(). +-spec start_json(Config::list()) -> internal_json(). + +start_json() -> {jsx_to_json, jsx_to_json:start_json()}. +start_json(Config) -> {jsx_to_json, jsx_to_json:start_json(Config)}. + + +-opaque internal_term() :: tuple(). + +-spec start_term() -> internal_term(). +-spec start_term(Config::list()) -> internal_term(). + +start_term() -> {jsx_to_term, jsx_to_term:start_term()}. +start_term(Config) -> {jsx_to_term, jsx_to_term:start_term(Config)}. + + +% naming things is hard +-opaque internal_thing() :: internal_term() | internal_json(). + + +-spec start_object(internal_thing()) -> internal_thing(). + +start_object({Handler, Internals}) -> {Handler, Handler:start_object(Internals)}. + + +-spec start_array(internal_thing()) -> internal_thing(). + +start_array({Handler, Internals}) -> {Handler, Handler:start_array(Internals)}. + + +-spec finish(internal_thing()) -> internal_thing(). + +finish({Handler, Internals}) -> {Handler, Handler:finish(Internals)}. + + +-spec insert(Value::any(), internal_thing()) -> internal_thing(). + +insert(Value, {Handler, Internals}) -> {Handler, Handler:insert(Value, Internals)}. + + +-spec insert(Key::any(), Value::any(), internal_thing()) -> internal_thing(). + +insert(Key, Value, {Handler, Internals}) -> {Handler, Handler:insert(Key, Value, Internals)}. + + +-spec get_key(internal_thing()) -> atom() | binary(). + +get_key({Handler, Internals}) -> Handler:get_key(Internals). + + +-spec get_value(internal_thing()) -> any(). + +get_value({Handler, Internals}) -> Handler:get_value(Internals). + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). 
+%% sanity checks for internal format handlers + +internal_format_sanity_test_() -> + [ + {"jsx term sanity check", ?_assertEqual( + [{key, [1, true, <<"hallo world">>]}], + begin + NewTerm = start_term(), + ObjectAllocated = start_object(NewTerm), + KeyInserted = insert(key, ObjectAllocated), + ArrayAllocated = start_array(KeyInserted), + OneInserted = insert(1, ArrayAllocated), + TrueInserted = insert(true, OneInserted), + HalloWorldInserted = insert(<<"hallo world">>, TrueInserted), + ArrayClosed = finish(HalloWorldInserted), + ObjectClosed = finish(ArrayClosed), + TermClosed = get_value(ObjectClosed), + TermClosed + end + )}, + {"jsx json sanity check", ?_assertEqual( + <<"{\"key\":[1,true,\"hallo world\"]}">>, + begin + NewTerm = start_json(), + ObjectAllocated = start_object(NewTerm), + KeyInserted = insert(<<"\"key\"">>, ObjectAllocated), + ArrayAllocated = start_array(KeyInserted), + OneInserted = insert(<<"1">>, ArrayAllocated), + TrueInserted = insert(<<"true">>, OneInserted), + HalloWorldInserted = insert(<<"\"hallo world\"">>, TrueInserted), + ArrayClosed = finish(HalloWorldInserted), + ObjectClosed = finish(ArrayClosed), + TermClosed = get_value(ObjectClosed), + TermClosed + end + )} + ]. + + %% test handler init([]) -> []. diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 6b271ca..2616200 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -26,7 +26,7 @@ -export([to_json/2, format/2]). -export([init/1, handle_event/2]). -export([start_json/0, start_json/1]). --export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1]). +-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]). -record(config, { @@ -95,7 +95,7 @@ init(Config) -> {[], parse_config(Config)}. -spec handle_event(Event::any(), State::state()) -> state(). 
-handle_event(end_json, {Term, _Config}) -> Term; +handle_event(end_json, State) -> get_value(State); handle_event(start_object, State) -> start_object(State); handle_event(end_object, State) -> finish(State); @@ -243,6 +243,14 @@ get_key({[{object, Key, _}|_], _}) -> Key; get_key(_) -> erlang:error(badarg). +get_value({Value, _Config}) -> + case Value of + Value when is_binary(Value) -> Value; + _ -> erlang:error(badarg) + end; +get_value(_) -> erlang:error(badarg). + + %% eunit tests diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 9e3589c..d9840d3 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -26,7 +26,7 @@ -export([to_term/2]). -export([init/1, handle_event/2]). -export([start_term/0, start_term/1]). --export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1]). +-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]). -record(config, { @@ -80,7 +80,7 @@ init(Config) -> {[], parse_config(Config)}. -spec handle_event(Event::any(), State::state()) -> state(). -handle_event(end_json, {Term, _Config}) -> Term; +handle_event(end_json, State) -> get_value(State); handle_event(start_object, State) -> start_object(State); handle_event(end_object, State) -> finish(State); @@ -128,7 +128,8 @@ start_object({Stack, Config}) -> {[{object, []}] ++ Stack, Config}. %% allocate a new array on top of the stack start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}. -%% finish an object or array and insert it into the parent object if it exists +%% finish an object or array and insert it into the parent object if it exists or +%% return it if it is the root object finish({[{object, []}], Config}) -> {[{}], Config}; finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; @@ -158,6 +159,9 @@ get_key({[{object, Key, _}|_], _}) -> Key; get_key(_) -> erlang:error(badarg).
+get_value({Value, _Config}) -> Value; +get_value(_) -> erlang:error(badarg). + %% eunit tests From 1050f560a73819daa62d604f73850bf6bce543d3 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Mon, 13 Jan 2014 07:59:59 +0000 Subject: [PATCH 38/40] update CHANGES --- CHANGES.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index e08a0bb..fd6d732 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,9 +4,11 @@ v2.0 ignored (and fixed). stricter parsing must be enabled with options * removed `pre_encode` and `post_decode` options in favour of making jsx functions easier to wrap and customize +* added abstraction layer for manipulating the internal state of `jsx_to_term` + and `jsx_to_json` and exposed it to user code * streaming behavior is now disabled by default and must be requested explicitly * removed deprecated function names (`to_json`, `to_term`, `term_to_json`, etc) -* expanded test coverage +* expanded test coverage v1.4.5 From 52f5824de4d9807dc037e6ba6f033b0b2c9b8afb Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 14 Jan 2014 00:33:31 +0000 Subject: [PATCH 39/40] strip out public api for term/json building functions. not mature yet --- src/jsx.erl | 99 ----------------------------------------------------- 1 file changed, 99 deletions(-) diff --git a/src/jsx.erl b/src/jsx.erl index 79ffb06..3dac4f8 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -28,12 +28,9 @@ -export([format/1, format/2, minify/1, prettify/1]). -export([encoder/3, decoder/3, parser/3]). -export([resume/3]). --export([start_json/0, start_json/1, start_term/0, start_term/1]). --export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]). -export_type([json_term/0, json_text/0, token/0]). -export_type([encoder/0, decoder/0, parser/0, internal_state/0]). --export_type([internal_json/0, internal_term/0, internal_thing/0]). -ifdef(TEST). 
@@ -153,108 +150,12 @@ resume(Term, {parser, State, Handler, Stack}, Config) -> jsx_parser:resume(Term, State, Handler, Stack, jsx_config:parse_config(Config)). --opaque internal_json() :: tuple(). - --spec start_json() -> internal_json(). --spec start_json(Config::list()) -> internal_json(). - -start_json() -> {jsx_to_json, jsx_to_json:start_json()}. -start_json(Config) -> {jsx_to_json, jsx_to_json:start_json(Config)}. - - --opaque internal_term() :: tuple(). - --spec start_term() -> internal_term(). --spec start_term(Config::list()) -> internal_term(). - -start_term() -> {jsx_to_term, jsx_to_term:start_term()}. -start_term(Config) -> {jsx_to_term, jsx_to_term:start_term(Config)}. - - -% naming things is hard --opaque internal_thing() :: internal_term() | internal_json(). - - --spec start_object(internal_thing()) -> internal_thing(). - -start_object({Handler, Internals}) -> {Handler, Handler:start_object(Internals)}. - - --spec start_array(internal_thing()) -> internal_thing(). - -start_array({Handler, Internals}) -> {Handler, Handler:start_array(Internals)}. - - --spec finish(internal_thing()) -> internal_thing(). - -finish({Handler, Internals}) -> {Handler, Handler:finish(Internals)}. - - --spec insert(Value::any(), internal_thing()) -> internal_thing(). - -insert(Value, {Handler, Internals}) -> {Handler, Handler:insert(Value, Internals)}. - - --spec insert(Key::any(), Value::any(), internal_thing()) -> internal_thing(). - -insert(Key, Value, {Handler, Internals}) -> {Handler, Handler:insert(Key, Value, Internals)}. - - --spec get_key(internal_thing()) -> atom() | binary(). - -get_key({Handler, Internals}) -> Handler:get_key(Internals). - - --spec get_value(internal_thing()) -> any(). - -get_value({Handler, Internals}) -> Handler:get_value(Internals). - - -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). 
-%% sanity checks for internal format handlers - -internal_format_sanity_test_() -> - [ - {"jsx term sanity check", ?_assertEqual( - [{key, [1, true, <<"hallo world">>]}], - begin - NewTerm = start_term(), - ObjectAllocated = start_object(NewTerm), - KeyInserted = insert(key, ObjectAllocated), - ArrayAllocated = start_array(KeyInserted), - OneInserted = insert(1, ArrayAllocated), - TrueInserted = insert(true, OneInserted), - HalloWorldInserted = insert(<<"hallo world">>, TrueInserted), - ArrayClosed = finish(HalloWorldInserted), - ObjectClosed = finish(ArrayClosed), - TermClosed = get_value(ObjectClosed), - TermClosed - end - )}, - {"jsx json sanity check", ?_assertEqual( - <<"{\"key\":[1,true,\"hallo world\"]}">>, - begin - NewTerm = start_json(), - ObjectAllocated = start_object(NewTerm), - KeyInserted = insert(<<"\"key\"">>, ObjectAllocated), - ArrayAllocated = start_array(KeyInserted), - OneInserted = insert(<<"1">>, ArrayAllocated), - TrueInserted = insert(<<"true">>, OneInserted), - HalloWorldInserted = insert(<<"\"hallo world\"">>, TrueInserted), - ArrayClosed = finish(HalloWorldInserted), - ObjectClosed = finish(ArrayClosed), - TermClosed = get_value(ObjectClosed), - TermClosed - end - )} - ]. - - %% test handler init([]) -> []. From 92f0a65dab1cf1524a6b65e263acb6174e0d0dc8 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 14 Jan 2014 00:39:06 +0000 Subject: [PATCH 40/40] important note about 2.0 preview --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b0316d0..cb79505 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,16 @@ current status: [![Build Status](https://secure.travis-ci.org/talentdeficit/jsx. copyright 2010-2013 alisdair sullivan +## really important note ## + +this is a preview of the 2.0 release. there are lots of changes. 
see [CHANGES.md](CHANGES.md) +for the overview or read this document for the details + ## index ## * [quickstart](#quickstart) * [description](#description) - [json <-> erlang mapping](#json---erlang-mapping) - - [extending jsx](#extending-jsx) - [incomplete input](#incomplete-input) * [data types](#data-types) - [`json_term()`](#json_term)