From c6827d06de2b69591576336fc9ad84a2f841d74d Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 14 Mar 2012 23:01:59 -0700 Subject: [PATCH 1/2] the option single_quotes in functions dealing with json inputs now allows json that uses single quotes to delimit keys and strings to be processed, note that this changes the escaping rules slightly --- README.markdown | 11 ++++++++-- src/jsx.erl | 52 ++++++++++++++++++++++++++++++++++++++++++++ src/jsx_decoder.erl | 53 ++++++++++++++++++++++++++++++--------------- src/jsx_opts.hrl | 2 +- src/jsx_utils.erl | 6 +++-- 5 files changed, 101 insertions(+), 23 deletions(-) diff --git a/README.markdown b/README.markdown index 473882f..0464f00 100644 --- a/README.markdown +++ b/README.markdown @@ -32,6 +32,7 @@ types: * `Opts` = `[]` | `[Opt]` * `Opt` = - `loose_unicode` + - `single_quotes` - `labels` - `{labels, Label}` - `Label` = @@ -40,7 +41,9 @@ types: * `existing_atom` - `explicit_end` -`JSON` SHOULD be a utf8 encoded binary. if the option `loose_unicode` is present attempts are made to replace invalid codepoints with `u+FFFD` but badly encoded binaries may, in either case, result in `badarg` errors +`JSON` SHOULD be a utf8 encoded binary. if the option `loose_unicode` is present attempts are made to replace invalid codepoints with `u+FFFD` but badly encoded binaries may, in either case, result in `badarg` errors + +valid json strings are delimited by double quotes, but some implementations allow single quotes in their place. the `single_quotes` option recognizes json texts with single quotes in the place of double quotes as valid. please be aware that if you enable this option, you MUST escape single quotes in keys and strings the option `labels` controls how keys are converted from json to erlang terms. `binary` does no conversion beyond normal escaping. `atom` converts keys to erlang atoms, and results in a badarg error if keys fall outside the range of erlang atoms. 
`existing_atom` is identical to `atom`, except it will not add new atoms to the atom table @@ -97,10 +100,13 @@ types: - `indent` - `{indent, N}` - `loose_unicode` + - `single_quotes` - `escape_forward_slash` - `explicit_end` -`JSON` SHOULD be a utf8 encoded binary. if the option `loose_unicode` is present attempts are made to replace invalid codepoints with `u+FFFD` but badly encoded binaries may, in either case, result in `badarg` errors +`JSON` SHOULD be a utf8 encoded binary. if the option `loose_unicode` is present attempts are made to replace invalid codepoints with `u+FFFD` but badly encoded binaries may, in either case, result in `badarg` errors + +valid json strings are delimited by double quotes, but some implementations allow single quotes in their place. the `single_quotes` option recognizes json texts with single quotes in the place of double quotes as valid. please be aware that if you enable this option, you MUST escape single quotes in keys and strings the option `{space, N}` inserts `N` spaces after every comma and colon in your json output. `space` is an alias for `{space, 1}`. the default is `{space, 0}` @@ -125,6 +131,7 @@ types: * `Opts` = `[]` | `[Opt]` * `Opt` = - `loose_unicode` + - `single_quotes` - `explicit_end` see `json_to_term` for details of options diff --git a/src/jsx.erl b/src/jsx.erl index 6c0dbe5..ff1bc09 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -120,6 +120,58 @@ encoder_decoder_equiv_test_() -> ]. 
+single_quotes_test_() -> + [ + {"single quoted keys", + ?_assertEqual( + to_term(<<"{'key':true}">>, [single_quotes]), + [{<<"key">>, true}] + ) + }, + {"multiple single quoted keys", + ?_assertEqual( + to_term(<<"{'key':true, 'another key':true}">>, [single_quotes]), + [{<<"key">>, true}, {<<"another key">>, true}] + ) + }, + {"nested single quoted keys", + ?_assertEqual( + to_term(<<"{'key': {'key':true, 'another key':true}}">>, [single_quotes]), + [{<<"key">>, [{<<"key">>, true}, {<<"another key">>, true}]}] + ) + }, + {"single quoted string", + ?_assertEqual( + to_term(<<"['string']">>, [single_quotes]), + [<<"string">>] + ) + }, + {"single quote in double quoted string", + ?_assertEqual( + to_term(<<"[\"a single quote: '\"]">>, [single_quotes]), + [<<"a single quote: '">>] + ) + }, + {"escaped single quote in single quoted string", + ?_assertEqual( + to_term(<<"['a single quote: \\'']">>, [single_quotes]), + [<<"a single quote: '">>] + ) + }, + {"escaped single quote when single quotes are disallowed", + ?_assertError( + badarg, + to_term(<<"[\"a single quote: \\'\"]">>) + ) + }, + {"mismatched quotes", + ?_assertError( + badarg, + to_term(<<"['mismatched\"]">>, [single_quotes]) + ) + } + ]. + %% test handler init([]) -> []. diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 0a82ea2..2cca939 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -59,7 +59,8 @@ decoder(Handler, State, Opts) -> %% kv seperator -define(comma, 16#2C). --define(quote, 16#22). +-define(doublequote, 16#22). +-define(singlequote, 16#27). -define(colon, 16#3A). %% string escape sequences @@ -130,8 +131,10 @@ decoder(Handler, State, Opts) -> -define(end_seq(Seq), unicode:characters_to_binary(lists:reverse(Seq))). 
-value(<>, Handler, Stack, Opts) -> +value(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); +value(<>, Handler, Stack, Opts = #opts{single_quotes=true}) -> + string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); value(<<$t, Rest/binary>>, Handler, Stack, Opts) -> tr(Rest, Handler, Stack, Opts); value(<<$f, Rest/binary>>, Handler, Stack, Opts) -> @@ -156,8 +159,10 @@ value(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). -object(<>, Handler, Stack, Opts) -> +object(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); +object(<>, Handler, Stack, Opts = #opts{single_quotes=true}) -> + string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); object(<>, {Handler, State}, [key|Stack], Opts) -> maybe_done(Rest, {Handler, Handler:handle_event(end_object, State)}, Stack, Opts); object(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> @@ -168,8 +173,10 @@ object(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). -array(<>, Handler, Stack, Opts) -> +array(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); +array(<>, Handler, Stack, Opts = #opts{single_quotes=true}) -> + string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); array(<<$t, Rest/binary>>, Handler, Stack, Opts) -> tr(Rest, Handler, Stack, Opts); array(<<$f, Rest/binary>>, Handler, Stack, Opts) -> @@ -206,8 +213,10 @@ colon(Bin, Handler, Stack, Opts) -> ?error([Bin, Handler, Stack, Opts]). -key(<>, Handler, Stack, Opts) -> +key(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); +key(<>, Handler, Stack, Opts = #opts{single_quotes=true}) -> + string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); key(<>, Handler, Stack, Opts) when ?is_whitespace(S) -> key(Rest, Handler, Stack, Opts); key(<<>>, Handler, Stack, Opts) -> @@ -233,18 +242,24 @@ partial_utf(<>) partial_utf(_) -> false. 
-string(<>, {Handler, State}, [Acc, key|Stack], Opts) -> - colon(Rest, - {Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)}, - [key|Stack], - Opts - ); -string(<>, {Handler, State}, [Acc|Stack], Opts) -> - maybe_done(Rest, - {Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)}, - Stack, - Opts - ); +string(<>, {Handler, State}, S, Opts) -> + case S of + [Acc, key|Stack] -> + colon(Rest, {Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)}, [key|Stack], Opts); + [Acc|Stack] -> + maybe_done(Rest, {Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)}, Stack, Opts); + [Acc, single_quote|Stack] -> + ?error([<>, {Handler, State}, S, Opts]) + end; +string(<>, {Handler, State}, S, Opts = #opts{single_quotes=true}) -> + case S of + [Acc, single_quote, key|Stack] -> + colon(Rest, {Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)}, [key|Stack], Opts); + [Acc, single_quote|Stack] -> + maybe_done(Rest, {Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)}, Stack, Opts); + [Acc|Stack] -> + string(Rest, {Handler, State}, [?acc_seq(Acc, ?singlequote)|Stack], Opts) + end; string(<>, Handler, Stack, Opts) -> escape(Rest, Handler, Stack, Opts); %% things get dumb here. 
erlang doesn't properly restrict unicode non-characters @@ -318,8 +333,10 @@ escape(<<$t, Rest/binary>>, Handler, [Acc|Stack], Opts) -> escape(<<$u, Rest/binary>>, Handler, Stack, Opts) -> escaped_unicode(Rest, Handler, [?new_seq()|Stack], Opts); escape(<>, Handler, [Acc|Stack], Opts) - when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus -> + when S =:= ?doublequote; S =:= ?solidus; S =:= ?rsolidus -> string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts); +escape(<>, Handler, [Acc|Stack], Opts = #opts{single_quotes=true}) -> + string(Rest, Handler, [?acc_seq(Acc, ?singlequote)|Stack], Opts); escape(<<>>, Handler, Stack, Opts) -> ?incomplete(escape, <<>>, Handler, Stack, Opts); escape(Bin, Handler, Stack, Opts) -> diff --git a/src/jsx_opts.hrl b/src/jsx_opts.hrl index d49254b..f60627f 100644 --- a/src/jsx_opts.hrl +++ b/src/jsx_opts.hrl @@ -2,5 +2,5 @@ loose_unicode = false, escape_forward_slash = false, explicit_end = false, - parser = auto + single_quotes = false }). \ No newline at end of file diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index 814092c..2c8b160 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -43,6 +43,8 @@ parse_opts([escape_forward_slash|Rest], Opts) -> parse_opts(Rest, Opts#opts{escape_forward_slash=true}); parse_opts([explicit_end|Rest], Opts) -> parse_opts(Rest, Opts#opts{explicit_end=true}); +parse_opts([single_quotes|Rest], Opts) -> + parse_opts(Rest, Opts#opts{single_quotes=true}); parse_opts(_, _) -> {error, badarg}. 
@@ -52,12 +54,12 @@ extract_opts(Opts) -> extract_parser_opts([], Acc) -> Acc; extract_parser_opts([{K,V}|Rest], Acc) -> - case lists:member(K, [loose_unicode, escape_forward_slash, explicit_end]) of + case lists:member(K, [loose_unicode, escape_forward_slash, explicit_end, single_quotes]) of true -> extract_parser_opts(Rest, [{K,V}] ++ Acc) ; false -> extract_parser_opts(Rest, Acc) end; extract_parser_opts([K|Rest], Acc) -> - case lists:member(K, [loose_unicode, escape_forward_slash, explicit_end]) of + case lists:member(K, [loose_unicode, escape_forward_slash, explicit_end, single_quotes]) of true -> extract_parser_opts(Rest, [K] ++ Acc) ; false -> extract_parser_opts(Rest, Acc) end. From 59689769de368e70bb730132b5de3fb3481510c0 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Thu, 15 Mar 2012 20:54:52 -0700 Subject: [PATCH 2/2] suppress unused var errors --- src/jsx_decoder.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 2cca939..006cc22 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -246,10 +246,10 @@ string(<>, {Handler, State}, S, Opts) -> case S of [Acc, key|Stack] -> colon(Rest, {Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)}, [key|Stack], Opts); + [_Acc, single_quote|_Stack] -> + ?error([<>, {Handler, State}, S, Opts]); [Acc|Stack] -> - maybe_done(Rest, {Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)}, Stack, Opts); - [Acc, single_quote|Stack] -> - ?error([<>, {Handler, State}, S, Opts]) + maybe_done(Rest, {Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)}, Stack, Opts) end; string(<>, {Handler, State}, S, Opts = #opts{single_quotes=true}) -> case S of