the option single_quotes in functions dealing with json inputs now allows json that uses single quotes to delimit keys and strings to be processed. note that this changes the escaping rules slightly

This commit is contained in:
alisdair sullivan 2012-03-14 23:01:59 -07:00
parent c4b468f20a
commit 50b00bac0f
5 changed files with 91 additions and 13 deletions

View file

@ -32,6 +32,7 @@ types:
* `Opts` = `[]` | `[Opt]`
* `Opt` =
- `loose_unicode`
- `single_quotes`
- `labels`
- `{labels, Label}`
- `Label` =
@ -40,7 +41,9 @@ types:
* `existing_atom`
- `explicit_end`
`JSON` SHOULD be a utf8 encoded binary. if the option `loose_unicode` is present attempts are made to replace invalid codepoints with `u+FFFD` but badly encoded binaries may, in either case, result in `badarg` errors
`JSON` SHOULD be a utf8 encoded binary. if the option `loose_unicode` is present attempts are made to replace invalid codepoints with `u+FFFD` but badly encoded binaries may, in either case, result in `badarg` errors
valid json strings are delimited by double quotes, but some implementations allow single quotes in their place. the `single_quotes` option recognizes json texts with single quotes in the place of double quotes as valid. please be aware that if you enable this option, you MUST escape single quotes in keys and strings
the option `labels` controls how keys are converted from json to erlang terms. `binary` does no conversion beyond normal escaping. `atom` converts keys to erlang atoms, and results in a badarg error if keys fall outside the range of erlang atoms. `existing_atom` is identical to `atom`, except it will not add new atoms to the atom table
@ -97,10 +100,13 @@ types:
- `indent`
- `{indent, N}`
- `loose_unicode`
- `single_quotes`
- `escape_forward_slash`
- `explicit_end`
`JSON` SHOULD be a utf8 encoded binary. if the option `loose_unicode` is present attempts are made to replace invalid codepoints with `u+FFFD` but badly encoded binaries may, in either case, result in `badarg` errors
`JSON` SHOULD be a utf8 encoded binary. if the option `loose_unicode` is present attempts are made to replace invalid codepoints with `u+FFFD` but badly encoded binaries may, in either case, result in `badarg` errors
valid json strings are delimited by double quotes, but some implementations allow single quotes in their place. the `single_quotes` option recognizes json texts with single quotes in the place of double quotes as valid. please be aware that if you enable this option, you MUST escape single quotes in keys and strings
the option `{space, N}` inserts `N` spaces after every comma and colon in your json output. `space` is an alias for `{space, 1}`. the default is `{space, 0}`
@ -125,6 +131,7 @@ types:
* `Opts` = `[]` | `[Opt]`
* `Opt` =
- `loose_unicode`
- `single_quotes`
- `explicit_end`
see `json_to_term` for details of options

View file

@ -120,6 +120,52 @@ encoder_decoder_equiv_test_() ->
].
%% eunit test generator for the `single_quotes` option. each entry is a
%% {Title, Test} pair. assertions follow eunit's
%% ?_assertEqual(Expected, Actual) argument order, so a failure reports the
%% literal term as `expected` and the result of `to_term` as `value`
single_quotes_test_() ->
    [
        {"single quoted keys",
            ?_assertEqual(
                [{<<"key">>, true}],
                to_term(<<"{'key':true}">>, [single_quotes])
            )
        },
        {"multiple single quoted keys",
            ?_assertEqual(
                [{<<"key">>, true}, {<<"another key">>, true}],
                to_term(<<"{'key':true, 'another key':true}">>, [single_quotes])
            )
        },
        {"nested single quoted keys",
            ?_assertEqual(
                [{<<"key">>, [{<<"key">>, true}, {<<"another key">>, true}]}],
                to_term(<<"{'key': {'key':true, 'another key':true}}">>, [single_quotes])
            )
        },
        {"single quoted string",
            ?_assertEqual(
                [<<"string">>],
                to_term(<<"['string']">>, [single_quotes])
            )
        },
        %% no options are passed here: an unescaped single quote inside a
        %% double quoted string is expected to come through unchanged
        {"single quote in double quoted string",
            ?_assertEqual(
                [<<"a single quote: '">>],
                to_term(<<"[\"a single quote: '\"]">>)
            )
        },
        %% with `single_quotes` set, \' inside a single quoted string is
        %% expected to decode to a literal single quote
        {"escaped single quote in single quoted string",
            ?_assertEqual(
                [<<"a single quote: '">>],
                to_term(<<"['a single quote: \\'']">>, [single_quotes])
            )
        },
        %% without the `single_quotes` option the \' escape is expected to be
        %% rejected with a badarg error
        {"escaped single quote when single quotes are disallowed",
            ?_assertError(
                badarg,
                to_term(<<"[\"a single quote: \\'\"]">>)
            )
        }
    ].
%% test handler
init([]) -> [].

View file

@ -59,7 +59,8 @@ decoder(Handler, State, Opts) ->
%% kv separator
-define(comma, 16#2C).
-define(quote, 16#22).
-define(doublequote, 16#22).
-define(singlequote, 16#27).
-define(colon, 16#3A).
%% string escape sequences
@ -130,7 +131,9 @@ decoder(Handler, State, Opts) ->
-define(end_seq(Seq), unicode:characters_to_binary(lists:reverse(Seq))).
value(<<?quote, Rest/binary>>, Handler, Stack, Opts) ->
value(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) ->
string(Rest, Handler, [?new_seq()|Stack], Opts);
value(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quotes=true}) ->
string(Rest, Handler, [?new_seq()|Stack], Opts);
value(<<$t, Rest/binary>>, Handler, Stack, Opts) ->
tr(Rest, Handler, Stack, Opts);
@ -156,7 +159,9 @@ value(Bin, Handler, Stack, Opts) ->
?error([Bin, Handler, Stack, Opts]).
object(<<?quote, Rest/binary>>, Handler, Stack, Opts) ->
object(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) ->
string(Rest, Handler, [?new_seq()|Stack], Opts);
object(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quotes=true}) ->
string(Rest, Handler, [?new_seq()|Stack], Opts);
object(<<?end_object, Rest/binary>>, {Handler, State}, [key|Stack], Opts) ->
maybe_done(Rest, {Handler, Handler:handle_event(end_object, State)}, Stack, Opts);
@ -168,7 +173,9 @@ object(Bin, Handler, Stack, Opts) ->
?error([Bin, Handler, Stack, Opts]).
array(<<?quote, Rest/binary>>, Handler, Stack, Opts) ->
array(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) ->
string(Rest, Handler, [?new_seq()|Stack], Opts);
array(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quotes=true}) ->
string(Rest, Handler, [?new_seq()|Stack], Opts);
array(<<$t, Rest/binary>>, Handler, Stack, Opts) ->
tr(Rest, Handler, Stack, Opts);
@ -206,7 +213,9 @@ colon(Bin, Handler, Stack, Opts) ->
?error([Bin, Handler, Stack, Opts]).
key(<<?quote, Rest/binary>>, Handler, Stack, Opts) ->
key(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) ->
string(Rest, Handler, [?new_seq()|Stack], Opts);
key(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quotes=true}) ->
string(Rest, Handler, [?new_seq()|Stack], Opts);
key(<<S, Rest/binary>>, Handler, Stack, Opts) when ?is_whitespace(S) ->
key(Rest, Handler, Stack, Opts);
@ -233,13 +242,25 @@ partial_utf(<<X, Y, Z>>)
partial_utf(_) -> false.
string(<<?quote/utf8, Rest/binary>>, {Handler, State}, [Acc, key|Stack], Opts) ->
string(<<?doublequote, Rest/binary>>, {Handler, State}, [Acc, key|Stack], Opts) ->
colon(Rest,
{Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)},
[key|Stack],
Opts
);
string(<<?quote/utf8, Rest/binary>>, {Handler, State}, [Acc|Stack], Opts) ->
string(<<?singlequote, Rest/binary>>, {Handler, State}, [Acc, key|Stack], Opts = #opts{single_quotes=true}) ->
colon(Rest,
{Handler, Handler:handle_event({key, ?end_seq(Acc)}, State)},
[key|Stack],
Opts
);
string(<<?doublequote, Rest/binary>>, {Handler, State}, [Acc|Stack], Opts) ->
maybe_done(Rest,
{Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)},
Stack,
Opts
);
string(<<?singlequote, Rest/binary>>, {Handler, State}, [Acc|Stack], Opts = #opts{single_quotes=true}) ->
maybe_done(Rest,
{Handler, Handler:handle_event({string, ?end_seq(Acc)}, State)},
Stack,
@ -318,8 +339,10 @@ escape(<<$t, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
escape(<<$u, Rest/binary>>, Handler, Stack, Opts) ->
escaped_unicode(Rest, Handler, [?new_seq()|Stack], Opts);
escape(<<S, Rest/binary>>, Handler, [Acc|Stack], Opts)
when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus ->
when S =:= ?doublequote; S =:= ?solidus; S =:= ?rsolidus ->
string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts);
escape(<<?singlequote, Rest/binary>>, Handler, [Acc|Stack], Opts = #opts{single_quotes=true}) ->
string(Rest, Handler, [?acc_seq(Acc, ?singlequote)|Stack], Opts);
escape(<<>>, Handler, Stack, Opts) ->
?incomplete(escape, <<>>, Handler, Stack, Opts);
escape(Bin, Handler, Stack, Opts) ->

View file

@ -2,5 +2,5 @@
loose_unicode = false,
escape_forward_slash = false,
explicit_end = false,
parser = auto
single_quotes = false
}).

View file

@ -43,6 +43,8 @@ parse_opts([escape_forward_slash|Rest], Opts) ->
parse_opts(Rest, Opts#opts{escape_forward_slash=true});
parse_opts([explicit_end|Rest], Opts) ->
parse_opts(Rest, Opts#opts{explicit_end=true});
parse_opts([single_quotes|Rest], Opts) ->
parse_opts(Rest, Opts#opts{single_quotes=true});
parse_opts(_, _) ->
{error, badarg}.
@ -52,12 +54,12 @@ extract_opts(Opts) ->
%% filters an option list down to the options named in the whitelist used below.
%% both `{K, V}` tuples and bare terms `K` are checked against the whitelist;
%% anything else is dropped. matches are prepended to `Acc`, so the result is in
%% reverse order relative to the input list
extract_parser_opts([], Acc) -> Acc;
extract_parser_opts([{K,V}|Rest], Acc) ->
%% NOTE(review): the two consecutive `case` lines below look like the before and
%% after versions of a single changed line in this diff view; the second one,
%% which adds `single_quotes`, appears to be the current version -- confirm
case lists:member(K, [loose_unicode, escape_forward_slash, explicit_end]) of
case lists:member(K, [loose_unicode, escape_forward_slash, explicit_end, single_quotes]) of
true -> extract_parser_opts(Rest, [{K,V}] ++ Acc)
; false -> extract_parser_opts(Rest, Acc)
end;
extract_parser_opts([K|Rest], Acc) ->
%% NOTE(review): same before/after pair of `case` lines as in the clause above -- confirm
case lists:member(K, [loose_unicode, escape_forward_slash, explicit_end]) of
case lists:member(K, [loose_unicode, escape_forward_slash, explicit_end, single_quotes]) of
true -> extract_parser_opts(Rest, [K] ++ Acc)
; false -> extract_parser_opts(Rest, Acc)
end.