From 6b43609730b8ef20ab2fb4dcf5b492c5cad8475b Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 19 Aug 2014 17:47:44 -0700 Subject: [PATCH] allow a single trailing comma in objects or arrays --- README.md | 9 +++++++++ src/jsx_config.erl | 5 +++++ src/jsx_config.hrl | 1 + src/jsx_decoder.erl | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/README.md b/README.md index 9930dc2..7b94bd9 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,10 @@ number of ways. see the section on `strict` in [options](#option) below though json has no official comments but this parser allows c/c++ style comments. anywhere whitespace is allowed you can insert comments (both `// ...` and `/* ... */`) +some particularly irresponsible json emitters leave trailing commas at the end of +objects or arrays. **jsx** allows a single trailing comma in input. multiple commas +in any position or a preceding comma are still errors + all **jsx** decoder input should be `utf8` encoded binaries. sometimes you get binaries that are almost but not quite valid utf8 whether due to improper escaping or poor encoding. **jsx** replaces invalid codepoints and poorly formed sequences with the @@ -337,6 +341,7 @@ option() = escaped_forward_slashes | stream strict_option() = comments + | trailing_commas | utf8 | single_quotes | escapes @@ -390,6 +395,10 @@ additional options beyond these. 
see comments are disabled and result in a `badarg` error + * `trailing_commas` + + trailing commas in an object or list result in `badarg` errors + * `utf8` invalid codepoints and malformed unicode result in `badarg` errors diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 617dc0c..909eeda 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -65,6 +65,7 @@ parse_config([dirty_strings|Rest], Config) -> parse_config(Rest, Config#config{dirty_strings=true}); parse_config([strict|Rest], Config) -> parse_config(Rest, Config#config{strict_comments=true, + strict_commas=true, strict_utf8=true, strict_single_quotes=true, strict_escapes=true @@ -89,6 +90,8 @@ parse_config(_Options, _Config) -> erlang:error(badarg). parse_strict([], Rest, Config) -> parse_config(Rest, Config); parse_strict([comments|Strict], Rest, Config) -> parse_strict(Strict, Rest, Config#config{strict_comments=true}); +parse_strict([trailing_commas|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_commas=true}); parse_strict([utf8|Strict], Rest, Config) -> parse_strict(Strict, Rest, Config#config{strict_utf8=true}); parse_strict([single_quotes|Strict], Rest, Config) -> @@ -182,6 +185,7 @@ config_test_() -> unescaped_jsonp = true, dirty_strings = true, strict_comments = true, + strict_commas = true, strict_utf8 = true, strict_single_quotes = true, strict_escapes = true, @@ -199,6 +203,7 @@ config_test_() -> {"strict flag", ?_assertEqual( #config{strict_comments = true, + strict_commas = true, strict_utf8 = true, strict_single_quotes = true, strict_escapes = true diff --git a/src/jsx_config.hrl b/src/jsx_config.hrl index 89f7824..97117c1 100644 --- a/src/jsx_config.hrl +++ b/src/jsx_config.hrl @@ -4,6 +4,7 @@ unescaped_jsonp = false :: boolean(), dirty_strings = false :: boolean(), strict_comments = false :: boolean(), + strict_commas = false :: boolean(), strict_utf8 = false :: boolean(), strict_single_quotes = false :: boolean(), strict_escapes = false :: 
boolean(), diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 3f97d76..db84f56 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -211,6 +211,8 @@ value(<>, Handler, Stack, Config) -> array(Rest, handle_event(start_array, Handler, Config), [array|Stack], Config); value(<>, Handler, Stack, Config) when ?is_whitespace(S) -> value(Rest, Handler, Stack, Config); +value(<> = Rest, Handler, Stack, Config=#config{strict_commas=false}) -> + maybe_done(Rest, Handler, Stack, Config); value(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(value, <>, Handler, Stack, Config); value(<>, Handler, Stack, Config) -> @@ -289,6 +291,8 @@ key(<>, Handler, Stack, Config=#config{strict_single_ string(Rest, Handler, new_seq(), [singlequote|Stack], Config); key(<>, Handler, Stack, Config) when ?is_whitespace(S) -> key(Rest, Handler, Stack, Config); +key(<>, Handler, [key|Stack], Config=#config{strict_commas=false}) -> + maybe_done(<>, Handler, [object|Stack], Config); key(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(key, <>, Handler, Stack, Config); key(<>, Handler, Stack, Config) -> @@ -1627,6 +1631,43 @@ bom_test_() -> ]. 
+trailing_comma_test_() -> + [ + {"trailing comma in object", ?_assertEqual( + [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], + decode(<<"{\"key\": true,}">>, []) + )}, + {"strict trailing comma in object", ?_assertError( + badarg, + decode(<<"{\"key\": true,}">>, [{strict, [trailing_commas]}]) + )}, + {"two trailing commas in object", ?_assertError( + badarg, + decode(<<"{\"key\": true,,}">>, []) + )}, + {"comma in empty object", ?_assertError( + badarg, + decode(<<"{,}">>, []) + )}, + {"trailing comma in list", ?_assertEqual( + [start_array, {literal, true}, end_array, end_json], + decode(<<"[true,]">>, []) + )}, + {"strict trailing comma in list", ?_assertError( + badarg, + decode(<<"[true,]">>, [{strict, [trailing_commas]}]) + )}, + {"two trailing commas in list", ?_assertError( + badarg, + decode(<<"[true,,]">>, []) + )}, + {"comma in empty list", ?_assertError( + badarg, + decode(<<"[,]">>, []) + )} + ]. + + incomplete_test_() -> [ {"stream false", ?_assertError(