diff --git a/CHANGES.md b/CHANGES.md index 438b7dd..fd6d732 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,16 @@ +v2.0 + +* jsx is much more pragmatic by default; common json errors are silently + ignored (and fixed). stricter parsing must be enabled with options +* removed `pre_encode` and `post_decode` options in favour of making jsx + functions easier to wrap and customize +* added abstraction layer for manipulating the internal state of `jsx_to_term` + and `jsx_to_json` and exposed it to user code +* streaming behavior is now disabled by default and must be requested explicitly +* removed deprecated function names (`to_json`, `to_term`, `term_to_json`, etc) +* expanded test coverage + + v1.4.5 * various fixes to typespecs uncovered by dialyzer diff --git a/README.md b/README.md index 4044ae0..cb79505 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,21 @@ -# jsx (v1.4.5) # +# jsx (v2.0) # an erlang application for consuming, producing and manipulating [json][json]. inspired by [yajl][yajl] -jsx is built via [rebar][rebar] and continuous integration testing provided courtesy [travis][travis] +**jsx** is built via [rebar][rebar] and continuous integration testing provided courtesy [travis][travis] current status: [![Build Status](https://secure.travis-ci.org/talentdeficit/jsx.png?branch=develop)](http://travis-ci.org/talentdeficit/jsx) -jsx is released under the terms of the [MIT][MIT] license +**jsx** is released under the terms of the [MIT][MIT] license copyright 2010-2013 alisdair sullivan +## really important note ## + +this is a preview of the 2.0 release. there are lots of changes. 
see [CHANGES.md](CHANGES.md) +for the overview or read this document for the details + ## index ## * [quickstart](#quickstart) @@ -21,7 +26,6 @@ copyright 2010-2013 alisdair sullivan - [`json_term()`](#json_term) - [`json_text()`](#json_text) - [`event()`](#event) - - [`token()`](#token) - [`option()`](#option) * [exports](#exports) - [`encoder/3`, `decoder/3` & `parser/3`](#encoder3-decoder3--parser3) @@ -113,27 +117,31 @@ false ## description ## -jsx is an erlang application for consuming, producing and manipulating +**jsx** is an erlang application for consuming, producing and manipulating [json][json] -json has a [spec][rfc4627] but common usage differs subtly. it's common -usage jsx attempts to address, with guidance from the spec +**jsx** follows the json [spec][rfc4627] as closely as possible with allowances for +real world usage -all json produced and consumed by jsx should be `utf8` encoded text or a -reasonable approximation thereof. ascii works too, but anything beyond that -i'm not going to make any promises. **especially** not latin1 +**jsx** is pragmatic. the json spec allows extensions so **jsx** extends the spec in a +number of ways. see the section on `strict` in [options](#option) below though -the [spec][rfc4627] thinks json values must be wrapped in a json array or -object but everyone else disagrees so jsx allows naked json values by default. -if you're a curmudgeon who's offended by this deviation here is a wrapper for -you: +json has no official comments but this parser allows c/c++ style comments. +anywhere whitespace is allowed you can insert comments (both `// ...` and `/* ... */`) + +all **jsx** decoder input should be `utf8` encoded binaries. sometimes you get binaries +that are almost but not quite valid utf8 whether due to improper escaping or poor +encoding. 
**jsx** replaces invalid codepoints and poorly formed sequences with the +unicode replacement character (`u+FFFD`) + +json only allows keys and strings to be delimited by double quotes (`u+0022`) but +javascript allows them to be delimited by single quotes (`u+0027`) as well. **jsx** +follows javascript in this. strings that start with single quotes can contain double +quotes but must end with single quotes and must escape any single quotes they contain + +json and **jsx** only recognize escape sequences as outlined in the json spec. **jsx** just +ignores bad escape sequences -```erlang -%% usage: `real_json(jsx:decode(JSON))` -real_json(Result) when is_list(Result) -> Result; -real_json(Result) when is_tuple(Result, 2) -> Result; -real_json(_) -> erlang:error(badarg). -``` ### json <-> erlang mapping ### @@ -148,17 +156,18 @@ real_json(_) -> erlang:error(badarg). * numbers - javascript and thus json represent all numeric values with floats. as - this is woefully insufficient for many uses, **jsx**, just like erlang, - supports bigints. whenever possible, this library will interpret json - numbers that look like integers as integers. other numbers will be converted - to erlang's floating point type, which is nearly but not quite iee754. - negative zero is not representable in erlang (zero is unsigned in erlang and - `0` is equivalent to `-0`) and will be interpreted as regular zero. numbers - not representable are beyond the concern of this implementation, and will - result in parsing errors + javascript and thus json represent all numeric values with floats. there's no + reason for erlang -- a language that supports arbitrarily large integers -- to + restrict all numbers to the ieee754 range + + whenever possible, **jsx** will interpret json numbers that look like integers as + integers. other numbers will be converted to erlang's floating point type, which + is nearly but not quite ieee754. 
negative zero is not representable in erlang (zero + is unsigned in erlang and `0` is equivalent to `-0`) and will be interpreted as + regular zero. numbers not representable are beyond the concern of this implementation, + and will result in parsing errors - when converting from erlang to json, numbers are represented with their + when converting from erlang to json, floats are represented with their shortest representation that will round trip without loss of precision. this means that some floats may be superficially dissimilar (although functionally equivalent). for example, `1.0000000000000001` will be @@ -166,32 +175,23 @@ real_json(_) -> erlang:error(badarg). * strings - all erlang strings are represented by **valid** `utf8` encoded binaries or - atoms. note that the atoms `true`, `false` and `null` will never be - automatically converted to strings as the json equivalent values take - precedence. when decoding json strings will always be presented as binaries, - never atoms - - the [json spec][rfc4627] is frustratingly vague on the exact details of json - strings. json must be unicode, but no encoding is specified. javascript - explicitly allows strings containing codepoints explicitly disallowed by - unicode. json allows implementations to set limits on the content of - strings. other implementations attempt to resolve this in various ways. this - implementation, in default operation, only accepts strings that meet the - constraints set out in the json spec (strings are sequences of unicode - codepoints deliminated by `"` (`u+0022`) that may not contain control codes - unless properly escaped with `\` (`u+005c`)) and that are encoded in `utf8` - - the utf8 restriction means improperly paired surrogates are explicitly - disallowed. `u+d800` to `u+dfff` are allowed, but only when they form valid - surrogate pairs. surrogates encountered otherwise result in errors. 
the - noncharacters will also result in errors + json strings must be unicode encoded binaries or erlang atoms. in practice, + because **jsx** only accepts `utf8` binaries all binary strings must be `utf8`. + in addition to being unicode json strings restrict a number of codepoints and + define a number of escape sequences json string escapes of the form `\uXXXX` will be converted to their equivalent codepoints during parsing. this means control characters and other codepoints disallowed by the json spec may be encountered in resulting - strings, but codepoints disallowed by the unicode spec will not be. in the - interest of pragmatism there is an [option](#option) for looser parsing + strings. the utf8 restriction means the surrogates are explicitly disallowed. + if a string contains escaped surrogates (`u+d800` to `u+dfff`) they are + interpreted but only when they form valid surrogate pairs. surrogates + encountered otherwise are replaced with the replacement codepoint (`u+fffd`) + + all erlang strings are represented by **valid** `utf8` encoded binaries. the + encoder will check strings for conformance. noncharacters (like `u+ffff`) + are allowed in erlang utf8 encoded binaries, but will be replaced in strings + passed to the encoder (although, again, see [options](#option)) this implementation performs no normalization on strings beyond that detailed here. be careful when comparing strings as equivalent strings @@ -220,22 +220,30 @@ real_json(_) -> erlang:error(badarg). ### incomplete input ### -jsx handles incomplete json texts. if a partial json text is parsed, rather than -returning a term from your callback handler, jsx returns `{incomplete, F}` where -`F` is a function with an identical API to the anonymous fun returned from -`decoder/3`, `encoder/3` or `parser/3`. it retains the internal state of the -parser at the point where input was exhausted. 
this allows you to parse as you -stream json over a socket or file descriptor, or to parse large json texts -without needing to keep them entirely in memory +**jsx** can handle incomplete json texts. if the option `stream` is passed to the decoder +or parser and if a partial json text is parsed, rather than returning a term from +your callback handler, **jsx** returns `{incomplete, F}` where `F` is a function with +an identical API to the anonymous fun returned from `decoder/3`, `encoder/3` or +`parser/3`. it retains the internal state of the parser at the point where input +was exhausted. this allows you to parse as you stream json over a socket or file +descriptor, or to parse large json texts without needing to keep them entirely in +memory -however, it is important to recognize that jsx is greedy by default. jsx will -consider the parsing complete if input is exhausted and the json text is not -unambiguously incomplete. this is mostly relevant when parsing bare numbers like -`<<"1234">>`. this could be a complete json integer or just the beginning of a -json integer that is being parsed incrementally. jsx will treat it as a whole -integer. calling jsx with the [option](#options) `explicit_end` reverses this -behavior and never considers parsing complete until the `incomplete` function is -called with the argument `end_stream` +however, it is important to recognize that **jsx** is conservative by default. **jsx** will +not consider the parsing complete even when input is exhausted and the json text is +not unambiguously incomplete. to end parsing call the `incomplete` function with the +argument `end_stream` like: + +```erlang +1> {incomplete, F} = jsx:decode(<<"[">>, [stream]). +{incomplete,#Fun} +2> F(end_stream). +** exception error: bad argument +3> {incomplete, G} = F(<<"]">>). +{incomplete,#Fun} +4> G(end_stream). 
+[] +``` ## data types ## @@ -282,50 +290,32 @@ event() = start_object | end_json ``` -#### `token()` #### - -```erlang -token() = event() - | binary() - | {number, integer() | float()} - | integer() - | float() - | true - | false - | null -``` - -the representation used during syntactic analysis. you can generate this -yourself and feed it to `jsx:parser/3` if you'd like to define your own -representations +the subset of [`token()`](#token) emitted by the decoder and encoder to handlers #### `option()` #### ```erlang -option() = replaced_bad_utf8 - | escaped_forward_slashes - | single_quoted_strings - | unescaped_jsonp - | comments +option() = escaped_forward_slashes | escaped_strings + | unescaped_jsonp | dirty_strings - | ignored_bad_escapes - | relax - | explicit_end -``` + | strict + | {strict, [strict_option()]} + | stream + | {incomplete_handler, fun()} + | {error_handler, fun()} -jsx functions all take a common set of options. not all flags have meaning +strict_option() = comments + | utf8 + | single_quotes + | escapes +``` + +**jsx** functions all take a common set of options. not all flags have meaning in all contexts, but they are always valid options. functions may have additional options beyond these. see [individual function documentation](#exports) for details -- `replaced_bad_utf8` - - json text input and json strings SHOULD be utf8 encoded binaries, - appropriately escaped as per the json spec. attempts are made to replace - invalid codepoints with `u+FFFD` as per the unicode spec when this option is - present. this applies both to malformed unicode and disallowed codepoints - - `escaped_forward_slashes` json strings are escaped according to the json spec. this means forward @@ -333,35 +323,6 @@ additional options beyond these. see are left unescaped. 
you may want to use this if you are embedding json directly into a html or xml document -- `single_quoted_strings` - - some parsers allow double quotes (`u+0022`) to be replaced by single quotes - (`u+0027`) to delimit keys and strings. this option allows json containing - single quotes as structural characters to be parsed without errors. note - that the parser expects strings to be terminated by the same quote type that - opened it and that single quotes must, obviously, be escaped within strings - delimited by single quotes - - double quotes must **always** be escaped, regardless of what kind of quotes - delimit the string they are found in - - the parser will never emit json with keys or strings delimited by single - quotes - -- `unescaped_jsonp` - - javascript interpreters treat the codepoints `u+2028` and `u+2029` as - significant whitespace. json strings that contain either of these codepoints - will be parsed incorrectly by some javascript interpreters. by default, - these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to - retain compatibility. this option simply removes that escaping - -- `comments` - - json has no official comments but some parsers allow c/c++ style comments. - anywhere whitespace is allowed this flag allows comments (both `// ...` and - `/* ... */`) - - `escaped_strings` by default both the encoder and decoder return strings as utf8 binaries @@ -370,12 +331,14 @@ additional options beyond these. see unaltered. this flag escapes strings as if for output in json, removing control codes and problematic codepoints and replacing them with the appropriate escapes + +- `unescaped_jsonp` -- `ignored_bad_escapes` - - during decoding ignore unrecognized escape sequences and leave them as is in - the stream. note that combining this option with `escaped_strings` will - result in the escape character itself being escaped + javascript interpreters treat the codepoints `u+2028` and `u+2029` as + significant whitespace. 
json strings that contain either of these codepoints + will be parsed incorrectly by some javascript interpreters. by default, + these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to + retain compatibility. this option simply removes that escaping - `dirty_strings` @@ -383,42 +346,39 @@ additional options beyond these. see can result in unwanted behaviour. if your strings are already escaped (or you'd like to force invalid strings into "json" you monster) use this flag to bypass escaping. this can also be used to read in **really** invalid json - strings. everything but escaped quotes are passed as is to the resulting - string term. note that this overrides `ignored_bad_escapes`, - `unescaped_jsonp` and `escaped_strings` + strings. everything between unescaped quotes are passed as is to the resulting + string term. note that this takes precedence over any other options -- `explicit_end` +- `strict` + + as mentioned [earlier](#description), **jsx** is pragmatic. if you're more of a + json purist or you're really into bdsm stricter adherence to the spec is + possible. the following restrictions are available + + * `comments` + + comments are disabled and result in a `badarg` error + + * `utf8` + + invalid codepoints and malformed unicode result in `badarg` errors + + * `single_quotes` + + only keys and strings delimited by double quotes (`u+0022`) are allowed. the + single quote (`u+0027`) results in a `badarg` error + + * `escapes` + + escape sequences not adhering to the json spec result in a `badarg` error + + any combination of these can be passed to **jsx** by using `{strict, [strict_option()]}`. 
+ `strict` is equivalent to `{strict, [comments, utf8, single_quotes, escapes]}` + +- `stream` see [incomplete input](#incomplete-input) -- `relax` - - relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, - ignored_bad_escapes]` for when you don't care how absolutely terrible your - json input is, you just want the parser to do the best it can - -- `incomplete_handler` & `error_handler` - - the default incomplete and error handlers can be replaced with user defined - handlers. if options include `{error_handler, F}` and/or - `{incomplete_handler, F}` where `F` is a function of arity 3 they will be - called instead of the default handler. the spec for `F` is as follows - ```erlang - F(Remaining, InternalState, Config) -> any() - - Remaining = binary() | term() - InternalState = opaque() - Config = list() - ``` - `Remaining` is the binary fragment or term that caused the error - - `InternalState` is an opaque structure containing the internal state of the - parser/decoder/encoder - - `Config` is a list of options/flags in use by the parser/decoder/encoder - - these functions should be considered experimental for now - ## exports ## @@ -435,10 +395,10 @@ parser(Module, Args, Opts) -> Fun((Tokens) -> any()) Opts = [option()] JSONText = json_text() JSONTerm = json_term() - Tokens = token() | [token()] + Tokens = event() | [event()] ``` -jsx is a json compiler with interleaved tokenizing, syntactic analysis and +**jsx** is a json compiler with interleaved tokenizing, syntactic analysis and semantic analysis stages. included are two tokenizers; one that handles json texts (`decoder/3`) and one that handles erlang terms (`encoder/3`). 
there is also an entry point to the syntactic analysis stage for use with user-defined @@ -468,7 +428,7 @@ decode(JSON, Opts) -> Term JSON = json_text() Term = json_term() - Opts = [option() | labels | {labels, Label} | {post_decode, F}] + Opts = [option() | labels | {labels, Label}] Label = binary | atom | existing_atom | attempt_atom F = fun((any()) -> any()) ``` @@ -485,18 +445,6 @@ new atoms to the atom table and will result in a `badarg` error if the atom does not exist. `attempt_atom` will convert keys to atoms when they exist, and leave them as binary otherwise -`{post_decode, F}` is a user defined function of arity 1 that is called on each -output value (objects, arrays, strings, numbers and literals). it may return any -value to be substituted in the returned term. for example: - -```erlang -1> F = fun(V) when is_list(V) -> V; (V) -> false end. -2> jsx:decode(<<"{\"a list\": [true, \"a string\", 1]}">>, [{post_decode, F}]). -[{<<"a list">>, [false, false, false]}] -``` - -declaring more than one post-decoder will result in a `badarg` error exception - raises a `badarg` error exception if input is not valid json @@ -508,7 +456,7 @@ encode(Term, Opts) -> JSON Term = json_term() JSON = json_text() - Opts = [option() | {pre_encode, F} | space | {space, N} | indent | {indent, N}] + Opts = [option() | space | {space, N} | indent | {indent, N}] F = fun((any()) -> any()) N = pos_integer() ``` @@ -522,18 +470,6 @@ the option `{indent, N}` inserts a newline and `N` spaces for each level of indentation in your json output. note that this overrides spaces inserted after a comma. `indent` is an alias for `{indent, 1}`. the default is `{indent, 0}` -`{pre_encode, F}` is a user defined function of arity 1 that is called on each -input value. it may return any valid json value to be substituted in the -returned json. for example: - -```erlang -1> F = fun(V) when is_list(V) -> V; (V) -> false end. 
-2> jsx:encode([{<<"a list">>, [true, <<"a string">>, 1]}], [{pre_encode, F}]). -<<"{\"a list\": [false, false, false]}">> -``` - -declaring more than one pre-encoder will result in a `badarg` error exception - raises a `badarg` error exception if input is not a valid [erlang representation of json](#json---erlang-mapping) @@ -621,7 +557,7 @@ what exactly constitutes valid json may be altered via [options](#option) ## callback exports ## -the following functions should be exported from a jsx callback module +the following functions should be exported from a **jsx** callback module #### `Module:init/1` #### @@ -667,16 +603,11 @@ following events must be handled: the end of a json array -- `{key, binary()}` - - a key in a json object. this is guaranteed to follow either `start_object` - or a json value. it will usually be a `utf8` encoded binary. see the - [options](#option) for possible exceptions - - `{string, binary()}` a json string. it will usually be a `utf8` encoded binary. see the - [options](#option) for possible exceptions + [options](#option) for possible exceptions. note that keys are also + json strings - `{integer, integer()}` diff --git a/src/jsx.erl b/src/jsx.erl index 80d2ef4..3dac4f8 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -28,22 +28,17 @@ -export([format/1, format/2, minify/1, prettify/1]). -export([encoder/3, decoder/3, parser/3]). -export([resume/3]). -%% old api --export([term_to_json/1, term_to_json/2, json_to_term/1, json_to_term/2]). --export([to_json/1, to_json/2]). --export([to_term/1, to_term/2]). -export_type([json_term/0, json_text/0, token/0]). --export_type([config/0, encoder/0, decoder/0, parser/0, internal_state/0]). +-export_type([encoder/0, decoder/0, parser/0, internal_state/0]). -ifdef(TEST). --include("jsx_tests.hrl"). --else. --include("jsx_config.hrl"). +%% data and helper functions for tests +-export([test_cases/0, special_test_cases/0]). +-export([init/1, handle_event/2]). -endif. --type config() :: #config{}. 
-type json_term() :: [{binary() | atom(), json_term()}] @@ -64,19 +59,12 @@ encode(Source) -> encode(Source, []). encode(Source, Config) -> jsx_to_json:to_json(Source, Config). -%% old api, alias for encode/x --spec to_json(Source::json_term()) -> json_text() | {incomplete, encoder()}. --spec to_json(Source::json_term(), Config::jsx_to_json:config()) -> json_text() | {incomplete, encoder()}. +-spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}. +-spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}. -to_json(Source) -> encode(Source, []). -to_json(Source, Config) -> encode(Source, Config). - --spec term_to_json(Source::json_term()) -> json_text() | {incomplete, encoder()}. --spec term_to_json(Source::json_term(), Config::jsx_to_json:config()) -> json_text() | {incomplete, encoder()}. - -term_to_json(Source) -> encode(Source, []). -term_to_json(Source, Config) -> encode(Source, Config). +decode(Source) -> decode(Source, []). +decode(Source, Config) -> jsx_to_term:to_term(Source, Config). -spec format(Source::json_text()) -> json_text() | {incomplete, decoder()}. @@ -96,27 +84,6 @@ minify(Source) -> format(Source, []). prettify(Source) -> format(Source, [space, {indent, 2}]). --spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}. --spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}. - -decode(Source) -> decode(Source, []). -decode(Source, Config) -> jsx_to_term:to_term(Source, Config). - -%% old api, alias for to_term/x - --spec to_term(Source::json_text()) -> json_term() | {incomplete, decoder()}. --spec to_term(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}. - -to_term(Source) -> decode(Source, []). -to_term(Source, Config) -> decode(Source, Config). - --spec json_to_term(Source::json_text()) -> json_term() | {incomplete, decoder()}. 
--spec json_to_term(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}. - -json_to_term(Source) -> decode(Source, []). -json_to_term(Source, Config) -> decode(Source, Config). - - -spec is_json(Source::any()) -> true | false. -spec is_json(Source::any(), Config::jsx_verify:config()) -> true | false. @@ -182,3 +149,311 @@ resume(Term, {decoder, State, Handler, Acc, Stack}, Config) -> resume(Term, {parser, State, Handler, Stack}, Config) -> jsx_parser:resume(Term, State, Handler, Stack, jsx_config:parse_config(Config)). + + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + + +%% test handler +init([]) -> []. + +handle_event(end_json, State) -> lists:reverse([end_json] ++ State); +handle_event(Event, State) -> [Event] ++ State. + + +test_cases() -> + empty_array() + ++ nested_array() + ++ empty_object() + ++ nested_object() + ++ strings() + ++ literals() + ++ integers() + ++ floats() + ++ compound_object(). + +%% segregate these so we can skip them in `jsx_to_term` +special_test_cases() -> special_objects() ++ special_array(). + + +empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}]. + + +nested_array() -> + [{ + "[[[]]]", + <<"[[[]]]">>, + [[[]]], + [start_array, start_array, start_array, end_array, end_array, end_array] + }]. + + +empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}]. + + +nested_object() -> + [{ + "{\"key\":{\"key\":{}}}", + <<"{\"key\":{\"key\":{}}}">>, + [{<<"key">>, [{<<"key">>, [{}]}]}], + [ + start_object, + {key, <<"key">>}, + start_object, + {key, <<"key">>}, + start_object, + end_object, + end_object, + end_object + ] + }]. + + +naked_strings() -> + Raw = [ + "", + "hello world" + ], + [ + { + String, + <<"\"", (list_to_binary(String))/binary, "\"">>, + list_to_binary(String), + [{string, list_to_binary(String)}] + } + || String <- Raw + ]. 
+ + +strings() -> + naked_strings() + ++ [ wrap_with_array(Test) || Test <- naked_strings() ] + ++ [ wrap_with_object(Test) || Test <- naked_strings() ]. + + +naked_integers() -> + Raw = [ + 1, 2, 3, + 127, 128, 129, + 255, 256, 257, + 65534, 65535, 65536, + 18446744073709551616, + 18446744073709551617 + ], + [ + { + integer_to_list(X), + list_to_binary(integer_to_list(X)), + X, + [{integer, X}] + } + || X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0] + ]. + + +integers() -> + naked_integers() + ++ [ wrap_with_array(Test) || Test <- naked_integers() ] + ++ [ wrap_with_object(Test) || Test <- naked_integers() ]. + + +naked_floats() -> + Raw = [ + 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, + 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, + 1234567890.0987654321, + 0.0e0, + 1234567890.0987654321e16, + 0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308, + 1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308, + 2.2250738585072014e-308, %% min normalized float + 1.7976931348623157e308, %% max normalized float + 5.0e-324, %% min denormalized float + 2.225073858507201e-308 %% max denormalized float + ], + [ + { + sane_float_to_list(X), + list_to_binary(sane_float_to_list(X)), + X, + [{float, X}] + } + || X <- Raw ++ [ -1 * Y || Y <- Raw ] + ]. + + +floats() -> + naked_floats() + ++ [ wrap_with_array(Test) || Test <- naked_floats() ] + ++ [ wrap_with_object(Test) || Test <- naked_floats() ]. + + +naked_literals() -> + [ + { + atom_to_list(Literal), + atom_to_binary(Literal, unicode), + Literal, + [{literal, Literal}] + } + || Literal <- [true, false, null] + ]. + + +literals() -> + naked_literals() + ++ [ wrap_with_array(Test) || Test <- naked_literals() ] + ++ [ wrap_with_object(Test) || Test <- naked_literals() ]. 
+ + +compound_object() -> + [{ + "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]", + <<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>, + [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]], + [ + start_array, + start_object, + {key, <<"alpha">>}, + start_array, + {integer, 1}, + {integer, 2}, + {integer, 3}, + end_array, + {key, <<"beta">>}, + start_object, + {key, <<"alpha">>}, + start_array, + {float, 1.0}, + {float, 2.0}, + {float, 3.0}, + end_array, + {key, <<"beta">>}, + start_array, + {literal, true}, + {literal, false}, + end_array, + end_object, + end_object, + start_array, + start_object, + end_object, + end_array, + end_array + ] + }]. + + +special_objects() -> + [ + { + "[{key, atom}]", + <<"{\"key\":\"atom\"}">>, + [{key, atom}], + [start_object, {key, <<"key">>}, {string, <<"atom">>}, end_object] + }, + { + "[{1, true}]", + <<"{\"1\":true}">>, + [{1, true}], + [start_object, {key, <<"1">>}, {literal, true}, end_object] + } + ]. + + +special_array() -> + [ + { + "[foo, bar]", + <<"[\"foo\",\"bar\"]">>, + [foo, bar], + [start_array, {string, <<"foo">>}, {string, <<"bar">>}, end_array] + } + ]. + + +wrap_with_array({Title, JSON, Term, Events}) -> + { + "[" ++ Title ++ "]", + <<"[", JSON/binary, "]">>, + [Term], + [start_array] ++ Events ++ [end_array] + }. + + +wrap_with_object({Title, JSON, Term, Events}) -> + { + "{\"key\":" ++ Title ++ "}", + <<"{\"key\":", JSON/binary, "}">>, + [{<<"key">>, Term}], + [start_object, {key, <<"key">>}] ++ Events ++ [end_object] + }. + + +sane_float_to_list(X) -> + [Output] = io_lib:format("~p", [X]), + Output. + + +incremental_decode(JSON) -> + Final = lists:foldl( + fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, + decoder(jsx, [], [stream]), + json_to_bytes(JSON) + ), + Final(end_stream). 
+ + +incremental_parse(Events) -> + Final = lists:foldl( + fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end, + parser(?MODULE, [], [stream]), + lists:map(fun(X) -> [X] end, Events) + ), + Final(end_stream). + + +%% used to convert a json text into a list of codepoints to be incrementally +%% parsed +json_to_bytes(JSON) -> json_to_bytes(JSON, []). + +json_to_bytes(<<>>, Acc) -> [<<>>] ++ lists:reverse(Acc); +json_to_bytes(<<X, Rest/binary>>, Acc) -> json_to_bytes(Rest, [<<X>>] ++ Acc). + + +%% actual tests! +decode_test_() -> + Data = test_cases(), + [{Title, ?_assertEqual(Events ++ [end_json], (decoder(?MODULE, [], []))(JSON))} + || {Title, JSON, _, Events} <- Data + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_decode(JSON))} + || {Title, JSON, _, Events} <- Data + ]. + + +parse_test_() -> + Data = test_cases(), + [{Title, ?_assertEqual(Events ++ [end_json], (parser(?MODULE, [], []))(Events ++ [end_json]))} + || {Title, _, _, Events} <- Data + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_parse(Events))} + || {Title, _, _, Events} <- Data + ]. + + +encode_test_() -> + Data = test_cases(), + [ + { + Title, ?_assertEqual( + Events ++ [end_json], + (jsx:encoder(jsx, [], []))(Term) + ) + } || {Title, _, Term, Events} <- Data + ]. + + +-endif. diff --git a/src/jsx_config.erl b/src/jsx_config.erl index e5f8622..8633d8d 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -49,41 +49,27 @@ %% parsing of jsx config -spec parse_config(Config::proplists:proplist()) -> jsx:config(). -parse_config(Config) -> - parse_config(Config, #config{}). +parse_config(Config) -> parse_config(Config, #config{}). 
-parse_config([], Config) -> - Config; -parse_config([replaced_bad_utf8|Rest], Config) -> - parse_config(Rest, Config#config{replaced_bad_utf8=true}); +parse_config([], Config) -> Config; parse_config([escaped_forward_slashes|Rest], Config) -> parse_config(Rest, Config#config{escaped_forward_slashes=true}); -parse_config([explicit_end|Rest], Config) -> - parse_config(Rest, Config#config{explicit_end=true}); -parse_config([single_quoted_strings|Rest], Config) -> - parse_config(Rest, Config#config{single_quoted_strings=true}); -parse_config([unescaped_jsonp|Rest], Config) -> - parse_config(Rest, Config#config{unescaped_jsonp=true}); -parse_config([comments|Rest], Config) -> - parse_config(Rest, Config#config{comments=true}); parse_config([escaped_strings|Rest], Config) -> parse_config(Rest, Config#config{escaped_strings=true}); +parse_config([unescaped_jsonp|Rest], Config) -> + parse_config(Rest, Config#config{unescaped_jsonp=true}); parse_config([dirty_strings|Rest], Config) -> parse_config(Rest, Config#config{dirty_strings=true}); -parse_config([ignored_bad_escapes|Rest], Config) -> - parse_config(Rest, Config#config{ignored_bad_escapes=true}); -parse_config([relax|Rest], Config) -> - parse_config(Rest, Config#config{ - replaced_bad_utf8 = true, - single_quoted_strings = true, - comments = true, - ignored_bad_escapes = true +parse_config([strict|Rest], Config) -> + parse_config(Rest, Config#config{strict_comments=true, + strict_utf8=true, + strict_single_quotes=true, + strict_escapes=true }); -parse_config([{pre_encode, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) -> - case Config#config.pre_encode of - false -> parse_config(Rest, Config#config{pre_encode=Encoder}) - ; _ -> erlang:error(badarg, [Options, Config]) - end; +parse_config([{strict, Strict}|Rest], Config) -> + parse_strict(Strict, Rest, Config); +parse_config([stream|Rest], Config) -> + parse_config(Rest, Config#config{stream=true}); parse_config([{error_handler, ErrorHandler}|Rest] = 
Options, Config) when is_function(ErrorHandler, 3) -> case Config#config.error_handler of false -> parse_config(Rest, Config#config{error_handler=ErrorHandler}) @@ -94,34 +80,28 @@ parse_config([{incomplete_handler, IncompleteHandler}|Rest] = Options, Config) w false -> parse_config(Rest, Config#config{incomplete_handler=IncompleteHandler}) ; _ -> erlang:error(badarg, [Options, Config]) end; -%% deprecated flags -parse_config([{pre_encoder, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) -> - case Config#config.pre_encode of - false -> parse_config(Rest, Config#config{pre_encode=Encoder}) - ; _ -> erlang:error(badarg, [Options, Config]) - end; -parse_config([loose_unicode|Rest], Config) -> - parse_config(Rest, Config#config{replaced_bad_utf8=true}); -parse_config([escape_forward_slash|Rest], Config) -> - parse_config(Rest, Config#config{escaped_forward_slashes=true}); -parse_config([single_quotes|Rest], Config) -> - parse_config(Rest, Config#config{single_quoted_strings=true}); -parse_config([no_jsonp_escapes|Rest], Config) -> - parse_config(Rest, Config#config{unescaped_jsonp=true}); -parse_config([json_escape|Rest], Config) -> - parse_config(Rest, Config#config{escaped_strings=true}); -parse_config([ignore_bad_escapes|Rest], Config) -> - parse_config(Rest, Config#config{ignored_bad_escapes=true}); -parse_config(Options, Config) -> - erlang:error(badarg, [Options, Config]). +parse_config(_Options, _Config) -> erlang:error(badarg). 
+ + +parse_strict([], Rest, Config) -> parse_config(Rest, Config); +parse_strict([comments|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_comments=true}); +parse_strict([utf8|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_utf8=true}); +parse_strict([single_quotes|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_single_quotes=true}); +parse_strict([escapes|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_escapes=true}); +parse_strict(_Strict, _Rest, _Config) -> + erlang:error(badarg). + -spec config_to_list(Config::jsx:config()) -> proplists:proplist(). config_to_list(Config) -> - lists:map( - fun ({pre_encode, F}) -> {pre_encode, F}; - ({error_handler, F}) -> {error_handler, F}; + reduce_config(lists:map( + fun ({error_handler, F}) -> {error_handler, F}; ({incomplete_handler, F}) -> {incomplete_handler, F}; ({Key, true}) -> Key end, @@ -129,34 +109,41 @@ config_to_list(Config) -> fun({_, false}) -> false; (_) -> true end, lists:zip(record_info(fields, config), tl(tuple_to_list(Config))) ) - ). + )). + + +reduce_config(Input) -> reduce_config(Input, [], []). + +reduce_config([], Output, Strict) -> + case length(Strict) of + 0 -> lists:reverse(Output); + 4 -> lists:reverse(Output) ++ [strict]; + _ -> lists:reverse(Output) ++ [{strict, lists:reverse(Strict)}] + end; +reduce_config([strict_comments|Input], Output, Strict) -> + reduce_config(Input, Output, [comments] ++ Strict); +reduce_config([strict_utf8|Input], Output, Strict) -> + reduce_config(Input, Output, [utf8] ++ Strict); +reduce_config([strict_single_quotes|Input], Output, Strict) -> + reduce_config(Input, Output, [single_quotes] ++ Strict); +reduce_config([strict_escapes|Input], Output, Strict) -> + reduce_config(Input, Output, [escapes] ++ Strict); +reduce_config([Else|Input], Output, Strict) -> + reduce_config(Input, [Else] ++ Output, Strict). -spec valid_flags() -> [atom()]. 
valid_flags() -> [ - replaced_bad_utf8, escaped_forward_slashes, - single_quoted_strings, - unescaped_jsonp, - comments, escaped_strings, + unescaped_jsonp, dirty_strings, - ignored_bad_escapes, - explicit_end, - relax, - pre_encode, + strict, + stream, error_handler, - incomplete_handler, - %% deprecated flags - pre_encoder, %% pre_encode - loose_unicode, %% replaced_bad_utf8 - escape_forward_slash, %% escaped_forward_slashes - single_quotes, %% single_quoted_strings - no_jsonp_escapes, %% unescaped_jsonp - json_escape, %% escaped_strings - ignore_bad_escapes %% ignored_bad_escapes + incomplete_handler ]. @@ -187,70 +174,51 @@ config_test_() -> [ {"all flags", ?_assertEqual( - #config{ - replaced_bad_utf8=true, - escaped_forward_slashes=true, - explicit_end=true, - single_quoted_strings=true, - unescaped_jsonp=true, - comments=true, - dirty_strings=true, - ignored_bad_escapes=true + #config{escaped_forward_slashes = true, + escaped_strings = true, + unescaped_jsonp = true, + dirty_strings = true, + strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = true, + stream = true }, - parse_config([ - replaced_bad_utf8, - escaped_forward_slashes, - explicit_end, - single_quoted_strings, + parse_config([escaped_forward_slashes, + escaped_strings, unescaped_jsonp, - comments, dirty_strings, - ignored_bad_escapes + strict, + stream ]) ) }, - {"relax flag", + {"strict flag", ?_assertEqual( - #config{ - replaced_bad_utf8=true, - single_quoted_strings=true, - comments=true, - ignored_bad_escapes=true + #config{strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = true }, - parse_config([relax]) + parse_config([strict]) + ) + }, + {"strict selective", + ?_assertEqual( + #config{strict_comments = true}, + parse_config([{strict, [comments]}]) + ) + }, + {"strict expanded", + ?_assertEqual( + #config{strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = 
true + }, + parse_config([{strict, [comments, utf8, single_quotes, escapes]}]) ) }, - {"deprecated flags", ?_assertEqual( - #config{ - pre_encode=fun lists:length/1, - replaced_bad_utf8=true, - escaped_forward_slashes=true, - single_quoted_strings=true, - unescaped_jsonp=true, - escaped_strings=true, - ignored_bad_escapes=true - }, - parse_config([ - {pre_encoder, fun lists:length/1}, - loose_unicode, - escape_forward_slash, - single_quotes, - no_jsonp_escapes, - json_escape, - ignore_bad_escapes - ]) - )}, - {"pre_encode flag", ?_assertEqual( - #config{pre_encode=fun lists:length/1}, - parse_config([{pre_encode, fun lists:length/1}]) - )}, - {"two pre_encoders defined", ?_assertError( - badarg, - parse_config([ - {pre_encode, fun(_) -> true end}, - {pre_encode, fun(_) -> false end} - ]) - )}, {"error_handler flag", ?_assertEqual( #config{error_handler=fun ?MODULE:fake_error_handler/3}, parse_config([{error_handler, fun ?MODULE:fake_error_handler/3}]) @@ -273,7 +241,7 @@ config_test_() -> {incomplete_handler, fun(_) -> false end} ]) )}, - {"bad option flag", ?_assertError(badarg, parse_config([error]))} + {"bad option flag", ?_assertError(badarg, parse_config([this_flag_does_not_exist]))} ]. 
@@ -284,32 +252,40 @@ config_to_list_test_() -> config_to_list(#config{}) )}, {"all flags", ?_assertEqual( - [ - replaced_bad_utf8, - escaped_forward_slashes, - single_quoted_strings, + [escaped_forward_slashes, + escaped_strings, unescaped_jsonp, - comments, dirty_strings, - ignored_bad_escapes, - explicit_end + stream, + strict ], config_to_list( - #config{ - replaced_bad_utf8=true, - escaped_forward_slashes=true, - explicit_end=true, - single_quoted_strings=true, - unescaped_jsonp=true, - comments=true, - dirty_strings=true, - ignored_bad_escapes=true + #config{escaped_forward_slashes = true, + escaped_strings = true, + unescaped_jsonp = true, + dirty_strings = true, + strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = true, + stream = true } ) )}, - {"pre_encode", ?_assertEqual( - [{pre_encode, fun lists:length/1}], - config_to_list(#config{pre_encode=fun lists:length/1}) + {"single strict", ?_assertEqual( + [{strict, [comments]}], + config_to_list(#config{strict_comments = true}) + )}, + {"multiple strict", ?_assertEqual( + [{strict, [utf8, single_quotes, escapes]}], + config_to_list(#config{strict_utf8 = true, strict_single_quotes = true, strict_escapes = true}) + )}, + {"all strict", ?_assertEqual( + [strict], + config_to_list(#config{strict_comments = true, + strict_utf8 = true, + strict_single_quotes = true, + strict_escapes = true}) )}, {"error handler", ?_assertEqual( [{error_handler, fun ?MODULE:fake_error_handler/3}], diff --git a/src/jsx_config.hrl b/src/jsx_config.hrl index baa1384..89f7824 100644 --- a/src/jsx_config.hrl +++ b/src/jsx_config.hrl @@ -1,15 +1,13 @@ -record(config, { - replaced_bad_utf8 = false :: boolean(), - escaped_forward_slashes = false :: boolean(), - single_quoted_strings = false :: boolean(), - unescaped_jsonp = false :: boolean(), - comments = false :: boolean(), - escaped_strings = false :: boolean(), - dirty_strings = false :: boolean(), - ignored_bad_escapes = false :: boolean(), 
- explicit_end = false :: boolean(), - pre_encode = false :: false | fun((any()) -> any()), - error_handler = false :: false | jsx_config:handler(), - incomplete_handler = false :: false | jsx_config:handler() + escaped_forward_slashes = false :: boolean(), + escaped_strings = false :: boolean(), + unescaped_jsonp = false :: boolean(), + dirty_strings = false :: boolean(), + strict_comments = false :: boolean(), + strict_utf8 = false :: boolean(), + strict_single_quotes = false :: boolean(), + strict_escapes = false :: boolean(), + stream = false :: boolean(), + error_handler = false :: false | jsx_config:handler(), + incomplete_handler = false :: false | jsx_config:handler() }). - diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index a94c08d..a9d5c6e 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -62,6 +62,7 @@ resume(Rest, State, Handler, Acc, Stack, Config) -> integer -> integer(Rest, Handler, Acc, Stack, Config); decimal -> decimal(Rest, Handler, Acc, Stack, Config); exp -> exp(Rest, Handler, Acc, Stack, Config); + zero -> zero(Rest, Handler, Acc, Stack, Config); true -> true(Rest, Handler, Stack, Config); false -> false(Rest, Handler, Stack, Config); null -> null(Rest, Handler, Stack, Config); @@ -138,20 +139,25 @@ resume(Rest, State, Handler, Acc, Stack, Config) -> -endif. +incomplete(State, Rest, Handler, Stack, Config = #config{stream=false}) -> + ?error(State, Rest, Handler, Stack, Config); incomplete(State, Rest, Handler, Stack, Config) -> incomplete(State, Rest, Handler, unused, Stack, Config). 
-incomplete(State, Rest, Handler, Acc, Stack, Config=#config{incomplete_handler=false}) -> + +incomplete(State, Rest, Handler, Acc, Stack, Config = #config{stream=false}) -> + ?error(State, Rest, Handler, Acc, Stack, Config); +incomplete(State, Rest, Handler, Acc, Stack, Config = #config{incomplete_handler=false}) -> {incomplete, fun(Stream) when is_binary(Stream) -> resume(<>, State, Handler, Acc, Stack, Config); (end_stream) -> - case resume(<>, State, Handler, Acc, Stack, Config#config{explicit_end=false}) of + case resume(<>, State, Handler, Acc, Stack, Config#config{stream=false}) of {incomplete, _} -> ?error(State, Rest, Handler, Acc, Stack, Config); Else -> Else end end }; -incomplete(State, Rest, Handler, Acc, Stack, Config=#config{incomplete_handler=F}) -> +incomplete(State, Rest, Handler, Acc, Stack, Config = #config{incomplete_handler=F}) -> F(Rest, {decoder, State, Handler, Acc, Stack}, jsx_config:config_to_list(Config)). @@ -189,7 +195,7 @@ start(Bin, Handler, Stack, Config) -> value(<>, Handler, Stack, Config) -> string(Rest, Handler, new_seq(), Stack, Config); -value(<>, Handler, Stack, Config=#config{single_quoted_strings=true}) -> +value(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> string(Rest, Handler, new_seq(), [singlequote|Stack], Config); value(<<$t, Rest/binary>>, Handler, Stack, Config) -> true(Rest, Handler, Stack, Config); @@ -209,11 +215,13 @@ value(<>, Handler, Stack, Config) -> array(Rest, handle_event(start_array, Handler, Config), [array|Stack], Config); value(<>, Handler, Stack, Config) when ?is_whitespace(S) -> value(Rest, Handler, Stack, Config); -value(<>, Handler, Stack, Config=#config{comments=true}) -> +value(<>, Handler, Stack, Config=#config{strict_comments=true}) -> + ?error(value, <>, Handler, Stack, Config); +value(<>, Handler, Stack, Config) -> comment(Rest, Handler, value, [comment|Stack], Config); -value(<>, Handler, Stack, Config=#config{comments=true}) -> +value(<>, Handler, Stack, Config) -> 
comment(Rest, Handler, value, [multicomment|Stack], Config); -value(<>, Handler, Stack, Config=#config{comments=true}) -> +value(<>, Handler, Stack, Config) -> incomplete(value, <>, Handler, Stack, Config); value(<<>>, Handler, Stack, Config) -> incomplete(value, <<>>, Handler, Stack, Config); @@ -223,17 +231,19 @@ value(Bin, Handler, Stack, Config) -> object(<>, Handler, Stack, Config) -> string(Rest, Handler, new_seq(), Stack, Config); -object(<>, Handler, Stack, Config=#config{single_quoted_strings=true}) -> +object(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> string(Rest, Handler, new_seq(), [singlequote|Stack], Config); object(<>, Handler, [key|Stack], Config) -> maybe_done(Rest, handle_event(end_object, Handler, Config), Stack, Config); object(<>, Handler, Stack, Config) when ?is_whitespace(S) -> object(Rest, Handler, Stack, Config); -object(<>, Handler, Stack, Config=#config{comments=true}) -> +object(<>, Handler, Stack, Config=#config{strict_comments=true}) -> + ?error(object, <>, Handler, Stack, Config); +object(<>, Handler, Stack, Config) -> comment(Rest, Handler, object, [comment|Stack], Config); -object(<>, Handler, Stack, Config=#config{comments=true}) -> +object(<>, Handler, Stack, Config) -> comment(Rest, Handler, object, [multicomment|Stack], Config); -object(<>, Handler, Stack, Config=#config{comments=true}) -> +object(<>, Handler, Stack, Config) -> incomplete(object, <>, Handler, Stack, Config); object(<<>>, Handler, Stack, Config) -> incomplete(object, <<>>, Handler, Stack, Config); @@ -245,11 +255,13 @@ array(<>, Handler, [array|Stack], Config) -> maybe_done(Rest, handle_event(end_array, Handler, Config), Stack, Config); array(<>, Handler, Stack, Config) when ?is_whitespace(S) -> array(Rest, Handler, Stack, Config); -array(<>, Handler, Stack, Config=#config{comments=true}) -> +array(<>, Handler, Stack, Config=#config{strict_comments=true}) -> + value(<>, Handler, Stack, Config); +array(<>, Handler, Stack, Config) -> 
comment(Rest, Handler, array, [comment|Stack], Config); -array(<>, Handler, Stack, Config=#config{comments=true}) -> +array(<>, Handler, Stack, Config) -> comment(Rest, Handler, array, [multicomment|Stack], Config); -array(<>, Handler, Stack, Config=#config{comments=true}) -> +array(<>, Handler, Stack, Config) -> incomplete(array, <>, Handler, Stack, Config); array(<<>>, Handler, Stack, Config) -> incomplete(array, <<>>, Handler, Stack, Config); @@ -261,11 +273,13 @@ colon(<>, Handler, [key|Stack], Config) -> value(Rest, Handler, [object|Stack], Config); colon(<>, Handler, Stack, Config) when ?is_whitespace(S) -> colon(Rest, Handler, Stack, Config); -colon(<>, Handler, Stack, Config=#config{comments=true}) -> +colon(<>, Handler, Stack, Config=#config{strict_comments=true}) -> + ?error(colon, <>, Handler, Stack, Config); +colon(<>, Handler, Stack, Config) -> comment(Rest, Handler, colon, [comment|Stack], Config); -colon(<>, Handler, Stack, Config=#config{comments=true}) -> +colon(<>, Handler, Stack, Config) -> comment(Rest, Handler, colon, [multicomment|Stack], Config); -colon(<>, Handler, Stack, Config=#config{comments=true}) -> +colon(<>, Handler, Stack, Config) -> incomplete(colon, <>, Handler, Stack, Config); colon(<<>>, Handler, Stack, Config) -> incomplete(colon, <<>>, Handler, Stack, Config); @@ -275,15 +289,17 @@ colon(Bin, Handler, Stack, Config) -> key(<>, Handler, Stack, Config) -> string(Rest, Handler, new_seq(), Stack, Config); -key(<>, Handler, Stack, Config=#config{single_quoted_strings=true}) -> +key(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> string(Rest, Handler, new_seq(), [singlequote|Stack], Config); key(<>, Handler, Stack, Config) when ?is_whitespace(S) -> key(Rest, Handler, Stack, Config); -key(<>, Handler, Stack, Config=#config{comments=true}) -> +key(<>, Handler, Stack, Config=#config{strict_comments=true}) -> + ?error(key, <>, Handler, Stack, Config); +key(<>, Handler, Stack, Config) -> comment(Rest, Handler, key, 
[comment|Stack], Config); -key(<>, Handler, Stack, Config=#config{comments=true}) -> +key(<>, Handler, Stack, Config) -> comment(Rest, Handler, key, [multicomment|Stack], Config); -key(<>, Handler, Stack, Config=#config{comments=true}) -> +key(<>, Handler, Stack, Config) -> incomplete(key, <>, Handler, Stack, Config); key(<<>>, Handler, Stack, Config) -> incomplete(key, <<>>, Handler, Stack, Config); @@ -543,53 +559,52 @@ string(<>, Handler, Acc, Stack, Config) when X >= 16#f0, X =< 16#f4, Y >= 16#80, Y =< 16#bf, Z >= 16#80, Z =< 16#bf -> - incomplete(string, <>, Handler, Acc, Stack, Config); + incomplete(string, <>, Handler, Acc, Stack, Config); %% surrogates -string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X >= 160 -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); %% u+xfffe, u+xffff, control codes and other noncharacters -string(<<_/utf8, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) -> +string(<<_/utf8, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); %% u+fffe and u+ffff for R14BXX (subsequent runtimes will happily match the %% preceeding clause -string(<<239, 191, X, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<<239, 191, X, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X == 190; X == 191 -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); %% overlong encodings and missing continuations of a 2 byte sequence -string(<>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X >= 192, X =< 223 -> strip_continuations(Rest, Handler, Acc, Stack, Config, 1); %% overlong encodings and missing continuations of a 3 byte sequence -string(<>, 
Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X >= 224, X =< 239 -> strip_continuations(Rest, Handler, Acc, Stack, Config, 2); %% overlong encodings and missing continuations of a 4 byte sequence -string(<>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) +string(<>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X >= 240, X =< 247 -> strip_continuations(Rest, Handler, Acc, Stack, Config, 3); %% incompletes and unexpected bytes, including orphan continuations -string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) -> +string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); -string(Bin, Handler, Acc, Stack, Config) -> - ?error(string, Bin, Handler, Acc, Stack, Config). +string(Bin, Handler, Acc, Stack, Config) -> ?error(string, Bin, Handler, Acc, Stack, Config). -doublequote(<>, Handler, Acc, [key|_] = Stack, Config) -> +doublequote(Rest, Handler, Acc, [key|_] = Stack, Config) -> colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), Stack, Config); -doublequote(<>, Handler, Acc, [singlequote|_] = Stack, Config) -> +doublequote(Rest, Handler, Acc, [singlequote|_] = Stack, Config) -> string(Rest, Handler,acc_seq(Acc, maybe_replace(?doublequote, Config)), Stack, Config); doublequote(<<>>, Handler, Acc, [singlequote|_] = Stack, Config) -> incomplete(string, <>, Handler, Acc, Stack, Config); -doublequote(<>, Handler, Acc, Stack, Config) -> +doublequote(Rest, Handler, Acc, Stack, Config) -> maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config). 
-singlequote(<>, Handler, Acc, [singlequote, key|Stack], Config) -> +singlequote(Rest, Handler, Acc, [singlequote, key|Stack], Config) -> colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), [key|Stack], Config); -singlequote(<>, Handler, Acc, [singlequote|Stack], Config) -> +singlequote(Rest, Handler, Acc, [singlequote|Stack], Config) -> maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config); -singlequote(<>, Handler, Acc, Stack, Config) -> +singlequote(Rest, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config). @@ -616,12 +631,10 @@ strip_continuations(<>, Handler, Acc, Stack, Config, _) -> %% this all gets really gross and should probably eventually be folded into %% but for now it fakes being part of string on incompletes and errors +unescape(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> + string(<>, Handler, acc_seq(Acc, ?rsolidus), Stack, Config); unescape(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> - case C of - ?doublequote -> string(Rest, Handler, acc_seq(Acc, C), Stack, Config); - ?rsolidus -> string(<>, Handler, acc_seq(Acc, ?rsolidus), Stack, Config); - _ -> string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config) - end; + string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config); unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, maybe_replace($\b, Config)), Stack, Config); unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) -> @@ -634,8 +647,8 @@ unescape(<<$t, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, maybe_replace($\t, Config)), Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, maybe_replace($\", Config)), Stack, Config); -unescape(<>, Handler, Acc, Stack, Config=#config{single_quoted_strings=true}) -> - string(Rest, Handler, acc_seq(Acc, 
maybe_replace(?singlequote, Config)), Stack, Config); +unescape(<>, Handler, Acc, Stack, Config=#config{strict_single_quotes=false}) -> + string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> string(Rest, Handler, acc_seq(Acc, maybe_replace($\\, Config)), Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> @@ -653,9 +666,9 @@ unescape(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, A when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), ?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z) -> - case Config#config.replaced_bad_utf8 of - true -> string(Rest, Handler, acc_seq(Acc, [16#fffd, 16#fffd]), Stack, Config); - false -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) + case Config#config.strict_utf8 of + true -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config); + false -> string(Rest, Handler, acc_seq(Acc, [16#fffd, 16#fffd]), Stack, Config) end; unescape(<<$u, $d, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), @@ -672,16 +685,17 @@ unescape(<<$u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config) case erlang:list_to_integer([A, B, C, D], 16) of Codepoint when Codepoint < 16#d800; Codepoint > 16#dfff -> string(Rest, Handler, acc_seq(Acc, maybe_replace(Codepoint, Config)), Stack, Config); - _ when Config#config.replaced_bad_utf8 -> - string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); - _ -> ?error(string, <>, Handler, Acc, Stack, Config) + _ when Config#config.strict_utf8 -> + ?error(string, <>, Handler, Acc, Stack, Config); + _ -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config) end; -unescape(Bin, Handler, Acc, Stack, Config=#config{ignored_bad_escapes=true}) -> - string(Bin, Handler, acc_seq(Acc, ?rsolidus), Stack, Config); unescape(Bin, Handler, Acc, 
Stack, Config) -> case is_partial_escape(Bin) of true -> incomplete(string, <>, Handler, Acc, Stack, Config); - false -> ?error(string, <>, Handler, Acc, Stack, Config) + false -> case Config#config.strict_escapes of + true -> ?error(string, <>, Handler, Acc, Stack, Config); + false -> string(Bin, Handler, acc_seq(Acc, ?rsolidus), Stack, Config) + end end. @@ -748,10 +762,6 @@ zero(<>, Handler, Acc, Stack, Config) -> decimal(Rest, Handler, acc_seq(Acc, ?decimalpoint), Stack, Config); zero(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> e(Rest, Handler, acc_seq(Acc, ".0e"), Stack, Config); -zero(<<>>, Handler, Acc, [], Config=#config{explicit_end=false}) -> - finish_number(<<>>, Handler, {zero, Acc}, [], Config); -zero(<<>>, Handler, Acc, Stack, Config) -> - incomplete(value, (end_seq(Acc)), Handler, Stack, Config); zero(Bin, Handler, Acc, Stack, Config) -> finish_number(Bin, Handler, {zero, Acc}, Stack, Config). @@ -806,39 +816,13 @@ exp(Bin, Handler, Acc, Stack, Config) -> finish_number(Bin, Handler, {exp, Acc}, Stack, Config). 
-finish_number(Rest, Handler, Acc, [], Config=#config{explicit_end=false}) -> +finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), [], Config); -finish_number(<>, Handler, Acc, [object|Stack], Config) -> - maybe_done(Rest, handle_event([format_number(Acc), end_object], Handler, Config), Stack, Config); -finish_number(<>, Handler, Acc, [array|Stack], Config) -> - maybe_done(Rest, handle_event([format_number(Acc), end_array], Handler, Config), Stack, Config); -finish_number(<>, Handler, Acc, [object|Stack], Config) -> - key(Rest, handle_event(format_number(Acc), Handler, Config), [key|Stack], Config); -finish_number(<>, Handler, Acc, [array|Stack], Config) -> - value(Rest, handle_event(format_number(Acc), Handler, Config), [array|Stack], Config); -finish_number(<>, Handler, Acc, Stack, Config) when ?is_whitespace(S) -> - maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config); -finish_number(<>, Handler, Acc, Stack, Config=#config{comments=true}) -> - comment(Rest, handle_event(format_number(Acc), Handler, Config), maybe_done, [comment|Stack], Config); -finish_number(<>, Handler, Acc, Stack, Config=#config{comments=true}) -> - comment(Rest, handle_event(format_number(Acc), Handler, Config), maybe_done, [multicomment|Stack], Config); -finish_number(<>, Handler, Acc, Stack, Config=#config{comments=true}) -> - incomplete(maybe_done, <>, handle_event(format_number(Acc), Handler, Config), Stack, Config); finish_number(<<>>, Handler, {NumType, Acc}, Stack, Config) -> - case NumType of - integer -> incomplete(integer, <<>>, Handler, Acc, Stack, Config); - decimal -> incomplete(decimal, <<>>, Handler, Acc, Stack, Config); - exp -> incomplete(exp, <<>>, Handler, Acc, Stack, Config) - end; -finish_number(Bin, Handler, {NumType, Acc}, Stack, Config) -> - case NumType of - integer -> ?error(integer, Bin, Handler, Acc, Stack, Config); - decimal -> ?error(decimal, Bin, 
Handler, Acc, Stack, Config); - exp -> ?error(exp, Bin, Handler, Acc, Stack, Config); - zero -> - [$0|OldAcc] = Acc, - ?error(value, <<$0, Bin/binary>>, Handler, OldAcc, Stack, Config) - end. + incomplete(NumType, <<>>, Handler, Acc, Stack, Config); +finish_number(Rest, Handler, Acc, Stack, Config) -> + maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config). + format_number({zero, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))}; format_number({integer, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))}; @@ -899,9 +883,9 @@ comment(<>, Handler, Resume, [multicomment|_] = Stack, Config) -> incomplete(comment, <>, Handler, Resume, Stack, Config); comment(<<_/utf8, Rest/binary>>, Handler, Resume, Stack, Config) -> comment(Rest, Handler, Resume, Stack, Config); -comment(<<_, Rest/binary>>, Handler, Resume, Stack, Config=#config{replaced_bad_utf8=true}) -> +comment(<<_, Rest/binary>>, Handler, Resume, Stack, Config=#config{strict_utf8=false}) -> comment(Rest, Handler, Resume, Stack, Config); -comment(<<>>, Handler, done, [Comment], Config=#config{explicit_end=false}) +comment(<<>>, Handler, done, [Comment], Config=#config{stream=false}) when Comment == comment; Comment == multicomment -> resume(<<>>, done, Handler, unused, [], Config); comment(<<>>, Handler, Resume, Stack, Config) -> @@ -922,11 +906,13 @@ maybe_done(<>, Handler, [array|_] = Stack, Config) -> value(Rest, Handler, Stack, Config); maybe_done(<>, Handler, Stack, Config) when ?is_whitespace(S) -> maybe_done(Rest, Handler, Stack, Config); -maybe_done(<>, Handler, Stack, Config=#config{comments=true}) -> +maybe_done(<>, Handler, Stack, Config=#config{strict_comments=true}) -> + ?error(maybe_done, <>, Handler, Stack, Config); +maybe_done(<>, Handler, Stack, Config) -> comment(Rest, Handler, maybe_done, [comment|Stack], Config); -maybe_done(<>, Handler, Stack, Config=#config{comments=true}) -> +maybe_done(<>, Handler, Stack, Config) -> comment(Rest, Handler, maybe_done, 
[multicomment|Stack], Config); -maybe_done(<>, Handler, Stack, Config=#config{comments=true}) -> +maybe_done(<>, Handler, Stack, Config) -> incomplete(maybe_done, <>, Handler, Stack, Config); maybe_done(<<>>, Handler, Stack, Config) when length(Stack) > 0 -> incomplete(maybe_done, <<>>, Handler, Stack, Config); @@ -936,13 +922,15 @@ maybe_done(Bin, Handler, Stack, Config) -> done(<>, Handler, [], Config) when ?is_whitespace(S) -> done(Rest, Handler, [], Config); -done(<>, Handler, Stack, Config=#config{comments=true}) -> +done(<>, Handler, Stack, Config=#config{strict_comments=true}) -> + ?error(done, <>, Handler, Stack, Config); +done(<>, Handler, Stack, Config) -> comment(Rest, Handler, done, [comment|Stack], Config); -done(<>, Handler, Stack, Config=#config{comments=true}) -> +done(<>, Handler, Stack, Config) -> comment(Rest, Handler, done, [multicomment|Stack], Config); -done(<>, Handler, Stack, Config=#config{comments=true}) -> +done(<>, Handler, Stack, Config) -> incomplete(done, <>, Handler, Stack, Config); -done(<<>>, {Handler, State}, [], Config=#config{explicit_end=true}) -> +done(<<>>, {Handler, State}, [], Config=#config{stream=true}) -> incomplete(done, <<>>, {Handler, State}, [], Config); done(<<>>, {_Handler, State}, [], _Config) -> State; done(Bin, Handler, Stack, Config) -> ?error(done, Bin, Handler, Stack, Config). @@ -959,167 +947,141 @@ json_to_bytes(<<>>, Acc) -> [<<>>] ++ lists:reverse(Acc); json_to_bytes(<>, Acc) -> json_to_bytes(Rest, [<>] ++ Acc). -decode(JSON, Config) -> - Chunk = try - start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) - catch - error:badarg -> {error, badarg} - end, - Incremental = try - Final = lists:foldl( - fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, - decoder(jsx, [], [explicit_end] ++ Config), - json_to_bytes(JSON) - ), - Final(end_stream) - catch - error:badarg -> {error, badarg} - end, - ?assert(Chunk == Incremental), - Chunk. +decode(JSON) -> decode(JSON, []). 
+decode(JSON, Config) -> (decoder(jsx, [], Config))(JSON). -decode_test_() -> - Data = jsx:test_cases(), - [{Title, ?_assertEqual(Events ++ [end_json], decode(JSON, []))} - || {Title, JSON, _, Events} <- Data - ]. +incremental_decode(JSON) -> incremental_decode(JSON, []). +incremental_decode(JSON, Config) -> + Final = lists:foldl( + fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, + decoder(jsx, [], [stream] ++ Config), + json_to_bytes(JSON) + ), + Final(end_stream). %% all these numbers have different representation in erlang than in javascript and %% do not roundtrip like most integers/floats special_number_test_() -> - [ - {"-0", ?_assertEqual( - [{integer, 0}, end_json], - decode(<<"-0">>, []) - )}, - {"-0.0", ?_assertEqual( - [{float, 0.0}, end_json], - decode(<<"-0.0">>, []) - )}, - {"0e0", ?_assertEqual( - [{float, 0.0}, end_json], - decode(<<"0e0">>, []) - )}, - {"0e4", ?_assertEqual( - [{float, 0.0}, end_json], - decode(<<"0e4">>, []) - )}, - {"1e0", ?_assertEqual( - [{float, 1.0}, end_json], - decode(<<"1e0">>, []) - )}, - {"-1e0", ?_assertEqual( - [{float, -1.0}, end_json], - decode(<<"-1e0">>, []) - )}, - {"1e4", ?_assertEqual( - [{float, 1.0e4}, end_json], - decode(<<"1e4">>, []) - )}, - {"number terminated by whitespace", ?_assertEqual( + Cases = [ + % {title, test form, json, opt flags} + {"-0", [{integer, 0}, end_json], <<"-0">>}, + {"-0.0", [{float, 0.0}, end_json], <<"-0.0">>}, + {"0e0", [{float, 0.0}, end_json], <<"0e0">>}, + {"0e4", [{float, 0.0}, end_json], <<"0e4">>}, + {"1e0", [{float, 1.0}, end_json], <<"1e0">>}, + {"-1e0", [{float, -1.0}, end_json], <<"-1e0">>}, + {"1e4", [{float, 1.0e4}, end_json], <<"1e4">>}, + {"number terminated by whitespace", [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1 ]">>, []) - )}, - {"number terminated by comma", ?_assertEqual( + <<"[ 1 ]">> + }, + {"number terminated by comma", [start_array, {integer, 1}, {integer, 1}, end_array, end_json], - decode(<<"[ 1, 1 ]">>, []) - )}, - 
{"number terminated by comma in object", ?_assertEqual( + <<"[ 1, 1 ]">> + }, + {"number terminated by comma in object", [start_object, {key, <<"x">>}, {integer, 1}, {key, <<"y">>}, {integer, 1}, end_object, end_json], - decode(<<"{\"x\": 1, \"y\": 1}">>, []) - )} - ]. + <<"{\"x\": 1, \"y\": 1}">> + } + ], + [{Title, ?_assertEqual(Events, decode(JSON))} + || {Title, Events, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events, incremental_decode(JSON))} + || {Title, Events, JSON} <- Cases + ]. comments_test_() -> - [ - {"preceeding // comment", ?_assertEqual( + Cases = [ + % {title, test form, json, opt flags} + {"preceeding // comment", [start_array, end_array, end_json], - decode(<<"// comment ", ?newline, "[]">>, [comments]) - )}, - {"preceeding /**/ comment", ?_assertEqual( + <<"// comment ", ?newline, "[]">> + }, + {"preceeding /**/ comment", [start_array, end_array, end_json], - decode(<<"/* comment */[]">>, [comments]) - )}, - {"trailing // comment", ?_assertEqual( + <<"/* comment */[]">> + }, + {"trailing // comment", [start_array, end_array, end_json], - decode(<<"[]// comment", ?newline>>, [comments]) - )}, - {"trailing // comment (no newline)", ?_assertEqual( + <<"[]// comment", ?newline>> + }, + {"trailing // comment (no newline)", [start_array, end_array, end_json], - decode(<<"[]// comment">>, [comments]) - )}, - {"trailing /**/ comment", ?_assertEqual( + <<"[]// comment">> + }, + {"trailing /**/ comment", [start_array, end_array, end_json], - decode(<<"[] /* comment */">>, [comments]) - )}, - {"// comment inside array", ?_assertEqual( + <<"[] /* comment */">> + }, + {"// comment inside array", [start_array, end_array, end_json], - decode(<<"[ // comment", ?newline, "]">>, [comments]) - )}, - {"/**/ comment inside array", ?_assertEqual( + <<"[ // comment", ?newline, "]">> + }, + {"/**/ comment inside array", [start_array, end_array, end_json], - decode(<<"[ /* comment */ ]">>, [comments]) - )}, - {"// comment at beginning of 
array", ?_assertEqual( + <<"[ /* comment */ ]">> + }, + {"// comment at beginning of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, [comments]) - )}, - {"/**/ comment at beginning of array", ?_assertEqual( + <<"[ // comment", ?newline, "true", ?newline, "]">> + }, + {"/**/ comment at beginning of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment */ true ]">>, [comments]) - )}, - {"// comment at end of array", ?_assertEqual( + <<"[ /* comment */ true ]">> + }, + {"// comment at end of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ true // comment", ?newline, "]">>, [comments]) - )}, - {"/**/ comment at end of array", ?_assertEqual( + <<"[ true // comment", ?newline, "]">> + }, + {"/**/ comment at end of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ true /* comment */ ]">>, [comments]) - )}, - {"// comment midarray (post comma)", ?_assertEqual( + <<"[ true /* comment */ ]">> + }, + {"// comment midarray (post comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true, // comment", ?newline, "false ]">>, [comments]) - )}, - {"/**/ comment midarray (post comma)", ?_assertEqual( + <<"[ true, // comment", ?newline, "false ]">> + }, + {"/**/ comment midarray (post comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true, /* comment */ false ]">>, [comments]) - )}, - {"// comment midarray (pre comma)", ?_assertEqual( + <<"[ true, /* comment */ false ]">> + }, + {"// comment midarray (pre comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true// comment", ?newline, ", false ]">>, [comments]) - )}, - {"/**/ comment midarray (pre comma)", ?_assertEqual( + <<"[ true// comment", ?newline, ", false ]">> + }, + {"/**/ comment midarray (pre comma)", [start_array, {literal, true}, {literal, 
false}, end_array, end_json], - decode(<<"[ true/* comment */, false ]">>, [comments]) - )}, - {"// comment inside object", ?_assertEqual( + <<"[ true/* comment */, false ]">> + }, + {"// comment inside object", [start_object, end_object, end_json], - decode(<<"{ // comment", ?newline, "}">>, [comments]) - )}, - {"/**/ comment inside object", ?_assertEqual( + <<"{ // comment", ?newline, "}">> + }, + {"/**/ comment inside object", [start_object, end_object, end_json], - decode(<<"{ /* comment */ }">>, [comments]) - )}, - {"// comment at beginning of object", ?_assertEqual( + <<"{ /* comment */ }">> + }, + {"// comment at beginning of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, [comments]) - )}, - {"/**/ comment at beginning of object", ?_assertEqual( + <<"{ // comment", ?newline, " \"key\": true", ?newline, "}">> + }, + {"/**/ comment at beginning of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ /* comment */ \"key\": true }">>, [comments]) - )}, - {"// comment at end of object", ?_assertEqual( + <<"{ /* comment */ \"key\": true }">> + }, + {"// comment at end of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": true // comment", ?newline, "}">>, [comments]) - )}, - {"/**/ comment at end of object", ?_assertEqual( + <<"{ \"key\": true // comment", ?newline, "}">> + }, + {"/**/ comment at end of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": true /* comment */ }">>, [comments]) - )}, - {"// comment midobject (post comma)", ?_assertEqual( + <<"{ \"key\": true /* comment */ }">> + }, + {"// comment midobject (post comma)", [ start_object, {key, <<"x">>}, @@ -1129,9 +1091,9 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, [comments]) - 
)}, - {"/**/ comment midobject (post comma)", ?_assertEqual( + <<"{ \"x\": true, // comment", ?newline, "\"y\": false }">> + }, + {"/**/ comment midobject (post comma)", [ start_object, {key, <<"x">>}, @@ -1141,9 +1103,9 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">>, [comments]) - )}, - {"// comment midobject (pre comma)", ?_assertEqual( + <<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">> + }, + {"// comment midobject (pre comma)", [ start_object, {key, <<"x">>}, @@ -1153,9 +1115,9 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, [comments]) - )}, - {"/**/ comment midobject (pre comma)", ?_assertEqual( + <<"{ \"x\": true// comment", ?newline, ", \"y\": false }">> + }, + {"/**/ comment midobject (pre comma)", [ start_object, {key, <<"x">>}, @@ -1165,136 +1127,168 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, [comments]) - )}, - {"// comment precolon", ?_assertEqual( + <<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">> + }, + {"// comment precolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\" // comment", ?newline, ": true }">>, [comments]) - )}, - {"/**/ comment precolon", ?_assertEqual( + <<"{ \"key\" // comment", ?newline, ": true }">> + }, + {"/**/ comment precolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\"/* comment */: true }">>, [comments]) - )}, - {"// comment postcolon", ?_assertEqual( + <<"{ \"key\"/* comment */: true }">> + }, + {"// comment postcolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": // comment", ?newline, " true }">>, [comments]) - )}, - {"/**/ comment postcolon", ?_assertEqual( + <<"{ \"key\": // comment", ?newline, " true }">> + }, + {"/**/ comment 
postcolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\":/* comment */ true }">>, [comments]) - )}, - {"// comment terminating zero", ?_assertEqual( + <<"{ \"key\":/* comment */ true }">> + }, + {"// comment terminating zero", [start_array, {integer, 0}, end_array, end_json], - decode(<<"[ 0// comment", ?newline, "]">>, [comments]) - )}, - {"// comment terminating integer", ?_assertEqual( + <<"[ 0// comment", ?newline, "]">> + }, + {"// comment terminating integer", [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1// comment", ?newline, "]">>, [comments]) - )}, - {"// comment terminating float", ?_assertEqual( + <<"[ 1// comment", ?newline, "]">> + }, + {"// comment terminating float", [start_array, {float, 1.0}, end_array, end_json], - decode(<<"[ 1.0// comment", ?newline, "]">>, [comments]) - )}, - {"// comment terminating exp", ?_assertEqual( + <<"[ 1.0// comment", ?newline, "]">> + }, + {"// comment terminating exp", [start_array, {float, 1.0e1}, end_array, end_json], - decode(<<"[ 1e1// comment", ?newline, "]">>, [comments]) - )}, - {"/**/ comment terminating zero", ?_assertEqual( + <<"[ 1e1// comment", ?newline, "]">> + }, + {"/**/ comment terminating zero", [start_array, {integer, 0}, end_array, end_json], - decode(<<"[ 0/* comment */ ]">>, [comments]) - )}, - {"/**/ comment terminating integer", ?_assertEqual( + <<"[ 0/* comment */ ]">> + }, + {"/**/ comment terminating integer", [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1/* comment */ ]">>, [comments]) - )}, - {"/**/ comment terminating float", ?_assertEqual( + <<"[ 1/* comment */ ]">> + }, + {"/**/ comment terminating float", [start_array, {float, 1.0}, end_array, end_json], - decode(<<"[ 1.0/* comment */ ]">>, [comments]) - )}, - {"/**/ comment terminating exp", ?_assertEqual( + <<"[ 1.0/* comment */ ]">> + }, + {"/**/ comment terminating exp", [start_array, {float, 1.0e1}, end_array, end_json], - decode(<<"[ 1e1/* 
comment */ ]">>, [comments]) - )}, - {"/**/ comment following /**/ comment", ?_assertEqual( + <<"[ 1e1/* comment */ ]">> + }, + {"/**/ comment following /**/ comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[/* comment *//* comment */true]">>, [comments]) - )}, - {"/**/ comment following // comment", ?_assertEqual( + <<"[/* comment *//* comment */true]">> + }, + {"/**/ comment following // comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[// comment", ?newline, "/* comment */true]">>, [comments]) - )}, - {"// comment following /**/ comment", ?_assertEqual( + <<"[// comment", ?newline, "/* comment */true]">> + }, + {"// comment following /**/ comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[/* comment */// comment", ?newline, "true]">>, [comments]) - )}, - {"// comment following // comment", ?_assertEqual( + <<"[/* comment */// comment", ?newline, "true]">> + }, + {"// comment following // comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, [comments]) - )}, - {"/**/ comment inside /**/ comment", ?_assertEqual( + <<"[// comment", ?newline, "// comment", ?newline, "true]">> + }, + {"/**/ comment inside /**/ comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* /* comment */ */ true ]">>, [comments]) - )}, - {"/**/ comment with /", ?_assertEqual( + <<"[ /* /* comment */ */ true ]">> + }, + {"/**/ comment with /", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* / */ true ]">>, [comments]) - )}, - {"/**/ comment with *", ?_assertEqual( + <<"[ /* / */ true ]">> + }, + {"/**/ comment with *", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* * */ true ]">>, [comments]) - )}, - {"// comment with badutf", ?_assertEqual( + <<"[ /* * */ true ]">> + }, + {"// comment with badutf", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ // 
comment ", 16#00c0, " ", ?newline, "true]">>, [comments, replaced_bad_utf8]) - )}, - {"/**/ comment with badutf", ?_assertEqual( + <<"[ // comment ", 16#00c0, " ", ?newline, "true]">> + }, + {"/**/ comment with badutf", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment ", 16#00c0, " */ true]">>, [comments, replaced_bad_utf8]) - )}, - {"/**/ comment with badutf preceeded by /", ?_assertEqual( + <<"[ /* comment ", 16#00c0, " */ true]">> + }, + {"/**/ comment with badutf preceeded by /", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment /", 16#00c0, " */ true]">>, [comments, replaced_bad_utf8]) - )} + <<"[ /* comment /", 16#00c0, " */ true]">> + } + ], + [{Title, ?_assertEqual(Events, decode(JSON))} + || {Title, Events, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events, incremental_decode(JSON))} + || {Title, Events, JSON} <- Cases + ] ++ + % error when `{strict, [comments]}` is present + [{Title, ?_assertError(badarg, decode(JSON, [{strict, [comments]}]))} + || {Title, _Events, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertError( + badarg, + incremental_decode(JSON, [{strict, [comments]}]) + )} || {Title, _Events, JSON} <- Cases ]. +no_comments_test_() -> + Cases = [ + {"// comment with badutf", + badarg, + <<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, + [{strict, [utf8]}] + }, + {"/**/ comment with badutf", + badarg, + <<"[ /* comment ", 16#00c0, " */ true]">>, + [{strict, [utf8]}] + }, + {"/**/ comment with badutf preceeded by /", + badarg, + <<"[ /* comment /", 16#00c0, " */ true]">>, + [{strict, [utf8]}] + } + ], + [{Title, ?_assertError(Error, decode(JSON, Config))} + || {Title, Error, JSON, Config} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertError(Error, incremental_decode(JSON, Config))} + || {Title, Error, JSON, Config} <- Cases + ]. 
+ + +% doing the full unicode range takes foreverrrrrrr so just do boundaries +% excludes characters that may need escaping codepoints() -> - unicode:characters_to_binary( - [32, 33] - ++ lists:seq(35, 38) - ++ lists:seq(40, 46) - ++ lists:seq(48, 91) - ++ lists:seq(93, 16#2027) - ++ lists:seq(16#202a, 16#d7ff) - ++ lists:seq(16#e000, 16#fdcf) - ++ lists:seq(16#fdf0, 16#fffd) - ). + [32, 33] ++ + lists:seq(35, 46) ++ + lists:seq(48, 91) ++ + lists:seq(93, 127) ++ + [16#2027, 16#202a, 16#d7ff, 16#e000, 16#fdcf, 16#fdf0, 16#fffd] ++ + [16#10000, 16#1fffd, 16#20000, 16#30000, 16#40000, 16#50000] ++ + [16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000] ++ + [16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000]. -extended_codepoints() -> - unicode:characters_to_binary( - lists:seq(16#10000, 16#1fffd) ++ [ - 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, - 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, - 16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000 - ] - ). +reserved_space() -> lists:seq(16#fdd0, 16#fdef). -reserved_space() -> [ to_fake_utf8(N) || N <- lists:seq(16#fdd0, 16#fdef) ]. +surrogates() -> lists:seq(16#d800, 16#dfff). -surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ]. - -noncharacters() -> [ to_fake_utf8(N) || N <- lists:seq(16#fffe, 16#ffff) ]. +noncharacters() -> lists:seq(16#fffe, 16#ffff). extended_noncharacters() -> - [ to_fake_utf8(N) || N <- [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] - ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] - ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] - ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] - ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff] - ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] - ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] - ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff] - ]. 
+ [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] ++ + [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] ++ + [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] ++ + [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] ++ + [16#9fffe, 16#9ffff, 16#afffe, 16#affff] ++ + [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] ++ + [16#dfffe, 16#dffff, 16#efffe, 16#effff] ++ + [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff]. + %% erlang refuses to decode certain codepoints, so fake them all to_fake_utf8(N) when N < 16#0080 -> <<34/utf8, N:8, 34/utf8>>; @@ -1310,581 +1304,320 @@ to_fake_utf8(N) -> clean_string_test_() -> - [ - {"clean codepoints", ?_assertEqual( - [{string, codepoints()}, end_json], - decode(<<34, (codepoints())/binary, 34>>, []) - )}, - {"clean extended codepoints", ?_assertEqual( - [{string, extended_codepoints()}, end_json], - decode(<<34, (extended_codepoints())/binary, 34>>, []) - )}, - {"error reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, reserved_space()) - )}, - {"error surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, surrogates()) - )}, - {"error noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, noncharacters()) - )}, - {"error extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, extended_noncharacters()) - )}, - {"clean reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, reserved_space()) - )}, - {"clean surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, 
[replaced_bad_utf8]) end, surrogates()) - )}, - {"clean noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, noncharacters()) - )}, - {"clean extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, extended_noncharacters()) - )}, - {"dirty \\uwxyz", ?_assertEqual( - [{string, <<"\\uwxyz">>}, end_json], - decode(<<34, "\\uwxyz", 34>>, [dirty_strings]) - )}, - {"dirty \\x23", ?_assertEqual( - [{string, <<"\\x23">>}, end_json], - decode(<<34, "\\x23", 34>>, [dirty_strings]) - )}, - {"dirty 0", ?_assertEqual( - [{string, <<0>>}, end_json], - decode(<<34, 0, 34>>, [dirty_strings]) - )}, - {"dirty 0\"0", ?_assertEqual( - [{string, <<0, ?doublequote, 0>>}, end_json], - decode(<<34, 0, ?rsolidus, ?doublequote, 0, 34>>, [dirty_strings]) - )}, - {"dirty 0\"0", ?_assertEqual( - [{string, <<0, ?rsolidus, ?doublequote, 0>>}, end_json], - decode(<<34, 0, ?rsolidus, ?rsolidus, ?doublequote, 0, 34>>, [dirty_strings]) - )}, - {"dirty 16#d800", ?_assertEqual( - [{string, <<237, 160, 128>>}, end_json], - decode(<<34, 237, 160, 128, 34>>, [dirty_strings]) - )}, - {"dirty 16#10ffff", ?_assertEqual( - [{string, <<244, 143, 191, 191>>}, end_json], - decode(<<34, 244, 143, 191, 191, 34>>, [dirty_strings]) - )}, - {"dirty /", ?_assertEqual( - [{string, <<$/>>}, end_json], - decode(<<34, $/, 34>>, [dirty_strings, escaped_forward_slashes]) - )}, - {"dirty <<194, 129>>", ?_assertEqual( - [{string, <<194, 129>>}, end_json], - decode(<<34, 194, 129, 34>>, [dirty_strings]) - )} + Clean = codepoints(), + Dirty = reserved_space() ++ surrogates() ++ noncharacters() ++ extended_noncharacters(), + % clean codepoints + [{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual( + [{string, <>}, end_json], + 
decode(<<34/utf8, Codepoint/utf8, 34/utf8>>) + )} || Codepoint <- Clean + ] ++ + % bad codepoints replaced by u+FFFD + [{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual( + [{string, <<16#fffd/utf8>>}, end_json], + decode(to_fake_utf8(Codepoint)) + )} || Codepoint <- Dirty + ] ++ + % bad codepoints that cause errors + [{"dirty u+" ++ integer_to_list(Codepoint, 16), ?_assertError( + badarg, + decode(to_fake_utf8(Codepoint), [{strict, [utf8]}]) + )} || Codepoint <- Dirty ]. -decode_bad_utf(String, Config) -> - case decode(<<34, String/binary, 34>>, Config) of - {error, badarg} -> erlang:error(badarg); - [{string, S}, end_json] -> S - end. +dirty_string_test_() -> + Cases = [ + {"dirty \\n", + [start_array, {string, <<"\\n">>}, end_array, end_json], + <<"[\"\\n\"]">>, + [dirty_strings] + }, + {"dirty \\uwxyz", + [start_array, {string, <<"\\uwxyz">>}, end_array, end_json], + <<"[\"\\uwxyz\"]">>, + [dirty_strings] + }, + {"dirty \\x23", + [start_array, {string, <<"\\x23">>}, end_array, end_json], + <<"[\"\\x23\"]">>, + [dirty_strings] + }, + {"dirty 0", + [start_array, {string, <<0>>}, end_array, end_json], + <<"[\"", 0, "\"]">>, + [dirty_strings] + }, + {"dirty 0\\\"0", + [start_array, {string, <<0, ?rsolidus, ?doublequote, 0>>}, end_array, end_json], + <<"[\"", 0, ?rsolidus, ?doublequote, 0, "\"]">>, + [dirty_strings] + }, + {"dirty 0\\\\\"0", + [start_array, {string, <<0, ?rsolidus, ?rsolidus, ?doublequote, 0>>}, end_array, end_json], + <<"[\"", 0, ?rsolidus, ?rsolidus, ?doublequote, 0, "\"]">>, + [dirty_strings] + }, + {"dirty 16#d800", + [start_array, {string, <<237, 160, 128>>}, end_array, end_json], + <<"[\"", 237, 160, 128, "\"]">>, + [dirty_strings] + }, + {"dirty 16#10ffff", + [start_array, {string, <<244, 143, 191, 191>>}, end_array, end_json], + <<"[\"", 244, 143, 191, 191, "\"]">>, + [dirty_strings] + }, + {"dirty /", + [start_array, {string, <<$/>>}, end_array, end_json], + <<"[\"", $/, "\"]">>, + [dirty_strings, escaped_forward_slashes] + }, + 
{"dirty <<194, 129>>", + [start_array, {string, <<194, 129>>}, end_array, end_json], + <<"[\"", 194, 129, "\"]">>, + [dirty_strings] + } + ], + [{Title, ?_assertEqual(Events, decode(JSON, Config))} + || {Title, Events, JSON, Config} <- Cases + ] ++ + % ensure `dirty_strings` and `strict` interact properly + [{Title, ?_assertEqual(Events, decode(JSON, Config ++ [strict]))} + || {Title, Events, JSON, Config} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events, incremental_decode(JSON, Config))} + || {Title, Events, JSON, Config} <- Cases + ]. + bad_utf8_test_() -> - [ - {"noncharacter u+fffe", ?_assertError( - badarg, - decode_bad_utf(<<239, 191, 190>>, []) - )}, - {"noncharacter u+fffe replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<239, 191, 190>>, [replaced_bad_utf8]) - )}, - {"noncharacter u+ffff", ?_assertError( - badarg, - decode_bad_utf(<<239, 191, 191>>, []) - )}, - {"noncharacter u+ffff replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<239, 191, 191>>, [replaced_bad_utf8]) - )}, - {"orphan continuation byte u+0080", ?_assertError( - badarg, - decode_bad_utf(<<16#0080>>, []) - )}, - {"orphan continuation byte u+0080 replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<16#0080>>, [replaced_bad_utf8]) - )}, - {"orphan continuation byte u+00bf", ?_assertError( - badarg, - decode_bad_utf(<<16#00bf>>, []) - )}, - {"orphan continuation byte u+00bf replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<16#00bf>>, [replaced_bad_utf8]) - )}, - {"2 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, []) - )}, - {"2 continuation bytes replaced", ?_assertEqual( + Cases = [ + {"noncharacter u+fffe", <<16#fffd/utf8>>, <<239, 191, 190>>}, + {"noncharacter u+ffff", <<16#fffd/utf8>>, <<239, 191, 191>>}, + {"orphan continuation byte u+0080", <<16#fffd/utf8>>, <<16#0080>>}, + {"orphan continuation byte u+00bf", <<16#fffd/utf8>>, <<16#00bf>>}, + {"2 
continuation bytes", binary:copy(<<16#fffd/utf8>>, 2), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, [replaced_bad_utf8]) - )}, - {"3 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, []) - )}, - {"3 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 2))/binary>> + }, + {"3 continuation bytes", binary:copy(<<16#fffd/utf8>>, 3), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, [replaced_bad_utf8]) - )}, - {"4 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, []) - )}, - {"4 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 3))/binary>> + }, + {"4 continuation bytes", binary:copy(<<16#fffd/utf8>>, 4), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, [replaced_bad_utf8]) - )}, - {"5 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, []) - )}, - {"5 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 4))/binary>> + }, + {"5 continuation bytes", binary:copy(<<16#fffd/utf8>>, 5), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, [replaced_bad_utf8]) - )}, - {"6 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, []) - )}, - {"6 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 5))/binary>> + }, + {"6 continuation bytes", binary:copy(<<16#fffd/utf8>>, 6), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, [replaced_bad_utf8]) - )}, - {"all continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, []) - )}, - {"all continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 6))/binary>> + }, + {"all continuation bytes", binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), - decode_bad_utf( - 
<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, - [replaced_bad_utf8] - ) - )}, - {"lonely start byte", ?_assertError( - badarg, - decode_bad_utf(<<16#00c0>>, []) - )}, - {"lonely start byte replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<16#00c0>>, [replaced_bad_utf8]) - )}, - {"lonely start bytes (2 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#00c0, 32, 16#00df>>, []) - )}, - {"lonely start bytes (2 byte) replaced", ?_assertEqual( + <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>> + }, + {"lonely start byte", <<16#fffd/utf8>>, <<16#00c0>>}, + {"lonely start bytes (2 byte)", <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00c0, 32, 16#00df>>, [replaced_bad_utf8]) - )}, - {"lonely start bytes (3 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#00e0, 32, 16#00ef>>, []) - )}, - {"lonely start bytes (3 byte) replaced", ?_assertEqual( + <<16#00c0, 32, 16#00df>> + }, + {"lonely start bytes (3 byte)", <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00e0, 32, 16#00ef>>, [replaced_bad_utf8]) - )}, - {"lonely start bytes (4 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#00f0, 32, 16#00f7>>, []) - )}, - {"lonely start bytes (4 byte) replaced", ?_assertEqual( + <<16#00e0, 32, 16#00ef>> + }, + {"lonely start bytes (4 byte)", <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00f0, 32, 16#00f7>>, [replaced_bad_utf8]) - )}, - {"missing continuation byte (3 byte)", ?_assertError( - badarg, - decode_bad_utf(<<224, 160, 32>>, []) - )}, - {"missing continuation byte (3 byte) replaced", ?_assertEqual( + <<16#00f0, 32, 16#00f7>> + }, + {"missing continuation byte (3 byte)", <<16#fffd/utf8, 32>>, <<224, 160, 32>>}, + {"missing continuation byte (4 byte missing one)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<224, 160, 32>>, [replaced_bad_utf8]) - )}, - {"missing continuation byte (4 byte missing one)", ?_assertError( - badarg, - decode_bad_utf(<<240, 144, 128, 32>>, []) - )}, - {"missing continuation 
byte (4 byte missing one) replaced", ?_assertEqual( + <<240, 144, 128, 32>> + }, + {"missing continuation byte (4 byte missing two)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<240, 144, 128, 32>>, [replaced_bad_utf8]) - )}, - {"missing continuation byte (4 byte missing two)", ?_assertError( - badarg, - decode_bad_utf(<<240, 144, 32>>, []) - )}, - {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( + <<240, 144, 32>> + }, + {"overlong encoding of u+002f (2 byte)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<240, 144, 32>>, [replaced_bad_utf8]) - )}, - {"overlong encoding of u+002f (2 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#c0, 16#af, 32>>, []) - )}, - {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( + <<16#c0, 16#af, 32>> + }, + {"overlong encoding of u+002f (3 byte)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#c0, 16#af, 32>>, [replaced_bad_utf8]) - )}, - {"overlong encoding of u+002f (3 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, []) - )}, - {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( + <<16#e0, 16#80, 16#af, 32>> + }, + {"overlong encoding of u+002f (4 byte)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, [replaced_bad_utf8]) - )}, - {"overlong encoding of u+002f (4 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, []) - )}, - {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( + <<16#f0, 16#80, 16#80, 16#af, 32>> + }, + {"highest overlong 2 byte sequence", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, [replaced_bad_utf8]) - )}, - {"highest overlong 2 byte sequence", ?_assertError( - badarg, - decode_bad_utf(<<16#c1, 16#bf, 32>>, []) - )}, - {"highest overlong 2 byte sequence replaced", ?_assertEqual( + <<16#c1, 16#bf, 32>> + }, + {"highest overlong 3 byte sequence", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#c1, 16#bf, 32>>, [replaced_bad_utf8]) - )}, - 
{"highest overlong 3 byte sequence", ?_assertError( - badarg, - decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, []) - )}, - {"highest overlong 3 byte sequence replaced", ?_assertEqual( + <<16#e0, 16#9f, 16#bf, 32>> + }, + {"highest overlong 4 byte sequence", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, [replaced_bad_utf8]) - )}, - {"highest overlong 4 byte sequence", ?_assertError( + <<16#f0, 16#8f, 16#bf, 16#bf, 32>> + } + ], + [{Title, ?_assertError( badarg, - decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, []) - )}, - {"highest overlong 4 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [replaced_bad_utf8]) - )} + decode(<<34, JSON/binary, 34>>, [{strict, [utf8]}]) + )} || {Title, _, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertError( + badarg, + incremental_decode(<<34, JSON/binary, 34>>, [{strict, [utf8]}]) + )} || {Title, _, JSON} <- Cases + ] ++ + [{Title ++ " replaced", ?_assertEqual( + [{string, Replacement}, end_json], + decode(<<34, JSON/binary, 34>>) + )} || {Title, Replacement, JSON} <- Cases + ] ++ + [{Title ++ " replaced (incremental)", ?_assertEqual( + [{string, Replacement}, end_json], + incremental_decode(<<34, JSON/binary, 34>>) + )} || {Title, Replacement, JSON} <- Cases ]. -unescape(Bin, Config) -> - case decode(<<34, Bin/binary, 34>>, Config) of - [{string, String}, end_json] -> String; - {error, badarg} -> erlang:error(badarg) - end. 
- unescape_test_() -> - [ - {"unescape backspace", ?_assertEqual( - <<"\b">>, - unescape(<<"\\b"/utf8>>, []) - )}, - {"unescape tab", ?_assertEqual( - <<"\t">>, - unescape(<<"\\t"/utf8>>, []) - )}, - {"unescape newline", ?_assertEqual( - <<"\n">>, - unescape(<<"\\n"/utf8>>, []) - )}, - {"unescape formfeed", ?_assertEqual( - <<"\f">>, - unescape(<<"\\f"/utf8>>, []) - )}, - {"unescape carriage return", ?_assertEqual( - <<"\r">>, - unescape(<<"\\r"/utf8>>, []) - )}, - {"unescape quote", ?_assertEqual( - <<"\"">>, - unescape(<<"\\\""/utf8>>, []) - )}, - {"unescape single quote", ?_assertEqual( - <<"'">>, - unescape(<<"\\'"/utf8>>, [single_quoted_strings]) - )}, - {"unescape solidus", ?_assertEqual( - <<"/">>, - unescape(<<"\\/"/utf8>>, []) - )}, - {"unescape reverse solidus", ?_assertEqual( - <<"\\">>, - unescape(<<"\\\\"/utf8>>, []) - )}, - {"unescape control", ?_assertEqual( - <<0>>, - unescape(<<"\\u0000"/utf8>>, []) - )}, - {"unescape surrogate pair", ?_assertEqual( - <<16#10000/utf8>>, - unescape(<<"\\ud800\\udc00"/utf8>>, []) - )}, - {"replace bad high surrogate", ?_assertEqual( - <<16#fffd/utf8>>, - unescape(<<"\\udc00"/utf8>>, [replaced_bad_utf8]) - )}, - {"do not unescape bad high surrogate", ?_assertError( - badarg, - unescape(<<"\\udc00"/utf8>>, []) - )}, - {"replace naked high surrogate", ?_assertEqual( + Cases = [ + {"unescape backspace", <<"\b">>, <<"\\b"/utf8>>}, + {"unescape tab", <<"\t">>, <<"\\t"/utf8>>}, + {"unescape newline", <<"\n">>, <<"\\n"/utf8>>}, + {"unescape formfeed", <<"\f">>, <<"\\f"/utf8>>}, + {"unescape carriage return", <<"\r">>, <<"\\r"/utf8>>}, + {"unescape quote", <<"\"">>, <<"\\\""/utf8>>}, + {"unescape solidus", <<"/">>, <<"\\/"/utf8>>}, + {"unescape reverse solidus", <<"\\">>, <<"\\\\"/utf8>>}, + {"unescape control", <<0>>, <<"\\u0000"/utf8>>}, + {"unescape surrogate pair", <<16#10000/utf8>>, <<"\\ud800\\udc00"/utf8>>}, + {"replace bad high surrogate", <<16#fffd/utf8>>, <<"\\udc00"/utf8>>}, + {"replace naked high surrogate", 
<<16#fffd/utf8, "hello world">>, - unescape(<<"\\ud800hello world"/utf8>>, [replaced_bad_utf8]) - )}, - {"do not unescape naked high surrogate", ?_assertError( - badarg, - unescape(<<"\\ud800hello world"/utf8>>, []) - )}, - {"replace naked low surrogate", ?_assertEqual( + <<"\\ud800hello world"/utf8>> + }, + {"replace naked low surrogate", <<16#fffd/utf8, "hello world">>, - unescape(<<"\\udc00hello world"/utf8>>, [replaced_bad_utf8]) - )}, - {"do not unescape naked low surrogate", ?_assertError( - badarg, - unescape(<<"\\udc00hello world"/utf8>>, []) - )}, - {"replace bad surrogate pair", ?_assertEqual( - <<16#fffd/utf8, 16#fffd/utf8>>, - unescape(<<"\\ud800\\u0000">>, [replaced_bad_utf8]) - )}, - {"do not unescape bad surrogate pair", ?_assertError( - badarg, - unescape(<<"\\ud800\\u0000">>, []) - )}, - {"bad pseudo escape sequence", ?_assertError( - badarg, - unescape(<<"\\uabcg">>, []) - )} + <<"\\udc00hello world"/utf8>> + }, + {"replace bad surrogate pair", <<16#fffd/utf8, 16#fffd/utf8>>, <<"\\ud800\\u0000">>} + ], + [{Title, ?_assertEqual([{string, Escaped}, end_json], decode(<<34, JSON/binary, 34>>))} + || {Title, Escaped, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual( + [{string, Escaped}, end_json], + incremental_decode(<<34, JSON/binary, 34>>) + )} || {Title, Escaped, JSON} <- Cases ]. -maybe_escape(Bin, Config) -> - [{string, String}, end_json] = decode(Bin, Config), - String. +bad_escaped_surrogate_test_() -> + Cases = [ + {"do not unescape bad high surrogate", <<"\\udc00">>}, + {"do not unescape naked high surrogate", <<"\\ud800hello world">>}, + {"do not unescape naked low surrogate", <<"\\udc00hello world">>}, + {"do not unescape bad surrogate pair", <<"\\ud800\\u0000">>} + ], + [{Title, ?_assertError(badarg, decode(<<34, JSON/binary, 34>>, [{strict, [utf8]}]))} + || {Title, JSON} <- Cases + ]. 
+ escape_test_() -> - [ - {"maybe_escape backspace", ?_assertEqual( - <<"\\b">>, - maybe_escape(<<34, "\\b"/utf8, 34>>, [escaped_strings]) - )}, - {"don't escape backspace", ?_assertEqual( - <<"\b">>, - maybe_escape(<<34, "\\b"/utf8, 34>>, []) - )}, - {"maybe_escape tab", ?_assertEqual( - <<"\\t">>, - maybe_escape(<<34, "\\t"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape newline", ?_assertEqual( - <<"\\n">>, - maybe_escape(<<34, "\\n"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape formfeed", ?_assertEqual( - <<"\\f">>, - maybe_escape(<<34, "\\f"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape carriage return", ?_assertEqual( - <<"\\r">>, - maybe_escape(<<34, "\\r"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape quote", ?_assertEqual( - <<"\\\"">>, - maybe_escape(<<34, "\\\""/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape forward slash", ?_assertEqual( - <<"\\/">>, - maybe_escape(<<34, "/"/utf8, 34>>, [escaped_strings, escaped_forward_slashes]) - )}, - {"do not maybe_escape forward slash", ?_assertEqual( - <<"/">>, - maybe_escape(<<34, "/"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape backslash", ?_assertEqual( - <<"\\\\">>, - maybe_escape(<<34, "\\\\"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape jsonp (u2028)", ?_assertEqual( - <<"\\u2028">>, - maybe_escape(<<34, 16#2028/utf8, 34>>, [escaped_strings]) - )}, - {"do not maybe_escape jsonp (u2028)", ?_assertEqual( - <<16#2028/utf8>>, - maybe_escape(<<34, 16#2028/utf8, 34>>, [escaped_strings, unescaped_jsonp]) - )}, - {"maybe_escape jsonp (u2029)", ?_assertEqual( - <<"\\u2029">>, - maybe_escape(<<34, 16#2029/utf8, 34>>, [escaped_strings]) - )}, - {"do not maybe_escape jsonp (u2029)", ?_assertEqual( - <<16#2029/utf8>>, - maybe_escape(<<34, 16#2029/utf8, 34>>, [escaped_strings, unescaped_jsonp]) - )}, - {"maybe_escape u0000", ?_assertEqual( - <<"\\u0000">>, - maybe_escape(<<34, "\\u0000"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0001", ?_assertEqual( - 
<<"\\u0001">>, - maybe_escape(<<34, "\\u0001"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0002", ?_assertEqual( - <<"\\u0002">>, - maybe_escape(<<34, "\\u0002"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0003", ?_assertEqual( - <<"\\u0003">>, - maybe_escape(<<34, "\\u0003"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0004", ?_assertEqual( - <<"\\u0004">>, - maybe_escape(<<34, "\\u0004"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0005", ?_assertEqual( - <<"\\u0005">>, - maybe_escape(<<34, "\\u0005"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0006", ?_assertEqual( - <<"\\u0006">>, - maybe_escape(<<34, "\\u0006"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0007", ?_assertEqual( - <<"\\u0007">>, - maybe_escape(<<34, "\\u0007"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u000b", ?_assertEqual( - <<"\\u000b">>, - maybe_escape(<<34, "\\u000b"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u000e", ?_assertEqual( - <<"\\u000e">>, - maybe_escape(<<34, "\\u000e"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u000f", ?_assertEqual( - <<"\\u000f">>, - maybe_escape(<<34, "\\u000f"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0010", ?_assertEqual( - <<"\\u0010">>, - maybe_escape(<<34, "\\u0010"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0011", ?_assertEqual( - <<"\\u0011">>, - maybe_escape(<<34, "\\u0011"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0012", ?_assertEqual( - <<"\\u0012">>, - maybe_escape(<<34, "\\u0012"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0013", ?_assertEqual( - <<"\\u0013">>, - maybe_escape(<<34, "\\u0013"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0014", ?_assertEqual( - <<"\\u0014">>, - maybe_escape(<<34, "\\u0014"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0015", ?_assertEqual( - <<"\\u0015">>, - maybe_escape(<<34, "\\u0015"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0016", ?_assertEqual( - 
<<"\\u0016">>, - maybe_escape(<<34, "\\u0016"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0017", ?_assertEqual( - <<"\\u0017">>, - maybe_escape(<<34, "\\u0017"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0018", ?_assertEqual( - <<"\\u0018">>, - maybe_escape(<<34, "\\u0018"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0019", ?_assertEqual( - <<"\\u0019">>, - maybe_escape(<<34, "\\u0019"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001a", ?_assertEqual( - <<"\\u001a">>, - maybe_escape(<<34, "\\u001a"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001b", ?_assertEqual( - <<"\\u001b">>, - maybe_escape(<<34, "\\u001b"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001c", ?_assertEqual( - <<"\\u001c">>, - maybe_escape(<<34, "\\u001c"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001d", ?_assertEqual( - <<"\\u001d">>, - maybe_escape(<<34, "\\u001d"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001e", ?_assertEqual( - <<"\\u001e">>, - maybe_escape(<<34, "\\u001e"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001f", ?_assertEqual( - <<"\\u001f">>, - maybe_escape(<<34, "\\u001f"/utf8, 34>>, [escaped_strings]) - )} + Cases = [ + {"backspace", <<"\b">>, <<"\\b">>}, + {"tab", <<"\t">>, <<"\\t">>}, + {"newline", <<"\n">>, <<"\\n">>}, + {"formfeed", <<"\f">>, <<"\\f">>}, + {"carriage return", <<"\r">>, <<"\\r">>}, + {"quote", <<"\"">>, <<"\\\"">>}, + {"backslash", <<"\\">>, <<"\\\\">>}, + {"control", <<0>>, <<"\\u0000">>} + ], + [{"escape " ++ Title, ?_assertEqual( + [{string, Escaped}, end_json], + decode(<<34, Escaped/binary, 34>>, [escaped_strings]) + )} || {Title, _Unescaped, Escaped} <- Cases + ] ++ + [{"do not escape " ++ Title, ?_assertEqual( + [{string, Unescaped}, end_json], + decode(<<34, Escaped/binary, 34>>) + )} || {Title, Unescaped, Escaped} <- Cases + ]. 
+ + +special_escape_test_() -> + Cases = [ + {"escape forward slash", <<"\\/">>, <<"/"/utf8>>, [escaped_forward_slashes]}, + {"do not escape forward slash", <<"/">>, <<"/"/utf8>>, []}, + {"escape jsonp", <<"\\u2028">>, <<16#2028/utf8>>, []}, + {"do not escape jsonp", <<16#2028/utf8>>, <<16#2028/utf8>>, [unescaped_jsonp]} + ], + [{Title, ?_assertEqual( + [{string, Expect}, end_json], + decode(<<34, Raw/binary, 34>>, [escaped_strings] ++ Config) + )} || {Title, Expect, Raw, Config} <- Cases ]. single_quoted_string_test_() -> - [ - {"single quoted string", ?_assertEqual( - [{string, <<"hello world">>}, end_json], - decode(<<39, "hello world", 39>>, [single_quoted_strings]) - )}, - {"single quoted string with embedded double quotes", ?_assertEqual( + Cases = [ + {"single quoted string", [{string, <<"hello world">>}, end_json], <<39, "hello world", 39>>}, + {"single quoted string with embedded double quotes", [{string, <<"quoth the raven, \"nevermore\"">>}, end_json], - decode(<<39, "quoth the raven, \"nevermore\"", 39>>, [single_quoted_strings]) - )}, - {"string with embedded single quotes", ?_assertEqual( + <<39, "quoth the raven, \"nevermore\"", 39>> + }, + {"escaped single quote", [{string, <<"quoth the raven, 'nevermore'">>}, end_json], - decode(<<34, "quoth the raven, 'nevermore'", 34>>, []) - )}, - {"escaped single quote", ?_assertEqual( - [{string, <<"quoth the raven, 'nevermore'">>}, end_json], - decode(<<39, "quoth the raven, \\'nevermore\\'", 39>>, [single_quoted_strings]) - )}, - {"single quoted key", ?_assertEqual( + <<39, "quoth the raven, \\'nevermore\\'", 39>> + }, + {"single quoted key", [start_object, {key, <<"key">>}, {string, <<"value">>}, {key, <<"another key">>}, {string, <<"another value">>}, end_object, end_json], - decode(<<"{'key':'value','another key':'another value'}">>, [single_quoted_strings]) + <<"{'key':'value','another key':'another value'}">> + } + ], + [{Title, ?_assertEqual(Expect, decode(Raw, []))} || {Title, Expect, Raw} <- Cases] 
++ + [{Title, ?_assertError( + badarg, + decode(Raw, [{strict, [single_quotes]}]) + )} || {Title, _Expect, Raw} <- Cases + ]. + + +embedded_single_quoted_string_test_() -> + [ + {"string with embedded single quotes", ?_assertEqual( + [{string, <<"quoth the raven, 'nevermore'">>}, end_json], + decode(<<34, "quoth the raven, 'nevermore'", 34>>, []) + )}, + {"string with embedded single quotes", ?_assertEqual( + [{string, <<"quoth the raven, 'nevermore'">>}, end_json], + decode(<<34, "quoth the raven, 'nevermore'", 34>>, [{strict, [single_quotes]}]) )} ]. + ignored_bad_escapes_test_() -> [ {"ignore unrecognized escape sequence", ?_assertEqual( [{string, <<"\\x25">>}, end_json], - decode(<<"\"\\x25\"">>, [ignored_bad_escapes]) + decode(<<"\"\\x25\"">>, []) )} ]. @@ -1898,201 +1631,58 @@ bom_test_() -> ]. +incomplete_test_() -> + [ + {"stream false", ?_assertError( + badarg, + decode(<<"{">>) + )}, + {"stream true", ?_assert( + case decode(<<"{">>, [stream]) of + {incomplete, _} -> true; + _ -> false + end + )}, + {"complete input", ?_assert( + case decode(<<"{}">>, [stream]) of + {incomplete, _} -> true; + _ -> false + end + )} + ]. 
+ + error_test_() -> - Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, - [ - {"maybe_bom error", ?_assertError( - badarg, - Decode(<<16#ef, 0>>, []) - )}, - {"definitely_bom error", ?_assertError( - badarg, - Decode(<<16#ef, 16#bb, 0>>, []) - )}, - {"value error", ?_assertError( - badarg, - Decode(<<0>>, []) - )}, - {"object error", ?_assertError( - badarg, - Decode(<<"{"/utf8, 0>>, []) - )}, - {"colon error", ?_assertError( - badarg, - Decode(<<"{\"\""/utf8, 0>>, []) - )}, - {"key error", ?_assertError( - badarg, - Decode(<<"{\"\":1,"/utf8, 0>>, []) - )}, - {"negative error", ?_assertError( - badarg, - Decode(<<"-"/utf8, 0>>, []) - )}, - {"zero error", ?_assertError( - badarg, - Decode(<<"0"/utf8, 0>>, [explicit_end]) - )}, - {"integer error", ?_assertError( - badarg, - Decode(<<"1"/utf8, 0>>, [explicit_end]) - )}, - {"decimal error", ?_assertError( - badarg, - Decode(<<"1.0"/utf8, 0>>, [explicit_end]) - )}, - {"exp error", ?_assertError( - badarg, - Decode(<<"1.0e1"/utf8, 0>>, [explicit_end]) - )}, - {"e error", ?_assertError( - badarg, - Decode(<<"1e"/utf8, 0>>, []) - )}, - {"ex error", ?_assertError( - badarg, - Decode(<<"1e+"/utf8, 0>>, []) - )}, - {"exp error", ?_assertError( - badarg, - Decode(<<"1.e"/utf8>>, []) - )}, - {"true error", ?_assertError( - badarg, - Decode(<<"tru"/utf8, 0>>, []) - )}, - {"false error", ?_assertError( - badarg, - Decode(<<"fals"/utf8, 0>>, []) - )}, - {"null error", ?_assertError( - badarg, - Decode(<<"nul"/utf8, 0>>, []) - )}, - {"maybe_done error", ?_assertError( - badarg, - Decode(<<"[[]"/utf8, 0>>, []) - )}, - {"done error", ?_assertError( - badarg, - Decode(<<"[]"/utf8, 0>>, []) - )}, - {"comment error", ?_assertError( - badarg, - Decode(<<"[ / ]">>, [comments]) - )}, - {"single_comment error", ?_assertError( - badarg, - Decode(<<"[ //"/utf8, 192>>, [comments]) - )}, - {"multi_comment error", ?_assertError( - badarg, - Decode(<<"[ /*"/utf8, 192>>, [comments]) - )} - ]. 
- - -custom_error_handler_test_() -> - Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, - Error = fun(Rest, {_, State, _, _, _}, _) -> {State, Rest} end, - [ - {"maybe_bom error", ?_assertEqual( - {value, <<16#ef, 0>>}, - Decode(<<16#ef, 0>>, [{error_handler, Error}]) - )}, - {"definitely_bom error", ?_assertEqual( - {value, <<16#ef, 16#bb, 0>>}, - Decode(<<16#ef, 16#bb, 0>>, [{error_handler, Error}]) - )}, - {"value error", ?_assertEqual( - {value, <<0>>}, - Decode(<<0>>, [{error_handler, Error}]) - )}, - {"object error", ?_assertEqual( - {object, <<0>>}, - Decode(<<"{"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"colon error", ?_assertEqual( - {colon, <<0>>}, - Decode(<<"{\"\""/utf8, 0>>, [{error_handler, Error}]) - )}, - {"key error", ?_assertEqual( - {key, <<0>>}, - Decode(<<"{\"\":1,"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"negative error", ?_assertEqual( - {value, <<"-"/utf8, 0>>}, - Decode(<<"-"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"zero error", ?_assertEqual( - {value, <<"0"/utf8, 0>>}, - Decode(<<"0"/utf8, 0>>, [explicit_end, {error_handler, Error}]) - )}, - {"integer error", ?_assertEqual( - {integer, <<0>>}, - Decode(<<"1"/utf8, 0>>, [explicit_end, {error_handler, Error}]) - )}, - {"decimal error", ?_assertEqual( - {decimal, <<0>>}, - Decode(<<"1.0"/utf8, 0>>, [explicit_end, {error_handler, Error}]) - )}, - {"exp error", ?_assertEqual( - {exp, <<0>>}, - Decode(<<"1.0e1"/utf8, 0>>, [explicit_end, {error_handler, Error}]) - )}, - {"e error", ?_assertEqual( - {decimal, <<$e, 0>>}, - Decode(<<"1e"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"ex error", ?_assertEqual( - {decimal, <<$e, ?positive, 0>>}, - Decode(<<"1e+"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"exp error", ?_assertEqual( - {decimal, <<$e>>}, - Decode(<<"1.e"/utf8>>, [{error_handler, Error}]) - )}, - {"true error", ?_assertEqual( - {true, <<"ru"/utf8, 0>>}, - Decode(<<"tru"/utf8, 0>>, [{error_handler, Error}]) - )}, - 
{"false error", ?_assertEqual( - {false, <<"als"/utf8, 0>>}, - Decode(<<"fals"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"null error", ?_assertEqual( - {null, <<"ul"/utf8, 0>>}, - Decode(<<"nul"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"maybe_done error", ?_assertEqual( - {maybe_done, <<0>>}, - Decode(<<"[[]"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"done error", ?_assertEqual( - {done, <<0>>}, - Decode(<<"[]"/utf8, 0>>, [{error_handler, Error}]) - )}, - {"comment error", ?_assertEqual( - {value, <<"/ ]"/utf8>>}, - Decode(<<"[ / ]">>, [{error_handler, Error}, comments]) - )}, - {"single_comment error", ?_assertEqual( - {comment, <<192>>}, - Decode(<<"[ //"/utf8, 192>>, [{error_handler, Error}, comments]) - )}, - {"multi_comment error", ?_assertEqual( - {comment, <<192>>}, - Decode(<<"[ /*"/utf8, 192>>, [{error_handler, Error}, comments]) - )} - ]. + Cases = [ + {"maybe_bom error", <<16#ef, 0>>}, + {"definitely_bom error", <<16#ef, 16#bb, 0>>}, + {"object error", <<"{"/utf8, 0>>}, + {"colon error", <<"{\"\""/utf8, 0>>}, + {"key error", <<"{\"\":1,"/utf8, 0>>}, + {"value error", <<0>>}, + {"negative error", <<"-"/utf8, 0>>}, + {"zero error", <<"0"/utf8, 0>>}, + {"integer error", <<"1"/utf8, 0>>}, + {"decimal error", <<"1.0"/utf8, 0>>}, + {"e error", <<"1e"/utf8, 0>>}, + {"ex error", <<"1e+"/utf8, 0>>}, + {"exp error", <<"1e1"/utf8, 0>>}, + {"exp error", <<"1.0e1"/utf8, 0>>}, + {"exp error", <<"1.e"/utf8>>}, + {"true error", <<"tru"/utf8, 0>>}, + {"false error", <<"fals"/utf8, 0>>}, + {"null error", <<"nul"/utf8, 0>>}, + {"maybe_done error", <<"[[]"/utf8, 0>>}, + {"done error", <<"[]"/utf8, 0>>} + ], + [{Title, ?_assertError(badarg, decode(State))} || {Title, State} <- Cases]. 
custom_incomplete_handler_test_() -> - Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, [ {"custom incomplete handler", ?_assertError( - badarg, - Decode(<<>>, [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}]) + incomplete, + decode(<<>>, [{incomplete_handler, fun(_, _, _) -> erlang:error(incomplete) end}, stream]) )} ]. diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 4c7564b..0eaf2f2 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -23,310 +23,66 @@ -module(jsx_encoder). --export([encoder/3]). +-export([encoder/3, encode/1, encode/2, unzip/1]). -spec encoder(Handler::module(), State::any(), Config::list()) -> jsx:encoder(). encoder(Handler, State, Config) -> - fun(JSON) -> - start( - JSON, - {Handler, Handler:init(State)}, - jsx_config:parse_config(Config) - ) - end. + Parser = jsx:parser(Handler, State, Config), + fun(Term) -> Parser(encode(Term) ++ [end_json]) end. +-spec encode(Term::any()) -> any(). --include("jsx_config.hrl"). +encode(Term) -> encode(Term, ?MODULE). --ifndef(error). --define(error(State, Term, Handler, Config), - case Config#config.error_handler of - false -> erlang:error(badarg); - F -> erlang:throw(F(Term, {encoder, State, Handler}, jsx_config:config_to_list(Config))) - end -). --endif. +-spec encode(Term::any(), EntryPoint::module()) -> any(). +encode([], _EntryPoint) -> [start_array, end_array]; +encode([{}], _EntryPoint) -> [start_object, end_object]; -start(Term, {Handler, State}, Config) -> - try Handler:handle_event(end_json, value(pre_encode(Term, Config), {Handler, State}, Config)) - catch - throw:Error -> Error; - Type:Value -> erlang:Type(Value) - end. 
- - -value(String, {Handler, State}, Config) when is_binary(String) -> - Handler:handle_event({string, clean_string(String, {Handler, State}, Config)}, State); -value(Float, {Handler, State}, _Config) when is_float(Float) -> - Handler:handle_event({float, Float}, State); -value(Int, {Handler, State}, _Config) when is_integer(Int) -> - Handler:handle_event({integer, Int}, State); -value(Literal, {Handler, State}, _Config) - when Literal == true; Literal == false; Literal == null -> - Handler:handle_event({literal, Literal}, State); -value(String, {Handler, State}, Config) when is_atom(String) -> - Handler:handle_event({string, clean_string(atom_to_binary(String,latin1), {Handler, State}, Config)}, State); -value([{}], {Handler, State}, _Config) -> - Handler:handle_event(end_object, Handler:handle_event(start_object, State)); -value([], {Handler, State}, _Config) -> - Handler:handle_event(end_array, Handler:handle_event(start_array, State)); -value(List, Handler, Config) when is_list(List) -> - list_or_object(List, Handler, Config); -value(Term, Handler, Config) -> ?error(value, Term, Handler, Config). - - -list_or_object([Term|Rest], {Handler, State}, Config) -> - case pre_encode(Term, Config) of - {K, V} when is_atom(K); is_binary(K); is_integer(K) -> - object([{K, V}|Rest], {Handler, Handler:handle_event(start_object, State)}, Config) - ; T -> - list([T|Rest], {Handler, Handler:handle_event(start_array, State)}, Config) - end. 
- - -object([{Key, Value}, Next|Rest], {Handler, State}, Config) when is_atom(Key); is_binary(Key); is_integer(Key) -> - V = pre_encode(Value, Config), - object( - [pre_encode(Next, Config)|Rest], - { - Handler, - value( - V, - {Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)}, - Config - ) - }, - Config +encode([{_, _}|_] = Term, EntryPoint) -> + lists:flatten( + [start_object] ++ [ EntryPoint:encode(T, EntryPoint) || T <- unzip(Term) ] ++ [end_object] ); -object([{Key, Value}], {Handler, State}, Config) when is_atom(Key); is_binary(Key); is_integer(Key) -> - object( - [], - { - Handler, - value( - pre_encode(Value, Config), - {Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)}, - Config - ) - }, - Config +encode(Term, EntryPoint) when is_list(Term) -> + lists:flatten( + [start_array] ++ [ EntryPoint:encode(T, EntryPoint) || T <- Term ] ++ [end_array] ); -object([], {Handler, State}, _Config) -> Handler:handle_event(end_object, State); -object(Term, Handler, Config) -> ?error(object, Term, Handler, Config). + +encode(Else, _EntryPoint) -> [Else]. -list([Value, Next|Rest], {Handler, State}, Config) -> - list([pre_encode(Next, Config)|Rest], {Handler, value(Value, {Handler, State}, Config)}, Config); -list([Value], {Handler, State}, Config) -> - list([], {Handler, value(Value, {Handler, State}, Config)}, Config); -list([], {Handler, State}, _Config) -> Handler:handle_event(end_array, State). +unzip(List) -> unzip(List, []). -pre_encode(Value, #config{pre_encode=false}) -> Value; -pre_encode(Value, Config) -> (Config#config.pre_encode)(Value). - - -fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8)); -fix_key(Key) when is_integer(Key) -> fix_key(list_to_binary(integer_to_list(Key))); -fix_key(Key) when is_binary(Key) -> Key. 
- - -clean_string(Bin, Handler, Config) -> - case clean_string(Bin, Config) of - {error, badarg} -> ?error(string, Bin, Handler, Config); - String -> String - end. - - - --include("jsx_strings.hrl"). +unzip([], Acc) -> lists:reverse(Acc); +unzip([{K, V}|Rest], Acc) when is_binary(K); is_atom(K); is_integer(K) -> unzip(Rest, [V, K] ++ Acc). -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -encode_test_() -> - Data = jsx:test_cases(), - [ - { - Title, ?_assertEqual( - Events ++ [end_json], - start(Term, {jsx, []}, #config{}) - ) - } || {Title, _, Term, Events} <- Data - ]. +parser(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term). -encode(Term, Config) -> start(Term, {jsx, []}, jsx_config:parse_config(Config)). - -pre_encoders_test_() -> - Term = [ - {<<"object">>, [ - {atomkey, atomvalue}, - {<<"literals">>, [true, false, null]}, - {<<"strings">>, [<<"foo">>, <<"bar">>, <<"baz">>]}, - {<<"numbers">>, [1, 1.0, 1.0e0]} - ]} - ], - [ - {"no pre encode", ?_assertEqual( - [ - start_object, - {key, <<"object">>}, start_object, - {key, <<"atomkey">>}, {string, <<"atomvalue">>}, - {key, <<"literals">>}, start_array, - {literal, true}, {literal, false}, {literal, null}, - end_array, - {key, <<"strings">>}, start_array, - {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>}, - end_array, - {key, <<"numbers">>}, start_array, - {integer, 1}, {float, 1.0}, {float, 1.0}, - end_array, - end_object, - end_object, - end_json - ], - encode(Term, []) - )}, - {"replace lists with empty lists", ?_assertEqual( - [ - start_object, - {key, <<"object">>}, start_object, - {key, <<"atomkey">>}, {string, <<"atomvalue">>}, - {key, <<"literals">>}, start_array, end_array, - {key, <<"strings">>}, start_array, end_array, - {key, <<"numbers">>}, start_array, end_array, - end_object, - end_object, - end_json - ], - encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end}]) - )}, - {"replace objects with empty objects", 
?_assertEqual( - [ - start_object, - end_object, - end_json - ], - encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end}]) - )}, - {"replace all non-list and non_tuple values with false", ?_assertEqual( - [ - start_object, - {key, <<"object">>}, start_object, - {key, <<"atomkey">>}, {literal, false}, - {key, <<"literals">>}, start_array, - {literal, false}, {literal, false}, {literal, false}, - end_array, - {key, <<"strings">>}, start_array, - {literal, false}, {literal, false}, {literal, false}, - end_array, - {key, <<"numbers">>}, start_array, - {literal, false}, {literal, false}, {literal, false}, - end_array, - end_object, - end_object, - end_json - ], - encode(Term, [{pre_encode, fun(V) when is_list(V); is_tuple(V) -> V; (_) -> false end}]) - )}, - {"replace all atoms with atom_to_list", ?_assertEqual( - [ - start_object, - {key, <<"object">>}, start_object, - {key, <<"atomkey">>}, {string, <<"atomvalue">>}, - {key, <<"literals">>}, start_array, - {string, <<"true">>}, {string, <<"false">>}, {string, <<"null">>}, - end_array, - {key, <<"strings">>}, start_array, - {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>}, - end_array, - {key, <<"numbers">>}, start_array, - {integer, 1}, {float, 1.0}, {float, 1.0}, - end_array, - end_object, - end_object, - end_json - ], - encode(Term, [{pre_encode, fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end}]) - )}, - {"pre_encode tuple", ?_assertEqual( - [ - start_array, - {integer, 1}, {integer, 2}, {integer, 3}, - end_array, - end_json - ], - encode({1, 2, 3}, [{pre_encode, fun(Tuple) when is_tuple(Tuple) -> tuple_to_list(Tuple); (V) -> V end}]) - )}, - {"pre_encode 2-tuples", ?_assertEqual( - [ - start_object, - {key, <<"two">>}, {integer, 2}, {key, <<"three">>}, {integer, 3}, - end_object, - end_json - ], - encode([{two, 1}, {three, 2}], [{pre_encode, fun({K, V}) -> {K, V + 1}; (V) -> V end}]) - )}, - {"pre_encode one field record", ?_assertEqual( 
- [ - start_object, - {key, <<"bar">>}, {literal, false}, - end_object, - end_json - ], - encode([{foo, bar}], [{pre_encode, fun({foo, V}) -> {V, undefined}; (undefined) -> false; (V) -> V end}]) - )}, - {"pre_encode list", ?_assertEqual( - [ - start_array, - {integer, 2}, {integer, 3}, {integer, 4}, - end_array, - end_json - ], - encode([1,2,3], [{pre_encode, fun(X) when is_integer(X) -> X + 1; (V) -> V end}]) - )} - ]. - error_test_() -> [ - {"value error", ?_assertError(badarg, encode(self(), []))}, - {"string error", ?_assertError(badarg, encode(<<239, 191, 191>>, []))} + {"value error", ?_assertError(badarg, parser(self(), []))}, + {"string error", ?_assertError(badarg, parser(<<239, 191, 191>>, [strict]))} ]. custom_error_handler_test_() -> - Error = fun(Term, {_, State, _}, _) -> {State, Term} end, + Error = fun(Term, {_, State, _, _}, _) -> {State, Term} end, [ {"value error", ?_assertEqual( - {value, self()}, - encode(self(), [{error_handler, Error}]) + {value, [self()]}, + parser(self(), [{error_handler, Error}]) )}, {"string error", ?_assertEqual( - {string, <<239, 191, 191>>}, - encode(<<239, 191, 191>>, [{error_handler, Error}]) - )} - ]. - -integer_key_test_() -> - Term = [{123, [{456, 789}]}], - [ - {"basic integer keys", ?_assertEqual( - [ - start_object, - {key, <<"123">>}, - start_object, - {key, <<"456">>}, - {integer, 789}, - end_object, - end_object, - end_json - ], - encode(Term, []) + {string, [{string, <<239, 191, 191>>}]}, + parser(<<239, 191, 191>>, [{error_handler, Error}, strict]) )} ]. diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 4e6a9a0..c5c23a2 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -68,6 +68,8 @@ resume(Rest, State, Handler, Stack, Config) -> -endif. 
+incomplete(State, Handler, Stack, Config=#config{stream=false}) -> + ?error(State, [], Handler, Stack, Config); incomplete(State, Handler, Stack, Config=#config{incomplete_handler=false}) -> {incomplete, fun(end_stream) -> case resume([end_json], State, Handler, Stack, Config) of @@ -82,8 +84,6 @@ incomplete(State, Handler, Stack, Config=#config{incomplete_handler=F}) -> F([], {parser, State, Handler, Stack}, jsx_config:config_to_list(Config)). -%handle_event([], Handler, _Config) -> Handler; -%handle_event([Event|Rest], Handler, Config) -> handle_event(Rest, handle_event(Event, Handler, Config), Config); handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}. @@ -91,24 +91,10 @@ value([start_object|Tokens], Handler, Stack, Config) -> object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config); value([start_array|Tokens], Handler, Stack, Config) -> array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config); -value([{literal, true}|Tokens], Handler, [], Config) -> - done(Tokens, handle_event({literal, true}, Handler, Config), [], Config); -value([{literal, false}|Tokens], Handler, [], Config) -> - done(Tokens, handle_event({literal, false}, Handler, Config), [], Config); -value([{literal, null}|Tokens], Handler, [], Config) -> - done(Tokens, handle_event({literal, null}, Handler, Config), [], Config); -value([{literal, true}|Tokens], Handler, Stack, Config) -> - maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config); -value([{literal, false}|Tokens], Handler, Stack, Config) -> - maybe_done(Tokens, handle_event({literal, false}, Handler, Config), Stack, Config); -value([{literal, null}|Tokens], Handler, Stack, Config) -> - maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config); +value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null -> + maybe_done(Tokens, 
handle_event({literal, Literal}, Handler, Config), Stack, Config); value([Literal|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null -> value([{literal, Literal}] ++ Tokens, Handler, Stack, Config); -value([{integer, Number}|Tokens], Handler, [], Config) when is_integer(Number) -> - done(Tokens, handle_event({integer, Number}, Handler, Config), [], Config); -value([{float, Number}|Tokens], Handler, [], Config) when is_float(Number) -> - done(Tokens, handle_event({float, Number}, Handler, Config), [], Config); value([{integer, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) -> maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config); value([{float, Number}|Tokens], Handler, Stack, Config) when is_float(Number) -> @@ -121,12 +107,6 @@ value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) -> value([{integer, Number}] ++ Tokens, Handler, Stack, Config); value([Number|Tokens], Handler, Stack, Config) when is_float(Number) -> value([{float, Number}] ++ Tokens, Handler, Stack, Config); -value([{string, String}|Tokens], Handler, [], Config) when is_binary(String) -> - case clean_string(String, Tokens, Handler, [], Config) of - Clean when is_binary(Clean) -> - done(Tokens, handle_event({string, Clean}, Handler, Config), [], Config); - Error -> Error - end; value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) -> case clean_string(String, Tokens, Handler, Stack, Config) of Clean when is_binary(Clean) -> @@ -135,6 +115,8 @@ value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) end; value([String|Tokens], Handler, Stack, Config) when is_binary(String) -> value([{string, String}] ++ Tokens, Handler, Stack, Config); +value([String|Tokens], Handler, Stack, Config) when is_atom(String) -> + value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config); value([{raw, Raw}|Tokens], Handler, Stack, Config) when 
is_binary(Raw) -> value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config); value([], Handler, Stack, Config) -> @@ -146,13 +128,13 @@ value(Token, Handler, Stack, Config) -> object([end_object|Tokens], Handler, [object|Stack], Config) -> maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config); -object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key) -> +object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) -> case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of Clean when is_binary(Clean) -> value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config); Error -> Error end; -object([Key|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key) -> +object([Key|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) -> case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of Clean when is_binary(Clean) -> value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config); @@ -185,7 +167,7 @@ maybe_done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) -> maybe_done(Token, Handler, Stack, Config) -> maybe_done([Token], Handler, Stack, Config). -done([], Handler, [], Config=#config{explicit_end=true}) -> +done([], Handler, [], Config=#config{stream=true}) -> incomplete(done, Handler, [], Config); done(Tokens, Handler, [], Config) when Tokens == [end_json]; Tokens == [] -> {_, State} = handle_event(end_json, Handler, Config), @@ -196,7 +178,8 @@ done(Token, Handler, Stack, Config) -> done([Token], Handler, Stack, Config). -fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8)); +fix_key(Key) when is_atom(Key) -> atom_to_binary(Key, utf8); +fix_key(Key) when is_integer(Key) -> list_to_binary(integer_to_list(Key)); fix_key(Key) when is_binary(Key) -> Key. @@ -206,6 +189,256 @@ clean_string(Bin, Tokens, Handler, Stack, Config) -> String -> String end. 
%% Sanitise a string according to Config.
%%
%% With `dirty_strings` set the binary is returned untouched. Otherwise the
%% input is walked codepoint by codepoint by clean/3, which returns either the
%% cleaned utf8 binary or whatever maybe_replace/4 returns when it refuses a
%% codepoint.
clean_string(Bin, #config{dirty_strings=true}) -> Bin;
clean_string(Bin, Config) -> clean(Bin, [], Config).


%% escape and/or replace bad codepoints if requested
%%
%% Acc collects the accepted codepoints in reverse order; it is reversed and
%% encoded as utf8 once the input is exhausted. Codepoints that may need an
%% escape sequence, and byte sequences that are not acceptable utf8, are
%% delegated to maybe_replace/4, which decides what to emit and then resumes
%% cleaning. Clause order matters: well formed codepoints are tried first, the
%% byte level fallbacks only see input that failed to decode.
clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc));
%% control characters, double quote, solidus and backslash may need escaping
clean(<<X, Rest/binary>>, Acc, Config) when X < 32; X =:= $\"; X =:= $/; X =:= $\\ ->
    maybe_replace(X, Rest, Acc, Config);
%% every other 7 bit codepoint is copied through as is
clean(<<X, Rest/binary>>, Acc, Config) when X < 128 ->
    clean(Rest, [X | Acc], Config);
%% u+2028 and u+2029 may need escaping
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
    maybe_replace(X, Rest, Acc, Config);
%% basic multilingual plane, minus surrogates (d800..dfff), the reserved
%% space (fdd0..fdef) and the noncharacters fffe and ffff
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#d800 ->
    clean(Rest, [X | Acc], Config);
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#dfff, X < 16#fdd0 ->
    clean(Rest, [X | Acc], Config);
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#fdef, X < 16#fffe ->
    clean(Rest, [X | Acc], Config);
%% planes 1 through 16, minus the two noncharacters (xfffe and xffff) that
%% close each plane
clean(<<X/utf8, Rest/binary>>, Acc, Config)
        when X >= 16#10000, X =< 16#10ffff, (X band 16#ffff) < 16#fffe ->
    clean(Rest, [X | Acc], Config);
%% surrogates
clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
    maybe_replace(surrogate, Rest, Acc, Config);
%% noncharacters
clean(<<_/utf8, Rest/binary>>, Acc, Config) ->
    maybe_replace(noncharacter, Rest, Acc, Config);
%% u+fffe and u+ffff for R14BXX
clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X == 190; X == 191 ->
    maybe_replace(noncharacter, Rest, Acc, Config);
%% overlong encodings and missing continuations of a 2 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 ->
    maybe_replace(badutf, strip_continuations(Rest, 1), Acc, Config);
%% overlong encodings and missing continuations of a 3 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
    maybe_replace(badutf, strip_continuations(Rest, 2), Acc, Config);
%% overlong encodings and missing continuations of a 4 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
    maybe_replace(badutf, strip_continuations(Rest, 3), Acc, Config);
%% any remaining byte that matched none of the clauses above
clean(<<_, Rest/binary>>, Acc, Config) ->
    maybe_replace(badutf, Rest, Acc, Config).


%% Drop up to N leading continuation bytes (128..191) from Bin and return what
%% is left. Stops early at the first byte that is not a continuation byte, so
%% the whole malformed sequence is reported as a single bad codepoint.
strip_continuations(Bin, 0) -> Bin;
strip_continuations(<<X, Rest/binary>>, N) when X >= 128, X =< 191 ->
    strip_continuations(Rest, N - 1);
%% not a continuation byte
strip_continuations(Bin, _) -> Bin.
%% Decide what to push onto Acc for a codepoint, or bad-sequence marker, that
%% clean/3 singled out, then carry on cleaning Rest.
%%
%% The first argument is either an integer codepoint or one of the atoms
%% `noncharacter`, `surrogate` and `badutf`.
%%   * with `escaped_strings` set, codepoints that need it are replaced by
%%     their json escape sequence (Acc is reversed, so sequences are pushed
%%     back to front)
%%   * with `strict_utf8` set, any atom marker yields {error, badarg}
%%   * otherwise the three markers become u+fffd and everything else is kept
%%     unchanged
maybe_replace($\b, Rest, Acc, #config{escaped_strings=true} = Config) ->
    clean(Rest, [$b, $\\ | Acc], Config);
maybe_replace($\t, Rest, Acc, #config{escaped_strings=true} = Config) ->
    clean(Rest, [$t, $\\ | Acc], Config);
maybe_replace($\n, Rest, Acc, #config{escaped_strings=true} = Config) ->
    clean(Rest, [$n, $\\ | Acc], Config);
maybe_replace($\f, Rest, Acc, #config{escaped_strings=true} = Config) ->
    clean(Rest, [$f, $\\ | Acc], Config);
maybe_replace($\r, Rest, Acc, #config{escaped_strings=true} = Config) ->
    clean(Rest, [$r, $\\ | Acc], Config);
maybe_replace($\", Rest, Acc, #config{escaped_strings=true} = Config) ->
    clean(Rest, [$\", $\\ | Acc], Config);
maybe_replace($/, Rest, Acc, #config{escaped_strings=true} = Config) ->
    %% the solidus is only escaped when explicitly asked for
    Escaped = case Config#config.escaped_forward_slashes of
        true -> [$/, $\\ | Acc];
        false -> [$/ | Acc]
    end,
    clean(Rest, Escaped, Config);
maybe_replace($\\, Rest, Acc, #config{escaped_strings=true} = Config) ->
    clean(Rest, [$\\, $\\ | Acc], Config);
maybe_replace(X, Rest, Acc, #config{escaped_strings=true} = Config)
        when X == 16#2028; X == 16#2029 ->
    %% u+2028 and u+2029 are escaped unless `unescaped_jsonp` is set
    Escaped = case Config#config.unescaped_jsonp of
        true -> [X | Acc];
        false -> lists:reverse(json_escape_sequence(X), Acc)
    end,
    clean(Rest, Escaped, Config);
maybe_replace(X, Rest, Acc, #config{escaped_strings=true} = Config) when X < 32 ->
    %% remaining control characters have no short form, use \uXXXX
    clean(Rest, lists:reverse(json_escape_sequence(X), Acc), Config);
maybe_replace(Marker, _, _, #config{strict_utf8=true}) when is_atom(Marker) ->
    {error, badarg};
maybe_replace(Marker, Rest, Acc, Config)
        when Marker == noncharacter; Marker == surrogate; Marker == badutf ->
    %% substitute the unicode replacement character
    clean(Rest, [16#fffd | Acc], Config);
maybe_replace(X, Rest, Acc, Config) ->
    clean(Rest, [X | Acc], Config).


%% convert a codepoint to its \uXXXX equiv.
+json_escape_sequence(X) -> + <> = <>, + [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]. + + +to_hex(10) -> $a; +to_hex(11) -> $b; +to_hex(12) -> $c; +to_hex(13) -> $d; +to_hex(14) -> $e; +to_hex(15) -> $f; +to_hex(X) -> X + 48. %% ascii "1" is [49], "2" is [50], etc... + %% for raw input -spec init(proplists:proplist()) -> list(). @@ -218,54 +451,20 @@ handle_event(end_json, State) -> lists:reverse(State); handle_event(Event, State) -> [Event] ++ State. --include("jsx_strings.hrl"). - -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -parse(Events, Config) -> - Chunk = try - value(Events ++ [end_json], {jsx, []}, [], jsx_config:parse_config(Config)) - catch - error:badarg -> {error, badarg} - end, - Incremental = try - Final = lists:foldl( - fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end, - parser(jsx, [], [explicit_end] ++ Config), - lists:map(fun(X) -> [X] end, Events) - ), - Final(end_stream) - catch - error:badarg -> {error, badarg} - end, - ?assert(Chunk == Incremental), - Chunk. - - -parse_test_() -> - Data = jsx:test_cases(), - [ - { - Title, ?_assertEqual( - Events ++ [end_json], - parse(Events, []) - ) - } || {Title, _, _, Events} <- Data - ]. - - -parse_error(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)). +parse(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)). 
error_test_() -> [ - {"value error", ?_assertError(badarg, parse_error([self()], []))}, - {"maybe_done error", ?_assertError(badarg, parse_error([start_array, end_array, start_array, end_json], []))}, - {"done error", ?_assertError(badarg, parse_error([{string, <<"">>}, {literal, true}, end_json], []))}, - {"string error", ?_assertError(badarg, parse_error([{string, <<239, 191, 191>>}, end_json], []))} + {"value error", ?_assertError(badarg, parse([self()], []))}, + {"maybe_done error", ?_assertError(badarg, parse([start_array, end_array, start_array, end_json], []))}, + {"done error", ?_assertError(badarg, parse([{string, <<"">>}, {literal, true}, end_json], []))}, + {"string error", ?_assertError(badarg, parse([{string, <<239, 191, 191>>}, end_json], [strict_utf8]))} ]. @@ -274,47 +473,540 @@ custom_error_handler_test_() -> [ {"value error", ?_assertEqual( {value, [self()]}, - parse_error([self()], [{error_handler, Error}]) + parse([self()], [{error_handler, Error}]) )}, {"maybe_done error", ?_assertEqual( {maybe_done, [start_array, end_json]}, - parse_error([start_array, end_array, start_array, end_json], [{error_handler, Error}]) + parse([start_array, end_array, start_array, end_json], [{error_handler, Error}]) )}, {"done error", ?_assertEqual( - {done, [{literal, true}, end_json]}, - parse_error([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}]) + {maybe_done, [{literal, true}, end_json]}, + parse([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}]) )}, {"string error", ?_assertEqual( {string, [{string, <<239, 191, 191>>}, end_json]}, - parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}]) + parse([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict]) )} ]. 
+incomplete_test_() -> + Cases = [ + {"incomplete value", []}, + {"incomplete object", [start_object]}, + {"incomplete array", [start_array]}, + {"incomplete maybe_done", [start_array, end_array]} + ], + [{Title, ?_assertError(badarg, parse(Events, []))} + || {Title, Events} <- Cases + ]. + + custom_incomplete_handler_test_() -> [ {"custom incomplete handler", ?_assertError( badarg, - parse_error([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}]) + parse([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}]) )} ]. raw_test_() -> + Parse = fun(Events, Config) -> (parser(?MODULE, [], Config))(Events ++ [end_json]) end, [ {"raw empty list", ?_assertEqual( - [start_array, end_array, end_json], - parse([{raw, <<"[]">>}], []) + [start_array, end_array], + Parse([{raw, <<"[]">>}], []) )}, {"raw empty object", ?_assertEqual( - [start_object, end_object, end_json], - parse([{raw, <<"{}">>}], []) + [start_object, end_object], + Parse([{raw, <<"{}">>}], []) )}, {"raw chunk inside stream", ?_assertEqual( - [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object, end_json], - parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], []) + [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object], + Parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], []) )} ]. +%% erlang refuses to encode certain codepoints, so fake them +to_fake_utf8(N) when N < 16#0080 -> <>; +to_fake_utf8(N) when N < 16#0800 -> + <<0:5, Y:5, X:6>> = <>, + <<2#110:3, Y:5, 2#10:2, X:6>>; +to_fake_utf8(N) when N < 16#10000 -> + <> = <>, + <<2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6>>; +to_fake_utf8(N) -> + <<0:3, W:3, Z:6, Y:6, X:6>> = <>, + <<2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6>>. 
%% utf8 binary holding the codepoints from 32 up to u+fffd, leaving out
%% 34, 47 and 92 (quote, solidus, backslash), u+2028/u+2029, the surrogate
%% range d800..dfff and the reserved space fdd0..fdef
codepoints() ->
    Ranges = [
        {32, 33},
        {35, 46},
        {48, 91},
        {93, 16#2027},
        {16#202a, 16#d7ff},
        {16#e000, 16#fdcf},
        {16#fdf0, 16#fffd}
    ],
    unicode:characters_to_binary(
        [Codepoint || {First, Last} <- Ranges, Codepoint <- lists:seq(First, Last)]
    ).

%% utf8 binary holding all of plane 1 up to u+1fffd followed by the first
%% codepoint of each of the planes 2 through 16
extended_codepoints() ->
    PlaneOne = lists:seq(16#10000, 16#1fffd),
    PlaneStarts = [Plane * 16#10000 || Plane <- lists:seq(2, 16)],
    unicode:characters_to_binary(PlaneOne ++ PlaneStarts).

%% one hand encoded binary per codepoint in fdd0..fdef
reserved_space() -> lists:map(fun to_fake_utf8/1, lists:seq(16#fdd0, 16#fdef)).

%% one hand encoded binary per codepoint in d800..dfff
surrogates() -> lists:map(fun to_fake_utf8/1, lists:seq(16#d800, 16#dfff)).

%% hand encoded u+fffe and u+ffff
noncharacters() -> lists:map(fun to_fake_utf8/1, lists:seq(16#fffe, 16#ffff)).

%% hand encoded xfffe and xffff of each of the planes 1 through 16
extended_noncharacters() ->
    [
        to_fake_utf8(Plane * 16#10000 + Offset)
        || Plane <- lists:seq(1, 16), Offset <- [16#fffe, 16#ffff]
    ].
%% clean_string/2 over whole codepoint classes: acceptable codepoints must
%% come back unchanged, unacceptable ones are either rejected (strict_utf8)
%% or replaced with u+fffd (default config)
clean_string_test_() ->
    Lenient = #config{},
    Escaping = #config{escaped_strings=true},
    Strict = #config{strict_utf8=true},
    Replacement = <<16#fffd/utf8>>,
    %% clean every string of a list with the same config
    CleanEach = fun(Strings, Config) ->
        [clean_string(String, Config) || String <- Strings]
    end,
    %% a list as long as Strings with every element set to Value
    Repeat = fun(Strings, Value) -> [Value || _ <- Strings] end,
    [
        {"clean codepoints", ?_assertEqual(
            codepoints(),
            clean_string(codepoints(), Lenient)
        )},
        {"clean extended codepoints", ?_assertEqual(
            extended_codepoints(),
            clean_string(extended_codepoints(), Lenient)
        )},
        {"escape path codepoints", ?_assertEqual(
            codepoints(),
            clean_string(codepoints(), Escaping)
        )},
        {"escape path extended codepoints", ?_assertEqual(
            extended_codepoints(),
            clean_string(extended_codepoints(), Escaping)
        )},
        {"error reserved space", ?_assertEqual(
            Repeat(reserved_space(), {error, badarg}),
            CleanEach(reserved_space(), Strict)
        )},
        {"error surrogates", ?_assertEqual(
            Repeat(surrogates(), {error, badarg}),
            CleanEach(surrogates(), Strict)
        )},
        {"error noncharacters", ?_assertEqual(
            Repeat(noncharacters(), {error, badarg}),
            CleanEach(noncharacters(), Strict)
        )},
        {"error extended noncharacters", ?_assertEqual(
            Repeat(extended_noncharacters(), {error, badarg}),
            CleanEach(extended_noncharacters(), Strict)
        )},
        {"clean reserved space", ?_assertEqual(
            Repeat(reserved_space(), Replacement),
            CleanEach(reserved_space(), Lenient)
        )},
        {"clean surrogates", ?_assertEqual(
            Repeat(surrogates(), Replacement),
            CleanEach(surrogates(), Lenient)
        )},
        {"clean noncharacters", ?_assertEqual(
            Repeat(noncharacters(), Replacement),
            CleanEach(noncharacters(), Lenient)
        )},
        {"clean extended noncharacters", ?_assertEqual(
            Repeat(extended_noncharacters(), Replacement),
            CleanEach(extended_noncharacters(), Lenient)
        )}
    ].


%% clean_string/2 on single codepoints that have, or may have, a json escape
%% sequence. each case is {Title, Expected, Input, Config}
escape_test_() ->
    Escaping = #config{escaped_strings=true},
    Named = [
        {"maybe_escape backspace", <<"\\b">>, <<16#0008/utf8>>, Escaping},
        {"don't escape backspace", <<"\b">>, <<16#0008/utf8>>, #config{}},
        {"maybe_escape tab", <<"\\t">>, <<16#0009/utf8>>, Escaping},
        {"maybe_escape newline", <<"\\n">>, <<16#000a/utf8>>, Escaping},
        {"maybe_escape formfeed", <<"\\f">>, <<16#000c/utf8>>, Escaping},
        {"maybe_escape carriage return", <<"\\r">>, <<16#000d/utf8>>, Escaping},
        {"maybe_escape quote", <<"\\\"">>, <<16#0022/utf8>>, Escaping},
        {"maybe_escape forward slash", <<"\\/">>, <<16#002f/utf8>>,
            #config{escaped_strings=true, escaped_forward_slashes=true}},
        {"do not maybe_escape forward slash", <<"/">>, <<16#002f/utf8>>, Escaping},
        {"maybe_escape backslash", <<"\\\\">>, <<16#005c/utf8>>, Escaping},
        {"maybe_escape jsonp (u2028)", <<"\\u2028">>, <<16#2028/utf8>>, Escaping},
        {"do not maybe_escape jsonp (u2028)", <<16#2028/utf8>>, <<16#2028/utf8>>,
            #config{escaped_strings=true, unescaped_jsonp=true}},
        {"maybe_escape jsonp (u2029)", <<"\\u2029">>, <<16#2029/utf8>>, Escaping},
        {"do not maybe_escape jsonp (u2029)", <<16#2029/utf8>>, <<16#2029/utf8>>,
            #config{escaped_strings=true, unescaped_jsonp=true}}
    ],
    %% control characters without a short escape: expect \uXXXX in lowercase hex
    Controls = lists:seq(16#0000, 16#0007) ++ [16#000b] ++ lists:seq(16#000e, 16#001f),
    Generic = [
        {
            lists:flatten(io_lib:format("maybe_escape u~4.16.0b", [Codepoint])),
            list_to_binary(io_lib:format("\\u~4.16.0b", [Codepoint])),
            <<Codepoint/utf8>>,
            Escaping
        }
        || Codepoint <- Controls
    ],
    [
        {Title, ?_assertEqual(Expected, clean_string(Input, Config))}
        || {Title, Expected, Input, Config} <- Named ++ Generic
    ].
+ + +bad_utf8_test_() -> + [ + {"noncharacter u+fffe", ?_assertEqual( + {error, badarg}, + clean_string(to_fake_utf8(16#fffe), #config{strict_utf8=true}) + )}, + {"noncharacter u+fffe replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(to_fake_utf8(16#fffe), #config{}) + )}, + {"noncharacter u+ffff", ?_assertEqual( + {error, badarg}, + clean_string(to_fake_utf8(16#ffff), #config{strict_utf8=true}) + )}, + {"noncharacter u+ffff replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(to_fake_utf8(16#ffff), #config{}) + )}, + {"orphan continuation byte u+0080", ?_assertEqual( + {error, badarg}, + clean_string(<<16#0080>>, #config{strict_utf8=true}) + )}, + {"orphan continuation byte u+0080 replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#0080>>, #config{}) + )}, + {"orphan continuation byte u+00bf", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00bf>>, #config{strict_utf8=true}) + )}, + {"orphan continuation byte u+00bf replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#00bf>>, #config{}) + )}, + {"2 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{strict_utf8=true}) + )}, + {"2 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 2), + clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) + )}, + {"3 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{strict_utf8=true}) + )}, + {"3 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 3), + clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) + )}, + {"4 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{strict_utf8=true}) + )}, + {"4 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 4), + clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) + )}, + 
{"5 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{strict_utf8=true}) + )}, + {"5 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 5), + clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) + )}, + {"6 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{strict_utf8=true}) + )}, + {"6 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 6), + clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) + )}, + {"all continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{strict_utf8=true}) + )}, + {"all continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), + clean_string( + <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, + #config{} + ) + )}, + {"lonely start byte", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00c0>>, #config{strict_utf8=true}) + )}, + {"lonely start byte replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#00c0>>, #config{}) + )}, + {"lonely start bytes (2 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00c0, 32, 16#00df>>, #config{strict_utf8=true}) + )}, + {"lonely start bytes (2 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00c0, 32, 16#00df>>, #config{}) + )}, + {"lonely start bytes (3 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00e0, 32, 16#00ef>>, #config{strict_utf8=true}) + )}, + {"lonely start bytes (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) + )}, + {"lonely start bytes (4 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00f0, 32, 16#00f7>>, #config{strict_utf8=true}) + )}, + {"lonely start 
bytes (4 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) + )}, + {"missing continuation byte (3 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<224, 160, 32>>, #config{strict_utf8=true}) + )}, + {"missing continuation byte (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<224, 160, 32>>, #config{}) + )}, + {"missing continuation byte (4 byte missing one)", ?_assertEqual( + {error, badarg}, + clean_string(<<240, 144, 128, 32>>, #config{strict_utf8=true}) + )}, + {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<240, 144, 128, 32>>, #config{}) + )}, + {"missing continuation byte (4 byte missing two)", ?_assertEqual( + {error, badarg}, + clean_string(<<240, 144, 32>>, #config{strict_utf8=true}) + )}, + {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<240, 144, 32>>, #config{}) + )}, + {"overlong encoding of u+002f (2 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#c0, 16#af, 32>>, #config{strict_utf8=true}) + )}, + {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#c0, 16#af, 32>>, #config{}) + )}, + {"overlong encoding of u+002f (3 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{strict_utf8=true}) + )}, + {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) + )}, + {"overlong encoding of u+002f (4 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{strict_utf8=true}) + )}, + {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) + )}, + {"highest overlong 2 byte sequence", 
?_assertEqual( + {error, badarg}, + clean_string(<<16#c1, 16#bf, 32>>, #config{strict_utf8=true}) + )}, + {"highest overlong 2 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#c1, 16#bf, 32>>, #config{}) + )}, + {"highest overlong 3 byte sequence", ?_assertEqual( + {error, badarg}, + clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{strict_utf8=true}) + )}, + {"highest overlong 3 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) + )}, + {"highest overlong 4 byte sequence", ?_assertEqual( + {error, badarg}, + clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{strict_utf8=true}) + )}, + {"highest overlong 4 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) + )} + ]. + + +json_escape_sequence_test_() -> + [ + {"json escape sequence test - 16#0000", ?_assertEqual(json_escape_sequence(16#0000), "\\u0000")}, + {"json escape sequence test - 16#abc", ?_assertEqual(json_escape_sequence(16#abc), "\\u0abc")}, + {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} + ]. + + +fix_key_test_() -> + [ + {"binary key", ?_assertEqual(fix_key(<<"foo">>), <<"foo">>)}, + {"atom key", ?_assertEqual(fix_key(foo), <<"foo">>)}, + {"integer key", ?_assertEqual(fix_key(123), <<"123">>)} + ]. + -endif. diff --git a/src/jsx_strings.hrl b/src/jsx_strings.hrl deleted file mode 100644 index b572480..0000000 --- a/src/jsx_strings.hrl +++ /dev/null @@ -1,403 +0,0 @@ -clean_string(Bin, #config{dirty_strings=true}) -> Bin; -clean_string(Bin, Config) -> - case Config#config.replaced_bad_utf8 orelse Config#config.escaped_strings of - true -> clean(Bin, [], Config); - false -> ensure_clean(Bin) - end. - - -ensure_clean(Bin) -> - case is_clean(Bin) of - ok -> Bin; - {error, badarg} -> {error, badarg} - end. 
- -%% fast path for no escaping and no correcting, throws error if string is 'bad' -is_clean(<<>>) -> ok; -is_clean(<<0, Rest/binary>>) -> is_clean(Rest); -is_clean(<<1, Rest/binary>>) -> is_clean(Rest); -is_clean(<<2, Rest/binary>>) -> is_clean(Rest); -is_clean(<<3, Rest/binary>>) -> is_clean(Rest); -is_clean(<<4, Rest/binary>>) -> is_clean(Rest); -is_clean(<<5, Rest/binary>>) -> is_clean(Rest); -is_clean(<<6, Rest/binary>>) -> is_clean(Rest); -is_clean(<<7, Rest/binary>>) -> is_clean(Rest); -is_clean(<<8, Rest/binary>>) -> is_clean(Rest); -is_clean(<<9, Rest/binary>>) -> is_clean(Rest); -is_clean(<<10, Rest/binary>>) -> is_clean(Rest); -is_clean(<<11, Rest/binary>>) -> is_clean(Rest); -is_clean(<<12, Rest/binary>>) -> is_clean(Rest); -is_clean(<<13, Rest/binary>>) -> is_clean(Rest); -is_clean(<<14, Rest/binary>>) -> is_clean(Rest); -is_clean(<<15, Rest/binary>>) -> is_clean(Rest); -is_clean(<<16, Rest/binary>>) -> is_clean(Rest); -is_clean(<<17, Rest/binary>>) -> is_clean(Rest); -is_clean(<<18, Rest/binary>>) -> is_clean(Rest); -is_clean(<<19, Rest/binary>>) -> is_clean(Rest); -is_clean(<<20, Rest/binary>>) -> is_clean(Rest); -is_clean(<<21, Rest/binary>>) -> is_clean(Rest); -is_clean(<<22, Rest/binary>>) -> is_clean(Rest); -is_clean(<<23, Rest/binary>>) -> is_clean(Rest); -is_clean(<<24, Rest/binary>>) -> is_clean(Rest); -is_clean(<<25, Rest/binary>>) -> is_clean(Rest); -is_clean(<<26, Rest/binary>>) -> is_clean(Rest); -is_clean(<<27, Rest/binary>>) -> is_clean(Rest); -is_clean(<<28, Rest/binary>>) -> is_clean(Rest); -is_clean(<<29, Rest/binary>>) -> is_clean(Rest); -is_clean(<<30, Rest/binary>>) -> is_clean(Rest); -is_clean(<<31, Rest/binary>>) -> is_clean(Rest); -is_clean(<<32, Rest/binary>>) -> is_clean(Rest); -is_clean(<<33, Rest/binary>>) -> is_clean(Rest); -is_clean(<<34, Rest/binary>>) -> is_clean(Rest); -is_clean(<<35, Rest/binary>>) -> is_clean(Rest); -is_clean(<<36, Rest/binary>>) -> is_clean(Rest); -is_clean(<<37, Rest/binary>>) -> is_clean(Rest); 
-is_clean(<<38, Rest/binary>>) -> is_clean(Rest); -is_clean(<<39, Rest/binary>>) -> is_clean(Rest); -is_clean(<<40, Rest/binary>>) -> is_clean(Rest); -is_clean(<<41, Rest/binary>>) -> is_clean(Rest); -is_clean(<<42, Rest/binary>>) -> is_clean(Rest); -is_clean(<<43, Rest/binary>>) -> is_clean(Rest); -is_clean(<<44, Rest/binary>>) -> is_clean(Rest); -is_clean(<<45, Rest/binary>>) -> is_clean(Rest); -is_clean(<<46, Rest/binary>>) -> is_clean(Rest); -is_clean(<<47, Rest/binary>>) -> is_clean(Rest); -is_clean(<<48, Rest/binary>>) -> is_clean(Rest); -is_clean(<<49, Rest/binary>>) -> is_clean(Rest); -is_clean(<<50, Rest/binary>>) -> is_clean(Rest); -is_clean(<<51, Rest/binary>>) -> is_clean(Rest); -is_clean(<<52, Rest/binary>>) -> is_clean(Rest); -is_clean(<<53, Rest/binary>>) -> is_clean(Rest); -is_clean(<<54, Rest/binary>>) -> is_clean(Rest); -is_clean(<<55, Rest/binary>>) -> is_clean(Rest); -is_clean(<<56, Rest/binary>>) -> is_clean(Rest); -is_clean(<<57, Rest/binary>>) -> is_clean(Rest); -is_clean(<<58, Rest/binary>>) -> is_clean(Rest); -is_clean(<<59, Rest/binary>>) -> is_clean(Rest); -is_clean(<<60, Rest/binary>>) -> is_clean(Rest); -is_clean(<<61, Rest/binary>>) -> is_clean(Rest); -is_clean(<<62, Rest/binary>>) -> is_clean(Rest); -is_clean(<<63, Rest/binary>>) -> is_clean(Rest); -is_clean(<<64, Rest/binary>>) -> is_clean(Rest); -is_clean(<<65, Rest/binary>>) -> is_clean(Rest); -is_clean(<<66, Rest/binary>>) -> is_clean(Rest); -is_clean(<<67, Rest/binary>>) -> is_clean(Rest); -is_clean(<<68, Rest/binary>>) -> is_clean(Rest); -is_clean(<<69, Rest/binary>>) -> is_clean(Rest); -is_clean(<<70, Rest/binary>>) -> is_clean(Rest); -is_clean(<<71, Rest/binary>>) -> is_clean(Rest); -is_clean(<<72, Rest/binary>>) -> is_clean(Rest); -is_clean(<<73, Rest/binary>>) -> is_clean(Rest); -is_clean(<<74, Rest/binary>>) -> is_clean(Rest); -is_clean(<<75, Rest/binary>>) -> is_clean(Rest); -is_clean(<<76, Rest/binary>>) -> is_clean(Rest); -is_clean(<<77, Rest/binary>>) -> is_clean(Rest); 
-is_clean(<<78, Rest/binary>>) -> is_clean(Rest); -is_clean(<<79, Rest/binary>>) -> is_clean(Rest); -is_clean(<<80, Rest/binary>>) -> is_clean(Rest); -is_clean(<<81, Rest/binary>>) -> is_clean(Rest); -is_clean(<<82, Rest/binary>>) -> is_clean(Rest); -is_clean(<<83, Rest/binary>>) -> is_clean(Rest); -is_clean(<<84, Rest/binary>>) -> is_clean(Rest); -is_clean(<<85, Rest/binary>>) -> is_clean(Rest); -is_clean(<<86, Rest/binary>>) -> is_clean(Rest); -is_clean(<<87, Rest/binary>>) -> is_clean(Rest); -is_clean(<<88, Rest/binary>>) -> is_clean(Rest); -is_clean(<<89, Rest/binary>>) -> is_clean(Rest); -is_clean(<<90, Rest/binary>>) -> is_clean(Rest); -is_clean(<<91, Rest/binary>>) -> is_clean(Rest); -is_clean(<<92, Rest/binary>>) -> is_clean(Rest); -is_clean(<<93, Rest/binary>>) -> is_clean(Rest); -is_clean(<<94, Rest/binary>>) -> is_clean(Rest); -is_clean(<<95, Rest/binary>>) -> is_clean(Rest); -is_clean(<<96, Rest/binary>>) -> is_clean(Rest); -is_clean(<<97, Rest/binary>>) -> is_clean(Rest); -is_clean(<<98, Rest/binary>>) -> is_clean(Rest); -is_clean(<<99, Rest/binary>>) -> is_clean(Rest); -is_clean(<<100, Rest/binary>>) -> is_clean(Rest); -is_clean(<<101, Rest/binary>>) -> is_clean(Rest); -is_clean(<<102, Rest/binary>>) -> is_clean(Rest); -is_clean(<<103, Rest/binary>>) -> is_clean(Rest); -is_clean(<<104, Rest/binary>>) -> is_clean(Rest); -is_clean(<<105, Rest/binary>>) -> is_clean(Rest); -is_clean(<<106, Rest/binary>>) -> is_clean(Rest); -is_clean(<<107, Rest/binary>>) -> is_clean(Rest); -is_clean(<<108, Rest/binary>>) -> is_clean(Rest); -is_clean(<<109, Rest/binary>>) -> is_clean(Rest); -is_clean(<<110, Rest/binary>>) -> is_clean(Rest); -is_clean(<<111, Rest/binary>>) -> is_clean(Rest); -is_clean(<<112, Rest/binary>>) -> is_clean(Rest); -is_clean(<<113, Rest/binary>>) -> is_clean(Rest); -is_clean(<<114, Rest/binary>>) -> is_clean(Rest); -is_clean(<<115, Rest/binary>>) -> is_clean(Rest); -is_clean(<<116, Rest/binary>>) -> is_clean(Rest); -is_clean(<<117, Rest/binary>>) 
-> is_clean(Rest); -is_clean(<<118, Rest/binary>>) -> is_clean(Rest); -is_clean(<<119, Rest/binary>>) -> is_clean(Rest); -is_clean(<<120, Rest/binary>>) -> is_clean(Rest); -is_clean(<<121, Rest/binary>>) -> is_clean(Rest); -is_clean(<<122, Rest/binary>>) -> is_clean(Rest); -is_clean(<<123, Rest/binary>>) -> is_clean(Rest); -is_clean(<<124, Rest/binary>>) -> is_clean(Rest); -is_clean(<<125, Rest/binary>>) -> is_clean(Rest); -is_clean(<<126, Rest/binary>>) -> is_clean(Rest); -is_clean(<<127, Rest/binary>>) -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X < 16#d800 -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X > 16#dfff, X < 16#fdd0 -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X > 16#fdef, X < 16#fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#10000, X < 16#1fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#20000, X < 16#2fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#30000, X < 16#3fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#40000, X < 16#4fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#50000, X < 16#5fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#60000, X < 16#6fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#70000, X < 16#7fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#80000, X < 16#8fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#90000, X < 16#9fffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#a0000, X < 16#afffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#b0000, X < 16#bfffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#c0000, X < 16#cfffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#d0000, X < 16#dfffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#e0000, X < 16#efffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#f0000, X < 16#ffffe -> is_clean(Rest); -is_clean(<<X/utf8, Rest/binary>>) when X >= 16#100000, X < 16#10fffe -> is_clean(Rest); -is_clean(_Bin) -> {error, badarg}. 
- - -%% escape and/or replace bad codepoints if requested -clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc)); -clean(<<0, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(0, Config) ++ Acc, Config); -clean(<<1, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(1, Config) ++ Acc, Config); -clean(<<2, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(2, Config) ++ Acc, Config); -clean(<<3, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(3, Config) ++ Acc, Config); -clean(<<4, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(4, Config) ++ Acc, Config); -clean(<<5, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(5, Config) ++ Acc, Config); -clean(<<6, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(6, Config) ++ Acc, Config); -clean(<<7, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(7, Config) ++ Acc, Config); -clean(<<8, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(8, Config) ++ Acc, Config); -clean(<<9, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(9, Config) ++ Acc, Config); -clean(<<10, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(10, Config) ++ Acc, Config); -clean(<<11, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(11, Config) ++ Acc, Config); -clean(<<12, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(12, Config) ++ Acc, Config); -clean(<<13, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(13, Config) ++ Acc, Config); -clean(<<14, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(14, Config) ++ Acc, Config); -clean(<<15, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(15, Config) ++ Acc, Config); -clean(<<16, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(16, Config) ++ Acc, Config); -clean(<<17, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(17, Config) ++ Acc, Config); -clean(<<18, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(18, Config) ++ Acc, Config); 
-clean(<<19, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(19, Config) ++ Acc, Config); -clean(<<20, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(20, Config) ++ Acc, Config); -clean(<<21, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(21, Config) ++ Acc, Config); -clean(<<22, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(22, Config) ++ Acc, Config); -clean(<<23, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(23, Config) ++ Acc, Config); -clean(<<24, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(24, Config) ++ Acc, Config); -clean(<<25, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(25, Config) ++ Acc, Config); -clean(<<26, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(26, Config) ++ Acc, Config); -clean(<<27, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(27, Config) ++ Acc, Config); -clean(<<28, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(28, Config) ++ Acc, Config); -clean(<<29, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(29, Config) ++ Acc, Config); -clean(<<30, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(30, Config) ++ Acc, Config); -clean(<<31, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(31, Config) ++ Acc, Config); -clean(<<32, Rest/binary>>, Acc, Config) -> clean(Rest, [32] ++ Acc, Config); -clean(<<33, Rest/binary>>, Acc, Config) -> clean(Rest, [33] ++ Acc, Config); -clean(<<34, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(34, Config) ++ Acc, Config); -clean(<<35, Rest/binary>>, Acc, Config) -> clean(Rest, [35] ++ Acc, Config); -clean(<<36, Rest/binary>>, Acc, Config) -> clean(Rest, [36] ++ Acc, Config); -clean(<<37, Rest/binary>>, Acc, Config) -> clean(Rest, [37] ++ Acc, Config); -clean(<<38, Rest/binary>>, Acc, Config) -> clean(Rest, [38] ++ Acc, Config); -clean(<<39, Rest/binary>>, Acc, Config) -> clean(Rest, [39] ++ Acc, Config); -clean(<<40, Rest/binary>>, Acc, Config) -> clean(Rest, [40] ++ 
Acc, Config); -clean(<<41, Rest/binary>>, Acc, Config) -> clean(Rest, [41] ++ Acc, Config); -clean(<<42, Rest/binary>>, Acc, Config) -> clean(Rest, [42] ++ Acc, Config); -clean(<<43, Rest/binary>>, Acc, Config) -> clean(Rest, [43] ++ Acc, Config); -clean(<<44, Rest/binary>>, Acc, Config) -> clean(Rest, [44] ++ Acc, Config); -clean(<<45, Rest/binary>>, Acc, Config) -> clean(Rest, [45] ++ Acc, Config); -clean(<<46, Rest/binary>>, Acc, Config) -> clean(Rest, [46] ++ Acc, Config); -clean(<<47, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(47, Config) ++ Acc, Config); -clean(<<48, Rest/binary>>, Acc, Config) -> clean(Rest, [48] ++ Acc, Config); -clean(<<49, Rest/binary>>, Acc, Config) -> clean(Rest, [49] ++ Acc, Config); -clean(<<50, Rest/binary>>, Acc, Config) -> clean(Rest, [50] ++ Acc, Config); -clean(<<51, Rest/binary>>, Acc, Config) -> clean(Rest, [51] ++ Acc, Config); -clean(<<52, Rest/binary>>, Acc, Config) -> clean(Rest, [52] ++ Acc, Config); -clean(<<53, Rest/binary>>, Acc, Config) -> clean(Rest, [53] ++ Acc, Config); -clean(<<54, Rest/binary>>, Acc, Config) -> clean(Rest, [54] ++ Acc, Config); -clean(<<55, Rest/binary>>, Acc, Config) -> clean(Rest, [55] ++ Acc, Config); -clean(<<56, Rest/binary>>, Acc, Config) -> clean(Rest, [56] ++ Acc, Config); -clean(<<57, Rest/binary>>, Acc, Config) -> clean(Rest, [57] ++ Acc, Config); -clean(<<58, Rest/binary>>, Acc, Config) -> clean(Rest, [58] ++ Acc, Config); -clean(<<59, Rest/binary>>, Acc, Config) -> clean(Rest, [59] ++ Acc, Config); -clean(<<60, Rest/binary>>, Acc, Config) -> clean(Rest, [60] ++ Acc, Config); -clean(<<61, Rest/binary>>, Acc, Config) -> clean(Rest, [61] ++ Acc, Config); -clean(<<62, Rest/binary>>, Acc, Config) -> clean(Rest, [62] ++ Acc, Config); -clean(<<63, Rest/binary>>, Acc, Config) -> clean(Rest, [63] ++ Acc, Config); -clean(<<64, Rest/binary>>, Acc, Config) -> clean(Rest, [64] ++ Acc, Config); -clean(<<65, Rest/binary>>, Acc, Config) -> clean(Rest, [65] ++ Acc, Config); -clean(<<66, 
Rest/binary>>, Acc, Config) -> clean(Rest, [66] ++ Acc, Config); -clean(<<67, Rest/binary>>, Acc, Config) -> clean(Rest, [67] ++ Acc, Config); -clean(<<68, Rest/binary>>, Acc, Config) -> clean(Rest, [68] ++ Acc, Config); -clean(<<69, Rest/binary>>, Acc, Config) -> clean(Rest, [69] ++ Acc, Config); -clean(<<70, Rest/binary>>, Acc, Config) -> clean(Rest, [70] ++ Acc, Config); -clean(<<71, Rest/binary>>, Acc, Config) -> clean(Rest, [71] ++ Acc, Config); -clean(<<72, Rest/binary>>, Acc, Config) -> clean(Rest, [72] ++ Acc, Config); -clean(<<73, Rest/binary>>, Acc, Config) -> clean(Rest, [73] ++ Acc, Config); -clean(<<74, Rest/binary>>, Acc, Config) -> clean(Rest, [74] ++ Acc, Config); -clean(<<75, Rest/binary>>, Acc, Config) -> clean(Rest, [75] ++ Acc, Config); -clean(<<76, Rest/binary>>, Acc, Config) -> clean(Rest, [76] ++ Acc, Config); -clean(<<77, Rest/binary>>, Acc, Config) -> clean(Rest, [77] ++ Acc, Config); -clean(<<78, Rest/binary>>, Acc, Config) -> clean(Rest, [78] ++ Acc, Config); -clean(<<79, Rest/binary>>, Acc, Config) -> clean(Rest, [79] ++ Acc, Config); -clean(<<80, Rest/binary>>, Acc, Config) -> clean(Rest, [80] ++ Acc, Config); -clean(<<81, Rest/binary>>, Acc, Config) -> clean(Rest, [81] ++ Acc, Config); -clean(<<82, Rest/binary>>, Acc, Config) -> clean(Rest, [82] ++ Acc, Config); -clean(<<83, Rest/binary>>, Acc, Config) -> clean(Rest, [83] ++ Acc, Config); -clean(<<84, Rest/binary>>, Acc, Config) -> clean(Rest, [84] ++ Acc, Config); -clean(<<85, Rest/binary>>, Acc, Config) -> clean(Rest, [85] ++ Acc, Config); -clean(<<86, Rest/binary>>, Acc, Config) -> clean(Rest, [86] ++ Acc, Config); -clean(<<87, Rest/binary>>, Acc, Config) -> clean(Rest, [87] ++ Acc, Config); -clean(<<88, Rest/binary>>, Acc, Config) -> clean(Rest, [88] ++ Acc, Config); -clean(<<89, Rest/binary>>, Acc, Config) -> clean(Rest, [89] ++ Acc, Config); -clean(<<90, Rest/binary>>, Acc, Config) -> clean(Rest, [90] ++ Acc, Config); -clean(<<91, Rest/binary>>, Acc, Config) -> clean(Rest, [91] 
++ Acc, Config); -clean(<<92, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(92, Config) ++ Acc, Config); -clean(<<93, Rest/binary>>, Acc, Config) -> clean(Rest, [93] ++ Acc, Config); -clean(<<94, Rest/binary>>, Acc, Config) -> clean(Rest, [94] ++ Acc, Config); -clean(<<95, Rest/binary>>, Acc, Config) -> clean(Rest, [95] ++ Acc, Config); -clean(<<96, Rest/binary>>, Acc, Config) -> clean(Rest, [96] ++ Acc, Config); -clean(<<97, Rest/binary>>, Acc, Config) -> clean(Rest, [97] ++ Acc, Config); -clean(<<98, Rest/binary>>, Acc, Config) -> clean(Rest, [98] ++ Acc, Config); -clean(<<99, Rest/binary>>, Acc, Config) -> clean(Rest, [99] ++ Acc, Config); -clean(<<100, Rest/binary>>, Acc, Config) -> clean(Rest, [100] ++ Acc, Config); -clean(<<101, Rest/binary>>, Acc, Config) -> clean(Rest, [101] ++ Acc, Config); -clean(<<102, Rest/binary>>, Acc, Config) -> clean(Rest, [102] ++ Acc, Config); -clean(<<103, Rest/binary>>, Acc, Config) -> clean(Rest, [103] ++ Acc, Config); -clean(<<104, Rest/binary>>, Acc, Config) -> clean(Rest, [104] ++ Acc, Config); -clean(<<105, Rest/binary>>, Acc, Config) -> clean(Rest, [105] ++ Acc, Config); -clean(<<106, Rest/binary>>, Acc, Config) -> clean(Rest, [106] ++ Acc, Config); -clean(<<107, Rest/binary>>, Acc, Config) -> clean(Rest, [107] ++ Acc, Config); -clean(<<108, Rest/binary>>, Acc, Config) -> clean(Rest, [108] ++ Acc, Config); -clean(<<109, Rest/binary>>, Acc, Config) -> clean(Rest, [109] ++ Acc, Config); -clean(<<110, Rest/binary>>, Acc, Config) -> clean(Rest, [110] ++ Acc, Config); -clean(<<111, Rest/binary>>, Acc, Config) -> clean(Rest, [111] ++ Acc, Config); -clean(<<112, Rest/binary>>, Acc, Config) -> clean(Rest, [112] ++ Acc, Config); -clean(<<113, Rest/binary>>, Acc, Config) -> clean(Rest, [113] ++ Acc, Config); -clean(<<114, Rest/binary>>, Acc, Config) -> clean(Rest, [114] ++ Acc, Config); -clean(<<115, Rest/binary>>, Acc, Config) -> clean(Rest, [115] ++ Acc, Config); -clean(<<116, Rest/binary>>, Acc, Config) -> clean(Rest, 
[116] ++ Acc, Config); -clean(<<117, Rest/binary>>, Acc, Config) -> clean(Rest, [117] ++ Acc, Config); -clean(<<118, Rest/binary>>, Acc, Config) -> clean(Rest, [118] ++ Acc, Config); -clean(<<119, Rest/binary>>, Acc, Config) -> clean(Rest, [119] ++ Acc, Config); -clean(<<120, Rest/binary>>, Acc, Config) -> clean(Rest, [120] ++ Acc, Config); -clean(<<121, Rest/binary>>, Acc, Config) -> clean(Rest, [121] ++ Acc, Config); -clean(<<122, Rest/binary>>, Acc, Config) -> clean(Rest, [122] ++ Acc, Config); -clean(<<123, Rest/binary>>, Acc, Config) -> clean(Rest, [123] ++ Acc, Config); -clean(<<124, Rest/binary>>, Acc, Config) -> clean(Rest, [124] ++ Acc, Config); -clean(<<125, Rest/binary>>, Acc, Config) -> clean(Rest, [125] ++ Acc, Config); -clean(<<126, Rest/binary>>, Acc, Config) -> clean(Rest, [126] ++ Acc, Config); -clean(<<127, Rest/binary>>, Acc, Config) -> clean(Rest, [127] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 -> - clean(Rest, maybe_replace(X, Config) ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#d800 -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#dfff, X < 16#fdd0 -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#fdef, X < 16#fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#10000, X < 16#1fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#20000, X < 16#2fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#30000, X < 16#3fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#40000, X < 16#4fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#50000, X < 16#5fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#60000, X < 16#6fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#70000, X < 16#7fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#80000, X < 
16#8fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#90000, X < 16#9fffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#a0000, X < 16#afffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#b0000, X < 16#bfffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#c0000, X < 16#cfffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#d0000, X < 16#dfffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#e0000, X < 16#efffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#f0000, X < 16#ffffe -> - clean(Rest, [X] ++ Acc, Config); -clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#100000, X < 16#10fffe -> - clean(Rest, [X] ++ Acc, Config); -%% surrogates -clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 -> - clean(Rest, maybe_replace(surrogate, Config) ++ Acc, Config); -%% noncharacters -clean(<<_/utf8, Rest/binary>>, Acc, Config) -> - clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config); -%% u+fffe and u+ffff for R14BXX -clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X == 190; X == 191 -> - clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config); -%% overlong encodings and missing continuations of a 2 byte sequence -clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 -> - clean(strip_continuations(Rest, 1), maybe_replace(badutf, Config) ++ Acc, Config); -%% overlong encodings and missing continuations of a 3 byte sequence -clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 -> - clean(strip_continuations(Rest, 2), maybe_replace(badutf, Config) ++ Acc, Config); -%% overlong encodings and missing continuations of a 4 byte sequence -clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 -> - clean(strip_continuations(Rest, 3), maybe_replace(badutf, Config) ++ Acc, Config); -clean(<<_, Rest/binary>>, Acc, Config) -> - clean(Rest, maybe_replace(badutf, Config) ++ Acc, Config). 
- - -strip_continuations(Bin, 0) -> Bin; -strip_continuations(<<X, Rest/binary>>, N) when X >= 128, X =< 191 -> - strip_continuations(Rest, N - 1); -%% not a continuation byte -strip_continuations(Bin, _) -> Bin. - - -maybe_replace($\b, #config{escaped_strings=true}) -> [$b, $\\]; -maybe_replace($\t, #config{escaped_strings=true}) -> [$t, $\\]; -maybe_replace($\n, #config{escaped_strings=true}) -> [$n, $\\]; -maybe_replace($\f, #config{escaped_strings=true}) -> [$f, $\\]; -maybe_replace($\r, #config{escaped_strings=true}) -> [$r, $\\]; -maybe_replace($\", #config{escaped_strings=true}) -> [$\", $\\]; -maybe_replace($/, Config=#config{escaped_strings=true}) -> - case Config#config.escaped_forward_slashes of - true -> [$/, $\\]; - false -> [$/] - end; -maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\]; -maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> - case Config#config.unescaped_jsonp of - true -> [X]; - false -> lists:reverse(json_escape_sequence(X)) - end; -maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> - lists:reverse(json_escape_sequence(X)); -maybe_replace(noncharacter, #config{replaced_bad_utf8=true}) -> [16#fffd]; -maybe_replace(surrogate, #config{replaced_bad_utf8=true}) -> [16#fffd]; -maybe_replace(badutf, #config{replaced_bad_utf8=true}) -> [16#fffd]; -maybe_replace(_, _) -> {error, badarg}. - - -%% convert a codepoint to it's \uXXXX equiv. -json_escape_sequence(X) -> - <<A:4, B:4, C:4, D:4>> = <<X:16>>, - [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]. - - -to_hex(10) -> $a; -to_hex(11) -> $b; -to_hex(12) -> $c; -to_hex(13) -> $d; -to_hex(14) -> $e; -to_hex(15) -> $f; -to_hex(X) -> X + 48. %% ascii "1" is [49], "2" is [50], etc... \ No newline at end of file diff --git a/src/jsx_tests.hrl b/src/jsx_tests.hrl deleted file mode 100644 index 3e8c6e6..0000000 --- a/src/jsx_tests.hrl +++ /dev/null @@ -1,689 +0,0 @@ -%% data and helper functions for tests - --export([init/1, handle_event/2]). 
--export([test_cases/0]). - - --include_lib("eunit/include/eunit.hrl"). - - -%% test handler -init([]) -> []. - -handle_event(end_json, State) -> lists:reverse([end_json] ++ State); -handle_event(Event, State) -> [Event] ++ State. - - -test_cases() -> - empty_array() - ++ nested_array() - ++ empty_object() - ++ nested_object() - ++ strings() - ++ literals() - ++ integers() - ++ floats() - ++ compound_object(). - - -empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}]. - -nested_array() -> - [{ - "[[[]]]", - <<"[[[]]]">>, - [[[]]], - [start_array, start_array, start_array, end_array, end_array, end_array] - }]. - - -empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}]. - -nested_object() -> - [{ - "{\"key\":{\"key\":{}}}", - <<"{\"key\":{\"key\":{}}}">>, - [{<<"key">>, [{<<"key">>, [{}]}]}], - [ - start_object, - {key, <<"key">>}, - start_object, - {key, <<"key">>}, - start_object, - end_object, - end_object, - end_object - ] - }]. - - -naked_strings() -> - Raw = [ - "", - "hello world" - ], - [ - { - String, - <<"\"", (list_to_binary(String))/binary, "\"">>, - list_to_binary(String), - [{string, list_to_binary(String)}] - } - || String <- Raw - ]. - -strings() -> - naked_strings() - ++ [ wrap_with_array(Test) || Test <- naked_strings() ] - ++ [ wrap_with_object(Test) || Test <- naked_strings() ]. - - -naked_integers() -> - Raw = [ - 1, 2, 3, - 127, 128, 129, - 255, 256, 257, - 65534, 65535, 65536, - 18446744073709551616, - 18446744073709551617 - ], - [ - { - integer_to_list(X), - list_to_binary(integer_to_list(X)), - X, - [{integer, X}] - } - || X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0] - ]. - -integers() -> - naked_integers() - ++ [ wrap_with_array(Test) || Test <- naked_integers() ] - ++ [ wrap_with_object(Test) || Test <- naked_integers() ]. 
- - -naked_floats() -> - Raw = [ - 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, - 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, - 1234567890.0987654321, - 0.0e0, - 1234567890.0987654321e16, - 0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308, - 1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308, - 2.2250738585072014e-308, %% min normalized float - 1.7976931348623157e308, %% max normalized float - 5.0e-324, %% min denormalized float - 2.225073858507201e-308 %% max denormalized float - ], - [ - { - sane_float_to_list(X), - list_to_binary(sane_float_to_list(X)), - X, - [{float, X}] - } - || X <- Raw ++ [ -1 * Y || Y <- Raw ] - ]. - -floats() -> - naked_floats() - ++ [ wrap_with_array(Test) || Test <- naked_floats() ] - ++ [ wrap_with_object(Test) || Test <- naked_floats() ]. - - -naked_literals() -> - [ - { - atom_to_list(Literal), - atom_to_binary(Literal, unicode), - Literal, - [{literal, Literal}] - } - || Literal <- [true, false, null] - ]. - -literals() -> - naked_literals() - ++ [ wrap_with_array(Test) || Test <- naked_literals() ] - ++ [ wrap_with_object(Test) || Test <- naked_literals() ]. - - -compound_object() -> - [{ - "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]", - <<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>, - [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]], - [ - start_array, - start_object, - {key, <<"alpha">>}, - start_array, - {integer, 1}, - {integer, 2}, - {integer, 3}, - end_array, - {key, <<"beta">>}, - start_object, - {key, <<"alpha">>}, - start_array, - {float, 1.0}, - {float, 2.0}, - {float, 3.0}, - end_array, - {key, <<"beta">>}, - start_array, - {literal, true}, - {literal, false}, - end_array, - end_object, - end_object, - start_array, - start_object, - end_object, - end_array, - end_array - ] - }]. 
- - -wrap_with_array({Title, JSON, Term, Events}) -> - { - "[" ++ Title ++ "]", - <<"[", JSON/binary, "]">>, - [Term], - [start_array] ++ Events ++ [end_array] - }. - - -wrap_with_object({Title, JSON, Term, Events}) -> - { - "{\"key\":" ++ Title ++ "}", - <<"{\"key\":", JSON/binary, "}">>, - [{<<"key">>, Term}], - [start_object, {key, <<"key">>}] ++ Events ++ [end_object] - }. - - -sane_float_to_list(X) -> - [Output] = io_lib:format("~p", [X]), - Output. - --include("jsx_config.hrl"). --include("jsx_strings.hrl"). - - -%% erlang refuses to encode certain codepoints, so fake them -to_fake_utf8(N) when N < 16#0080 -> <<N:8>>; -to_fake_utf8(N) when N < 16#0800 -> - <<0:5, Y:5, X:6>> = <<N:16>>, - <<2#110:3, Y:5, 2#10:2, X:6>>; -to_fake_utf8(N) when N < 16#10000 -> - <<Z:4, Y:6, X:6>> = <<N:16>>, - <<2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6>>; -to_fake_utf8(N) -> - <<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>, - <<2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6>>. - - -codepoints() -> - unicode:characters_to_binary( - [32, 33] - ++ lists:seq(35, 46) - ++ lists:seq(48, 91) - ++ lists:seq(93, 16#2027) - ++ lists:seq(16#202a, 16#d7ff) - ++ lists:seq(16#e000, 16#fdcf) - ++ lists:seq(16#fdf0, 16#fffd) - ). - -extended_codepoints() -> - unicode:characters_to_binary( - lists:seq(16#10000, 16#1fffd) ++ [ - 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, - 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, - 16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000 - ] - ). - -reserved_space() -> [ to_fake_utf8(N) || N <- lists:seq(16#fdd0, 16#fdef) ]. - -surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ]. - -noncharacters() -> [ to_fake_utf8(N) || N <- lists:seq(16#fffe, 16#ffff) ]. 
- -extended_noncharacters() -> - [ to_fake_utf8(N) || N <- [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] - ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] - ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] - ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] - ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff] - ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] - ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] - ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff] - ]. - - -clean_string_test_() -> - [ - {"clean codepoints", ?_assertEqual( - codepoints(), - clean_string(codepoints(), #config{}) - )}, - {"clean extended codepoints", ?_assertEqual( - extended_codepoints(), - clean_string(extended_codepoints(), #config{}) - )}, - {"escape path codepoints", ?_assertEqual( - codepoints(), - clean_string(codepoints(), #config{escaped_strings=true}) - )}, - {"escape path extended codepoints", ?_assertEqual( - extended_codepoints(), - clean_string(extended_codepoints(), #config{escaped_strings=true}) - )}, - {"error reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, reserved_space()) - )}, - {"error surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, surrogates()) - )}, - {"error noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, noncharacters()) - )}, - {"error extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, extended_noncharacters()) - )}, - {"clean reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, reserved_space()) - )}, - {"clean 
surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, surrogates()) - )}, - {"clean noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, noncharacters()) - )}, - {"clean extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, extended_noncharacters()) - )} - ]. - - -maybe_escape(Bin, Config) -> clean_string(Bin, Config). - -escape_test_() -> - [ - {"maybe_escape backspace", ?_assertEqual( - <<"\\b">>, - maybe_escape(<<16#0008/utf8>>, #config{escaped_strings=true}) - )}, - {"don't escape backspace", ?_assertEqual( - <<"\b">>, - maybe_escape(<<16#0008/utf8>>, #config{}) - )}, - {"maybe_escape tab", ?_assertEqual( - <<"\\t">>, - maybe_escape(<<16#0009/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape newline", ?_assertEqual( - <<"\\n">>, - maybe_escape(<<16#000a/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape formfeed", ?_assertEqual( - <<"\\f">>, - maybe_escape(<<16#000c/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape carriage return", ?_assertEqual( - <<"\\r">>, - maybe_escape(<<16#000d/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape quote", ?_assertEqual( - <<"\\\"">>, - maybe_escape(<<16#0022/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape forward slash", ?_assertEqual( - <<"\\/">>, - maybe_escape(<<16#002f/utf8>>, #config{escaped_strings=true, escaped_forward_slashes=true}) - )}, - {"do not maybe_escape forward slash", ?_assertEqual( - <<"/">>, - maybe_escape(<<16#002f/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape backslash", ?_assertEqual( - <<"\\\\">>, - 
maybe_escape(<<16#005c/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape jsonp (u2028)", ?_assertEqual( - <<"\\u2028">>, - maybe_escape(<<16#2028/utf8>>, #config{escaped_strings=true}) - )}, - {"do not maybe_escape jsonp (u2028)", ?_assertEqual( - <<16#2028/utf8>>, - maybe_escape(<<16#2028/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true}) - )}, - {"maybe_escape jsonp (u2029)", ?_assertEqual( - <<"\\u2029">>, - maybe_escape(<<16#2029/utf8>>, #config{escaped_strings=true}) - )}, - {"do not maybe_escape jsonp (u2029)", ?_assertEqual( - <<16#2029/utf8>>, - maybe_escape(<<16#2029/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true}) - )}, - {"maybe_escape u0000", ?_assertEqual( - <<"\\u0000">>, - maybe_escape(<<16#0000/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0001", ?_assertEqual( - <<"\\u0001">>, - maybe_escape(<<16#0001/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0002", ?_assertEqual( - <<"\\u0002">>, - maybe_escape(<<16#0002/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0003", ?_assertEqual( - <<"\\u0003">>, - maybe_escape(<<16#0003/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0004", ?_assertEqual( - <<"\\u0004">>, - maybe_escape(<<16#0004/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0005", ?_assertEqual( - <<"\\u0005">>, - maybe_escape(<<16#0005/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0006", ?_assertEqual( - <<"\\u0006">>, - maybe_escape(<<16#0006/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0007", ?_assertEqual( - <<"\\u0007">>, - maybe_escape(<<16#0007/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u000b", ?_assertEqual( - <<"\\u000b">>, - maybe_escape(<<16#000b/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u000e", ?_assertEqual( - <<"\\u000e">>, - maybe_escape(<<16#000e/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u000f", ?_assertEqual( - <<"\\u000f">>, 
- maybe_escape(<<16#000f/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0010", ?_assertEqual( - <<"\\u0010">>, - maybe_escape(<<16#0010/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0011", ?_assertEqual( - <<"\\u0011">>, - maybe_escape(<<16#0011/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0012", ?_assertEqual( - <<"\\u0012">>, - maybe_escape(<<16#0012/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0013", ?_assertEqual( - <<"\\u0013">>, - maybe_escape(<<16#0013/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0014", ?_assertEqual( - <<"\\u0014">>, - maybe_escape(<<16#0014/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0015", ?_assertEqual( - <<"\\u0015">>, - maybe_escape(<<16#0015/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0016", ?_assertEqual( - <<"\\u0016">>, - maybe_escape(<<16#0016/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0017", ?_assertEqual( - <<"\\u0017">>, - maybe_escape(<<16#0017/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0018", ?_assertEqual( - <<"\\u0018">>, - maybe_escape(<<16#0018/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0019", ?_assertEqual( - <<"\\u0019">>, - maybe_escape(<<16#0019/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001a", ?_assertEqual( - <<"\\u001a">>, - maybe_escape(<<16#001a/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001b", ?_assertEqual( - <<"\\u001b">>, - maybe_escape(<<16#001b/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001c", ?_assertEqual( - <<"\\u001c">>, - maybe_escape(<<16#001c/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001d", ?_assertEqual( - <<"\\u001d">>, - maybe_escape(<<16#001d/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001e", ?_assertEqual( - <<"\\u001e">>, - maybe_escape(<<16#001e/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001f", 
?_assertEqual( - <<"\\u001f">>, - maybe_escape(<<16#001f/utf8>>, #config{escaped_strings=true}) - )} - ]. - - -bad_utf8_test_() -> - [ - {"noncharacter u+fffe", ?_assertEqual( - {error, badarg}, - clean_string(to_fake_utf8(16#fffe), #config{}) - )}, - {"noncharacter u+fffe replaced", ?_assertEqual( - <<16#fffd/utf8>>, - clean_string(to_fake_utf8(16#fffe), #config{replaced_bad_utf8=true}) - )}, - {"noncharacter u+ffff", ?_assertEqual( - {error, badarg}, - clean_string(to_fake_utf8(16#ffff), #config{}) - )}, - {"noncharacter u+ffff replaced", ?_assertEqual( - <<16#fffd/utf8>>, - clean_string(to_fake_utf8(16#ffff), #config{replaced_bad_utf8=true}) - )}, - {"orphan continuation byte u+0080", ?_assertEqual( - {error, badarg}, - clean_string(<<16#0080>>, #config{}) - )}, - {"orphan continuation byte u+0080 replaced", ?_assertEqual( - <<16#fffd/utf8>>, - clean_string(<<16#0080>>, #config{replaced_bad_utf8=true}) - )}, - {"orphan continuation byte u+00bf", ?_assertEqual( - {error, badarg}, - clean_string(<<16#00bf>>, #config{}) - )}, - {"orphan continuation byte u+00bf replaced", ?_assertEqual( - <<16#fffd/utf8>>, - clean_string(<<16#00bf>>, #config{replaced_bad_utf8=true}) - )}, - {"2 continuation bytes", ?_assertEqual( - {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) - )}, - {"2 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 2), - clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{replaced_bad_utf8=true}) - )}, - {"3 continuation bytes", ?_assertEqual( - {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) - )}, - {"3 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 3), - clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{replaced_bad_utf8=true}) - )}, - {"4 continuation bytes", ?_assertEqual( - {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) - )}, - {"4 continuation bytes replaced", 
?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 4), - clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{replaced_bad_utf8=true}) - )}, - {"5 continuation bytes", ?_assertEqual( - {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) - )}, - {"5 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 5), - clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{replaced_bad_utf8=true}) - )}, - {"6 continuation bytes", ?_assertEqual( - {error, badarg}, - clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) - )}, - {"6 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 6), - clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{replaced_bad_utf8=true}) - )}, - {"all continuation bytes", ?_assertEqual( - {error, badarg}, - clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{}) - )}, - {"all continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), - clean_string( - <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, - #config{replaced_bad_utf8=true} - ) - )}, - {"lonely start byte", ?_assertEqual( - {error, badarg}, - clean_string(<<16#00c0>>, #config{}) - )}, - {"lonely start byte replaced", ?_assertEqual( - <<16#fffd/utf8>>, - clean_string(<<16#00c0>>, #config{replaced_bad_utf8=true}) - )}, - {"lonely start bytes (2 byte)", ?_assertEqual( - {error, badarg}, - clean_string(<<16#00c0, 32, 16#00df>>, #config{}) - )}, - {"lonely start bytes (2 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32, 16#fffd/utf8>>, - clean_string(<<16#00c0, 32, 16#00df>>, #config{replaced_bad_utf8=true}) - )}, - {"lonely start bytes (3 byte)", ?_assertEqual( - {error, badarg}, - clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) - )}, - {"lonely start bytes (3 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32, 16#fffd/utf8>>, - clean_string(<<16#00e0, 32, 16#00ef>>, 
#config{replaced_bad_utf8=true}) - )}, - {"lonely start bytes (4 byte)", ?_assertEqual( - {error, badarg}, - clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) - )}, - {"lonely start bytes (4 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32, 16#fffd/utf8>>, - clean_string(<<16#00f0, 32, 16#00f7>>, #config{replaced_bad_utf8=true}) - )}, - {"missing continuation byte (3 byte)", ?_assertEqual( - {error, badarg}, - clean_string(<<224, 160, 32>>, #config{}) - )}, - {"missing continuation byte (3 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - clean_string(<<224, 160, 32>>, #config{replaced_bad_utf8=true}) - )}, - {"missing continuation byte (4 byte missing one)", ?_assertEqual( - {error, badarg}, - clean_string(<<240, 144, 128, 32>>, #config{}) - )}, - {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - clean_string(<<240, 144, 128, 32>>, #config{replaced_bad_utf8=true}) - )}, - {"missing continuation byte (4 byte missing two)", ?_assertEqual( - {error, badarg}, - clean_string(<<240, 144, 32>>, #config{}) - )}, - {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - clean_string(<<240, 144, 32>>, #config{replaced_bad_utf8=true}) - )}, - {"overlong encoding of u+002f (2 byte)", ?_assertEqual( - {error, badarg}, - clean_string(<<16#c0, 16#af, 32>>, #config{}) - )}, - {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - clean_string(<<16#c0, 16#af, 32>>, #config{replaced_bad_utf8=true}) - )}, - {"overlong encoding of u+002f (3 byte)", ?_assertEqual( - {error, badarg}, - clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) - )}, - {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true}) - )}, - {"overlong encoding of u+002f (4 byte)", ?_assertEqual( - {error, badarg}, - clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, 
#config{}) - )}, - {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true}) - )}, - {"highest overlong 2 byte sequence", ?_assertEqual( - {error, badarg}, - clean_string(<<16#c1, 16#bf, 32>>, #config{}) - )}, - {"highest overlong 2 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - clean_string(<<16#c1, 16#bf, 32>>, #config{replaced_bad_utf8=true}) - )}, - {"highest overlong 3 byte sequence", ?_assertEqual( - {error, badarg}, - clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) - )}, - {"highest overlong 3 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{replaced_bad_utf8=true}) - )}, - {"highest overlong 4 byte sequence", ?_assertEqual( - {error, badarg}, - clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) - )}, - {"highest overlong 4 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{replaced_bad_utf8=true}) - )} - ]. - - -json_escape_sequence_test_() -> - [ - {"json escape sequence test - 16#0000", ?_assertEqual(json_escape_sequence(16#0000), "\\u0000")}, - {"json escape sequence test - 16#abc", ?_assertEqual(json_escape_sequence(16#abc), "\\u0abc")}, - {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} - ]. \ No newline at end of file diff --git a/src/jsx_to_json.erl b/src/jsx_to_json.erl index 7e5aaf2..2616200 100644 --- a/src/jsx_to_json.erl +++ b/src/jsx_to_json.erl @@ -25,6 +25,8 @@ -export([to_json/2, format/2]). -export([init/1, handle_event/2]). +-export([start_json/0, start_json/1]). +-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]). -record(config, { @@ -74,7 +76,6 @@ parse_config([], Config) -> Config. - -define(start_object, <<"{">>). -define(start_array, <<"[">>). 
-define(end_object, <<"}">>). @@ -86,95 +87,50 @@ parse_config([], Config) -> -define(newline, <<"\n">>). --type state() :: {any(), unicode:charlist(), #config{}}. +-type state() :: {unicode:charlist(), #config{}}. -spec init(Config::proplists:proplist()) -> state(). -init(Config) -> {start, [], parse_config(Config)}. +init(Config) -> {[], parse_config(Config)}. + -spec handle_event(Event::any(), State::state()) -> state(). -handle_event(Event, {start, Acc, Config}) -> - case Event of - {Type, Value} -> {[], [Acc, encode(Type, Value, Config)], Config} - ; start_object -> {[object_start], [Acc, ?start_object], Config} - ; start_array -> {[array_start], [Acc, ?start_array], Config} - end; -handle_event(Event, {[object_start|Stack], Acc, OldConfig = #config{depth = Depth}}) -> - Config = OldConfig#config{depth = Depth + 1}, - case Event of - {key, Key} -> - {[object_value|Stack], [Acc, indent(Config), encode(string, Key, Config), ?colon, space(Config)], Config} - ; end_object -> - {Stack, [Acc, ?end_object], OldConfig} - end; -handle_event(Event, {[object_value|Stack], Acc, Config}) -> - case Event of - {Type, Value} when Type == string; Type == literal; - Type == integer; Type == float -> - {[key|Stack], [Acc, encode(Type, Value, Config)], Config} - ; start_object -> {[object_start, key|Stack], [Acc, ?start_object], Config} - ; start_array -> {[array_start, key|Stack], [Acc, ?start_array], Config} - end; -handle_event(Event, {[key|Stack], Acc, Config = #config{depth = Depth}}) -> - case Event of - {key, Key} -> - {[object_value|Stack], [Acc, ?comma, indent_or_space(Config), encode(string, Key, Config), ?colon, space(Config)], Config} - ; end_object -> - NewConfig = Config#config{depth = Depth - 1}, - {Stack, [Acc, indent(NewConfig), ?end_object], NewConfig} - end; -handle_event(Event, {[array_start|Stack], Acc, OldConfig = #config{depth = Depth}}) -> - Config = OldConfig#config{depth = Depth + 1}, - case Event of - {Type, Value} when Type == string; Type == literal; 
- Type == integer; Type == float -> - {[array|Stack], [Acc, indent(Config), encode(Type, Value, Config)], Config} - ; start_object -> {[object_start, array|Stack], [Acc, indent(Config), ?start_object], Config} - ; start_array -> {[array_start, array|Stack], [Acc, indent(Config), ?start_array], Config} - ; end_array -> {Stack, [Acc, ?end_array], OldConfig} - end; -handle_event(Event, {[array|Stack], Acc, Config = #config{depth = Depth}}) -> - case Event of - {Type, Value} when Type == string; Type == literal; - Type == integer; Type == float -> - {[array|Stack], [Acc, ?comma, indent_or_space(Config), encode(Type, Value, Config)], Config} - ; end_array -> - NewConfig = Config#config{depth = Depth - 1}, - {Stack, [Acc, indent(NewConfig), ?end_array], NewConfig} - ; start_object -> {[object_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_object], Config} - ; start_array -> {[array_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_array], Config} - end; -handle_event(end_json, {[], Acc, _Config}) -> unicode:characters_to_binary(Acc, utf8). +handle_event(end_json, State) -> get_value(State); + +handle_event(start_object, State) -> start_object(State); +handle_event(end_object, State) -> finish(State); + +handle_event(start_array, State) -> start_array(State); +handle_event(end_array, State) -> finish(State); + +handle_event({Type, Event}, {_, Config} = State) -> insert(encode(Type, Event, Config), State). encode(string, String, _Config) -> - [?quote, String, ?quote]; + <>; +encode(key, Key, _Config) -> + <>; encode(literal, Literal, _Config) -> - erlang:atom_to_list(Literal); + unicode:characters_to_binary(erlang:atom_to_list(Literal)); encode(integer, Integer, _Config) -> - erlang:integer_to_list(Integer); + unicode:characters_to_binary(erlang:integer_to_list(Integer)); encode(float, Float, _Config) -> - [Output] = io_lib:format("~p", [Float]), Output. + [Output] = io_lib:format("~p", [Float]), unicode:characters_to_binary(Output). 
space(Config) -> case Config#config.space of - 0 -> [] + 0 -> <<>> ; X when X > 0 -> binary:copy(?space, X) end. indent(Config) -> case Config#config.indent of - 0 -> [] - ; X when X > 0 -> - Indent = binary:copy(?space, X), - indent(Indent, Config#config.depth, [?newline]) + 0 -> <<>> + ; X when X > 0 -> <> end. -indent(_Indent, 0, Acc) -> Acc; -indent(Indent, N, Acc) -> indent(Indent, N - 1, [Acc, Indent]). - indent_or_space(Config) -> case Config#config.indent > 0 of @@ -183,6 +139,119 @@ indent_or_space(Config) -> end. +%% internal state is a stack and a config object +%% `{Stack, Config}` +%% the stack is a list of in progress objects/arrays +%% `[Current, Parent, Grandparent,...OriginalAncestor]` +%% an object has the representation on the stack of +%% `{object, Object}` +%% of if there's a key with a yet to be matched value +%% `{object, Key, Object}` +%% an array looks like +%% `{array, Array}` +%% `Object` and `Array` are utf8 encoded binaries + +start_json() -> {[], #config{}}. + +start_json(Config) when is_list(Config) -> {[], parse_config(Config)}. + +%% allocate a new object on top of the stack +start_object({Stack, Config}) -> {[{object, ?start_object}] ++ Stack, Config}. + +%% allocate a new array on top of the stack +start_array({Stack, Config}) -> {[{array, ?start_array}] ++ Stack, Config}. + +%% finish an object or array and insert it into the parent object if it exists +finish({[{object, Object}], Config}) -> + {<>, Config}; +finish({[{object, Object}|Rest], Config}) -> + insert(<>, {Rest, Config}); +finish({[{array, Array}], Config}) -> + {<>, Config}; +finish({[{array, Array}|Rest], Config}) -> + insert(<>, {Rest, Config}); +finish(_) -> erlang:error(badarg). 
+ +%% insert a value when there's no parent object or array +insert(Value, {[], Config}) when is_binary(Value) -> + {Value, Config}; +%% insert a key or value into an object or array, autodetects the 'right' thing +insert(Key, {[{object, Object}|Rest], Config}) when is_binary(Key) -> + {[{object, Key, Object}] ++ Rest, Config}; +insert(Value, {[{object, Key, ?start_object}|Rest], Config}) when is_binary(Value) -> + { + [{object, <>}] ++ Rest, + Config + }; +insert(Value, {[{object, Key, Object}|Rest], Config}) when is_binary(Value) -> + { + [{object, <>}] ++ Rest, + Config + }; +insert(Value, {[{array, ?start_array}|Rest], Config}) when is_binary(Value) -> + {[{array, <>}] ++ Rest, Config}; +insert(Value, {[{array, Array}|Rest], Config}) when is_binary(Value) -> + { + [{array, <>}] ++ Rest, + Config + }; +insert(_, _) -> erlang:error(badarg). + +%% insert a key/value pair into an object +insert(Key, Value, {[{object, ?start_object}|Rest], Config}) when is_binary(Key), is_binary(Value) -> + { + [{object, <>}] ++ Rest, + Config + }; +insert(Key, Value, {[{object, Object}|Rest], Config}) when is_binary(Key), is_binary(Value) -> + { + [{object, <>}] ++ Rest, + Config + }; +insert(_, _, _) -> erlang:error(badarg). + + +get_key({[{object, Key, _}|_], _}) -> Key; +get_key(_) -> erlang:error(badarg). + + +get_value({Value, Config}) -> + case Value of + Value when is_binary(Value) -> Value; + _ -> erlang:error(badarg) + end; +get_value(_) -> erlang:error(badarg). + + + %% eunit tests -ifdef(TEST). @@ -215,7 +284,7 @@ config_test_() -> space_test_() -> [ - {"no space", ?_assertEqual([], space(#config{space=0}))}, + {"no space", ?_assertEqual(<<>>, space(#config{space=0}))}, {"one space", ?_assertEqual(<<" ">>, space(#config{space=1}))}, {"four spaces", ?_assertEqual(<<" ">>, space(#config{space=4}))} ]. 
@@ -223,21 +292,21 @@ space_test_() -> indent_test_() -> [ - {"no indent", ?_assertEqual([], indent(#config{indent=0, depth=1}))}, + {"no indent", ?_assertEqual(<<>>, indent(#config{indent=0, depth=1}))}, {"indent 1 depth 1", ?_assertEqual( - [[?newline], ?space], + <>/binary>>, indent(#config{indent=1, depth=1}) )}, {"indent 1 depth 2", ?_assertEqual( - [[[?newline], ?space], ?space], + <>/binary>>, indent(#config{indent=1, depth=2}) )}, {"indent 4 depth 1", ?_assertEqual( - [[?newline], <<" ">>], + <>/binary>>, indent(#config{indent=4, depth=1}) )}, {"indent 4 depth 2", ?_assertEqual( - [[[?newline], <<" ">>], <<" ">>], + <>/binary, <<" ">>/binary>>, indent(#config{indent=4, depth=2}) )} ]. @@ -250,7 +319,7 @@ indent_or_space_test_() -> indent_or_space(#config{space=1, indent=0, depth=1}) )}, {"indent so no space", ?_assertEqual( - [[?newline], ?space], + <>/binary>>, indent_or_space(#config{space=1, indent=1, depth=1}) )} ]. @@ -258,50 +327,137 @@ indent_or_space_test_() -> format_test_() -> [ - {"0.0", ?_assert(encode(float, 0.0, #config{}) =:= "0.0")}, - {"1.0", ?_assert(encode(float, 1.0, #config{}) =:= "1.0")}, - {"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= "-1.0")}, + {"0.0", ?_assert(encode(float, 0.0, #config{}) =:= <<"0.0">>)}, + {"1.0", ?_assert(encode(float, 1.0, #config{}) =:= <<"1.0">>)}, + {"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= <<"-1.0">>)}, {"3.1234567890987654321", ?_assert( - encode(float, 3.1234567890987654321, #config{}) =:= "3.1234567890987655") + encode(float, 3.1234567890987654321, #config{}) =:= <<"3.1234567890987655">>) }, - {"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= "1.0e23")}, - {"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= "0.3")}, - {"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= "0.0001")}, - {"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= "1.0e-5")}, - {"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= "1.0e-8")}, - {"1.0e-323", ?_assert(encode(float, 
1.0e-323, #config{}) =:= "1.0e-323")}, - {"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= "1.0e308")}, + {"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= <<"1.0e23">>)}, + {"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= <<"0.3">>)}, + {"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= <<"0.0001">>)}, + {"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= <<"1.0e-5">>)}, + {"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= <<"1.0e-8">>)}, + {"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= <<"1.0e-323">>)}, + {"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= <<"1.0e308">>)}, {"min normalized float", ?_assert( - encode(float, math:pow(2, -1022), #config{}) =:= "2.2250738585072014e-308" + encode(float, math:pow(2, -1022), #config{}) =:= <<"2.2250738585072014e-308">> ) }, {"max normalized float", ?_assert( encode(float, (2 - math:pow(2, -52)) * math:pow(2, 1023), #config{}) - =:= "1.7976931348623157e308" + =:= <<"1.7976931348623157e308">> ) }, {"min denormalized float", - ?_assert(encode(float, math:pow(2, -1074), #config{}) =:= "5.0e-324") + ?_assert(encode(float, math:pow(2, -1074), #config{}) =:= <<"5.0e-324">>) }, {"max denormalized float", ?_assert( encode(float, (1 - math:pow(2, -52)) * math:pow(2, -1022), #config{}) - =:= "2.225073858507201e-308" + =:= <<"2.225073858507201e-308">> ) - } + }, + {"hello world", ?_assert(encode(string, <<"hello world">>, #config{}) =:= <<"\"hello world\"">>)}, + {"key", ?_assert(encode(key, <<"key">>, #config{}) =:= <<"\"key\"">>)}, + {"1", ?_assert(encode(integer, 1, #config{}) =:= <<"1">>)}, + {"-1", ?_assert(encode(integer, -1, #config{}) =:= <<"-1">>)}, + {"true", ?_assert(encode(literal, true, #config{}) =:= <<"true">>)}, + {"false", ?_assert(encode(literal, false, #config{}) =:= <<"false">>)}, + {"null", ?_assert(encode(literal, null, #config{}) =:= <<"null">>)} + ]. 
+ + +rep_manipulation_test_() -> + [ + {"allocate a new context", ?_assertEqual( + {[], #config{}}, + start_json() + )}, + {"allocate a new context with config", ?_assertEqual( + {[], #config{space=1, indent=2}}, + start_json([{space, 1}, {indent, 2}]) + )}, + {"allocate a new object on an empty stack", ?_assertEqual( + {[{object, <<"{">>}], #config{}}, + start_object({[], #config{}}) + )}, + {"allocate a new object on a stack", ?_assertEqual( + {[{object, <<"{">>}, {object, <<"{">>}], #config{}}, + start_object({[{object, <<"{">>}], #config{}}) + )}, + {"allocate a new array on an empty stack", ?_assertEqual( + {[{array, <<"[">>}], #config{}}, + start_array({[], #config{}}) + )}, + {"allocate a new array on a stack", ?_assertEqual( + {[{array, <<"[">>}, {object, <<"{">>}], #config{}}, + start_array({[{object, <<"{">>}], #config{}}) + )}, + {"insert a key into an object", ?_assertEqual( + {[{object, <<"\"key\"">>, <<"{">>}], #config{}}, + insert(<<"\"key\"">>, {[{object, <<"{">>}], #config{}}) + )}, + {"get current key", ?_assertEqual( + key, + get_key({[{object, key, <<"{">>}], #config{}}) + )}, + {"try to get non-key from object", ?_assertError( + badarg, + get_key({[{object, <<"{">>}], #config{}}) + )}, + {"try to get key from array", ?_assertError( + badarg, + get_key({[{array, <<"[">>}], #config{}}) + )}, + {"insert a value into an object", ?_assertEqual( + {[{object, <<"{\"key\":true">>}], #config{}}, + insert(<<"true">>, {[{object, <<"\"key\"">>, <<"{">>}], #config{}}) + )}, + {"insert a value into an array", ?_assertEqual( + {[{array, <<"[true">>}], #config{}}, + insert(<<"true">>, {[{array, <<"[">>}], #config{}}) + )}, + {"insert a key/value pair into an object", ?_assertEqual( + {[{object, <<"{\"x\":true,\"y\":false">>}], #config{}}, + insert(<<"\"y\"">>, <<"false">>, {[{object, <<"{\"x\":true">>}], #config{}}) + )}, + {"finish an object with no ancestor", ?_assertEqual( + {<<"{\"x\":true,\"y\":false}">>, #config{}}, + finish({[{object, 
<<"{\"x\":true,\"y\":false">>}], #config{}}) + )}, + {"finish an empty object", ?_assertEqual( + {<<"{}">>, #config{}}, + finish({[{object, <<"{">>}], #config{}}) + )}, + {"finish an object with an ancestor", ?_assertEqual( + {[{object, <<"{\"a\":[],\"b\":{\"x\":true,\"y\":false}">>}], #config{}}, + finish({ + [{object, <<"{\"x\":true,\"y\":false">>}, {object, <<"\"b\"">>, <<"{\"a\":[]">>}], + #config{} + }) + )}, + {"finish an array with no ancestor", ?_assertEqual( + {<<"[true,false,null]">>, #config{}}, + finish({[{array, <<"[true,false,null">>}], #config{}}) + )}, + {"finish an array with an ancestor", ?_assertEqual( + {[{array, <<"[1,2,3,[true,false,null]">>}], #config{}}, + finish({[{array, <<"[true,false,null">>}, {array, <<"[1,2,3">>}], #config{}}) + )} ]. handle_event_test_() -> - Data = jsx:test_cases(), + Data = jsx:test_cases() ++ jsx:special_test_cases(), [ { Title, ?_assertEqual( JSON, - lists:foldl(fun handle_event/2, {start, [], #config{}}, Events ++ [end_json]) + lists:foldl(fun handle_event/2, init([]), Events ++ [end_json]) ) } || {Title, JSON, _, Events} <- Data ]. diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index fad898a..d9840d3 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -25,11 +25,12 @@ -export([to_term/2]). -export([init/1, handle_event/2]). +-export([start_term/0, start_term/1]). +-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]). -record(config, { - labels = binary, - post_decode = false + labels = binary }). -type config() :: list(). 
@@ -59,8 +60,6 @@ parse_config([{labels, Val}|Rest], Config) parse_config(Rest, Config#config{labels = Val}); parse_config([labels|Rest], Config) -> parse_config(Rest, Config#config{labels = binary}); -parse_config([{post_decode, F}|Rest], Config=#config{post_decode=false}) when is_function(F, 1) -> - parse_config(Rest, Config#config{post_decode=F}); parse_config([{K, _}|Rest] = Options, Config) -> case lists:member(K, jsx_config:valid_flags()) of true -> parse_config(Rest, Config) @@ -77,34 +76,21 @@ parse_config([], Config) -> -type state() :: {[any()], #config{}}. -spec init(Config::proplists:proplist()) -> state(). -init(Config) -> {[[]], parse_config(Config)}. +init(Config) -> {[], parse_config(Config)}. -spec handle_event(Event::any(), State::state()) -> state(). -handle_event(end_json, {[[Terms]], _Config}) -> Terms; +handle_event(end_json, State) -> get_value(State); -handle_event(start_object, {Terms, Config}) -> {[[]|Terms], Config}; -handle_event(end_object, {[[], {key, Key}, Last|Terms], Config}) -> - {[[{Key, post_decode([{}], Config)}] ++ Last] ++ Terms, Config}; -handle_event(end_object, {[Object, {key, Key}, Last|Terms], Config}) -> - {[[{Key, post_decode(lists:reverse(Object), Config)}] ++ Last] ++ Terms, Config}; -handle_event(end_object, {[[], Last|Terms], Config}) -> - {[[post_decode([{}], Config)] ++ Last] ++ Terms, Config}; -handle_event(end_object, {[Object, Last|Terms], Config}) -> - {[[post_decode(lists:reverse(Object), Config)] ++ Last] ++ Terms, Config}; +handle_event(start_object, State) -> start_object(State); +handle_event(end_object, State) -> finish(State); -handle_event(start_array, {Terms, Config}) -> {[[]|Terms], Config}; -handle_event(end_array, {[List, {key, Key}, Last|Terms], Config}) -> - {[[{Key, post_decode(lists:reverse(List), Config)}] ++ Last] ++ Terms, Config}; -handle_event(end_array, {[List, Last|Terms], Config}) -> - {[[post_decode(lists:reverse(List), Config)] ++ Last] ++ Terms, Config}; +handle_event(start_array, 
State) -> start_array(State); +handle_event(end_array, State) -> finish(State); -handle_event({key, Key}, {Terms, Config}) -> {[{key, format_key(Key, Config)}] ++ Terms, Config}; +handle_event({key, Key}, {_, Config} = State) -> insert(format_key(Key, Config), State); -handle_event({_, Event}, {[{key, Key}, Last|Terms], Config}) -> - {[[{Key, post_decode(Event, Config)}] ++ Last] ++ Terms, Config}; -handle_event({_, Event}, {[Last|Terms], Config}) -> - {[[post_decode(Event, Config)] ++ Last] ++ Terms, Config}. +handle_event({_, Event}, State) -> insert(Event, State). format_key(Key, Config) -> @@ -121,8 +107,60 @@ format_key(Key, Config) -> end. -post_decode(Value, #config{post_decode=false}) -> Value; -post_decode(Value, Config) -> (Config#config.post_decode)(Value). +%% internal state is a stack and a config object +%% `{Stack, Config}` +%% the stack is a list of in progress objects/arrays +%% `[Current, Parent, Grandparent,...OriginalAncestor]` +%% an object has the representation on the stack of +%% `{object, [{NthKey, NthValue}, {NthMinus1Key, NthMinus1Value},...{FirstKey, FirstValue}]}` +%% or if there's a key with a yet to be matched value +%% `{object, Key, [{NthKey, NthValue},...]}` +%% an array looks like +%% `{array, [NthValue, NthMinus1Value,...FirstValue]}` + +start_term() -> {[], #config{}}. + +start_term(Config) when is_list(Config) -> {[], parse_config(Config)}. + +%% allocate a new object on top of the stack +start_object({Stack, Config}) -> {[{object, []}] ++ Stack, Config}. + +%% allocate a new array on top of the stack +start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}.
+ +%% finish an object or array and insert it into the parent object or array if one exists or +%% return it if it has no parent +finish({[{object, []}], Config}) -> {[{}], Config}; +finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config}); +finish({[{array, Values}], Config}) -> {lists:reverse(Values), Config}; +finish({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config}); +finish(_) -> erlang:error(badarg). + +%% insert a value when there's no parent object or array +insert(Value, {[], Config}) -> {Value, Config}; +%% insert a term into the object or array on top of the stack: an object with no pending key takes it as the key, an object with a pending key takes it as that key's value, an array takes it as its next value +insert(Key, {[{object, Pairs}|Rest], Config}) -> + {[{object, Key, Pairs}] ++ Rest, Config}; +insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> + {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config}; +insert(Value, {[{array, Values}|Rest], Config}) -> + {[{array, [Value] ++ Values}] ++ Rest, Config}; +insert(_, _) -> erlang:error(badarg). + +%% insert a key/value pair into an object +insert(Key, Value, {[{object, Pairs}|Rest], Config}) -> + {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config}; +insert(_, _, _) -> erlang:error(badarg). + + +get_key({[{object, Key, _}|_], _}) -> Key; +get_key(_) -> erlang:error(badarg). + + +get_value({Value, _Config}) -> Value; +get_value(_) -> erlang:error(badarg). %% eunit tests @@ -132,9 +170,6 @@ post_decode(Value, Config) -> (Config#config.post_decode)(Value).
config_test_() -> - %% for post_decode tests - F = fun(X) -> X end, - G = fun(X, Y) -> {X, Y} end, [ {"empty config", ?_assertEqual(#config{}, parse_config([]))}, {"implicit binary labels", ?_assertEqual(#config{}, parse_config([labels]))}, @@ -144,15 +179,6 @@ config_test_() -> #config{labels=existing_atom}, parse_config([{labels, existing_atom}]) )}, - {"sloppy existing atom labels", ?_assertEqual( - #config{labels=attempt_atom}, - parse_config([{labels, attempt_atom}]) - )}, - {"post decode", ?_assertEqual( - #config{post_decode=F}, - parse_config([{post_decode, F}]) - )}, - {"post decode wrong arity", ?_assertError(badarg, parse_config([{post_decode, G}]))}, {"invalid opt flag", ?_assertError(badarg, parse_config([error]))}, {"invalid opt tuple", ?_assertError(badarg, parse_config([{error, true}]))} ]. @@ -181,110 +207,79 @@ format_key_test_() -> ]. -post_decoders_test_() -> - Events = [ - [{}], - [{<<"key">>, <<"value">>}], - [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}], - [], - [<<"string">>], - [true, false, null], - true, - false, - null, - <<"hello">>, - <<"world">>, - 1, - 1.0 - ], +rep_manipulation_test_() -> [ - {"no post_decode", ?_assertEqual( - Events, - [ post_decode(Event, #config{}) || Event <- Events ] + {"allocate a new context", ?_assertEqual( + {[], #config{}}, + start_term() )}, - {"replace arrays with empty arrays", ?_assertEqual( - [ - [{}], - [{<<"key">>, <<"value">>}], - [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}], - [], - [], - [], - true, - false, - null, - <<"hello">>, - <<"world">>, - 1, - 1.0 - ], - [ post_decode(Event, #config{ - post_decode=fun([T|_] = V) when is_tuple(T) -> V; (V) when is_list(V) -> []; (V) -> V end - }) || Event <- Events - ] + {"allocate a new context with option", ?_assertEqual( + {[], #config{labels=atom}}, + start_term([{labels, atom}]) )}, - {"replace objects with empty objects", ?_assertEqual( - [ - [{}], - [{}], - [{}], - [], - [<<"string">>], - [true, false, null], - 
true, - false, - null, - <<"hello">>, - <<"world">>, - 1, - 1.0 - ], - [ post_decode(Event, #config{ - post_decode=fun([T|_]) when is_tuple(T) -> [{}]; (V) -> V end - }) || Event <- Events - ] + {"allocate a new object on an empty stack", ?_assertEqual( + {[{object, []}], #config{}}, + start_object({[], #config{}}) )}, - {"replace all non-array/non-object values with false", ?_assertEqual( - [ - [{}], - [{<<"key">>, <<"value">>}], - [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}], - [], - [<<"string">>], - [true, false, null], - false, - false, - false, - false, - false, - false, - false - ], - [ post_decode(Event, #config{ - post_decode=fun(V) when is_list(V) -> V; (_) -> false end - }) || Event <- Events - ] + {"allocate a new object on a stack", ?_assertEqual( + {[{object, []}, {object, []}], #config{}}, + start_object({[{object, []}], #config{}}) )}, - {"atoms_to_strings", ?_assertEqual( - [ - [{}], - [{<<"key">>, <<"value">>}], - [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}], - [], - [<<"string">>], - [true, false, null], - <<"true">>, - <<"false">>, - <<"null">>, - <<"hello">>, - <<"world">>, - 1, - 1.0 - ], - [ post_decode(Event, #config{ - post_decode=fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end - }) || Event <- Events - ] + {"allocate a new array on an empty stack", ?_assertEqual( + {[{array, []}], #config{}}, + start_array({[], #config{}}) + )}, + {"allocate a new array on a stack", ?_assertEqual( + {[{array, []}, {object, []}], #config{}}, + start_array({[{object, []}], #config{}}) + )}, + {"insert a key into an object", ?_assertEqual( + {[{object, key, []}, junk], #config{}}, + insert(key, {[{object, []}, junk], #config{}}) + )}, + {"get current key", ?_assertEqual( + key, + get_key({[{object, key, []}], #config{}}) + )}, + {"try to get non-key from object", ?_assertError( + badarg, + get_key({[{object, []}], #config{}}) + )}, + {"try to get key from array", ?_assertError( + 
badarg, + get_key({[{array, []}], #config{}}) + )}, + {"insert a value into an object", ?_assertEqual( + {[{object, [{key, value}]}, junk], #config{}}, + insert(value, {[{object, key, []}, junk], #config{}}) + )}, + {"insert a value into an array", ?_assertEqual( + {[{array, [value]}, junk], #config{}}, + insert(value, {[{array, []}, junk], #config{}}) + )}, + {"insert a key/value pair into an object", ?_assertEqual( + {[{object, [{key, value}, {x, y}]}, junk], #config{}}, + insert(key, value, {[{object, [{x, y}]}, junk], #config{}}) + )}, + {"finish an object with no ancestor", ?_assertEqual( + {[{a, b}, {x, y}], #config{}}, + finish({[{object, [{x, y}, {a, b}]}], #config{}}) + )}, + {"finish an empty object", ?_assertEqual( + {[{}], #config{}}, + finish({[{object, []}], #config{}}) + )}, + {"finish an object with an ancestor", ?_assertEqual( + {[{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], #config{}}, + finish({[{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}], #config{}}) + )}, + {"finish an array with no ancestor", ?_assertEqual( + {[a, b, c], #config{}}, + finish({[{array, [c, b, a]}], #config{}}) + )}, + {"finish an array with an ancestor", ?_assertEqual( + {[{array, [[a, b, c], d, e, f]}], #config{}}, + finish({[{array, [c, b, a]}, {array, [d, e, f]}], #config{}}) )} ]. @@ -295,7 +290,7 @@ handle_event_test_() -> { Title, ?_assertEqual( Term, - lists:foldl(fun handle_event/2, {[[]], #config{}}, Events ++ [end_json]) + lists:foldl(fun handle_event/2, init([]), Events ++ [end_json]) ) } || {Title, _, Term, Events} <- Data ]. diff --git a/src/jsx_verify.erl b/src/jsx_verify.erl index 34c50e5..e923691 100644 --- a/src/jsx_verify.erl +++ b/src/jsx_verify.erl @@ -159,7 +159,7 @@ repeated_keys_test_() -> handle_event_test_() -> - Data = jsx:test_cases(), + Data = jsx:test_cases() ++ jsx:special_test_cases(), [ { Title, ?_assertEqual(