Merge branch 'vtwopointoh' into develop
This commit is contained in:
commit
ff3915abbc
13 changed files with 2406 additions and 3116 deletions
13
CHANGES.md
13
CHANGES.md
|
@ -1,3 +1,16 @@
|
|||
v2.0
|
||||
|
||||
* jsx is much more pragmatic by default; common json errors are silently
|
||||
ignored (and fixed). stricter parsing must be enabled with options
|
||||
* removed `pre_encode` and `post_decode` options in favour of making jsx
|
||||
functions easier to wrap and customize
|
||||
* added abstraction layer for manipulating the internal state of `jsx_to_term`
|
||||
and `jsx_to_json` and exposed it to user code
|
||||
* streaming behavior is now disabled by default and must be requested explicitly
|
||||
* removed deprecated function names (`to_json`, `to_term`, `term_to_json`, etc)
|
||||
* expanded test coverage
|
||||
|
||||
|
||||
v1.4.5
|
||||
|
||||
* various fixes to typespecs uncovered by dialyzer
|
||||
|
|
335
README.md
335
README.md
|
@ -1,16 +1,21 @@
|
|||
# jsx (v1.4.5) #
|
||||
# jsx (v2.0) #
|
||||
|
||||
an erlang application for consuming, producing and manipulating [json][json].
|
||||
inspired by [yajl][yajl]
|
||||
|
||||
jsx is built via [rebar][rebar] and continuous integration testing provided courtesy [travis][travis]
|
||||
**jsx** is built via [rebar][rebar] and continuous integration testing is provided courtesy of [travis][travis]
|
||||
|
||||
current status: [](http://travis-ci.org/talentdeficit/jsx)
|
||||
|
||||
jsx is released under the terms of the [MIT][MIT] license
|
||||
**jsx** is released under the terms of the [MIT][MIT] license
|
||||
|
||||
copyright 2010-2013 alisdair sullivan
|
||||
|
||||
## really important note ##
|
||||
|
||||
this is a preview of the 2.0 release. there are lots of changes. see [CHANGES.md](CHANGES.md)
|
||||
for the overview or read this document for the details
|
||||
|
||||
## index ##
|
||||
|
||||
* [quickstart](#quickstart)
|
||||
|
@ -21,7 +26,6 @@ copyright 2010-2013 alisdair sullivan
|
|||
- [`json_term()`](#json_term)
|
||||
- [`json_text()`](#json_text)
|
||||
- [`event()`](#event)
|
||||
- [`token()`](#token)
|
||||
- [`option()`](#option)
|
||||
* [exports](#exports)
|
||||
- [`encoder/3`, `decoder/3` & `parser/3`](#encoder3-decoder3--parser3)
|
||||
|
@ -113,27 +117,31 @@ false
|
|||
## description ##
|
||||
|
||||
|
||||
jsx is an erlang application for consuming, producing and manipulating
|
||||
**jsx** is an erlang application for consuming, producing and manipulating
|
||||
[json][json]
|
||||
|
||||
json has a [spec][rfc4627] but common usage differs subtly. it's common
|
||||
usage jsx attempts to address, with guidance from the spec
|
||||
**jsx** follows the json [spec][rfc4627] as closely as possible with allowances for
|
||||
real world usage
|
||||
|
||||
all json produced and consumed by jsx should be `utf8` encoded text or a
|
||||
reasonable approximation thereof. ascii works too, but anything beyond that
|
||||
i'm not going to make any promises. **especially** not latin1
|
||||
**jsx** is pragmatic. the json spec allows extensions so **jsx** extends the spec in a
|
||||
number of ways. see the section on `strict` in [options](#option) below though
|
||||
|
||||
the [spec][rfc4627] thinks json values must be wrapped in a json array or
|
||||
object but everyone else disagrees so jsx allows naked json values by default.
|
||||
if you're a curmudgeon who's offended by this deviation here is a wrapper for
|
||||
you:
|
||||
json has no official comments but this parser allows c/c++ style comments.
|
||||
anywhere whitespace is allowed you can insert comments (both `// ...` and `/* ... */`)
|
||||
|
||||
all **jsx** decoder input should be `utf8` encoded binaries. sometimes you get binaries
|
||||
that are almost but not quite valid utf8 whether due to improper escaping or poor
|
||||
encoding. **jsx** replaces invalid codepoints and poorly formed sequences with the
|
||||
unicode replacement character (`u+FFFD`)
|
||||
|
||||
json only allows keys and strings to be delimited by double quotes (`u+0022`) but
|
||||
javascript allows them to be delimited by single quotes (`u+0027`) as well. **jsx**
|
||||
follows javascript in this. strings that start with single quotes can contain double
|
||||
quotes but must end with single quotes and must escape any single quotes they contain
|
||||
|
||||
json and **jsx** only recognize escape sequences as outlined in the json spec. **jsx** just
|
||||
ignores bad escape sequences
|
||||
|
||||
```erlang
|
||||
%% usage: `real_json(jsx:decode(JSON))`
|
||||
real_json(Result) when is_list(Result) -> Result;
|
||||
real_json(Result) when is_tuple(Result, 2) -> Result;
|
||||
real_json(_) -> erlang:error(badarg).
|
||||
```
|
||||
|
||||
|
||||
### json <-> erlang mapping ###
|
||||
|
@ -148,17 +156,18 @@ real_json(_) -> erlang:error(badarg).
|
|||
|
||||
* numbers
|
||||
|
||||
javascript and thus json represent all numeric values with floats. as
|
||||
this is woefully insufficient for many uses, **jsx**, just like erlang,
|
||||
supports bigints. whenever possible, this library will interpret json
|
||||
numbers that look like integers as integers. other numbers will be converted
|
||||
to erlang's floating point type, which is nearly but not quite ieee754.
|
||||
negative zero is not representable in erlang (zero is unsigned in erlang and
|
||||
`0` is equivalent to `-0`) and will be interpreted as regular zero. numbers
|
||||
not representable are beyond the concern of this implementation, and will
|
||||
result in parsing errors
|
||||
javascript and thus json represent all numeric values with floats. there's no
|
||||
reason for erlang -- a language that supports arbitrarily large integers -- to
|
||||
restrict all numbers to the ieee754 range
|
||||
|
||||
whenever possible, **jsx** will interpret json numbers that look like integers as
|
||||
integers. other numbers will be converted to erlang's floating point type, which
|
||||
is nearly but not quite ieee754. negative zero is not representable in erlang (zero
|
||||
is unsigned in erlang and `0` is equivalent to `-0`) and will be interpreted as
|
||||
regular zero. numbers not representable are beyond the concern of this implementation,
|
||||
and will result in parsing errors
|
||||
|
||||
when converting from erlang to json, numbers are represented with their
|
||||
when converting from erlang to json, floats are represented with their
|
||||
shortest representation that will round trip without loss of precision. this
|
||||
means that some floats may be superficially dissimilar (although
|
||||
functionally equivalent). for example, `1.0000000000000001` will be
|
||||
|
@ -166,32 +175,23 @@ real_json(_) -> erlang:error(badarg).
|
|||
|
||||
* strings
|
||||
|
||||
all erlang strings are represented by **valid** `utf8` encoded binaries or
|
||||
atoms. note that the atoms `true`, `false` and `null` will never be
|
||||
automatically converted to strings as the json equivalent values take
|
||||
precedence. when decoding json strings will always be presented as binaries,
|
||||
never atoms
|
||||
|
||||
the [json spec][rfc4627] is frustratingly vague on the exact details of json
|
||||
strings. json must be unicode, but no encoding is specified. javascript
|
||||
explicitly allows strings containing codepoints explicitly disallowed by
|
||||
unicode. json allows implementations to set limits on the content of
|
||||
strings. other implementations attempt to resolve this in various ways. this
|
||||
implementation, in default operation, only accepts strings that meet the
|
||||
constraints set out in the json spec (strings are sequences of unicode
|
||||
codepoints delimited by `"` (`u+0022`) that may not contain control codes
|
||||
unless properly escaped with `\` (`u+005c`)) and that are encoded in `utf8`
|
||||
|
||||
the utf8 restriction means improperly paired surrogates are explicitly
|
||||
disallowed. `u+d800` to `u+dfff` are allowed, but only when they form valid
|
||||
surrogate pairs. surrogates encountered otherwise result in errors. the
|
||||
noncharacters will also result in errors
|
||||
json strings must be unicode encoded binaries or erlang atoms. in practice,
|
||||
because **jsx** only accepts `utf8` binaries all binary strings must be `utf8`.
|
||||
in addition to being unicode json strings restrict a number of codepoints and
|
||||
define a number of escape sequences
|
||||
|
||||
json string escapes of the form `\uXXXX` will be converted to their
|
||||
equivalent codepoints during parsing. this means control characters and
|
||||
other codepoints disallowed by the json spec may be encountered in resulting
|
||||
strings, but codepoints disallowed by the unicode spec will not be. in the
|
||||
interest of pragmatism there is an [option](#option) for looser parsing
|
||||
strings. the utf8 restriction means the surrogates are explicitly disallowed.
|
||||
if a string contains escaped surrogates (`u+d800` to `u+dfff`) they are
|
||||
interpreted but only when they form valid surrogate pairs. surrogates
|
||||
encountered otherwise are replaced with the replacement codepoint (`u+fffd`)
|
||||
|
||||
all erlang strings are represented by **valid** `utf8` encoded binaries. the
|
||||
encoder will check strings for conformance. noncharacters (like `u+ffff`)
|
||||
are allowed in erlang utf8 encoded binaries, but will be replaced in strings
|
||||
passed to the encoder (although, again, see [options](#option))
|
||||
|
||||
this implementation performs no normalization on strings beyond that
|
||||
detailed here. be careful when comparing strings as equivalent strings
|
||||
|
@ -220,22 +220,30 @@ real_json(_) -> erlang:error(badarg).
|
|||
|
||||
### incomplete input ###
|
||||
|
||||
jsx handles incomplete json texts. if a partial json text is parsed, rather than
|
||||
returning a term from your callback handler, jsx returns `{incomplete, F}` where
|
||||
`F` is a function with an identical API to the anonymous fun returned from
|
||||
`decoder/3`, `encoder/3` or `parser/3`. it retains the internal state of the
|
||||
parser at the point where input was exhausted. this allows you to parse as you
|
||||
stream json over a socket or file descriptor, or to parse large json texts
|
||||
without needing to keep them entirely in memory
|
||||
**jsx** can handle incomplete json texts. if the option `stream` is passed to the decoder
|
||||
or parser and if a partial json text is parsed, rather than returning a term from
|
||||
your callback handler, **jsx** returns `{incomplete, F}` where `F` is a function with
|
||||
an identical API to the anonymous fun returned from `decoder/3`, `encoder/3` or
|
||||
`parser/3`. it retains the internal state of the parser at the point where input
|
||||
was exhausted. this allows you to parse as you stream json over a socket or file
|
||||
descriptor, or to parse large json texts without needing to keep them entirely in
|
||||
memory
|
||||
|
||||
however, it is important to recognize that jsx is greedy by default. jsx will
|
||||
consider the parsing complete if input is exhausted and the json text is not
|
||||
unambiguously incomplete. this is mostly relevant when parsing bare numbers like
|
||||
`<<"1234">>`. this could be a complete json integer or just the beginning of a
|
||||
json integer that is being parsed incrementally. jsx will treat it as a whole
|
||||
integer. calling jsx with the [option](#option) `explicit_end` reverses this
|
||||
behavior and never considers parsing complete until the `incomplete` function is
|
||||
called with the argument `end_stream`
|
||||
however, it is important to recognize that **jsx** is conservative by default. **jsx** will
|
||||
not consider the parsing complete even when input is exhausted and the json text is
|
||||
unambiguously complete. to end parsing, call the `incomplete` function with the
|
||||
argument `end_stream` like:
|
||||
|
||||
```erlang
|
||||
1> {incomplete, F} = jsx:decode(<<"[">>, [stream]).
|
||||
{incomplete,#Fun<jsx_decoder.1.122947756>}
|
||||
2> F(end_stream).
|
||||
** exception error: bad argument
|
||||
3> {incomplete, G} = F(<<"]">>).
|
||||
{incomplete,#Fun<jsx_decoder.1.122947756>}
|
||||
4> G(end_stream).
|
||||
[]
|
||||
```
|
||||
|
||||
|
||||
## data types ##
|
||||
|
@ -282,50 +290,32 @@ event() = start_object
|
|||
| end_json
|
||||
```
|
||||
|
||||
#### `token()` ####
|
||||
|
||||
```erlang
|
||||
token() = event()
|
||||
| binary()
|
||||
| {number, integer() | float()}
|
||||
| integer()
|
||||
| float()
|
||||
| true
|
||||
| false
|
||||
| null
|
||||
```
|
||||
|
||||
the representation used during syntactic analysis. you can generate this
|
||||
yourself and feed it to `jsx:parser/3` if you'd like to define your own
|
||||
representations
|
||||
the subset of [`token()`](#token) emitted by the decoder and encoder to handlers
|
||||
|
||||
#### `option()` ####
|
||||
|
||||
```erlang
|
||||
option() = replaced_bad_utf8
|
||||
| escaped_forward_slashes
|
||||
| single_quoted_strings
|
||||
| unescaped_jsonp
|
||||
| comments
|
||||
option() = escaped_forward_slashes
|
||||
| escaped_strings
|
||||
| unescaped_jsonp
|
||||
| dirty_strings
|
||||
| ignored_bad_escapes
|
||||
| relax
|
||||
| explicit_end
|
||||
```
|
||||
| strict
|
||||
| {strict, [strict_option()]}
|
||||
| stream
|
||||
| {incomplete_handler, fun()}
|
||||
| {error_handler, fun()}
|
||||
|
||||
jsx functions all take a common set of options. not all flags have meaning
|
||||
strict_option() = comments
|
||||
| utf8
|
||||
| single_quotes
|
||||
| escapes
|
||||
```
|
||||
|
||||
**jsx** functions all take a common set of options. not all flags have meaning
|
||||
in all contexts, but they are always valid options. functions may have
|
||||
additional options beyond these. see
|
||||
[individual function documentation](#exports) for details
|
||||
|
||||
- `replaced_bad_utf8`
|
||||
|
||||
json text input and json strings SHOULD be utf8 encoded binaries,
|
||||
appropriately escaped as per the json spec. attempts are made to replace
|
||||
invalid codepoints with `u+FFFD` as per the unicode spec when this option is
|
||||
present. this applies both to malformed unicode and disallowed codepoints
|
||||
|
||||
- `escaped_forward_slashes`
|
||||
|
||||
json strings are escaped according to the json spec. this means forward
|
||||
|
@ -333,35 +323,6 @@ additional options beyond these. see
|
|||
are left unescaped. you may want to use this if you are embedding json
|
||||
directly into a html or xml document
|
||||
|
||||
- `single_quoted_strings`
|
||||
|
||||
some parsers allow double quotes (`u+0022`) to be replaced by single quotes
|
||||
(`u+0027`) to delimit keys and strings. this option allows json containing
|
||||
single quotes as structural characters to be parsed without errors. note
|
||||
that the parser expects strings to be terminated by the same quote type that
|
||||
opened it and that single quotes must, obviously, be escaped within strings
|
||||
delimited by single quotes
|
||||
|
||||
double quotes must **always** be escaped, regardless of what kind of quotes
|
||||
delimit the string they are found in
|
||||
|
||||
the parser will never emit json with keys or strings delimited by single
|
||||
quotes
|
||||
|
||||
- `unescaped_jsonp`
|
||||
|
||||
javascript interpreters treat the codepoints `u+2028` and `u+2029` as
|
||||
significant whitespace. json strings that contain either of these codepoints
|
||||
will be parsed incorrectly by some javascript interpreters. by default,
|
||||
these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to
|
||||
retain compatibility. this option simply removes that escaping
|
||||
|
||||
- `comments`
|
||||
|
||||
json has no official comments but some parsers allow c/c++ style comments.
|
||||
anywhere whitespace is allowed this flag allows comments (both `// ...` and
|
||||
`/* ... */`)
|
||||
|
||||
- `escaped_strings`
|
||||
|
||||
by default both the encoder and decoder return strings as utf8 binaries
|
||||
|
@ -370,12 +331,14 @@ additional options beyond these. see
|
|||
unaltered. this flag escapes strings as if for output in json, removing
|
||||
control codes and problematic codepoints and replacing them with the
|
||||
appropriate escapes
|
||||
|
||||
- `unescaped_jsonp`
|
||||
|
||||
- `ignored_bad_escapes`
|
||||
|
||||
during decoding ignore unrecognized escape sequences and leave them as is in
|
||||
the stream. note that combining this option with `escaped_strings` will
|
||||
result in the escape character itself being escaped
|
||||
javascript interpreters treat the codepoints `u+2028` and `u+2029` as
|
||||
significant whitespace. json strings that contain either of these codepoints
|
||||
will be parsed incorrectly by some javascript interpreters. by default,
|
||||
these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to
|
||||
retain compatibility. this option simply removes that escaping
|
||||
|
||||
- `dirty_strings`
|
||||
|
||||
|
@ -383,42 +346,39 @@ additional options beyond these. see
|
|||
can result in unwanted behaviour. if your strings are already escaped (or
|
||||
you'd like to force invalid strings into "json" you monster) use this flag
|
||||
to bypass escaping. this can also be used to read in **really** invalid json
|
||||
strings. everything but escaped quotes are passed as is to the resulting
|
||||
string term. note that this overrides `ignored_bad_escapes`,
|
||||
`unescaped_jsonp` and `escaped_strings`
|
||||
strings. everything between unescaped quotes is passed as is to the resulting
|
||||
string term. note that this takes precedence over any other options
|
||||
|
||||
- `explicit_end`
|
||||
- `strict`
|
||||
|
||||
as mentioned [earlier](#description), **jsx** is pragmatic. if you're more of a
|
||||
json purist or you're really into bdsm, stricter adherence to the spec is
|
||||
possible. the following restrictions are available
|
||||
|
||||
* `comments`
|
||||
|
||||
comments are disabled and result in a `badarg` error
|
||||
|
||||
* `utf8`
|
||||
|
||||
invalid codepoints and malformed unicode result in `badarg` errors
|
||||
|
||||
* `single_quotes`
|
||||
|
||||
only keys and strings delimited by double quotes (`u+0022`) are allowed. the
|
||||
single quote (`u+0027`) results in a `badarg` error
|
||||
|
||||
* `escapes`
|
||||
|
||||
escape sequences not adhering to the json spec result in a `badarg` error
|
||||
|
||||
any combination of these can be passed to **jsx** by using `{strict, [strict_option()]}`.
|
||||
`strict` is equivalent to `{strict, [comments, utf8, single_quotes, escapes]}`
|
||||
|
||||
- `stream`
|
||||
|
||||
see [incomplete input](#incomplete-input)
|
||||
|
||||
- `relax`
|
||||
|
||||
relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments,
|
||||
ignored_bad_escapes]` for when you don't care how absolutely terrible your
|
||||
json input is, you just want the parser to do the best it can
|
||||
|
||||
- `incomplete_handler` & `error_handler`
|
||||
|
||||
the default incomplete and error handlers can be replaced with user defined
|
||||
handlers. if options include `{error_handler, F}` and/or
|
||||
`{incomplete_handler, F}` where `F` is a function of arity 3 they will be
|
||||
called instead of the default handler. the spec for `F` is as follows
|
||||
```erlang
|
||||
F(Remaining, InternalState, Config) -> any()
|
||||
|
||||
Remaining = binary() | term()
|
||||
InternalState = opaque()
|
||||
Config = list()
|
||||
```
|
||||
`Remaining` is the binary fragment or term that caused the error
|
||||
|
||||
`InternalState` is an opaque structure containing the internal state of the
|
||||
parser/decoder/encoder
|
||||
|
||||
`Config` is a list of options/flags in use by the parser/decoder/encoder
|
||||
|
||||
these functions should be considered experimental for now
|
||||
|
||||
|
||||
## exports ##
|
||||
|
||||
|
@ -435,10 +395,10 @@ parser(Module, Args, Opts) -> Fun((Tokens) -> any())
|
|||
Opts = [option()]
|
||||
JSONText = json_text()
|
||||
JSONTerm = json_term()
|
||||
Tokens = token() | [token()]
|
||||
Tokens = event() | [event()]
|
||||
```
|
||||
|
||||
jsx is a json compiler with interleaved tokenizing, syntactic analysis and
|
||||
**jsx** is a json compiler with interleaved tokenizing, syntactic analysis and
|
||||
semantic analysis stages. included are two tokenizers; one that handles json
|
||||
texts (`decoder/3`) and one that handles erlang terms (`encoder/3`). there is
|
||||
also an entry point to the syntactic analysis stage for use with user-defined
|
||||
|
@ -468,7 +428,7 @@ decode(JSON, Opts) -> Term
|
|||
|
||||
JSON = json_text()
|
||||
Term = json_term()
|
||||
Opts = [option() | labels | {labels, Label} | {post_decode, F}]
|
||||
Opts = [option() | labels | {labels, Label}]
|
||||
Label = binary | atom | existing_atom | attempt_atom
|
||||
F = fun((any()) -> any())
|
||||
```
|
||||
|
@ -485,18 +445,6 @@ new atoms to the atom table and will result in a `badarg` error if the atom
|
|||
does not exist. `attempt_atom` will convert keys to atoms when they exist,
|
||||
and leave them as binary otherwise
|
||||
|
||||
`{post_decode, F}` is a user defined function of arity 1 that is called on each
|
||||
output value (objects, arrays, strings, numbers and literals). it may return any
|
||||
value to be substituted in the returned term. for example:
|
||||
|
||||
```erlang
|
||||
1> F = fun(V) when is_list(V) -> V; (V) -> false end.
|
||||
2> jsx:decode(<<"{\"a list\": [true, \"a string\", 1]}">>, [{post_decode, F}]).
|
||||
[{<<"a list">>, [false, false, false]}]
|
||||
```
|
||||
|
||||
declaring more than one post-decoder will result in a `badarg` error exception
|
||||
|
||||
raises a `badarg` error exception if input is not valid json
|
||||
|
||||
|
||||
|
@ -508,7 +456,7 @@ encode(Term, Opts) -> JSON
|
|||
|
||||
Term = json_term()
|
||||
JSON = json_text()
|
||||
Opts = [option() | {pre_encode, F} | space | {space, N} | indent | {indent, N}]
|
||||
Opts = [option() | space | {space, N} | indent | {indent, N}]
|
||||
F = fun((any()) -> any())
|
||||
N = pos_integer()
|
||||
```
|
||||
|
@ -522,18 +470,6 @@ the option `{indent, N}` inserts a newline and `N` spaces for each level of
|
|||
indentation in your json output. note that this overrides spaces inserted after
|
||||
a comma. `indent` is an alias for `{indent, 1}`. the default is `{indent, 0}`
|
||||
|
||||
`{pre_encode, F}` is a user defined function of arity 1 that is called on each
|
||||
input value. it may return any valid json value to be substituted in the
|
||||
returned json. for example:
|
||||
|
||||
```erlang
|
||||
1> F = fun(V) when is_list(V) -> V; (V) -> false end.
|
||||
2> jsx:encode([{<<"a list">>, [true, <<"a string">>, 1]}], [{pre_encode, F}]).
|
||||
<<"{\"a list\": [false, false, false]}">>
|
||||
```
|
||||
|
||||
declaring more than one pre-encoder will result in a `badarg` error exception
|
||||
|
||||
raises a `badarg` error exception if input is not a valid
|
||||
[erlang representation of json](#json---erlang-mapping)
|
||||
|
||||
|
@ -621,7 +557,7 @@ what exactly constitutes valid json may be altered via [options](#option)
|
|||
|
||||
## callback exports ##
|
||||
|
||||
the following functions should be exported from a jsx callback module
|
||||
the following functions should be exported from a **jsx** callback module
|
||||
|
||||
#### `Module:init/1` ####
|
||||
|
||||
|
@ -667,16 +603,11 @@ following events must be handled:
|
|||
|
||||
the end of a json array
|
||||
|
||||
- `{key, binary()}`
|
||||
|
||||
a key in a json object. this is guaranteed to follow either `start_object`
|
||||
or a json value. it will usually be a `utf8` encoded binary. see the
|
||||
[options](#option) for possible exceptions
|
||||
|
||||
- `{string, binary()}`
|
||||
|
||||
a json string. it will usually be a `utf8` encoded binary. see the
|
||||
[options](#option) for possible exceptions
|
||||
[options](#option) for possible exceptions. note that keys are also
|
||||
json strings
|
||||
|
||||
- `{integer, integer()}`
|
||||
|
||||
|
|
357
src/jsx.erl
357
src/jsx.erl
|
@ -28,22 +28,17 @@
|
|||
-export([format/1, format/2, minify/1, prettify/1]).
|
||||
-export([encoder/3, decoder/3, parser/3]).
|
||||
-export([resume/3]).
|
||||
%% old api
|
||||
-export([term_to_json/1, term_to_json/2, json_to_term/1, json_to_term/2]).
|
||||
-export([to_json/1, to_json/2]).
|
||||
-export([to_term/1, to_term/2]).
|
||||
|
||||
-export_type([json_term/0, json_text/0, token/0]).
|
||||
-export_type([config/0, encoder/0, decoder/0, parser/0, internal_state/0]).
|
||||
-export_type([encoder/0, decoder/0, parser/0, internal_state/0]).
|
||||
|
||||
|
||||
-ifdef(TEST).
|
||||
-include("jsx_tests.hrl").
|
||||
-else.
|
||||
-include("jsx_config.hrl").
|
||||
%% data and helper functions for tests
|
||||
-export([test_cases/0, special_test_cases/0]).
|
||||
-export([init/1, handle_event/2]).
|
||||
-endif.
|
||||
|
||||
-type config() :: #config{}.
|
||||
|
||||
-type json_term()
|
||||
:: [{binary() | atom(), json_term()}]
|
||||
|
@ -64,19 +59,12 @@
|
|||
encode(Source) -> encode(Source, []).
|
||||
encode(Source, Config) -> jsx_to_json:to_json(Source, Config).
|
||||
|
||||
%% old api, alias for encode/x
|
||||
|
||||
-spec to_json(Source::json_term()) -> json_text() | {incomplete, encoder()}.
|
||||
-spec to_json(Source::json_term(), Config::jsx_to_json:config()) -> json_text() | {incomplete, encoder()}.
|
||||
-spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}.
|
||||
-spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}.
|
||||
|
||||
to_json(Source) -> encode(Source, []).
|
||||
to_json(Source, Config) -> encode(Source, Config).
|
||||
|
||||
-spec term_to_json(Source::json_term()) -> json_text() | {incomplete, encoder()}.
|
||||
-spec term_to_json(Source::json_term(), Config::jsx_to_json:config()) -> json_text() | {incomplete, encoder()}.
|
||||
|
||||
term_to_json(Source) -> encode(Source, []).
|
||||
term_to_json(Source, Config) -> encode(Source, Config).
|
||||
decode(Source) -> decode(Source, []).
|
||||
decode(Source, Config) -> jsx_to_term:to_term(Source, Config).
|
||||
|
||||
|
||||
-spec format(Source::json_text()) -> json_text() | {incomplete, decoder()}.
|
||||
|
@ -96,27 +84,6 @@ minify(Source) -> format(Source, []).
|
|||
prettify(Source) -> format(Source, [space, {indent, 2}]).
|
||||
|
||||
|
||||
-spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}.
|
||||
-spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}.
|
||||
|
||||
decode(Source) -> decode(Source, []).
|
||||
decode(Source, Config) -> jsx_to_term:to_term(Source, Config).
|
||||
|
||||
%% old api, alias for to_term/x
|
||||
|
||||
-spec to_term(Source::json_text()) -> json_term() | {incomplete, decoder()}.
|
||||
-spec to_term(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}.
|
||||
|
||||
to_term(Source) -> decode(Source, []).
|
||||
to_term(Source, Config) -> decode(Source, Config).
|
||||
|
||||
-spec json_to_term(Source::json_text()) -> json_term() | {incomplete, decoder()}.
|
||||
-spec json_to_term(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}.
|
||||
|
||||
json_to_term(Source) -> decode(Source, []).
|
||||
json_to_term(Source, Config) -> decode(Source, Config).
|
||||
|
||||
|
||||
-spec is_json(Source::any()) -> true | false.
|
||||
-spec is_json(Source::any(), Config::jsx_verify:config()) -> true | false.
|
||||
|
||||
|
@ -182,3 +149,311 @@ resume(Term, {decoder, State, Handler, Acc, Stack}, Config) ->
|
|||
resume(Term, {parser, State, Handler, Stack}, Config) ->
|
||||
jsx_parser:resume(Term, State, Handler, Stack, jsx_config:parse_config(Config)).
|
||||
|
||||
|
||||
|
||||
-ifdef(TEST).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
|
||||
%% test handler
|
||||
init([]) -> [].
|
||||
|
||||
handle_event(end_json, State) -> lists:reverse([end_json] ++ State);
|
||||
handle_event(Event, State) -> [Event] ++ State.
|
||||
|
||||
|
||||
test_cases() ->
|
||||
empty_array()
|
||||
++ nested_array()
|
||||
++ empty_object()
|
||||
++ nested_object()
|
||||
++ strings()
|
||||
++ literals()
|
||||
++ integers()
|
||||
++ floats()
|
||||
++ compound_object().
|
||||
|
||||
%% segregate these so we can skip them in `jsx_to_term`
|
||||
special_test_cases() -> special_objects() ++ special_array().
|
||||
|
||||
|
||||
empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}].
|
||||
|
||||
|
||||
nested_array() ->
|
||||
[{
|
||||
"[[[]]]",
|
||||
<<"[[[]]]">>,
|
||||
[[[]]],
|
||||
[start_array, start_array, start_array, end_array, end_array, end_array]
|
||||
}].
|
||||
|
||||
|
||||
empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}].
|
||||
|
||||
|
||||
nested_object() ->
|
||||
[{
|
||||
"{\"key\":{\"key\":{}}}",
|
||||
<<"{\"key\":{\"key\":{}}}">>,
|
||||
[{<<"key">>, [{<<"key">>, [{}]}]}],
|
||||
[
|
||||
start_object,
|
||||
{key, <<"key">>},
|
||||
start_object,
|
||||
{key, <<"key">>},
|
||||
start_object,
|
||||
end_object,
|
||||
end_object,
|
||||
end_object
|
||||
]
|
||||
}].
|
||||
|
||||
|
||||
naked_strings() ->
|
||||
Raw = [
|
||||
"",
|
||||
"hello world"
|
||||
],
|
||||
[
|
||||
{
|
||||
String,
|
||||
<<"\"", (list_to_binary(String))/binary, "\"">>,
|
||||
list_to_binary(String),
|
||||
[{string, list_to_binary(String)}]
|
||||
}
|
||||
|| String <- Raw
|
||||
].
|
||||
|
||||
|
||||
strings() ->
|
||||
naked_strings()
|
||||
++ [ wrap_with_array(Test) || Test <- naked_strings() ]
|
||||
++ [ wrap_with_object(Test) || Test <- naked_strings() ].
|
||||
|
||||
|
||||
naked_integers() ->
|
||||
Raw = [
|
||||
1, 2, 3,
|
||||
127, 128, 129,
|
||||
255, 256, 257,
|
||||
65534, 65535, 65536,
|
||||
18446744073709551616,
|
||||
18446744073709551617
|
||||
],
|
||||
[
|
||||
{
|
||||
integer_to_list(X),
|
||||
list_to_binary(integer_to_list(X)),
|
||||
X,
|
||||
[{integer, X}]
|
||||
}
|
||||
|| X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0]
|
||||
].
|
||||
|
||||
|
||||
integers() ->
|
||||
naked_integers()
|
||||
++ [ wrap_with_array(Test) || Test <- naked_integers() ]
|
||||
++ [ wrap_with_object(Test) || Test <- naked_integers() ].
|
||||
|
||||
|
||||
naked_floats() ->
|
||||
Raw = [
|
||||
0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
|
||||
1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
|
||||
1234567890.0987654321,
|
||||
0.0e0,
|
||||
1234567890.0987654321e16,
|
||||
0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308,
|
||||
1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308,
|
||||
2.2250738585072014e-308, %% min normalized float
|
||||
1.7976931348623157e308, %% max normalized float
|
||||
5.0e-324, %% min denormalized float
|
||||
2.225073858507201e-308 %% max denormalized float
|
||||
],
|
||||
[
|
||||
{
|
||||
sane_float_to_list(X),
|
||||
list_to_binary(sane_float_to_list(X)),
|
||||
X,
|
||||
[{float, X}]
|
||||
}
|
||||
|| X <- Raw ++ [ -1 * Y || Y <- Raw ]
|
||||
].
|
||||
|
||||
|
||||
floats() ->
|
||||
naked_floats()
|
||||
++ [ wrap_with_array(Test) || Test <- naked_floats() ]
|
||||
++ [ wrap_with_object(Test) || Test <- naked_floats() ].
|
||||
|
||||
|
||||
naked_literals() ->
|
||||
[
|
||||
{
|
||||
atom_to_list(Literal),
|
||||
atom_to_binary(Literal, unicode),
|
||||
Literal,
|
||||
[{literal, Literal}]
|
||||
}
|
||||
|| Literal <- [true, false, null]
|
||||
].
|
||||
|
||||
|
||||
literals() ->
|
||||
naked_literals()
|
||||
++ [ wrap_with_array(Test) || Test <- naked_literals() ]
|
||||
++ [ wrap_with_object(Test) || Test <- naked_literals() ].
|
||||
|
||||
|
||||
compound_object() ->
|
||||
[{
|
||||
"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]",
|
||||
<<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>,
|
||||
[[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]],
|
||||
[
|
||||
start_array,
|
||||
start_object,
|
||||
{key, <<"alpha">>},
|
||||
start_array,
|
||||
{integer, 1},
|
||||
{integer, 2},
|
||||
{integer, 3},
|
||||
end_array,
|
||||
{key, <<"beta">>},
|
||||
start_object,
|
||||
{key, <<"alpha">>},
|
||||
start_array,
|
||||
{float, 1.0},
|
||||
{float, 2.0},
|
||||
{float, 3.0},
|
||||
end_array,
|
||||
{key, <<"beta">>},
|
||||
start_array,
|
||||
{literal, true},
|
||||
{literal, false},
|
||||
end_array,
|
||||
end_object,
|
||||
end_object,
|
||||
start_array,
|
||||
start_object,
|
||||
end_object,
|
||||
end_array,
|
||||
end_array
|
||||
]
|
||||
}].
|
||||
|
||||
|
||||
special_objects() ->
|
||||
[
|
||||
{
|
||||
"[{key, atom}]",
|
||||
<<"{\"key\":\"atom\"}">>,
|
||||
[{key, atom}],
|
||||
[start_object, {key, <<"key">>}, {string, <<"atom">>}, end_object]
|
||||
},
|
||||
{
|
||||
"[{1, true}]",
|
||||
<<"{\"1\":true}">>,
|
||||
[{1, true}],
|
||||
[start_object, {key, <<"1">>}, {literal, true}, end_object]
|
||||
}
|
||||
].
|
||||
|
||||
|
||||
special_array() ->
|
||||
[
|
||||
{
|
||||
"[foo, bar]",
|
||||
<<"[\"foo\",\"bar\"]">>,
|
||||
[foo, bar],
|
||||
[start_array, {string, <<"foo">>}, {string, <<"bar">>}, end_array]
|
||||
}
|
||||
].
|
||||
|
||||
|
||||
wrap_with_array({Title, JSON, Term, Events}) ->
|
||||
{
|
||||
"[" ++ Title ++ "]",
|
||||
<<"[", JSON/binary, "]">>,
|
||||
[Term],
|
||||
[start_array] ++ Events ++ [end_array]
|
||||
}.
|
||||
|
||||
|
||||
wrap_with_object({Title, JSON, Term, Events}) ->
|
||||
{
|
||||
"{\"key\":" ++ Title ++ "}",
|
||||
<<"{\"key\":", JSON/binary, "}">>,
|
||||
[{<<"key">>, Term}],
|
||||
[start_object, {key, <<"key">>}] ++ Events ++ [end_object]
|
||||
}.
|
||||
|
||||
|
||||
sane_float_to_list(X) ->
|
||||
[Output] = io_lib:format("~p", [X]),
|
||||
Output.
|
||||
|
||||
|
||||
incremental_decode(JSON) ->
|
||||
Final = lists:foldl(
|
||||
fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end,
|
||||
decoder(jsx, [], [stream]),
|
||||
json_to_bytes(JSON)
|
||||
),
|
||||
Final(end_stream).
|
||||
|
||||
|
||||
incremental_parse(Events) ->
|
||||
Final = lists:foldl(
|
||||
fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end,
|
||||
parser(?MODULE, [], [stream]),
|
||||
lists:map(fun(X) -> [X] end, Events)
|
||||
),
|
||||
Final(end_stream).
|
||||
|
||||
|
||||
%% used to convert a json text into a list of codepoints to be incrementally
|
||||
%% parsed
|
||||
json_to_bytes(JSON) -> json_to_bytes(JSON, []).
|
||||
|
||||
json_to_bytes(<<>>, Acc) -> [<<>>] ++ lists:reverse(Acc);
|
||||
json_to_bytes(<<X, Rest/binary>>, Acc) -> json_to_bytes(Rest, [<<X>>] ++ Acc).
|
||||
|
||||
|
||||
%% actual tests!
|
||||
decode_test_() ->
|
||||
Data = test_cases(),
|
||||
[{Title, ?_assertEqual(Events ++ [end_json], (decoder(?MODULE, [], []))(JSON))}
|
||||
|| {Title, JSON, _, Events} <- Data
|
||||
] ++
|
||||
[{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_decode(JSON))}
|
||||
|| {Title, JSON, _, Events} <- Data
|
||||
].
|
||||
|
||||
|
||||
parse_test_() ->
|
||||
Data = test_cases(),
|
||||
[{Title, ?_assertEqual(Events ++ [end_json], (parser(?MODULE, [], []))(Events ++ [end_json]))}
|
||||
|| {Title, _, _, Events} <- Data
|
||||
] ++
|
||||
[{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_parse(Events))}
|
||||
|| {Title, _, _, Events} <- Data
|
||||
].
|
||||
|
||||
|
||||
encode_test_() ->
|
||||
Data = test_cases(),
|
||||
[
|
||||
{
|
||||
Title, ?_assertEqual(
|
||||
Events ++ [end_json],
|
||||
(jsx:encoder(jsx, [], []))(Term)
|
||||
)
|
||||
} || {Title, _, Term, Events} <- Data
|
||||
].
|
||||
|
||||
|
||||
-endif.
|
||||
|
|
|
@ -49,41 +49,27 @@
|
|||
%% parsing of jsx config
|
||||
-spec parse_config(Config::proplists:proplist()) -> jsx:config().
|
||||
|
||||
parse_config(Config) ->
|
||||
parse_config(Config, #config{}).
|
||||
parse_config(Config) -> parse_config(Config, #config{}).
|
||||
|
||||
parse_config([], Config) ->
|
||||
Config;
|
||||
parse_config([replaced_bad_utf8|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{replaced_bad_utf8=true});
|
||||
parse_config([], Config) -> Config;
|
||||
parse_config([escaped_forward_slashes|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{escaped_forward_slashes=true});
|
||||
parse_config([explicit_end|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{explicit_end=true});
|
||||
parse_config([single_quoted_strings|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{single_quoted_strings=true});
|
||||
parse_config([unescaped_jsonp|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{unescaped_jsonp=true});
|
||||
parse_config([comments|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{comments=true});
|
||||
parse_config([escaped_strings|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{escaped_strings=true});
|
||||
parse_config([unescaped_jsonp|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{unescaped_jsonp=true});
|
||||
parse_config([dirty_strings|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{dirty_strings=true});
|
||||
parse_config([ignored_bad_escapes|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{ignored_bad_escapes=true});
|
||||
parse_config([relax|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{
|
||||
replaced_bad_utf8 = true,
|
||||
single_quoted_strings = true,
|
||||
comments = true,
|
||||
ignored_bad_escapes = true
|
||||
parse_config([strict|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{strict_comments=true,
|
||||
strict_utf8=true,
|
||||
strict_single_quotes=true,
|
||||
strict_escapes=true
|
||||
});
|
||||
parse_config([{pre_encode, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) ->
|
||||
case Config#config.pre_encode of
|
||||
false -> parse_config(Rest, Config#config{pre_encode=Encoder})
|
||||
; _ -> erlang:error(badarg, [Options, Config])
|
||||
end;
|
||||
parse_config([{strict, Strict}|Rest], Config) ->
|
||||
parse_strict(Strict, Rest, Config);
|
||||
parse_config([stream|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{stream=true});
|
||||
parse_config([{error_handler, ErrorHandler}|Rest] = Options, Config) when is_function(ErrorHandler, 3) ->
|
||||
case Config#config.error_handler of
|
||||
false -> parse_config(Rest, Config#config{error_handler=ErrorHandler})
|
||||
|
@ -94,34 +80,28 @@ parse_config([{incomplete_handler, IncompleteHandler}|Rest] = Options, Config) w
|
|||
false -> parse_config(Rest, Config#config{incomplete_handler=IncompleteHandler})
|
||||
; _ -> erlang:error(badarg, [Options, Config])
|
||||
end;
|
||||
%% deprecated flags
|
||||
parse_config([{pre_encoder, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) ->
|
||||
case Config#config.pre_encode of
|
||||
false -> parse_config(Rest, Config#config{pre_encode=Encoder})
|
||||
; _ -> erlang:error(badarg, [Options, Config])
|
||||
end;
|
||||
parse_config([loose_unicode|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{replaced_bad_utf8=true});
|
||||
parse_config([escape_forward_slash|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{escaped_forward_slashes=true});
|
||||
parse_config([single_quotes|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{single_quoted_strings=true});
|
||||
parse_config([no_jsonp_escapes|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{unescaped_jsonp=true});
|
||||
parse_config([json_escape|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{escaped_strings=true});
|
||||
parse_config([ignore_bad_escapes|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{ignored_bad_escapes=true});
|
||||
parse_config(Options, Config) ->
|
||||
erlang:error(badarg, [Options, Config]).
|
||||
parse_config(_Options, _Config) -> erlang:error(badarg).
|
||||
|
||||
|
||||
%% handles the argument of a `{strict, Flags}` option. each recognised flag
%% switches on the matching strict_* field of the config record; once the flag
%% list is exhausted, parsing of the remaining options resumes in
%% parse_config/2. an unknown flag, or a flag argument that is not a proper
%% list, is a badarg
parse_strict([], Rest, Config) ->
    parse_config(Rest, Config);
parse_strict([Flag|Flags], Rest, Config) ->
    parse_strict(Flags, Rest, enable_strict(Flag, Config));
parse_strict(_Strict, _Rest, _Config) ->
    erlang:error(badarg).

%% switches on the strict_* field named by a single strict flag
enable_strict(comments, Config) -> Config#config{strict_comments=true};
enable_strict(utf8, Config) -> Config#config{strict_utf8=true};
enable_strict(single_quotes, Config) -> Config#config{strict_single_quotes=true};
enable_strict(escapes, Config) -> Config#config{strict_escapes=true};
enable_strict(_Unknown, _Config) -> erlang:error(badarg).
|
||||
|
||||
|
||||
|
||||
-spec config_to_list(Config::jsx:config()) -> proplists:proplist().
|
||||
|
||||
config_to_list(Config) ->
|
||||
lists:map(
|
||||
fun ({pre_encode, F}) -> {pre_encode, F};
|
||||
({error_handler, F}) -> {error_handler, F};
|
||||
reduce_config(lists:map(
|
||||
fun ({error_handler, F}) -> {error_handler, F};
|
||||
({incomplete_handler, F}) -> {incomplete_handler, F};
|
||||
({Key, true}) -> Key
|
||||
end,
|
||||
|
@ -129,34 +109,41 @@ config_to_list(Config) ->
|
|||
fun({_, false}) -> false; (_) -> true end,
|
||||
lists:zip(record_info(fields, config), tl(tuple_to_list(Config)))
|
||||
)
|
||||
).
|
||||
)).
|
||||
|
||||
|
||||
%% folds the individual strict_* flags of a flag list back into the compact
%% form: nothing when none is present, the bare atom `strict` when four were
%% collected, and `{strict, [...]}` naming the collected ones otherwise. all
%% other entries keep their relative order and the strict entry, if any, is
%% appended after them
reduce_config(Input) -> reduce_config(Input, [], []).

reduce_config([strict_comments|Rest], Kept, Strict) ->
    reduce_config(Rest, Kept, [comments|Strict]);
reduce_config([strict_utf8|Rest], Kept, Strict) ->
    reduce_config(Rest, Kept, [utf8|Strict]);
reduce_config([strict_single_quotes|Rest], Kept, Strict) ->
    reduce_config(Rest, Kept, [single_quotes|Strict]);
reduce_config([strict_escapes|Rest], Kept, Strict) ->
    reduce_config(Rest, Kept, [escapes|Strict]);
reduce_config([Other|Rest], Kept, Strict) ->
    reduce_config(Rest, [Other|Kept], Strict);
reduce_config([], Kept, Strict) ->
    Flags = lists:reverse(Kept),
    %% both accumulators were built by prepending, so restore input order
    case lists:reverse(Strict) of
        [] -> Flags;
        [_, _, _, _] -> Flags ++ [strict];
        Subset -> Flags ++ [{strict, Subset}]
    end.
|
||||
|
||||
|
||||
-spec valid_flags() -> [atom()].
|
||||
|
||||
valid_flags() ->
|
||||
[
|
||||
replaced_bad_utf8,
|
||||
escaped_forward_slashes,
|
||||
single_quoted_strings,
|
||||
unescaped_jsonp,
|
||||
comments,
|
||||
escaped_strings,
|
||||
unescaped_jsonp,
|
||||
dirty_strings,
|
||||
ignored_bad_escapes,
|
||||
explicit_end,
|
||||
relax,
|
||||
pre_encode,
|
||||
strict,
|
||||
stream,
|
||||
error_handler,
|
||||
incomplete_handler,
|
||||
%% deprecated flags
|
||||
pre_encoder, %% pre_encode
|
||||
loose_unicode, %% replaced_bad_utf8
|
||||
escape_forward_slash, %% escaped_forward_slashes
|
||||
single_quotes, %% single_quoted_strings
|
||||
no_jsonp_escapes, %% unescaped_jsonp
|
||||
json_escape, %% escaped_strings
|
||||
ignore_bad_escapes %% ignored_bad_escapes
|
||||
incomplete_handler
|
||||
].
|
||||
|
||||
|
||||
|
@ -187,70 +174,51 @@ config_test_() ->
|
|||
[
|
||||
{"all flags",
|
||||
?_assertEqual(
|
||||
#config{
|
||||
replaced_bad_utf8=true,
|
||||
escaped_forward_slashes=true,
|
||||
explicit_end=true,
|
||||
single_quoted_strings=true,
|
||||
unescaped_jsonp=true,
|
||||
comments=true,
|
||||
dirty_strings=true,
|
||||
ignored_bad_escapes=true
|
||||
#config{escaped_forward_slashes = true,
|
||||
escaped_strings = true,
|
||||
unescaped_jsonp = true,
|
||||
dirty_strings = true,
|
||||
strict_comments = true,
|
||||
strict_utf8 = true,
|
||||
strict_single_quotes = true,
|
||||
strict_escapes = true,
|
||||
stream = true
|
||||
},
|
||||
parse_config([
|
||||
replaced_bad_utf8,
|
||||
escaped_forward_slashes,
|
||||
explicit_end,
|
||||
single_quoted_strings,
|
||||
parse_config([escaped_forward_slashes,
|
||||
escaped_strings,
|
||||
unescaped_jsonp,
|
||||
comments,
|
||||
dirty_strings,
|
||||
ignored_bad_escapes
|
||||
strict,
|
||||
stream
|
||||
])
|
||||
)
|
||||
},
|
||||
{"relax flag",
|
||||
{"strict flag",
|
||||
?_assertEqual(
|
||||
#config{
|
||||
replaced_bad_utf8=true,
|
||||
single_quoted_strings=true,
|
||||
comments=true,
|
||||
ignored_bad_escapes=true
|
||||
#config{strict_comments = true,
|
||||
strict_utf8 = true,
|
||||
strict_single_quotes = true,
|
||||
strict_escapes = true
|
||||
},
|
||||
parse_config([relax])
|
||||
parse_config([strict])
|
||||
)
|
||||
},
|
||||
{"strict selective",
|
||||
?_assertEqual(
|
||||
#config{strict_comments = true},
|
||||
parse_config([{strict, [comments]}])
|
||||
)
|
||||
},
|
||||
{"strict expanded",
|
||||
?_assertEqual(
|
||||
#config{strict_comments = true,
|
||||
strict_utf8 = true,
|
||||
strict_single_quotes = true,
|
||||
strict_escapes = true
|
||||
},
|
||||
parse_config([{strict, [comments, utf8, single_quotes, escapes]}])
|
||||
)
|
||||
},
|
||||
{"deprecated flags", ?_assertEqual(
|
||||
#config{
|
||||
pre_encode=fun lists:length/1,
|
||||
replaced_bad_utf8=true,
|
||||
escaped_forward_slashes=true,
|
||||
single_quoted_strings=true,
|
||||
unescaped_jsonp=true,
|
||||
escaped_strings=true,
|
||||
ignored_bad_escapes=true
|
||||
},
|
||||
parse_config([
|
||||
{pre_encoder, fun lists:length/1},
|
||||
loose_unicode,
|
||||
escape_forward_slash,
|
||||
single_quotes,
|
||||
no_jsonp_escapes,
|
||||
json_escape,
|
||||
ignore_bad_escapes
|
||||
])
|
||||
)},
|
||||
{"pre_encode flag", ?_assertEqual(
|
||||
#config{pre_encode=fun lists:length/1},
|
||||
parse_config([{pre_encode, fun lists:length/1}])
|
||||
)},
|
||||
{"two pre_encoders defined", ?_assertError(
|
||||
badarg,
|
||||
parse_config([
|
||||
{pre_encode, fun(_) -> true end},
|
||||
{pre_encode, fun(_) -> false end}
|
||||
])
|
||||
)},
|
||||
{"error_handler flag", ?_assertEqual(
|
||||
#config{error_handler=fun ?MODULE:fake_error_handler/3},
|
||||
parse_config([{error_handler, fun ?MODULE:fake_error_handler/3}])
|
||||
|
@ -273,7 +241,7 @@ config_test_() ->
|
|||
{incomplete_handler, fun(_) -> false end}
|
||||
])
|
||||
)},
|
||||
{"bad option flag", ?_assertError(badarg, parse_config([error]))}
|
||||
{"bad option flag", ?_assertError(badarg, parse_config([this_flag_does_not_exist]))}
|
||||
].
|
||||
|
||||
|
||||
|
@ -284,32 +252,40 @@ config_to_list_test_() ->
|
|||
config_to_list(#config{})
|
||||
)},
|
||||
{"all flags", ?_assertEqual(
|
||||
[
|
||||
replaced_bad_utf8,
|
||||
escaped_forward_slashes,
|
||||
single_quoted_strings,
|
||||
[escaped_forward_slashes,
|
||||
escaped_strings,
|
||||
unescaped_jsonp,
|
||||
comments,
|
||||
dirty_strings,
|
||||
ignored_bad_escapes,
|
||||
explicit_end
|
||||
stream,
|
||||
strict
|
||||
],
|
||||
config_to_list(
|
||||
#config{
|
||||
replaced_bad_utf8=true,
|
||||
escaped_forward_slashes=true,
|
||||
explicit_end=true,
|
||||
single_quoted_strings=true,
|
||||
unescaped_jsonp=true,
|
||||
comments=true,
|
||||
dirty_strings=true,
|
||||
ignored_bad_escapes=true
|
||||
#config{escaped_forward_slashes = true,
|
||||
escaped_strings = true,
|
||||
unescaped_jsonp = true,
|
||||
dirty_strings = true,
|
||||
strict_comments = true,
|
||||
strict_utf8 = true,
|
||||
strict_single_quotes = true,
|
||||
strict_escapes = true,
|
||||
stream = true
|
||||
}
|
||||
)
|
||||
)},
|
||||
{"pre_encode", ?_assertEqual(
|
||||
[{pre_encode, fun lists:length/1}],
|
||||
config_to_list(#config{pre_encode=fun lists:length/1})
|
||||
{"single strict", ?_assertEqual(
|
||||
[{strict, [comments]}],
|
||||
config_to_list(#config{strict_comments = true})
|
||||
)},
|
||||
{"multiple strict", ?_assertEqual(
|
||||
[{strict, [utf8, single_quotes, escapes]}],
|
||||
config_to_list(#config{strict_utf8 = true, strict_single_quotes = true, strict_escapes = true})
|
||||
)},
|
||||
{"all strict", ?_assertEqual(
|
||||
[strict],
|
||||
config_to_list(#config{strict_comments = true,
|
||||
strict_utf8 = true,
|
||||
strict_single_quotes = true,
|
||||
strict_escapes = true})
|
||||
)},
|
||||
{"error handler", ?_assertEqual(
|
||||
[{error_handler, fun ?MODULE:fake_error_handler/3}],
|
||||
|
|
|
@ -1,15 +1,13 @@
|
|||
-record(config, {
|
||||
replaced_bad_utf8 = false :: boolean(),
|
||||
escaped_forward_slashes = false :: boolean(),
|
||||
single_quoted_strings = false :: boolean(),
|
||||
unescaped_jsonp = false :: boolean(),
|
||||
comments = false :: boolean(),
|
||||
escaped_strings = false :: boolean(),
|
||||
dirty_strings = false :: boolean(),
|
||||
ignored_bad_escapes = false :: boolean(),
|
||||
explicit_end = false :: boolean(),
|
||||
pre_encode = false :: false | fun((any()) -> any()),
|
||||
error_handler = false :: false | jsx_config:handler(),
|
||||
incomplete_handler = false :: false | jsx_config:handler()
|
||||
escaped_forward_slashes = false :: boolean(),
|
||||
escaped_strings = false :: boolean(),
|
||||
unescaped_jsonp = false :: boolean(),
|
||||
dirty_strings = false :: boolean(),
|
||||
strict_comments = false :: boolean(),
|
||||
strict_utf8 = false :: boolean(),
|
||||
strict_single_quotes = false :: boolean(),
|
||||
strict_escapes = false :: boolean(),
|
||||
stream = false :: boolean(),
|
||||
error_handler = false :: false | jsx_config:handler(),
|
||||
incomplete_handler = false :: false | jsx_config:handler()
|
||||
}).
|
||||
|
||||
|
|
1678
src/jsx_decoder.erl
1678
src/jsx_decoder.erl
File diff suppressed because it is too large
Load diff
|
@ -23,310 +23,66 @@
|
|||
|
||||
-module(jsx_encoder).
|
||||
|
||||
-export([encoder/3]).
|
||||
-export([encoder/3, encode/1, encode/2, unzip/1]).
|
||||
|
||||
-spec encoder(Handler::module(), State::any(), Config::list()) -> jsx:encoder().
|
||||
|
||||
encoder(Handler, State, Config) ->
|
||||
fun(JSON) ->
|
||||
start(
|
||||
JSON,
|
||||
{Handler, Handler:init(State)},
|
||||
jsx_config:parse_config(Config)
|
||||
)
|
||||
end.
|
||||
Parser = jsx:parser(Handler, State, Config),
|
||||
fun(Term) -> Parser(encode(Term) ++ [end_json]) end.
|
||||
|
||||
|
||||
-spec encode(Term::any()) -> any().
|
||||
|
||||
-include("jsx_config.hrl").
|
||||
encode(Term) -> encode(Term, ?MODULE).
|
||||
|
||||
|
||||
-ifndef(error).
|
||||
-define(error(State, Term, Handler, Config),
|
||||
case Config#config.error_handler of
|
||||
false -> erlang:error(badarg);
|
||||
F -> erlang:throw(F(Term, {encoder, State, Handler}, jsx_config:config_to_list(Config)))
|
||||
end
|
||||
).
|
||||
-endif.
|
||||
-spec encode(Term::any(), EntryPoint::module()) -> any().
|
||||
|
||||
encode([], _EntryPoint) -> [start_array, end_array];
|
||||
encode([{}], _EntryPoint) -> [start_object, end_object];
|
||||
|
||||
start(Term, {Handler, State}, Config) ->
|
||||
try Handler:handle_event(end_json, value(pre_encode(Term, Config), {Handler, State}, Config))
|
||||
catch
|
||||
throw:Error -> Error;
|
||||
Type:Value -> erlang:Type(Value)
|
||||
end.
|
||||
|
||||
|
||||
value(String, {Handler, State}, Config) when is_binary(String) ->
|
||||
Handler:handle_event({string, clean_string(String, {Handler, State}, Config)}, State);
|
||||
value(Float, {Handler, State}, _Config) when is_float(Float) ->
|
||||
Handler:handle_event({float, Float}, State);
|
||||
value(Int, {Handler, State}, _Config) when is_integer(Int) ->
|
||||
Handler:handle_event({integer, Int}, State);
|
||||
value(Literal, {Handler, State}, _Config)
|
||||
when Literal == true; Literal == false; Literal == null ->
|
||||
Handler:handle_event({literal, Literal}, State);
|
||||
value(String, {Handler, State}, Config) when is_atom(String) ->
|
||||
Handler:handle_event({string, clean_string(atom_to_binary(String,latin1), {Handler, State}, Config)}, State);
|
||||
value([{}], {Handler, State}, _Config) ->
|
||||
Handler:handle_event(end_object, Handler:handle_event(start_object, State));
|
||||
value([], {Handler, State}, _Config) ->
|
||||
Handler:handle_event(end_array, Handler:handle_event(start_array, State));
|
||||
value(List, Handler, Config) when is_list(List) ->
|
||||
list_or_object(List, Handler, Config);
|
||||
value(Term, Handler, Config) -> ?error(value, Term, Handler, Config).
|
||||
|
||||
|
||||
list_or_object([Term|Rest], {Handler, State}, Config) ->
|
||||
case pre_encode(Term, Config) of
|
||||
{K, V} when is_atom(K); is_binary(K); is_integer(K) ->
|
||||
object([{K, V}|Rest], {Handler, Handler:handle_event(start_object, State)}, Config)
|
||||
; T ->
|
||||
list([T|Rest], {Handler, Handler:handle_event(start_array, State)}, Config)
|
||||
end.
|
||||
|
||||
|
||||
object([{Key, Value}, Next|Rest], {Handler, State}, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
|
||||
V = pre_encode(Value, Config),
|
||||
object(
|
||||
[pre_encode(Next, Config)|Rest],
|
||||
{
|
||||
Handler,
|
||||
value(
|
||||
V,
|
||||
{Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)},
|
||||
Config
|
||||
)
|
||||
},
|
||||
Config
|
||||
encode([{_, _}|_] = Term, EntryPoint) ->
|
||||
lists:flatten(
|
||||
[start_object] ++ [ EntryPoint:encode(T, EntryPoint) || T <- unzip(Term) ] ++ [end_object]
|
||||
);
|
||||
object([{Key, Value}], {Handler, State}, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
|
||||
object(
|
||||
[],
|
||||
{
|
||||
Handler,
|
||||
value(
|
||||
pre_encode(Value, Config),
|
||||
{Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)},
|
||||
Config
|
||||
)
|
||||
},
|
||||
Config
|
||||
encode(Term, EntryPoint) when is_list(Term) ->
|
||||
lists:flatten(
|
||||
[start_array] ++ [ EntryPoint:encode(T, EntryPoint) || T <- Term ] ++ [end_array]
|
||||
);
|
||||
object([], {Handler, State}, _Config) -> Handler:handle_event(end_object, State);
|
||||
object(Term, Handler, Config) -> ?error(object, Term, Handler, Config).
|
||||
|
||||
encode(Else, _EntryPoint) -> [Else].
|
||||
|
||||
|
||||
list([Value, Next|Rest], {Handler, State}, Config) ->
|
||||
list([pre_encode(Next, Config)|Rest], {Handler, value(Value, {Handler, State}, Config)}, Config);
|
||||
list([Value], {Handler, State}, Config) ->
|
||||
list([], {Handler, value(Value, {Handler, State}, Config)}, Config);
|
||||
list([], {Handler, State}, _Config) -> Handler:handle_event(end_array, State).
|
||||
unzip(List) -> unzip(List, []).
|
||||
|
||||
pre_encode(Value, #config{pre_encode=false}) -> Value;
|
||||
pre_encode(Value, Config) -> (Config#config.pre_encode)(Value).
|
||||
|
||||
|
||||
fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8));
|
||||
fix_key(Key) when is_integer(Key) -> fix_key(list_to_binary(integer_to_list(Key)));
|
||||
fix_key(Key) when is_binary(Key) -> Key.
|
||||
|
||||
|
||||
clean_string(Bin, Handler, Config) ->
|
||||
case clean_string(Bin, Config) of
|
||||
{error, badarg} -> ?error(string, Bin, Handler, Config);
|
||||
String -> String
|
||||
end.
|
||||
|
||||
|
||||
|
||||
-include("jsx_strings.hrl").
|
||||
unzip([], Acc) -> lists:reverse(Acc);
|
||||
unzip([{K, V}|Rest], Acc) when is_binary(K); is_atom(K); is_integer(K) -> unzip(Rest, [V, K] ++ Acc).
|
||||
|
||||
|
||||
-ifdef(TEST).
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
|
||||
encode_test_() ->
|
||||
Data = jsx:test_cases(),
|
||||
[
|
||||
{
|
||||
Title, ?_assertEqual(
|
||||
Events ++ [end_json],
|
||||
start(Term, {jsx, []}, #config{})
|
||||
)
|
||||
} || {Title, _, Term, Events} <- Data
|
||||
].
|
||||
parser(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term).
|
||||
|
||||
|
||||
encode(Term, Config) -> start(Term, {jsx, []}, jsx_config:parse_config(Config)).
|
||||
|
||||
pre_encoders_test_() ->
|
||||
Term = [
|
||||
{<<"object">>, [
|
||||
{atomkey, atomvalue},
|
||||
{<<"literals">>, [true, false, null]},
|
||||
{<<"strings">>, [<<"foo">>, <<"bar">>, <<"baz">>]},
|
||||
{<<"numbers">>, [1, 1.0, 1.0e0]}
|
||||
]}
|
||||
],
|
||||
[
|
||||
{"no pre encode", ?_assertEqual(
|
||||
[
|
||||
start_object,
|
||||
{key, <<"object">>}, start_object,
|
||||
{key, <<"atomkey">>}, {string, <<"atomvalue">>},
|
||||
{key, <<"literals">>}, start_array,
|
||||
{literal, true}, {literal, false}, {literal, null},
|
||||
end_array,
|
||||
{key, <<"strings">>}, start_array,
|
||||
{string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>},
|
||||
end_array,
|
||||
{key, <<"numbers">>}, start_array,
|
||||
{integer, 1}, {float, 1.0}, {float, 1.0},
|
||||
end_array,
|
||||
end_object,
|
||||
end_object,
|
||||
end_json
|
||||
],
|
||||
encode(Term, [])
|
||||
)},
|
||||
{"replace lists with empty lists", ?_assertEqual(
|
||||
[
|
||||
start_object,
|
||||
{key, <<"object">>}, start_object,
|
||||
{key, <<"atomkey">>}, {string, <<"atomvalue">>},
|
||||
{key, <<"literals">>}, start_array, end_array,
|
||||
{key, <<"strings">>}, start_array, end_array,
|
||||
{key, <<"numbers">>}, start_array, end_array,
|
||||
end_object,
|
||||
end_object,
|
||||
end_json
|
||||
],
|
||||
encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end}])
|
||||
)},
|
||||
{"replace objects with empty objects", ?_assertEqual(
|
||||
[
|
||||
start_object,
|
||||
end_object,
|
||||
end_json
|
||||
],
|
||||
encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end}])
|
||||
)},
|
||||
{"replace all non-list and non_tuple values with false", ?_assertEqual(
|
||||
[
|
||||
start_object,
|
||||
{key, <<"object">>}, start_object,
|
||||
{key, <<"atomkey">>}, {literal, false},
|
||||
{key, <<"literals">>}, start_array,
|
||||
{literal, false}, {literal, false}, {literal, false},
|
||||
end_array,
|
||||
{key, <<"strings">>}, start_array,
|
||||
{literal, false}, {literal, false}, {literal, false},
|
||||
end_array,
|
||||
{key, <<"numbers">>}, start_array,
|
||||
{literal, false}, {literal, false}, {literal, false},
|
||||
end_array,
|
||||
end_object,
|
||||
end_object,
|
||||
end_json
|
||||
],
|
||||
encode(Term, [{pre_encode, fun(V) when is_list(V); is_tuple(V) -> V; (_) -> false end}])
|
||||
)},
|
||||
{"replace all atoms with atom_to_list", ?_assertEqual(
|
||||
[
|
||||
start_object,
|
||||
{key, <<"object">>}, start_object,
|
||||
{key, <<"atomkey">>}, {string, <<"atomvalue">>},
|
||||
{key, <<"literals">>}, start_array,
|
||||
{string, <<"true">>}, {string, <<"false">>}, {string, <<"null">>},
|
||||
end_array,
|
||||
{key, <<"strings">>}, start_array,
|
||||
{string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>},
|
||||
end_array,
|
||||
{key, <<"numbers">>}, start_array,
|
||||
{integer, 1}, {float, 1.0}, {float, 1.0},
|
||||
end_array,
|
||||
end_object,
|
||||
end_object,
|
||||
end_json
|
||||
],
|
||||
encode(Term, [{pre_encode, fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end}])
|
||||
)},
|
||||
{"pre_encode tuple", ?_assertEqual(
|
||||
[
|
||||
start_array,
|
||||
{integer, 1}, {integer, 2}, {integer, 3},
|
||||
end_array,
|
||||
end_json
|
||||
],
|
||||
encode({1, 2, 3}, [{pre_encode, fun(Tuple) when is_tuple(Tuple) -> tuple_to_list(Tuple); (V) -> V end}])
|
||||
)},
|
||||
{"pre_encode 2-tuples", ?_assertEqual(
|
||||
[
|
||||
start_object,
|
||||
{key, <<"two">>}, {integer, 2}, {key, <<"three">>}, {integer, 3},
|
||||
end_object,
|
||||
end_json
|
||||
],
|
||||
encode([{two, 1}, {three, 2}], [{pre_encode, fun({K, V}) -> {K, V + 1}; (V) -> V end}])
|
||||
)},
|
||||
{"pre_encode one field record", ?_assertEqual(
|
||||
[
|
||||
start_object,
|
||||
{key, <<"bar">>}, {literal, false},
|
||||
end_object,
|
||||
end_json
|
||||
],
|
||||
encode([{foo, bar}], [{pre_encode, fun({foo, V}) -> {V, undefined}; (undefined) -> false; (V) -> V end}])
|
||||
)},
|
||||
{"pre_encode list", ?_assertEqual(
|
||||
[
|
||||
start_array,
|
||||
{integer, 2}, {integer, 3}, {integer, 4},
|
||||
end_array,
|
||||
end_json
|
||||
],
|
||||
encode([1,2,3], [{pre_encode, fun(X) when is_integer(X) -> X + 1; (V) -> V end}])
|
||||
)}
|
||||
].
|
||||
|
||||
error_test_() ->
|
||||
[
|
||||
{"value error", ?_assertError(badarg, encode(self(), []))},
|
||||
{"string error", ?_assertError(badarg, encode(<<239, 191, 191>>, []))}
|
||||
{"value error", ?_assertError(badarg, parser(self(), []))},
|
||||
{"string error", ?_assertError(badarg, parser(<<239, 191, 191>>, [strict]))}
|
||||
].
|
||||
|
||||
custom_error_handler_test_() ->
|
||||
Error = fun(Term, {_, State, _}, _) -> {State, Term} end,
|
||||
Error = fun(Term, {_, State, _, _}, _) -> {State, Term} end,
|
||||
[
|
||||
{"value error", ?_assertEqual(
|
||||
{value, self()},
|
||||
encode(self(), [{error_handler, Error}])
|
||||
{value, [self()]},
|
||||
parser(self(), [{error_handler, Error}])
|
||||
)},
|
||||
{"string error", ?_assertEqual(
|
||||
{string, <<239, 191, 191>>},
|
||||
encode(<<239, 191, 191>>, [{error_handler, Error}])
|
||||
)}
|
||||
].
|
||||
|
||||
integer_key_test_() ->
|
||||
Term = [{123, [{456, 789}]}],
|
||||
[
|
||||
{"basic integer keys", ?_assertEqual(
|
||||
[
|
||||
start_object,
|
||||
{key, <<"123">>},
|
||||
start_object,
|
||||
{key, <<"456">>},
|
||||
{integer, 789},
|
||||
end_object,
|
||||
end_object,
|
||||
end_json
|
||||
],
|
||||
encode(Term, [])
|
||||
{string, [{string, <<239, 191, 191>>}]},
|
||||
parser(<<239, 191, 191>>, [{error_handler, Error}, strict])
|
||||
)}
|
||||
].
|
||||
|
||||
|
|
|
@ -68,6 +68,8 @@ resume(Rest, State, Handler, Stack, Config) ->
|
|||
-endif.
|
||||
|
||||
|
||||
incomplete(State, Handler, Stack, Config=#config{stream=false}) ->
|
||||
?error(State, [], Handler, Stack, Config);
|
||||
incomplete(State, Handler, Stack, Config=#config{incomplete_handler=false}) ->
|
||||
{incomplete, fun(end_stream) ->
|
||||
case resume([end_json], State, Handler, Stack, Config) of
|
||||
|
@ -82,8 +84,6 @@ incomplete(State, Handler, Stack, Config=#config{incomplete_handler=F}) ->
|
|||
F([], {parser, State, Handler, Stack}, jsx_config:config_to_list(Config)).
|
||||
|
||||
|
||||
%handle_event([], Handler, _Config) -> Handler;
|
||||
%handle_event([Event|Rest], Handler, Config) -> handle_event(Rest, handle_event(Event, Handler, Config), Config);
|
||||
%% passes Event to the handler module's handle_event/2 callback along with the
%% current handler state and returns the handler paired with the state the
%% callback produced. the config argument is not consulted
handle_event(Event, {Handler, State}, _Config) ->
    NewState = Handler:handle_event(Event, State),
    {Handler, NewState}.
|
||||
|
||||
|
||||
|
@ -91,24 +91,10 @@ value([start_object|Tokens], Handler, Stack, Config) ->
|
|||
object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config);
|
||||
value([start_array|Tokens], Handler, Stack, Config) ->
|
||||
array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config);
|
||||
value([{literal, true}|Tokens], Handler, [], Config) ->
|
||||
done(Tokens, handle_event({literal, true}, Handler, Config), [], Config);
|
||||
value([{literal, false}|Tokens], Handler, [], Config) ->
|
||||
done(Tokens, handle_event({literal, false}, Handler, Config), [], Config);
|
||||
value([{literal, null}|Tokens], Handler, [], Config) ->
|
||||
done(Tokens, handle_event({literal, null}, Handler, Config), [], Config);
|
||||
value([{literal, true}|Tokens], Handler, Stack, Config) ->
|
||||
maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config);
|
||||
value([{literal, false}|Tokens], Handler, Stack, Config) ->
|
||||
maybe_done(Tokens, handle_event({literal, false}, Handler, Config), Stack, Config);
|
||||
value([{literal, null}|Tokens], Handler, Stack, Config) ->
|
||||
maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config);
|
||||
value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null ->
|
||||
maybe_done(Tokens, handle_event({literal, Literal}, Handler, Config), Stack, Config);
|
||||
value([Literal|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null ->
|
||||
value([{literal, Literal}] ++ Tokens, Handler, Stack, Config);
|
||||
value([{integer, Number}|Tokens], Handler, [], Config) when is_integer(Number) ->
|
||||
done(Tokens, handle_event({integer, Number}, Handler, Config), [], Config);
|
||||
value([{float, Number}|Tokens], Handler, [], Config) when is_float(Number) ->
|
||||
done(Tokens, handle_event({float, Number}, Handler, Config), [], Config);
|
||||
value([{integer, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) ->
|
||||
maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config);
|
||||
value([{float, Number}|Tokens], Handler, Stack, Config) when is_float(Number) ->
|
||||
|
@ -121,12 +107,6 @@ value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) ->
|
|||
value([{integer, Number}] ++ Tokens, Handler, Stack, Config);
|
||||
value([Number|Tokens], Handler, Stack, Config) when is_float(Number) ->
|
||||
value([{float, Number}] ++ Tokens, Handler, Stack, Config);
|
||||
value([{string, String}|Tokens], Handler, [], Config) when is_binary(String) ->
|
||||
case clean_string(String, Tokens, Handler, [], Config) of
|
||||
Clean when is_binary(Clean) ->
|
||||
done(Tokens, handle_event({string, Clean}, Handler, Config), [], Config);
|
||||
Error -> Error
|
||||
end;
|
||||
value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) ->
|
||||
case clean_string(String, Tokens, Handler, Stack, Config) of
|
||||
Clean when is_binary(Clean) ->
|
||||
|
@ -135,6 +115,8 @@ value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String)
|
|||
end;
|
||||
value([String|Tokens], Handler, Stack, Config) when is_binary(String) ->
|
||||
value([{string, String}] ++ Tokens, Handler, Stack, Config);
|
||||
value([String|Tokens], Handler, Stack, Config) when is_atom(String) ->
|
||||
value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config);
|
||||
value([{raw, Raw}|Tokens], Handler, Stack, Config) when is_binary(Raw) ->
|
||||
value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config);
|
||||
value([], Handler, Stack, Config) ->
|
||||
|
@ -146,13 +128,13 @@ value(Token, Handler, Stack, Config) ->
|
|||
|
||||
object([end_object|Tokens], Handler, [object|Stack], Config) ->
|
||||
maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config);
|
||||
object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key) ->
|
||||
object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
|
||||
case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of
|
||||
Clean when is_binary(Clean) ->
|
||||
value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config);
|
||||
Error -> Error
|
||||
end;
|
||||
object([Key|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key) ->
|
||||
object([Key|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
|
||||
case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of
|
||||
Clean when is_binary(Clean) ->
|
||||
value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config);
|
||||
|
@ -185,7 +167,7 @@ maybe_done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
|
|||
maybe_done(Token, Handler, Stack, Config) ->
|
||||
maybe_done([Token], Handler, Stack, Config).
|
||||
|
||||
done([], Handler, [], Config=#config{explicit_end=true}) ->
|
||||
done([], Handler, [], Config=#config{stream=true}) ->
|
||||
incomplete(done, Handler, [], Config);
|
||||
done(Tokens, Handler, [], Config) when Tokens == [end_json]; Tokens == [] ->
|
||||
{_, State} = handle_event(end_json, Handler, Config),
|
||||
|
@ -196,7 +178,8 @@ done(Token, Handler, Stack, Config) ->
|
|||
done([Token], Handler, Stack, Config).
|
||||
|
||||
|
||||
fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8));
|
||||
fix_key(Key) when is_atom(Key) -> atom_to_binary(Key, utf8);
|
||||
fix_key(Key) when is_integer(Key) -> list_to_binary(integer_to_list(Key));
|
||||
fix_key(Key) when is_binary(Key) -> Key.
|
||||
|
||||
|
||||
|
@ -206,6 +189,256 @@ clean_string(Bin, Tokens, Handler, Stack, Config) ->
|
|||
String -> String
|
||||
end.
|
||||
|
||||
%% sanitize `Bin` according to `Config`. when `dirty_strings` is set the
%% input is returned untouched, otherwise it is handed to clean/3 with an
%% empty accumulator
clean_string(Bin, Config) ->
    case Config of
        #config{dirty_strings=true} -> Bin;
        _ -> clean(Bin, [], Config)
    end.
|
||||
|
||||
|
||||
%% escape and/or replace bad codepoints if requested
%%
%% consumes the binary from the front. accepted codepoints are pushed onto
%% `Acc` (held in reverse order); anything that may need escaping or
%% replacing is passed to maybe_replace/4, which either resumes cleaning or
%% returns an error. once the input is exhausted the accumulator is reversed
%% and converted with unicode:characters_to_binary/1
clean(<<>>, Acc, _Config) ->
    unicode:characters_to_binary(lists:reverse(Acc));
%% control characters, double quote, forward slash and backslash
clean(<<Byte, Rest/binary>>, Acc, Config)
        when Byte < 32; Byte =:= $\"; Byte =:= $/; Byte =:= $\\ ->
    maybe_replace(Byte, Rest, Acc, Config);
%% every other ascii byte is kept verbatim
clean(<<Byte, Rest/binary>>, Acc, Config) when Byte < 128 ->
    clean(Rest, [Byte | Acc], Config);
%% line separator and paragraph separator
clean(<<Codepoint/utf8, Rest/binary>>, Acc, Config)
        when Codepoint == 16#2028; Codepoint == 16#2029 ->
    maybe_replace(Codepoint, Rest, Acc, Config);
%% well formed codepoints outside the surrogate range, the u+fdd0..u+fdef
%% block and the last two codepoints of each plane
clean(<<Codepoint/utf8, Rest/binary>>, Acc, Config)
        when Codepoint < 16#d800;
             Codepoint > 16#dfff, Codepoint < 16#fdd0;
             Codepoint > 16#fdef, Codepoint < 16#fffe;
             Codepoint >= 16#10000, (Codepoint band 16#ffff) < 16#fffe ->
    clean(Rest, [Codepoint | Acc], Config);
%% surrogates
clean(<<237, Second, _, Rest/binary>>, Acc, Config) when Second >= 160 ->
    maybe_replace(surrogate, Rest, Acc, Config);
%% noncharacters
clean(<<_/utf8, Rest/binary>>, Acc, Config) ->
    maybe_replace(noncharacter, Rest, Acc, Config);
%% u+fffe and u+ffff for R14BXX
clean(<<239, 191, Third, Rest/binary>>, Acc, Config)
        when Third == 190; Third == 191 ->
    maybe_replace(noncharacter, Rest, Acc, Config);
%% overlong encodings and missing continuations of a 2 byte sequence
clean(<<Lead, Rest/binary>>, Acc, Config) when Lead >= 192, Lead =< 223 ->
    maybe_replace(badutf, strip_continuations(Rest, 1), Acc, Config);
%% overlong encodings and missing continuations of a 3 byte sequence
clean(<<Lead, Rest/binary>>, Acc, Config) when Lead >= 224, Lead =< 239 ->
    maybe_replace(badutf, strip_continuations(Rest, 2), Acc, Config);
%% overlong encodings and missing continuations of a 4 byte sequence
clean(<<Lead, Rest/binary>>, Acc, Config) when Lead >= 240, Lead =< 247 ->
    maybe_replace(badutf, strip_continuations(Rest, 3), Acc, Config);
%% any other byte cannot start a sequence
clean(<<_, Rest/binary>>, Acc, Config) ->
    maybe_replace(badutf, Rest, Acc, Config).
|
||||
|
||||
|
||||
%% drop leading utf8 continuation bytes (2#10xxxxxx, ie 128..191) from a
%% binary, at most `N` of them. stops as soon as the count reaches 0, the
%% next byte is not a continuation, or the input runs out
strip_continuations(<<2#10:2, _:6, Rest/binary>>, N) when N =/= 0 ->
    strip_continuations(Rest, N - 1);
strip_continuations(Bin, _) -> Bin.
|
||||
|
||||
|
||||
%% handle a codepoint, or an atom tagging malformed input, that clean/3 did
%% not accept outright. the result is pushed (reversed) onto `Acc` and
%% cleaning resumes on `Rest`
%%
%% with `escaped_strings` set: control characters, the double quote and the
%% backslash are replaced by their json escapes, the forward slash only when
%% `escaped_forward_slashes` is also set, and u+2028/u+2029 unless
%% `unescaped_jsonp` is set
%%
%% atoms (noncharacter, surrogate, badutf) give `{error, badarg}` when
%% `strict_utf8` is set and are otherwise replaced with u+fffd. everything
%% else is kept as is
maybe_replace($\b, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [$b, $\\ | Acc], Config);
maybe_replace($\t, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [$t, $\\ | Acc], Config);
maybe_replace($\n, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [$n, $\\ | Acc], Config);
maybe_replace($\f, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [$f, $\\ | Acc], Config);
maybe_replace($\r, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [$r, $\\ | Acc], Config);
maybe_replace($\", Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [$\", $\\ | Acc], Config);
maybe_replace($/, Rest, Acc, Config=#config{escaped_strings=true}) ->
    %% already in accumulator (reversed) order
    Slash = case Config#config.escaped_forward_slashes of
        true -> [$/, $\\];
        false -> [$/]
    end,
    clean(Rest, Slash ++ Acc, Config);
maybe_replace($\\, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [$\\, $\\ | Acc], Config);
maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
    Separator = case Config#config.unescaped_jsonp of
        true -> [X];
        false -> lists:reverse(json_escape_sequence(X))
    end,
    clean(Rest, Separator ++ Acc, Config);
maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X < 32 ->
    %% lists:reverse/2 reverses the escape sequence straight onto the accumulator
    clean(Rest, lists:reverse(json_escape_sequence(X), Acc), Config);
maybe_replace(Tag, _, _, #config{strict_utf8=true}) when is_atom(Tag) -> {error, badarg};
maybe_replace(Tag, Rest, Acc, Config)
        when Tag =:= noncharacter; Tag =:= surrogate; Tag =:= badutf ->
    clean(Rest, [16#fffd | Acc], Config);
maybe_replace(X, Rest, Acc, Config) -> clean(Rest, [X | Acc], Config).
|
||||
|
||||
|
||||
%% convert a codepoint to its \uXXXX equivalent, returned as a list of six
%% characters. `X` is packed into 16 bits, so only its low 16 bits are
%% represented in the result
json_escape_sequence(X) ->
    [$\\, $u | [to_hex(Nibble) || <<Nibble:4>> <= <<X:16>>]].
|
||||
|
||||
|
||||
%% map a nibble to its lowercase ascii hex digit: 10..15 become $a..$f and
%% anything else is offset from $0 (so 0..9 become $0..$9)
to_hex(X) when is_integer(X), X >= 10, X =< 15 -> X - 10 + $a;
to_hex(X) -> X + $0.
|
||||
|
||||
|
||||
%% for raw input
|
||||
-spec init(proplists:proplist()) -> list().
|
||||
|
@ -218,54 +451,20 @@ handle_event(end_json, State) -> lists:reverse(State);
|
|||
handle_event(Event, State) -> [Event] ++ State.
|
||||
|
||||
|
||||
-include("jsx_strings.hrl").
|
||||
|
||||
|
||||
-ifdef(TEST).
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
|
||||
parse(Events, Config) ->
|
||||
Chunk = try
|
||||
value(Events ++ [end_json], {jsx, []}, [], jsx_config:parse_config(Config))
|
||||
catch
|
||||
error:badarg -> {error, badarg}
|
||||
end,
|
||||
Incremental = try
|
||||
Final = lists:foldl(
|
||||
fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end,
|
||||
parser(jsx, [], [explicit_end] ++ Config),
|
||||
lists:map(fun(X) -> [X] end, Events)
|
||||
),
|
||||
Final(end_stream)
|
||||
catch
|
||||
error:badarg -> {error, badarg}
|
||||
end,
|
||||
?assert(Chunk == Incremental),
|
||||
Chunk.
|
||||
|
||||
|
||||
parse_test_() ->
|
||||
Data = jsx:test_cases(),
|
||||
[
|
||||
{
|
||||
Title, ?_assertEqual(
|
||||
Events ++ [end_json],
|
||||
parse(Events, [])
|
||||
)
|
||||
} || {Title, _, _, Events} <- Data
|
||||
].
|
||||
|
||||
|
||||
parse_error(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)).
|
||||
parse(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)).
|
||||
|
||||
|
||||
error_test_() ->
|
||||
[
|
||||
{"value error", ?_assertError(badarg, parse_error([self()], []))},
|
||||
{"maybe_done error", ?_assertError(badarg, parse_error([start_array, end_array, start_array, end_json], []))},
|
||||
{"done error", ?_assertError(badarg, parse_error([{string, <<"">>}, {literal, true}, end_json], []))},
|
||||
{"string error", ?_assertError(badarg, parse_error([{string, <<239, 191, 191>>}, end_json], []))}
|
||||
{"value error", ?_assertError(badarg, parse([self()], []))},
|
||||
{"maybe_done error", ?_assertError(badarg, parse([start_array, end_array, start_array, end_json], []))},
|
||||
{"done error", ?_assertError(badarg, parse([{string, <<"">>}, {literal, true}, end_json], []))},
|
||||
{"string error", ?_assertError(badarg, parse([{string, <<239, 191, 191>>}, end_json], [strict_utf8]))}
|
||||
].
|
||||
|
||||
|
||||
|
@ -274,47 +473,540 @@ custom_error_handler_test_() ->
|
|||
[
|
||||
{"value error", ?_assertEqual(
|
||||
{value, [self()]},
|
||||
parse_error([self()], [{error_handler, Error}])
|
||||
parse([self()], [{error_handler, Error}])
|
||||
)},
|
||||
{"maybe_done error", ?_assertEqual(
|
||||
{maybe_done, [start_array, end_json]},
|
||||
parse_error([start_array, end_array, start_array, end_json], [{error_handler, Error}])
|
||||
parse([start_array, end_array, start_array, end_json], [{error_handler, Error}])
|
||||
)},
|
||||
{"done error", ?_assertEqual(
|
||||
{done, [{literal, true}, end_json]},
|
||||
parse_error([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}])
|
||||
{maybe_done, [{literal, true}, end_json]},
|
||||
parse([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}])
|
||||
)},
|
||||
{"string error", ?_assertEqual(
|
||||
{string, [{string, <<239, 191, 191>>}, end_json]},
|
||||
parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}])
|
||||
parse([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict])
|
||||
)}
|
||||
].
|
||||
|
||||
|
||||
%% each of these event sequences stops before the json value is complete and
%% is expected to make parse/2 raise badarg under the default config
incomplete_test_() ->
    lists:map(
        fun({Title, Events}) ->
            {Title, ?_assertError(badarg, parse(Events, []))}
        end,
        [
            {"incomplete value", []},
            {"incomplete object", [start_object]},
            {"incomplete array", [start_array]},
            {"incomplete maybe_done", [start_array, end_array]}
        ]
    ).
|
||||
|
||||
|
||||
custom_incomplete_handler_test_() ->
|
||||
[
|
||||
{"custom incomplete handler", ?_assertError(
|
||||
badarg,
|
||||
parse_error([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}])
|
||||
parse([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}])
|
||||
)}
|
||||
].
|
||||
|
||||
|
||||
raw_test_() ->
|
||||
Parse = fun(Events, Config) -> (parser(?MODULE, [], Config))(Events ++ [end_json]) end,
|
||||
[
|
||||
{"raw empty list", ?_assertEqual(
|
||||
[start_array, end_array, end_json],
|
||||
parse([{raw, <<"[]">>}], [])
|
||||
[start_array, end_array],
|
||||
Parse([{raw, <<"[]">>}], [])
|
||||
)},
|
||||
{"raw empty object", ?_assertEqual(
|
||||
[start_object, end_object, end_json],
|
||||
parse([{raw, <<"{}">>}], [])
|
||||
[start_object, end_object],
|
||||
Parse([{raw, <<"{}">>}], [])
|
||||
)},
|
||||
{"raw chunk inside stream", ?_assertEqual(
|
||||
[start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object, end_json],
|
||||
parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], [])
|
||||
[start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object],
|
||||
Parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], [])
|
||||
)}
|
||||
].
|
||||
|
||||
|
||||
%% erlang refuses to encode certain codepoints, so fake them
%%
%% assembles the 1, 2, 3 or 4 byte utf8 layout for `N` by hand: the bits of
%% `N` are split into groups and each group is prefixed with the matching
%% lead (2#110, 2#1110, 2#11110) or continuation (2#10) marker
to_fake_utf8(N) when N < 16#0080 -> <<N:8>>;
to_fake_utf8(N) when N < 16#0800 ->
    <<High:5, Low:6>> = <<N:11>>,
    <<2#110:3, High:5, 2#10:2, Low:6>>;
to_fake_utf8(N) when N < 16#10000 ->
    <<High:4, Mid:6, Low:6>> = <<N:16>>,
    <<2#1110:4, High:4, 2#10:2, Mid:6, 2#10:2, Low:6>>;
to_fake_utf8(N) ->
    %% the three bits above the 21 bit payload must be zero
    <<0:3, Top:3, High:6, Mid:6, Low:6>> = <<N:24>>,
    <<2#11110:5, Top:3, 2#10:2, High:6, 2#10:2, Mid:6, 2#10:2, Low:6>>.
|
||||
|
||||
|
||||
%% utf8 binary of the codepoints in u+0020..u+fffd that the tests expect
%% clean_string/2 to return unchanged. the gaps between ranges are the
%% double quote, forward slash, backslash, u+2028/u+2029, the surrogates and
%% u+fdd0..u+fdef
codepoints() ->
    Ranges = [
        {32, 33},
        {35, 46},
        {48, 91},
        {93, 16#2027},
        {16#202a, 16#d7ff},
        {16#e000, 16#fdcf},
        {16#fdf0, 16#fffd}
    ],
    unicode:characters_to_binary(
        lists:flatmap(fun({First, Last}) -> lists:seq(First, Last) end, Ranges)
    ).
|
||||
|
||||
%% utf8 binary of every codepoint in u+10000..u+1fffd followed by the first
%% codepoint of each of planes 2 through 16 (u+20000, u+30000 ... u+100000)
extended_codepoints() ->
    PlaneStarts = [Plane bsl 16 || Plane <- lists:seq(2, 16)],
    unicode:characters_to_binary(lists:seq(16#10000, 16#1fffd) ++ PlaneStarts).
|
||||
|
||||
%% hand encoded binaries, one per codepoint, for u+fdd0..u+fdef
reserved_space() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#fdd0, 16#fdef)).
|
||||
|
||||
%% hand encoded binaries, one per codepoint, for the surrogate range
%% u+d800..u+dfff
surrogates() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#d800, 16#dfff)).
|
||||
|
||||
%% hand encoded binaries for u+fffe and u+ffff
noncharacters() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#fffe, 16#ffff)).
|
||||
|
||||
%% hand encoded binaries for the last two codepoints (xfffe and xffff) of
%% each of planes 1 through 16, in ascending order
extended_noncharacters() ->
    [
        to_fake_utf8((Plane bsl 16) bor Low)
        || Plane <- lists:seq(1, 16), Low <- [16#fffe, 16#ffff]
    ].
|
||||
|
||||
|
||||
%% clean_string/2 against whole classes of codepoints:
%%   * acceptable codepoints come back unchanged, with and without
%%     `escaped_strings`
%%   * each invalid class gives `{error, badarg}` under `strict_utf8`
%%   * each invalid class is replaced by u+fffd under the default config
clean_string_test_() ->
    Unchanged = [
        {"clean codepoints", fun codepoints/0, #config{}},
        {"clean extended codepoints", fun extended_codepoints/0, #config{}},
        {"escape path codepoints", fun codepoints/0, #config{escaped_strings=true}},
        {"escape path extended codepoints", fun extended_codepoints/0, #config{escaped_strings=true}}
    ],
    Invalid = [
        {"reserved space", fun reserved_space/0},
        {"surrogates", fun surrogates/0},
        {"noncharacters", fun noncharacters/0},
        {"extended noncharacters", fun extended_noncharacters/0}
    ],
    [
        {Title, ?_assertEqual(Input(), clean_string(Input(), Config))}
        || {Title, Input, Config} <- Unchanged
    ] ++ [
        {"error " ++ Class, ?_assertEqual(
            lists:duplicate(length(Inputs()), {error, badarg}),
            [clean_string(Codepoint, #config{strict_utf8=true}) || Codepoint <- Inputs()]
        )}
        || {Class, Inputs} <- Invalid
    ] ++ [
        {"clean " ++ Class, ?_assertEqual(
            lists:duplicate(length(Inputs()), <<16#fffd/utf8>>),
            [clean_string(Codepoint, #config{}) || Codepoint <- Inputs()]
        )}
        || {Class, Inputs} <- Invalid
    ].
|
||||
|
||||
|
||||
%% clean_string/2 escaping behaviour. the named cases cover the short json
%% escapes, the optional forward slash and u+2028/u+2029 handling; the
%% generated cases cover the control characters that have no short escape
%% and must come back as \u00XX
escape_test_() ->
    Escaped = #config{escaped_strings=true},
    Named = [
        {"maybe_escape backspace", <<"\\b">>, <<16#0008/utf8>>, Escaped},
        {"don't escape backspace", <<"\b">>, <<16#0008/utf8>>, #config{}},
        {"maybe_escape tab", <<"\\t">>, <<16#0009/utf8>>, Escaped},
        {"maybe_escape newline", <<"\\n">>, <<16#000a/utf8>>, Escaped},
        {"maybe_escape formfeed", <<"\\f">>, <<16#000c/utf8>>, Escaped},
        {"maybe_escape carriage return", <<"\\r">>, <<16#000d/utf8>>, Escaped},
        {"maybe_escape quote", <<"\\\"">>, <<16#0022/utf8>>, Escaped},
        {"maybe_escape forward slash", <<"\\/">>, <<16#002f/utf8>>,
            #config{escaped_strings=true, escaped_forward_slashes=true}},
        {"do not maybe_escape forward slash", <<"/">>, <<16#002f/utf8>>, Escaped},
        {"maybe_escape backslash", <<"\\\\">>, <<16#005c/utf8>>, Escaped},
        {"maybe_escape jsonp (u2028)", <<"\\u2028">>, <<16#2028/utf8>>, Escaped},
        {"do not maybe_escape jsonp (u2028)", <<16#2028/utf8>>, <<16#2028/utf8>>,
            #config{escaped_strings=true, unescaped_jsonp=true}},
        {"maybe_escape jsonp (u2029)", <<"\\u2029">>, <<16#2029/utf8>>, Escaped},
        {"do not maybe_escape jsonp (u2029)", <<16#2029/utf8>>, <<16#2029/utf8>>,
            #config{escaped_strings=true, unescaped_jsonp=true}}
    ],
    %% control characters without a short escape: everything below u+0020
    %% except \b, \t, \n, \f and \r
    Controls = lists:seq(16#00, 16#07) ++ [16#0b, 16#0e, 16#0f] ++ lists:seq(16#10, 16#1f),
    [
        {Title, ?_assertEqual(Expected, clean_string(Input, Config))}
        || {Title, Expected, Input, Config} <- Named
    ] ++ [
        begin
            %% four lowercase hex digits, zero padded
            Hex = lists:flatten(io_lib:format("~4.16.0b", [Codepoint])),
            {"maybe_escape u" ++ Hex, ?_assertEqual(
                list_to_binary("\\u" ++ Hex),
                clean_string(<<Codepoint/utf8>>, Escaped)
            )}
        end
        || Codepoint <- Controls
    ].
|
||||
|
||||
|
||||
%% every malformed input in the table is checked twice: with `strict_utf8` set
%% clean_string/2 must answer `{error, badarg}`, with the default config the
%% bad sequences must come back as the replacement character u+fffd
bad_utf8_test_() ->
    Replacement = <<16#fffd/utf8>>,
    Continuations = lists:seq(16#0080, 16#00bf),
    %% {title, malformed input, expected result under the default config}
    Cases = [
        {"noncharacter u+fffe", to_fake_utf8(16#fffe), Replacement},
        {"noncharacter u+ffff", to_fake_utf8(16#ffff), Replacement},
        {"orphan continuation byte u+0080", <<16#0080>>, Replacement},
        {"orphan continuation byte u+00bf", <<16#00bf>>, Replacement},
        {"2 continuation bytes", binary:copy(<<16#0080>>, 2), binary:copy(Replacement, 2)},
        {"3 continuation bytes", binary:copy(<<16#0080>>, 3), binary:copy(Replacement, 3)},
        {"4 continuation bytes", binary:copy(<<16#0080>>, 4), binary:copy(Replacement, 4)},
        {"5 continuation bytes", binary:copy(<<16#0080>>, 5), binary:copy(Replacement, 5)},
        {"6 continuation bytes", binary:copy(<<16#0080>>, 6), binary:copy(Replacement, 6)},
        {"all continuation bytes",
            list_to_binary(Continuations),
            binary:copy(Replacement, length(Continuations))},
        {"lonely start byte", <<16#00c0>>, Replacement},
        {"lonely start bytes (2 byte)",
            <<16#00c0, 32, 16#00df>>,
            <<16#fffd/utf8, 32, 16#fffd/utf8>>},
        {"lonely start bytes (3 byte)",
            <<16#00e0, 32, 16#00ef>>,
            <<16#fffd/utf8, 32, 16#fffd/utf8>>},
        {"lonely start bytes (4 byte)",
            <<16#00f0, 32, 16#00f7>>,
            <<16#fffd/utf8, 32, 16#fffd/utf8>>},
        {"missing continuation byte (3 byte)",
            <<224, 160, 32>>,
            <<16#fffd/utf8, 32>>},
        {"missing continuation byte (4 byte missing one)",
            <<240, 144, 128, 32>>,
            <<16#fffd/utf8, 32>>},
        {"missing continuation byte (4 byte missing two)",
            <<240, 144, 32>>,
            <<16#fffd/utf8, 32>>},
        {"overlong encoding of u+002f (2 byte)",
            <<16#c0, 16#af, 32>>,
            <<16#fffd/utf8, 32>>},
        {"overlong encoding of u+002f (3 byte)",
            <<16#e0, 16#80, 16#af, 32>>,
            <<16#fffd/utf8, 32>>},
        {"overlong encoding of u+002f (4 byte)",
            <<16#f0, 16#80, 16#80, 16#af, 32>>,
            <<16#fffd/utf8, 32>>},
        {"highest overlong 2 byte sequence",
            <<16#c1, 16#bf, 32>>,
            <<16#fffd/utf8, 32>>},
        {"highest overlong 3 byte sequence",
            <<16#e0, 16#9f, 16#bf, 32>>,
            <<16#fffd/utf8, 32>>},
        {"highest overlong 4 byte sequence",
            <<16#f0, 16#8f, 16#bf, 16#bf, 32>>,
            <<16#fffd/utf8, 32>>}
    ],
    %% each case expands to the strict test followed by its " replaced" twin
    lists:flatmap(
        fun({Title, Malformed, Repaired}) ->
            [
                {Title, ?_assertEqual(
                    {error, badarg},
                    clean_string(Malformed, #config{strict_utf8=true})
                )},
                {Title ++ " replaced", ?_assertEqual(
                    Repaired,
                    clean_string(Malformed, #config{})
                )}
            ]
        end,
        Cases
    ).
|
||||
|
||||
|
||||
%% json_escape_sequence/1 is expected to render a codepoint as a `\uXXXX`
%% string, zero padded to four hex digits
%% note: ?_assertEqual takes the expected value first and the expression under
%% test second, failure reports are labelled accordingly
json_escape_sequence_test_() ->
    [
        {"json escape sequence test - 16#0000", ?_assertEqual("\\u0000", json_escape_sequence(16#0000))},
        {"json escape sequence test - 16#abc", ?_assertEqual("\\u0abc", json_escape_sequence(16#abc))},
        {"json escape sequence test - 16#def", ?_assertEqual("\\u0def", json_escape_sequence(16#def))}
    ].
|
||||
|
||||
|
||||
%% fix_key/1 is expected to hand back a binary key for binary, atom and
%% integer input alike
%% note: ?_assertEqual takes the expected value first and the expression under
%% test second, failure reports are labelled accordingly
fix_key_test_() ->
    [
        {"binary key", ?_assertEqual(<<"foo">>, fix_key(<<"foo">>))},
        {"atom key", ?_assertEqual(<<"foo">>, fix_key(foo))},
        {"integer key", ?_assertEqual(<<"123">>, fix_key(123))}
    ].
|
||||
|
||||
-endif.
|
||||
|
|
|
@ -1,403 +0,0 @@
|
|||
%% prepares a binary string according to the config flags
%%   dirty_strings                       - the binary is returned untouched
%%   replaced_bad_utf8 / escaped_strings - either flag routes the binary
%%                                         through clean/3
%%   neither                             - ensure_clean/1 decides the result
clean_string(Bin, #config{dirty_strings=true}) ->
    Bin;
clean_string(Bin, Config) ->
    MustRewrite = Config#config.replaced_bad_utf8
        orelse Config#config.escaped_strings,
    case MustRewrite of
        false -> ensure_clean(Bin);
        true -> clean(Bin, [], Config)
    end.
|
||||
|
||||
|
||||
%% returns the binary unchanged when is_clean/1 accepts it, otherwise passes
%% the `{error, badarg}` result along to the caller
ensure_clean(Bin) ->
    case is_clean(Bin) of
        {error, badarg} = Rejected -> Rejected;
        ok -> Bin
    end.
|
||||
|
||||
%% fast path for when neither escaping nor correcting is requested: walks the
%% binary and returns `ok` if every codepoint is acceptable, `{error, badarg}`
%% as soon as one is not (nothing is thrown)
is_clean(<<>>) ->
    ok;
%% single byte (7 bit) codepoints are always acceptable
is_clean(<<Byte, Rest/binary>>) when Byte < 128 ->
    is_clean(Rest);
%% any other well formed codepoint is acceptable unless it lies in
%% u+d800 - u+dfff, in u+fdd0 - u+fdef, or is one of the last two codepoints
%% of a plane (u+xfffe, u+xffff)
is_clean(<<Codepoint/utf8, Rest/binary>>)
        when Codepoint < 16#d800;
             Codepoint > 16#dfff, Codepoint < 16#fdd0;
             Codepoint > 16#fdef, Codepoint < 16#fffe;
             Codepoint >= 16#10000, (Codepoint band 16#ffff) < 16#fffe ->
    is_clean(Rest);
%% whatever is left: malformed utf8, a rejected codepoint or a non binary
is_clean(_Other) ->
    {error, badarg}.
|
||||
|
||||
|
||||
%% escape and/or replace bad codepoints if requested
|
||||
clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc));
|
||||
clean(<<0, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(0, Config) ++ Acc, Config);
|
||||
clean(<<1, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(1, Config) ++ Acc, Config);
|
||||
clean(<<2, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(2, Config) ++ Acc, Config);
|
||||
clean(<<3, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(3, Config) ++ Acc, Config);
|
||||
clean(<<4, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(4, Config) ++ Acc, Config);
|
||||
clean(<<5, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(5, Config) ++ Acc, Config);
|
||||
clean(<<6, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(6, Config) ++ Acc, Config);
|
||||
clean(<<7, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(7, Config) ++ Acc, Config);
|
||||
clean(<<8, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(8, Config) ++ Acc, Config);
|
||||
clean(<<9, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(9, Config) ++ Acc, Config);
|
||||
clean(<<10, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(10, Config) ++ Acc, Config);
|
||||
clean(<<11, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(11, Config) ++ Acc, Config);
|
||||
clean(<<12, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(12, Config) ++ Acc, Config);
|
||||
clean(<<13, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(13, Config) ++ Acc, Config);
|
||||
clean(<<14, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(14, Config) ++ Acc, Config);
|
||||
clean(<<15, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(15, Config) ++ Acc, Config);
|
||||
clean(<<16, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(16, Config) ++ Acc, Config);
|
||||
clean(<<17, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(17, Config) ++ Acc, Config);
|
||||
clean(<<18, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(18, Config) ++ Acc, Config);
|
||||
clean(<<19, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(19, Config) ++ Acc, Config);
|
||||
clean(<<20, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(20, Config) ++ Acc, Config);
|
||||
clean(<<21, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(21, Config) ++ Acc, Config);
|
||||
clean(<<22, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(22, Config) ++ Acc, Config);
|
||||
clean(<<23, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(23, Config) ++ Acc, Config);
|
||||
clean(<<24, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(24, Config) ++ Acc, Config);
|
||||
clean(<<25, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(25, Config) ++ Acc, Config);
|
||||
clean(<<26, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(26, Config) ++ Acc, Config);
|
||||
clean(<<27, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(27, Config) ++ Acc, Config);
|
||||
clean(<<28, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(28, Config) ++ Acc, Config);
|
||||
clean(<<29, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(29, Config) ++ Acc, Config);
|
||||
clean(<<30, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(30, Config) ++ Acc, Config);
|
||||
clean(<<31, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(31, Config) ++ Acc, Config);
|
||||
clean(<<32, Rest/binary>>, Acc, Config) -> clean(Rest, [32] ++ Acc, Config);
|
||||
clean(<<33, Rest/binary>>, Acc, Config) -> clean(Rest, [33] ++ Acc, Config);
|
||||
clean(<<34, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(34, Config) ++ Acc, Config);
|
||||
clean(<<35, Rest/binary>>, Acc, Config) -> clean(Rest, [35] ++ Acc, Config);
|
||||
clean(<<36, Rest/binary>>, Acc, Config) -> clean(Rest, [36] ++ Acc, Config);
|
||||
clean(<<37, Rest/binary>>, Acc, Config) -> clean(Rest, [37] ++ Acc, Config);
|
||||
clean(<<38, Rest/binary>>, Acc, Config) -> clean(Rest, [38] ++ Acc, Config);
|
||||
clean(<<39, Rest/binary>>, Acc, Config) -> clean(Rest, [39] ++ Acc, Config);
|
||||
clean(<<40, Rest/binary>>, Acc, Config) -> clean(Rest, [40] ++ Acc, Config);
|
||||
clean(<<41, Rest/binary>>, Acc, Config) -> clean(Rest, [41] ++ Acc, Config);
|
||||
clean(<<42, Rest/binary>>, Acc, Config) -> clean(Rest, [42] ++ Acc, Config);
|
||||
clean(<<43, Rest/binary>>, Acc, Config) -> clean(Rest, [43] ++ Acc, Config);
|
||||
clean(<<44, Rest/binary>>, Acc, Config) -> clean(Rest, [44] ++ Acc, Config);
|
||||
clean(<<45, Rest/binary>>, Acc, Config) -> clean(Rest, [45] ++ Acc, Config);
|
||||
clean(<<46, Rest/binary>>, Acc, Config) -> clean(Rest, [46] ++ Acc, Config);
|
||||
clean(<<47, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(47, Config) ++ Acc, Config);
|
||||
clean(<<48, Rest/binary>>, Acc, Config) -> clean(Rest, [48] ++ Acc, Config);
|
||||
clean(<<49, Rest/binary>>, Acc, Config) -> clean(Rest, [49] ++ Acc, Config);
|
||||
clean(<<50, Rest/binary>>, Acc, Config) -> clean(Rest, [50] ++ Acc, Config);
|
||||
clean(<<51, Rest/binary>>, Acc, Config) -> clean(Rest, [51] ++ Acc, Config);
|
||||
clean(<<52, Rest/binary>>, Acc, Config) -> clean(Rest, [52] ++ Acc, Config);
|
||||
clean(<<53, Rest/binary>>, Acc, Config) -> clean(Rest, [53] ++ Acc, Config);
|
||||
clean(<<54, Rest/binary>>, Acc, Config) -> clean(Rest, [54] ++ Acc, Config);
|
||||
clean(<<55, Rest/binary>>, Acc, Config) -> clean(Rest, [55] ++ Acc, Config);
|
||||
clean(<<56, Rest/binary>>, Acc, Config) -> clean(Rest, [56] ++ Acc, Config);
|
||||
clean(<<57, Rest/binary>>, Acc, Config) -> clean(Rest, [57] ++ Acc, Config);
|
||||
clean(<<58, Rest/binary>>, Acc, Config) -> clean(Rest, [58] ++ Acc, Config);
|
||||
clean(<<59, Rest/binary>>, Acc, Config) -> clean(Rest, [59] ++ Acc, Config);
|
||||
clean(<<60, Rest/binary>>, Acc, Config) -> clean(Rest, [60] ++ Acc, Config);
|
||||
clean(<<61, Rest/binary>>, Acc, Config) -> clean(Rest, [61] ++ Acc, Config);
|
||||
clean(<<62, Rest/binary>>, Acc, Config) -> clean(Rest, [62] ++ Acc, Config);
|
||||
clean(<<63, Rest/binary>>, Acc, Config) -> clean(Rest, [63] ++ Acc, Config);
|
||||
clean(<<64, Rest/binary>>, Acc, Config) -> clean(Rest, [64] ++ Acc, Config);
|
||||
clean(<<65, Rest/binary>>, Acc, Config) -> clean(Rest, [65] ++ Acc, Config);
|
||||
clean(<<66, Rest/binary>>, Acc, Config) -> clean(Rest, [66] ++ Acc, Config);
|
||||
clean(<<67, Rest/binary>>, Acc, Config) -> clean(Rest, [67] ++ Acc, Config);
|
||||
clean(<<68, Rest/binary>>, Acc, Config) -> clean(Rest, [68] ++ Acc, Config);
|
||||
clean(<<69, Rest/binary>>, Acc, Config) -> clean(Rest, [69] ++ Acc, Config);
|
||||
clean(<<70, Rest/binary>>, Acc, Config) -> clean(Rest, [70] ++ Acc, Config);
|
||||
clean(<<71, Rest/binary>>, Acc, Config) -> clean(Rest, [71] ++ Acc, Config);
|
||||
clean(<<72, Rest/binary>>, Acc, Config) -> clean(Rest, [72] ++ Acc, Config);
|
||||
clean(<<73, Rest/binary>>, Acc, Config) -> clean(Rest, [73] ++ Acc, Config);
|
||||
clean(<<74, Rest/binary>>, Acc, Config) -> clean(Rest, [74] ++ Acc, Config);
|
||||
clean(<<75, Rest/binary>>, Acc, Config) -> clean(Rest, [75] ++ Acc, Config);
|
||||
clean(<<76, Rest/binary>>, Acc, Config) -> clean(Rest, [76] ++ Acc, Config);
|
||||
clean(<<77, Rest/binary>>, Acc, Config) -> clean(Rest, [77] ++ Acc, Config);
|
||||
clean(<<78, Rest/binary>>, Acc, Config) -> clean(Rest, [78] ++ Acc, Config);
|
||||
clean(<<79, Rest/binary>>, Acc, Config) -> clean(Rest, [79] ++ Acc, Config);
|
||||
clean(<<80, Rest/binary>>, Acc, Config) -> clean(Rest, [80] ++ Acc, Config);
|
||||
clean(<<81, Rest/binary>>, Acc, Config) -> clean(Rest, [81] ++ Acc, Config);
|
||||
clean(<<82, Rest/binary>>, Acc, Config) -> clean(Rest, [82] ++ Acc, Config);
|
||||
clean(<<83, Rest/binary>>, Acc, Config) -> clean(Rest, [83] ++ Acc, Config);
|
||||
clean(<<84, Rest/binary>>, Acc, Config) -> clean(Rest, [84] ++ Acc, Config);
|
||||
clean(<<85, Rest/binary>>, Acc, Config) -> clean(Rest, [85] ++ Acc, Config);
|
||||
clean(<<86, Rest/binary>>, Acc, Config) -> clean(Rest, [86] ++ Acc, Config);
|
||||
clean(<<87, Rest/binary>>, Acc, Config) -> clean(Rest, [87] ++ Acc, Config);
|
||||
clean(<<88, Rest/binary>>, Acc, Config) -> clean(Rest, [88] ++ Acc, Config);
|
||||
clean(<<89, Rest/binary>>, Acc, Config) -> clean(Rest, [89] ++ Acc, Config);
|
||||
clean(<<90, Rest/binary>>, Acc, Config) -> clean(Rest, [90] ++ Acc, Config);
|
||||
clean(<<91, Rest/binary>>, Acc, Config) -> clean(Rest, [91] ++ Acc, Config);
|
||||
clean(<<92, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(92, Config) ++ Acc, Config);
|
||||
clean(<<93, Rest/binary>>, Acc, Config) -> clean(Rest, [93] ++ Acc, Config);
|
||||
clean(<<94, Rest/binary>>, Acc, Config) -> clean(Rest, [94] ++ Acc, Config);
|
||||
clean(<<95, Rest/binary>>, Acc, Config) -> clean(Rest, [95] ++ Acc, Config);
|
||||
clean(<<96, Rest/binary>>, Acc, Config) -> clean(Rest, [96] ++ Acc, Config);
|
||||
clean(<<97, Rest/binary>>, Acc, Config) -> clean(Rest, [97] ++ Acc, Config);
|
||||
clean(<<98, Rest/binary>>, Acc, Config) -> clean(Rest, [98] ++ Acc, Config);
|
||||
clean(<<99, Rest/binary>>, Acc, Config) -> clean(Rest, [99] ++ Acc, Config);
|
||||
clean(<<100, Rest/binary>>, Acc, Config) -> clean(Rest, [100] ++ Acc, Config);
|
||||
clean(<<101, Rest/binary>>, Acc, Config) -> clean(Rest, [101] ++ Acc, Config);
|
||||
clean(<<102, Rest/binary>>, Acc, Config) -> clean(Rest, [102] ++ Acc, Config);
|
||||
clean(<<103, Rest/binary>>, Acc, Config) -> clean(Rest, [103] ++ Acc, Config);
|
||||
clean(<<104, Rest/binary>>, Acc, Config) -> clean(Rest, [104] ++ Acc, Config);
|
||||
clean(<<105, Rest/binary>>, Acc, Config) -> clean(Rest, [105] ++ Acc, Config);
|
||||
clean(<<106, Rest/binary>>, Acc, Config) -> clean(Rest, [106] ++ Acc, Config);
|
||||
clean(<<107, Rest/binary>>, Acc, Config) -> clean(Rest, [107] ++ Acc, Config);
|
||||
clean(<<108, Rest/binary>>, Acc, Config) -> clean(Rest, [108] ++ Acc, Config);
|
||||
clean(<<109, Rest/binary>>, Acc, Config) -> clean(Rest, [109] ++ Acc, Config);
|
||||
clean(<<110, Rest/binary>>, Acc, Config) -> clean(Rest, [110] ++ Acc, Config);
|
||||
clean(<<111, Rest/binary>>, Acc, Config) -> clean(Rest, [111] ++ Acc, Config);
|
||||
clean(<<112, Rest/binary>>, Acc, Config) -> clean(Rest, [112] ++ Acc, Config);
|
||||
clean(<<113, Rest/binary>>, Acc, Config) -> clean(Rest, [113] ++ Acc, Config);
|
||||
clean(<<114, Rest/binary>>, Acc, Config) -> clean(Rest, [114] ++ Acc, Config);
|
||||
clean(<<115, Rest/binary>>, Acc, Config) -> clean(Rest, [115] ++ Acc, Config);
|
||||
clean(<<116, Rest/binary>>, Acc, Config) -> clean(Rest, [116] ++ Acc, Config);
|
||||
clean(<<117, Rest/binary>>, Acc, Config) -> clean(Rest, [117] ++ Acc, Config);
|
||||
clean(<<118, Rest/binary>>, Acc, Config) -> clean(Rest, [118] ++ Acc, Config);
|
||||
clean(<<119, Rest/binary>>, Acc, Config) -> clean(Rest, [119] ++ Acc, Config);
|
||||
clean(<<120, Rest/binary>>, Acc, Config) -> clean(Rest, [120] ++ Acc, Config);
|
||||
clean(<<121, Rest/binary>>, Acc, Config) -> clean(Rest, [121] ++ Acc, Config);
|
||||
clean(<<122, Rest/binary>>, Acc, Config) -> clean(Rest, [122] ++ Acc, Config);
|
||||
clean(<<123, Rest/binary>>, Acc, Config) -> clean(Rest, [123] ++ Acc, Config);
|
||||
clean(<<124, Rest/binary>>, Acc, Config) -> clean(Rest, [124] ++ Acc, Config);
|
||||
clean(<<125, Rest/binary>>, Acc, Config) -> clean(Rest, [125] ++ Acc, Config);
|
||||
clean(<<126, Rest/binary>>, Acc, Config) -> clean(Rest, [126] ++ Acc, Config);
|
||||
clean(<<127, Rest/binary>>, Acc, Config) -> clean(Rest, [127] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
|
||||
clean(Rest, maybe_replace(X, Config) ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#d800 ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#dfff, X < 16#fdd0 ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#fdef, X < 16#fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#10000, X < 16#1fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#20000, X < 16#2fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#30000, X < 16#3fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#40000, X < 16#4fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#50000, X < 16#5fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#60000, X < 16#6fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#70000, X < 16#7fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#80000, X < 16#8fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#90000, X < 16#9fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#a0000, X < 16#afffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#b0000, X < 16#bfffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#c0000, X < 16#cfffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#d0000, X < 16#dfffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#e0000, X < 16#efffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#f0000, X < 16#ffffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#100000, X < 16#10fffe ->
|
||||
clean(Rest, [X] ++ Acc, Config);
|
||||
%% surrogates
|
||||
clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
|
||||
clean(Rest, maybe_replace(surrogate, Config) ++ Acc, Config);
|
||||
%% noncharacters
|
||||
clean(<<_/utf8, Rest/binary>>, Acc, Config) ->
|
||||
clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config);
|
||||
%% u+fffe and u+ffff for R14BXX
|
||||
clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X == 190; X == 191 ->
|
||||
clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config);
|
||||
%% overlong encodings and missing continuations of a 2 byte sequence
|
||||
clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 ->
|
||||
clean(strip_continuations(Rest, 1), maybe_replace(badutf, Config) ++ Acc, Config);
|
||||
%% overlong encodings and missing continuations of a 3 byte sequence
|
||||
clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
|
||||
clean(strip_continuations(Rest, 2), maybe_replace(badutf, Config) ++ Acc, Config);
|
||||
%% overlong encodings and missing continuations of a 4 byte sequence
|
||||
clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
|
||||
clean(strip_continuations(Rest, 3), maybe_replace(badutf, Config) ++ Acc, Config);
|
||||
clean(<<_, Rest/binary>>, Acc, Config) ->
|
||||
clean(Rest, maybe_replace(badutf, Config) ++ Acc, Config).
|
||||
|
||||
|
||||
%% drop leading utf8 continuation bytes (128..191) from a binary, at most N of
%% them. stops early at the first byte that is not a continuation byte (or at
%% the end of input) and returns whatever is left
strip_continuations(<<Byte, Tail/binary>>, Remaining)
        when Remaining =/= 0, Byte >= 128, Byte =< 191 ->
    strip_continuations(Tail, Remaining - 1);
%% budget exhausted, input exhausted or not a continuation byte
strip_continuations(Leftover, _Remaining) -> Leftover.
|
||||
|
||||
|
||||
%% maybe_replace(CodepointOrTag, Config) -> replacement characters
%%
%% the replacement is returned in REVERSE order (eg newline becomes "n\\")
%% because clean/3 prepends the result to its accumulator.
%%
%% with `escaped_strings` set: control characters, quote, backslash,
%%   (optionally) forward slash and (unless `unescaped_jsonp`) u+2028/u+2029
%%   are replaced by their json escapes
%% with `replaced_bad_utf8` set: the tags `noncharacter`, `surrogate` and
%%   `badutf` are replaced by u+fffd
%% anything else yields `{error, badarg}`
%%
%% NOTE(review): a plain codepoint (eg u+2028) passed with
%%   `escaped_strings=false` also lands in the catch all clause; confirm the
%%   caller never reaches this function in that configuration
maybe_replace($\b, #config{escaped_strings=true}) -> "b\\";
maybe_replace($\t, #config{escaped_strings=true}) -> "t\\";
maybe_replace($\n, #config{escaped_strings=true}) -> "n\\";
maybe_replace($\f, #config{escaped_strings=true}) -> "f\\";
maybe_replace($\r, #config{escaped_strings=true}) -> "r\\";
maybe_replace($\", #config{escaped_strings=true}) -> "\"\\";
maybe_replace($\\, #config{escaped_strings=true}) -> "\\\\";
maybe_replace($/, #config{escaped_strings=true, escaped_forward_slashes=EscapeSlash}) ->
    case EscapeSlash of
        true -> "/\\";
        false -> "/"
    end;
maybe_replace(X, #config{escaped_strings=true, unescaped_jsonp=KeepRaw})
        when X == 16#2028; X == 16#2029 ->
    case KeepRaw of
        true -> [X];
        false -> lists:reverse(json_escape_sequence(X))
    end;
%% remaining control characters have no short escape, use \uXXXX
maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
    lists:reverse(json_escape_sequence(X));
maybe_replace(Tag, #config{replaced_bad_utf8=true})
        when Tag =:= noncharacter; Tag =:= surrogate; Tag =:= badutf ->
    [16#fffd];
maybe_replace(_, _) -> {error, badarg}.
|
||||
|
||||
|
||||
%% convert a codepoint to its json \uXXXX escape sequence, returned as a flat
%% string of lowercase hex digits.
%%
%% a \u escape only holds 16 bits, so codepoints above u+ffff are written as a
%% utf16 surrogate pair (\uXXXX\uXXXX) instead of being silently truncated to
%% their low 16 bits
json_escape_sequence(X) when X < 16#10000 ->
    <<A:4, B:4, C:4, D:4>> = <<X:16>>,
    [$\\, $u, to_hex(A), to_hex(B), to_hex(C), to_hex(D)];
json_escape_sequence(X) ->
    %% 20 bit offset into the supplementary planes, split into two 10 bit halves
    <<High:10, Low:10>> = <<(X - 16#10000):20>>,
    json_escape_sequence(16#d800 + High) ++ json_escape_sequence(16#dc00 + Low).


%% map a 4 bit value (0..15) to its lowercase ascii hex digit
to_hex(X) when X >= 0, X =< 9 -> X + $0;
to_hex(X) when X >= 10, X =< 15 -> X - 10 + $a.
|
|
@ -1,689 +0,0 @@
|
|||
%% data and helper functions for tests
|
||||
|
||||
-export([init/1, handle_event/2]).
|
||||
-export([test_cases/0]).
|
||||
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
|
||||
%% test handler
|
||||
%% the test handler takes no arguments and starts with no recorded events
init(Args) when Args =:= [] -> [].
|
||||
|
||||
%% record each event by pushing it onto the state. `end_json` is the final
%% event, so on seeing it the recorded events are flipped into arrival order
handle_event(Event, State) ->
    Recorded = [Event | State],
    case Event of
        end_json -> lists:reverse(Recorded);
        _ -> Recorded
    end.
|
||||
|
||||
|
||||
%% every shared test case, as one flat list of
%% {Title, JSON, Term, Events} tuples
test_cases() ->
    lists:append([
        empty_array(),
        nested_array(),
        empty_object(),
        nested_object(),
        strings(),
        literals(),
        integers(),
        floats(),
        compound_object()
    ]).
|
||||
|
||||
|
||||
%% the empty json array
empty_array() ->
    Events = [start_array, end_array],
    [{"[]", <<"[]">>, [], Events}].
|
||||
|
||||
%% three empty arrays nested inside each other
nested_array() ->
    Title = "[[[]]]",
    Events = lists:duplicate(3, start_array) ++ lists:duplicate(3, end_array),
    [{Title, list_to_binary(Title), [[[]]], Events}].
|
||||
|
||||
|
||||
%% the empty json object, whose term form is `[{}]`
empty_object() ->
    Events = [start_object, end_object],
    [{"{}", <<"{}">>, [{}], Events}].
|
||||
|
||||
%% an object holding an object holding an empty object, all under "key"
nested_object() ->
    Title = "{\"key\":{\"key\":{}}}",
    Key = {key, <<"key">>},
    Term = [{<<"key">>, [{<<"key">>, [{}]}]}],
    Events = [
        start_object, Key,
        start_object, Key,
        start_object, end_object,
        end_object,
        end_object
    ],
    [{Title, list_to_binary(Title), Term, Events}].
|
||||
|
||||
|
||||
%% top level (unwrapped) json strings
naked_strings() ->
    lists:map(
        fun(String) ->
            Bin = list_to_binary(String),
            {String, <<$", Bin/binary, $">>, Bin, [{string, Bin}]}
        end,
        ["", "hello world"]
    ).
|
||||
|
||||
%% string cases bare, wrapped in an array and wrapped in an object
strings() ->
    Naked = naked_strings(),
    Naked
    ++ lists:map(fun wrap_with_array/1, Naked)
    ++ lists:map(fun wrap_with_object/1, Naked).
|
||||
|
||||
|
||||
%% top level integers: values around byte and word boundaries, two bignums
%% past 64 bits, the negation of each, and zero
naked_integers() ->
    Positive = [
        1, 2, 3,
        127, 128, 129,
        255, 256, 257,
        65534, 65535, 65536,
        18446744073709551616,
        18446744073709551617
    ],
    Negative = [ -N || N <- Positive ],
    lists:map(
        fun(Int) ->
            Digits = integer_to_list(Int),
            {Digits, list_to_binary(Digits), Int, [{integer, Int}]}
        end,
        Positive ++ Negative ++ [0]
    ).
|
||||
|
||||
%% integer cases bare, wrapped in an array and wrapped in an object
integers() ->
    Naked = naked_integers(),
    Naked
    ++ lists:map(fun wrap_with_array/1, Naked)
    ++ lists:map(fun wrap_with_object/1, Naked).
|
||||
|
||||
|
||||
%% top level floats: small decimals, exponent forms, the extremes of the
%% double range, and the negation of each
naked_floats() ->
    Positive = [
        0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
        1234567890.0987654321,
        0.0e0,
        1234567890.0987654321e16,
        0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308,
        1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308,
        2.2250738585072014e-308,    %% min normalized float
        1.7976931348623157e308,     %% max normalized float
        5.0e-324,                   %% min denormalized float
        2.225073858507201e-308      %% max denormalized float
    ],
    Negative = [ -1 * F || F <- Positive ],
    lists:map(
        fun(Float) ->
            Printed = sane_float_to_list(Float),
            {Printed, list_to_binary(Printed), Float, [{float, Float}]}
        end,
        Positive ++ Negative
    ).
|
||||
|
||||
%% float cases bare, wrapped in an array and wrapped in an object
floats() ->
    Naked = naked_floats(),
    Naked
    ++ lists:map(fun wrap_with_array/1, Naked)
    ++ lists:map(fun wrap_with_object/1, Naked).
|
||||
|
||||
|
||||
%% the three json literals at top level
naked_literals() ->
    lists:map(
        fun(Literal) ->
            {
                atom_to_list(Literal),
                atom_to_binary(Literal, unicode),
                Literal,
                [{literal, Literal}]
            }
        end,
        [true, false, null]
    ).
|
||||
|
||||
%% literal cases bare, wrapped in an array and wrapped in an object
literals() ->
    Naked = naked_literals(),
    Naked
    ++ lists:map(fun wrap_with_array/1, Naked)
    ++ lists:map(fun wrap_with_object/1, Naked).
|
||||
|
||||
|
||||
%% one larger document mixing arrays, objects, integers, floats and literals
compound_object() ->
    Title = "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]",
    Inner = [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}],
    Term = [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, Inner}], [[{}]]],
    Alpha = {key, <<"alpha">>},
    Beta = {key, <<"beta">>},
    Events = [
        start_array,
            start_object,
                Alpha,
                start_array, {integer, 1}, {integer, 2}, {integer, 3}, end_array,
                Beta,
                start_object,
                    Alpha,
                    start_array, {float, 1.0}, {float, 2.0}, {float, 3.0}, end_array,
                    Beta,
                    start_array, {literal, true}, {literal, false}, end_array,
                end_object,
            end_object,
            start_array,
                start_object,
                end_object,
            end_array,
        end_array
    ],
    [{Title, list_to_binary(Title), Term, Events}].
|
||||
|
||||
|
||||
%% derive the "same value inside a one element array" case from a test case
wrap_with_array({Title, JSON, Term, Events}) ->
    WrappedTitle = [$[ | Title ++ "]"],
    WrappedJSON = <<$[, JSON/binary, $]>>,
    WrappedEvents = [start_array | Events ++ [end_array]],
    {WrappedTitle, WrappedJSON, [Term], WrappedEvents}.
|
||||
|
||||
|
||||
%% derive the "same value as the only member of an object, under the key
%% `key`" case from a test case
wrap_with_object({Title, JSON, Term, Events}) ->
    Prefix = "{\"key\":",
    WrappedTitle = Prefix ++ Title ++ "}",
    WrappedJSON = <<(list_to_binary(Prefix))/binary, JSON/binary, $}>>,
    WrappedEvents = [start_object, {key, <<"key">>} | Events ++ [end_object]],
    {WrappedTitle, WrappedJSON, [{<<"key">>, Term}], WrappedEvents}.
|
||||
|
||||
|
||||
%% print a float the way `~p` does. `io_lib:format/2` is expected to return
%% a single element list here; the match unwraps that one element
sane_float_to_list(X) ->
    Formatted = io_lib:format("~p", [X]),
    [Printed] = Formatted,
    Printed.
|
||||
|
||||
-include("jsx_config.hrl").
|
||||
-include("jsx_strings.hrl").
|
||||
|
||||
|
||||
%% erlang refuses to encode certain codepoints, so fake them: build the utf8
%% byte layout for N by hand, with no validity checks
%%   1 byte:  0xxxxxxx
%%   2 bytes: 110xxxxx 10xxxxxx
%%   3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
%%   4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
to_fake_utf8(N) when N < 16#0080 ->
    <<N:8>>;
to_fake_utf8(N) when N < 16#0800 ->
    <<0:5, Lead:5, Last:6>> = <<N:16>>,
    <<2#110:3, Lead:5,
      2#10:2, Last:6>>;
to_fake_utf8(N) when N < 16#10000 ->
    <<Lead:4, Middle:6, Last:6>> = <<N:16>>,
    <<2#1110:4, Lead:4,
      2#10:2, Middle:6,
      2#10:2, Last:6>>;
to_fake_utf8(N) ->
    <<0:3, Lead:3, Second:6, Third:6, Last:6>> = <<N:24>>,
    <<2#11110:5, Lead:3,
      2#10:2, Second:6,
      2#10:2, Third:6,
      2#10:2, Last:6>>.
|
||||
|
||||
|
||||
%% utf8 binary of the basic multilingual plane codepoints from 32 up, leaving
%% out quote (34), forward slash (47), backslash (92), u+2028, u+2029, the
%% surrogates (d800..dfff), fdd0..fdef, fffe and ffff
codepoints() ->
    Ranges = [
        {32, 33},
        {35, 46},
        {48, 91},
        {93, 16#2027},
        {16#202a, 16#d7ff},
        {16#e000, 16#fdcf},
        {16#fdf0, 16#fffd}
    ],
    unicode:characters_to_binary(
        lists:append([ lists:seq(First, Last) || {First, Last} <- Ranges ])
    ).
|
||||
|
||||
%% utf8 binary of all of plane 1 up to u+1fffd, plus the first codepoint of
%% each plane from 2 through 16
extended_codepoints() ->
    PlaneOne = lists:seq(16#10000, 16#1fffd),
    PlaneStarts = lists:seq(16#20000, 16#100000, 16#10000),
    unicode:characters_to_binary(PlaneOne ++ PlaneStarts).
|
||||
|
||||
%% hand encoded binaries for u+fdd0 through u+fdef, one per codepoint
reserved_space() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#fdd0, 16#fdef)).
|
||||
|
||||
%% hand encoded binaries for the surrogates u+d800 through u+dfff
surrogates() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#d800, 16#dfff)).
|
||||
|
||||
%% hand encoded binaries for u+fffe and u+ffff
noncharacters() ->
    lists:map(fun to_fake_utf8/1, [16#fffe, 16#ffff]).
|
||||
|
||||
%% hand encoded binaries for the last two codepoints (xfffe and xffff) of
%% every plane from 1 through 16, in ascending order
extended_noncharacters() ->
    [
        to_fake_utf8((Plane bsl 16) bor Tail)
        || Plane <- lists:seq(1, 16), Tail <- [16#fffe, 16#ffff]
    ].
|
||||
|
||||
|
||||
%% clean_string/2 over whole classes of codepoints:
%%   * valid codepoints come back unchanged, with and without escaping
%%   * reserved, surrogate and noncharacter codepoints give `{error, badarg}`
%%     under the default config and u+fffd under `replaced_bad_utf8`
clean_string_test_() ->
    Default = #config{},
    Escaping = #config{escaped_strings=true},
    Replacing = #config{replaced_bad_utf8=true},
    CleanEach = fun(Binaries, Config) -> [ clean_string(Bin, Config) || Bin <- Binaries ] end,
    AllErrors = fun(Binaries) -> [ {error, badarg} || _ <- Binaries ] end,
    AllReplaced = fun(Binaries) -> [ <<16#fffd/utf8>> || _ <- Binaries ] end,
    [
        {"clean codepoints", ?_assertEqual(
            codepoints(),
            clean_string(codepoints(), Default)
        )},
        {"clean extended codepoints", ?_assertEqual(
            extended_codepoints(),
            clean_string(extended_codepoints(), Default)
        )},
        {"escape path codepoints", ?_assertEqual(
            codepoints(),
            clean_string(codepoints(), Escaping)
        )},
        {"escape path extended codepoints", ?_assertEqual(
            extended_codepoints(),
            clean_string(extended_codepoints(), Escaping)
        )},
        {"error reserved space", ?_assertEqual(
            AllErrors(reserved_space()),
            CleanEach(reserved_space(), Default)
        )},
        {"error surrogates", ?_assertEqual(
            AllErrors(surrogates()),
            CleanEach(surrogates(), Default)
        )},
        {"error noncharacters", ?_assertEqual(
            AllErrors(noncharacters()),
            CleanEach(noncharacters(), Default)
        )},
        {"error extended noncharacters", ?_assertEqual(
            AllErrors(extended_noncharacters()),
            CleanEach(extended_noncharacters(), Default)
        )},
        {"clean reserved space", ?_assertEqual(
            AllReplaced(reserved_space()),
            CleanEach(reserved_space(), Replacing)
        )},
        {"clean surrogates", ?_assertEqual(
            AllReplaced(surrogates()),
            CleanEach(surrogates(), Replacing)
        )},
        {"clean noncharacters", ?_assertEqual(
            AllReplaced(noncharacters()),
            CleanEach(noncharacters(), Replacing)
        )},
        {"clean extended noncharacters", ?_assertEqual(
            AllReplaced(extended_noncharacters()),
            CleanEach(extended_noncharacters(), Replacing)
        )}
    ].
|
||||
|
||||
|
||||
%% alias for clean_string/2, passing both arguments through unchanged
maybe_escape(Bin, Config) -> clean_string(Bin, Config).
|
||||
|
||||
%% escaping of individual codepoints. every case is a
%% {Title, Expected, Input, Config} row: first the characters with a short
%% escape or their own option, then the control characters that only have
%% the generic \uXXXX form
escape_test_() ->
    Escaping = #config{escaped_strings=true},
    Named = [
        {"maybe_escape backspace", <<"\\b">>, <<16#0008/utf8>>, Escaping},
        {"don't escape backspace", <<"\b">>, <<16#0008/utf8>>, #config{}},
        {"maybe_escape tab", <<"\\t">>, <<16#0009/utf8>>, Escaping},
        {"maybe_escape newline", <<"\\n">>, <<16#000a/utf8>>, Escaping},
        {"maybe_escape formfeed", <<"\\f">>, <<16#000c/utf8>>, Escaping},
        {"maybe_escape carriage return", <<"\\r">>, <<16#000d/utf8>>, Escaping},
        {"maybe_escape quote", <<"\\\"">>, <<16#0022/utf8>>, Escaping},
        {"maybe_escape forward slash", <<"\\/">>, <<16#002f/utf8>>,
            Escaping#config{escaped_forward_slashes=true}},
        {"do not maybe_escape forward slash", <<"/">>, <<16#002f/utf8>>, Escaping},
        {"maybe_escape backslash", <<"\\\\">>, <<16#005c/utf8>>, Escaping},
        {"maybe_escape jsonp (u2028)", <<"\\u2028">>, <<16#2028/utf8>>, Escaping},
        {"do not maybe_escape jsonp (u2028)", <<16#2028/utf8>>, <<16#2028/utf8>>,
            Escaping#config{unescaped_jsonp=true}},
        {"maybe_escape jsonp (u2029)", <<"\\u2029">>, <<16#2029/utf8>>, Escaping},
        {"do not maybe_escape jsonp (u2029)", <<16#2029/utf8>>, <<16#2029/utf8>>,
            Escaping#config{unescaped_jsonp=true}}
    ],
    %% control characters below 32 that have no short escape
    Codes = lists:seq(16#00, 16#07) ++ [16#0b] ++ lists:seq(16#0e, 16#1f),
    Control = [
        {"maybe_escape u" ++ Hex, list_to_binary("\\u" ++ Hex), <<Code/utf8>>, Escaping}
        || Code <- Codes,
           %% four lowercase hex digits, zero padded
           Hex <- [lists:flatten(io_lib:format("~4.16.0b", [Code]))]
    ],
    [
        {Title, ?_assertEqual(Expected, maybe_escape(Input, Config))}
        || {Title, Expected, Input, Config} <- Named ++ Control
    ].
|
||||
|
||||
|
||||
%% malformed utf8 input. every {Title, Input, Replaced} row yields two tests,
%% in this order:
%%   Title               the default config rejects Input with {error, badarg}
%%   Title ++ " replaced"   `replaced_bad_utf8` turns Input into Replaced
bad_utf8_test_() ->
    Bad = <<16#fffd/utf8>>,
    BadThenSpace = <<16#fffd/utf8, 32>>,
    BadSpaceBad = <<16#fffd/utf8, 32, 16#fffd/utf8>>,
    AllContinuations = lists:seq(16#0080, 16#00bf),
    Cases = [
        {"noncharacter u+fffe", to_fake_utf8(16#fffe), Bad},
        {"noncharacter u+ffff", to_fake_utf8(16#ffff), Bad},
        {"orphan continuation byte u+0080", <<16#0080>>, Bad},
        {"orphan continuation byte u+00bf", <<16#00bf>>, Bad}
    ] ++ [
        {integer_to_list(Count) ++ " continuation bytes",
            binary:copy(<<16#0080>>, Count),
            binary:copy(Bad, Count)}
        || Count <- lists:seq(2, 6)
    ] ++ [
        {"all continuation bytes",
            list_to_binary(AllContinuations),
            binary:copy(Bad, length(AllContinuations))},
        {"lonely start byte", <<16#00c0>>, Bad},
        {"lonely start bytes (2 byte)", <<16#00c0, 32, 16#00df>>, BadSpaceBad},
        {"lonely start bytes (3 byte)", <<16#00e0, 32, 16#00ef>>, BadSpaceBad},
        {"lonely start bytes (4 byte)", <<16#00f0, 32, 16#00f7>>, BadSpaceBad},
        {"missing continuation byte (3 byte)", <<224, 160, 32>>, BadThenSpace},
        {"missing continuation byte (4 byte missing one)", <<240, 144, 128, 32>>, BadThenSpace},
        {"missing continuation byte (4 byte missing two)", <<240, 144, 32>>, BadThenSpace},
        {"overlong encoding of u+002f (2 byte)", <<16#c0, 16#af, 32>>, BadThenSpace},
        {"overlong encoding of u+002f (3 byte)", <<16#e0, 16#80, 16#af, 32>>, BadThenSpace},
        {"overlong encoding of u+002f (4 byte)", <<16#f0, 16#80, 16#80, 16#af, 32>>, BadThenSpace},
        {"highest overlong 2 byte sequence", <<16#c1, 16#bf, 32>>, BadThenSpace},
        {"highest overlong 3 byte sequence", <<16#e0, 16#9f, 16#bf, 32>>, BadThenSpace},
        {"highest overlong 4 byte sequence", <<16#f0, 16#8f, 16#bf, 16#bf, 32>>, BadThenSpace}
    ],
    lists:append([
        [
            {Title, ?_assertEqual(
                {error, badarg},
                clean_string(Input, #config{})
            )},
            {Title ++ " replaced", ?_assertEqual(
                Replaced,
                clean_string(Input, #config{replaced_bad_utf8=true})
            )}
        ]
        || {Title, Input, Replaced} <- Cases
    ]).
|
||||
|
||||
|
||||
%% json_escape_sequence/1 yields a backslash, `u` and four lowercase hex
%% digits, zero padded on the left
json_escape_sequence_test_() ->
    Cases = [
        {"16#0000", 16#0000, "\\u0000"},
        {"16#abc", 16#abc, "\\u0abc"},
        {"16#def", 16#def, "\\u0def"}
    ],
    [
        {"json escape sequence test - " ++ Label,
            ?_assertEqual(json_escape_sequence(Codepoint), Escaped)}
        || {Label, Codepoint, Escaped} <- Cases
    ].
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
-export([to_json/2, format/2]).
|
||||
-export([init/1, handle_event/2]).
|
||||
-export([start_json/0, start_json/1]).
|
||||
-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]).
|
||||
|
||||
|
||||
-record(config, {
|
||||
|
@ -74,7 +76,6 @@ parse_config([], Config) ->
|
|||
Config.
|
||||
|
||||
|
||||
|
||||
-define(start_object, <<"{">>).
|
||||
-define(start_array, <<"[">>).
|
||||
-define(end_object, <<"}">>).
|
||||
|
@ -86,95 +87,50 @@ parse_config([], Config) ->
|
|||
-define(newline, <<"\n">>).
|
||||
|
||||
|
||||
-type state() :: {any(), unicode:charlist(), #config{}}.
|
||||
-type state() :: {unicode:charlist(), #config{}}.
|
||||
-spec init(Config::proplists:proplist()) -> state().
|
||||
|
||||
init(Config) -> {start, [], parse_config(Config)}.
|
||||
init(Config) -> {[], parse_config(Config)}.
|
||||
|
||||
|
||||
-spec handle_event(Event::any(), State::state()) -> state().
|
||||
|
||||
handle_event(Event, {start, Acc, Config}) ->
|
||||
case Event of
|
||||
{Type, Value} -> {[], [Acc, encode(Type, Value, Config)], Config}
|
||||
; start_object -> {[object_start], [Acc, ?start_object], Config}
|
||||
; start_array -> {[array_start], [Acc, ?start_array], Config}
|
||||
end;
|
||||
handle_event(Event, {[object_start|Stack], Acc, OldConfig = #config{depth = Depth}}) ->
|
||||
Config = OldConfig#config{depth = Depth + 1},
|
||||
case Event of
|
||||
{key, Key} ->
|
||||
{[object_value|Stack], [Acc, indent(Config), encode(string, Key, Config), ?colon, space(Config)], Config}
|
||||
; end_object ->
|
||||
{Stack, [Acc, ?end_object], OldConfig}
|
||||
end;
|
||||
handle_event(Event, {[object_value|Stack], Acc, Config}) ->
|
||||
case Event of
|
||||
{Type, Value} when Type == string; Type == literal;
|
||||
Type == integer; Type == float ->
|
||||
{[key|Stack], [Acc, encode(Type, Value, Config)], Config}
|
||||
; start_object -> {[object_start, key|Stack], [Acc, ?start_object], Config}
|
||||
; start_array -> {[array_start, key|Stack], [Acc, ?start_array], Config}
|
||||
end;
|
||||
handle_event(Event, {[key|Stack], Acc, Config = #config{depth = Depth}}) ->
|
||||
case Event of
|
||||
{key, Key} ->
|
||||
{[object_value|Stack], [Acc, ?comma, indent_or_space(Config), encode(string, Key, Config), ?colon, space(Config)], Config}
|
||||
; end_object ->
|
||||
NewConfig = Config#config{depth = Depth - 1},
|
||||
{Stack, [Acc, indent(NewConfig), ?end_object], NewConfig}
|
||||
end;
|
||||
handle_event(Event, {[array_start|Stack], Acc, OldConfig = #config{depth = Depth}}) ->
|
||||
Config = OldConfig#config{depth = Depth + 1},
|
||||
case Event of
|
||||
{Type, Value} when Type == string; Type == literal;
|
||||
Type == integer; Type == float ->
|
||||
{[array|Stack], [Acc, indent(Config), encode(Type, Value, Config)], Config}
|
||||
; start_object -> {[object_start, array|Stack], [Acc, indent(Config), ?start_object], Config}
|
||||
; start_array -> {[array_start, array|Stack], [Acc, indent(Config), ?start_array], Config}
|
||||
; end_array -> {Stack, [Acc, ?end_array], OldConfig}
|
||||
end;
|
||||
handle_event(Event, {[array|Stack], Acc, Config = #config{depth = Depth}}) ->
|
||||
case Event of
|
||||
{Type, Value} when Type == string; Type == literal;
|
||||
Type == integer; Type == float ->
|
||||
{[array|Stack], [Acc, ?comma, indent_or_space(Config), encode(Type, Value, Config)], Config}
|
||||
; end_array ->
|
||||
NewConfig = Config#config{depth = Depth - 1},
|
||||
{Stack, [Acc, indent(NewConfig), ?end_array], NewConfig}
|
||||
; start_object -> {[object_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_object], Config}
|
||||
; start_array -> {[array_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_array], Config}
|
||||
end;
|
||||
handle_event(end_json, {[], Acc, _Config}) -> unicode:characters_to_binary(Acc, utf8).
|
||||
handle_event(end_json, State) -> get_value(State);
|
||||
|
||||
handle_event(start_object, State) -> start_object(State);
|
||||
handle_event(end_object, State) -> finish(State);
|
||||
|
||||
handle_event(start_array, State) -> start_array(State);
|
||||
handle_event(end_array, State) -> finish(State);
|
||||
|
||||
handle_event({Type, Event}, {_, Config} = State) -> insert(encode(Type, Event, Config), State).
|
||||
|
||||
|
||||
encode(string, String, _Config) ->
|
||||
[?quote, String, ?quote];
|
||||
<<?quote/binary, String/binary, ?quote/binary>>;
|
||||
encode(key, Key, _Config) ->
|
||||
<<?quote/binary, Key/binary, ?quote/binary>>;
|
||||
encode(literal, Literal, _Config) ->
|
||||
erlang:atom_to_list(Literal);
|
||||
unicode:characters_to_binary(erlang:atom_to_list(Literal));
|
||||
encode(integer, Integer, _Config) ->
|
||||
erlang:integer_to_list(Integer);
|
||||
unicode:characters_to_binary(erlang:integer_to_list(Integer));
|
||||
encode(float, Float, _Config) ->
|
||||
[Output] = io_lib:format("~p", [Float]), Output.
|
||||
[Output] = io_lib:format("~p", [Float]), unicode:characters_to_binary(Output).
|
||||
|
||||
|
||||
space(Config) ->
|
||||
case Config#config.space of
|
||||
0 -> []
|
||||
0 -> <<>>
|
||||
; X when X > 0 -> binary:copy(?space, X)
|
||||
end.
|
||||
|
||||
|
||||
indent(Config) ->
|
||||
case Config#config.indent of
|
||||
0 -> []
|
||||
; X when X > 0 ->
|
||||
Indent = binary:copy(?space, X),
|
||||
indent(Indent, Config#config.depth, [?newline])
|
||||
0 -> <<>>
|
||||
; X when X > 0 -> <<?newline/binary, (binary:copy(?space, X * Config#config.depth))/binary>>
|
||||
end.
|
||||
|
||||
indent(_Indent, 0, Acc) -> Acc;
|
||||
indent(Indent, N, Acc) -> indent(Indent, N - 1, [Acc, Indent]).
|
||||
|
||||
|
||||
indent_or_space(Config) ->
|
||||
case Config#config.indent > 0 of
|
||||
|
@ -183,6 +139,119 @@ indent_or_space(Config) ->
|
|||
end.
|
||||
|
||||
|
||||
%% internal state is a stack and a config object
|
||||
%% `{Stack, Config}`
|
||||
%% the stack is a list of in progress objects/arrays
|
||||
%% `[Current, Parent, Grandparent,...OriginalAncestor]`
|
||||
%% an object has the representation on the stack of
|
||||
%% `{object, Object}`
|
||||
%% or if there's a key with a yet to be matched value
|
||||
%% `{object, Key, Object}`
|
||||
%% an array looks like
|
||||
%% `{array, Array}`
|
||||
%% `Object` and `Array` are utf8 encoded binaries
|
||||
|
||||
%% create a fresh state: an empty stack paired with a default `#config{}`
start_json() ->
    {[], #config{}}.

%% create a fresh state from a list of options. the options are handed to
%% `parse_config/1` and the resulting config is paired with an empty stack
start_json(Config) when is_list(Config) ->
    Parsed = parse_config(Config),
    {[], Parsed}.
|
||||
|
||||
%% push a new object onto the stack. its accumulator starts out as nothing
%% but the opening delimiter; the config is passed through untouched
start_object({Stack, Config}) ->
    {[{object, ?start_object}|Stack], Config}.
|
||||
|
||||
%% push a new array onto the stack. its accumulator starts out as nothing
%% but the opening delimiter; the config is passed through untouched
start_array({Stack, Config}) ->
    {[{array, ?start_array}|Stack], Config}.
|
||||
|
||||
%% close the object or array on top of the stack by appending its closing
%% delimiter, then hand the completed binary to `insert/2`. with ancestors on
%% the stack the binary is added to the parent; with an empty stack `insert/2`
%% returns `{Binary, Config}`, i.e. the finished document.
%% anything else on top of the stack (including an object still waiting for
%% the value of a key) is a `badarg` error
finish({[{Kind, Open}|Ancestors], Config}) when Kind =:= object; Kind =:= array ->
    Close = case Kind of
        object -> ?end_object;
        array -> ?end_array
    end,
    insert(<<Open/binary, Close/binary>>, {Ancestors, Config});
finish(_) ->
    erlang:error(badarg).
|
||||
|
||||
%% insert an already encoded binary into the current state. what happens
%% depends on what is on top of the stack:
%%   * empty stack: the binary becomes the state's value
%%   * object with no pending key: the binary is stored as the pending key
%%   * object with a pending key: the key/value pair is appended to the object
%%     (done by `insert/3`, which produces the same output)
%%   * array: the binary is appended as the next element
%% non binary input or any other stack shape is a `badarg` error
insert(Value, {[], Config}) when is_binary(Value) ->
    {Value, Config};
insert(Key, {[{object, Object}|Ancestors], Config}) when is_binary(Key) ->
    {[{object, Key, Object}|Ancestors], Config};
insert(Value, {[{object, Key, Object}|Ancestors], Config}) when is_binary(Value) ->
    insert(Key, Value, {[{object, Object}|Ancestors], Config});
%% the first element of an array is not preceded by a comma
insert(Value, {[{array, ?start_array}|Ancestors], Config}) when is_binary(Value) ->
    {[{array, <<?start_array/binary, Value/binary>>}|Ancestors], Config};
insert(Value, {[{array, Array}|Ancestors], Config}) when is_binary(Value) ->
    Separator = indent_or_space(Config),
    Grown = <<Array/binary, ?comma/binary, Separator/binary, Value/binary>>,
    {[{array, Grown}|Ancestors], Config};
insert(_, _) ->
    erlang:error(badarg).
|
||||
|
||||
%% append an already encoded key/value pair to the object on top of the
%% stack. key and value are joined by a colon followed by `space(Config)`.
%% the first pair directly follows the opening delimiter; later pairs are
%% preceded by a comma and `indent_or_space(Config)`.
%% non binary input or a stack whose top is not an object without a pending
%% key is a `badarg` error
insert(Key, Value, {[{object, ?start_object}|Ancestors], Config})
        when is_binary(Key), is_binary(Value) ->
    Padding = space(Config),
    First = <<?start_object/binary, Key/binary, ?colon/binary, Padding/binary, Value/binary>>,
    {[{object, First}|Ancestors], Config};
insert(Key, Value, {[{object, Object}|Ancestors], Config})
        when is_binary(Key), is_binary(Value) ->
    Separator = indent_or_space(Config),
    Padding = space(Config),
    Grown = <<Object/binary, ?comma/binary, Separator/binary,
              Key/binary, ?colon/binary, Padding/binary, Value/binary>>,
    {[{object, Grown}|Ancestors], Config};
insert(_, _, _) ->
    erlang:error(badarg).
|
||||
|
||||
|
||||
%% return the key that is still waiting for its value in the object on top
%% of the stack. any other state (no pending key, an array, an empty stack)
%% is a `badarg` error
get_key({[{object, PendingKey, _Object}|_Ancestors], _Config}) ->
    PendingKey;
get_key(_) ->
    erlang:error(badarg).
|
||||
|
||||
|
||||
%% return the finished json document held by the state. once the outermost
%% value is complete the first element of the state is a binary rather than
%% a stack; any other state is a `badarg` error.
%% the binary check lives in the function head so the config is never bound
%% to an unused variable and the value is not matched against itself
get_value({Value, _Config}) when is_binary(Value) ->
    Value;
get_value(_) ->
    erlang:error(badarg).
|
||||
|
||||
|
||||
|
||||
%% eunit tests
|
||||
|
||||
-ifdef(TEST).
|
||||
|
@ -215,7 +284,7 @@ config_test_() ->
|
|||
|
||||
space_test_() ->
|
||||
[
|
||||
{"no space", ?_assertEqual([], space(#config{space=0}))},
|
||||
{"no space", ?_assertEqual(<<>>, space(#config{space=0}))},
|
||||
{"one space", ?_assertEqual(<<" ">>, space(#config{space=1}))},
|
||||
{"four spaces", ?_assertEqual(<<" ">>, space(#config{space=4}))}
|
||||
].
|
||||
|
@ -223,21 +292,21 @@ space_test_() ->
|
|||
|
||||
indent_test_() ->
|
||||
[
|
||||
{"no indent", ?_assertEqual([], indent(#config{indent=0, depth=1}))},
|
||||
{"no indent", ?_assertEqual(<<>>, indent(#config{indent=0, depth=1}))},
|
||||
{"indent 1 depth 1", ?_assertEqual(
|
||||
[[?newline], ?space],
|
||||
<<?newline/binary, <<" ">>/binary>>,
|
||||
indent(#config{indent=1, depth=1})
|
||||
)},
|
||||
{"indent 1 depth 2", ?_assertEqual(
|
||||
[[[?newline], ?space], ?space],
|
||||
<<?newline/binary, <<" ">>/binary>>,
|
||||
indent(#config{indent=1, depth=2})
|
||||
)},
|
||||
{"indent 4 depth 1", ?_assertEqual(
|
||||
[[?newline], <<" ">>],
|
||||
<<?newline/binary, <<" ">>/binary>>,
|
||||
indent(#config{indent=4, depth=1})
|
||||
)},
|
||||
{"indent 4 depth 2", ?_assertEqual(
|
||||
[[[?newline], <<" ">>], <<" ">>],
|
||||
<<?newline/binary, <<" ">>/binary, <<" ">>/binary>>,
|
||||
indent(#config{indent=4, depth=2})
|
||||
)}
|
||||
].
|
||||
|
@ -250,7 +319,7 @@ indent_or_space_test_() ->
|
|||
indent_or_space(#config{space=1, indent=0, depth=1})
|
||||
)},
|
||||
{"indent so no space", ?_assertEqual(
|
||||
[[?newline], ?space],
|
||||
<<?newline/binary, <<" ">>/binary>>,
|
||||
indent_or_space(#config{space=1, indent=1, depth=1})
|
||||
)}
|
||||
].
|
||||
|
@ -258,50 +327,137 @@ indent_or_space_test_() ->
|
|||
|
||||
format_test_() ->
|
||||
[
|
||||
{"0.0", ?_assert(encode(float, 0.0, #config{}) =:= "0.0")},
|
||||
{"1.0", ?_assert(encode(float, 1.0, #config{}) =:= "1.0")},
|
||||
{"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= "-1.0")},
|
||||
{"0.0", ?_assert(encode(float, 0.0, #config{}) =:= <<"0.0">>)},
|
||||
{"1.0", ?_assert(encode(float, 1.0, #config{}) =:= <<"1.0">>)},
|
||||
{"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= <<"-1.0">>)},
|
||||
{"3.1234567890987654321",
|
||||
?_assert(
|
||||
encode(float, 3.1234567890987654321, #config{}) =:= "3.1234567890987655")
|
||||
encode(float, 3.1234567890987654321, #config{}) =:= <<"3.1234567890987655">>)
|
||||
},
|
||||
{"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= "1.0e23")},
|
||||
{"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= "0.3")},
|
||||
{"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= "0.0001")},
|
||||
{"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= "1.0e-5")},
|
||||
{"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= "1.0e-8")},
|
||||
{"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= "1.0e-323")},
|
||||
{"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= "1.0e308")},
|
||||
{"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= <<"1.0e23">>)},
|
||||
{"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= <<"0.3">>)},
|
||||
{"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= <<"0.0001">>)},
|
||||
{"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= <<"1.0e-5">>)},
|
||||
{"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= <<"1.0e-8">>)},
|
||||
{"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= <<"1.0e-323">>)},
|
||||
{"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= <<"1.0e308">>)},
|
||||
{"min normalized float",
|
||||
?_assert(
|
||||
encode(float, math:pow(2, -1022), #config{}) =:= "2.2250738585072014e-308"
|
||||
encode(float, math:pow(2, -1022), #config{}) =:= <<"2.2250738585072014e-308">>
|
||||
)
|
||||
},
|
||||
{"max normalized float",
|
||||
?_assert(
|
||||
encode(float, (2 - math:pow(2, -52)) * math:pow(2, 1023), #config{})
|
||||
=:= "1.7976931348623157e308"
|
||||
=:= <<"1.7976931348623157e308">>
|
||||
)
|
||||
},
|
||||
{"min denormalized float",
|
||||
?_assert(encode(float, math:pow(2, -1074), #config{}) =:= "5.0e-324")
|
||||
?_assert(encode(float, math:pow(2, -1074), #config{}) =:= <<"5.0e-324">>)
|
||||
},
|
||||
{"max denormalized float",
|
||||
?_assert(
|
||||
encode(float, (1 - math:pow(2, -52)) * math:pow(2, -1022), #config{})
|
||||
=:= "2.225073858507201e-308"
|
||||
=:= <<"2.225073858507201e-308">>
|
||||
)
|
||||
}
|
||||
},
|
||||
{"hello world", ?_assert(encode(string, <<"hello world">>, #config{}) =:= <<"\"hello world\"">>)},
|
||||
{"key", ?_assert(encode(key, <<"key">>, #config{}) =:= <<"\"key\"">>)},
|
||||
{"1", ?_assert(encode(integer, 1, #config{}) =:= <<"1">>)},
|
||||
{"-1", ?_assert(encode(integer, -1, #config{}) =:= <<"-1">>)},
|
||||
{"true", ?_assert(encode(literal, true, #config{}) =:= <<"true">>)},
|
||||
{"false", ?_assert(encode(literal, false, #config{}) =:= <<"false">>)},
|
||||
{"null", ?_assert(encode(literal, null, #config{}) =:= <<"null">>)}
|
||||
].
|
||||
|
||||
|
||||
%% exercises the state manipulation api (start_json, start_object,
%% start_array, insert, get_key, finish) directly against hand built states.
%% every state except the "with config" case uses the default config, bound
%% once here
rep_manipulation_test_() ->
    Config = #config{},
    [
        {"allocate a new context", ?_assertEqual(
            {[], Config},
            start_json()
        )},
        {"allocate a new context with config", ?_assertEqual(
            {[], #config{space=1, indent=2}},
            start_json([{space, 1}, {indent, 2}])
        )},
        {"allocate a new object on an empty stack", ?_assertEqual(
            {[{object, <<"{">>}], Config},
            start_object({[], Config})
        )},
        {"allocate a new object on a stack", ?_assertEqual(
            {[{object, <<"{">>}, {object, <<"{">>}], Config},
            start_object({[{object, <<"{">>}], Config})
        )},
        {"allocate a new array on an empty stack", ?_assertEqual(
            {[{array, <<"[">>}], Config},
            start_array({[], Config})
        )},
        {"allocate a new array on a stack", ?_assertEqual(
            {[{array, <<"[">>}, {object, <<"{">>}], Config},
            start_array({[{object, <<"{">>}], Config})
        )},
        {"insert a key into an object", ?_assertEqual(
            {[{object, <<"\"key\"">>, <<"{">>}], Config},
            insert(<<"\"key\"">>, {[{object, <<"{">>}], Config})
        )},
        {"get current key", ?_assertEqual(
            key,
            get_key({[{object, key, <<"{">>}], Config})
        )},
        {"try to get non-key from object", ?_assertError(
            badarg,
            get_key({[{object, <<"{">>}], Config})
        )},
        {"try to get key from array", ?_assertError(
            badarg,
            get_key({[{array, <<"[">>}], Config})
        )},
        {"insert a value into an object", ?_assertEqual(
            {[{object, <<"{\"key\":true">>}], Config},
            insert(<<"true">>, {[{object, <<"\"key\"">>, <<"{">>}], Config})
        )},
        {"insert a value into an array", ?_assertEqual(
            {[{array, <<"[true">>}], Config},
            insert(<<"true">>, {[{array, <<"[">>}], Config})
        )},
        {"insert a key/value pair into an object", ?_assertEqual(
            {[{object, <<"{\"x\":true,\"y\":false">>}], Config},
            insert(<<"\"y\"">>, <<"false">>, {[{object, <<"{\"x\":true">>}], Config})
        )},
        {"finish an object with no ancestor", ?_assertEqual(
            {<<"{\"x\":true,\"y\":false}">>, Config},
            finish({[{object, <<"{\"x\":true,\"y\":false">>}], Config})
        )},
        {"finish an empty object", ?_assertEqual(
            {<<"{}">>, Config},
            finish({[{object, <<"{">>}], Config})
        )},
        {"finish an object with an ancestor", ?_assertEqual(
            {[{object, <<"{\"a\":[],\"b\":{\"x\":true,\"y\":false}">>}], Config},
            finish({
                [{object, <<"{\"x\":true,\"y\":false">>}, {object, <<"\"b\"">>, <<"{\"a\":[]">>}],
                Config
            })
        )},
        {"finish an array with no ancestor", ?_assertEqual(
            {<<"[true,false,null]">>, Config},
            finish({[{array, <<"[true,false,null">>}], Config})
        )},
        {"finish an array with an ancestor", ?_assertEqual(
            {[{array, <<"[1,2,3,[true,false,null]">>}], Config},
            finish({[{array, <<"[true,false,null">>}, {array, <<"[1,2,3">>}], Config})
        )}
    ].
|
||||
|
||||
|
||||
handle_event_test_() ->
|
||||
Data = jsx:test_cases(),
|
||||
Data = jsx:test_cases() ++ jsx:special_test_cases(),
|
||||
[
|
||||
{
|
||||
Title, ?_assertEqual(
|
||||
JSON,
|
||||
lists:foldl(fun handle_event/2, {start, [], #config{}}, Events ++ [end_json])
|
||||
lists:foldl(fun handle_event/2, init([]), Events ++ [end_json])
|
||||
)
|
||||
} || {Title, JSON, _, Events} <- Data
|
||||
].
|
||||
|
|
|
@ -25,11 +25,12 @@
|
|||
|
||||
-export([to_term/2]).
|
||||
-export([init/1, handle_event/2]).
|
||||
-export([start_term/0, start_term/1]).
|
||||
-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]).
|
||||
|
||||
|
||||
-record(config, {
|
||||
labels = binary,
|
||||
post_decode = false
|
||||
labels = binary
|
||||
}).
|
||||
|
||||
-type config() :: list().
|
||||
|
@ -59,8 +60,6 @@ parse_config([{labels, Val}|Rest], Config)
|
|||
parse_config(Rest, Config#config{labels = Val});
|
||||
parse_config([labels|Rest], Config) ->
|
||||
parse_config(Rest, Config#config{labels = binary});
|
||||
parse_config([{post_decode, F}|Rest], Config=#config{post_decode=false}) when is_function(F, 1) ->
|
||||
parse_config(Rest, Config#config{post_decode=F});
|
||||
parse_config([{K, _}|Rest] = Options, Config) ->
|
||||
case lists:member(K, jsx_config:valid_flags()) of
|
||||
true -> parse_config(Rest, Config)
|
||||
|
@ -77,34 +76,21 @@ parse_config([], Config) ->
|
|||
-type state() :: {[any()], #config{}}.
|
||||
-spec init(Config::proplists:proplist()) -> state().
|
||||
|
||||
init(Config) -> {[[]], parse_config(Config)}.
|
||||
init(Config) -> {[], parse_config(Config)}.
|
||||
|
||||
-spec handle_event(Event::any(), State::state()) -> state().
|
||||
|
||||
handle_event(end_json, {[[Terms]], _Config}) -> Terms;
|
||||
handle_event(end_json, State) -> get_value(State);
|
||||
|
||||
handle_event(start_object, {Terms, Config}) -> {[[]|Terms], Config};
|
||||
handle_event(end_object, {[[], {key, Key}, Last|Terms], Config}) ->
|
||||
{[[{Key, post_decode([{}], Config)}] ++ Last] ++ Terms, Config};
|
||||
handle_event(end_object, {[Object, {key, Key}, Last|Terms], Config}) ->
|
||||
{[[{Key, post_decode(lists:reverse(Object), Config)}] ++ Last] ++ Terms, Config};
|
||||
handle_event(end_object, {[[], Last|Terms], Config}) ->
|
||||
{[[post_decode([{}], Config)] ++ Last] ++ Terms, Config};
|
||||
handle_event(end_object, {[Object, Last|Terms], Config}) ->
|
||||
{[[post_decode(lists:reverse(Object), Config)] ++ Last] ++ Terms, Config};
|
||||
handle_event(start_object, State) -> start_object(State);
|
||||
handle_event(end_object, State) -> finish(State);
|
||||
|
||||
handle_event(start_array, {Terms, Config}) -> {[[]|Terms], Config};
|
||||
handle_event(end_array, {[List, {key, Key}, Last|Terms], Config}) ->
|
||||
{[[{Key, post_decode(lists:reverse(List), Config)}] ++ Last] ++ Terms, Config};
|
||||
handle_event(end_array, {[List, Last|Terms], Config}) ->
|
||||
{[[post_decode(lists:reverse(List), Config)] ++ Last] ++ Terms, Config};
|
||||
handle_event(start_array, State) -> start_array(State);
|
||||
handle_event(end_array, State) -> finish(State);
|
||||
|
||||
handle_event({key, Key}, {Terms, Config}) -> {[{key, format_key(Key, Config)}] ++ Terms, Config};
|
||||
handle_event({key, Key}, {_, Config} = State) -> insert(format_key(Key, Config), State);
|
||||
|
||||
handle_event({_, Event}, {[{key, Key}, Last|Terms], Config}) ->
|
||||
{[[{Key, post_decode(Event, Config)}] ++ Last] ++ Terms, Config};
|
||||
handle_event({_, Event}, {[Last|Terms], Config}) ->
|
||||
{[[post_decode(Event, Config)] ++ Last] ++ Terms, Config}.
|
||||
handle_event({_, Event}, State) -> insert(Event, State).
|
||||
|
||||
|
||||
format_key(Key, Config) ->
|
||||
|
@ -121,8 +107,60 @@ format_key(Key, Config) ->
|
|||
end.
|
||||
|
||||
|
||||
post_decode(Value, #config{post_decode=false}) -> Value;
|
||||
post_decode(Value, Config) -> (Config#config.post_decode)(Value).
|
||||
%% internal state is a stack and a config object
|
||||
%% `{Stack, Config}`
|
||||
%% the stack is a list of in progress objects/arrays
|
||||
%% `[Current, Parent, Grandparent,...OriginalAncestor]`
|
||||
%% an object has the representation on the stack of
|
||||
%% `{object, [{NthKey, NthValue}, {NMinus1Key, NthMinus1Value},...{FirstKey, FirstValue}]}`
|
||||
%% or if there's a key with a yet to be matched value
|
||||
%% `{object, Key, [{NthKey, NthValue},...]}`
|
||||
%% an array looks like
|
||||
%% `{array, [NthValue, NthMinus1Value,...FirstValue]}`
|
||||
|
||||
%% create a fresh state: an empty stack paired with a default `#config{}`
start_term() ->
    {[], #config{}}.

%% create a fresh state from a list of options. the options are handed to
%% `parse_config/1` and the resulting config is paired with an empty stack
start_term(Config) when is_list(Config) ->
    Parsed = parse_config(Config),
    {[], Parsed}.
|
||||
|
||||
%% push a new object with no key/value pairs onto the stack; the config is
%% passed through untouched
start_object({Stack, Config}) ->
    {[{object, []}|Stack], Config}.
|
||||
|
||||
%% push a new array with no elements onto the stack; the config is passed
%% through untouched
start_array({Stack, Config}) ->
    {[{array, []}|Stack], Config}.
|
||||
|
||||
%% complete the object or array on top of the stack and hand the resulting
%% term to `insert/2`. with ancestors on the stack the term is added to the
%% parent; with an empty stack `insert/2` returns `{Term, Config}`, i.e. the
%% finished document.
%% pairs and values are accumulated newest first, so they are reversed here.
%% an object without any pairs is represented as `[{}]`.
%% anything else on top of the stack (including an object still waiting for
%% the value of a key) is a `badarg` error
finish({[{object, []}|Ancestors], Config}) ->
    insert([{}], {Ancestors, Config});
finish({[{object, Pairs}|Ancestors], Config}) ->
    insert(lists:reverse(Pairs), {Ancestors, Config});
finish({[{array, Values}|Ancestors], Config}) ->
    insert(lists:reverse(Values), {Ancestors, Config});
finish(_) ->
    erlang:error(badarg).
|
||||
|
||||
%% insert a term into the current state. what happens depends on what is on
%% top of the stack:
%%   * empty stack: the term becomes the state's value
%%   * object with no pending key: the term is stored as the pending key
%%   * object with a pending key: the key/term pair is prepended to the pairs
%%   * array: the term is prepended to the values
%% any other stack shape is a `badarg` error
insert(Term, {[], Config}) ->
    {Term, Config};
insert(Key, {[{object, Pairs}|Ancestors], Config}) ->
    {[{object, Key, Pairs}|Ancestors], Config};
insert(Term, {[{object, Key, Pairs}|Ancestors], Config}) ->
    {[{object, [{Key, Term}|Pairs]}|Ancestors], Config};
insert(Term, {[{array, Values}|Ancestors], Config}) ->
    {[{array, [Term|Values]}|Ancestors], Config};
insert(_, _) ->
    erlang:error(badarg).
|
||||
|
||||
%% prepend a key/value pair to the object on top of the stack. a stack whose
%% top is not an object without a pending key is a `badarg` error
insert(Key, Value, {[{object, Pairs}|Ancestors], Config}) ->
    {[{object, [{Key, Value}|Pairs]}|Ancestors], Config};
insert(_, _, _) ->
    erlang:error(badarg).
|
||||
|
||||
|
||||
%% return the key that is still waiting for its value in the object on top
%% of the stack. any other state (no pending key, an array, an empty stack)
%% is a `badarg` error
get_key({[{object, PendingKey, _Pairs}|_Ancestors], _Config}) ->
    PendingKey;
get_key(_) ->
    erlang:error(badarg).
|
||||
|
||||
|
||||
%% return the first element of a `{Value, Config}` state unchanged. no check
%% is made on what that element is; anything that is not a two element tuple
%% is a `badarg` error
get_value({Result, _Config}) ->
    Result;
get_value(_) ->
    erlang:error(badarg).
|
||||
|
||||
|
||||
%% eunit tests
|
||||
|
@ -132,9 +170,6 @@ post_decode(Value, Config) -> (Config#config.post_decode)(Value).
|
|||
|
||||
|
||||
config_test_() ->
|
||||
%% for post_decode tests
|
||||
F = fun(X) -> X end,
|
||||
G = fun(X, Y) -> {X, Y} end,
|
||||
[
|
||||
{"empty config", ?_assertEqual(#config{}, parse_config([]))},
|
||||
{"implicit binary labels", ?_assertEqual(#config{}, parse_config([labels]))},
|
||||
|
@ -144,15 +179,6 @@ config_test_() ->
|
|||
#config{labels=existing_atom},
|
||||
parse_config([{labels, existing_atom}])
|
||||
)},
|
||||
{"sloppy existing atom labels", ?_assertEqual(
|
||||
#config{labels=attempt_atom},
|
||||
parse_config([{labels, attempt_atom}])
|
||||
)},
|
||||
{"post decode", ?_assertEqual(
|
||||
#config{post_decode=F},
|
||||
parse_config([{post_decode, F}])
|
||||
)},
|
||||
{"post decode wrong arity", ?_assertError(badarg, parse_config([{post_decode, G}]))},
|
||||
{"invalid opt flag", ?_assertError(badarg, parse_config([error]))},
|
||||
{"invalid opt tuple", ?_assertError(badarg, parse_config([{error, true}]))}
|
||||
].
|
||||
|
@ -181,110 +207,79 @@ format_key_test_() ->
|
|||
].
|
||||
|
||||
|
||||
post_decoders_test_() ->
|
||||
Events = [
|
||||
[{}],
|
||||
[{<<"key">>, <<"value">>}],
|
||||
[{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
|
||||
[],
|
||||
[<<"string">>],
|
||||
[true, false, null],
|
||||
true,
|
||||
false,
|
||||
null,
|
||||
<<"hello">>,
|
||||
<<"world">>,
|
||||
1,
|
||||
1.0
|
||||
],
|
||||
rep_manipulation_test_() ->
|
||||
[
|
||||
{"no post_decode", ?_assertEqual(
|
||||
Events,
|
||||
[ post_decode(Event, #config{}) || Event <- Events ]
|
||||
{"allocate a new context", ?_assertEqual(
|
||||
{[], #config{}},
|
||||
start_term()
|
||||
)},
|
||||
{"replace arrays with empty arrays", ?_assertEqual(
|
||||
[
|
||||
[{}],
|
||||
[{<<"key">>, <<"value">>}],
|
||||
[{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
|
||||
[],
|
||||
[],
|
||||
[],
|
||||
true,
|
||||
false,
|
||||
null,
|
||||
<<"hello">>,
|
||||
<<"world">>,
|
||||
1,
|
||||
1.0
|
||||
],
|
||||
[ post_decode(Event, #config{
|
||||
post_decode=fun([T|_] = V) when is_tuple(T) -> V; (V) when is_list(V) -> []; (V) -> V end
|
||||
}) || Event <- Events
|
||||
]
|
||||
{"allocate a new context with option", ?_assertEqual(
|
||||
{[], #config{labels=atom}},
|
||||
start_term([{labels, atom}])
|
||||
)},
|
||||
{"replace objects with empty objects", ?_assertEqual(
|
||||
[
|
||||
[{}],
|
||||
[{}],
|
||||
[{}],
|
||||
[],
|
||||
[<<"string">>],
|
||||
[true, false, null],
|
||||
true,
|
||||
false,
|
||||
null,
|
||||
<<"hello">>,
|
||||
<<"world">>,
|
||||
1,
|
||||
1.0
|
||||
],
|
||||
[ post_decode(Event, #config{
|
||||
post_decode=fun([T|_]) when is_tuple(T) -> [{}]; (V) -> V end
|
||||
}) || Event <- Events
|
||||
]
|
||||
{"allocate a new object on an empty stack", ?_assertEqual(
|
||||
{[{object, []}], #config{}},
|
||||
start_object({[], #config{}})
|
||||
)},
|
||||
{"replace all non-array/non-object values with false", ?_assertEqual(
|
||||
[
|
||||
[{}],
|
||||
[{<<"key">>, <<"value">>}],
|
||||
[{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
|
||||
[],
|
||||
[<<"string">>],
|
||||
[true, false, null],
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false
|
||||
],
|
||||
[ post_decode(Event, #config{
|
||||
post_decode=fun(V) when is_list(V) -> V; (_) -> false end
|
||||
}) || Event <- Events
|
||||
]
|
||||
{"allocate a new object on a stack", ?_assertEqual(
|
||||
{[{object, []}, {object, []}], #config{}},
|
||||
start_object({[{object, []}], #config{}})
|
||||
)},
|
||||
{"atoms_to_strings", ?_assertEqual(
|
||||
[
|
||||
[{}],
|
||||
[{<<"key">>, <<"value">>}],
|
||||
[{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
|
||||
[],
|
||||
[<<"string">>],
|
||||
[true, false, null],
|
||||
<<"true">>,
|
||||
<<"false">>,
|
||||
<<"null">>,
|
||||
<<"hello">>,
|
||||
<<"world">>,
|
||||
1,
|
||||
1.0
|
||||
],
|
||||
[ post_decode(Event, #config{
|
||||
post_decode=fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end
|
||||
}) || Event <- Events
|
||||
]
|
||||
{"allocate a new array on an empty stack", ?_assertEqual(
|
||||
{[{array, []}], #config{}},
|
||||
start_array({[], #config{}})
|
||||
)},
|
||||
{"allocate a new array on a stack", ?_assertEqual(
|
||||
{[{array, []}, {object, []}], #config{}},
|
||||
start_array({[{object, []}], #config{}})
|
||||
)},
|
||||
{"insert a key into an object", ?_assertEqual(
|
||||
{[{object, key, []}, junk], #config{}},
|
||||
insert(key, {[{object, []}, junk], #config{}})
|
||||
)},
|
||||
{"get current key", ?_assertEqual(
|
||||
key,
|
||||
get_key({[{object, key, []}], #config{}})
|
||||
)},
|
||||
{"try to get non-key from object", ?_assertError(
|
||||
badarg,
|
||||
get_key({[{object, []}], #config{}})
|
||||
)},
|
||||
{"try to get key from array", ?_assertError(
|
||||
badarg,
|
||||
get_key({[{array, []}], #config{}})
|
||||
)},
|
||||
{"insert a value into an object", ?_assertEqual(
|
||||
{[{object, [{key, value}]}, junk], #config{}},
|
||||
insert(value, {[{object, key, []}, junk], #config{}})
|
||||
)},
|
||||
{"insert a value into an array", ?_assertEqual(
|
||||
{[{array, [value]}, junk], #config{}},
|
||||
insert(value, {[{array, []}, junk], #config{}})
|
||||
)},
|
||||
{"insert a key/value pair into an object", ?_assertEqual(
|
||||
{[{object, [{key, value}, {x, y}]}, junk], #config{}},
|
||||
insert(key, value, {[{object, [{x, y}]}, junk], #config{}})
|
||||
)},
|
||||
{"finish an object with no ancestor", ?_assertEqual(
|
||||
{[{a, b}, {x, y}], #config{}},
|
||||
finish({[{object, [{x, y}, {a, b}]}], #config{}})
|
||||
)},
|
||||
{"finish an empty object", ?_assertEqual(
|
||||
{[{}], #config{}},
|
||||
finish({[{object, []}], #config{}})
|
||||
)},
|
||||
{"finish an object with an ancestor", ?_assertEqual(
|
||||
{[{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], #config{}},
|
||||
finish({[{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}], #config{}})
|
||||
)},
|
||||
{"finish an array with no ancestor", ?_assertEqual(
|
||||
{[a, b, c], #config{}},
|
||||
finish({[{array, [c, b, a]}], #config{}})
|
||||
)},
|
||||
{"finish an array with an ancestor", ?_assertEqual(
|
||||
{[{array, [[a, b, c], d, e, f]}], #config{}},
|
||||
finish({[{array, [c, b, a]}, {array, [d, e, f]}], #config{}})
|
||||
)}
|
||||
].
|
||||
|
||||
|
@ -295,7 +290,7 @@ handle_event_test_() ->
|
|||
{
|
||||
Title, ?_assertEqual(
|
||||
Term,
|
||||
lists:foldl(fun handle_event/2, {[[]], #config{}}, Events ++ [end_json])
|
||||
lists:foldl(fun handle_event/2, init([]), Events ++ [end_json])
|
||||
)
|
||||
} || {Title, _, Term, Events} <- Data
|
||||
].
|
||||
|
|
|
@ -159,7 +159,7 @@ repeated_keys_test_() ->
|
|||
|
||||
|
||||
handle_event_test_() ->
|
||||
Data = jsx:test_cases(),
|
||||
Data = jsx:test_cases() ++ jsx:special_test_cases(),
|
||||
[
|
||||
{
|
||||
Title, ?_assertEqual(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue