Merge branch 'vtwopointoh' into develop

2014-01-14 00:41:03 +00:00 · 2014-01-14 00:41:03 +00:00 · ff3915abbc
commit ff3915abbc
parent d011411c23 92f0a65dab
13 changed files with 2406 additions and 3116 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -1,3 +1,16 @@
+v2.0
+
+* jsx is much more pragmatic by default; common json errors are silently
+    ignored (and fixed). stricter parsing must be enabled with options
+* removed `pre_encode` and `post_decode` options in favour of making jsx
+    functions easier to wrap and customize
+* added abstraction layer for manipulating the internal state of `jsx_to_term`
+    and `jsx_to_json` and exposed it to user code
+* streaming behavior is now disabled by default and must be requested explicitly
+* removed deprecated function names (`to_json`, `to_term`, `term_to_json`, etc) 
+* expanded test coverage
+    
+
 v1.4.5

 * various fixes to typespecs uncovered by dialyzer
--- a/README.md
+++ b/README.md
@ -1,16 +1,21 @@
-# jsx (v1.4.5) #
+# jsx (v2.0) #

 an erlang application for consuming, producing and manipulating [json][json]. 
 inspired by [yajl][yajl]

-jsx is built via [rebar][rebar] and continuous integration testing provided courtesy [travis][travis]
+**jsx** is built via [rebar][rebar] and continuous integration testing provided courtesy of [travis][travis]

 current status: [![Build Status](https://secure.travis-ci.org/talentdeficit/jsx.png?branch=develop)](http://travis-ci.org/talentdeficit/jsx)

-jsx is released under the terms of the [MIT][MIT] license
+**jsx** is released under the terms of the [MIT][MIT] license

 copyright 2010-2013 alisdair sullivan

+## really important note ##
+
+this is a preview of the 2.0 release. there are lots of changes. see [CHANGES.md](CHANGES.md)
+for the overview or read this document for the details
+
 ## index ##

 * [quickstart](#quickstart)
@ -21,7 +26,6 @@ copyright 2010-2013 alisdair sullivan
  - [`json_term()`](#json_term)
  - [`json_text()`](#json_text)
  - [`event()`](#event)
-  - [`token()`](#token)
  - [`option()`](#option)
 * [exports](#exports)
  - [`encoder/3`, `decoder/3` & `parser/3`](#encoder3-decoder3--parser3)
@ -113,27 +117,31 @@ false
 ## description ##


-jsx is an erlang application for consuming, producing and manipulating 
+**jsx** is an erlang application for consuming, producing and manipulating 
 [json][json]

-json has a [spec][rfc4627] but common usage differs subtly. it's common 
-usage jsx attempts to address, with guidance from the spec
+**jsx** follows the json [spec][rfc4627] as closely as possible with allowances for
+real world usage

-all json produced and consumed by jsx should be `utf8` encoded text or a 
-reasonable approximation thereof. ascii works too, but anything beyond that 
-i'm not going to make any promises. **especially** not latin1
+**jsx** is pragmatic. the json spec allows extensions so **jsx** extends the spec in a
+number of ways. see the section on `strict` in [options](#option) below though

-the [spec][rfc4627] thinks json values must be wrapped in a json array or 
-object but everyone else disagrees so jsx allows naked json values by default. 
-if you're a curmudgeon who's offended by this deviation here is a wrapper for 
-you:
+json has no official comments but this parser allows c/c++ style comments. 
+anywhere whitespace is allowed you can insert comments (both `// ...` and `/* ... */`)
+
+all **jsx** decoder input should be `utf8` encoded binaries. sometimes you get binaries
+that are almost but not quite valid utf8 whether due to improper escaping or poor
+encoding. **jsx** replaces invalid codepoints and poorly formed sequences with the 
+unicode replacement character (`u+FFFD`)
+
+json only allows keys and strings to be delimited by double quotes (`u+0022`) but
+javascript allows them to be delimited by single quotes (`u+0027`) as well. **jsx**
+follows javascript in this. strings that start with single quotes can contain double
+quotes but must end with single quotes and must escape any single quotes they contain
+
+json and **jsx** only recognize escape sequences as outlined in the json spec. **jsx** just
+ignores bad escape sequences

-```erlang
-%% usage: `real_json(jsx:decode(JSON))`
-real_json(Result) when is_list(Result) -> Result;
-real_json(Result) when is_tuple(Result, 2) -> Result;
-real_json(_) -> erlang:error(badarg).
-```


 ### json &lt;-> erlang mapping ###
@ -148,17 +156,18 @@ real_json(_) -> erlang:error(badarg).

 *   numbers

-    javascript and thus json represent all numeric values with floats. as 
-    this is woefully insufficient for many uses, **jsx**, just like erlang, 
-    supports bigints. whenever possible, this library will interpret json 
-    numbers that look like integers as integers. other numbers will be converted 
-    to erlang's floating point type, which is nearly but not quite iee754. 
-    negative zero is not representable in erlang (zero is unsigned in erlang and 
-    `0` is equivalent to `-0`) and will be interpreted as regular zero. numbers 
-    not representable are beyond the concern of this implementation, and will 
-    result in parsing errors
+    javascript and thus json represent all numeric values with floats. there's no
+    reason for erlang -- a language that supports arbitrarily large integers -- to
+    restrict all numbers to the ieee754 range
    
-    when converting from erlang to json, numbers are represented with their 
+    whenever possible, **jsx** will interpret json numbers that look like integers as 
+    integers. other numbers will be converted to erlang's floating point type, which
+    is nearly but not quite ieee754. negative zero is not representable in erlang (zero
+    is unsigned in erlang and `0` is equivalent to `-0`) and will be interpreted as
+    regular zero. numbers not representable are beyond the concern of this implementation,
+    and will result in parsing errors
+
+    when converting from erlang to json, floats are represented with their 
    shortest representation that will round trip without loss of precision. this 
    means that some floats may be superficially dissimilar (although 
    functionally equivalent). for example, `1.0000000000000001` will be 
@ -166,32 +175,23 @@ real_json(_) -> erlang:error(badarg).

 *   strings

-    all erlang strings are represented by **valid** `utf8` encoded binaries or
-    atoms. note that the atoms `true`, `false` and `null` will never be
-    automatically converted to strings as the json equivalent values take
-    precedence. when decoding json strings will always be presented as binaries,
-    never atoms
-
-    the [json spec][rfc4627] is frustratingly vague on the exact details of json 
-    strings. json must be unicode, but no encoding is specified. javascript 
-    explicitly allows strings containing codepoints explicitly disallowed by 
-    unicode. json allows implementations to set limits on the content of 
-    strings. other implementations attempt to resolve this in various ways. this 
-    implementation, in default operation, only accepts strings that meet the 
-    constraints set out in the json spec (strings are sequences of unicode 
-    codepoints deliminated by `"` (`u+0022`) that may not contain control codes 
-    unless properly escaped with `\` (`u+005c`)) and that are encoded in `utf8`
-
-    the utf8 restriction means improperly paired surrogates are explicitly 
-    disallowed. `u+d800` to `u+dfff` are allowed, but only when they form valid 
-    surrogate pairs. surrogates encountered otherwise result in errors. the
-    noncharacters will also result in errors
+    json strings must be unicode encoded binaries or erlang atoms. in practice,
+    because **jsx** only accepts `utf8` binaries all binary strings must be `utf8`.
+    in addition to being unicode json strings restrict a number of codepoints and
+    define a number of escape sequences

    json string escapes of the form `\uXXXX` will be converted to their 
    equivalent codepoints during parsing. this means control characters and 
    other codepoints disallowed by the json spec may be encountered in resulting 
-    strings, but codepoints disallowed by the unicode spec will not be. in the 
-    interest of pragmatism there is an [option](#option) for looser parsing
+    strings. the utf8 restriction means the surrogates are explicitly disallowed.
+    if a string contains escaped surrogates (`u+d800` to `u+dfff`) they are
+    interpreted but only when they form valid surrogate pairs. surrogates
+    encountered otherwise are replaced with the replacement codepoint (`u+fffd`)
+
+    all erlang strings are represented by **valid** `utf8` encoded binaries. the 
+    encoder will check strings for conformance. noncharacters (like `u+ffff`) 
+    are allowed in erlang utf8 encoded binaries, but will be replaced in strings
+    passed to the encoder (although, again, see [options](#option))

    this implementation performs no normalization on strings beyond that 
    detailed here. be careful when comparing strings as equivalent strings 
@ -220,22 +220,30 @@ real_json(_) -> erlang:error(badarg).

 ### incomplete input ###

-jsx handles incomplete json texts. if a partial json text is parsed, rather than 
-returning a term from your callback handler, jsx returns `{incomplete, F}` where 
-`F` is a function with an identical API to the anonymous fun returned from 
-`decoder/3`, `encoder/3` or `parser/3`. it retains the internal state of the 
-parser at the point where input was exhausted. this allows you to parse as you 
-stream json over a socket or file descriptor, or to parse large json texts 
-without needing to keep them entirely in memory
+**jsx** can handle incomplete json texts. if the option `stream` is passed to the decoder
+or parser and if a partial json text is parsed, rather than returning a term from
+your callback handler, **jsx** returns `{incomplete, F}` where  `F` is a function with 
+an identical API to the anonymous fun returned from `decoder/3`, `encoder/3` or 
+`parser/3`. it retains the internal state of the  parser at the point where input
+was exhausted. this allows you to parse as you stream json over a socket or file 
+descriptor, or to parse large json texts without needing to keep them entirely in
+memory

-however, it is important to recognize that jsx is greedy by default. jsx will 
-consider the parsing complete if input is exhausted and the json text is not 
-unambiguously incomplete. this is mostly relevant when parsing bare numbers like 
-`<<"1234">>`. this could be a complete json integer or just the beginning of a 
-json integer that is being parsed incrementally. jsx will treat it as a whole 
-integer. calling jsx with the [option](#options) `explicit_end` reverses this 
-behavior and never considers parsing complete until the `incomplete` function is 
-called with the argument `end_stream`
+however, it is important to recognize that **jsx** is conservative by default. **jsx** will
+not consider the parsing complete even when input is exhausted and the json text is
+unambiguously complete. to end parsing call the `incomplete` function with the
+argument `end_stream` like:
+
+```erlang
+1> {incomplete, F} = jsx:decode(<<"[">>, [stream]).
+{incomplete,#Fun<jsx_decoder.1.122947756>}
+2> F(end_stream).
+** exception error: bad argument
+3> {incomplete, G} = F(<<"]">>).
+{incomplete,#Fun<jsx_decoder.1.122947756>}
+4> G(end_stream).
+[]
+```


 ## data types ##
@ -282,50 +290,32 @@ event() = start_object
    | end_json
 ```

-#### `token()` ####
-
-```erlang
-token() = event()
-    | binary()
-    | {number, integer() | float()}
-    | integer()
-    | float()
-    | true
-    | false
-    | null
-```
-
-the representation used during syntactic analysis. you can generate this 
-yourself and feed it to `jsx:parser/3` if you'd like to define your own 
-representations
+the subset of `token()` emitted by the decoder and encoder to handlers

 #### `option()` ####

 ```erlang
-option() = replaced_bad_utf8
-    | escaped_forward_slashes
-    | single_quoted_strings
-    | unescaped_jsonp
-    | comments
+option() = escaped_forward_slashes
    | escaped_strings
+    | unescaped_jsonp
    | dirty_strings
-    | ignored_bad_escapes
-    | relax
-    | explicit_end
+    | strict
+    | {strict, [strict_option()]}
+    | stream
+    | {incomplete_handler, fun()}
+    | {error_handler, fun()}
+
+strict_option() = comments
+    | utf8
+    | single_quotes
+    | escapes
 ``` 

-jsx functions all take a common set of options. not all flags have meaning 
+**jsx** functions all take a common set of options. not all flags have meaning 
 in all contexts, but they are always valid options. functions may have 
 additional options beyond these. see 
 [individual function documentation](#exports) for details

- `replaced_bad_utf8`
-
-    json text input and json strings SHOULD be utf8 encoded binaries, 
-    appropriately escaped as per the json spec. attempts are made to replace 
-    invalid codepoints with `u+FFFD` as per the unicode spec when this option is 
-    present. this applies both to malformed unicode and disallowed codepoints
-
 - `escaped_forward_slashes`

    json strings are escaped according to the json spec. this means forward 
@ -333,35 +323,6 @@ additional options beyond these. see
    are left unescaped. you may want to use this if you are embedding json 
    directly into a html or xml document

- `single_quoted_strings`
-
-    some parsers allow double quotes (`u+0022`) to be replaced by single quotes 
-    (`u+0027`) to delimit keys and strings. this option allows json containing 
-    single quotes as structural characters to be parsed without errors. note 
-    that the parser expects strings to be terminated by the same quote type that 
-    opened it and that single quotes must, obviously, be escaped within strings 
-    delimited by single quotes
-
-    double quotes must **always** be escaped, regardless of what kind of quotes 
-    delimit the string they are found in
-
-    the parser will never emit json with keys or strings delimited by single 
-    quotes
-
- `unescaped_jsonp`
-
-    javascript interpreters treat the codepoints `u+2028` and `u+2029` as 
-    significant whitespace. json strings that contain either of these codepoints 
-    will be parsed incorrectly by some javascript interpreters. by default, 
-    these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to 
-    retain compatibility. this option simply removes that escaping
-
- `comments`
-
-    json has no official comments but some parsers allow c/c++ style comments. 
-    anywhere whitespace is allowed this flag allows comments (both `// ...` and 
-    `/* ... */`)
-
 - `escaped_strings`

    by default both the encoder and decoder return strings as utf8 binaries 
@ -371,11 +332,13 @@ additional options beyond these. see
    control codes and problematic codepoints and replacing them with the 
    appropriate escapes
    
- `ignored_bad_escapes`
+- `unescaped_jsonp`

-    during decoding ignore unrecognized escape sequences and leave them as is in 
-    the stream. note that combining this option with `escaped_strings` will 
-    result in the escape character itself being escaped
+    javascript interpreters treat the codepoints `u+2028` and `u+2029` as 
+    significant whitespace. json strings that contain either of these codepoints 
+    will be parsed incorrectly by some javascript interpreters. by default, 
+    these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to 
+    retain compatibility. this option simply removes that escaping

 - `dirty_strings`

@ -383,42 +346,39 @@ additional options beyond these. see
    can result in unwanted behaviour. if your strings are already escaped (or 
    you'd like to force invalid strings into "json" you monster) use this flag 
    to bypass escaping. this can also be used to read in **really** invalid json 
-    strings. everything but escaped quotes are passed as is to the resulting 
-    string term. note that this overrides `ignored_bad_escapes`, 
-    `unescaped_jsonp` and `escaped_strings`
+    strings. everything between unescaped quotes is passed as is to the resulting
+    string term. note that this takes precedence over any other options

- `explicit_end`
+- `strict`
+
+    as mentioned [earlier](#description), **jsx** is pragmatic. if you're more of a
+    json purist or you're really into bdsm, stricter adherence to the spec is
+    possible. the following restrictions are available
+    
+    * `comments`
+    
+        comments are disabled and result in a `badarg` error
+    
+    * `utf8`
+    
+        invalid codepoints and malformed unicode result in `badarg` errors
+
+    * `single_quotes`
+    
+        only keys and strings delimited by double quotes (`u+0022`) are allowed. the
+        single quote (`u+0027`) results in a `badarg` error
+    
+    * `escapes`
+
+        escape sequences not adhering to the json spec result in a `badarg` error
+    
+    any combination of these can be passed to **jsx** by using `{strict, [strict_option()]}`.
+    `strict` is equivalent to `{strict, [comments, utf8, single_quotes, escapes]}`
+
+- `stream`

    see [incomplete input](#incomplete-input)

- `relax`
-
-    relax is a synonym for `[replaced_bad_utf8, single_quoted_strings, comments, 
-    ignored_bad_escapes]` for when you don't care how absolutely terrible your 
-    json input is, you just want the parser to do the best it can
-
- `incomplete_handler` & `error_handler`
-
-    the default incomplete and error handlers can be replaced with user defined 
-    handlers. if options include `{error_handler, F}` and/or 
-    `{incomplete_handler, F}` where `F` is a function of arity 3 they will be 
-    called instead of the default handler. the spec for `F` is as follows
-    ```erlang
-    F(Remaining, InternalState, Config) -> any()
-    
-      Remaining = binary() | term()
-      InternalState = opaque()
-      Config = list()
-    ```
-    `Remaining` is the binary fragment or term that caused the error
-    
-    `InternalState` is an opaque structure containing the internal state of the 
-    parser/decoder/encoder
-    
-    `Config` is a list of options/flags in use by the parser/decoder/encoder
-    
-    these functions should be considered experimental for now
-

 ## exports ##

@ -435,10 +395,10 @@ parser(Module, Args, Opts) -> Fun((Tokens) -> any())
  Opts = [option()]
  JSONText = json_text()
  JSONTerm = json_term()
-  Tokens = token() | [token()]
+  Tokens = event() | [event()]
 ```

-jsx is a json compiler with interleaved tokenizing, syntactic analysis and 
+**jsx** is a json compiler with interleaved tokenizing, syntactic analysis and 
 semantic analysis stages. included are two tokenizers; one that handles json 
 texts (`decoder/3`) and one that handles erlang terms (`encoder/3`). there is 
 also an entry point to the syntactic analysis stage for use with user-defined 
@ -468,7 +428,7 @@ decode(JSON, Opts) -> Term

  JSON = json_text()
  Term = json_term()
-  Opts = [option() | labels | {labels, Label} | {post_decode, F}]
+  Opts = [option() | labels | {labels, Label}]
    Label = binary | atom | existing_atom | attempt_atom
    F = fun((any()) -> any())
 ```
@ -485,18 +445,6 @@ new atoms to the atom table and will result in a `badarg` error if the atom
 does not exist. `attempt_atom` will convert keys to atoms when they exist,
 and leave them as binary otherwise

-`{post_decode, F}` is a user defined function of arity 1 that is called on each 
-output value (objects, arrays, strings, numbers and literals). it may return any 
-value to be substituted in the returned term. for example:
-
-```erlang
-1> F = fun(V) when is_list(V) -> V; (V) -> false end.
-2> jsx:decode(<<"{\"a list\": [true, \"a string\", 1]}">>, [{post_decode, F}]).
-[{<<"a list">>, [false, false, false]}]
-```
-
-declaring more than one post-decoder will result in a `badarg` error exception
-
 raises a `badarg` error exception if input is not valid json


@ -508,7 +456,7 @@ encode(Term, Opts) -> JSON

  Term = json_term()
  JSON = json_text()
-  Opts = [option() | {pre_encode, F} | space | {space, N} | indent | {indent, N}]
+  Opts = [option() | space | {space, N} | indent | {indent, N}]
    F = fun((any()) -> any())
    N = pos_integer()
 ```
@ -522,18 +470,6 @@ the option `{indent, N}` inserts a newline and `N` spaces for each level of
 indentation in your json output. note that this overrides spaces inserted after 
 a comma. `indent` is an alias for `{indent, 1}`. the default is `{indent, 0}`

-`{pre_encode, F}` is a user defined function of arity 1 that is called on each 
-input value. it may return any valid json value to be substituted in the 
-returned json. for example:
-
-```erlang
-1> F = fun(V) when is_list(V) -> V; (V) -> false end.
-2> jsx:encode([{<<"a list">>, [true, <<"a string">>, 1]}], [{pre_encode, F}]).
-<<"{\"a list\": [false, false, false]}">>
-```
-
-declaring more than one pre-encoder will result in a `badarg` error exception
-
 raises a `badarg` error exception if input is not a valid 
 [erlang representation of json](#json---erlang-mapping)

@ -621,7 +557,7 @@ what exactly constitutes valid json may be altered via [options](#option)

 ## callback exports ##

-the following functions should be exported from a jsx callback module
+the following functions should be exported from a **jsx** callback module

 #### `Module:init/1` ####

@ -667,16 +603,11 @@ following events must be handled:

    the end of a json array

-   `{key, binary()}`
-
-    a key in a json object. this is guaranteed to follow either `start_object` 
-    or a json value. it will usually be a `utf8` encoded binary. see the 
-    [options](#option) for possible exceptions
-
 -   `{string, binary()}`

    a json string. it will usually be a `utf8` encoded binary. see the 
-    [options](#option) for possible exceptions
+    [options](#option) for possible exceptions. note that keys are also
+    json strings

 -   `{integer, integer()}`

--- a/src/jsx.erl
+++ b/src/jsx.erl
@ -28,22 +28,17 @@
 -export([format/1, format/2, minify/1, prettify/1]).
 -export([encoder/3, decoder/3, parser/3]).
 -export([resume/3]).
-%% old api
-export([term_to_json/1, term_to_json/2, json_to_term/1, json_to_term/2]).
-export([to_json/1, to_json/2]).
-export([to_term/1, to_term/2]).

 -export_type([json_term/0, json_text/0, token/0]).
-export_type([config/0, encoder/0, decoder/0, parser/0, internal_state/0]).
+-export_type([encoder/0, decoder/0, parser/0, internal_state/0]).


 -ifdef(TEST).
-include("jsx_tests.hrl").
-else.
-include("jsx_config.hrl").
+%% data and helper functions for tests
+-export([test_cases/0, special_test_cases/0]).
+-export([init/1, handle_event/2]).
 -endif.

-type config() :: #config{}.

 -type json_term()
   :: [{binary() | atom(), json_term()}]
@ -64,19 +59,12 @@
 encode(Source) -> encode(Source, []).
 encode(Source, Config) -> jsx_to_json:to_json(Source, Config).

-%% old api, alias for encode/x

-spec to_json(Source::json_term()) -> json_text() | {incomplete, encoder()}.
-spec to_json(Source::json_term(), Config::jsx_to_json:config()) -> json_text() | {incomplete, encoder()}.
+-spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}.
+-spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term()  | {incomplete, decoder()}.

-to_json(Source) -> encode(Source, []).
-to_json(Source, Config) -> encode(Source, Config).
-
-spec term_to_json(Source::json_term()) -> json_text() | {incomplete, encoder()}.
-spec term_to_json(Source::json_term(), Config::jsx_to_json:config()) -> json_text() | {incomplete, encoder()}.
-
-term_to_json(Source) -> encode(Source, []).
-term_to_json(Source, Config) -> encode(Source, Config).
+decode(Source) -> decode(Source, []).
+decode(Source, Config) -> jsx_to_term:to_term(Source, Config).


 -spec format(Source::json_text()) -> json_text() | {incomplete, decoder()}.
@ -96,27 +84,6 @@ minify(Source) -> format(Source, []).
 prettify(Source) -> format(Source, [space, {indent, 2}]).


-spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}.
-spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term()  | {incomplete, decoder()}.
-
-decode(Source) -> decode(Source, []).
-decode(Source, Config) -> jsx_to_term:to_term(Source, Config).
-
-%% old api, alias for to_term/x
-
-spec to_term(Source::json_text()) -> json_term() | {incomplete, decoder()}.
-spec to_term(Source::json_text(), Config::jsx_to_term:config()) -> json_term()  | {incomplete, decoder()}.
-
-to_term(Source) -> decode(Source, []).
-to_term(Source, Config) -> decode(Source, Config).
-
-spec json_to_term(Source::json_text()) -> json_term() | {incomplete, decoder()}.
-spec json_to_term(Source::json_text(), Config::jsx_to_term:config()) -> json_term()  | {incomplete, decoder()}.
-
-json_to_term(Source) -> decode(Source, []).
-json_to_term(Source, Config) -> decode(Source, Config).
-
-
 -spec is_json(Source::any()) -> true | false.
 -spec is_json(Source::any(), Config::jsx_verify:config()) -> true | false.

@ -182,3 +149,311 @@ resume(Term, {decoder, State, Handler, Acc, Stack}, Config) ->
 resume(Term, {parser, State, Handler, Stack}, Config) ->
    jsx_parser:resume(Term, State, Handler, Stack, jsx_config:parse_config(Config)).

+
+
+-ifdef(TEST).
+
+-include_lib("eunit/include/eunit.hrl").
+
+
+%% test handler
+init([]) -> [].
+
+handle_event(end_json, State) -> lists:reverse([end_json] ++ State);
+handle_event(Event, State) -> [Event] ++ State.
+
+
+test_cases() ->
+    empty_array()
+    ++ nested_array()
+    ++ empty_object()
+    ++ nested_object()
+    ++ strings()
+    ++ literals()
+    ++ integers()
+    ++ floats()
+    ++ compound_object().
+
+%% segregate these so we can skip them in `jsx_to_term`
+special_test_cases() -> special_objects() ++ special_array().
+
+
+empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}].
+
+
+nested_array() ->
+    [{
+        "[[[]]]",
+        <<"[[[]]]">>,
+        [[[]]],
+        [start_array, start_array, start_array, end_array, end_array, end_array]
+    }].
+
+
+empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}].
+
+
+nested_object() ->
+    [{
+        "{\"key\":{\"key\":{}}}",
+        <<"{\"key\":{\"key\":{}}}">>,
+        [{<<"key">>, [{<<"key">>, [{}]}]}],
+        [
+            start_object,
+                {key, <<"key">>},
+                start_object,
+                    {key, <<"key">>},
+                    start_object,
+                    end_object,
+                end_object,
+            end_object
+        ]
+    }].
+
+
+naked_strings() ->
+    Raw = [
+        "",
+        "hello world"
+    ],
+    [
+        {
+            String,
+            <<"\"", (list_to_binary(String))/binary, "\"">>,
+            list_to_binary(String),
+            [{string, list_to_binary(String)}]
+        }
+        || String <- Raw
+    ].
+
+
+strings() ->
+    naked_strings()
+    ++ [ wrap_with_array(Test) || Test <- naked_strings() ]
+    ++ [ wrap_with_object(Test) || Test <- naked_strings() ].
+
+
+naked_integers() ->
+    Raw = [
+        1, 2, 3,
+        127, 128, 129,
+        255, 256, 257,
+        65534, 65535, 65536,
+        18446744073709551616,
+        18446744073709551617
+    ],
+    [
+        {
+            integer_to_list(X),
+            list_to_binary(integer_to_list(X)),
+            X,
+            [{integer, X}]
+        }
+        || X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0]
+    ].
+
+
+integers() ->
+    naked_integers()
+    ++ [ wrap_with_array(Test) || Test <- naked_integers() ]
+    ++ [ wrap_with_object(Test) || Test <- naked_integers() ].
+
+
+naked_floats() ->
+    Raw = [
+        0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
+        1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
+        1234567890.0987654321,
+        0.0e0,
+        1234567890.0987654321e16,
+        0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308,
+        1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308,
+        2.2250738585072014e-308,    %% min normalized float
+        1.7976931348623157e308,     %% max normalized float
+        5.0e-324,                   %% min denormalized float
+        2.225073858507201e-308      %% max denormalized float
+    ],
+    [
+        {
+            sane_float_to_list(X),
+            list_to_binary(sane_float_to_list(X)),
+            X,
+            [{float, X}]
+        }
+        || X <- Raw ++ [ -1 * Y || Y <- Raw ]
+    ].
+
+
+floats() ->
+    naked_floats()
+    ++ [ wrap_with_array(Test) || Test <- naked_floats() ]
+    ++ [ wrap_with_object(Test) || Test <- naked_floats() ].
+
+
+naked_literals() ->
+    [
+        {
+            atom_to_list(Literal),
+            atom_to_binary(Literal, unicode),
+            Literal,
+            [{literal, Literal}]
+        }
+        || Literal <- [true, false, null]
+    ].
+
+
+literals() ->
+    naked_literals()
+    ++ [ wrap_with_array(Test) || Test <- naked_literals() ]
+    ++ [ wrap_with_object(Test) || Test <- naked_literals() ].
+
+
+compound_object() ->
+    [{
+        "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]",
+        <<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>,
+        [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]],
+        [
+            start_array,
+                start_object,
+                    {key, <<"alpha">>},
+                    start_array,
+                        {integer, 1},
+                        {integer, 2},
+                        {integer, 3},
+                    end_array,
+                    {key, <<"beta">>},
+                    start_object,
+                        {key, <<"alpha">>},
+                        start_array,
+                            {float, 1.0},
+                            {float, 2.0},
+                            {float, 3.0},
+                        end_array,
+                        {key, <<"beta">>},
+                        start_array,
+                            {literal, true},
+                            {literal, false},
+                        end_array,
+                    end_object,
+                end_object,
+                start_array,
+                    start_object,
+                    end_object,
+                end_array,
+            end_array
+        ]
+    }].
+
+
+special_objects() ->
+    [
+        {
+            "[{key, atom}]",
+            <<"{\"key\":\"atom\"}">>,
+            [{key, atom}],
+            [start_object, {key, <<"key">>}, {string, <<"atom">>}, end_object]
+        },
+        {
+            "[{1, true}]",
+            <<"{\"1\":true}">>,
+            [{1, true}],
+            [start_object, {key, <<"1">>}, {literal, true}, end_object]
+        }
+    ].
+
+
+special_array() ->
+    [    
+        {
+            "[foo, bar]",
+            <<"[\"foo\",\"bar\"]">>,
+            [foo, bar],
+            [start_array, {string, <<"foo">>}, {string, <<"bar">>}, end_array]
+        }
+    ].
+
+
+wrap_with_array({Title, JSON, Term, Events}) ->
+    {
+        "[" ++ Title ++ "]",
+        <<"[", JSON/binary, "]">>,
+        [Term],
+        [start_array] ++ Events ++ [end_array]
+    }.
+
+
+wrap_with_object({Title, JSON, Term, Events}) ->
+    {
+        "{\"key\":" ++ Title ++ "}",
+        <<"{\"key\":", JSON/binary, "}">>,
+        [{<<"key">>, Term}],
+        [start_object, {key, <<"key">>}] ++ Events ++ [end_object]
+    }.
+
+
+sane_float_to_list(X) ->
+    [Output] = io_lib:format("~p", [X]),
+    Output.
+
+
+incremental_decode(JSON) ->
+    Final = lists:foldl(
+        fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end,
+        decoder(jsx, [], [stream]),
+        json_to_bytes(JSON)
+    ),
+    Final(end_stream).
+
+
+incremental_parse(Events) ->
+    Final = lists:foldl(
+        fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end,
+        parser(?MODULE, [], [stream]),
+        lists:map(fun(X) -> [X] end, Events)
+    ),
+    Final(end_stream).
+
+
+%% used to convert a json text into a list of single-byte binaries (preceded by
+%% an empty binary) to be incrementally parsed
+json_to_bytes(JSON) -> json_to_bytes(JSON, []).
+
+json_to_bytes(<<>>, Acc) -> [<<>>] ++ lists:reverse(Acc);
+json_to_bytes(<<X, Rest/binary>>, Acc) -> json_to_bytes(Rest, [<<X>>] ++ Acc).
+
+
+%% actual tests!
+decode_test_() ->
+    Data = test_cases(),
+    [{Title, ?_assertEqual(Events ++ [end_json], (decoder(?MODULE, [], []))(JSON))}
+        || {Title, JSON, _, Events} <- Data
+    ] ++
+    [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_decode(JSON))}
+        || {Title, JSON, _, Events} <- Data
+    ].
+
+
+parse_test_() ->
+    Data = test_cases(),
+    [{Title, ?_assertEqual(Events ++ [end_json], (parser(?MODULE, [], []))(Events ++ [end_json]))}
+        || {Title, _, _, Events} <- Data
+    ] ++
+    [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_parse(Events))}
+        || {Title, _, _, Events} <- Data
+    ].
+
+
+encode_test_() ->
+    Data = test_cases(),
+    [
+        {
+            Title, ?_assertEqual(
+                Events ++ [end_json],
+                (jsx:encoder(jsx, [], []))(Term)
+            )
+        } || {Title, _, Term, Events} <- Data
+    ].
+
+
+-endif.
--- a/src/jsx_config.erl
+++ b/src/jsx_config.erl
@ -49,41 +49,27 @@
 %% parsing of jsx config
 -spec parse_config(Config::proplists:proplist()) -> jsx:config().

-parse_config(Config) ->
-    parse_config(Config, #config{}).
+parse_config(Config) -> parse_config(Config, #config{}).

-parse_config([], Config) ->
-    Config;
-parse_config([replaced_bad_utf8|Rest], Config) ->
-    parse_config(Rest, Config#config{replaced_bad_utf8=true});
+parse_config([], Config) -> Config;
 parse_config([escaped_forward_slashes|Rest], Config) ->
    parse_config(Rest, Config#config{escaped_forward_slashes=true});
-parse_config([explicit_end|Rest], Config) ->
-    parse_config(Rest, Config#config{explicit_end=true});
-parse_config([single_quoted_strings|Rest], Config) ->
-    parse_config(Rest, Config#config{single_quoted_strings=true});
-parse_config([unescaped_jsonp|Rest], Config) ->
-    parse_config(Rest, Config#config{unescaped_jsonp=true});
-parse_config([comments|Rest], Config) ->
-    parse_config(Rest, Config#config{comments=true});
 parse_config([escaped_strings|Rest], Config) ->
    parse_config(Rest, Config#config{escaped_strings=true});
+parse_config([unescaped_jsonp|Rest], Config) ->
+    parse_config(Rest, Config#config{unescaped_jsonp=true});
 parse_config([dirty_strings|Rest], Config) ->
    parse_config(Rest, Config#config{dirty_strings=true});
-parse_config([ignored_bad_escapes|Rest], Config) ->
-    parse_config(Rest, Config#config{ignored_bad_escapes=true});
-parse_config([relax|Rest], Config) ->
-    parse_config(Rest, Config#config{
-        replaced_bad_utf8 = true,
-        single_quoted_strings = true,
-        comments = true,
-        ignored_bad_escapes = true
+parse_config([strict|Rest], Config) ->
+    parse_config(Rest, Config#config{strict_comments=true,
+        strict_utf8=true,
+        strict_single_quotes=true,
+        strict_escapes=true
    });
-parse_config([{pre_encode, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) ->
-    case Config#config.pre_encode of
-        false -> parse_config(Rest, Config#config{pre_encode=Encoder})
-        ; _ -> erlang:error(badarg, [Options, Config])
-    end;
+parse_config([{strict, Strict}|Rest], Config) ->
+    parse_strict(Strict, Rest, Config);
+parse_config([stream|Rest], Config) ->
+    parse_config(Rest, Config#config{stream=true});
 parse_config([{error_handler, ErrorHandler}|Rest] = Options, Config) when is_function(ErrorHandler, 3) ->
    case Config#config.error_handler of
        false -> parse_config(Rest, Config#config{error_handler=ErrorHandler})
@ -94,34 +80,28 @@ parse_config([{incomplete_handler, IncompleteHandler}|Rest] = Options, Config) w
        false -> parse_config(Rest, Config#config{incomplete_handler=IncompleteHandler})
        ; _ -> erlang:error(badarg, [Options, Config])
    end;
-%% deprecated flags
-parse_config([{pre_encoder, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) ->
-    case Config#config.pre_encode of
-        false -> parse_config(Rest, Config#config{pre_encode=Encoder})
-        ; _ -> erlang:error(badarg, [Options, Config])
-    end;
-parse_config([loose_unicode|Rest], Config) ->
-    parse_config(Rest, Config#config{replaced_bad_utf8=true});
-parse_config([escape_forward_slash|Rest], Config) ->
-    parse_config(Rest, Config#config{escaped_forward_slashes=true});
-parse_config([single_quotes|Rest], Config) ->
-    parse_config(Rest, Config#config{single_quoted_strings=true});
-parse_config([no_jsonp_escapes|Rest], Config) ->
-    parse_config(Rest, Config#config{unescaped_jsonp=true});
-parse_config([json_escape|Rest], Config) ->
-    parse_config(Rest, Config#config{escaped_strings=true});
-parse_config([ignore_bad_escapes|Rest], Config) ->
-    parse_config(Rest, Config#config{ignored_bad_escapes=true});
-parse_config(Options, Config) ->
-    erlang:error(badarg, [Options, Config]).
+parse_config(_Options, _Config) -> erlang:error(badarg).
+
+
+%% handles the `{strict, Flags}` option: switches on the strict_* field named
+%% by each flag, then hands the remaining options back to parse_config/2.
+%% anything that is not a list of known flags raises badarg
+parse_strict([], Rest, Config) ->
+    parse_config(Rest, Config);
+parse_strict([Flag|Flags], Rest, Config) ->
+    parse_strict(Flags, Rest, strict_flag(Flag, Config));
+parse_strict(_Strict, _Rest, _Config) ->
+    erlang:error(badarg).
+
+%% sets the config field that corresponds to one strict flag
+strict_flag(comments, Config) -> Config#config{strict_comments=true};
+strict_flag(utf8, Config) -> Config#config{strict_utf8=true};
+strict_flag(single_quotes, Config) -> Config#config{strict_single_quotes=true};
+strict_flag(escapes, Config) -> Config#config{strict_escapes=true};
+strict_flag(_Flag, _Config) -> erlang:error(badarg).
+
+


 -spec config_to_list(Config::jsx:config()) -> proplists:proplist().

 config_to_list(Config) ->
-    lists:map(
-        fun ({pre_encode, F}) -> {pre_encode, F};
-            ({error_handler, F}) -> {error_handler, F};
+    reduce_config(lists:map(
+        fun ({error_handler, F}) -> {error_handler, F};
            ({incomplete_handler, F}) -> {incomplete_handler, F};
            ({Key, true}) -> Key
        end,
@ -129,34 +109,41 @@ config_to_list(Config) ->
            fun({_, false}) -> false; (_) -> true end,
            lists:zip(record_info(fields, config), tl(tuple_to_list(Config)))
        )
-    ).
+    )).
+
+
+%% collapses the individual strict_* flags of an option list into one trailing
+%% entry: nothing when none are present, the bare atom `strict` when four were
+%% collected, otherwise `{strict, Names}` with the names in input order. all
+%% other entries keep their relative order
+reduce_config(Input) -> reduce_config(Input, [], []).
+
+reduce_config([strict_comments|Input], Output, Strict) ->
+    reduce_config(Input, Output, [comments|Strict]);
+reduce_config([strict_utf8|Input], Output, Strict) ->
+    reduce_config(Input, Output, [utf8|Strict]);
+reduce_config([strict_single_quotes|Input], Output, Strict) ->
+    reduce_config(Input, Output, [single_quotes|Strict]);
+reduce_config([strict_escapes|Input], Output, Strict) ->
+    reduce_config(Input, Output, [escapes|Strict]);
+reduce_config([Else|Input], Output, Strict) ->
+    reduce_config(Input, [Else|Output], Strict);
+reduce_config([], Output, []) ->
+    lists:reverse(Output);
+reduce_config([], Output, [_, _, _, _]) ->
+    lists:reverse(Output) ++ [strict];
+reduce_config([], Output, Strict) ->
+    lists:reverse(Output) ++ [{strict, lists:reverse(Strict)}].


 -spec valid_flags() -> [atom()].

 valid_flags() ->
    [
-        replaced_bad_utf8,
        escaped_forward_slashes,
-        single_quoted_strings,
-        unescaped_jsonp,
-        comments,
        escaped_strings,
+        unescaped_jsonp,
        dirty_strings,
-        ignored_bad_escapes,
-        explicit_end,
-        relax,
-        pre_encode,
+        strict,
+        stream,
        error_handler,
-        incomplete_handler,
-        %% deprecated flags
-        pre_encoder,            %% pre_encode
-        loose_unicode,          %% replaced_bad_utf8
-        escape_forward_slash,   %% escaped_forward_slashes
-        single_quotes,          %% single_quoted_strings
-        no_jsonp_escapes,       %% unescaped_jsonp
-        json_escape,            %% escaped_strings
-        ignore_bad_escapes      %% ignored_bad_escapes
+        incomplete_handler
    ].


@ -187,70 +174,51 @@ config_test_() ->
    [
        {"all flags",
            ?_assertEqual(
-                #config{
-                    replaced_bad_utf8=true,
-                    escaped_forward_slashes=true,
-                    explicit_end=true,
-                    single_quoted_strings=true,
-                    unescaped_jsonp=true,
-                    comments=true,
-                    dirty_strings=true,
-                    ignored_bad_escapes=true
+                #config{escaped_forward_slashes = true,
+                    escaped_strings = true,
+                    unescaped_jsonp = true,
+                    dirty_strings = true,
+                    strict_comments = true,
+                    strict_utf8 = true,
+                    strict_single_quotes = true,
+                    strict_escapes = true,
+                    stream = true
                },
-                parse_config([
-                    replaced_bad_utf8,
-                    escaped_forward_slashes,
-                    explicit_end,
-                    single_quoted_strings,
+                parse_config([escaped_forward_slashes,
+                    escaped_strings,
                    unescaped_jsonp,
-                    comments,
                    dirty_strings,
-                    ignored_bad_escapes
+                    strict,
+                    stream
                ])
            )
        },
-        {"relax flag",
+        {"strict flag",
            ?_assertEqual(
-                #config{
-                    replaced_bad_utf8=true,
-                    single_quoted_strings=true,
-                    comments=true,
-                    ignored_bad_escapes=true
+                #config{strict_comments = true,
+                    strict_utf8 = true,
+                    strict_single_quotes = true,
+                    strict_escapes = true
                },
-                parse_config([relax])
+                parse_config([strict])
            )
        },
-        {"deprecated flags", ?_assertEqual(
-            #config{
-                pre_encode=fun lists:length/1,
-                replaced_bad_utf8=true,
-                escaped_forward_slashes=true,
-                single_quoted_strings=true,
-                unescaped_jsonp=true,
-                escaped_strings=true,
-                ignored_bad_escapes=true
+        {"strict selective",
+            ?_assertEqual(
+                #config{strict_comments = true},
+                parse_config([{strict, [comments]}])
+            )
+        },
+        {"strict expanded",
+            ?_assertEqual(
+                #config{strict_comments = true,
+                    strict_utf8 = true,
+                    strict_single_quotes = true,
+                    strict_escapes = true
+                },
+                parse_config([{strict, [comments, utf8, single_quotes, escapes]}])
+            )
        },
-            parse_config([
-                {pre_encoder, fun lists:length/1},
-                loose_unicode,
-                escape_forward_slash,
-                single_quotes,
-                no_jsonp_escapes,
-                json_escape,
-                ignore_bad_escapes
-            ])
-        )},
-        {"pre_encode flag", ?_assertEqual(
-            #config{pre_encode=fun lists:length/1},
-            parse_config([{pre_encode, fun lists:length/1}])
-        )},
-        {"two pre_encoders defined", ?_assertError(
-            badarg,
-            parse_config([
-                {pre_encode, fun(_) -> true end},
-                {pre_encode, fun(_) -> false end}
-            ])
-        )},
        {"error_handler flag", ?_assertEqual(
            #config{error_handler=fun ?MODULE:fake_error_handler/3},
            parse_config([{error_handler, fun ?MODULE:fake_error_handler/3}])
@ -273,7 +241,7 @@ config_test_() ->
                {incomplete_handler, fun(_) -> false end}
            ])
        )},
-        {"bad option flag", ?_assertError(badarg, parse_config([error]))}
+        {"bad option flag", ?_assertError(badarg, parse_config([this_flag_does_not_exist]))}
    ].


@ -284,32 +252,40 @@ config_to_list_test_() ->
            config_to_list(#config{})
        )},
        {"all flags", ?_assertEqual(
-            [
-                replaced_bad_utf8,
-                escaped_forward_slashes,
-                single_quoted_strings,
+            [escaped_forward_slashes,
+                escaped_strings,
                unescaped_jsonp,
-                comments,
                dirty_strings,
-                ignored_bad_escapes,
-                explicit_end
+                stream,
+                strict
            ],
            config_to_list(
-                #config{
-                    replaced_bad_utf8=true,
-                    escaped_forward_slashes=true,
-                    explicit_end=true,
-                    single_quoted_strings=true,
-                    unescaped_jsonp=true,
-                    comments=true,
-                    dirty_strings=true,
-                    ignored_bad_escapes=true
+                #config{escaped_forward_slashes = true,
+                    escaped_strings = true,
+                    unescaped_jsonp = true,
+                    dirty_strings = true,
+                    strict_comments = true,
+                    strict_utf8 = true,
+                    strict_single_quotes = true,
+                    strict_escapes = true,
+                    stream = true
                }
            )
        )},
-        {"pre_encode", ?_assertEqual(
-            [{pre_encode, fun lists:length/1}],
-            config_to_list(#config{pre_encode=fun lists:length/1})
+        {"single strict", ?_assertEqual(
+            [{strict, [comments]}],
+            config_to_list(#config{strict_comments = true})
+        )},
+        {"multiple strict", ?_assertEqual(
+            [{strict, [utf8, single_quotes, escapes]}],
+            config_to_list(#config{strict_utf8 = true, strict_single_quotes = true, strict_escapes = true})
+        )},
+        {"all strict", ?_assertEqual(
+            [strict],
+            config_to_list(#config{strict_comments = true,
+                strict_utf8 = true,
+                strict_single_quotes = true,
+                strict_escapes = true})
        )},
        {"error handler", ?_assertEqual(
            [{error_handler, fun ?MODULE:fake_error_handler/3}],
--- a/src/jsx_config.hrl
+++ b/src/jsx_config.hrl
@ -1,15 +1,13 @@
 -record(config, {
-    replaced_bad_utf8 = false         :: boolean(),
    escaped_forward_slashes = false     :: boolean(),
-    single_quoted_strings = false     :: boolean(),
-    unescaped_jsonp = false           :: boolean(),
-    comments = false                  :: boolean(),
    escaped_strings = false             :: boolean(),
+    unescaped_jsonp = false             :: boolean(),
    dirty_strings = false               :: boolean(),
-    ignored_bad_escapes = false       :: boolean(),
-    explicit_end = false              :: boolean(),
-    pre_encode = false                :: false | fun((any()) -> any()),
+    strict_comments = false             :: boolean(),
+    strict_utf8 = false                 :: boolean(),
+    strict_single_quotes = false        :: boolean(),
+    strict_escapes = false              :: boolean(),
+    stream = false                      :: boolean(),
    error_handler = false               :: false | jsx_config:handler(),
    incomplete_handler = false          :: false | jsx_config:handler()
 }).
-
--- a/src/jsx_decoder.erl
+++ b/src/jsx_decoder.erl
--- a/src/jsx_encoder.erl
+++ b/src/jsx_encoder.erl
@ -23,310 +23,66 @@

 -module(jsx_encoder).

-export([encoder/3]).
+-export([encoder/3, encode/1, encode/2, unzip/1]).

 -spec encoder(Handler::module(), State::any(), Config::list()) -> jsx:encoder().

 encoder(Handler, State, Config) ->
-    fun(JSON) ->
-        start(
-            JSON,
-            {Handler, Handler:init(State)},
-            jsx_config:parse_config(Config)
-        )
-    end.
+    Parser = jsx:parser(Handler, State, Config),
+    fun(Term) -> Parser(encode(Term) ++ [end_json]) end.


+-spec encode(Term::any()) -> any().

-include("jsx_config.hrl").
+encode(Term) -> encode(Term, ?MODULE).


-ifndef(error).
-define(error(State, Term, Handler, Config),
-    case Config#config.error_handler of
-        false -> erlang:error(badarg);
-        F -> erlang:throw(F(Term, {encoder, State, Handler}, jsx_config:config_to_list(Config)))
-    end
-).
-endif.
+-spec encode(Term::any(), EntryPoint::module()) -> any().

+encode([], _EntryPoint) -> [start_array, end_array];
+encode([{}], _EntryPoint) -> [start_object, end_object];

-start(Term, {Handler, State}, Config) ->
-    try Handler:handle_event(end_json, value(pre_encode(Term, Config), {Handler, State}, Config))
-    catch
-        throw:Error -> Error;
-        Type:Value -> erlang:Type(Value)
-    end.
-
-
-value(String, {Handler, State}, Config) when is_binary(String) ->
-    Handler:handle_event({string, clean_string(String, {Handler, State}, Config)}, State);
-value(Float, {Handler, State}, _Config) when is_float(Float) ->
-    Handler:handle_event({float, Float}, State);
-value(Int, {Handler, State}, _Config) when is_integer(Int) ->
-    Handler:handle_event({integer, Int}, State);
-value(Literal, {Handler, State}, _Config)
-        when Literal == true; Literal == false; Literal == null ->
-    Handler:handle_event({literal, Literal}, State);
-value(String, {Handler, State}, Config) when is_atom(String) ->
-    Handler:handle_event({string, clean_string(atom_to_binary(String,latin1), {Handler, State}, Config)}, State);
-value([{}], {Handler, State}, _Config) ->
-    Handler:handle_event(end_object, Handler:handle_event(start_object, State));
-value([], {Handler, State}, _Config) ->
-    Handler:handle_event(end_array, Handler:handle_event(start_array, State));
-value(List, Handler, Config) when is_list(List) ->
-    list_or_object(List, Handler, Config);
-value(Term, Handler, Config) -> ?error(value, Term, Handler, Config).
-
-
-list_or_object([Term|Rest], {Handler, State}, Config) ->
-    case pre_encode(Term, Config) of
-        {K, V} when is_atom(K); is_binary(K); is_integer(K) ->
-            object([{K, V}|Rest], {Handler, Handler:handle_event(start_object, State)}, Config)
-        ; T ->
-            list([T|Rest], {Handler, Handler:handle_event(start_array, State)}, Config)
-    end.
-
-
-object([{Key, Value}, Next|Rest], {Handler, State}, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
-    V = pre_encode(Value, Config),
-    object(
-        [pre_encode(Next, Config)|Rest],
-        {
-            Handler,
-            value(
-                V,
-                {Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)},
-                Config
-            )
-        },
-        Config
+encode([{_, _}|_] = Term, EntryPoint) ->
+    lists:flatten(
+        [start_object] ++ [ EntryPoint:encode(T, EntryPoint) || T <- unzip(Term) ] ++ [end_object]
    );
-object([{Key, Value}], {Handler, State}, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
-    object(
-        [],
-        {
-            Handler,
-            value(
-                pre_encode(Value, Config),
-                {Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)},
-                Config
-            )
-        },
-        Config
+encode(Term, EntryPoint) when is_list(Term) ->
+    lists:flatten(
+        [start_array] ++ [ EntryPoint:encode(T, EntryPoint) || T <- Term ] ++ [end_array]
    );
-object([], {Handler, State}, _Config) -> Handler:handle_event(end_object, State);
-object(Term, Handler, Config) -> ?error(object, Term, Handler, Config).
+
+encode(Else, _EntryPoint) -> [Else].


-list([Value, Next|Rest], {Handler, State}, Config) ->
-    list([pre_encode(Next, Config)|Rest], {Handler, value(Value, {Handler, State}, Config)}, Config);
-list([Value], {Handler, State}, Config) ->
-    list([], {Handler, value(Value, {Handler, State}, Config)}, Config);
-list([], {Handler, State}, _Config) -> Handler:handle_event(end_array, State).
+unzip(List) -> unzip(List, []).

-pre_encode(Value, #config{pre_encode=false}) -> Value;
-pre_encode(Value, Config) -> (Config#config.pre_encode)(Value).
-
-
-fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8));
-fix_key(Key) when is_integer(Key) -> fix_key(list_to_binary(integer_to_list(Key)));
-fix_key(Key) when is_binary(Key) -> Key.
-
-
-clean_string(Bin, Handler, Config) ->
-    case clean_string(Bin, Config) of
-        {error, badarg} -> ?error(string, Bin, Handler, Config);
-        String -> String
-    end.
-
-
-
-include("jsx_strings.hrl").
+%% walks a list of {Key, Value} pairs, pushing each pair onto Acc value-first
+%% so that the final reverse yields Key, Value, Key, Value, ... a key that is
+%% not a binary, atom or integer matches no clause
+unzip([{K, V}|Rest], Acc) when is_binary(K); is_atom(K); is_integer(K) ->
+    unzip(Rest, [V, K | Acc]);
+unzip([], Acc) -> lists:reverse(Acc).


 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").


-encode_test_() ->
-    Data = jsx:test_cases(),
-    [
-        {
-            Title, ?_assertEqual(
-                Events ++ [end_json],
-                start(Term, {jsx, []}, #config{})
-            )
-        } || {Title, _, Term, Events} <- Data
-    ].
+%% test helper: builds a jsx parser with Opts and applies it to Term
+parser(Term, Opts) ->
+    Parse = jsx:parser(jsx, [], Opts),
+    Parse(Term).


-encode(Term, Config) -> start(Term, {jsx, []}, jsx_config:parse_config(Config)).
-
-pre_encoders_test_() ->
-    Term = [
-        {<<"object">>, [
-            {atomkey, atomvalue},
-            {<<"literals">>, [true, false, null]},
-            {<<"strings">>, [<<"foo">>, <<"bar">>, <<"baz">>]},
-            {<<"numbers">>, [1, 1.0, 1.0e0]}
-        ]}
-    ],
-    [
-        {"no pre encode", ?_assertEqual(
-            [
-                start_object,
-                    {key, <<"object">>}, start_object,
-                        {key, <<"atomkey">>}, {string, <<"atomvalue">>},
-                        {key, <<"literals">>}, start_array,
-                            {literal, true}, {literal, false}, {literal, null},
-                        end_array,
-                        {key, <<"strings">>}, start_array,
-                            {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>},
-                        end_array,
-                        {key, <<"numbers">>}, start_array,
-                            {integer, 1}, {float, 1.0}, {float, 1.0},
-                        end_array,
-                    end_object,
-                end_object,
-                end_json
-            ],
-            encode(Term, [])
-        )},
-        {"replace lists with empty lists", ?_assertEqual(
-            [
-                start_object,
-                    {key, <<"object">>}, start_object,
-                        {key, <<"atomkey">>}, {string, <<"atomvalue">>},
-                        {key, <<"literals">>}, start_array, end_array,
-                        {key, <<"strings">>}, start_array, end_array,
-                        {key, <<"numbers">>}, start_array, end_array,
-                    end_object,
-                end_object,
-                end_json
-            ],
-            encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end}])
-        )},
-        {"replace objects with empty objects", ?_assertEqual(
-            [
-                start_object,
-                end_object,
-                end_json
-            ],
-            encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end}])
-        )},
-        {"replace all non-list and non_tuple values with false", ?_assertEqual(
-            [
-                start_object,
-                    {key, <<"object">>}, start_object,
-                        {key, <<"atomkey">>}, {literal, false},
-                        {key, <<"literals">>}, start_array,
-                            {literal, false}, {literal, false}, {literal, false},
-                        end_array,
-                        {key, <<"strings">>}, start_array,
-                            {literal, false}, {literal, false}, {literal, false},
-                        end_array,
-                        {key, <<"numbers">>}, start_array,
-                            {literal, false}, {literal, false}, {literal, false},
-                        end_array,
-                    end_object,
-                end_object,
-                end_json
-            ],
-            encode(Term, [{pre_encode, fun(V) when is_list(V); is_tuple(V) -> V; (_) -> false end}])
-        )},
-        {"replace all atoms with atom_to_list", ?_assertEqual(
-            [
-                start_object,
-                    {key, <<"object">>}, start_object,
-                        {key, <<"atomkey">>}, {string, <<"atomvalue">>},
-                        {key, <<"literals">>}, start_array,
-                            {string, <<"true">>}, {string, <<"false">>}, {string, <<"null">>},
-                        end_array,
-                        {key, <<"strings">>}, start_array,
-                            {string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>},
-                        end_array,
-                        {key, <<"numbers">>}, start_array,
-                            {integer, 1}, {float, 1.0}, {float, 1.0},
-                        end_array,
-                    end_object,
-                end_object,
-                end_json
-            ],
-            encode(Term, [{pre_encode, fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end}])
-        )},
-        {"pre_encode tuple", ?_assertEqual(
-            [
-                start_array,
-                    {integer, 1}, {integer, 2}, {integer, 3},
-                end_array,
-                end_json
-            ],
-            encode({1, 2, 3}, [{pre_encode, fun(Tuple) when is_tuple(Tuple) -> tuple_to_list(Tuple); (V) -> V end}])
-        )},
-        {"pre_encode 2-tuples", ?_assertEqual(
-            [
-                start_object,
-                    {key, <<"two">>}, {integer, 2}, {key, <<"three">>}, {integer, 3},
-                end_object,
-                end_json
-            ],
-            encode([{two, 1}, {three, 2}], [{pre_encode, fun({K, V}) -> {K, V + 1}; (V) -> V end}])
-        )},
-        {"pre_encode one field record", ?_assertEqual(
-            [
-                start_object,
-                    {key, <<"bar">>}, {literal, false},
-                end_object,
-                end_json
-            ],
-            encode([{foo, bar}], [{pre_encode, fun({foo, V}) -> {V, undefined}; (undefined) -> false; (V) -> V end}])
-        )},
-        {"pre_encode list", ?_assertEqual(
-            [
-                start_array,
-                    {integer, 2}, {integer, 3}, {integer, 4},
-                end_array,
-                end_json
-            ],
-            encode([1,2,3], [{pre_encode, fun(X) when is_integer(X) -> X + 1; (V) -> V end}])
-        )}
-    ].
-
 error_test_() ->
    [
-        {"value error", ?_assertError(badarg, encode(self(), []))},
-        {"string error", ?_assertError(badarg, encode(<<239, 191, 191>>, []))}
+        {"value error", ?_assertError(badarg, parser(self(), []))},
+        {"string error", ?_assertError(badarg, parser(<<239, 191, 191>>, [strict]))}
    ].

 custom_error_handler_test_() ->
-    Error = fun(Term, {_, State, _}, _) -> {State, Term} end, 
+    Error = fun(Term, {_, State, _, _}, _) -> {State, Term} end,
    [
        {"value error", ?_assertEqual(
-            {value, self()},
-            encode(self(), [{error_handler, Error}])
+            {value, [self()]},
+            parser(self(), [{error_handler, Error}])
        )},
        {"string error", ?_assertEqual(
-            {string, <<239, 191, 191>>},
-            encode(<<239, 191, 191>>, [{error_handler, Error}])
-        )}
-    ].
-
-integer_key_test_() ->
-    Term =  [{123, [{456, 789}]}],
-    [
-        {"basic integer keys", ?_assertEqual(
-            [
-                start_object,
-                    {key, <<"123">>},
-                    start_object,
-                        {key, <<"456">>},
-                        {integer, 789},
-                    end_object,
-                end_object,
-                end_json
-            ],
-            encode(Term, [])
+            {string, [{string, <<239, 191, 191>>}]},
+            parser(<<239, 191, 191>>, [{error_handler, Error}, strict])
        )}
    ].

--- a/src/jsx_parser.erl
+++ b/src/jsx_parser.erl
@ -68,6 +68,8 @@ resume(Rest, State, Handler, Stack, Config) ->
 -endif.


+incomplete(State, Handler, Stack, Config=#config{stream=false}) ->
+    ?error(State, [], Handler, Stack, Config);
 incomplete(State, Handler, Stack, Config=#config{incomplete_handler=false}) ->
    {incomplete, fun(end_stream) ->
                case resume([end_json], State, Handler, Stack, Config) of
@ -82,8 +84,6 @@ incomplete(State, Handler, Stack, Config=#config{incomplete_handler=F}) ->
    F([], {parser, State, Handler, Stack}, jsx_config:config_to_list(Config)).


-%handle_event([], Handler, _Config) -> Handler;
-%handle_event([Event|Rest], Handler, Config) -> handle_event(Rest, handle_event(Event, Handler, Config), Config);
 handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}.


@ -91,24 +91,10 @@ value([start_object|Tokens], Handler, Stack, Config) ->
    object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config);
 value([start_array|Tokens], Handler, Stack, Config) ->
    array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config);
-value([{literal, true}|Tokens], Handler, [], Config) ->
-    done(Tokens, handle_event({literal, true}, Handler, Config), [], Config);
-value([{literal, false}|Tokens], Handler, [], Config) ->
-    done(Tokens, handle_event({literal, false}, Handler, Config), [], Config);
-value([{literal, null}|Tokens], Handler, [], Config) ->
-    done(Tokens, handle_event({literal, null}, Handler, Config), [], Config);
-value([{literal, true}|Tokens], Handler, Stack, Config) ->
-    maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config);
-value([{literal, false}|Tokens], Handler, Stack, Config) ->
-    maybe_done(Tokens, handle_event({literal, false}, Handler, Config), Stack, Config);
-value([{literal, null}|Tokens], Handler, Stack, Config) ->
-    maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config);
+value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null ->
+    maybe_done(Tokens, handle_event({literal, Literal}, Handler, Config), Stack, Config);
 value([Literal|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null ->
    value([{literal, Literal}] ++ Tokens, Handler, Stack, Config);
-value([{integer, Number}|Tokens], Handler, [], Config) when is_integer(Number) ->
-    done(Tokens, handle_event({integer, Number}, Handler, Config), [], Config);
-value([{float, Number}|Tokens], Handler, [], Config) when is_float(Number) ->
-    done(Tokens, handle_event({float, Number}, Handler, Config), [], Config);
 value([{integer, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) ->
    maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config);
 value([{float, Number}|Tokens], Handler, Stack, Config) when is_float(Number) ->
@ -121,12 +107,6 @@ value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) ->
    value([{integer, Number}] ++ Tokens, Handler, Stack, Config);
 value([Number|Tokens], Handler, Stack, Config) when is_float(Number) ->
    value([{float, Number}] ++ Tokens, Handler, Stack, Config);
-value([{string, String}|Tokens], Handler, [], Config) when is_binary(String) ->
-    case clean_string(String, Tokens, Handler, [], Config) of
-        Clean when is_binary(Clean) ->
-            done(Tokens, handle_event({string, Clean}, Handler, Config), [], Config);
-        Error -> Error
-    end;
 value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) ->
    case clean_string(String, Tokens, Handler, Stack, Config) of
        Clean when is_binary(Clean) ->
@ -135,6 +115,8 @@ value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String)
    end;
 value([String|Tokens], Handler, Stack, Config) when is_binary(String) ->
    value([{string, String}] ++ Tokens, Handler, Stack, Config);
+value([String|Tokens], Handler, Stack, Config) when is_atom(String) ->
+    value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config);
 value([{raw, Raw}|Tokens], Handler, Stack, Config) when is_binary(Raw) ->
    value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config);
 value([], Handler, Stack, Config) ->
@ -146,13 +128,13 @@ value(Token, Handler, Stack, Config) ->

 object([end_object|Tokens], Handler, [object|Stack], Config) ->
    maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config);
-object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key) ->
+object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
    case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of
        Clean when is_binary(Clean) ->
            value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config);
        Error -> Error
    end;
-object([Key|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key) ->
+object([Key|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
    case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of
        Clean when is_binary(Clean) ->
            value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config);
@ -185,7 +167,7 @@ maybe_done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
 maybe_done(Token, Handler, Stack, Config) ->
    maybe_done([Token], Handler, Stack, Config).

-done([], Handler, [], Config=#config{explicit_end=true}) ->
+done([], Handler, [], Config=#config{stream=true}) ->
    incomplete(done, Handler, [], Config);
 done(Tokens, Handler, [], Config) when Tokens == [end_json]; Tokens == [] ->
    {_, State} = handle_event(end_json, Handler, Config),
@ -196,7 +178,8 @@ done(Token, Handler, Stack, Config) ->
    done([Token], Handler, Stack, Config).


-fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8));
+fix_key(Key) when is_atom(Key) -> atom_to_binary(Key, utf8);
+fix_key(Key) when is_integer(Key) -> list_to_binary(integer_to_list(Key));
 fix_key(Key) when is_binary(Key) -> Key.


@ -206,6 +189,256 @@ clean_string(Bin, Tokens, Handler, Stack, Config) ->
        String -> String
    end.

+%% clean_string/2: with `dirty_strings` set the binary is trusted and handed
+%% back untouched; otherwise it is run through clean/3 starting from an empty
+%% accumulator, yielding whatever clean/3 returns
+clean_string(Bin, #config{dirty_strings=true}) ->
+    Bin;
+clean_string(Bin, Config) ->
+    clean(Bin, [], Config).
+
+
+%% escape and/or replace bad codepoints if requested
+%%
+%% clean/3 walks the binary one codepoint (or one bad byte) at a time, keeping
+%% the output so far reversed in `Acc`. anything that may need escaping or
+%% replacing is handed to maybe_replace/4, which consults the config and then
+%% resumes the walk; everything else is copied through untouched. the result
+%% is the cleaned binary, or whatever maybe_replace/4 returns when it stops
+%% the walk early
+clean(<<>>, Acc, _Config) ->
+    unicode:characters_to_binary(lists:reverse(Acc));
+%% control characters, double quote, forward slash and backslash are the
+%% ascii codepoints that are candidates for escaping
+clean(<<X, Rest/binary>>, Acc, Config) when X < 32; X =:= $\"; X =:= $/; X =:= $\\ ->
+    maybe_replace(X, Rest, Acc, Config);
+%% the remainder of ascii is copied as is
+clean(<<X, Rest/binary>>, Acc, Config) when X < 128 ->
+    clean(Rest, [X | Acc], Config);
+%% u+2028 and u+2029 are candidates for escaping too
+clean(<<X/utf8, Rest/binary>>, Acc, Config) when X =:= 16#2028; X =:= 16#2029 ->
+    maybe_replace(X, Rest, Acc, Config);
+%% the basic multilingual plane, minus u+fdd0..u+fdef, u+fffe and u+ffff
+clean(<<X/utf8, Rest/binary>>, Acc, Config)
+        when X < 16#d800;
+             X > 16#dfff, X < 16#fdd0;
+             X > 16#fdef, X < 16#fffe ->
+    clean(Rest, [X | Acc], Config);
+%% the supplementary planes, minus the last two codepoints of each plane
+clean(<<X/utf8, Rest/binary>>, Acc, Config)
+        when X >= 16#10000, (X band 16#ffff) < 16#fffe ->
+    clean(Rest, [X | Acc], Config);
+%% surrogates
+clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
+    maybe_replace(surrogate, Rest, Acc, Config);
+%% noncharacters: any decodable codepoint not accepted by a clause above
+clean(<<_/utf8, Rest/binary>>, Acc, Config) ->
+    maybe_replace(noncharacter, Rest, Acc, Config);
+%% u+fffe and u+ffff for R14BXX
+clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X =:= 190; X =:= 191 ->
+    maybe_replace(noncharacter, Rest, Acc, Config);
+%% overlong encodings and missing continuations of a 2 byte sequence
+clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 ->
+    maybe_replace(badutf, strip_continuations(Rest, 1), Acc, Config);
+%% overlong encodings and missing continuations of a 3 byte sequence
+clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
+    maybe_replace(badutf, strip_continuations(Rest, 2), Acc, Config);
+%% overlong encodings and missing continuations of a 4 byte sequence
+clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
+    maybe_replace(badutf, strip_continuations(Rest, 3), Acc, Config);
+%% any other byte is bad on its own
+clean(<<_, Rest/binary>>, Acc, Config) ->
+    maybe_replace(badutf, Rest, Acc, Config).
+
+
+%% drop up to `N` leading utf8 continuation bytes (bit pattern 10xxxxxx, i.e.
+%% 128..191) from the binary, stopping early at the first byte that is not one
+strip_continuations(Bin, 0) ->
+    Bin;
+strip_continuations(<<2#10:2, _:6, Rest/binary>>, N) ->
+    strip_continuations(Rest, N - 1);
+%% not a continuation byte
+strip_continuations(Bin, _) ->
+    Bin.
+
+
+%% maybe_replace/4 decides what to emit for something clean/3 did not copy
+%% through verbatim, then resumes cleaning `Rest`. the first argument is
+%% either an integer codepoint that is a candidate for escaping, or one of the
+%% atoms `noncharacter`, `surrogate` or `badutf` standing in for rejected
+%% input. `Acc` is the output so far, reversed
+%%
+%% with `escaped_strings` set, candidates become json escape sequences
+maybe_replace($\b, Rest, Acc, Config=#config{escaped_strings=true}) ->
+    clean(Rest, [$b, $\\ | Acc], Config);
+maybe_replace($\t, Rest, Acc, Config=#config{escaped_strings=true}) ->
+    clean(Rest, [$t, $\\ | Acc], Config);
+maybe_replace($\n, Rest, Acc, Config=#config{escaped_strings=true}) ->
+    clean(Rest, [$n, $\\ | Acc], Config);
+maybe_replace($\f, Rest, Acc, Config=#config{escaped_strings=true}) ->
+    clean(Rest, [$f, $\\ | Acc], Config);
+maybe_replace($\r, Rest, Acc, Config=#config{escaped_strings=true}) ->
+    clean(Rest, [$r, $\\ | Acc], Config);
+maybe_replace($\", Rest, Acc, Config=#config{escaped_strings=true}) ->
+    clean(Rest, [$\", $\\ | Acc], Config);
+%% forward slashes are only escaped when `escaped_forward_slashes` is also set
+maybe_replace($/, Rest, Acc, Config=#config{escaped_strings=true}) ->
+    case Config#config.escaped_forward_slashes of
+        true -> clean(Rest, [$/, $\\ | Acc], Config);
+        false -> clean(Rest, [$/ | Acc], Config)
+    end;
+maybe_replace($\\, Rest, Acc, Config=#config{escaped_strings=true}) ->
+    clean(Rest, [$\\, $\\ | Acc], Config);
+%% u+2028 and u+2029 become \uXXXX unless `unescaped_jsonp` is set
+maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
+    case Config#config.unescaped_jsonp of
+        true -> clean(Rest, [X | Acc], Config);
+        false -> clean(Rest, lists:reverse(json_escape_sequence(X), Acc), Config)
+    end;
+%% the remaining control characters have no short form and become \uXXXX
+maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X < 32 ->
+    clean(Rest, lists:reverse(json_escape_sequence(X), Acc), Config);
+%% under `strict_utf8` rejected input ends the walk with an error
+maybe_replace(Atom, _, _, #config{strict_utf8=true}) when is_atom(Atom) ->
+    {error, badarg};
+%% otherwise rejected input is swapped for u+fffd
+maybe_replace(Bad, Rest, Acc, Config) when Bad =:= noncharacter; Bad =:= surrogate; Bad =:= badutf ->
+    clean(Rest, [16#fffd | Acc], Config);
+%% nothing to do: keep the codepoint as is
+maybe_replace(X, Rest, Acc, Config) ->
+    clean(Rest, [X | Acc], Config).
+
+
+%% convert a codepoint to its \uXXXX equiv. the codepoint is written out as a
+%% 16 bit integer and each of its four nibbles becomes one hex digit
+json_escape_sequence(X) ->
+    [$\\, $u | [to_hex(Nibble) || <<Nibble:4>> <= <<X:16>>]].
+
+
+%% map a nibble to its lowercase ascii hex digit: 10..15 become "a".."f",
+%% anything else is offset from ascii "0" without further checking
+to_hex(X) when is_integer(X), X >= 10, X =< 15 -> X - 10 + $a;
+to_hex(X) -> X + $0.    %% ascii "0" is [48], "1" is [49], etc...
+

 %% for raw input
 -spec init(proplists:proplist()) -> list().
@ -218,54 +451,20 @@ handle_event(end_json, State) -> lists:reverse(State);
 handle_event(Event, State) -> [Event] ++ State.


-include("jsx_strings.hrl").
-

 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").


-parse(Events, Config) ->
-    Chunk = try
-        value(Events ++ [end_json], {jsx, []}, [], jsx_config:parse_config(Config))
-    catch
-        error:badarg -> {error, badarg}
-    end,
-    Incremental = try
-        Final = lists:foldl(
-            fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end,
-            parser(jsx, [], [explicit_end] ++ Config),
-            lists:map(fun(X) -> [X] end, Events)
-        ),
-        Final(end_stream)
-    catch
-        error:badarg -> {error, badarg}
-    end,
-    ?assert(Chunk == Incremental),
-    Chunk.
-
-
-parse_test_() ->
-    Data = jsx:test_cases(),
-    [
-        {
-            Title, ?_assertEqual(
-                Events ++ [end_json],
-                parse(Events, [])
-            )
-        } || {Title, _, _, Events} <- Data
-    ].
-
-
-parse_error(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)).
+parse(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)).


 error_test_() ->
    [
-        {"value error", ?_assertError(badarg, parse_error([self()], []))},
-        {"maybe_done error", ?_assertError(badarg, parse_error([start_array, end_array, start_array, end_json], []))},
-        {"done error", ?_assertError(badarg, parse_error([{string, <<"">>}, {literal, true}, end_json], []))},
-        {"string error", ?_assertError(badarg, parse_error([{string, <<239, 191, 191>>}, end_json], []))}
+        {"value error", ?_assertError(badarg, parse([self()], []))},
+        {"maybe_done error", ?_assertError(badarg, parse([start_array, end_array, start_array, end_json], []))},
+        {"done error", ?_assertError(badarg, parse([{string, <<"">>}, {literal, true}, end_json], []))},
+        {"string error", ?_assertError(badarg, parse([{string, <<239, 191, 191>>}, end_json], [strict_utf8]))}
    ].


@ -274,47 +473,540 @@ custom_error_handler_test_() ->
    [
        {"value error", ?_assertEqual(
            {value, [self()]},
-            parse_error([self()], [{error_handler, Error}])
+            parse([self()], [{error_handler, Error}])
        )},
        {"maybe_done error", ?_assertEqual(
            {maybe_done, [start_array, end_json]},
-            parse_error([start_array, end_array, start_array, end_json], [{error_handler, Error}])
+            parse([start_array, end_array, start_array, end_json], [{error_handler, Error}])
        )},
        {"done error", ?_assertEqual(
-            {done, [{literal, true}, end_json]},
-            parse_error([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}])
+            {maybe_done, [{literal, true}, end_json]},
+            parse([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}])
        )},
        {"string error", ?_assertEqual(
            {string, [{string, <<239, 191, 191>>}, end_json]},
-            parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}])
+            parse([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict])
        )}
    ].


+%% each truncated event stream is expected to make parse/2 raise badarg
+incomplete_test_() ->
+    lists:map(
+        fun({Title, Events}) ->
+            {Title, ?_assertError(badarg, parse(Events, []))}
+        end,
+        [
+            {"incomplete value", []},
+            {"incomplete object", [start_object]},
+            {"incomplete array", [start_array]},
+            {"incomplete maybe_done", [start_array, end_array]}
+        ]
+    ).
+
+
 custom_incomplete_handler_test_() ->
    [
        {"custom incomplete handler", ?_assertError(
            badarg,
-            parse_error([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}])
+            parse([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}])
        )}
    ].


 raw_test_() ->
+    Parse = fun(Events, Config) -> (parser(?MODULE, [], Config))(Events ++ [end_json]) end,
    [
        {"raw empty list", ?_assertEqual(
-            [start_array, end_array, end_json],
-            parse([{raw, <<"[]">>}], [])
+            [start_array, end_array],
+            Parse([{raw, <<"[]">>}], [])
        )},
        {"raw empty object", ?_assertEqual(
-            [start_object, end_object, end_json],
-            parse([{raw, <<"{}">>}], [])
+            [start_object, end_object],
+            Parse([{raw, <<"{}">>}], [])
        )},
        {"raw chunk inside stream", ?_assertEqual(
-            [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object, end_json],
-            parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], [])
+            [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object],
+            Parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], [])
        )}
    ].


+%% erlang refuses to encode certain codepoints, so fake them: the bits of `N`
+%% are laid out by hand in the 1, 2, 3 or 4 byte utf8 shape that the
+%% magnitude of `N` selects
+to_fake_utf8(N) when N < 16#0080 ->
+    <<N:8>>;
+to_fake_utf8(N) when N < 16#0800 ->
+    <<0:5, Lead:5, Tail:6>> = <<N:16>>,
+    <<2#110:3, Lead:5, 2#10:2, Tail:6>>;
+to_fake_utf8(N) when N < 16#10000 ->
+    <<Lead:4, Mid:6, Tail:6>> = <<N:16>>,
+    <<2#1110:4, Lead:4, 2#10:2, Mid:6, 2#10:2, Tail:6>>;
+to_fake_utf8(N) ->
+    <<0:3, Lead:3, Upper:6, Lower:6, Tail:6>> = <<N:24>>,
+    <<2#11110:5, Lead:3, 2#10:2, Upper:6, 2#10:2, Lower:6, 2#10:2, Tail:6>>.
+
+
+%% utf8 binary of the codepoints from u+0020 to u+fffd that the tests expect
+%% clean_string/2 to return unchanged: leaves out the double quote, forward
+%% slash, backslash, u+2028, u+2029, u+d800..u+dfff and u+fdd0..u+fdef
+codepoints() ->
+    Ranges = [
+        [32, 33],
+        lists:seq(35, 46),
+        lists:seq(48, 91),
+        lists:seq(93, 16#2027),
+        lists:seq(16#202a, 16#d7ff),
+        lists:seq(16#e000, 16#fdcf),
+        lists:seq(16#fdf0, 16#fffd)
+    ],
+    unicode:characters_to_binary(lists:append(Ranges)).
+
+%% utf8 binary of u+10000..u+1fffd followed by the first codepoint of each of
+%% the planes from u+20000 up to u+100000
+extended_codepoints() ->
+    PlaneOne = lists:seq(16#10000, 16#1fffd),
+    PlaneStarts = [Plane bsl 16 || Plane <- lists:seq(2, 16)],
+    unicode:characters_to_binary(PlaneOne ++ PlaneStarts).
+
+%% u+fdd0..u+fdef, each hand encoded by to_fake_utf8/1
+reserved_space() -> lists:map(fun to_fake_utf8/1, lists:seq(16#fdd0, 16#fdef)).
+
+%% u+d800..u+dfff, each hand encoded by to_fake_utf8/1
+surrogates() -> lists:map(fun to_fake_utf8/1, lists:seq(16#d800, 16#dfff)).
+
+%% u+fffe and u+ffff, each hand encoded by to_fake_utf8/1
+noncharacters() -> lists:map(fun to_fake_utf8/1, lists:seq(16#fffe, 16#ffff)).
+
+%% the last two codepoints (xfffe and xffff) of every plane from u+1xxxx up to
+%% u+10xxxx, in ascending order, each hand encoded by to_fake_utf8/1
+extended_noncharacters() ->
+    [ to_fake_utf8((Plane bsl 16) bor Low)
+        || Plane <- lists:seq(1, 16), Low <- [16#fffe, 16#ffff]
+    ].
+
+
+%% clean_string/2 against the codepoint fixtures: good codepoints must come
+%% back unchanged, bad ones must error under `strict_utf8` and otherwise be
+%% replaced by u+fffd
+clean_string_test_() ->
+    Lenient = fun(Codepoint) -> clean_string(Codepoint, #config{}) end,
+    Strict = fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end,
+    Errors = fun(Inputs) -> lists:duplicate(length(Inputs), {error, badarg}) end,
+    Replaced = fun(Inputs) -> lists:duplicate(length(Inputs), <<16#fffd/utf8>>) end,
+    [
+        {"clean codepoints", ?_assertEqual(
+            codepoints(),
+            clean_string(codepoints(), #config{})
+        )},
+        {"clean extended codepoints", ?_assertEqual(
+            extended_codepoints(),
+            clean_string(extended_codepoints(), #config{})
+        )},
+        {"escape path codepoints", ?_assertEqual(
+            codepoints(),
+            clean_string(codepoints(), #config{escaped_strings=true})
+        )},
+        {"escape path extended codepoints", ?_assertEqual(
+            extended_codepoints(),
+            clean_string(extended_codepoints(), #config{escaped_strings=true})
+        )},
+        {"error reserved space", ?_assertEqual(
+            Errors(reserved_space()),
+            lists:map(Strict, reserved_space())
+        )},
+        {"error surrogates", ?_assertEqual(
+            Errors(surrogates()),
+            lists:map(Strict, surrogates())
+        )},
+        {"error noncharacters", ?_assertEqual(
+            Errors(noncharacters()),
+            lists:map(Strict, noncharacters())
+        )},
+        {"error extended noncharacters", ?_assertEqual(
+            Errors(extended_noncharacters()),
+            lists:map(Strict, extended_noncharacters())
+        )},
+        {"clean reserved space", ?_assertEqual(
+            Replaced(reserved_space()),
+            lists:map(Lenient, reserved_space())
+        )},
+        {"clean surrogates", ?_assertEqual(
+            Replaced(surrogates()),
+            lists:map(Lenient, surrogates())
+        )},
+        {"clean noncharacters", ?_assertEqual(
+            Replaced(noncharacters()),
+            lists:map(Lenient, noncharacters())
+        )},
+        {"clean extended noncharacters", ?_assertEqual(
+            Replaced(extended_noncharacters()),
+            lists:map(Lenient, extended_noncharacters())
+        )}
+    ].
+
+
+escape_test_() ->
+    [
+        {"maybe_escape backspace", ?_assertEqual(
+            <<"\\b">>,
+            clean_string(<<16#0008/utf8>>, #config{escaped_strings=true})
+        )},
+        {"don't escape backspace", ?_assertEqual(
+            <<"\b">>,
+            clean_string(<<16#0008/utf8>>, #config{})
+        )},
+        {"maybe_escape tab", ?_assertEqual(
+            <<"\\t">>,
+            clean_string(<<16#0009/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape newline", ?_assertEqual(
+            <<"\\n">>,
+            clean_string(<<16#000a/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape formfeed", ?_assertEqual(
+            <<"\\f">>,
+            clean_string(<<16#000c/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape carriage return", ?_assertEqual(
+            <<"\\r">>,
+            clean_string(<<16#000d/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape quote", ?_assertEqual(
+            <<"\\\"">>,
+            clean_string(<<16#0022/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape forward slash", ?_assertEqual(
+            <<"\\/">>,
+            clean_string(<<16#002f/utf8>>, #config{escaped_strings=true, escaped_forward_slashes=true})
+        )},
+        {"do not maybe_escape forward slash", ?_assertEqual(
+            <<"/">>,
+            clean_string(<<16#002f/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape backslash", ?_assertEqual(
+            <<"\\\\">>,
+            clean_string(<<16#005c/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape jsonp (u2028)", ?_assertEqual(
+            <<"\\u2028">>,
+            clean_string(<<16#2028/utf8>>, #config{escaped_strings=true})
+        )},
+        {"do not maybe_escape jsonp (u2028)", ?_assertEqual(
+            <<16#2028/utf8>>,
+            clean_string(<<16#2028/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true})
+        )},
+        {"maybe_escape jsonp (u2029)", ?_assertEqual(
+            <<"\\u2029">>,
+            clean_string(<<16#2029/utf8>>, #config{escaped_strings=true})
+        )},
+        {"do not maybe_escape jsonp (u2029)", ?_assertEqual(
+            <<16#2029/utf8>>,
+            clean_string(<<16#2029/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true})
+        )},
+        {"maybe_escape u0000", ?_assertEqual(
+            <<"\\u0000">>,
+            clean_string(<<16#0000/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0001", ?_assertEqual(
+            <<"\\u0001">>,
+            clean_string(<<16#0001/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0002", ?_assertEqual(
+            <<"\\u0002">>,
+            clean_string(<<16#0002/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0003", ?_assertEqual(
+            <<"\\u0003">>,
+            clean_string(<<16#0003/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0004", ?_assertEqual(
+            <<"\\u0004">>,
+            clean_string(<<16#0004/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0005", ?_assertEqual(
+            <<"\\u0005">>,
+            clean_string(<<16#0005/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0006", ?_assertEqual(
+            <<"\\u0006">>,
+            clean_string(<<16#0006/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0007", ?_assertEqual(
+            <<"\\u0007">>,
+            clean_string(<<16#0007/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u000b", ?_assertEqual(
+            <<"\\u000b">>,
+            clean_string(<<16#000b/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u000e", ?_assertEqual(
+            <<"\\u000e">>,
+            clean_string(<<16#000e/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u000f", ?_assertEqual(
+            <<"\\u000f">>,
+            clean_string(<<16#000f/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0010", ?_assertEqual(
+            <<"\\u0010">>,
+            clean_string(<<16#0010/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0011", ?_assertEqual(
+            <<"\\u0011">>,
+            clean_string(<<16#0011/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0012", ?_assertEqual(
+            <<"\\u0012">>,
+            clean_string(<<16#0012/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0013", ?_assertEqual(
+            <<"\\u0013">>,
+            clean_string(<<16#0013/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0014", ?_assertEqual(
+            <<"\\u0014">>,
+            clean_string(<<16#0014/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0015", ?_assertEqual(
+            <<"\\u0015">>,
+            clean_string(<<16#0015/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0016", ?_assertEqual(
+            <<"\\u0016">>,
+            clean_string(<<16#0016/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0017", ?_assertEqual(
+            <<"\\u0017">>,
+            clean_string(<<16#0017/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0018", ?_assertEqual(
+            <<"\\u0018">>,
+            clean_string(<<16#0018/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u0019", ?_assertEqual(
+            <<"\\u0019">>,
+            clean_string(<<16#0019/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u001a", ?_assertEqual(
+            <<"\\u001a">>,
+            clean_string(<<16#001a/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u001b", ?_assertEqual(
+            <<"\\u001b">>,
+            clean_string(<<16#001b/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u001c", ?_assertEqual(
+            <<"\\u001c">>,
+            clean_string(<<16#001c/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u001d", ?_assertEqual(
+            <<"\\u001d">>,
+            clean_string(<<16#001d/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u001e", ?_assertEqual(
+            <<"\\u001e">>,
+            clean_string(<<16#001e/utf8>>, #config{escaped_strings=true})
+        )},
+        {"maybe_escape u001f", ?_assertEqual(
+            <<"\\u001f">>,
+            clean_string(<<16#001f/utf8>>, #config{escaped_strings=true})
+        )}
+    ].
+
+
+%% malformed utf8 handling in clean_string/2. every row of the table below
+%% expands into two tests: "<name>" expects `{error, badarg}` when
+%% `strict_utf8` is set, and "<name> replaced" expects the bad sequence to
+%% come back as u+fffd under the default config
+bad_utf8_test_() ->
+    Replacement = <<16#fffd/utf8>>,
+    ReplacementThenSpace = <<16#fffd/utf8, 32>>,
+    ReplacementPair = <<16#fffd/utf8, 32, 16#fffd/utf8>>,
+    AllContinuationBytes = list_to_binary(lists:seq(16#0080, 16#00bf)),
+    Cases = [
+        {"noncharacter u+fffe", to_fake_utf8(16#fffe), Replacement},
+        {"noncharacter u+ffff", to_fake_utf8(16#ffff), Replacement},
+        {"orphan continuation byte u+0080", <<16#80>>, Replacement},
+        {"orphan continuation byte u+00bf", <<16#bf>>, Replacement}
+    ] ++ [
+        %% "2 continuation bytes" through "6 continuation bytes"
+        {
+            integer_to_list(N) ++ " continuation bytes",
+            binary:copy(<<16#80>>, N),
+            binary:copy(Replacement, N)
+        } || N <- lists:seq(2, 6)
+    ] ++ [
+        {
+            "all continuation bytes",
+            AllContinuationBytes,
+            binary:copy(Replacement, byte_size(AllContinuationBytes))
+        },
+        {"lonely start byte", <<16#c0>>, Replacement},
+        {"lonely start bytes (2 byte)", <<16#c0, 32, 16#df>>, ReplacementPair},
+        {"lonely start bytes (3 byte)", <<16#e0, 32, 16#ef>>, ReplacementPair},
+        {"lonely start bytes (4 byte)", <<16#f0, 32, 16#f7>>, ReplacementPair},
+        {"missing continuation byte (3 byte)", <<224, 160, 32>>, ReplacementThenSpace},
+        {
+            "missing continuation byte (4 byte missing one)",
+            <<240, 144, 128, 32>>,
+            ReplacementThenSpace
+        },
+        {
+            "missing continuation byte (4 byte missing two)",
+            <<240, 144, 32>>,
+            ReplacementThenSpace
+        },
+        {
+            "overlong encoding of u+002f (2 byte)",
+            <<16#c0, 16#af, 32>>,
+            ReplacementThenSpace
+        },
+        {
+            "overlong encoding of u+002f (3 byte)",
+            <<16#e0, 16#80, 16#af, 32>>,
+            ReplacementThenSpace
+        },
+        {
+            "overlong encoding of u+002f (4 byte)",
+            <<16#f0, 16#80, 16#80, 16#af, 32>>,
+            ReplacementThenSpace
+        },
+        {"highest overlong 2 byte sequence", <<16#c1, 16#bf, 32>>, ReplacementThenSpace},
+        {
+            "highest overlong 3 byte sequence",
+            <<16#e0, 16#9f, 16#bf, 32>>,
+            ReplacementThenSpace
+        },
+        {
+            "highest overlong 4 byte sequence",
+            <<16#f0, 16#8f, 16#bf, 16#bf, 32>>,
+            ReplacementThenSpace
+        }
+    ],
+    lists:append([
+        [
+            {Name, ?_assertEqual(
+                {error, badarg},
+                clean_string(Bad, #config{strict_utf8=true})
+            )},
+            {Name ++ " replaced", ?_assertEqual(
+                Cleaned,
+                clean_string(Bad, #config{})
+            )}
+        ] || {Name, Bad, Cleaned} <- Cases
+    ]).
+
+
+%% json_escape_sequence/1 is expected to render a codepoint as a `\uXXXX`
+%% escape string: lowercase hex, zero padded to four digits.
+%% the expected value is the first argument to ?_assertEqual so that eunit
+%% failure reports label `expected` and `value` the right way round
+json_escape_sequence_test_() ->
+    [
+        {"json escape sequence test - 16#0000", ?_assertEqual("\\u0000", json_escape_sequence(16#0000))},
+        {"json escape sequence test - 16#abc", ?_assertEqual("\\u0abc", json_escape_sequence(16#abc))},
+        {"json escape sequence test - 16#def", ?_assertEqual("\\u0def", json_escape_sequence(16#def))}
+    ].
+
+
+%% fix_key/1 is expected to pass binary keys through unchanged and to turn
+%% atom and integer keys into their textual binary form.
+%% the expected value is the first argument to ?_assertEqual so that eunit
+%% failure reports label `expected` and `value` the right way round
+fix_key_test_() ->
+    [
+        {"binary key", ?_assertEqual(<<"foo">>, fix_key(<<"foo">>))},
+        {"atom key", ?_assertEqual(<<"foo">>, fix_key(foo))},
+        {"integer key", ?_assertEqual(<<"123">>, fix_key(123))}
+    ].
+
 -endif.
--- a/src/jsx_strings.hrl
+++ b/src/jsx_strings.hrl
@ -1,403 +0,0 @@
-clean_string(Bin, #config{dirty_strings=true}) -> Bin;
-clean_string(Bin, Config) ->
-    case Config#config.replaced_bad_utf8 orelse Config#config.escaped_strings of
-        true -> clean(Bin, [], Config);
-        false -> ensure_clean(Bin)
-    end.
-
-
-ensure_clean(Bin) ->
-    case is_clean(Bin) of
-        ok -> Bin;
-        {error, badarg} -> {error, badarg}
-    end.
-
-%% fast path for no escaping and no correcting, throws error if string is 'bad'
-is_clean(<<>>) -> ok;
-is_clean(<<0, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<1, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<2, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<3, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<4, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<5, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<6, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<7, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<8, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<9, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<10, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<11, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<12, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<13, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<14, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<15, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<16, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<17, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<18, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<19, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<20, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<21, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<22, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<23, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<24, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<25, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<26, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<27, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<28, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<29, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<30, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<31, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<32, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<33, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<34, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<35, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<36, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<37, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<38, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<39, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<40, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<41, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<42, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<43, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<44, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<45, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<46, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<47, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<48, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<49, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<50, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<51, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<52, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<53, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<54, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<55, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<56, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<57, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<58, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<59, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<60, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<61, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<62, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<63, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<64, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<65, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<66, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<67, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<68, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<69, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<70, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<71, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<72, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<73, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<74, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<75, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<76, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<77, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<78, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<79, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<80, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<81, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<82, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<83, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<84, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<85, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<86, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<87, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<88, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<89, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<90, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<91, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<92, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<93, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<94, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<95, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<96, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<97, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<98, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<99, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<100, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<101, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<102, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<103, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<104, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<105, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<106, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<107, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<108, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<109, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<110, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<111, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<112, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<113, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<114, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<115, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<116, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<117, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<118, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<119, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<120, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<121, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<122, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<123, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<124, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<125, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<126, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<127, Rest/binary>>) -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X < 16#d800 -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X > 16#dfff, X < 16#fdd0 -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X > 16#fdef, X < 16#fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#10000, X < 16#1fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#20000, X < 16#2fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#30000, X < 16#3fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#40000, X < 16#4fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#50000, X < 16#5fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#60000, X < 16#6fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#70000, X < 16#7fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#80000, X < 16#8fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#90000, X < 16#9fffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#a0000, X < 16#afffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#b0000, X < 16#bfffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#c0000, X < 16#cfffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#d0000, X < 16#dfffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#e0000, X < 16#efffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#f0000, X < 16#ffffe -> is_clean(Rest);
-is_clean(<<X/utf8, Rest/binary>>) when X >= 16#100000, X < 16#10fffe -> is_clean(Rest);
-is_clean(_Bin) -> {error, badarg}.
-
-
-%% escape and/or replace bad codepoints if requested
-clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc));
-clean(<<0, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(0, Config) ++ Acc, Config);
-clean(<<1, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(1, Config) ++ Acc, Config);
-clean(<<2, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(2, Config) ++ Acc, Config);
-clean(<<3, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(3, Config) ++ Acc, Config);
-clean(<<4, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(4, Config) ++ Acc, Config);
-clean(<<5, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(5, Config) ++ Acc, Config);
-clean(<<6, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(6, Config) ++ Acc, Config);
-clean(<<7, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(7, Config) ++ Acc, Config);
-clean(<<8, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(8, Config) ++ Acc, Config);
-clean(<<9, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(9, Config) ++ Acc, Config);
-clean(<<10, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(10, Config) ++ Acc, Config);
-clean(<<11, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(11, Config) ++ Acc, Config);
-clean(<<12, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(12, Config) ++ Acc, Config);
-clean(<<13, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(13, Config) ++ Acc, Config);
-clean(<<14, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(14, Config) ++ Acc, Config);
-clean(<<15, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(15, Config) ++ Acc, Config);
-clean(<<16, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(16, Config) ++ Acc, Config);
-clean(<<17, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(17, Config) ++ Acc, Config);
-clean(<<18, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(18, Config) ++ Acc, Config);
-clean(<<19, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(19, Config) ++ Acc, Config);
-clean(<<20, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(20, Config) ++ Acc, Config);
-clean(<<21, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(21, Config) ++ Acc, Config);
-clean(<<22, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(22, Config) ++ Acc, Config);
-clean(<<23, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(23, Config) ++ Acc, Config);
-clean(<<24, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(24, Config) ++ Acc, Config);
-clean(<<25, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(25, Config) ++ Acc, Config);
-clean(<<26, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(26, Config) ++ Acc, Config);
-clean(<<27, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(27, Config) ++ Acc, Config);
-clean(<<28, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(28, Config) ++ Acc, Config);
-clean(<<29, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(29, Config) ++ Acc, Config);
-clean(<<30, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(30, Config) ++ Acc, Config);
-clean(<<31, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(31, Config) ++ Acc, Config);
-clean(<<32, Rest/binary>>, Acc, Config) -> clean(Rest, [32] ++ Acc, Config);
-clean(<<33, Rest/binary>>, Acc, Config) -> clean(Rest, [33] ++ Acc, Config);
-clean(<<34, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(34, Config) ++ Acc, Config);
-clean(<<35, Rest/binary>>, Acc, Config) -> clean(Rest, [35] ++ Acc, Config);
-clean(<<36, Rest/binary>>, Acc, Config) -> clean(Rest, [36] ++ Acc, Config);
-clean(<<37, Rest/binary>>, Acc, Config) -> clean(Rest, [37] ++ Acc, Config);
-clean(<<38, Rest/binary>>, Acc, Config) -> clean(Rest, [38] ++ Acc, Config);
-clean(<<39, Rest/binary>>, Acc, Config) -> clean(Rest, [39] ++ Acc, Config);
-clean(<<40, Rest/binary>>, Acc, Config) -> clean(Rest, [40] ++ Acc, Config);
-clean(<<41, Rest/binary>>, Acc, Config) -> clean(Rest, [41] ++ Acc, Config);
-clean(<<42, Rest/binary>>, Acc, Config) -> clean(Rest, [42] ++ Acc, Config);
-clean(<<43, Rest/binary>>, Acc, Config) -> clean(Rest, [43] ++ Acc, Config);
-clean(<<44, Rest/binary>>, Acc, Config) -> clean(Rest, [44] ++ Acc, Config);
-clean(<<45, Rest/binary>>, Acc, Config) -> clean(Rest, [45] ++ Acc, Config);
-clean(<<46, Rest/binary>>, Acc, Config) -> clean(Rest, [46] ++ Acc, Config);
-clean(<<47, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(47, Config) ++ Acc, Config);
-clean(<<48, Rest/binary>>, Acc, Config) -> clean(Rest, [48] ++ Acc, Config);
-clean(<<49, Rest/binary>>, Acc, Config) -> clean(Rest, [49] ++ Acc, Config);
-clean(<<50, Rest/binary>>, Acc, Config) -> clean(Rest, [50] ++ Acc, Config);
-clean(<<51, Rest/binary>>, Acc, Config) -> clean(Rest, [51] ++ Acc, Config);
-clean(<<52, Rest/binary>>, Acc, Config) -> clean(Rest, [52] ++ Acc, Config);
-clean(<<53, Rest/binary>>, Acc, Config) -> clean(Rest, [53] ++ Acc, Config);
-clean(<<54, Rest/binary>>, Acc, Config) -> clean(Rest, [54] ++ Acc, Config);
-clean(<<55, Rest/binary>>, Acc, Config) -> clean(Rest, [55] ++ Acc, Config);
-clean(<<56, Rest/binary>>, Acc, Config) -> clean(Rest, [56] ++ Acc, Config);
-clean(<<57, Rest/binary>>, Acc, Config) -> clean(Rest, [57] ++ Acc, Config);
-clean(<<58, Rest/binary>>, Acc, Config) -> clean(Rest, [58] ++ Acc, Config);
-clean(<<59, Rest/binary>>, Acc, Config) -> clean(Rest, [59] ++ Acc, Config);
-clean(<<60, Rest/binary>>, Acc, Config) -> clean(Rest, [60] ++ Acc, Config);
-clean(<<61, Rest/binary>>, Acc, Config) -> clean(Rest, [61] ++ Acc, Config);
-clean(<<62, Rest/binary>>, Acc, Config) -> clean(Rest, [62] ++ Acc, Config);
-clean(<<63, Rest/binary>>, Acc, Config) -> clean(Rest, [63] ++ Acc, Config);
-clean(<<64, Rest/binary>>, Acc, Config) -> clean(Rest, [64] ++ Acc, Config);
-clean(<<65, Rest/binary>>, Acc, Config) -> clean(Rest, [65] ++ Acc, Config);
-clean(<<66, Rest/binary>>, Acc, Config) -> clean(Rest, [66] ++ Acc, Config);
-clean(<<67, Rest/binary>>, Acc, Config) -> clean(Rest, [67] ++ Acc, Config);
-clean(<<68, Rest/binary>>, Acc, Config) -> clean(Rest, [68] ++ Acc, Config);
-clean(<<69, Rest/binary>>, Acc, Config) -> clean(Rest, [69] ++ Acc, Config);
-clean(<<70, Rest/binary>>, Acc, Config) -> clean(Rest, [70] ++ Acc, Config);
-clean(<<71, Rest/binary>>, Acc, Config) -> clean(Rest, [71] ++ Acc, Config);
-clean(<<72, Rest/binary>>, Acc, Config) -> clean(Rest, [72] ++ Acc, Config);
-clean(<<73, Rest/binary>>, Acc, Config) -> clean(Rest, [73] ++ Acc, Config);
-clean(<<74, Rest/binary>>, Acc, Config) -> clean(Rest, [74] ++ Acc, Config);
-clean(<<75, Rest/binary>>, Acc, Config) -> clean(Rest, [75] ++ Acc, Config);
-clean(<<76, Rest/binary>>, Acc, Config) -> clean(Rest, [76] ++ Acc, Config);
-clean(<<77, Rest/binary>>, Acc, Config) -> clean(Rest, [77] ++ Acc, Config);
-clean(<<78, Rest/binary>>, Acc, Config) -> clean(Rest, [78] ++ Acc, Config);
-clean(<<79, Rest/binary>>, Acc, Config) -> clean(Rest, [79] ++ Acc, Config);
-clean(<<80, Rest/binary>>, Acc, Config) -> clean(Rest, [80] ++ Acc, Config);
-clean(<<81, Rest/binary>>, Acc, Config) -> clean(Rest, [81] ++ Acc, Config);
-clean(<<82, Rest/binary>>, Acc, Config) -> clean(Rest, [82] ++ Acc, Config);
-clean(<<83, Rest/binary>>, Acc, Config) -> clean(Rest, [83] ++ Acc, Config);
-clean(<<84, Rest/binary>>, Acc, Config) -> clean(Rest, [84] ++ Acc, Config);
-clean(<<85, Rest/binary>>, Acc, Config) -> clean(Rest, [85] ++ Acc, Config);
-clean(<<86, Rest/binary>>, Acc, Config) -> clean(Rest, [86] ++ Acc, Config);
-clean(<<87, Rest/binary>>, Acc, Config) -> clean(Rest, [87] ++ Acc, Config);
-clean(<<88, Rest/binary>>, Acc, Config) -> clean(Rest, [88] ++ Acc, Config);
-clean(<<89, Rest/binary>>, Acc, Config) -> clean(Rest, [89] ++ Acc, Config);
-clean(<<90, Rest/binary>>, Acc, Config) -> clean(Rest, [90] ++ Acc, Config);
-clean(<<91, Rest/binary>>, Acc, Config) -> clean(Rest, [91] ++ Acc, Config);
-clean(<<92, Rest/binary>>, Acc, Config) -> clean(Rest, maybe_replace(92, Config) ++ Acc, Config);
-clean(<<93, Rest/binary>>, Acc, Config) -> clean(Rest, [93] ++ Acc, Config);
-clean(<<94, Rest/binary>>, Acc, Config) -> clean(Rest, [94] ++ Acc, Config);
-clean(<<95, Rest/binary>>, Acc, Config) -> clean(Rest, [95] ++ Acc, Config);
-clean(<<96, Rest/binary>>, Acc, Config) -> clean(Rest, [96] ++ Acc, Config);
-clean(<<97, Rest/binary>>, Acc, Config) -> clean(Rest, [97] ++ Acc, Config);
-clean(<<98, Rest/binary>>, Acc, Config) -> clean(Rest, [98] ++ Acc, Config);
-clean(<<99, Rest/binary>>, Acc, Config) -> clean(Rest, [99] ++ Acc, Config);
-clean(<<100, Rest/binary>>, Acc, Config) -> clean(Rest, [100] ++ Acc, Config);
-clean(<<101, Rest/binary>>, Acc, Config) -> clean(Rest, [101] ++ Acc, Config);
-clean(<<102, Rest/binary>>, Acc, Config) -> clean(Rest, [102] ++ Acc, Config);
-clean(<<103, Rest/binary>>, Acc, Config) -> clean(Rest, [103] ++ Acc, Config);
-clean(<<104, Rest/binary>>, Acc, Config) -> clean(Rest, [104] ++ Acc, Config);
-clean(<<105, Rest/binary>>, Acc, Config) -> clean(Rest, [105] ++ Acc, Config);
-clean(<<106, Rest/binary>>, Acc, Config) -> clean(Rest, [106] ++ Acc, Config);
-clean(<<107, Rest/binary>>, Acc, Config) -> clean(Rest, [107] ++ Acc, Config);
-clean(<<108, Rest/binary>>, Acc, Config) -> clean(Rest, [108] ++ Acc, Config);
-clean(<<109, Rest/binary>>, Acc, Config) -> clean(Rest, [109] ++ Acc, Config);
-clean(<<110, Rest/binary>>, Acc, Config) -> clean(Rest, [110] ++ Acc, Config);
-clean(<<111, Rest/binary>>, Acc, Config) -> clean(Rest, [111] ++ Acc, Config);
-clean(<<112, Rest/binary>>, Acc, Config) -> clean(Rest, [112] ++ Acc, Config);
-clean(<<113, Rest/binary>>, Acc, Config) -> clean(Rest, [113] ++ Acc, Config);
-clean(<<114, Rest/binary>>, Acc, Config) -> clean(Rest, [114] ++ Acc, Config);
-clean(<<115, Rest/binary>>, Acc, Config) -> clean(Rest, [115] ++ Acc, Config);
-clean(<<116, Rest/binary>>, Acc, Config) -> clean(Rest, [116] ++ Acc, Config);
-clean(<<117, Rest/binary>>, Acc, Config) -> clean(Rest, [117] ++ Acc, Config);
-clean(<<118, Rest/binary>>, Acc, Config) -> clean(Rest, [118] ++ Acc, Config);
-clean(<<119, Rest/binary>>, Acc, Config) -> clean(Rest, [119] ++ Acc, Config);
-clean(<<120, Rest/binary>>, Acc, Config) -> clean(Rest, [120] ++ Acc, Config);
-clean(<<121, Rest/binary>>, Acc, Config) -> clean(Rest, [121] ++ Acc, Config);
-clean(<<122, Rest/binary>>, Acc, Config) -> clean(Rest, [122] ++ Acc, Config);
-clean(<<123, Rest/binary>>, Acc, Config) -> clean(Rest, [123] ++ Acc, Config);
-clean(<<124, Rest/binary>>, Acc, Config) -> clean(Rest, [124] ++ Acc, Config);
-clean(<<125, Rest/binary>>, Acc, Config) -> clean(Rest, [125] ++ Acc, Config);
-clean(<<126, Rest/binary>>, Acc, Config) -> clean(Rest, [126] ++ Acc, Config);
-clean(<<127, Rest/binary>>, Acc, Config) -> clean(Rest, [127] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
-    clean(Rest, maybe_replace(X, Config) ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#d800 ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#dfff, X < 16#fdd0 ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#fdef, X < 16#fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#10000, X < 16#1fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#20000, X < 16#2fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#30000, X < 16#3fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#40000, X < 16#4fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#50000, X < 16#5fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#60000, X < 16#6fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#70000, X < 16#7fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#80000, X < 16#8fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#90000, X < 16#9fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#a0000, X < 16#afffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#b0000, X < 16#bfffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#c0000, X < 16#cfffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#d0000, X < 16#dfffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#e0000, X < 16#efffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#f0000, X < 16#ffffe ->
-    clean(Rest, [X] ++ Acc, Config);
-clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#100000, X < 16#10fffe ->
-    clean(Rest, [X] ++ Acc, Config);
-%% surrogates
-clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
-    clean(Rest, maybe_replace(surrogate, Config) ++ Acc, Config);
-%% noncharacters
-clean(<<_/utf8, Rest/binary>>, Acc, Config) ->
-    clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config);
-%% u+fffe and u+ffff for R14BXX
-clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X == 190; X == 191 ->
-    clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config);
-%% overlong encodings and missing continuations of a 2 byte sequence
-clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 ->
-    clean(strip_continuations(Rest, 1), maybe_replace(badutf, Config) ++ Acc, Config);
-%% overlong encodings and missing continuations of a 3 byte sequence
-clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
-    clean(strip_continuations(Rest, 2), maybe_replace(badutf, Config) ++ Acc, Config);
-%% overlong encodings and missing continuations of a 4 byte sequence
-clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
-    clean(strip_continuations(Rest, 3), maybe_replace(badutf, Config) ++ Acc, Config);
-clean(<<_, Rest/binary>>, Acc, Config) ->
-    clean(Rest, maybe_replace(badutf, Config) ++ Acc, Config).
-
-
-strip_continuations(Bin, 0) -> Bin;
-strip_continuations(<<X, Rest/binary>>, N) when X >= 128, X =< 191 ->
-    strip_continuations(Rest, N - 1);
-%% not a continuation byte
-strip_continuations(Bin, _) -> Bin.
-
-
-maybe_replace($\b, #config{escaped_strings=true}) -> [$b, $\\];
-maybe_replace($\t, #config{escaped_strings=true}) -> [$t, $\\];
-maybe_replace($\n, #config{escaped_strings=true}) -> [$n, $\\];
-maybe_replace($\f, #config{escaped_strings=true}) -> [$f, $\\];
-maybe_replace($\r, #config{escaped_strings=true}) -> [$r, $\\];
-maybe_replace($\", #config{escaped_strings=true}) -> [$\", $\\];
-maybe_replace($/, Config=#config{escaped_strings=true}) ->
-    case Config#config.escaped_forward_slashes of
-        true -> [$/, $\\];
-        false -> [$/]
-    end;
-maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\];
-maybe_replace(X, Config=#config{escaped_strings=true})  when X == 16#2028; X == 16#2029 ->
-    case Config#config.unescaped_jsonp of
-        true -> [X];
-        false -> lists:reverse(json_escape_sequence(X))
-    end;
-maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
-    lists:reverse(json_escape_sequence(X));
-maybe_replace(noncharacter, #config{replaced_bad_utf8=true}) -> [16#fffd];
-maybe_replace(surrogate, #config{replaced_bad_utf8=true}) -> [16#fffd];
-maybe_replace(badutf, #config{replaced_bad_utf8=true}) -> [16#fffd];
-maybe_replace(_, _) -> {error, badarg}.
-
-
-%% convert a codepoint to it's \uXXXX equiv.
-json_escape_sequence(X) ->
-    <<A:4, B:4, C:4, D:4>> = <<X:16>>,
-    [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))].
-
-
-to_hex(10) -> $a;
-to_hex(11) -> $b;
-to_hex(12) -> $c;
-to_hex(13) -> $d;
-to_hex(14) -> $e;
-to_hex(15) -> $f;
-to_hex(X) -> X + 48.    %% ascii "1" is [49], "2" is [50], etc...
--- a/src/jsx_tests.hrl
+++ b/src/jsx_tests.hrl
@ -1,689 +0,0 @@
-%% data and helper functions for tests
-
-export([init/1, handle_event/2]).
-export([test_cases/0]).
-
-
-include_lib("eunit/include/eunit.hrl").
-
-
-%% test handler
-init([]) -> [].
-
-handle_event(end_json, State) -> lists:reverse([end_json] ++ State);
-handle_event(Event, State) -> [Event] ++ State.
-
-
-test_cases() ->
-    empty_array()
-    ++ nested_array()
-    ++ empty_object()
-    ++ nested_object()
-    ++ strings()
-    ++ literals()
-    ++ integers()
-    ++ floats()
-    ++ compound_object().
-
-
-empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}].
-
-nested_array() ->
-    [{
-        "[[[]]]",
-        <<"[[[]]]">>,
-        [[[]]],
-        [start_array, start_array, start_array, end_array, end_array, end_array]
-    }].
-
-
-empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}].
-
-nested_object() ->
-    [{
-        "{\"key\":{\"key\":{}}}",
-        <<"{\"key\":{\"key\":{}}}">>,
-        [{<<"key">>, [{<<"key">>, [{}]}]}],
-        [
-            start_object,
-                {key, <<"key">>},
-                start_object,
-                    {key, <<"key">>},
-                    start_object,
-                    end_object,
-                end_object,
-            end_object
-        ]
-    }].
-
-
-naked_strings() ->
-    Raw = [
-        "",
-        "hello world"
-    ],
-    [
-        {
-            String,
-            <<"\"", (list_to_binary(String))/binary, "\"">>,
-            list_to_binary(String),
-            [{string, list_to_binary(String)}]
-        }
-        || String <- Raw
-    ].
-
-strings() ->
-    naked_strings()
-    ++ [ wrap_with_array(Test) || Test <- naked_strings() ]
-    ++ [ wrap_with_object(Test) || Test <- naked_strings() ].
-
-
-naked_integers() ->
-    Raw = [
-        1, 2, 3,
-        127, 128, 129,
-        255, 256, 257,
-        65534, 65535, 65536,
-        18446744073709551616,
-        18446744073709551617
-    ],
-    [
-        {
-            integer_to_list(X),
-            list_to_binary(integer_to_list(X)),
-            X,
-            [{integer, X}]
-        }
-        || X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0]
-    ].
-
-integers() ->
-    naked_integers()
-    ++ [ wrap_with_array(Test) || Test <- naked_integers() ]
-    ++ [ wrap_with_object(Test) || Test <- naked_integers() ].
-
-
-naked_floats() ->
-    Raw = [
-        0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
-        1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
-        1234567890.0987654321,
-        0.0e0,
-        1234567890.0987654321e16,
-        0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308,
-        1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308,
-        2.2250738585072014e-308,    %% min normalized float
-        1.7976931348623157e308,     %% max normalized float
-        5.0e-324,                   %% min denormalized float
-        2.225073858507201e-308      %% max denormalized float
-    ],
-    [
-        {
-            sane_float_to_list(X),
-            list_to_binary(sane_float_to_list(X)),
-            X,
-            [{float, X}]
-        }
-        || X <- Raw ++ [ -1 * Y || Y <- Raw ]
-    ].
-
-floats() ->
-    naked_floats()
-    ++ [ wrap_with_array(Test) || Test <- naked_floats() ]
-    ++ [ wrap_with_object(Test) || Test <- naked_floats() ].
-
-
-naked_literals() ->
-    [
-        {
-            atom_to_list(Literal),
-            atom_to_binary(Literal, unicode),
-            Literal,
-            [{literal, Literal}]
-        }
-        || Literal <- [true, false, null]
-    ].
-
-literals() ->
-    naked_literals()
-    ++ [ wrap_with_array(Test) || Test <- naked_literals() ]
-    ++ [ wrap_with_object(Test) || Test <- naked_literals() ].
-
-
-compound_object() ->
-    [{
-        "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]",
-        <<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>,
-        [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]],
-        [
-            start_array,
-                start_object,
-                    {key, <<"alpha">>},
-                    start_array,
-                        {integer, 1},
-                        {integer, 2},
-                        {integer, 3},
-                    end_array,
-                    {key, <<"beta">>},
-                    start_object,
-                        {key, <<"alpha">>},
-                        start_array,
-                            {float, 1.0},
-                            {float, 2.0},
-                            {float, 3.0},
-                        end_array,
-                        {key, <<"beta">>},
-                        start_array,
-                            {literal, true},
-                            {literal, false},
-                        end_array,
-                    end_object,
-                end_object,
-                start_array,
-                    start_object,
-                    end_object,
-                end_array,
-            end_array
-        ]
-    }].
-
-
-wrap_with_array({Title, JSON, Term, Events}) ->
-    {
-        "[" ++ Title ++ "]",
-        <<"[", JSON/binary, "]">>,
-        [Term],
-        [start_array] ++ Events ++ [end_array]
-    }.
-
-
-wrap_with_object({Title, JSON, Term, Events}) ->
-    {
-        "{\"key\":" ++ Title ++ "}",
-        <<"{\"key\":", JSON/binary, "}">>,
-        [{<<"key">>, Term}],
-        [start_object, {key, <<"key">>}] ++ Events ++ [end_object]
-    }.
-
-
-sane_float_to_list(X) ->
-    [Output] = io_lib:format("~p", [X]),
-    Output.
-
-include("jsx_config.hrl").
-include("jsx_strings.hrl").
-
-
-%% erlang refuses to encode certain codepoints, so fake them
-to_fake_utf8(N) when N < 16#0080 -> <<N:8>>;
-to_fake_utf8(N) when N < 16#0800 ->
-    <<0:5, Y:5, X:6>> = <<N:16>>,
-    <<2#110:3, Y:5, 2#10:2, X:6>>;
-to_fake_utf8(N) when N < 16#10000 ->
-    <<Z:4, Y:6, X:6>> = <<N:16>>,
-    <<2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6>>;
-to_fake_utf8(N) ->
-    <<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>,
-    <<2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6>>.
-
-
-codepoints() ->
-    unicode:characters_to_binary(
-        [32, 33]
-        ++ lists:seq(35, 46)
-        ++ lists:seq(48, 91)
-        ++ lists:seq(93, 16#2027)
-        ++ lists:seq(16#202a, 16#d7ff)
-        ++ lists:seq(16#e000, 16#fdcf)
-        ++ lists:seq(16#fdf0, 16#fffd)
-    ).
-
-extended_codepoints() ->
-    unicode:characters_to_binary(
-        lists:seq(16#10000, 16#1fffd) ++ [
-            16#20000, 16#30000, 16#40000, 16#50000, 16#60000,
-            16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000,
-            16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000
-        ]
-    ).
-
-reserved_space() -> [ to_fake_utf8(N) || N <- lists:seq(16#fdd0, 16#fdef) ].
-
-surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ].
-
-noncharacters() -> [ to_fake_utf8(N) || N <- lists:seq(16#fffe, 16#ffff) ].
-
-extended_noncharacters() ->
-    [ to_fake_utf8(N) || N <- [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff]
-        ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff]
-        ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff]
-        ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff]
-        ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff]
-        ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff]
-        ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff]
-        ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff]
-    ].
-
-
-clean_string_test_() ->
-    [
-        {"clean codepoints", ?_assertEqual(
-            codepoints(),
-            clean_string(codepoints(), #config{})
-        )},
-        {"clean extended codepoints", ?_assertEqual(
-            extended_codepoints(),
-            clean_string(extended_codepoints(), #config{})
-        )},
-        {"escape path codepoints", ?_assertEqual(
-            codepoints(),
-            clean_string(codepoints(), #config{escaped_strings=true})
-        )},
-        {"escape path extended codepoints", ?_assertEqual(
-            extended_codepoints(),
-            clean_string(extended_codepoints(), #config{escaped_strings=true})
-        )},
-        {"error reserved space", ?_assertEqual(
-            lists:duplicate(length(reserved_space()), {error, badarg}),
-            lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, reserved_space())
-        )},
-        {"error surrogates", ?_assertEqual(
-            lists:duplicate(length(surrogates()), {error, badarg}),
-            lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, surrogates())
-        )},
-        {"error noncharacters", ?_assertEqual(
-            lists:duplicate(length(noncharacters()), {error, badarg}),
-            lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, noncharacters())
-        )},
-        {"error extended noncharacters", ?_assertEqual(
-            lists:duplicate(length(extended_noncharacters()), {error, badarg}),
-            lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, extended_noncharacters())
-        )},
-        {"clean reserved space", ?_assertEqual(
-            lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>),
-            lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, reserved_space())
-        )},
-        {"clean surrogates", ?_assertEqual(
-            lists:duplicate(length(surrogates()), <<16#fffd/utf8>>),
-            lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, surrogates())
-        )},
-        {"clean noncharacters", ?_assertEqual(
-            lists:duplicate(length(noncharacters()), <<16#fffd/utf8>>),
-            lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, noncharacters())
-        )},
-        {"clean extended noncharacters", ?_assertEqual(
-            lists:duplicate(length(extended_noncharacters()), <<16#fffd/utf8>>),
-            lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{replaced_bad_utf8=true}) end, extended_noncharacters())
-        )}
-    ].
-
-
-maybe_escape(Bin, Config) -> clean_string(Bin, Config).
-
-escape_test_() ->
-    [
-        {"maybe_escape backspace", ?_assertEqual(
-            <<"\\b">>,
-            maybe_escape(<<16#0008/utf8>>, #config{escaped_strings=true})
-        )},
-        {"don't escape backspace", ?_assertEqual(
-            <<"\b">>,
-            maybe_escape(<<16#0008/utf8>>, #config{})
-        )},
-        {"maybe_escape tab", ?_assertEqual(
-            <<"\\t">>,
-            maybe_escape(<<16#0009/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape newline", ?_assertEqual(
-            <<"\\n">>,
-            maybe_escape(<<16#000a/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape formfeed", ?_assertEqual(
-            <<"\\f">>,
-            maybe_escape(<<16#000c/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape carriage return", ?_assertEqual(
-            <<"\\r">>,
-            maybe_escape(<<16#000d/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape quote", ?_assertEqual(
-            <<"\\\"">>,
-            maybe_escape(<<16#0022/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape forward slash", ?_assertEqual(
-            <<"\\/">>,
-            maybe_escape(<<16#002f/utf8>>, #config{escaped_strings=true, escaped_forward_slashes=true})
-        )},
-        {"do not maybe_escape forward slash", ?_assertEqual(
-            <<"/">>,
-            maybe_escape(<<16#002f/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape backslash", ?_assertEqual(
-            <<"\\\\">>,
-            maybe_escape(<<16#005c/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape jsonp (u2028)", ?_assertEqual(
-            <<"\\u2028">>,
-            maybe_escape(<<16#2028/utf8>>, #config{escaped_strings=true})
-        )},
-        {"do not maybe_escape jsonp (u2028)", ?_assertEqual(
-            <<16#2028/utf8>>,
-            maybe_escape(<<16#2028/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true})
-        )},
-        {"maybe_escape jsonp (u2029)", ?_assertEqual(
-            <<"\\u2029">>,
-            maybe_escape(<<16#2029/utf8>>, #config{escaped_strings=true})
-        )},
-        {"do not maybe_escape jsonp (u2029)", ?_assertEqual(
-            <<16#2029/utf8>>,
-            maybe_escape(<<16#2029/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true})
-        )},
-        {"maybe_escape u0000", ?_assertEqual(
-            <<"\\u0000">>,
-            maybe_escape(<<16#0000/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0001", ?_assertEqual(
-            <<"\\u0001">>,
-            maybe_escape(<<16#0001/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0002", ?_assertEqual(
-            <<"\\u0002">>,
-            maybe_escape(<<16#0002/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0003", ?_assertEqual(
-            <<"\\u0003">>,
-            maybe_escape(<<16#0003/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0004", ?_assertEqual(
-            <<"\\u0004">>,
-            maybe_escape(<<16#0004/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0005", ?_assertEqual(
-            <<"\\u0005">>,
-            maybe_escape(<<16#0005/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0006", ?_assertEqual(
-            <<"\\u0006">>,
-            maybe_escape(<<16#0006/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0007", ?_assertEqual(
-            <<"\\u0007">>,
-            maybe_escape(<<16#0007/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u000b", ?_assertEqual(
-            <<"\\u000b">>,
-            maybe_escape(<<16#000b/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u000e", ?_assertEqual(
-            <<"\\u000e">>,
-            maybe_escape(<<16#000e/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u000f", ?_assertEqual(
-            <<"\\u000f">>,
-            maybe_escape(<<16#000f/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0010", ?_assertEqual(
-            <<"\\u0010">>,
-            maybe_escape(<<16#0010/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0011", ?_assertEqual(
-            <<"\\u0011">>,
-            maybe_escape(<<16#0011/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0012", ?_assertEqual(
-            <<"\\u0012">>,
-            maybe_escape(<<16#0012/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0013", ?_assertEqual(
-            <<"\\u0013">>,
-            maybe_escape(<<16#0013/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0014", ?_assertEqual(
-            <<"\\u0014">>,
-            maybe_escape(<<16#0014/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0015", ?_assertEqual(
-            <<"\\u0015">>,
-            maybe_escape(<<16#0015/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0016", ?_assertEqual(
-            <<"\\u0016">>,
-            maybe_escape(<<16#0016/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0017", ?_assertEqual(
-            <<"\\u0017">>,
-            maybe_escape(<<16#0017/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0018", ?_assertEqual(
-            <<"\\u0018">>,
-            maybe_escape(<<16#0018/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u0019", ?_assertEqual(
-            <<"\\u0019">>,
-            maybe_escape(<<16#0019/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u001a", ?_assertEqual(
-            <<"\\u001a">>,
-            maybe_escape(<<16#001a/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u001b", ?_assertEqual(
-            <<"\\u001b">>,
-            maybe_escape(<<16#001b/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u001c", ?_assertEqual(
-            <<"\\u001c">>,
-            maybe_escape(<<16#001c/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u001d", ?_assertEqual(
-            <<"\\u001d">>,
-            maybe_escape(<<16#001d/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u001e", ?_assertEqual(
-            <<"\\u001e">>,
-            maybe_escape(<<16#001e/utf8>>, #config{escaped_strings=true})
-        )},
-        {"maybe_escape u001f", ?_assertEqual(
-            <<"\\u001f">>,
-            maybe_escape(<<16#001f/utf8>>, #config{escaped_strings=true})
-        )}
-    ].
-
-
-bad_utf8_test_() ->
-    [
-        {"noncharacter u+fffe", ?_assertEqual(
-            {error, badarg},
-            clean_string(to_fake_utf8(16#fffe), #config{})
-        )},
-        {"noncharacter u+fffe replaced", ?_assertEqual(
-            <<16#fffd/utf8>>,
-            clean_string(to_fake_utf8(16#fffe), #config{replaced_bad_utf8=true})
-        )},
-        {"noncharacter u+ffff", ?_assertEqual(
-            {error, badarg},
-            clean_string(to_fake_utf8(16#ffff), #config{})
-        )},
-        {"noncharacter u+ffff replaced", ?_assertEqual(
-            <<16#fffd/utf8>>,
-            clean_string(to_fake_utf8(16#ffff), #config{replaced_bad_utf8=true})
-        )},
-        {"orphan continuation byte u+0080", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#0080>>, #config{})
-        )},
-        {"orphan continuation byte u+0080 replaced", ?_assertEqual(
-            <<16#fffd/utf8>>,
-            clean_string(<<16#0080>>, #config{replaced_bad_utf8=true})
-        )},
-        {"orphan continuation byte u+00bf", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#00bf>>, #config{})
-        )},
-        {"orphan continuation byte u+00bf replaced", ?_assertEqual(
-            <<16#fffd/utf8>>,
-            clean_string(<<16#00bf>>, #config{replaced_bad_utf8=true})
-        )},
-        {"2 continuation bytes", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{})
-        )},
-        {"2 continuation bytes replaced", ?_assertEqual(
-            binary:copy(<<16#fffd/utf8>>, 2),
-            clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{replaced_bad_utf8=true})
-        )},
-        {"3 continuation bytes", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{})
-        )},
-        {"3 continuation bytes replaced", ?_assertEqual(
-            binary:copy(<<16#fffd/utf8>>, 3),
-            clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{replaced_bad_utf8=true})
-        )},
-        {"4 continuation bytes", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{})
-        )},
-        {"4 continuation bytes replaced", ?_assertEqual(
-            binary:copy(<<16#fffd/utf8>>, 4),
-            clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{replaced_bad_utf8=true})
-        )},
-        {"5 continuation bytes", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{})
-        )},
-        {"5 continuation bytes replaced", ?_assertEqual(
-            binary:copy(<<16#fffd/utf8>>, 5),
-            clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{replaced_bad_utf8=true})
-        )},
-        {"6 continuation bytes", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{})
-        )},
-        {"6 continuation bytes replaced", ?_assertEqual(
-            binary:copy(<<16#fffd/utf8>>, 6),
-            clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{replaced_bad_utf8=true})
-        )},
-        {"all continuation bytes", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{})
-        )},
-        {"all continuation bytes replaced", ?_assertEqual(
-            binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))),
-            clean_string(
-                <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>,
-                #config{replaced_bad_utf8=true}
-            )
-        )},
-        {"lonely start byte", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#00c0>>, #config{})
-        )},
-        {"lonely start byte replaced", ?_assertEqual(
-            <<16#fffd/utf8>>,
-            clean_string(<<16#00c0>>, #config{replaced_bad_utf8=true})
-        )},
-        {"lonely start bytes (2 byte)", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#00c0, 32, 16#00df>>, #config{})
-        )},
-        {"lonely start bytes (2 byte) replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32, 16#fffd/utf8>>,
-            clean_string(<<16#00c0, 32, 16#00df>>, #config{replaced_bad_utf8=true})
-        )},
-        {"lonely start bytes (3 byte)", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#00e0, 32, 16#00ef>>, #config{})
-        )},
-        {"lonely start bytes (3 byte) replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32, 16#fffd/utf8>>,
-            clean_string(<<16#00e0, 32, 16#00ef>>, #config{replaced_bad_utf8=true})
-        )},
-        {"lonely start bytes (4 byte)", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#00f0, 32, 16#00f7>>, #config{})
-        )},
-        {"lonely start bytes (4 byte) replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32, 16#fffd/utf8>>,
-            clean_string(<<16#00f0, 32, 16#00f7>>, #config{replaced_bad_utf8=true})
-        )},
-        {"missing continuation byte (3 byte)", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<224, 160, 32>>, #config{})
-        )},
-        {"missing continuation byte (3 byte) replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32>>,
-            clean_string(<<224, 160, 32>>, #config{replaced_bad_utf8=true})
-        )},
-        {"missing continuation byte (4 byte missing one)", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<240, 144, 128, 32>>, #config{})
-        )},
-        {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32>>,
-            clean_string(<<240, 144, 128, 32>>, #config{replaced_bad_utf8=true})
-        )},
-        {"missing continuation byte (4 byte missing two)", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<240, 144, 32>>, #config{})
-        )},
-        {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32>>,
-            clean_string(<<240, 144, 32>>, #config{replaced_bad_utf8=true})
-        )},
-        {"overlong encoding of u+002f (2 byte)", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#c0, 16#af, 32>>, #config{})
-        )},
-        {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32>>,
-            clean_string(<<16#c0, 16#af, 32>>, #config{replaced_bad_utf8=true})
-        )},
-        {"overlong encoding of u+002f (3 byte)", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{})
-        )},
-        {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32>>,
-            clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true})
-        )},
-        {"overlong encoding of u+002f (4 byte)", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{})
-        )},
-        {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32>>,
-            clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{replaced_bad_utf8=true})
-        )},
-        {"highest overlong 2 byte sequence", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#c1, 16#bf, 32>>, #config{})
-        )},
-        {"highest overlong 2 byte sequence replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32>>,
-            clean_string(<<16#c1, 16#bf, 32>>, #config{replaced_bad_utf8=true})
-        )},
-        {"highest overlong 3 byte sequence", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{})
-        )},
-        {"highest overlong 3 byte sequence replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32>>,
-            clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{replaced_bad_utf8=true})
-        )},
-        {"highest overlong 4 byte sequence", ?_assertEqual(
-            {error, badarg},
-            clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{})
-        )},
-        {"highest overlong 4 byte sequence replaced", ?_assertEqual(
-            <<16#fffd/utf8, 32>>,
-            clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{replaced_bad_utf8=true})
-        )}
-    ].
-
-
-json_escape_sequence_test_() ->
-    [
-        {"json escape sequence test - 16#0000", ?_assertEqual(json_escape_sequence(16#0000), "\\u0000")},
-        {"json escape sequence test - 16#abc", ?_assertEqual(json_escape_sequence(16#abc), "\\u0abc")},
-        {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")}
-    ].
--- a/src/jsx_to_json.erl
+++ b/src/jsx_to_json.erl
@ -25,6 +25,8 @@

 -export([to_json/2, format/2]).
 -export([init/1, handle_event/2]).
+-export([start_json/0, start_json/1]).
+-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]).


 -record(config, {
@ -74,7 +76,6 @@ parse_config([], Config) ->
    Config.


-
 -define(start_object, <<"{">>).
 -define(start_array, <<"[">>).
 -define(end_object, <<"}">>).
@ -86,95 +87,50 @@ parse_config([], Config) ->
 -define(newline, <<"\n">>).


-type state() :: {any(), unicode:charlist(), #config{}}.
+-type state() :: {unicode:charlist(), #config{}}.
 -spec init(Config::proplists:proplist()) -> state().

-init(Config) -> {start, [], parse_config(Config)}.
+init(Config) -> {[], parse_config(Config)}.
+

 -spec handle_event(Event::any(), State::state()) -> state().

-handle_event(Event, {start, Acc, Config}) ->
-    case Event of
-        {Type, Value} -> {[], [Acc, encode(Type, Value, Config)], Config}
-        ; start_object -> {[object_start], [Acc, ?start_object], Config}
-        ; start_array -> {[array_start], [Acc, ?start_array], Config}
-    end;
-handle_event(Event, {[object_start|Stack], Acc, OldConfig = #config{depth = Depth}}) ->
-    Config = OldConfig#config{depth = Depth + 1},
-    case Event of
-        {key, Key} ->
-            {[object_value|Stack], [Acc, indent(Config), encode(string, Key, Config), ?colon, space(Config)], Config}
-        ; end_object ->
-            {Stack, [Acc, ?end_object], OldConfig}
-    end;
-handle_event(Event, {[object_value|Stack], Acc, Config}) ->
-    case Event of
-        {Type, Value} when Type == string; Type == literal;
-                Type == integer; Type == float ->
-            {[key|Stack], [Acc, encode(Type, Value, Config)], Config}
-        ; start_object -> {[object_start, key|Stack], [Acc, ?start_object], Config}
-        ; start_array -> {[array_start, key|Stack], [Acc, ?start_array], Config}
-    end;
-handle_event(Event, {[key|Stack], Acc, Config = #config{depth = Depth}}) ->
-    case Event of
-        {key, Key} ->
-            {[object_value|Stack], [Acc, ?comma, indent_or_space(Config), encode(string, Key, Config), ?colon, space(Config)], Config}
-        ; end_object ->
-            NewConfig = Config#config{depth = Depth - 1},
-            {Stack, [Acc, indent(NewConfig), ?end_object], NewConfig}
-    end;
-handle_event(Event, {[array_start|Stack], Acc, OldConfig = #config{depth = Depth}}) ->
-    Config = OldConfig#config{depth = Depth + 1},
-    case Event of
-        {Type, Value} when Type == string; Type == literal;
-                Type == integer; Type == float ->
-            {[array|Stack], [Acc, indent(Config), encode(Type, Value, Config)], Config}
-        ; start_object -> {[object_start, array|Stack], [Acc, indent(Config), ?start_object], Config}
-        ; start_array -> {[array_start, array|Stack], [Acc, indent(Config), ?start_array], Config}
-        ; end_array -> {Stack, [Acc, ?end_array], OldConfig}
-    end;
-handle_event(Event, {[array|Stack], Acc, Config = #config{depth = Depth}}) ->
-    case Event of
-        {Type, Value} when Type == string; Type == literal;
-                Type == integer; Type == float ->
-            {[array|Stack], [Acc, ?comma, indent_or_space(Config), encode(Type, Value, Config)], Config}
-        ; end_array ->
-            NewConfig = Config#config{depth = Depth - 1},
-            {Stack, [Acc, indent(NewConfig), ?end_array], NewConfig}
-        ; start_object -> {[object_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_object], Config}
-        ; start_array -> {[array_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_array], Config}
-    end;
-handle_event(end_json, {[], Acc, _Config}) -> unicode:characters_to_binary(Acc, utf8).
+handle_event(end_json, State) -> get_value(State);
+
+handle_event(start_object, State) -> start_object(State);
+handle_event(end_object, State) -> finish(State);
+
+handle_event(start_array, State) -> start_array(State);
+handle_event(end_array, State) -> finish(State);
+
+handle_event({Type, Event}, {_, Config} = State) -> insert(encode(Type, Event, Config), State).


 encode(string, String, _Config) ->
-    [?quote, String, ?quote];
+    <<?quote/binary, String/binary, ?quote/binary>>;
+encode(key, Key, _Config) ->
+    <<?quote/binary, Key/binary, ?quote/binary>>;
 encode(literal, Literal, _Config) ->
-    erlang:atom_to_list(Literal);
+    unicode:characters_to_binary(erlang:atom_to_list(Literal));
 encode(integer, Integer, _Config) ->
-    erlang:integer_to_list(Integer);
+    unicode:characters_to_binary(erlang:integer_to_list(Integer));
 encode(float, Float, _Config) ->
-    [Output] = io_lib:format("~p", [Float]), Output.
+    [Output] = io_lib:format("~p", [Float]), unicode:characters_to_binary(Output).


 space(Config) ->
    case Config#config.space of
-        0 -> []
+        0 -> <<>>
        ; X when X > 0 -> binary:copy(?space, X)
    end.


 indent(Config) ->
    case Config#config.indent of
-        0 -> []
-        ; X when X > 0 ->
-            Indent = binary:copy(?space, X),
-            indent(Indent, Config#config.depth, [?newline])
+        0 -> <<>>
+        ; X when X > 0 -> <<?newline/binary, (binary:copy(?space, X * Config#config.depth))/binary>>
    end.

-indent(_Indent, 0, Acc) -> Acc;
-indent(Indent, N, Acc) -> indent(Indent, N - 1, [Acc, Indent]).
-

 indent_or_space(Config) ->
    case Config#config.indent > 0 of
@ -183,6 +139,119 @@ indent_or_space(Config) ->
    end.


+%% internal state is a stack and a config object
+%%  `{Stack, Config}`
+%% the stack is a list of in progress objects/arrays
+%%  `[Current, Parent, Grandparent,...OriginalAncestor]`
+%% an object has the representation on the stack of
+%%  `{object, Object}`
+%% or if there's a key with a yet to be matched value
+%%  `{object, Key, Object}`
+%% an array looks like
+%%  `{array, Array}`
+%% `Object` and `Array` are utf8 encoded binaries
+
+start_json() -> {[], #config{}}.
+
+start_json(Config) when is_list(Config) -> {[], parse_config(Config)}.
+
+%% allocate a new object on top of the stack
+start_object({Stack, Config}) -> {[{object, ?start_object}] ++ Stack, Config}.
+
+%% allocate a new array on top of the stack
+start_array({Stack, Config}) -> {[{array, ?start_array}] ++ Stack, Config}.
+
+%% finish an object or array and insert it into the parent object if it exists
+finish({[{object, Object}], Config}) ->
+    {<<Object/binary, ?end_object/binary>>, Config};
+finish({[{object, Object}|Rest], Config}) ->
+    insert(<<Object/binary, ?end_object/binary>>, {Rest, Config});
+finish({[{array, Array}], Config}) ->
+    {<<Array/binary, ?end_array/binary>>, Config};
+finish({[{array, Array}|Rest], Config}) ->
+    insert(<<Array/binary, ?end_array/binary>>, {Rest, Config});
+finish(_) -> erlang:error(badarg).
+
+%% insert a value when there's no parent object or array
+insert(Value, {[], Config}) when is_binary(Value) ->
+    {Value, Config};
+%% insert a key or value into an object or array, autodetects the 'right' thing
+insert(Key, {[{object, Object}|Rest], Config}) when is_binary(Key) ->
+    {[{object, Key, Object}] ++ Rest, Config};
+insert(Value, {[{object, Key, ?start_object}|Rest], Config}) when is_binary(Value) ->
+    {
+        [{object, <<?start_object/binary,
+            Key/binary,
+            ?colon/binary,
+            (space(Config))/binary,
+            Value/binary
+        >>}] ++ Rest,
+        Config
+    };
+insert(Value, {[{object, Key, Object}|Rest], Config}) when is_binary(Value) ->
+    {
+        [{object, <<Object/binary,
+            ?comma/binary,
+            (indent_or_space(Config))/binary,
+            Key/binary,
+            ?colon/binary,
+            (space(Config))/binary,
+            Value/binary
+        >>}] ++ Rest,
+        Config
+    };
+insert(Value, {[{array, ?start_array}|Rest], Config}) when is_binary(Value) ->
+    {[{array, <<?start_array/binary, Value/binary>>}] ++ Rest, Config};
+insert(Value, {[{array, Array}|Rest], Config}) when is_binary(Value) ->
+    {
+        [{array, <<Array/binary,
+            ?comma/binary,
+            (indent_or_space(Config))/binary,
+            Value/binary
+        >>}] ++ Rest,
+        Config
+    };
+insert(_, _) -> erlang:error(badarg).
+
+%% insert a key/value pair into an object
+insert(Key, Value, {[{object, ?start_object}|Rest], Config}) when is_binary(Key), is_binary(Value) ->
+    {
+        [{object, <<?start_object/binary,
+            Key/binary,
+            ?colon/binary,
+            (space(Config))/binary,
+            Value/binary
+        >>}] ++ Rest,
+        Config
+    };
+insert(Key, Value, {[{object, Object}|Rest], Config}) when is_binary(Key), is_binary(Value) ->
+    {
+        [{object, <<Object/binary,
+            ?comma/binary,
+            (indent_or_space(Config))/binary,
+            Key/binary,
+            ?colon/binary,
+            (space(Config))/binary,
+            Value/binary
+        >>}] ++ Rest,
+        Config
+    };
+insert(_, _, _) -> erlang:error(badarg).
+
+
+get_key({[{object, Key, _}|_], _}) -> Key;
+get_key(_) -> erlang:error(badarg).
+
+
+%% return the finished json text held by a completed state. a state is only
+%%  complete when its first element is a binary, ie no object or array is
+%%  still open on the stack and a root value has been inserted. anything else
+%%  is a `badarg`
+get_value({Value, _Config}) when is_binary(Value) -> Value;
+get_value(_) -> erlang:error(badarg).
+
+
+
 %% eunit tests

 -ifdef(TEST).
@ -215,7 +284,7 @@ config_test_() ->

 space_test_() ->
    [
-        {"no space", ?_assertEqual([], space(#config{space=0}))},
+        {"no space", ?_assertEqual(<<>>, space(#config{space=0}))},
        {"one space", ?_assertEqual(<<" ">>, space(#config{space=1}))},
        {"four spaces", ?_assertEqual(<<"    ">>, space(#config{space=4}))}
    ].
@ -223,21 +292,21 @@ space_test_() ->

 indent_test_() ->
    [
-        {"no indent", ?_assertEqual([], indent(#config{indent=0, depth=1}))},
+        {"no indent", ?_assertEqual(<<>>, indent(#config{indent=0, depth=1}))},
        {"indent 1 depth 1", ?_assertEqual(
-            [[?newline], ?space],
+            <<?newline/binary, <<" ">>/binary>>,
            indent(#config{indent=1, depth=1})
        )},
        {"indent 1 depth 2", ?_assertEqual(
-            [[[?newline], ?space], ?space],
+            <<?newline/binary, <<"  ">>/binary>>,
            indent(#config{indent=1, depth=2})
        )},
        {"indent 4 depth 1", ?_assertEqual(
-            [[?newline], <<"    ">>],
+            <<?newline/binary, <<"    ">>/binary>>,
            indent(#config{indent=4, depth=1})
        )},
        {"indent 4 depth 2", ?_assertEqual(
-            [[[?newline], <<"    ">>], <<"    ">>],
+            <<?newline/binary, <<"    ">>/binary, <<"    ">>/binary>>,
            indent(#config{indent=4, depth=2})
        )}
    ].
@ -250,7 +319,7 @@ indent_or_space_test_() ->
            indent_or_space(#config{space=1, indent=0, depth=1})
        )},
        {"indent so no space", ?_assertEqual(
-            [[?newline], ?space],
+            <<?newline/binary, <<" ">>/binary>>,
            indent_or_space(#config{space=1, indent=1, depth=1})
        )}
    ].
@ -258,50 +327,137 @@ indent_or_space_test_() ->

 format_test_() ->
    [
-        {"0.0", ?_assert(encode(float, 0.0, #config{}) =:= "0.0")},
-        {"1.0", ?_assert(encode(float, 1.0, #config{}) =:= "1.0")},
-        {"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= "-1.0")},
+        {"0.0", ?_assert(encode(float, 0.0, #config{}) =:= <<"0.0">>)},
+        {"1.0", ?_assert(encode(float, 1.0, #config{}) =:= <<"1.0">>)},
+        {"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= <<"-1.0">>)},
        {"3.1234567890987654321", 
            ?_assert(
-                encode(float, 3.1234567890987654321, #config{}) =:= "3.1234567890987655")
+                encode(float, 3.1234567890987654321, #config{}) =:= <<"3.1234567890987655">>)
        },
-        {"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= "1.0e23")},
-        {"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= "0.3")},
-        {"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= "0.0001")},
-        {"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= "1.0e-5")},
-        {"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= "1.0e-8")},
-        {"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= "1.0e-323")},
-        {"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= "1.0e308")},
+        {"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= <<"1.0e23">>)},
+        {"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= <<"0.3">>)},
+        {"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= <<"0.0001">>)},
+        {"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= <<"1.0e-5">>)},
+        {"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= <<"1.0e-8">>)},
+        {"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= <<"1.0e-323">>)},
+        {"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= <<"1.0e308">>)},
        {"min normalized float", 
            ?_assert(
-                encode(float, math:pow(2, -1022), #config{}) =:= "2.2250738585072014e-308"
+                encode(float, math:pow(2, -1022), #config{}) =:= <<"2.2250738585072014e-308">>
            )
        },
        {"max normalized float", 
            ?_assert(
                encode(float, (2 - math:pow(2, -52)) * math:pow(2, 1023), #config{}) 
-                    =:= "1.7976931348623157e308"
+                    =:= <<"1.7976931348623157e308">>
            )
        },
        {"min denormalized float", 
-            ?_assert(encode(float, math:pow(2, -1074), #config{}) =:= "5.0e-324")
+            ?_assert(encode(float, math:pow(2, -1074), #config{}) =:= <<"5.0e-324">>)
        },
        {"max denormalized float", 
            ?_assert(
                encode(float, (1 - math:pow(2, -52)) * math:pow(2, -1022), #config{}) 
-                    =:= "2.225073858507201e-308"
+                    =:= <<"2.225073858507201e-308">>
            )
-        }
+        },
+        {"hello world", ?_assert(encode(string, <<"hello world">>, #config{}) =:= <<"\"hello world\"">>)},
+        {"key", ?_assert(encode(key, <<"key">>, #config{}) =:= <<"\"key\"">>)},
+        {"1", ?_assert(encode(integer, 1, #config{}) =:= <<"1">>)},
+        {"-1", ?_assert(encode(integer, -1, #config{}) =:= <<"-1">>)},
+        {"true", ?_assert(encode(literal, true, #config{}) =:= <<"true">>)},
+        {"false", ?_assert(encode(literal, false, #config{}) =:= <<"false">>)},
+        {"null", ?_assert(encode(literal, null, #config{}) =:= <<"null">>)}      
+    ].
+
+
+rep_manipulation_test_() ->
+    [
+        {"allocate a new context", ?_assertEqual(
+            {[], #config{}},
+            start_json()
+        )},
+        {"allocate a new context with config", ?_assertEqual(
+            {[], #config{space=1, indent=2}},
+            start_json([{space, 1}, {indent, 2}])
+        )},
+        {"allocate a new object on an empty stack", ?_assertEqual(
+            {[{object, <<"{">>}], #config{}},
+            start_object({[], #config{}})
+        )},
+        {"allocate a new object on a stack", ?_assertEqual(
+            {[{object, <<"{">>}, {object, <<"{">>}], #config{}},
+            start_object({[{object, <<"{">>}], #config{}})
+        )},
+        {"allocate a new array on an empty stack", ?_assertEqual(
+            {[{array, <<"[">>}], #config{}},
+            start_array({[], #config{}})
+        )},
+        {"allocate a new array on a stack", ?_assertEqual(
+            {[{array, <<"[">>}, {object, <<"{">>}], #config{}},
+            start_array({[{object, <<"{">>}], #config{}})
+        )},
+        {"insert a key into an object", ?_assertEqual(
+            {[{object, <<"\"key\"">>, <<"{">>}], #config{}},
+            insert(<<"\"key\"">>, {[{object, <<"{">>}], #config{}})
+        )},
+        {"get current key", ?_assertEqual(
+            key,
+            get_key({[{object, key, <<"{">>}], #config{}})
+        )},
+        {"try to get non-key from object", ?_assertError(
+            badarg,
+            get_key({[{object, <<"{">>}], #config{}})
+        )},
+        {"try to get key from array", ?_assertError(
+            badarg,
+            get_key({[{array, <<"[">>}], #config{}})
+        )},
+        {"insert a value into an object", ?_assertEqual(
+            {[{object, <<"{\"key\":true">>}], #config{}},
+            insert(<<"true">>, {[{object, <<"\"key\"">>, <<"{">>}], #config{}})
+        )},
+        {"insert a value into an array", ?_assertEqual(
+            {[{array, <<"[true">>}], #config{}},
+            insert(<<"true">>, {[{array, <<"[">>}], #config{}})
+        )},
+        {"insert a key/value pair into an object", ?_assertEqual(
+            {[{object, <<"{\"x\":true,\"y\":false">>}], #config{}},
+            insert(<<"\"y\"">>, <<"false">>, {[{object, <<"{\"x\":true">>}], #config{}})
+        )},
+        {"finish an object with no ancestor", ?_assertEqual(
+            {<<"{\"x\":true,\"y\":false}">>, #config{}},
+            finish({[{object, <<"{\"x\":true,\"y\":false">>}], #config{}})
+        )},
+        {"finish an empty object", ?_assertEqual(
+            {<<"{}">>, #config{}},
+            finish({[{object, <<"{">>}], #config{}})
+        )},
+        {"finish an object with an ancestor", ?_assertEqual(
+            {[{object, <<"{\"a\":[],\"b\":{\"x\":true,\"y\":false}">>}], #config{}},
+            finish({
+                [{object, <<"{\"x\":true,\"y\":false">>}, {object, <<"\"b\"">>, <<"{\"a\":[]">>}],
+                #config{}
+            })
+        )},
+        {"finish an array with no ancestor", ?_assertEqual(
+            {<<"[true,false,null]">>, #config{}},
+            finish({[{array, <<"[true,false,null">>}], #config{}})
+        )},
+        {"finish an array with an ancestor", ?_assertEqual(
+            {[{array, <<"[1,2,3,[true,false,null]">>}], #config{}},
+            finish({[{array, <<"[true,false,null">>}, {array, <<"[1,2,3">>}], #config{}})
+        )}
    ].


 handle_event_test_() ->
-    Data = jsx:test_cases(),
+    Data = jsx:test_cases() ++ jsx:special_test_cases(),
    [
        {
            Title, ?_assertEqual(
                JSON,
-                lists:foldl(fun handle_event/2, {start, [], #config{}}, Events ++ [end_json])
+                lists:foldl(fun handle_event/2, init([]), Events ++ [end_json])
            )
        } || {Title, JSON, _, Events} <- Data
    ].
--- a/src/jsx_to_term.erl
+++ b/src/jsx_to_term.erl
@ -25,11 +25,12 @@

 -export([to_term/2]).
 -export([init/1, handle_event/2]).
+-export([start_term/0, start_term/1]).
+-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]).


 -record(config, {
-    labels = binary,
-    post_decode = false
+    labels = binary
 }).

 -type config() :: list().
@ -59,8 +60,6 @@ parse_config([{labels, Val}|Rest], Config)
    parse_config(Rest, Config#config{labels = Val});
 parse_config([labels|Rest], Config) ->
    parse_config(Rest, Config#config{labels = binary});
-parse_config([{post_decode, F}|Rest], Config=#config{post_decode=false}) when is_function(F, 1) ->
-    parse_config(Rest, Config#config{post_decode=F});
 parse_config([{K, _}|Rest] = Options, Config) ->
    case lists:member(K, jsx_config:valid_flags()) of
        true -> parse_config(Rest, Config)
@ -77,34 +76,21 @@ parse_config([], Config) ->
 -type state() :: {[any()], #config{}}.
 -spec init(Config::proplists:proplist()) -> state().

-init(Config) -> {[[]], parse_config(Config)}.
+init(Config) -> {[], parse_config(Config)}.

 -spec handle_event(Event::any(), State::state()) -> state().

-handle_event(end_json, {[[Terms]], _Config}) -> Terms;
+handle_event(end_json, State) -> get_value(State);

-handle_event(start_object, {Terms, Config}) -> {[[]|Terms], Config};
-handle_event(end_object, {[[], {key, Key}, Last|Terms], Config}) ->
-    {[[{Key, post_decode([{}], Config)}] ++ Last] ++ Terms, Config};
-handle_event(end_object, {[Object, {key, Key}, Last|Terms], Config}) ->
-    {[[{Key, post_decode(lists:reverse(Object), Config)}] ++ Last] ++ Terms, Config};
-handle_event(end_object, {[[], Last|Terms], Config}) ->
-    {[[post_decode([{}], Config)] ++ Last] ++ Terms, Config};
-handle_event(end_object, {[Object, Last|Terms], Config}) ->
-    {[[post_decode(lists:reverse(Object), Config)] ++ Last] ++ Terms, Config};
+handle_event(start_object, State) -> start_object(State);
+handle_event(end_object, State) -> finish(State);

-handle_event(start_array, {Terms, Config}) -> {[[]|Terms], Config};
-handle_event(end_array, {[List, {key, Key}, Last|Terms], Config}) ->
-    {[[{Key, post_decode(lists:reverse(List), Config)}] ++ Last] ++ Terms, Config};
-handle_event(end_array, {[List, Last|Terms], Config}) ->
-    {[[post_decode(lists:reverse(List), Config)] ++ Last] ++ Terms, Config};
+handle_event(start_array, State) -> start_array(State);
+handle_event(end_array, State) -> finish(State);

-handle_event({key, Key}, {Terms, Config}) -> {[{key, format_key(Key, Config)}] ++ Terms, Config};
+handle_event({key, Key}, {_, Config} = State) -> insert(format_key(Key, Config), State);

-handle_event({_, Event}, {[{key, Key}, Last|Terms], Config}) ->
-    {[[{Key, post_decode(Event, Config)}] ++ Last] ++ Terms, Config};
-handle_event({_, Event}, {[Last|Terms], Config}) ->
-    {[[post_decode(Event, Config)] ++ Last] ++ Terms, Config}.
+handle_event({_, Event}, State) -> insert(Event, State).


 format_key(Key, Config) ->
@ -121,8 +107,60 @@ format_key(Key, Config) ->
    end.


-post_decode(Value, #config{post_decode=false}) -> Value;
-post_decode(Value, Config) -> (Config#config.post_decode)(Value).
+%% internal state is a stack and a config object
+%%  `{Stack, Config}`
+%% the stack is a list of in progress objects/arrays
+%%  `[Current, Parent, Grandparent,...OriginalAncestor]`
+%% an object has the representation on the stack of
+%%  `{object, [{NthKey, NthValue}, {NthMinus1Key, NthMinus1Value},...{FirstKey, FirstValue}]}`
+%% or if there's a key with a yet to be matched value
+%%  `{object, Key, [{NthKey, NthValue},...]}`
+%% an array looks like
+%%  `{array, [NthValue, NthMinus1Value,...FirstValue]}`
+
+start_term() -> {[], #config{}}.
+
+start_term(Config) when is_list(Config) -> {[], parse_config(Config)}.
+
+%% allocate a new object on top of the stack
+start_object({Stack, Config}) -> {[{object, []}] ++ Stack, Config}.
+
+%% allocate a new array on top of the stack
+start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}.
+
+%% finish an object or array and insert it into the parent object if it exists or
+%%  return it if it is the root object
+finish({[{object, []}], Config}) -> {[{}], Config};
+finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config});
+finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config};
+finish({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config});
+finish({[{array, Values}], Config}) -> {lists:reverse(Values), Config};
+finish({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config});
+finish(_) -> erlang:error(badarg).
+
+%% insert a value when there's no parent object or array
+insert(Value, {[], Config}) -> {Value, Config};
+%% insert a key or value into an object or array, autodetects the 'right' thing
+insert(Key, {[{object, Pairs}|Rest], Config}) ->
+    {[{object, Key, Pairs}] ++ Rest, Config};
+insert(Value, {[{object, Key, Pairs}|Rest], Config}) ->
+    {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config};
+insert(Value, {[{array, Values}|Rest], Config}) ->
+    {[{array, [Value] ++ Values}] ++ Rest, Config};
+insert(_, _) -> erlang:error(badarg).
+
+%% insert a key/value pair into an object
+insert(Key, Value, {[{object, Pairs}|Rest], Config}) ->
+    {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config};
+insert(_, _, _) -> erlang:error(badarg).
+
+
+get_key({[{object, Key, _}|_], _}) -> Key;
+get_key(_) -> erlang:error(badarg).
+
+
+get_value({Value, _Config}) -> Value;
+get_value(_) -> erlang:error(badarg).


 %% eunit tests
@ -132,9 +170,6 @@ post_decode(Value, Config) -> (Config#config.post_decode)(Value).


 config_test_() ->
-    %% for post_decode tests
-    F = fun(X) -> X end,
-    G = fun(X, Y) -> {X, Y} end,
    [
        {"empty config", ?_assertEqual(#config{}, parse_config([]))},
        {"implicit binary labels", ?_assertEqual(#config{}, parse_config([labels]))},
@ -144,15 +179,6 @@ config_test_() ->
            #config{labels=existing_atom},
            parse_config([{labels, existing_atom}])
        )},
-        {"sloppy existing atom labels", ?_assertEqual(
-            #config{labels=attempt_atom},
-            parse_config([{labels, attempt_atom}])
-        )},
-        {"post decode", ?_assertEqual(
-            #config{post_decode=F},
-            parse_config([{post_decode, F}])
-        )},
-        {"post decode wrong arity", ?_assertError(badarg, parse_config([{post_decode, G}]))},
        {"invalid opt flag", ?_assertError(badarg, parse_config([error]))},
        {"invalid opt tuple", ?_assertError(badarg, parse_config([{error, true}]))}
    ].
@ -181,110 +207,79 @@ format_key_test_() ->
    ].


-post_decoders_test_() ->
-    Events = [
-        [{}],
-        [{<<"key">>, <<"value">>}],
-        [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
-        [],
-        [<<"string">>],
-        [true, false, null],
-        true,
-        false,
-        null,
-        <<"hello">>,
-        <<"world">>,
-        1,
-        1.0
-    ],
+rep_manipulation_test_() ->
    [
-        {"no post_decode", ?_assertEqual(
-            Events,
-            [ post_decode(Event, #config{}) || Event <- Events ]
+        {"allocate a new context", ?_assertEqual(
+            {[], #config{}},
+            start_term()
        )},
-        {"replace arrays with empty arrays", ?_assertEqual(
-            [
-                [{}],
-                [{<<"key">>, <<"value">>}],
-                [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
-                [],
-                [],
-                [],
-                true,
-                false,
-                null,
-                <<"hello">>,
-                <<"world">>,
-                1,
-                1.0
-            ],
-            [ post_decode(Event, #config{
-                    post_decode=fun([T|_] = V) when is_tuple(T) -> V; (V) when is_list(V) -> []; (V) -> V end
-                }) || Event <- Events
-            ]
+        {"allocate a new context with option", ?_assertEqual(
+            {[], #config{labels=atom}},
+            start_term([{labels, atom}])
        )},
-        {"replace objects with empty objects", ?_assertEqual(
-            [
-                [{}],
-                [{}],
-                [{}],
-                [],
-                [<<"string">>],
-                [true, false, null],
-                true,
-                false,
-                null,
-                <<"hello">>,
-                <<"world">>,
-                1,
-                1.0
-            ],
-            [ post_decode(Event, #config{
-                    post_decode=fun([T|_]) when is_tuple(T) -> [{}]; (V) -> V end
-                }) || Event <- Events
-            ]
+        {"allocate a new object on an empty stack", ?_assertEqual(
+            {[{object, []}], #config{}},
+            start_object({[], #config{}})
        )},
-        {"replace all non-array/non-object values with false", ?_assertEqual(
-            [
-                [{}],
-                [{<<"key">>, <<"value">>}],
-                [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
-                [],
-                [<<"string">>],
-                [true, false, null],
-                false,
-                false,
-                false,
-                false,
-                false,
-                false,
-                false
-            ],
-            [ post_decode(Event, #config{
-                    post_decode=fun(V) when is_list(V) -> V; (_) -> false end
-                }) || Event <- Events
-            ]
+        {"allocate a new object on a stack", ?_assertEqual(
+            {[{object, []}, {object, []}], #config{}},
+            start_object({[{object, []}], #config{}})
        )},
-        {"atoms_to_strings", ?_assertEqual(
-            [
-                [{}],
-                [{<<"key">>, <<"value">>}],
-                [{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
-                [],
-                [<<"string">>],
-                [true, false, null],
-                <<"true">>,
-                <<"false">>,
-                <<"null">>,
-                <<"hello">>,
-                <<"world">>,
-                1,
-                1.0
-            ],
-            [ post_decode(Event, #config{
-                    post_decode=fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end
-                }) || Event <- Events
-            ]
+        {"allocate a new array on an empty stack", ?_assertEqual(
+            {[{array, []}], #config{}},
+            start_array({[], #config{}})
+        )},
+        {"allocate a new array on a stack", ?_assertEqual(
+            {[{array, []}, {object, []}], #config{}},
+            start_array({[{object, []}], #config{}})
+        )},
+        {"insert a key into an object", ?_assertEqual(
+            {[{object, key, []}, junk], #config{}},
+            insert(key, {[{object, []}, junk], #config{}})
+        )},
+        {"get current key", ?_assertEqual(
+            key,
+            get_key({[{object, key, []}], #config{}})
+        )},
+        {"try to get non-key from object", ?_assertError(
+            badarg,
+            get_key({[{object, []}], #config{}})
+        )},
+        {"try to get key from array", ?_assertError(
+            badarg,
+            get_key({[{array, []}], #config{}})
+        )},
+        {"insert a value into an object", ?_assertEqual(
+            {[{object, [{key, value}]}, junk], #config{}},
+            insert(value, {[{object, key, []}, junk], #config{}})
+        )},
+        {"insert a value into an array", ?_assertEqual(
+            {[{array, [value]}, junk], #config{}},
+            insert(value, {[{array, []}, junk], #config{}})
+        )},
+        {"insert a key/value pair into an object", ?_assertEqual(
+            {[{object, [{key, value}, {x, y}]}, junk], #config{}},
+            insert(key, value, {[{object, [{x, y}]}, junk], #config{}})
+        )},
+        {"finish an object with no ancestor", ?_assertEqual(
+            {[{a, b}, {x, y}], #config{}},
+            finish({[{object, [{x, y}, {a, b}]}], #config{}})
+        )},
+        {"finish an empty object", ?_assertEqual(
+            {[{}], #config{}},
+            finish({[{object, []}], #config{}})
+        )},
+        {"finish an object with an ancestor", ?_assertEqual(
+            {[{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], #config{}},
+            finish({[{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}], #config{}})
+        )},
+        {"finish an array with no ancestor", ?_assertEqual(
+            {[a, b, c], #config{}},
+            finish({[{array, [c, b, a]}], #config{}})
+        )},
+        {"finish an array with an ancestor", ?_assertEqual(
+            {[{array, [[a, b, c], d, e, f]}], #config{}},
+            finish({[{array, [c, b, a]}, {array, [d, e, f]}], #config{}})
        )}
    ].

@ -295,7 +290,7 @@ handle_event_test_() ->
        {
            Title, ?_assertEqual(
                Term,
-                lists:foldl(fun handle_event/2, {[[]], #config{}}, Events ++ [end_json])
+                lists:foldl(fun handle_event/2, init([]), Events ++ [end_json])
            )
        } || {Title, _, Term, Events} <- Data
    ].
--- a/src/jsx_verify.erl
+++ b/src/jsx_verify.erl
@ -159,7 +159,7 @@ repeated_keys_test_() ->


 handle_event_test_() ->
-    Data = jsx:test_cases(),
+    Data = jsx:test_cases() ++ jsx:special_test_cases(),
    [
        {
            Title, ?_assertEqual(