Merge branch 'vtwopointoh' into develop

This commit is contained in:
alisdair sullivan 2014-01-14 00:41:03 +00:00
commit ff3915abbc
13 changed files with 2406 additions and 3116 deletions

View file

@ -28,22 +28,17 @@
-export([format/1, format/2, minify/1, prettify/1]).
-export([encoder/3, decoder/3, parser/3]).
-export([resume/3]).
%% old api
-export([term_to_json/1, term_to_json/2, json_to_term/1, json_to_term/2]).
-export([to_json/1, to_json/2]).
-export([to_term/1, to_term/2]).
-export_type([json_term/0, json_text/0, token/0]).
-export_type([config/0, encoder/0, decoder/0, parser/0, internal_state/0]).
-export_type([encoder/0, decoder/0, parser/0, internal_state/0]).
-ifdef(TEST).
-include("jsx_tests.hrl").
-else.
-include("jsx_config.hrl").
%% data and helper functions for tests
-export([test_cases/0, special_test_cases/0]).
-export([init/1, handle_event/2]).
-endif.
-type config() :: #config{}.
-type json_term()
:: [{binary() | atom(), json_term()}]
@ -64,19 +59,12 @@
encode(Source) -> encode(Source, []).
encode(Source, Config) -> jsx_to_json:to_json(Source, Config).
%% old api, alias for encode/x
-spec to_json(Source::json_term()) -> json_text() | {incomplete, encoder()}.
-spec to_json(Source::json_term(), Config::jsx_to_json:config()) -> json_text() | {incomplete, encoder()}.
-spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}.
-spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}.
to_json(Source) -> encode(Source, []).
to_json(Source, Config) -> encode(Source, Config).
-spec term_to_json(Source::json_term()) -> json_text() | {incomplete, encoder()}.
-spec term_to_json(Source::json_term(), Config::jsx_to_json:config()) -> json_text() | {incomplete, encoder()}.
term_to_json(Source) -> encode(Source, []).
term_to_json(Source, Config) -> encode(Source, Config).
decode(Source) -> decode(Source, []).
decode(Source, Config) -> jsx_to_term:to_term(Source, Config).
-spec format(Source::json_text()) -> json_text() | {incomplete, decoder()}.
@ -96,27 +84,6 @@ minify(Source) -> format(Source, []).
prettify(Source) -> format(Source, [space, {indent, 2}]).
-spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}.
-spec decode(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}.
decode(Source) -> decode(Source, []).
decode(Source, Config) -> jsx_to_term:to_term(Source, Config).
%% old api, alias for to_term/x
-spec to_term(Source::json_text()) -> json_term() | {incomplete, decoder()}.
-spec to_term(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}.
to_term(Source) -> decode(Source, []).
to_term(Source, Config) -> decode(Source, Config).
-spec json_to_term(Source::json_text()) -> json_term() | {incomplete, decoder()}.
-spec json_to_term(Source::json_text(), Config::jsx_to_term:config()) -> json_term() | {incomplete, decoder()}.
json_to_term(Source) -> decode(Source, []).
json_to_term(Source, Config) -> decode(Source, Config).
-spec is_json(Source::any()) -> true | false.
-spec is_json(Source::any(), Config::jsx_verify:config()) -> true | false.
@ -182,3 +149,311 @@ resume(Term, {decoder, State, Handler, Acc, Stack}, Config) ->
resume(Term, {parser, State, Handler, Stack}, Config) ->
jsx_parser:resume(Term, State, Handler, Stack, jsx_config:parse_config(Config)).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
%% minimal event handler used by the tests: the state is the list of events
%% seen so far, newest first. `end_json` finalizes it into arrival order
init([]) -> [].

handle_event(end_json, Seen) -> lists:reverse([end_json | Seen]);
handle_event(Event, Seen) -> [Event | Seen].
%% every general purpose fixture, concatenated in a fixed order. each fixture
%% is a `{Title, JSON, Term, Events}` tuple
test_cases() ->
    lists:append([
        empty_array(),
        nested_array(),
        empty_object(),
        nested_object(),
        strings(),
        literals(),
        integers(),
        floats(),
        compound_object()
    ]).
%% these fixtures are kept apart from `test_cases/0` so that `jsx_to_term`
%% can skip them
special_test_cases() ->
    lists:append(special_objects(), special_array()).
%% fixture: an array with no members
empty_array() ->
    Title = "[]",
    [{Title, list_to_binary(Title), [], [start_array, end_array]}].
%% fixture: three arrays nested inside each other, innermost empty
nested_array() ->
    Title = "[[[]]]",
    Events = lists:duplicate(3, start_array) ++ lists:duplicate(3, end_array),
    [{Title, list_to_binary(Title), [[[]]], Events}].
%% fixture: an object with no members, represented as the term `[{}]`
empty_object() ->
    Title = "{}",
    [{Title, list_to_binary(Title), [{}], [start_object, end_object]}].
%% fixture: three objects nested under the same key, innermost empty
nested_object() ->
    Title = "{\"key\":{\"key\":{}}}",
    Key = {key, <<"key">>},
    Term = [{<<"key">>, [{<<"key">>, [{}]}]}],
    Events = [
        start_object, Key,
        start_object, Key,
        start_object,
        end_object, end_object, end_object
    ],
    [{Title, list_to_binary(Title), Term, Events}].
%% fixtures for bare (unwrapped) string values: the title is the raw text,
%% the json is that text in double quotes
naked_strings() ->
    lists:map(
        fun(Text) ->
            Bin = list_to_binary(Text),
            {Text, <<$", Bin/binary, $">>, Bin, [{string, Bin}]}
        end,
        ["", "hello world"]
    ).
%% string fixtures: bare, then wrapped in an array, then wrapped in an object
strings() ->
    Naked = naked_strings(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).
%% fixtures for bare integer values: each listed integer, then its negation,
%% then zero
naked_integers() ->
    Positive = [
        1, 2, 3,
        127, 128, 129,
        255, 256, 257,
        65534, 65535, 65536,
        18446744073709551616,
        18446744073709551617
    ],
    Negative = [ -N || N <- Positive ],
    lists:map(
        fun(Int) ->
            Text = integer_to_list(Int),
            {Text, list_to_binary(Text), Int, [{integer, Int}]}
        end,
        Positive ++ Negative ++ [0]
    ).
%% integer fixtures: bare, then wrapped in an array, then wrapped in an object
integers() ->
    Naked = naked_integers(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).
%% fixtures for bare float values: each listed float followed by the negation
%% of every listed float. titles and json text come from `sane_float_to_list/1`
naked_floats() ->
    Positive = [
        0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
        1234567890.0987654321,
        0.0e0,
        1234567890.0987654321e16,
        0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308,
        1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308,
        2.2250738585072014e-308,    %% min normalized float
        1.7976931348623157e308,     %% max normalized float
        5.0e-324,                   %% min denormalized float
        2.225073858507201e-308      %% max denormalized float
    ],
    Negative = [ -1 * F || F <- Positive ],
    lists:map(
        fun(Float) ->
            Text = sane_float_to_list(Float),
            {Text, list_to_binary(Text), Float, [{float, Float}]}
        end,
        Positive ++ Negative
    ).
%% float fixtures: bare, then wrapped in an array, then wrapped in an object
floats() ->
    Naked = naked_floats(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).
%% fixtures for the bare literals `true`, `false` and `null`
naked_literals() ->
    lists:map(
        fun(Literal) ->
            {
                atom_to_list(Literal),
                atom_to_binary(Literal, unicode),
                Literal,
                [{literal, Literal}]
            }
        end,
        [true, false, null]
    ).
%% literal fixtures: bare, then wrapped in an array, then wrapped in an object
literals() ->
    Naked = naked_literals(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).
%% fixture: an array holding a nested object (integers, floats and literals
%% inside arrays) followed by an array that holds an empty object
compound_object() ->
    Title = "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]",
    InnerTerm = [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}],
    OuterTerm = [{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, InnerTerm}],
    Term = [OuterTerm, [[{}]]],
    Integers = [start_array, {integer, 1}, {integer, 2}, {integer, 3}, end_array],
    Floats = [start_array, {float, 1.0}, {float, 2.0}, {float, 3.0}, end_array],
    Literals = [start_array, {literal, true}, {literal, false}, end_array],
    InnerEvents = [start_object, {key, <<"alpha">>}]
        ++ Floats
        ++ [{key, <<"beta">>}]
        ++ Literals
        ++ [end_object],
    OuterEvents = [start_object, {key, <<"alpha">>}]
        ++ Integers
        ++ [{key, <<"beta">>}]
        ++ InnerEvents
        ++ [end_object],
    EmptyObjectArray = [start_array, start_object, end_object, end_array],
    Events = [start_array] ++ OuterEvents ++ EmptyObjectArray ++ [end_array],
    [{Title, list_to_binary(Title), Term, Events}].
%% object fixtures whose terms use an atom key with an atom value, and an
%% integer key
special_objects() ->
    AtomPair = {
        "[{key, atom}]",
        <<"{\"key\":\"atom\"}">>,
        [{key, atom}],
        [start_object, {key, <<"key">>}, {string, <<"atom">>}, end_object]
    },
    IntegerKey = {
        "[{1, true}]",
        <<"{\"1\":true}">>,
        [{1, true}],
        [start_object, {key, <<"1">>}, {literal, true}, end_object]
    },
    [AtomPair, IntegerKey].
%% array fixture whose term holds atoms that correspond to json strings
special_array() ->
    Atoms = {
        "[foo, bar]",
        <<"[\"foo\",\"bar\"]">>,
        [foo, bar],
        [start_array, {string, <<"foo">>}, {string, <<"bar">>}, end_array]
    },
    [Atoms].
%% turns a fixture for a value into the fixture for a one element array
%% holding that value
wrap_with_array({Title, JSON, Term, Events}) ->
    WrappedTitle = [$[ | Title] ++ "]",
    WrappedJSON = <<$[, JSON/binary, $]>>,
    WrappedEvents = [start_array | Events] ++ [end_array],
    {WrappedTitle, WrappedJSON, [Term], WrappedEvents}.
%% turns a fixture for a value into the fixture for an object holding that
%% value under the single key `key`
wrap_with_object({Title, JSON, Term, Events}) ->
    WrappedTitle = "{\"key\":" ++ (Title ++ "}"),
    WrappedJSON = <<"{\"key\":", JSON/binary, $}>>,
    WrappedEvents = [start_object, {key, <<"key">>} | Events] ++ [end_object],
    {WrappedTitle, WrappedJSON, [{<<"key">>, Term}], WrappedEvents}.
%% renders `X` with the `~p` format directive and returns the result as a flat
%% string. `io_lib:format/2` returns chardata whose nesting is not part of its
%% contract, so the result is flattened rather than pattern matched
sane_float_to_list(X) ->
    lists:flatten(io_lib:format("~p", [X])).
%% feeds `JSON` to a streaming decoder one chunk at a time, requiring an
%% `{incomplete, F}` reply after every chunk, then signals `end_stream`
incremental_decode(JSON) ->
    Feed = fun(Chunk, Next) ->
        {incomplete, Continue} = Next(Chunk),
        Continue
    end,
    Start = decoder(jsx, [], [stream]),
    Last = lists:foldl(Feed, Start, json_to_bytes(JSON)),
    Last(end_stream).
%% feeds `Events` to a streaming parser one event at a time (each wrapped in
%% its own list), requiring an `{incomplete, F}` reply after every event,
%% then signals `end_stream`
incremental_parse(Events) ->
    Feed = fun(Token, Next) ->
        {incomplete, Continue} = Next(Token),
        Continue
    end,
    Start = parser(?MODULE, [], [stream]),
    Singletons = lists:map(fun(Event) -> [Event] end, Events),
    Last = lists:foldl(Feed, Start, Singletons),
    Last(end_stream).
%% splits a json text into a list of single byte binaries, preceded by an
%% empty binary, so it can be fed to a decoder incrementally
json_to_bytes(JSON) -> json_to_bytes(JSON, []).

json_to_bytes(<<>>, Seen) ->
    [<<>> | lists:reverse(Seen)];
json_to_bytes(<<Byte, Remaining/binary>>, Seen) ->
    json_to_bytes(Remaining, [<<Byte>> | Seen]).
%% actual tests!
%% for every fixture, the decoder must emit the fixture's events followed by
%% `end_json`, both when given the whole text and when fed incrementally
decode_test_() ->
    Cases = test_cases(),
    OneShot = [
        {Title, ?_assertEqual(Events ++ [end_json], (decoder(?MODULE, [], []))(JSON))}
        || {Title, JSON, _, Events} <- Cases
    ],
    Incremental = [
        {Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_decode(JSON))}
        || {Title, JSON, _, Events} <- Cases
    ],
    OneShot ++ Incremental.
%% for every fixture, the parser must emit the same events it was given
%% followed by `end_json`, both in one call and when fed incrementally
parse_test_() ->
    Cases = test_cases(),
    OneShot = [
        {Title, ?_assertEqual(Events ++ [end_json], (parser(?MODULE, [], []))(Events ++ [end_json]))}
        || {Title, _, _, Events} <- Cases
    ],
    Incremental = [
        {Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_parse(Events))}
        || {Title, _, _, Events} <- Cases
    ],
    OneShot ++ Incremental.
%% for every fixture, encoding the fixture's term must emit the fixture's
%% events followed by `end_json`
encode_test_() ->
    Cases = test_cases(),
    [
        {Title, ?_assertEqual(Events ++ [end_json], (jsx:encoder(jsx, [], []))(Term))}
        || {Title, _, Term, Events} <- Cases
    ].
-endif.

View file

@ -49,41 +49,27 @@
%% parsing of jsx config
-spec parse_config(Config::proplists:proplist()) -> jsx:config().
parse_config(Config) ->
parse_config(Config, #config{}).
parse_config(Config) -> parse_config(Config, #config{}).
parse_config([], Config) ->
Config;
parse_config([replaced_bad_utf8|Rest], Config) ->
parse_config(Rest, Config#config{replaced_bad_utf8=true});
parse_config([], Config) -> Config;
parse_config([escaped_forward_slashes|Rest], Config) ->
parse_config(Rest, Config#config{escaped_forward_slashes=true});
parse_config([explicit_end|Rest], Config) ->
parse_config(Rest, Config#config{explicit_end=true});
parse_config([single_quoted_strings|Rest], Config) ->
parse_config(Rest, Config#config{single_quoted_strings=true});
parse_config([unescaped_jsonp|Rest], Config) ->
parse_config(Rest, Config#config{unescaped_jsonp=true});
parse_config([comments|Rest], Config) ->
parse_config(Rest, Config#config{comments=true});
parse_config([escaped_strings|Rest], Config) ->
parse_config(Rest, Config#config{escaped_strings=true});
parse_config([unescaped_jsonp|Rest], Config) ->
parse_config(Rest, Config#config{unescaped_jsonp=true});
parse_config([dirty_strings|Rest], Config) ->
parse_config(Rest, Config#config{dirty_strings=true});
parse_config([ignored_bad_escapes|Rest], Config) ->
parse_config(Rest, Config#config{ignored_bad_escapes=true});
parse_config([relax|Rest], Config) ->
parse_config(Rest, Config#config{
replaced_bad_utf8 = true,
single_quoted_strings = true,
comments = true,
ignored_bad_escapes = true
parse_config([strict|Rest], Config) ->
parse_config(Rest, Config#config{strict_comments=true,
strict_utf8=true,
strict_single_quotes=true,
strict_escapes=true
});
parse_config([{pre_encode, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) ->
case Config#config.pre_encode of
false -> parse_config(Rest, Config#config{pre_encode=Encoder})
; _ -> erlang:error(badarg, [Options, Config])
end;
parse_config([{strict, Strict}|Rest], Config) ->
parse_strict(Strict, Rest, Config);
parse_config([stream|Rest], Config) ->
parse_config(Rest, Config#config{stream=true});
parse_config([{error_handler, ErrorHandler}|Rest] = Options, Config) when is_function(ErrorHandler, 3) ->
case Config#config.error_handler of
false -> parse_config(Rest, Config#config{error_handler=ErrorHandler})
@ -94,34 +80,28 @@ parse_config([{incomplete_handler, IncompleteHandler}|Rest] = Options, Config) w
false -> parse_config(Rest, Config#config{incomplete_handler=IncompleteHandler})
; _ -> erlang:error(badarg, [Options, Config])
end;
%% deprecated flags
parse_config([{pre_encoder, Encoder}|Rest] = Options, Config) when is_function(Encoder, 1) ->
case Config#config.pre_encode of
false -> parse_config(Rest, Config#config{pre_encode=Encoder})
; _ -> erlang:error(badarg, [Options, Config])
end;
parse_config([loose_unicode|Rest], Config) ->
parse_config(Rest, Config#config{replaced_bad_utf8=true});
parse_config([escape_forward_slash|Rest], Config) ->
parse_config(Rest, Config#config{escaped_forward_slashes=true});
parse_config([single_quotes|Rest], Config) ->
parse_config(Rest, Config#config{single_quoted_strings=true});
parse_config([no_jsonp_escapes|Rest], Config) ->
parse_config(Rest, Config#config{unescaped_jsonp=true});
parse_config([json_escape|Rest], Config) ->
parse_config(Rest, Config#config{escaped_strings=true});
parse_config([ignore_bad_escapes|Rest], Config) ->
parse_config(Rest, Config#config{ignored_bad_escapes=true});
parse_config(Options, Config) ->
erlang:error(badarg, [Options, Config]).
parse_config(_Options, _Config) -> erlang:error(badarg).
%% handles the `{strict, Flags}` option: every flag in `Flags` switches on the
%% matching `strict_*` field, after which parsing of the remaining options
%% resumes. any flag other than `comments`, `utf8`, `single_quotes` or
%% `escapes` raises `badarg`
parse_strict([], Remaining, Acc) ->
    parse_config(Remaining, Acc);
parse_strict([comments | Flags], Remaining, Acc) ->
    parse_strict(Flags, Remaining, Acc#config{strict_comments = true});
parse_strict([utf8 | Flags], Remaining, Acc) ->
    parse_strict(Flags, Remaining, Acc#config{strict_utf8 = true});
parse_strict([single_quotes | Flags], Remaining, Acc) ->
    parse_strict(Flags, Remaining, Acc#config{strict_single_quotes = true});
parse_strict([escapes | Flags], Remaining, Acc) ->
    parse_strict(Flags, Remaining, Acc#config{strict_escapes = true});
parse_strict(_, _, _) ->
    erlang:error(badarg).
-spec config_to_list(Config::jsx:config()) -> proplists:proplist().
config_to_list(Config) ->
lists:map(
fun ({pre_encode, F}) -> {pre_encode, F};
({error_handler, F}) -> {error_handler, F};
reduce_config(lists:map(
fun ({error_handler, F}) -> {error_handler, F};
({incomplete_handler, F}) -> {incomplete_handler, F};
({Key, true}) -> Key
end,
@ -129,34 +109,41 @@ config_to_list(Config) ->
fun({_, false}) -> false; (_) -> true end,
lists:zip(record_info(fields, config), tl(tuple_to_list(Config)))
)
).
)).
%% collapses the individual `strict_*` flags of an option list into a single
%% trailing entry: nothing when none are present, the bare atom `strict` when
%% four were collected, and `{strict, Flags}` otherwise. all other options
%% keep their relative order
reduce_config(Input) -> reduce_config(Input, [], []).

reduce_config([], Kept, []) ->
    lists:reverse(Kept);
reduce_config([], Kept, [_, _, _, _]) ->
    lists:reverse(Kept) ++ [strict];
reduce_config([], Kept, Strict) ->
    lists:reverse(Kept) ++ [{strict, lists:reverse(Strict)}];
reduce_config([Option | Input], Kept, Strict) ->
    case strict_flag_alias(Option) of
        none -> reduce_config(Input, [Option | Kept], Strict);
        Alias -> reduce_config(Input, Kept, [Alias | Strict])
    end.

%% maps a `strict_*` flag to the name it has inside `{strict, Flags}`, or
%% `none` for any other option
strict_flag_alias(strict_comments) -> comments;
strict_flag_alias(strict_utf8) -> utf8;
strict_flag_alias(strict_single_quotes) -> single_quotes;
strict_flag_alias(strict_escapes) -> escapes;
strict_flag_alias(_) -> none.
-spec valid_flags() -> [atom()].
valid_flags() ->
[
replaced_bad_utf8,
escaped_forward_slashes,
single_quoted_strings,
unescaped_jsonp,
comments,
escaped_strings,
unescaped_jsonp,
dirty_strings,
ignored_bad_escapes,
explicit_end,
relax,
pre_encode,
strict,
stream,
error_handler,
incomplete_handler,
%% deprecated flags
pre_encoder, %% pre_encode
loose_unicode, %% replaced_bad_utf8
escape_forward_slash, %% escaped_forward_slashes
single_quotes, %% single_quoted_strings
no_jsonp_escapes, %% unescaped_jsonp
json_escape, %% escaped_strings
ignore_bad_escapes %% ignored_bad_escapes
incomplete_handler
].
@ -187,70 +174,51 @@ config_test_() ->
[
{"all flags",
?_assertEqual(
#config{
replaced_bad_utf8=true,
escaped_forward_slashes=true,
explicit_end=true,
single_quoted_strings=true,
unescaped_jsonp=true,
comments=true,
dirty_strings=true,
ignored_bad_escapes=true
#config{escaped_forward_slashes = true,
escaped_strings = true,
unescaped_jsonp = true,
dirty_strings = true,
strict_comments = true,
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true,
stream = true
},
parse_config([
replaced_bad_utf8,
escaped_forward_slashes,
explicit_end,
single_quoted_strings,
parse_config([escaped_forward_slashes,
escaped_strings,
unescaped_jsonp,
comments,
dirty_strings,
ignored_bad_escapes
strict,
stream
])
)
},
{"relax flag",
{"strict flag",
?_assertEqual(
#config{
replaced_bad_utf8=true,
single_quoted_strings=true,
comments=true,
ignored_bad_escapes=true
#config{strict_comments = true,
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true
},
parse_config([relax])
parse_config([strict])
)
},
{"strict selective",
?_assertEqual(
#config{strict_comments = true},
parse_config([{strict, [comments]}])
)
},
{"strict expanded",
?_assertEqual(
#config{strict_comments = true,
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true
},
parse_config([{strict, [comments, utf8, single_quotes, escapes]}])
)
},
{"deprecated flags", ?_assertEqual(
#config{
pre_encode=fun lists:length/1,
replaced_bad_utf8=true,
escaped_forward_slashes=true,
single_quoted_strings=true,
unescaped_jsonp=true,
escaped_strings=true,
ignored_bad_escapes=true
},
parse_config([
{pre_encoder, fun lists:length/1},
loose_unicode,
escape_forward_slash,
single_quotes,
no_jsonp_escapes,
json_escape,
ignore_bad_escapes
])
)},
{"pre_encode flag", ?_assertEqual(
#config{pre_encode=fun lists:length/1},
parse_config([{pre_encode, fun lists:length/1}])
)},
{"two pre_encoders defined", ?_assertError(
badarg,
parse_config([
{pre_encode, fun(_) -> true end},
{pre_encode, fun(_) -> false end}
])
)},
{"error_handler flag", ?_assertEqual(
#config{error_handler=fun ?MODULE:fake_error_handler/3},
parse_config([{error_handler, fun ?MODULE:fake_error_handler/3}])
@ -273,7 +241,7 @@ config_test_() ->
{incomplete_handler, fun(_) -> false end}
])
)},
{"bad option flag", ?_assertError(badarg, parse_config([error]))}
{"bad option flag", ?_assertError(badarg, parse_config([this_flag_does_not_exist]))}
].
@ -284,32 +252,40 @@ config_to_list_test_() ->
config_to_list(#config{})
)},
{"all flags", ?_assertEqual(
[
replaced_bad_utf8,
escaped_forward_slashes,
single_quoted_strings,
[escaped_forward_slashes,
escaped_strings,
unescaped_jsonp,
comments,
dirty_strings,
ignored_bad_escapes,
explicit_end
stream,
strict
],
config_to_list(
#config{
replaced_bad_utf8=true,
escaped_forward_slashes=true,
explicit_end=true,
single_quoted_strings=true,
unescaped_jsonp=true,
comments=true,
dirty_strings=true,
ignored_bad_escapes=true
#config{escaped_forward_slashes = true,
escaped_strings = true,
unescaped_jsonp = true,
dirty_strings = true,
strict_comments = true,
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true,
stream = true
}
)
)},
{"pre_encode", ?_assertEqual(
[{pre_encode, fun lists:length/1}],
config_to_list(#config{pre_encode=fun lists:length/1})
{"single strict", ?_assertEqual(
[{strict, [comments]}],
config_to_list(#config{strict_comments = true})
)},
{"multiple strict", ?_assertEqual(
[{strict, [utf8, single_quotes, escapes]}],
config_to_list(#config{strict_utf8 = true, strict_single_quotes = true, strict_escapes = true})
)},
{"all strict", ?_assertEqual(
[strict],
config_to_list(#config{strict_comments = true,
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true})
)},
{"error handler", ?_assertEqual(
[{error_handler, fun ?MODULE:fake_error_handler/3}],

View file

@ -1,15 +1,13 @@
-record(config, {
replaced_bad_utf8 = false :: boolean(),
escaped_forward_slashes = false :: boolean(),
single_quoted_strings = false :: boolean(),
unescaped_jsonp = false :: boolean(),
comments = false :: boolean(),
escaped_strings = false :: boolean(),
dirty_strings = false :: boolean(),
ignored_bad_escapes = false :: boolean(),
explicit_end = false :: boolean(),
pre_encode = false :: false | fun((any()) -> any()),
error_handler = false :: false | jsx_config:handler(),
incomplete_handler = false :: false | jsx_config:handler()
escaped_forward_slashes = false :: boolean(),
escaped_strings = false :: boolean(),
unescaped_jsonp = false :: boolean(),
dirty_strings = false :: boolean(),
strict_comments = false :: boolean(),
strict_utf8 = false :: boolean(),
strict_single_quotes = false :: boolean(),
strict_escapes = false :: boolean(),
stream = false :: boolean(),
error_handler = false :: false | jsx_config:handler(),
incomplete_handler = false :: false | jsx_config:handler()
}).

File diff suppressed because it is too large Load diff

View file

@ -23,310 +23,66 @@
-module(jsx_encoder).
-export([encoder/3]).
-export([encoder/3, encode/1, encode/2, unzip/1]).
-spec encoder(Handler::module(), State::any(), Config::list()) -> jsx:encoder().
encoder(Handler, State, Config) ->
fun(JSON) ->
start(
JSON,
{Handler, Handler:init(State)},
jsx_config:parse_config(Config)
)
end.
Parser = jsx:parser(Handler, State, Config),
fun(Term) -> Parser(encode(Term) ++ [end_json]) end.
-spec encode(Term::any()) -> any().
-include("jsx_config.hrl").
encode(Term) -> encode(Term, ?MODULE).
-ifndef(error).
-define(error(State, Term, Handler, Config),
case Config#config.error_handler of
false -> erlang:error(badarg);
F -> erlang:throw(F(Term, {encoder, State, Handler}, jsx_config:config_to_list(Config)))
end
).
-endif.
-spec encode(Term::any(), EntryPoint::module()) -> any().
encode([], _EntryPoint) -> [start_array, end_array];
encode([{}], _EntryPoint) -> [start_object, end_object];
start(Term, {Handler, State}, Config) ->
try Handler:handle_event(end_json, value(pre_encode(Term, Config), {Handler, State}, Config))
catch
throw:Error -> Error;
Type:Value -> erlang:Type(Value)
end.
value(String, {Handler, State}, Config) when is_binary(String) ->
Handler:handle_event({string, clean_string(String, {Handler, State}, Config)}, State);
value(Float, {Handler, State}, _Config) when is_float(Float) ->
Handler:handle_event({float, Float}, State);
value(Int, {Handler, State}, _Config) when is_integer(Int) ->
Handler:handle_event({integer, Int}, State);
value(Literal, {Handler, State}, _Config)
when Literal == true; Literal == false; Literal == null ->
Handler:handle_event({literal, Literal}, State);
value(String, {Handler, State}, Config) when is_atom(String) ->
Handler:handle_event({string, clean_string(atom_to_binary(String,latin1), {Handler, State}, Config)}, State);
value([{}], {Handler, State}, _Config) ->
Handler:handle_event(end_object, Handler:handle_event(start_object, State));
value([], {Handler, State}, _Config) ->
Handler:handle_event(end_array, Handler:handle_event(start_array, State));
value(List, Handler, Config) when is_list(List) ->
list_or_object(List, Handler, Config);
value(Term, Handler, Config) -> ?error(value, Term, Handler, Config).
list_or_object([Term|Rest], {Handler, State}, Config) ->
case pre_encode(Term, Config) of
{K, V} when is_atom(K); is_binary(K); is_integer(K) ->
object([{K, V}|Rest], {Handler, Handler:handle_event(start_object, State)}, Config)
; T ->
list([T|Rest], {Handler, Handler:handle_event(start_array, State)}, Config)
end.
object([{Key, Value}, Next|Rest], {Handler, State}, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
V = pre_encode(Value, Config),
object(
[pre_encode(Next, Config)|Rest],
{
Handler,
value(
V,
{Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)},
Config
)
},
Config
encode([{_, _}|_] = Term, EntryPoint) ->
lists:flatten(
[start_object] ++ [ EntryPoint:encode(T, EntryPoint) || T <- unzip(Term) ] ++ [end_object]
);
object([{Key, Value}], {Handler, State}, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
object(
[],
{
Handler,
value(
pre_encode(Value, Config),
{Handler, Handler:handle_event({key, clean_string(fix_key(Key), {Handler, State}, Config)}, State)},
Config
)
},
Config
encode(Term, EntryPoint) when is_list(Term) ->
lists:flatten(
[start_array] ++ [ EntryPoint:encode(T, EntryPoint) || T <- Term ] ++ [end_array]
);
object([], {Handler, State}, _Config) -> Handler:handle_event(end_object, State);
object(Term, Handler, Config) -> ?error(object, Term, Handler, Config).
encode(Else, _EntryPoint) -> [Else].
list([Value, Next|Rest], {Handler, State}, Config) ->
list([pre_encode(Next, Config)|Rest], {Handler, value(Value, {Handler, State}, Config)}, Config);
list([Value], {Handler, State}, Config) ->
list([], {Handler, value(Value, {Handler, State}, Config)}, Config);
list([], {Handler, State}, _Config) -> Handler:handle_event(end_array, State).
unzip(List) -> unzip(List, []).
pre_encode(Value, #config{pre_encode=false}) -> Value;
pre_encode(Value, Config) -> (Config#config.pre_encode)(Value).
fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8));
fix_key(Key) when is_integer(Key) -> fix_key(list_to_binary(integer_to_list(Key)));
fix_key(Key) when is_binary(Key) -> Key.
clean_string(Bin, Handler, Config) ->
case clean_string(Bin, Config) of
{error, badarg} -> ?error(string, Bin, Handler, Config);
String -> String
end.
-include("jsx_strings.hrl").
unzip([], Acc) -> lists:reverse(Acc);
unzip([{K, V}|Rest], Acc) when is_binary(K); is_atom(K); is_integer(K) -> unzip(Rest, [V, K] ++ Acc).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
encode_test_() ->
Data = jsx:test_cases(),
[
{
Title, ?_assertEqual(
Events ++ [end_json],
start(Term, {jsx, []}, #config{})
)
} || {Title, _, Term, Events} <- Data
].
parser(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term).
encode(Term, Config) -> start(Term, {jsx, []}, jsx_config:parse_config(Config)).
pre_encoders_test_() ->
Term = [
{<<"object">>, [
{atomkey, atomvalue},
{<<"literals">>, [true, false, null]},
{<<"strings">>, [<<"foo">>, <<"bar">>, <<"baz">>]},
{<<"numbers">>, [1, 1.0, 1.0e0]}
]}
],
[
{"no pre encode", ?_assertEqual(
[
start_object,
{key, <<"object">>}, start_object,
{key, <<"atomkey">>}, {string, <<"atomvalue">>},
{key, <<"literals">>}, start_array,
{literal, true}, {literal, false}, {literal, null},
end_array,
{key, <<"strings">>}, start_array,
{string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>},
end_array,
{key, <<"numbers">>}, start_array,
{integer, 1}, {float, 1.0}, {float, 1.0},
end_array,
end_object,
end_object,
end_json
],
encode(Term, [])
)},
{"replace lists with empty lists", ?_assertEqual(
[
start_object,
{key, <<"object">>}, start_object,
{key, <<"atomkey">>}, {string, <<"atomvalue">>},
{key, <<"literals">>}, start_array, end_array,
{key, <<"strings">>}, start_array, end_array,
{key, <<"numbers">>}, start_array, end_array,
end_object,
end_object,
end_json
],
encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> V; [{}] -> V; V when is_list(V) -> []; _ -> V end end}])
)},
{"replace objects with empty objects", ?_assertEqual(
[
start_object,
end_object,
end_json
],
encode(Term, [{pre_encode, fun(V) -> case V of [{_,_}|_] -> [{}]; _ -> V end end}])
)},
{"replace all non-list and non_tuple values with false", ?_assertEqual(
[
start_object,
{key, <<"object">>}, start_object,
{key, <<"atomkey">>}, {literal, false},
{key, <<"literals">>}, start_array,
{literal, false}, {literal, false}, {literal, false},
end_array,
{key, <<"strings">>}, start_array,
{literal, false}, {literal, false}, {literal, false},
end_array,
{key, <<"numbers">>}, start_array,
{literal, false}, {literal, false}, {literal, false},
end_array,
end_object,
end_object,
end_json
],
encode(Term, [{pre_encode, fun(V) when is_list(V); is_tuple(V) -> V; (_) -> false end}])
)},
{"replace all atoms with atom_to_list", ?_assertEqual(
[
start_object,
{key, <<"object">>}, start_object,
{key, <<"atomkey">>}, {string, <<"atomvalue">>},
{key, <<"literals">>}, start_array,
{string, <<"true">>}, {string, <<"false">>}, {string, <<"null">>},
end_array,
{key, <<"strings">>}, start_array,
{string, <<"foo">>}, {string, <<"bar">>}, {string, <<"baz">>},
end_array,
{key, <<"numbers">>}, start_array,
{integer, 1}, {float, 1.0}, {float, 1.0},
end_array,
end_object,
end_object,
end_json
],
encode(Term, [{pre_encode, fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end}])
)},
{"pre_encode tuple", ?_assertEqual(
[
start_array,
{integer, 1}, {integer, 2}, {integer, 3},
end_array,
end_json
],
encode({1, 2, 3}, [{pre_encode, fun(Tuple) when is_tuple(Tuple) -> tuple_to_list(Tuple); (V) -> V end}])
)},
{"pre_encode 2-tuples", ?_assertEqual(
[
start_object,
{key, <<"two">>}, {integer, 2}, {key, <<"three">>}, {integer, 3},
end_object,
end_json
],
encode([{two, 1}, {three, 2}], [{pre_encode, fun({K, V}) -> {K, V + 1}; (V) -> V end}])
)},
{"pre_encode one field record", ?_assertEqual(
[
start_object,
{key, <<"bar">>}, {literal, false},
end_object,
end_json
],
encode([{foo, bar}], [{pre_encode, fun({foo, V}) -> {V, undefined}; (undefined) -> false; (V) -> V end}])
)},
{"pre_encode list", ?_assertEqual(
[
start_array,
{integer, 2}, {integer, 3}, {integer, 4},
end_array,
end_json
],
encode([1,2,3], [{pre_encode, fun(X) when is_integer(X) -> X + 1; (V) -> V end}])
)}
].
error_test_() ->
[
{"value error", ?_assertError(badarg, encode(self(), []))},
{"string error", ?_assertError(badarg, encode(<<239, 191, 191>>, []))}
{"value error", ?_assertError(badarg, parser(self(), []))},
{"string error", ?_assertError(badarg, parser(<<239, 191, 191>>, [strict]))}
].
custom_error_handler_test_() ->
Error = fun(Term, {_, State, _}, _) -> {State, Term} end,
Error = fun(Term, {_, State, _, _}, _) -> {State, Term} end,
[
{"value error", ?_assertEqual(
{value, self()},
encode(self(), [{error_handler, Error}])
{value, [self()]},
parser(self(), [{error_handler, Error}])
)},
{"string error", ?_assertEqual(
{string, <<239, 191, 191>>},
encode(<<239, 191, 191>>, [{error_handler, Error}])
)}
].
integer_key_test_() ->
Term = [{123, [{456, 789}]}],
[
{"basic integer keys", ?_assertEqual(
[
start_object,
{key, <<"123">>},
start_object,
{key, <<"456">>},
{integer, 789},
end_object,
end_object,
end_json
],
encode(Term, [])
{string, [{string, <<239, 191, 191>>}]},
parser(<<239, 191, 191>>, [{error_handler, Error}, strict])
)}
].

View file

@ -68,6 +68,8 @@ resume(Rest, State, Handler, Stack, Config) ->
-endif.
incomplete(State, Handler, Stack, Config=#config{stream=false}) ->
?error(State, [], Handler, Stack, Config);
incomplete(State, Handler, Stack, Config=#config{incomplete_handler=false}) ->
{incomplete, fun(end_stream) ->
case resume([end_json], State, Handler, Stack, Config) of
@ -82,8 +84,6 @@ incomplete(State, Handler, Stack, Config=#config{incomplete_handler=F}) ->
F([], {parser, State, Handler, Stack}, jsx_config:config_to_list(Config)).
%handle_event([], Handler, _Config) -> Handler;
%handle_event([Event|Rest], Handler, Config) -> handle_event(Rest, handle_event(Event, Handler, Config), Config);
handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}.
@ -91,24 +91,10 @@ value([start_object|Tokens], Handler, Stack, Config) ->
object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config);
value([start_array|Tokens], Handler, Stack, Config) ->
array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config);
value([{literal, true}|Tokens], Handler, [], Config) ->
done(Tokens, handle_event({literal, true}, Handler, Config), [], Config);
value([{literal, false}|Tokens], Handler, [], Config) ->
done(Tokens, handle_event({literal, false}, Handler, Config), [], Config);
value([{literal, null}|Tokens], Handler, [], Config) ->
done(Tokens, handle_event({literal, null}, Handler, Config), [], Config);
value([{literal, true}|Tokens], Handler, Stack, Config) ->
maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config);
value([{literal, false}|Tokens], Handler, Stack, Config) ->
maybe_done(Tokens, handle_event({literal, false}, Handler, Config), Stack, Config);
value([{literal, null}|Tokens], Handler, Stack, Config) ->
maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config);
value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null ->
maybe_done(Tokens, handle_event({literal, Literal}, Handler, Config), Stack, Config);
value([Literal|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null ->
value([{literal, Literal}] ++ Tokens, Handler, Stack, Config);
value([{integer, Number}|Tokens], Handler, [], Config) when is_integer(Number) ->
done(Tokens, handle_event({integer, Number}, Handler, Config), [], Config);
value([{float, Number}|Tokens], Handler, [], Config) when is_float(Number) ->
done(Tokens, handle_event({float, Number}, Handler, Config), [], Config);
value([{integer, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) ->
maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config);
value([{float, Number}|Tokens], Handler, Stack, Config) when is_float(Number) ->
@ -121,12 +107,6 @@ value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) ->
value([{integer, Number}] ++ Tokens, Handler, Stack, Config);
value([Number|Tokens], Handler, Stack, Config) when is_float(Number) ->
value([{float, Number}] ++ Tokens, Handler, Stack, Config);
value([{string, String}|Tokens], Handler, [], Config) when is_binary(String) ->
case clean_string(String, Tokens, Handler, [], Config) of
Clean when is_binary(Clean) ->
done(Tokens, handle_event({string, Clean}, Handler, Config), [], Config);
Error -> Error
end;
value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) ->
case clean_string(String, Tokens, Handler, Stack, Config) of
Clean when is_binary(Clean) ->
@ -135,6 +115,8 @@ value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String)
end;
value([String|Tokens], Handler, Stack, Config) when is_binary(String) ->
value([{string, String}] ++ Tokens, Handler, Stack, Config);
value([String|Tokens], Handler, Stack, Config) when is_atom(String) ->
value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config);
value([{raw, Raw}|Tokens], Handler, Stack, Config) when is_binary(Raw) ->
value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config);
value([], Handler, Stack, Config) ->
@ -146,13 +128,13 @@ value(Token, Handler, Stack, Config) ->
object([end_object|Tokens], Handler, [object|Stack], Config) ->
maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config);
object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key) ->
object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of
Clean when is_binary(Clean) ->
value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config);
Error -> Error
end;
object([Key|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key) ->
object([Key|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) ->
case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of
Clean when is_binary(Clean) ->
value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config);
@ -185,7 +167,7 @@ maybe_done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
maybe_done(Token, Handler, Stack, Config) ->
maybe_done([Token], Handler, Stack, Config).
done([], Handler, [], Config=#config{explicit_end=true}) ->
done([], Handler, [], Config=#config{stream=true}) ->
incomplete(done, Handler, [], Config);
done(Tokens, Handler, [], Config) when Tokens == [end_json]; Tokens == [] ->
{_, State} = handle_event(end_json, Handler, Config),
@ -196,7 +178,8 @@ done(Token, Handler, Stack, Config) ->
done([Token], Handler, Stack, Config).
%% normalize an object key to a binary
%%
%% atoms become their utf8 text, integers become their decimal
%% representation and binaries are returned unchanged. any other term
%% fails with a function_clause error
fix_key(Key) when is_atom(Key) -> atom_to_binary(Key, utf8);
fix_key(Key) when is_integer(Key) -> list_to_binary(integer_to_list(Key));
fix_key(Key) when is_binary(Key) -> Key.
@ -206,6 +189,256 @@ clean_string(Bin, Tokens, Handler, Stack, Config) ->
String -> String
end.
%% sanitize a string according to the config
%%
%% when `dirty_strings` is set the input is handed back untouched, otherwise
%% it is run through clean/3 starting from an empty accumulator
clean_string(Dirty, #config{dirty_strings=true}) ->
    Dirty;
clean_string(Raw, Opts) ->
    clean(Raw, [], Opts).
%% escape and/or replace bad codepoints if requested
%%
%% consumes the input binary from the front, one codepoint or one malformed
%% sequence at a time, and prepends output codepoints to `Acc`. codepoints
%% that are copied verbatim are handled here; anything that may need
%% escaping, replacing or rejecting is handed to maybe_replace/4 together
%% with the unconsumed input. the result is the cleaned utf8 binary, or
%% whatever maybe_replace/4 returns when it stops early

%% end of input: `Acc` was built back to front, so reverse before encoding
clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc));
%% control characters, double quote, forward slash and backslash
clean(<<X, Rest/binary>>, Acc, Config) when X < 32; X =:= $\"; X =:= $/; X =:= $\\ ->
    maybe_replace(X, Rest, Acc, Config);
%% the rest of the ascii range is copied through
clean(<<X, Rest/binary>>, Acc, Config) when X < 128 ->
    clean(Rest, [X] ++ Acc, Config);
%% u+2028 and u+2029
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
    maybe_replace(X, Rest, Acc, Config);
%% everything below the surrogate range
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#d800 ->
    clean(Rest, [X] ++ Acc, Config);
%% above the surrogates, below the reserved range u+fdd0..u+fdef
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#dfff, X < 16#fdd0 ->
    clean(Rest, [X] ++ Acc, Config);
%% above the reserved range, below u+fffe
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X > 16#fdef, X < 16#fffe ->
    clean(Rest, [X] ++ Acc, Config);
%% planes 1 through 16, excluding the last two codepoints of each plane
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X >= 16#10000, (X band 16#ffff) < 16#fffe ->
    clean(Rest, [X] ++ Acc, Config);
%% surrogates. both trailing bytes must be continuation bytes; a truncated
%% sequence falls through to the 3 byte clause below so that a valid
%% character following it is not swallowed
clean(<<237, X, Y, Rest/binary>>, Acc, Config) when X >= 160, X =< 191, Y >= 128, Y =< 191 ->
    maybe_replace(surrogate, Rest, Acc, Config);
%% noncharacters
clean(<<_/utf8, Rest/binary>>, Acc, Config) ->
    maybe_replace(noncharacter, Rest, Acc, Config);
%% u+fffe and u+ffff for R14BXX
clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X == 190; X == 191 ->
    maybe_replace(noncharacter, Rest, Acc, Config);
%% overlong encodings and missing continuations of a 2 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 ->
    maybe_replace(badutf, strip_continuations(Rest, 1), Acc, Config);
%% overlong encodings and missing continuations of a 3 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
    maybe_replace(badutf, strip_continuations(Rest, 2), Acc, Config);
%% overlong encodings and missing continuations of a 4 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
    maybe_replace(badutf, strip_continuations(Rest, 3), Acc, Config);
%% any other byte (orphan continuations, 248 and up) is bad on its own
clean(<<_, Rest/binary>>, Acc, Config) ->
    maybe_replace(badutf, Rest, Acc, Config).
%% drop leading utf8 continuation bytes (128..191) from a binary
%%
%% removes at most as many bytes as the count given in the second argument
%% and stops early at the first byte that is not a continuation byte or at
%% the end of the input. returns what is left
strip_continuations(<<Byte, Tail/binary>>, Left) when Left =/= 0, Byte >= 128, Byte =< 191 ->
    strip_continuations(Tail, Left - 1);
strip_continuations(Remaining, _) ->
    Remaining.
%% decide what to emit for a codepoint, or for a tag naming a malformed
%% sequence, that clean/3 handed over, then resume cleaning the remaining
%% input. output is prepended to the accumulator, so multi character
%% escapes are pushed in reverse order

%% short escapes, only when `escaped_strings` is enabled
maybe_replace($\b, Tail, Out, Opts=#config{escaped_strings=true}) ->
    clean(Tail, [$b, $\\ | Out], Opts);
maybe_replace($\t, Tail, Out, Opts=#config{escaped_strings=true}) ->
    clean(Tail, [$t, $\\ | Out], Opts);
maybe_replace($\n, Tail, Out, Opts=#config{escaped_strings=true}) ->
    clean(Tail, [$n, $\\ | Out], Opts);
maybe_replace($\f, Tail, Out, Opts=#config{escaped_strings=true}) ->
    clean(Tail, [$f, $\\ | Out], Opts);
maybe_replace($\r, Tail, Out, Opts=#config{escaped_strings=true}) ->
    clean(Tail, [$r, $\\ | Out], Opts);
maybe_replace($\", Tail, Out, Opts=#config{escaped_strings=true}) ->
    clean(Tail, [$\", $\\ | Out], Opts);
%% forward slashes are only escaped when `escaped_forward_slashes` is also set
maybe_replace($/, Tail, Out, Opts=#config{escaped_strings=true}) ->
    case Opts#config.escaped_forward_slashes of
        true -> clean(Tail, [$/, $\\ | Out], Opts);
        false -> clean(Tail, [$/ | Out], Opts)
    end;
maybe_replace($\\, Tail, Out, Opts=#config{escaped_strings=true}) ->
    clean(Tail, [$\\, $\\ | Out], Opts);
%% u+2028 and u+2029 become \uXXXX escapes unless `unescaped_jsonp` is set
maybe_replace(Sep, Tail, Out, Opts=#config{escaped_strings=true}) when Sep == 16#2028; Sep == 16#2029 ->
    case Opts#config.unescaped_jsonp of
        true -> clean(Tail, [Sep | Out], Opts);
        false -> clean(Tail, lists:reverse(json_escape_sequence(Sep), Out), Opts)
    end;
%% remaining control characters become \uXXXX escapes
maybe_replace(Ctrl, Tail, Out, Opts=#config{escaped_strings=true}) when Ctrl < 32 ->
    clean(Tail, lists:reverse(json_escape_sequence(Ctrl), Out), Opts);
%% malformed input is an error under `strict_utf8`
maybe_replace(Tag, _, _, #config{strict_utf8=true}) when is_atom(Tag) ->
    {error, badarg};
%% otherwise malformed input is replaced with u+fffd
maybe_replace(Tag, Tail, Out, Opts) when Tag =:= noncharacter; Tag =:= surrogate; Tag =:= badutf ->
    clean(Tail, [16#fffd | Out], Opts);
%% nothing to do: copy the codepoint through
maybe_replace(Char, Tail, Out, Opts) ->
    clean(Tail, [Char | Out], Opts).
%% convert a codepoint to its \uXXXX equivalent
%%
%% returns a list of six characters: a backslash, `u` and one hex digit for
%% each of the four nibbles of the codepoint's 16 bit representation
json_escape_sequence(Codepoint) ->
    [$\\, $u | [to_hex(Nibble) || <<Nibble:4>> <= <<Codepoint:16>>]].
%% map a nibble to the character code of its lowercase hex digit
%%
%% 10..15 map onto $a..$f; anything else is offset from ascii "0" (48),
%% so 1 becomes 49 ("1"), 2 becomes 50 ("2") and so on
to_hex(Digit) when is_integer(Digit), Digit >= 10, Digit =< 15 ->
    Digit - 10 + $a;
to_hex(Digit) ->
    Digit + $0.
%% for raw input
-spec init(proplists:proplist()) -> list().
@ -218,54 +451,20 @@ handle_event(end_json, State) -> lists:reverse(State);
handle_event(Event, State) -> [Event] ++ State.
-include("jsx_strings.hrl").
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
%% test helper: parses `Events` two ways and asserts both give the same result
%%
%% first in a single call to value/4 with `end_json` appended, then
%% incrementally through a parser created with `explicit_end`, feeding one
%% event per call and finishing with `end_stream`. a badarg error raised by
%% either run is turned into `{error, badarg}`. returns the single call result
parse(Events, Config) ->
%% whole event list in one go
Chunk = try
value(Events ++ [end_json], {jsx, []}, [], jsx_config:parse_config(Config))
catch
error:badarg -> {error, badarg}
end,
%% same events, each wrapped in its own list and fed to the continuation
%% returned by the previous step
Incremental = try
Final = lists:foldl(
fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end,
parser(jsx, [], [explicit_end] ++ Config),
lists:map(fun(X) -> [X] end, Events)
),
Final(end_stream)
catch
error:badarg -> {error, badarg}
end,
?assert(Chunk == Incremental),
Chunk.
%% test generator: for every case returned by jsx:test_cases/0, asserts that
%% parsing the case's events yields those same events followed by `end_json`.
%% only the title and the events (first and fourth tuple elements) are used
parse_test_() ->
Data = jsx:test_cases(),
[
{
Title, ?_assertEqual(
Events ++ [end_json],
parse(Events, [])
)
} || {Title, _, _, Events} <- Data
].
%% test helper: passes `Events` straight to value/4 with the `{jsx, []}`
%% handler, an empty stack and `Config` run through jsx_config:parse_config/1
parse_error(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)).
%% test helper: passes `Events` straight to value/4 with the `{jsx, []}`
%% handler, an empty stack and `Config` run through jsx_config:parse_config/1
parse(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)).
error_test_() ->
[
{"value error", ?_assertError(badarg, parse_error([self()], []))},
{"maybe_done error", ?_assertError(badarg, parse_error([start_array, end_array, start_array, end_json], []))},
{"done error", ?_assertError(badarg, parse_error([{string, <<"">>}, {literal, true}, end_json], []))},
{"string error", ?_assertError(badarg, parse_error([{string, <<239, 191, 191>>}, end_json], []))}
{"value error", ?_assertError(badarg, parse([self()], []))},
{"maybe_done error", ?_assertError(badarg, parse([start_array, end_array, start_array, end_json], []))},
{"done error", ?_assertError(badarg, parse([{string, <<"">>}, {literal, true}, end_json], []))},
{"string error", ?_assertError(badarg, parse([{string, <<239, 191, 191>>}, end_json], [strict_utf8]))}
].
@ -274,47 +473,540 @@ custom_error_handler_test_() ->
[
{"value error", ?_assertEqual(
{value, [self()]},
parse_error([self()], [{error_handler, Error}])
parse([self()], [{error_handler, Error}])
)},
{"maybe_done error", ?_assertEqual(
{maybe_done, [start_array, end_json]},
parse_error([start_array, end_array, start_array, end_json], [{error_handler, Error}])
parse([start_array, end_array, start_array, end_json], [{error_handler, Error}])
)},
{"done error", ?_assertEqual(
{done, [{literal, true}, end_json]},
parse_error([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}])
{maybe_done, [{literal, true}, end_json]},
parse([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}])
)},
{"string error", ?_assertEqual(
{string, [{string, <<239, 191, 191>>}, end_json]},
parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}])
parse([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict])
)}
].
%% test generator: each of these event lists stops short of a complete json
%% value, and parsing it with the default config must raise badarg
incomplete_test_() ->
    [
        {Title, ?_assertError(badarg, parse(Events, []))}
        || {Title, Events} <- [
            {"incomplete value", []},
            {"incomplete object", [start_object]},
            {"incomplete array", [start_array]},
            {"incomplete maybe_done", [start_array, end_array]}
        ]
    ].
custom_incomplete_handler_test_() ->
[
{"custom incomplete handler", ?_assertError(
badarg,
parse_error([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}])
parse([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}])
)}
].
raw_test_() ->
Parse = fun(Events, Config) -> (parser(?MODULE, [], Config))(Events ++ [end_json]) end,
[
{"raw empty list", ?_assertEqual(
[start_array, end_array, end_json],
parse([{raw, <<"[]">>}], [])
[start_array, end_array],
Parse([{raw, <<"[]">>}], [])
)},
{"raw empty object", ?_assertEqual(
[start_object, end_object, end_json],
parse([{raw, <<"{}">>}], [])
[start_object, end_object],
Parse([{raw, <<"{}">>}], [])
)},
{"raw chunk inside stream", ?_assertEqual(
[start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object, end_json],
parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], [])
[start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object],
Parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], [])
)}
].
%% erlang refuses to encode certain codepoints, so fake them
%%
%% lays out the bits of the codepoint by hand in the 1, 2, 3 or 4 byte utf8
%% form, picked by the codepoint's magnitude, without any validity checks
to_fake_utf8(Codepoint) when Codepoint < 16#0080 ->
    <<Codepoint:8>>;
to_fake_utf8(Codepoint) when Codepoint < 16#0800 ->
    %% 11 significant bits: 110yyyyy 10xxxxxx
    <<0:5, Hi:5, Lo:6>> = <<Codepoint:16>>,
    <<6:3, Hi:5, 2:2, Lo:6>>;
to_fake_utf8(Codepoint) when Codepoint < 16#10000 ->
    %% 16 significant bits: 1110zzzz 10yyyyyy 10xxxxxx
    <<Hi:4, Mid:6, Lo:6>> = <<Codepoint:16>>,
    <<14:4, Hi:4, 2:2, Mid:6, 2:2, Lo:6>>;
to_fake_utf8(Codepoint) ->
    %% 21 significant bits: 11110www 10zzzzzz 10yyyyyy 10xxxxxx
    <<0:3, Top:3, Hi:6, Mid:6, Lo:6>> = <<Codepoint:24>>,
    <<30:5, Top:3, 2:2, Hi:6, 2:2, Mid:6, 2:2, Lo:6>>.
%% utf8 binary of every codepoint up to u+fffd that clean/3 copies through
%% verbatim: starts at the space character and leaves out the double quote,
%% forward slash, backslash, u+2028, u+2029, the surrogates and the
%% reserved range u+fdd0..u+fdef
codepoints() ->
    Ranges = [
        {32, 33},
        {35, 46},
        {48, 91},
        {93, 16#2027},
        {16#202a, 16#d7ff},
        {16#e000, 16#fdcf},
        {16#fdf0, 16#fffd}
    ],
    unicode:characters_to_binary(
        lists:append([lists:seq(First, Last) || {First, Last} <- Ranges])
    ).
%% utf8 binary of codepoints beyond the basic multilingual plane: all of
%% u+10000..u+1fffd followed by the first codepoint of planes 2 through 16
extended_codepoints() ->
    PlaneStarts = [Plane bsl 16 || Plane <- lists:seq(2, 16)],
    unicode:characters_to_binary(lists:seq(16#10000, 16#1fffd) ++ PlaneStarts).
%% hand encoded utf8 for each codepoint in the reserved range u+fdd0..u+fdef
reserved_space() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#fdd0, 16#fdef)).
%% hand encoded utf8 for each surrogate codepoint u+d800..u+dfff
surrogates() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#d800, 16#dfff)).
%% hand encoded utf8 for the noncharacters u+fffe and u+ffff
noncharacters() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#fffe, 16#ffff)).
%% hand encoded utf8 for the last two codepoints (xfffe and xffff) of each
%% of planes 1 through 16, in ascending order
extended_noncharacters() ->
    [
        to_fake_utf8((Plane bsl 16) bor Low)
        || Plane <- lists:seq(1, 16), Low <- [16#fffe, 16#ffff]
    ].
%% test generator for clean_string/2
%%
%% acceptable codepoints must come back unchanged with the default config and
%% with `escaped_strings`; reserved, surrogate and noncharacter sequences
%% must each give `{error, badarg}` under `strict_utf8` and a single u+fffd
%% with the default config
clean_string_test_() ->
    Default = fun(Bin) -> clean_string(Bin, #config{}) end,
    Escaped = fun(Bin) -> clean_string(Bin, #config{escaped_strings=true}) end,
    Strict = fun(Bin) -> clean_string(Bin, #config{strict_utf8=true}) end,
    %% one expected result per input sequence
    AllErrors = fun(Seqs) -> [{error, badarg} || _ <- Seqs] end,
    AllReplaced = fun(Seqs) -> [<<16#fffd/utf8>> || _ <- Seqs] end,
    [
        {"clean codepoints",
            ?_assertEqual(codepoints(), Default(codepoints()))},
        {"clean extended codepoints",
            ?_assertEqual(extended_codepoints(), Default(extended_codepoints()))},
        {"escape path codepoints",
            ?_assertEqual(codepoints(), Escaped(codepoints()))},
        {"escape path extended codepoints",
            ?_assertEqual(extended_codepoints(), Escaped(extended_codepoints()))},
        {"error reserved space",
            ?_assertEqual(AllErrors(reserved_space()), lists:map(Strict, reserved_space()))},
        {"error surrogates",
            ?_assertEqual(AllErrors(surrogates()), lists:map(Strict, surrogates()))},
        {"error noncharacters",
            ?_assertEqual(AllErrors(noncharacters()), lists:map(Strict, noncharacters()))},
        {"error extended noncharacters",
            ?_assertEqual(
                AllErrors(extended_noncharacters()),
                lists:map(Strict, extended_noncharacters())
            )},
        {"clean reserved space",
            ?_assertEqual(AllReplaced(reserved_space()), lists:map(Default, reserved_space()))},
        {"clean surrogates",
            ?_assertEqual(AllReplaced(surrogates()), lists:map(Default, surrogates()))},
        {"clean noncharacters",
            ?_assertEqual(AllReplaced(noncharacters()), lists:map(Default, noncharacters()))},
        {"clean extended noncharacters",
            ?_assertEqual(
                AllReplaced(extended_noncharacters()),
                lists:map(Default, extended_noncharacters())
            )}
    ].
%% test generator for the escaping done by clean_string/2
%%
%% each row is {Title, Expected, Codepoint, Config}: the codepoint is utf8
%% encoded, cleaned with the given config and compared against the expected
%% binary
escape_test_() ->
    Plain = #config{},
    Escaped = #config{escaped_strings=true},
    Slashes = #config{escaped_strings=true, escaped_forward_slashes=true},
    Jsonp = #config{escaped_strings=true, unescaped_jsonp=true},
    Cases = [
        {"maybe_escape backspace", <<"\\b">>, 16#0008, Escaped},
        {"don't escape backspace", <<"\b">>, 16#0008, Plain},
        {"maybe_escape tab", <<"\\t">>, 16#0009, Escaped},
        {"maybe_escape newline", <<"\\n">>, 16#000a, Escaped},
        {"maybe_escape formfeed", <<"\\f">>, 16#000c, Escaped},
        {"maybe_escape carriage return", <<"\\r">>, 16#000d, Escaped},
        {"maybe_escape quote", <<"\\\"">>, 16#0022, Escaped},
        {"maybe_escape forward slash", <<"\\/">>, 16#002f, Slashes},
        {"do not maybe_escape forward slash", <<"/">>, 16#002f, Escaped},
        {"maybe_escape backslash", <<"\\\\">>, 16#005c, Escaped},
        {"maybe_escape jsonp (u2028)", <<"\\u2028">>, 16#2028, Escaped},
        {"do not maybe_escape jsonp (u2028)", <<16#2028/utf8>>, 16#2028, Jsonp},
        {"maybe_escape jsonp (u2029)", <<"\\u2029">>, 16#2029, Escaped},
        {"do not maybe_escape jsonp (u2029)", <<16#2029/utf8>>, 16#2029, Jsonp},
        {"maybe_escape u0000", <<"\\u0000">>, 16#0000, Escaped},
        {"maybe_escape u0001", <<"\\u0001">>, 16#0001, Escaped},
        {"maybe_escape u0002", <<"\\u0002">>, 16#0002, Escaped},
        {"maybe_escape u0003", <<"\\u0003">>, 16#0003, Escaped},
        {"maybe_escape u0004", <<"\\u0004">>, 16#0004, Escaped},
        {"maybe_escape u0005", <<"\\u0005">>, 16#0005, Escaped},
        {"maybe_escape u0006", <<"\\u0006">>, 16#0006, Escaped},
        {"maybe_escape u0007", <<"\\u0007">>, 16#0007, Escaped},
        {"maybe_escape u000b", <<"\\u000b">>, 16#000b, Escaped},
        {"maybe_escape u000e", <<"\\u000e">>, 16#000e, Escaped},
        {"maybe_escape u000f", <<"\\u000f">>, 16#000f, Escaped},
        {"maybe_escape u0010", <<"\\u0010">>, 16#0010, Escaped},
        {"maybe_escape u0011", <<"\\u0011">>, 16#0011, Escaped},
        {"maybe_escape u0012", <<"\\u0012">>, 16#0012, Escaped},
        {"maybe_escape u0013", <<"\\u0013">>, 16#0013, Escaped},
        {"maybe_escape u0014", <<"\\u0014">>, 16#0014, Escaped},
        {"maybe_escape u0015", <<"\\u0015">>, 16#0015, Escaped},
        {"maybe_escape u0016", <<"\\u0016">>, 16#0016, Escaped},
        {"maybe_escape u0017", <<"\\u0017">>, 16#0017, Escaped},
        {"maybe_escape u0018", <<"\\u0018">>, 16#0018, Escaped},
        {"maybe_escape u0019", <<"\\u0019">>, 16#0019, Escaped},
        {"maybe_escape u001a", <<"\\u001a">>, 16#001a, Escaped},
        {"maybe_escape u001b", <<"\\u001b">>, 16#001b, Escaped},
        {"maybe_escape u001c", <<"\\u001c">>, 16#001c, Escaped},
        {"maybe_escape u001d", <<"\\u001d">>, 16#001d, Escaped},
        {"maybe_escape u001e", <<"\\u001e">>, 16#001e, Escaped},
        {"maybe_escape u001f", <<"\\u001f">>, 16#001f, Escaped}
    ],
    [
        {Title, ?_assertEqual(Expected, clean_string(<<Codepoint/utf8>>, Config))}
        || {Title, Expected, Codepoint, Config} <- Cases
    ].
bad_utf8_test_() ->
[
{"noncharacter u+fffe", ?_assertEqual(
{error, badarg},
clean_string(to_fake_utf8(16#fffe), #config{strict_utf8=true})
)},
{"noncharacter u+fffe replaced", ?_assertEqual(
<<16#fffd/utf8>>,
clean_string(to_fake_utf8(16#fffe), #config{})
)},
{"noncharacter u+ffff", ?_assertEqual(
{error, badarg},
clean_string(to_fake_utf8(16#ffff), #config{strict_utf8=true})
)},
{"noncharacter u+ffff replaced", ?_assertEqual(
<<16#fffd/utf8>>,
clean_string(to_fake_utf8(16#ffff), #config{})
)},
{"orphan continuation byte u+0080", ?_assertEqual(
{error, badarg},
clean_string(<<16#0080>>, #config{strict_utf8=true})
)},
{"orphan continuation byte u+0080 replaced", ?_assertEqual(
<<16#fffd/utf8>>,
clean_string(<<16#0080>>, #config{})
)},
{"orphan continuation byte u+00bf", ?_assertEqual(
{error, badarg},
clean_string(<<16#00bf>>, #config{strict_utf8=true})
)},
{"orphan continuation byte u+00bf replaced", ?_assertEqual(
<<16#fffd/utf8>>,
clean_string(<<16#00bf>>, #config{})
)},
{"2 continuation bytes", ?_assertEqual(
{error, badarg},
clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{strict_utf8=true})
)},
{"2 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 2),
clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{})
)},
{"3 continuation bytes", ?_assertEqual(
{error, badarg},
clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{strict_utf8=true})
)},
{"3 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 3),
clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{})
)},
{"4 continuation bytes", ?_assertEqual(
{error, badarg},
clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{strict_utf8=true})
)},
{"4 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 4),
clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{})
)},
{"5 continuation bytes", ?_assertEqual(
{error, badarg},
clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{strict_utf8=true})
)},
{"5 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 5),
clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{})
)},
{"6 continuation bytes", ?_assertEqual(
{error, badarg},
clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{strict_utf8=true})
)},
{"6 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 6),
clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{})
)},
{"all continuation bytes", ?_assertEqual(
{error, badarg},
clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{strict_utf8=true})
)},
{"all continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))),
clean_string(
<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>,
#config{}
)
)},
{"lonely start byte", ?_assertEqual(
{error, badarg},
clean_string(<<16#00c0>>, #config{strict_utf8=true})
)},
{"lonely start byte replaced", ?_assertEqual(
<<16#fffd/utf8>>,
clean_string(<<16#00c0>>, #config{})
)},
{"lonely start bytes (2 byte)", ?_assertEqual(
{error, badarg},
clean_string(<<16#00c0, 32, 16#00df>>, #config{strict_utf8=true})
)},
{"lonely start bytes (2 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32, 16#fffd/utf8>>,
clean_string(<<16#00c0, 32, 16#00df>>, #config{})
)},
{"lonely start bytes (3 byte)", ?_assertEqual(
{error, badarg},
clean_string(<<16#00e0, 32, 16#00ef>>, #config{strict_utf8=true})
)},
{"lonely start bytes (3 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32, 16#fffd/utf8>>,
clean_string(<<16#00e0, 32, 16#00ef>>, #config{})
)},
{"lonely start bytes (4 byte)", ?_assertEqual(
{error, badarg},
clean_string(<<16#00f0, 32, 16#00f7>>, #config{strict_utf8=true})
)},
{"lonely start bytes (4 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32, 16#fffd/utf8>>,
clean_string(<<16#00f0, 32, 16#00f7>>, #config{})
)},
{"missing continuation byte (3 byte)", ?_assertEqual(
{error, badarg},
clean_string(<<224, 160, 32>>, #config{strict_utf8=true})
)},
{"missing continuation byte (3 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>,
clean_string(<<224, 160, 32>>, #config{})
)},
{"missing continuation byte (4 byte missing one)", ?_assertEqual(
{error, badarg},
clean_string(<<240, 144, 128, 32>>, #config{strict_utf8=true})
)},
{"missing continuation byte (4 byte missing one) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>,
clean_string(<<240, 144, 128, 32>>, #config{})
)},
{"missing continuation byte (4 byte missing two)", ?_assertEqual(
{error, badarg},
clean_string(<<240, 144, 32>>, #config{strict_utf8=true})
)},
{"missing continuation byte (4 byte missing two) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>,
clean_string(<<240, 144, 32>>, #config{})
)},
{"overlong encoding of u+002f (2 byte)", ?_assertEqual(
{error, badarg},
clean_string(<<16#c0, 16#af, 32>>, #config{strict_utf8=true})
)},
{"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>,
clean_string(<<16#c0, 16#af, 32>>, #config{})
)},
{"overlong encoding of u+002f (3 byte)", ?_assertEqual(
{error, badarg},
clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{strict_utf8=true})
)},
{"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>,
clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{})
)},
{"overlong encoding of u+002f (4 byte)", ?_assertEqual(
{error, badarg},
clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{strict_utf8=true})
)},
{"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>,
clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{})
)},
{"highest overlong 2 byte sequence", ?_assertEqual(
{error, badarg},
clean_string(<<16#c1, 16#bf, 32>>, #config{strict_utf8=true})
)},
{"highest overlong 2 byte sequence replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>,
clean_string(<<16#c1, 16#bf, 32>>, #config{})
)},
{"highest overlong 3 byte sequence", ?_assertEqual(
{error, badarg},
clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{strict_utf8=true})
)},
{"highest overlong 3 byte sequence replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>,
clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{})
)},
{"highest overlong 4 byte sequence", ?_assertEqual(
{error, badarg},
clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{strict_utf8=true})
)},
{"highest overlong 4 byte sequence replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>,
clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{})
)}
].
%% eunit generator for json_escape_sequence/1: every listed codepoint is
%% expected to render as a six character, zero padded `\uXXXX` string.
%% ?_assertEqual takes the expected value first and the expression under test
%% second; keeping that order makes eunit failure reports label both sides
%% correctly.
json_escape_sequence_test_() ->
    [
        {"json escape sequence test - 16#0000", ?_assertEqual("\\u0000", json_escape_sequence(16#0000))},
        {"json escape sequence test - 16#abc", ?_assertEqual("\\u0abc", json_escape_sequence(16#abc))},
        {"json escape sequence test - 16#def", ?_assertEqual("\\u0def", json_escape_sequence(16#def))}
    ].
%% eunit generator for fix_key/1: binary, atom and integer keys are all
%% expected to come back as binaries.
%% ?_assertEqual takes the expected value first and the expression under test
%% second; keeping that order makes eunit failure reports label both sides
%% correctly.
fix_key_test_() ->
    [
        {"binary key", ?_assertEqual(<<"foo">>, fix_key(<<"foo">>))},
        {"atom key", ?_assertEqual(<<"foo">>, fix_key(foo))},
        {"integer key", ?_assertEqual(<<"123">>, fix_key(123))}
    ].
-endif.

View file

@ -1,403 +0,0 @@
%% prepare a json string according to `Config`:
%%   * `dirty_strings` returns the input untouched, without any validation
%%   * `replaced_bad_utf8` and/or `escaped_strings` rewrite the string via clean/3
%%   * otherwise the string is only validated, via ensure_clean/1
clean_string(Bin, #config{dirty_strings=true}) -> Bin;
clean_string(Bin, Config) ->
    NeedsRewrite = Config#config.replaced_bad_utf8 orelse Config#config.escaped_strings,
    case NeedsRewrite of
        false -> ensure_clean(Bin);
        true -> clean(Bin, [], Config)
    end.
%% validate `Bin` without modifying it: returns `Bin` itself when is_clean/1
%% accepts it, otherwise hands the `{error, badarg}` tuple back to the caller
ensure_clean(Bin) ->
    case is_clean(Bin) of
        {error, badarg} = Error -> Error;
        ok -> Bin
    end.
%% fast path for no escaping and no correcting: returns `ok` when every byte of
%% the input belongs to an acceptable codepoint and `{error, badarg}` as soon as
%% something 'bad' is found
is_clean(<<>>) -> ok;
%% any single byte below 128 is accepted as is
is_clean(<<X, Rest/binary>>) when X =< 127 -> is_clean(Rest);
%% multibyte codepoints are accepted unless they are noncharacters
%% (u+fdd0..u+fdef and the last two codepoints of every plane); surrogates and
%% malformed sequences never match a utf8 segment and fall through
is_clean(<<X/utf8, Rest/binary>>)
        when X < 16#d800;
             X > 16#dfff, X < 16#fdd0;
             X > 16#fdef, X < 16#fffe;
             X >= 16#10000, (X band 16#ffff) < 16#fffe ->
    is_clean(Rest);
is_clean(_Bin) -> {error, badarg}.
%% escape and/or replace bad codepoints if requested
%%
%% walks `Bin` from left to right. `Acc` holds the output codepoints in reverse
%% order; maybe_replace/2 therefore returns its replacements reversed as well,
%% and the final clause reverses everything once and encodes it as a binary.
%% NOTE(review): when maybe_replace/2 answers `{error, badarg}` the `++` below
%% raises badarg instead of returning the tuple -- confirm callers expect that.
clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc));
%% control characters, `"`, `/` and `\` are candidates for escaping
clean(<<X, Rest/binary>>, Acc, Config) when X < 32; X =:= 34; X =:= 47; X =:= 92 ->
    clean(Rest, maybe_replace(X, Config) ++ Acc, Config);
%% all remaining single bytes below 128 are copied verbatim
clean(<<X, Rest/binary>>, Acc, Config) when X < 128 ->
    clean(Rest, [X | Acc], Config);
%% u+2028 and u+2029 are candidates for escaping
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
    clean(Rest, maybe_replace(X, Config) ++ Acc, Config);
%% every other well formed codepoint that is not a noncharacter is copied
clean(<<X/utf8, Rest/binary>>, Acc, Config)
        when X < 16#d800;
             X > 16#dfff, X < 16#fdd0;
             X > 16#fdef, X < 16#fffe;
             X >= 16#10000, (X band 16#ffff) < 16#fffe ->
    clean(Rest, [X | Acc], Config);
%% surrogates: only a complete three byte sequence (237, 160..191, 128..191) is
%% consumed here. anything shorter or malformed falls through to the 3 byte
%% clause below, which strips continuation bytes only, so a byte that merely
%% follows a broken sequence is never swallowed
clean(<<237, X, Y, Rest/binary>>, Acc, Config)
        when X >= 160, X =< 191, Y >= 128, Y =< 191 ->
    clean(Rest, maybe_replace(surrogate, Config) ++ Acc, Config);
%% noncharacters
clean(<<_/utf8, Rest/binary>>, Acc, Config) ->
    clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config);
%% u+fffe and u+ffff for R14BXX
clean(<<239, 191, X, Rest/binary>>, Acc, Config) when X == 190; X == 191 ->
    clean(Rest, maybe_replace(noncharacter, Config) ++ Acc, Config);
%% overlong encodings and missing continuations of a 2 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 ->
    clean(strip_continuations(Rest, 1), maybe_replace(badutf, Config) ++ Acc, Config);
%% overlong encodings and missing continuations of a 3 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
    clean(strip_continuations(Rest, 2), maybe_replace(badutf, Config) ++ Acc, Config);
%% overlong encodings and missing continuations of a 4 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
    clean(strip_continuations(Rest, 3), maybe_replace(badutf, Config) ++ Acc, Config);
%% any other byte (orphan continuation bytes, 248 and above)
clean(<<_, Rest/binary>>, Acc, Config) ->
    clean(Rest, maybe_replace(badutf, Config) ++ Acc, Config).
%% drop up to `N` leading utf8 continuation bytes (bit pattern 10xxxxxx, i.e.
%% 128..191) from a binary, stopping early at the first byte that is not one
strip_continuations(Bin, 0) -> Bin;
strip_continuations(<<2#10:2, _:6, Rest/binary>>, N) ->
    strip_continuations(Rest, N - 1);
%% not a continuation byte
strip_continuations(Bin, _) -> Bin.
%% decide what clean/3 should emit for a codepoint, or for one of the markers
%% `noncharacter`, `surrogate` and `badutf`.
%% the result is a list of codepoints in REVERSE order, because clean/3 prepends
%% it to an accumulator that is reversed at the very end.
%% returns `{error, badarg}` when a marker is seen but `replaced_bad_utf8` is
%% not enabled.
maybe_replace($\b, #config{escaped_strings=true}) -> [$b, $\\];
maybe_replace($\t, #config{escaped_strings=true}) -> [$t, $\\];
maybe_replace($\n, #config{escaped_strings=true}) -> [$n, $\\];
maybe_replace($\f, #config{escaped_strings=true}) -> [$f, $\\];
maybe_replace($\r, #config{escaped_strings=true}) -> [$r, $\\];
maybe_replace($\", #config{escaped_strings=true}) -> [$\", $\\];
maybe_replace($/, Config=#config{escaped_strings=true}) ->
    case Config#config.escaped_forward_slashes of
        true -> [$/, $\\];
        false -> [$/]
    end;
maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\];
maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
    case Config#config.unescaped_jsonp of
        true -> [X];
        false -> lists:reverse(json_escape_sequence(X))
    end;
maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
    lists:reverse(json_escape_sequence(X));
maybe_replace(noncharacter, #config{replaced_bad_utf8=true}) -> [16#fffd];
maybe_replace(surrogate, #config{replaced_bad_utf8=true}) -> [16#fffd];
maybe_replace(badutf, #config{replaced_bad_utf8=true}) -> [16#fffd];
%% a real codepoint reaching this point needs no escaping under the current
%% config (escaping was not requested), so it is passed through unchanged
%% rather than being reported as an error; the validation-only path accepts
%% the very same codepoints
maybe_replace(X, _Config) when is_integer(X) -> [X];
maybe_replace(_, _) -> {error, badarg}.
%% convert a codepoint to its \uXXXX equivalent, returned as a flat string.
%% codepoints above u+ffff do not fit in four hex digits; json represents them
%% as a utf16 surrogate pair, i.e. two consecutive \uXXXX escapes, instead of
%% silently truncating the value to its low 16 bits.
json_escape_sequence(X) when is_integer(X), X > 16#ffff, X =< 16#10ffff ->
    <<High:10, Low:10>> = <<(X - 16#10000):20>>,
    json_escape_sequence(16#d800 + High) ++ json_escape_sequence(16#dc00 + Low);
json_escape_sequence(X) ->
    %% split the 16 bit value into four nibbles, most significant first
    <<A:4, B:4, C:4, D:4>> = <<X:16>>,
    [$\\, $u, to_hex(A), to_hex(B), to_hex(C), to_hex(D)].
%% map a nibble to its lowercase hex digit: 10..15 become $a..$f, anything
%% else is offset from ascii "0" (48), so 1 is "1" (49), 2 is "2" (50), etc...
to_hex(X) when is_integer(X), X >= 10, X =< 15 -> X - 10 + $a;
to_hex(X) -> X + $0.

View file

@ -1,689 +0,0 @@
%% data and helper functions for tests
-export([init/1, handle_event/2]).
-export([test_cases/0]).
-include_lib("eunit/include/eunit.hrl").
%% test handler: the state is the list of events seen so far, starting empty.
%% only an empty option list is accepted
init(Options) when Options =:= [] ->
    [].
%% events are pushed newest-first; `end_json` is pushed like any other event
%% and then the whole list is flipped back into emission order
handle_event(end_json, Seen) ->
    lists:reverse([end_json | Seen]);
handle_event(Event, Seen) ->
    [Event | Seen].
%% every shared test case, as one flat list of
%% `{Title, JSON, Term, Events}` tuples
test_cases() ->
    lists:append([
        empty_array(),
        nested_array(),
        empty_object(),
        nested_object(),
        strings(),
        literals(),
        integers(),
        floats(),
        compound_object()
    ]).
%% the empty array: title, json text, erlang term and event stream
empty_array() ->
    Title = "[]",
    [{Title, list_to_binary(Title), [], [start_array, end_array]}].
%% three arrays nested inside each other
nested_array() ->
    Title = "[[[]]]",
    Events = lists:duplicate(3, start_array) ++ lists:duplicate(3, end_array),
    [{Title, list_to_binary(Title), [[[]]], Events}].
%% the empty object, represented on the erlang side as `[{}]`
empty_object() ->
    Title = "{}",
    [{Title, list_to_binary(Title), [{}], [start_object, end_object]}].
%% three objects nested under the same key name
nested_object() ->
    Title = "{\"key\":{\"key\":{}}}",
    Key = {key, <<"key">>},
    Events = [start_object, Key, start_object, Key, start_object]
        ++ lists:duplicate(3, end_object),
    [{Title, list_to_binary(Title), [{<<"key">>, [{<<"key">>, [{}]}]}], Events}].
%% top level (unwrapped) string cases; the title is the raw string and the
%% json text is that string wrapped in double quotes
naked_strings() ->
    ToCase = fun(String) ->
        Bin = list_to_binary(String),
        {String, <<$", Bin/binary, $">>, Bin, [{string, Bin}]}
    end,
    lists:map(ToCase, ["", "hello world"]).
%% string cases: bare, then each wrapped in an array, then each wrapped in
%% an object
strings() ->
    Naked = naked_strings(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).
%% top level integer cases: values around byte/word boundaries, two values
%% past 2^64, the negation of each, and finally zero
naked_integers() ->
    Positive = [
        1, 2, 3,
        127, 128, 129,
        255, 256, 257,
        65534, 65535, 65536,
        18446744073709551616,
        18446744073709551617
    ],
    ToCase = fun(Integer) ->
        Title = integer_to_list(Integer),
        {Title, list_to_binary(Title), Integer, [{integer, Integer}]}
    end,
    lists:map(ToCase, Positive ++ [ -1 * N || N <- Positive ] ++ [0]).
%% integer cases: bare, then each wrapped in an array, then each wrapped in
%% an object
integers() ->
    Naked = naked_integers(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).
%% top level float cases: a spread of ordinary values plus the extremes of
%% the double range, each followed (in a second pass) by its negation. the
%% title and json text both come from `sane_float_to_list/1`
naked_floats() ->
    Positive = [
        0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
        1234567890.0987654321,
        0.0e0,
        1234567890.0987654321e16,
        0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308,
        1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308,
        2.2250738585072014e-308,    %% min normalized float
        1.7976931348623157e308,     %% max normalized float
        5.0e-324,                   %% min denormalized float
        2.225073858507201e-308      %% max denormalized float
    ],
    ToCase = fun(Float) ->
        Title = sane_float_to_list(Float),
        {Title, list_to_binary(Title), Float, [{float, Float}]}
    end,
    lists:map(ToCase, Positive ++ [ -1 * F || F <- Positive ]).
%% float cases: bare, then each wrapped in an array, then each wrapped in
%% an object
floats() ->
    Naked = naked_floats(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).
%% top level cases for the three json literals
naked_literals() ->
    ToCase = fun(Literal) ->
        {
            atom_to_list(Literal),
            atom_to_binary(Literal, unicode),
            Literal,
            [{literal, Literal}]
        }
    end,
    lists:map(ToCase, [true, false, null]).
%% literal cases: bare, then each wrapped in an array, then each wrapped in
%% an object
literals() ->
    Naked = naked_literals(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).
%% one larger document mixing arrays, objects, integers, floats and literals.
%% the event stream is assembled from the inside out
compound_object() ->
    Title = "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]",
    Term = [
        [
            {<<"alpha">>, [1, 2, 3]},
            {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}
        ],
        [[{}]]
    ],
    Integers = [start_array] ++ [ {integer, N} || N <- [1, 2, 3] ] ++ [end_array],
    Floats = [start_array] ++ [ {float, F} || F <- [1.0, 2.0, 3.0] ] ++ [end_array],
    Booleans = [start_array, {literal, true}, {literal, false}, end_array],
    Inner = [start_object, {key, <<"alpha">>}] ++ Floats
        ++ [{key, <<"beta">>}] ++ Booleans
        ++ [end_object],
    Outer = [start_object, {key, <<"alpha">>}] ++ Integers
        ++ [{key, <<"beta">>}] ++ Inner
        ++ [end_object],
    Events = [start_array] ++ Outer
        ++ [start_array, start_object, end_object, end_array, end_array],
    [{Title, list_to_binary(Title), Term, Events}].
%% derive the "value inside a one element array" variant of a test case
wrap_with_array({Title, JSON, Term, Events}) ->
    WrappedTitle = lists:append(["[", Title, "]"]),
    WrappedJSON = <<$[, JSON/binary, $]>>,
    WrappedEvents = [start_array | Events] ++ [end_array],
    {WrappedTitle, WrappedJSON, [Term], WrappedEvents}.
%% derive the "value stored under `key` in an object" variant of a test case
wrap_with_object({Title, JSON, Term, Events}) ->
    WrappedTitle = lists:append(["{\"key\":", Title, "}"]),
    WrappedJSON = <<"{\"key\":", JSON/binary, "}">>,
    WrappedEvents = [start_object, {key, <<"key">>} | Events] ++ [end_object],
    {WrappedTitle, WrappedJSON, [{<<"key">>, Term}], WrappedEvents}.
%% render a float the way `~p` prints it (shortest round-tripping form) and
%% return it as a flat string.
%% `io_lib:format/2` returns chardata that may be nested, so flatten it rather
%% than asserting it is a list with exactly one element
sane_float_to_list(X) ->
    lists:flatten(io_lib:format("~p", [X])).
-include("jsx_config.hrl").
-include("jsx_strings.hrl").
%% erlang refuses to encode certain codepoints, so fake them: pack the bits of
%% `N` by hand into a 1, 2, 3 or 4 byte utf8 shaped sequence, with no validity
%% checks at all
to_fake_utf8(N) ->
    if
        N < 16#0080 ->
            <<N:8>>;
        N < 16#0800 ->
            <<0:5, High:5, Low:6>> = <<N:16>>,
            <<2#110:3, High:5, 2#10:2, Low:6>>;
        N < 16#10000 ->
            <<High:4, Mid:6, Low:6>> = <<N:16>>,
            <<2#1110:4, High:4, 2#10:2, Mid:6, 2#10:2, Low:6>>;
        true ->
            <<0:3, Top:3, High:6, Mid:6, Low:6>> = <<N:24>>,
            <<2#11110:5, Top:3, 2#10:2, High:6, 2#10:2, Mid:6, 2#10:2, Low:6>>
    end.
%% utf8 binary of the 16 bit codepoints used by the "nothing to change" tests.
%% the ranges skip 34, 47, 92, u+2028/u+2029, the surrogates, u+fdd0..u+fdef
%% and u+fffe/u+ffff
codepoints() ->
    Ranges = [
        [32, 33],
        lists:seq(35, 46),
        lists:seq(48, 91),
        lists:seq(93, 16#2027),
        lists:seq(16#202a, 16#d7ff),
        lists:seq(16#e000, 16#fdcf),
        lists:seq(16#fdf0, 16#fffd)
    ],
    unicode:characters_to_binary(lists:append(Ranges)).
%% utf8 binary of codepoints above the 16 bit range: all of u+10000..u+1fffd
%% followed by the first codepoint of each of the planes 2 through 16
extended_codepoints() ->
    PlaneStarts = [ Plane * 16#10000 || Plane <- lists:seq(2, 16) ],
    unicode:characters_to_binary(lists:seq(16#10000, 16#1fffd) ++ PlaneStarts).
%% fake utf8 encodings of u+fdd0..u+fdef, one binary per codepoint
reserved_space() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#fdd0, 16#fdef)).
%% fake utf8 encodings of u+d800..u+dfff, one binary per codepoint
surrogates() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#d800, 16#dfff)).
%% fake utf8 encodings of u+fffe and u+ffff
noncharacters() ->
    lists:map(fun to_fake_utf8/1, lists:seq(16#fffe, 16#ffff)).
%% fake utf8 encodings of the last two codepoints (xfffe and xffff) of each
%% of the planes 1 through 16, in ascending order
extended_noncharacters() ->
    [
        to_fake_utf8(Plane * 16#10000 + Offset)
        || Plane <- lists:seq(1, 16), Offset <- [16#fffe, 16#ffff]
    ].
%% valid codepoints must pass through `clean_string/2` untouched; the fake
%% encodings must each produce `{error, badarg}` by default and u+fffd when
%% `replaced_bad_utf8` is set
clean_string_test_() ->
    Strict = #config{},
    Escaping = #config{escaped_strings=true},
    Replacing = #config{replaced_bad_utf8=true},
    %% clean every binary of a list on its own
    CleanEach = fun(Binaries, Config) ->
        lists:map(fun(Codepoint) -> clean_string(Codepoint, Config) end, Binaries)
    end,
    %% the expected result list: one copy of `Result` per input binary
    Repeated = fun(Result, Binaries) ->
        lists:duplicate(length(Binaries), Result)
    end,
    Replacement = <<16#fffd/utf8>>,
    [
        {"clean codepoints", ?_assertEqual(
            codepoints(),
            clean_string(codepoints(), Strict)
        )},
        {"clean extended codepoints", ?_assertEqual(
            extended_codepoints(),
            clean_string(extended_codepoints(), Strict)
        )},
        {"escape path codepoints", ?_assertEqual(
            codepoints(),
            clean_string(codepoints(), Escaping)
        )},
        {"escape path extended codepoints", ?_assertEqual(
            extended_codepoints(),
            clean_string(extended_codepoints(), Escaping)
        )},
        {"error reserved space", ?_assertEqual(
            Repeated({error, badarg}, reserved_space()),
            CleanEach(reserved_space(), Strict)
        )},
        {"error surrogates", ?_assertEqual(
            Repeated({error, badarg}, surrogates()),
            CleanEach(surrogates(), Strict)
        )},
        {"error noncharacters", ?_assertEqual(
            Repeated({error, badarg}, noncharacters()),
            CleanEach(noncharacters(), Strict)
        )},
        {"error extended noncharacters", ?_assertEqual(
            Repeated({error, badarg}, extended_noncharacters()),
            CleanEach(extended_noncharacters(), Strict)
        )},
        {"clean reserved space", ?_assertEqual(
            Repeated(Replacement, reserved_space()),
            CleanEach(reserved_space(), Replacing)
        )},
        {"clean surrogates", ?_assertEqual(
            Repeated(Replacement, surrogates()),
            CleanEach(surrogates(), Replacing)
        )},
        {"clean noncharacters", ?_assertEqual(
            Repeated(Replacement, noncharacters()),
            CleanEach(noncharacters(), Replacing)
        )},
        {"clean extended noncharacters", ?_assertEqual(
            Repeated(Replacement, extended_noncharacters()),
            CleanEach(extended_noncharacters(), Replacing)
        )}
    ].
%% alias for `clean_string/2`, used by the escape tests
maybe_escape(String, Options) ->
    clean_string(String, Options).
%% escaping behaviour of `maybe_escape/2`: the named short escapes, the
%% optional forward slash and jsonp escapes, and the \u00XX form for every
%% remaining control character
escape_test_() ->
    Escaping = #config{escaped_strings=true},
    %% one test: the utf8 encoding of `Codepoint` must come out as `Expected`
    Check = fun(Title, Expected, Codepoint, Config) ->
        {Title, ?_assertEqual(Expected, maybe_escape(<<Codepoint/utf8>>, Config))}
    end,
    Named = [
        Check("maybe_escape backspace", <<"\\b">>, 16#0008, Escaping),
        Check("don't escape backspace", <<"\b">>, 16#0008, #config{}),
        Check("maybe_escape tab", <<"\\t">>, 16#0009, Escaping),
        Check("maybe_escape newline", <<"\\n">>, 16#000a, Escaping),
        Check("maybe_escape formfeed", <<"\\f">>, 16#000c, Escaping),
        Check("maybe_escape carriage return", <<"\\r">>, 16#000d, Escaping),
        Check("maybe_escape quote", <<"\\\"">>, 16#0022, Escaping),
        Check(
            "maybe_escape forward slash",
            <<"\\/">>,
            16#002f,
            Escaping#config{escaped_forward_slashes=true}
        ),
        Check("do not maybe_escape forward slash", <<"/">>, 16#002f, Escaping),
        Check("maybe_escape backslash", <<"\\\\">>, 16#005c, Escaping),
        Check("maybe_escape jsonp (u2028)", <<"\\u2028">>, 16#2028, Escaping),
        Check(
            "do not maybe_escape jsonp (u2028)",
            <<16#2028/utf8>>,
            16#2028,
            Escaping#config{unescaped_jsonp=true}
        ),
        Check("maybe_escape jsonp (u2029)", <<"\\u2029">>, 16#2029, Escaping),
        Check(
            "do not maybe_escape jsonp (u2029)",
            <<16#2029/utf8>>,
            16#2029,
            Escaping#config{unescaped_jsonp=true}
        )
    ],
    %% control characters that have no short escape of their own
    Controls = lists:seq(16#00, 16#07) ++ [16#0b, 16#0e, 16#0f] ++ lists:seq(16#10, 16#1f),
    Generic = [
        begin
            %% four lowercase, zero padded hex digits, eg "000b"
            Hex = lists:flatten(io_lib:format("~4.16.0b", [Codepoint])),
            Check("maybe_escape u" ++ Hex, list_to_binary("\\u" ++ Hex), Codepoint, Escaping)
        end
        || Codepoint <- Controls
    ],
    Named ++ Generic.
%% malformed utf8 handling in `clean_string/2`. every input is checked twice:
%% with the default config it must yield `{error, badarg}`, and with
%% `replaced_bad_utf8` set it must yield the listed replacement text (the test
%% title gets " replaced" appended for that second check)
bad_utf8_test_() ->
    Replacement = <<16#fffd/utf8>>,
    ReplacementThenSpace = <<16#fffd/utf8, 32>>,
    AroundSpace = <<16#fffd/utf8, 32, 16#fffd/utf8>>,
    AllContinuations = lists:seq(16#0080, 16#00bf),
    %% {Title, Input, ExpectedWhenReplacing}
    Cases = [
        {"noncharacter u+fffe", to_fake_utf8(16#fffe), Replacement},
        {"noncharacter u+ffff", to_fake_utf8(16#ffff), Replacement},
        {"orphan continuation byte u+0080", <<16#0080>>, Replacement},
        {"orphan continuation byte u+00bf", <<16#00bf>>, Replacement}
    ] ++ [
        {
            integer_to_list(Count) ++ " continuation bytes",
            binary:copy(<<16#0080>>, Count),
            binary:copy(Replacement, Count)
        }
        || Count <- lists:seq(2, 6)
    ] ++ [
        {
            "all continuation bytes",
            list_to_binary(AllContinuations),
            binary:copy(Replacement, length(AllContinuations))
        },
        {"lonely start byte", <<16#00c0>>, Replacement},
        {"lonely start bytes (2 byte)", <<16#00c0, 32, 16#00df>>, AroundSpace},
        {"lonely start bytes (3 byte)", <<16#00e0, 32, 16#00ef>>, AroundSpace},
        {"lonely start bytes (4 byte)", <<16#00f0, 32, 16#00f7>>, AroundSpace},
        {"missing continuation byte (3 byte)", <<224, 160, 32>>, ReplacementThenSpace},
        {"missing continuation byte (4 byte missing one)", <<240, 144, 128, 32>>, ReplacementThenSpace},
        {"missing continuation byte (4 byte missing two)", <<240, 144, 32>>, ReplacementThenSpace},
        {"overlong encoding of u+002f (2 byte)", <<16#c0, 16#af, 32>>, ReplacementThenSpace},
        {"overlong encoding of u+002f (3 byte)", <<16#e0, 16#80, 16#af, 32>>, ReplacementThenSpace},
        {"overlong encoding of u+002f (4 byte)", <<16#f0, 16#80, 16#80, 16#af, 32>>, ReplacementThenSpace},
        {"highest overlong 2 byte sequence", <<16#c1, 16#bf, 32>>, ReplacementThenSpace},
        {"highest overlong 3 byte sequence", <<16#e0, 16#9f, 16#bf, 32>>, ReplacementThenSpace},
        {"highest overlong 4 byte sequence", <<16#f0, 16#8f, 16#bf, 16#bf, 32>>, ReplacementThenSpace}
    ],
    lists:append([
        [
            {Title, ?_assertEqual(
                {error, badarg},
                clean_string(Input, #config{})
            )},
            {Title ++ " replaced", ?_assertEqual(
                Expected,
                clean_string(Input, #config{replaced_bad_utf8=true})
            )}
        ]
        || {Title, Input, Expected} <- Cases
    ]).
%% `json_escape_sequence/1` must produce a zero padded, lowercase \uXXXX string.
%% the expected value goes first in `?_assertEqual` so that failure reports
%% label the two values correctly, matching the other tests in this file
json_escape_sequence_test_() ->
    [
        {"json escape sequence test - 16#0000", ?_assertEqual("\\u0000", json_escape_sequence(16#0000))},
        {"json escape sequence test - 16#abc", ?_assertEqual("\\u0abc", json_escape_sequence(16#abc))},
        {"json escape sequence test - 16#def", ?_assertEqual("\\u0def", json_escape_sequence(16#def))}
    ].

View file

@ -25,6 +25,8 @@
-export([to_json/2, format/2]).
-export([init/1, handle_event/2]).
-export([start_json/0, start_json/1]).
-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]).
-record(config, {
@ -74,7 +76,6 @@ parse_config([], Config) ->
Config.
-define(start_object, <<"{">>).
-define(start_array, <<"[">>).
-define(end_object, <<"}">>).
@ -86,95 +87,50 @@ parse_config([], Config) ->
-define(newline, <<"\n">>).
-type state() :: {any(), unicode:charlist(), #config{}}.
-type state() :: {unicode:charlist(), #config{}}.
-spec init(Config::proplists:proplist()) -> state().
init(Config) -> {start, [], parse_config(Config)}.
init(Config) -> {[], parse_config(Config)}.
-spec handle_event(Event::any(), State::state()) -> state().
handle_event(Event, {start, Acc, Config}) ->
case Event of
{Type, Value} -> {[], [Acc, encode(Type, Value, Config)], Config}
; start_object -> {[object_start], [Acc, ?start_object], Config}
; start_array -> {[array_start], [Acc, ?start_array], Config}
end;
handle_event(Event, {[object_start|Stack], Acc, OldConfig = #config{depth = Depth}}) ->
Config = OldConfig#config{depth = Depth + 1},
case Event of
{key, Key} ->
{[object_value|Stack], [Acc, indent(Config), encode(string, Key, Config), ?colon, space(Config)], Config}
; end_object ->
{Stack, [Acc, ?end_object], OldConfig}
end;
handle_event(Event, {[object_value|Stack], Acc, Config}) ->
case Event of
{Type, Value} when Type == string; Type == literal;
Type == integer; Type == float ->
{[key|Stack], [Acc, encode(Type, Value, Config)], Config}
; start_object -> {[object_start, key|Stack], [Acc, ?start_object], Config}
; start_array -> {[array_start, key|Stack], [Acc, ?start_array], Config}
end;
handle_event(Event, {[key|Stack], Acc, Config = #config{depth = Depth}}) ->
case Event of
{key, Key} ->
{[object_value|Stack], [Acc, ?comma, indent_or_space(Config), encode(string, Key, Config), ?colon, space(Config)], Config}
; end_object ->
NewConfig = Config#config{depth = Depth - 1},
{Stack, [Acc, indent(NewConfig), ?end_object], NewConfig}
end;
handle_event(Event, {[array_start|Stack], Acc, OldConfig = #config{depth = Depth}}) ->
Config = OldConfig#config{depth = Depth + 1},
case Event of
{Type, Value} when Type == string; Type == literal;
Type == integer; Type == float ->
{[array|Stack], [Acc, indent(Config), encode(Type, Value, Config)], Config}
; start_object -> {[object_start, array|Stack], [Acc, indent(Config), ?start_object], Config}
; start_array -> {[array_start, array|Stack], [Acc, indent(Config), ?start_array], Config}
; end_array -> {Stack, [Acc, ?end_array], OldConfig}
end;
handle_event(Event, {[array|Stack], Acc, Config = #config{depth = Depth}}) ->
case Event of
{Type, Value} when Type == string; Type == literal;
Type == integer; Type == float ->
{[array|Stack], [Acc, ?comma, indent_or_space(Config), encode(Type, Value, Config)], Config}
; end_array ->
NewConfig = Config#config{depth = Depth - 1},
{Stack, [Acc, indent(NewConfig), ?end_array], NewConfig}
; start_object -> {[object_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_object], Config}
; start_array -> {[array_start, array|Stack], [Acc, ?comma, indent_or_space(Config), ?start_array], Config}
end;
handle_event(end_json, {[], Acc, _Config}) -> unicode:characters_to_binary(Acc, utf8).
handle_event(end_json, State) -> get_value(State);
handle_event(start_object, State) -> start_object(State);
handle_event(end_object, State) -> finish(State);
handle_event(start_array, State) -> start_array(State);
handle_event(end_array, State) -> finish(State);
handle_event({Type, Event}, {_, Config} = State) -> insert(encode(Type, Event, Config), State).
encode(string, String, _Config) ->
[?quote, String, ?quote];
<<?quote/binary, String/binary, ?quote/binary>>;
encode(key, Key, _Config) ->
<<?quote/binary, Key/binary, ?quote/binary>>;
encode(literal, Literal, _Config) ->
erlang:atom_to_list(Literal);
unicode:characters_to_binary(erlang:atom_to_list(Literal));
encode(integer, Integer, _Config) ->
erlang:integer_to_list(Integer);
unicode:characters_to_binary(erlang:integer_to_list(Integer));
encode(float, Float, _Config) ->
[Output] = io_lib:format("~p", [Float]), Output.
[Output] = io_lib:format("~p", [Float]), unicode:characters_to_binary(Output).
space(Config) ->
case Config#config.space of
0 -> []
0 -> <<>>
; X when X > 0 -> binary:copy(?space, X)
end.
indent(Config) ->
case Config#config.indent of
0 -> []
; X when X > 0 ->
Indent = binary:copy(?space, X),
indent(Indent, Config#config.depth, [?newline])
0 -> <<>>
; X when X > 0 -> <<?newline/binary, (binary:copy(?space, X * Config#config.depth))/binary>>
end.
indent(_Indent, 0, Acc) -> Acc;
indent(Indent, N, Acc) -> indent(Indent, N - 1, [Acc, Indent]).
indent_or_space(Config) ->
case Config#config.indent > 0 of
@ -183,6 +139,119 @@ indent_or_space(Config) ->
end.
%% internal state is a stack and a config object
%% `{Stack, Config}`
%% the stack is a list of in progress objects/arrays
%% `[Current, Parent, Grandparent,...OriginalAncestor]`
%% an object has the representation on the stack of
%% `{object, Object}`
%% or if there's a key with a yet to be matched value
%% `{object, Key, Object}`
%% an array looks like
%% `{array, Array}`
%% `Object` and `Array` are utf8 encoded binaries
%% create a fresh context: an empty stack paired with the default config
start_json() ->
    {[], #config{}}.

%% create a fresh context whose config is parsed from an option list
start_json(Options) when is_list(Options) ->
    {[], parse_config(Options)}.
%% push a new, still empty object onto the stack; it becomes the current
%% container
start_object({Stack, Config}) ->
    {[{object, ?start_object} | Stack], Config}.
%% push a new, still empty array onto the stack; it becomes the current
%% container
start_array({Stack, Config}) ->
    {[{array, ?start_array} | Stack], Config}.
%% close the object or array on top of the stack and hand the finished text to
%% `insert/2`, which stores it in the parent container or, when the rest of the
%% stack is empty, returns it as the final value. anything else (including an
%% object with a key still waiting for its value) is a `badarg` error
finish({[{object, Object}|Ancestors], Config}) ->
    insert(<<Object/binary, ?end_object/binary>>, {Ancestors, Config});
finish({[{array, Array}|Ancestors], Config}) ->
    insert(<<Array/binary, ?end_array/binary>>, {Ancestors, Config});
finish(_) ->
    erlang:error(badarg).
%% insert a binary into the current context, picking the 'right' action from
%% the top of the stack:
%%   * empty stack: the binary is the final value
%%   * object without a pending key: the binary is remembered as the key
%%   * object with a pending key: the key/value pair is appended via `insert/3`
%%   * array: the binary is appended as the next element
%% anything else is a `badarg` error
insert(Value, {[], Config}) when is_binary(Value) ->
    {Value, Config};
insert(Key, {[{object, Object}|Rest], Config}) when is_binary(Key) ->
    {[{object, Key, Object} | Rest], Config};
insert(Value, {[{object, Key, Object}|Rest], Config}) when is_binary(Value) ->
    insert(Key, Value, {[{object, Object}|Rest], Config});
insert(Value, {[{array, Array}|Rest], Config}) when is_binary(Value) ->
    Extended = case Array of
        %% first element, no separator needed
        ?start_array ->
            <<Array/binary, Value/binary>>;
        _ ->
            <<Array/binary,
                ?comma/binary,
                (indent_or_space(Config))/binary,
                Value/binary
            >>
    end,
    {[{array, Extended} | Rest], Config};
insert(_, _) ->
    erlang:error(badarg).
%% insert a key/value pair into the object on top of the stack. both must
%% already be encoded binaries; every pair after the first is preceded by a
%% comma and whatever `indent_or_space/1` returns. anything else is a `badarg`
%% error
insert(Key, Value, {[{object, Object}|Rest], Config}) when is_binary(Key), is_binary(Value) ->
    Prefix = case Object of
        %% first pair, no separator needed
        ?start_object ->
            Object;
        _ ->
            <<Object/binary, ?comma/binary, (indent_or_space(Config))/binary>>
    end,
    Extended = <<Prefix/binary,
        Key/binary,
        ?colon/binary,
        (space(Config))/binary,
        Value/binary
    >>,
    {[{object, Extended} | Rest], Config};
insert(_, _, _) ->
    erlang:error(badarg).
%% return the key that is waiting for its value in the object on top of the
%% stack; raises `badarg` when the top of the stack is anything else
get_key(State) ->
    case State of
        {[{object, Key, _Object} | _Ancestors], _Config} -> Key;
        _ -> erlang:error(badarg)
    end.
%% return the finished json text. the first element of the state is a binary
%% only once the outermost value is complete (see `finish/1` and `insert/2`);
%% while containers are still open it is a stack (a list), which, like any
%% other argument, raises `badarg`
get_value({Value, _Config}) when is_binary(Value) ->
    Value;
get_value(_) ->
    erlang:error(badarg).
%% eunit tests
-ifdef(TEST).
@ -215,7 +284,7 @@ config_test_() ->
space_test_() ->
[
{"no space", ?_assertEqual([], space(#config{space=0}))},
{"no space", ?_assertEqual(<<>>, space(#config{space=0}))},
{"one space", ?_assertEqual(<<" ">>, space(#config{space=1}))},
{"four spaces", ?_assertEqual(<<" ">>, space(#config{space=4}))}
].
@ -223,21 +292,21 @@ space_test_() ->
indent_test_() ->
[
{"no indent", ?_assertEqual([], indent(#config{indent=0, depth=1}))},
{"no indent", ?_assertEqual(<<>>, indent(#config{indent=0, depth=1}))},
{"indent 1 depth 1", ?_assertEqual(
[[?newline], ?space],
<<?newline/binary, <<" ">>/binary>>,
indent(#config{indent=1, depth=1})
)},
{"indent 1 depth 2", ?_assertEqual(
[[[?newline], ?space], ?space],
<<?newline/binary, <<" ">>/binary>>,
indent(#config{indent=1, depth=2})
)},
{"indent 4 depth 1", ?_assertEqual(
[[?newline], <<" ">>],
<<?newline/binary, <<" ">>/binary>>,
indent(#config{indent=4, depth=1})
)},
{"indent 4 depth 2", ?_assertEqual(
[[[?newline], <<" ">>], <<" ">>],
<<?newline/binary, <<" ">>/binary, <<" ">>/binary>>,
indent(#config{indent=4, depth=2})
)}
].
@ -250,7 +319,7 @@ indent_or_space_test_() ->
indent_or_space(#config{space=1, indent=0, depth=1})
)},
{"indent so no space", ?_assertEqual(
[[?newline], ?space],
<<?newline/binary, <<" ">>/binary>>,
indent_or_space(#config{space=1, indent=1, depth=1})
)}
].
@ -258,50 +327,137 @@ indent_or_space_test_() ->
format_test_() ->
[
{"0.0", ?_assert(encode(float, 0.0, #config{}) =:= "0.0")},
{"1.0", ?_assert(encode(float, 1.0, #config{}) =:= "1.0")},
{"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= "-1.0")},
{"0.0", ?_assert(encode(float, 0.0, #config{}) =:= <<"0.0">>)},
{"1.0", ?_assert(encode(float, 1.0, #config{}) =:= <<"1.0">>)},
{"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= <<"-1.0">>)},
{"3.1234567890987654321",
?_assert(
encode(float, 3.1234567890987654321, #config{}) =:= "3.1234567890987655")
encode(float, 3.1234567890987654321, #config{}) =:= <<"3.1234567890987655">>)
},
{"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= "1.0e23")},
{"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= "0.3")},
{"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= "0.0001")},
{"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= "1.0e-5")},
{"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= "1.0e-8")},
{"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= "1.0e-323")},
{"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= "1.0e308")},
{"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= <<"1.0e23">>)},
{"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= <<"0.3">>)},
{"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= <<"0.0001">>)},
{"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= <<"1.0e-5">>)},
{"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= <<"1.0e-8">>)},
{"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= <<"1.0e-323">>)},
{"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= <<"1.0e308">>)},
{"min normalized float",
?_assert(
encode(float, math:pow(2, -1022), #config{}) =:= "2.2250738585072014e-308"
encode(float, math:pow(2, -1022), #config{}) =:= <<"2.2250738585072014e-308">>
)
},
{"max normalized float",
?_assert(
encode(float, (2 - math:pow(2, -52)) * math:pow(2, 1023), #config{})
=:= "1.7976931348623157e308"
=:= <<"1.7976931348623157e308">>
)
},
{"min denormalized float",
?_assert(encode(float, math:pow(2, -1074), #config{}) =:= "5.0e-324")
?_assert(encode(float, math:pow(2, -1074), #config{}) =:= <<"5.0e-324">>)
},
{"max denormalized float",
?_assert(
encode(float, (1 - math:pow(2, -52)) * math:pow(2, -1022), #config{})
=:= "2.225073858507201e-308"
=:= <<"2.225073858507201e-308">>
)
}
},
{"hello world", ?_assert(encode(string, <<"hello world">>, #config{}) =:= <<"\"hello world\"">>)},
{"key", ?_assert(encode(key, <<"key">>, #config{}) =:= <<"\"key\"">>)},
{"1", ?_assert(encode(integer, 1, #config{}) =:= <<"1">>)},
{"-1", ?_assert(encode(integer, -1, #config{}) =:= <<"-1">>)},
{"true", ?_assert(encode(literal, true, #config{}) =:= <<"true">>)},
{"false", ?_assert(encode(literal, false, #config{}) =:= <<"false">>)},
{"null", ?_assert(encode(literal, null, #config{}) =:= <<"null">>)}
].
%% direct tests of the stack representation: creating contexts, opening
%% containers, inserting keys and values, and finishing containers with and
%% without a parent
rep_manipulation_test_() ->
    Config = #config{},
    OpenObject = {object, <<"{">>},
    OpenArray = {array, <<"[">>},
    %% an open object holding a key that still waits for its value
    PendingKey = {object, <<"\"key\"">>, <<"{">>},
    [
        {"allocate a new context", ?_assertEqual(
            {[], Config},
            start_json()
        )},
        {"allocate a new context with config", ?_assertEqual(
            {[], #config{space=1, indent=2}},
            start_json([{space, 1}, {indent, 2}])
        )},
        {"allocate a new object on an empty stack", ?_assertEqual(
            {[OpenObject], Config},
            start_object({[], Config})
        )},
        {"allocate a new object on a stack", ?_assertEqual(
            {[OpenObject, OpenObject], Config},
            start_object({[OpenObject], Config})
        )},
        {"allocate a new array on an empty stack", ?_assertEqual(
            {[OpenArray], Config},
            start_array({[], Config})
        )},
        {"allocate a new array on a stack", ?_assertEqual(
            {[OpenArray, OpenObject], Config},
            start_array({[OpenObject], Config})
        )},
        {"insert a key into an object", ?_assertEqual(
            {[PendingKey], Config},
            insert(<<"\"key\"">>, {[OpenObject], Config})
        )},
        {"get current key", ?_assertEqual(
            key,
            get_key({[{object, key, <<"{">>}], Config})
        )},
        {"try to get non-key from object", ?_assertError(
            badarg,
            get_key({[OpenObject], Config})
        )},
        {"try to get key from array", ?_assertError(
            badarg,
            get_key({[OpenArray], Config})
        )},
        {"insert a value into an object", ?_assertEqual(
            {[{object, <<"{\"key\":true">>}], Config},
            insert(<<"true">>, {[PendingKey], Config})
        )},
        {"insert a value into an array", ?_assertEqual(
            {[{array, <<"[true">>}], Config},
            insert(<<"true">>, {[OpenArray], Config})
        )},
        {"insert a key/value pair into an object", ?_assertEqual(
            {[{object, <<"{\"x\":true,\"y\":false">>}], Config},
            insert(<<"\"y\"">>, <<"false">>, {[{object, <<"{\"x\":true">>}], Config})
        )},
        {"finish an object with no ancestor", ?_assertEqual(
            {<<"{\"x\":true,\"y\":false}">>, Config},
            finish({[{object, <<"{\"x\":true,\"y\":false">>}], Config})
        )},
        {"finish an empty object", ?_assertEqual(
            {<<"{}">>, Config},
            finish({[OpenObject], Config})
        )},
        {"finish an object with an ancestor", ?_assertEqual(
            {[{object, <<"{\"a\":[],\"b\":{\"x\":true,\"y\":false}">>}], Config},
            finish({
                [
                    {object, <<"{\"x\":true,\"y\":false">>},
                    {object, <<"\"b\"">>, <<"{\"a\":[]">>}
                ],
                Config
            })
        )},
        {"finish an array with no ancestor", ?_assertEqual(
            {<<"[true,false,null]">>, Config},
            finish({[{array, <<"[true,false,null">>}], Config})
        )},
        {"finish an array with an ancestor", ?_assertEqual(
            {[{array, <<"[1,2,3,[true,false,null]">>}], Config},
            finish({[{array, <<"[true,false,null">>}, {array, <<"[1,2,3">>}], Config})
        )}
    ].
%% eunit generator: for every `{Title, JSON, _, Events}` case, folds the case's
%% events followed by `end_json` through handle_event/2 and compares the fold
%% result with the expected `JSON`
%% NOTE(review): `Data` is bound twice and two `lists:foldl` seeds appear back to
%% back (an explicit `{start, [], #config{}}` tuple and `init([])`); this looks
%% like a before/after pair of diff lines — confirm which of each is current
handle_event_test_() ->
Data = jsx:test_cases(),
Data = jsx:test_cases() ++ jsx:special_test_cases(),
[
{
Title, ?_assertEqual(
JSON,
lists:foldl(fun handle_event/2, {start, [], #config{}}, Events ++ [end_json])
lists:foldl(fun handle_event/2, init([]), Events ++ [end_json])
)
} || {Title, JSON, _, Events} <- Data
].

View file

@ -25,11 +25,12 @@
-export([to_term/2]).
-export([init/1, handle_event/2]).
-export([start_term/0, start_term/1]).
-export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]).
%% option record filled in by parse_config
%% NOTE(review): `labels = binary` is listed twice, around `post_decode = false`
%% and without a separating comma; this looks like before/after diff lines —
%% confirm the intended field list
-record(config, {
labels = binary,
post_decode = false
labels = binary
}).
-type config() :: list().
@ -59,8 +60,6 @@ parse_config([{labels, Val}|Rest], Config)
parse_config(Rest, Config#config{labels = Val});
parse_config([labels|Rest], Config) ->
parse_config(Rest, Config#config{labels = binary});
parse_config([{post_decode, F}|Rest], Config=#config{post_decode=false}) when is_function(F, 1) ->
parse_config(Rest, Config#config{post_decode=F});
parse_config([{K, _}|Rest] = Options, Config) ->
case lists:member(K, jsx_config:valid_flags()) of
true -> parse_config(Rest, Config)
@ -77,34 +76,21 @@ parse_config([], Config) ->
-type state() :: {[any()], #config{}}.
%% build the initial handler state from an option list: a stack paired with the
%% config produced by parse_config
%% NOTE(review): two definitions follow, seeding the stack with `[[]]` and `[]`
%% respectively; this looks like a before/after pair of diff lines — confirm
%% which one is current
-spec init(Config::proplists:proplist()) -> state().
init(Config) -> {[[]], parse_config(Config)}.
init(Config) -> {[], parse_config(Config)}.
%% fold one parser event into the handler state `{Stack, Config}`
%% NOTE(review): the clauses below mix two stack representations. one set works
%% on a list-of-lists stack with `{key, Key}` markers and routes values through
%% post_decode/2; the other delegates to start_object/1, start_array/1,
%% finish/1, insert/2 and get_value/1. the function is also terminated twice
%% (a second single-clause definition follows the first `.`). this looks like
%% before/after lines of a diff — confirm which set is current
-spec handle_event(Event::any(), State::state()) -> state().
handle_event(end_json, {[[Terms]], _Config}) -> Terms;
handle_event(end_json, State) -> get_value(State);
handle_event(start_object, {Terms, Config}) -> {[[]|Terms], Config};
%% list-of-lists variant: an empty object is emitted as `[{}]`
handle_event(end_object, {[[], {key, Key}, Last|Terms], Config}) ->
{[[{Key, post_decode([{}], Config)}] ++ Last] ++ Terms, Config};
%% list-of-lists variant: members were accumulated newest first, hence the reverse
handle_event(end_object, {[Object, {key, Key}, Last|Terms], Config}) ->
{[[{Key, post_decode(lists:reverse(Object), Config)}] ++ Last] ++ Terms, Config};
handle_event(end_object, {[[], Last|Terms], Config}) ->
{[[post_decode([{}], Config)] ++ Last] ++ Terms, Config};
handle_event(end_object, {[Object, Last|Terms], Config}) ->
{[[post_decode(lists:reverse(Object), Config)] ++ Last] ++ Terms, Config};
handle_event(start_object, State) -> start_object(State);
handle_event(end_object, State) -> finish(State);
handle_event(start_array, {Terms, Config}) -> {[[]|Terms], Config};
handle_event(end_array, {[List, {key, Key}, Last|Terms], Config}) ->
{[[{Key, post_decode(lists:reverse(List), Config)}] ++ Last] ++ Terms, Config};
handle_event(end_array, {[List, Last|Terms], Config}) ->
{[[post_decode(lists:reverse(List), Config)] ++ Last] ++ Terms, Config};
handle_event(start_array, State) -> start_array(State);
handle_event(end_array, State) -> finish(State);
%% keys are passed through format_key/2 before being stored
handle_event({key, Key}, {Terms, Config}) -> {[{key, format_key(Key, Config)}] ++ Terms, Config};
handle_event({key, Key}, {_, Config} = State) -> insert(format_key(Key, Config), State);
%% any other tagged event: only the payload (second element) is kept
handle_event({_, Event}, {[{key, Key}, Last|Terms], Config}) ->
{[[{Key, post_decode(Event, Config)}] ++ Last] ++ Terms, Config};
handle_event({_, Event}, {[Last|Terms], Config}) ->
{[[post_decode(Event, Config)] ++ Last] ++ Terms, Config}.
handle_event({_, Event}, State) -> insert(Event, State).
format_key(Key, Config) ->
@ -121,8 +107,60 @@ format_key(Key, Config) ->
end.
%% pass a decoded value through the configured `post_decode` hook. when the
%% hook field is `false` the value is returned untouched; otherwise the field
%% is called as a one-argument fun and its result is returned
post_decode(Value, #config{post_decode=false}) ->
    Value;
post_decode(Value, Config) ->
    Hook = Config#config.post_decode,
    Hook(Value).
%% internal state is a stack and a config object
%% `{Stack, Config}`
%% the stack is a list of in progress objects/arrays
%% `[Current, Parent, Grandparent,...OriginalAncestor]`
%% an object has the representation on the stack of
%% `{object, [{NthKey, NthValue}, {NthMinus1Key, NthMinus1Value},...{FirstKey, FirstValue}]}`
%% or, if there's a key with a yet to be matched value,
%% `{object, Key, [{NthKey, NthValue},...]}`
%% an array looks like
%% `{array, [NthValue, NthMinus1Value,...FirstValue]}`
%% create a fresh context: an empty stack paired with the default config
start_term() ->
    {[], #config{}}.

%% create a fresh context whose config is parsed from a list of options
start_term(Options) when is_list(Options) ->
    Config = parse_config(Options),
    {[], Config}.
%% push an empty, in-progress object onto the top of the stack; the config is
%% passed through unchanged
start_object({Stack, Config}) ->
    Frame = {object, []},
    {[Frame | Stack], Config}.
%% push an empty, in-progress array onto the top of the stack; the config is
%% passed through unchanged
start_array({Stack, Config}) ->
    Frame = {array, []},
    {[Frame | Stack], Config}.
%% close the object or array on top of the stack. members are stored newest
%% first, so they are reversed into insertion order; an object with no pairs
%% becomes `[{}]`. when nothing else is on the stack the finished term takes the
%% stack's place in the returned state, otherwise it is handed to insert/2 so
%% it lands in the enclosing container. anything else (including an object
%% still holding a key without a value) is a badarg
finish({[{Kind, Members} | Ancestors], Config}) when Kind =:= object; Kind =:= array ->
    Term = case {Kind, Members} of
        {object, []} -> [{}];
        _ -> lists:reverse(Members)
    end,
    case Ancestors of
        [] -> {Term, Config};
        _ -> insert(Term, {Ancestors, Config})
    end;
finish(_) ->
    erlang:error(badarg).
%% insert a term into whatever is on top of the stack:
%%   * empty stack: the term replaces the stack in the returned state
%%   * object with no pending key: the term is stored as the pending key
%%   * object with a pending key: the term is paired with that key and
%%     prepended to the object's pairs
%%   * array: the term is prepended to the array's values
%% any other state is a badarg
insert(Term, {Stack, Config}) ->
    case Stack of
        [] ->
            {Term, Config};
        [{object, Pairs} | Rest] ->
            {[{object, Term, Pairs} | Rest], Config};
        [{object, Key, Pairs} | Rest] ->
            {[{object, [{Key, Term} | Pairs]} | Rest], Config};
        [{array, Values} | Rest] ->
            {[{array, [Term | Values]} | Rest], Config};
        _ ->
            erlang:error(badarg)
    end;
insert(_, _) ->
    erlang:error(badarg).
%% prepend a complete key/value pair to the object on top of the stack. the
%% top of the stack must be an object with no pending key; anything else is a
%% badarg
insert(Key, Value, {[{object, Pairs} | Rest], Config}) ->
    Object = {object, [{Key, Value} | Pairs]},
    {[Object | Rest], Config};
insert(_, _, _) ->
    erlang:error(badarg).
%% return the pending key of the object on top of the stack. a badarg is raised
%% when the top of the stack is not an object holding a pending key, or when
%% the state has no top element at all
get_key({[Current | _], _Config}) ->
    case Current of
        {object, Key, _Pairs} -> Key;
        _ -> erlang:error(badarg)
    end;
get_key(_) ->
    erlang:error(badarg).
%% return the first element of a two-element state tuple; any other argument
%% is a badarg. no check is made that the first element is a finished term
%% rather than a stack that still has open containers on it
get_value(State) ->
    case State of
        {Value, _Config} -> Value;
        _ -> erlang:error(badarg)
    end.
%% eunit tests
@ -132,9 +170,6 @@ post_decode(Value, Config) -> (Config#config.post_decode)(Value).
config_test_() ->
%% for post_decode tests
F = fun(X) -> X end,
G = fun(X, Y) -> {X, Y} end,
[
{"empty config", ?_assertEqual(#config{}, parse_config([]))},
{"implicit binary labels", ?_assertEqual(#config{}, parse_config([labels]))},
@ -144,15 +179,6 @@ config_test_() ->
#config{labels=existing_atom},
parse_config([{labels, existing_atom}])
)},
{"sloppy existing atom labels", ?_assertEqual(
#config{labels=attempt_atom},
parse_config([{labels, attempt_atom}])
)},
{"post decode", ?_assertEqual(
#config{post_decode=F},
parse_config([{post_decode, F}])
)},
{"post decode wrong arity", ?_assertError(badarg, parse_config([{post_decode, G}]))},
{"invalid opt flag", ?_assertError(badarg, parse_config([error]))},
{"invalid opt tuple", ?_assertError(badarg, parse_config([{error, true}]))}
].
@ -181,110 +207,79 @@ format_key_test_() ->
].
%% NOTE(review): the text from here to the closing `].` splices two eunit
%% generators line by line: `post_decoders_test_`, which runs post_decode/2
%% over the `Events` fixture with various hooks, and `rep_manipulation_test_`,
%% which exercises start_term, start_object, start_array, insert, get_key and
%% finish on hand-built stacks. this looks like before/after lines of a diff —
%% confirm which generator is current
post_decoders_test_() ->
Events = [
[{}],
[{<<"key">>, <<"value">>}],
[{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
[],
[<<"string">>],
[true, false, null],
true,
false,
null,
<<"hello">>,
<<"world">>,
1,
1.0
],
%% eunit generator for the stack helpers; `junk` entries stand in for ancestors
%% that the helper under test must leave untouched
rep_manipulation_test_() ->
[
{"no post_decode", ?_assertEqual(
Events,
[ post_decode(Event, #config{}) || Event <- Events ]
{"allocate a new context", ?_assertEqual(
{[], #config{}},
start_term()
)},
{"replace arrays with empty arrays", ?_assertEqual(
[
[{}],
[{<<"key">>, <<"value">>}],
[{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
[],
[],
[],
true,
false,
null,
<<"hello">>,
<<"world">>,
1,
1.0
],
[ post_decode(Event, #config{
post_decode=fun([T|_] = V) when is_tuple(T) -> V; (V) when is_list(V) -> []; (V) -> V end
}) || Event <- Events
]
{"allocate a new context with option", ?_assertEqual(
{[], #config{labels=atom}},
start_term([{labels, atom}])
)},
{"replace objects with empty objects", ?_assertEqual(
[
[{}],
[{}],
[{}],
[],
[<<"string">>],
[true, false, null],
true,
false,
null,
<<"hello">>,
<<"world">>,
1,
1.0
],
[ post_decode(Event, #config{
post_decode=fun([T|_]) when is_tuple(T) -> [{}]; (V) -> V end
}) || Event <- Events
]
{"allocate a new object on an empty stack", ?_assertEqual(
{[{object, []}], #config{}},
start_object({[], #config{}})
)},
{"replace all non-array/non-object values with false", ?_assertEqual(
[
[{}],
[{<<"key">>, <<"value">>}],
[{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
[],
[<<"string">>],
[true, false, null],
false,
false,
false,
false,
false,
false,
false
],
[ post_decode(Event, #config{
post_decode=fun(V) when is_list(V) -> V; (_) -> false end
}) || Event <- Events
]
{"allocate a new object on a stack", ?_assertEqual(
{[{object, []}, {object, []}], #config{}},
start_object({[{object, []}], #config{}})
)},
{"atoms_to_strings", ?_assertEqual(
[
[{}],
[{<<"key">>, <<"value">>}],
[{<<"true">>, true}, {<<"false">>, false}, {<<"null">>, null}],
[],
[<<"string">>],
[true, false, null],
<<"true">>,
<<"false">>,
<<"null">>,
<<"hello">>,
<<"world">>,
1,
1.0
],
[ post_decode(Event, #config{
post_decode=fun(V) when is_atom(V) -> unicode:characters_to_binary(atom_to_list(V)); (V) -> V end
}) || Event <- Events
]
{"allocate a new array on an empty stack", ?_assertEqual(
{[{array, []}], #config{}},
start_array({[], #config{}})
)},
{"allocate a new array on a stack", ?_assertEqual(
{[{array, []}, {object, []}], #config{}},
start_array({[{object, []}], #config{}})
)},
{"insert a key into an object", ?_assertEqual(
{[{object, key, []}, junk], #config{}},
insert(key, {[{object, []}, junk], #config{}})
)},
{"get current key", ?_assertEqual(
key,
get_key({[{object, key, []}], #config{}})
)},
{"try to get non-key from object", ?_assertError(
badarg,
get_key({[{object, []}], #config{}})
)},
{"try to get key from array", ?_assertError(
badarg,
get_key({[{array, []}], #config{}})
)},
{"insert a value into an object", ?_assertEqual(
{[{object, [{key, value}]}, junk], #config{}},
insert(value, {[{object, key, []}, junk], #config{}})
)},
{"insert a value into an array", ?_assertEqual(
{[{array, [value]}, junk], #config{}},
insert(value, {[{array, []}, junk], #config{}})
)},
{"insert a key/value pair into an object", ?_assertEqual(
{[{object, [{key, value}, {x, y}]}, junk], #config{}},
insert(key, value, {[{object, [{x, y}]}, junk], #config{}})
)},
%% stacks hold members newest first, so finished terms come back reversed
{"finish an object with no ancestor", ?_assertEqual(
{[{a, b}, {x, y}], #config{}},
finish({[{object, [{x, y}, {a, b}]}], #config{}})
)},
{"finish an empty object", ?_assertEqual(
{[{}], #config{}},
finish({[{object, []}], #config{}})
)},
{"finish an object with an ancestor", ?_assertEqual(
{[{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], #config{}},
finish({[{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}], #config{}})
)},
{"finish an array with no ancestor", ?_assertEqual(
{[a, b, c], #config{}},
finish({[{array, [c, b, a]}], #config{}})
)},
{"finish an array with an ancestor", ?_assertEqual(
{[{array, [[a, b, c], d, e, f]}], #config{}},
finish({[{array, [c, b, a]}, {array, [d, e, f]}], #config{}})
)}
].
@ -295,7 +290,7 @@ handle_event_test_() ->
{
Title, ?_assertEqual(
Term,
lists:foldl(fun handle_event/2, {[[]], #config{}}, Events ++ [end_json])
lists:foldl(fun handle_event/2, init([]), Events ++ [end_json])
)
} || {Title, _, Term, Events} <- Data
].

View file

@ -159,7 +159,7 @@ repeated_keys_test_() ->
handle_event_test_() ->
Data = jsx:test_cases(),
Data = jsx:test_cases() ++ jsx:special_test_cases(),
[
{
Title, ?_assertEqual(