diff --git a/src/jsx.erl b/src/jsx.erl index 8c45bfe..b791699 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -34,7 +34,9 @@ -ifdef(TEST). --include("jsx_tests.hrl"). +%% data and helper functions for tests +-export([test_cases/0]). +-export([init/1, handle_event/2]). -endif. @@ -146,3 +148,275 @@ resume(Term, {decoder, State, Handler, Acc, Stack}, Config) -> jsx_decoder:resume(Term, State, Handler, Acc, Stack, jsx_config:parse_config(Config)); resume(Term, {parser, State, Handler, Stack}, Config) -> jsx_parser:resume(Term, State, Handler, Stack, jsx_config:parse_config(Config)). + + + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + + +%% test handler +init([]) -> []. + +handle_event(end_json, State) -> lists:reverse([end_json] ++ State); +handle_event(Event, State) -> [Event] ++ State. + + +test_cases() -> + empty_array() + ++ nested_array() + ++ empty_object() + ++ nested_object() + ++ strings() + ++ literals() + ++ integers() + ++ floats() + ++ compound_object(). + + +empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}]. + +nested_array() -> + [{ + "[[[]]]", + <<"[[[]]]">>, + [[[]]], + [start_array, start_array, start_array, end_array, end_array, end_array] + }]. + + +empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}]. + +nested_object() -> + [{ + "{\"key\":{\"key\":{}}}", + <<"{\"key\":{\"key\":{}}}">>, + [{<<"key">>, [{<<"key">>, [{}]}]}], + [ + start_object, + {key, <<"key">>}, + start_object, + {key, <<"key">>}, + start_object, + end_object, + end_object, + end_object + ] + }]. + + +naked_strings() -> + Raw = [ + "", + "hello world" + ], + [ + { + String, + <<"\"", (list_to_binary(String))/binary, "\"">>, + list_to_binary(String), + [{string, list_to_binary(String)}] + } + || String <- Raw + ]. + +strings() -> + naked_strings() + ++ [ wrap_with_array(Test) || Test <- naked_strings() ] + ++ [ wrap_with_object(Test) || Test <- naked_strings() ]. 
+ + +naked_integers() -> + Raw = [ + 1, 2, 3, + 127, 128, 129, + 255, 256, 257, + 65534, 65535, 65536, + 18446744073709551616, + 18446744073709551617 + ], + [ + { + integer_to_list(X), + list_to_binary(integer_to_list(X)), + X, + [{integer, X}] + } + || X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0] + ]. + +integers() -> + naked_integers() + ++ [ wrap_with_array(Test) || Test <- naked_integers() ] + ++ [ wrap_with_object(Test) || Test <- naked_integers() ]. + + +naked_floats() -> + Raw = [ + 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, + 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, + 1234567890.0987654321, + 0.0e0, + 1234567890.0987654321e16, + 0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308, + 1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308, + 2.2250738585072014e-308, %% min normalized float + 1.7976931348623157e308, %% max normalized float + 5.0e-324, %% min denormalized float + 2.225073858507201e-308 %% max denormalized float + ], + [ + { + sane_float_to_list(X), + list_to_binary(sane_float_to_list(X)), + X, + [{float, X}] + } + || X <- Raw ++ [ -1 * Y || Y <- Raw ] + ]. + +floats() -> + naked_floats() + ++ [ wrap_with_array(Test) || Test <- naked_floats() ] + ++ [ wrap_with_object(Test) || Test <- naked_floats() ]. + + +naked_literals() -> + [ + { + atom_to_list(Literal), + atom_to_binary(Literal, unicode), + Literal, + [{literal, Literal}] + } + || Literal <- [true, false, null] + ]. + +literals() -> + naked_literals() + ++ [ wrap_with_array(Test) || Test <- naked_literals() ] + ++ [ wrap_with_object(Test) || Test <- naked_literals() ]. 
+ + +compound_object() -> + [{ + "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]", + <<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>, + [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]], + [ + start_array, + start_object, + {key, <<"alpha">>}, + start_array, + {integer, 1}, + {integer, 2}, + {integer, 3}, + end_array, + {key, <<"beta">>}, + start_object, + {key, <<"alpha">>}, + start_array, + {float, 1.0}, + {float, 2.0}, + {float, 3.0}, + end_array, + {key, <<"beta">>}, + start_array, + {literal, true}, + {literal, false}, + end_array, + end_object, + end_object, + start_array, + start_object, + end_object, + end_array, + end_array + ] + }]. + + +wrap_with_array({Title, JSON, Term, Events}) -> + { + "[" ++ Title ++ "]", + <<"[", JSON/binary, "]">>, + [Term], + [start_array] ++ Events ++ [end_array] + }. + + +wrap_with_object({Title, JSON, Term, Events}) -> + { + "{\"key\":" ++ Title ++ "}", + <<"{\"key\":", JSON/binary, "}">>, + [{<<"key">>, Term}], + [start_object, {key, <<"key">>}] ++ Events ++ [end_object] + }. + + +sane_float_to_list(X) -> + [Output] = io_lib:format("~p", [X]), + Output. + + +incremental_decode(JSON) -> + Final = lists:foldl( + fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, + decoder(jsx, [], [stream]), + json_to_bytes(JSON) + ), + Final(end_stream). + + +incremental_parse(Events) -> + Final = lists:foldl( + fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end, + parser(?MODULE, [], [stream]), + lists:map(fun(X) -> [X] end, Events) + ), + Final(end_stream). + + +%% used to convert a json text into a list of single byte binaries (led by an +%% empty binary) to be incrementally decoded +json_to_bytes(JSON) -> json_to_bytes(JSON, []). + +json_to_bytes(<<>>, Acc) -> [<<>>] ++ lists:reverse(Acc); +json_to_bytes(<<X, Rest/binary>>, Acc) -> json_to_bytes(Rest, [<<X>>] ++ Acc). + + +%% actual tests! 
+decode_test_() -> + Data = test_cases(), + [{Title, ?_assertEqual(Events ++ [end_json], (decoder(?MODULE, [], []))(JSON))} + || {Title, JSON, _, Events} <- Data + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_decode(JSON))} + || {Title, JSON, _, Events} <- Data + ]. + + +parse_test_() -> + Data = test_cases(), + [{Title, ?_assertEqual(Events ++ [end_json], (parser(?MODULE, [], []))(Events ++ [end_json]))} + || {Title, _, _, Events} <- Data + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_parse(Events))} + || {Title, _, _, Events} <- Data + ]. + + +encode_test_() -> + Data = test_cases(), + [ + { + Title, ?_assertEqual( + Events ++ [end_json], + (jsx:encoder(jsx, [], []))(Term) + ) + } || {Title, _, Term, Events} <- Data + ]. + + +-endif. diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 2816b0e..9039d3f 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -183,6 +183,12 @@ config_test_() -> parse_config([strict]) ) }, + {"strict selective", + ?_assertEqual( + #config{strict_comments = true}, + parse_config([{strict, [comments]}]) + ) + }, {"strict expanded", ?_assertEqual( #config{strict_comments = true, diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 58ba3d8..a858257 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -966,167 +966,141 @@ json_to_bytes(<<>>, Acc) -> [<<>>] ++ lists:reverse(Acc); json_to_bytes(<<X, Rest/binary>>, Acc) -> json_to_bytes(Rest, [<<X>>] ++ Acc). -decode(JSON, Config) -> - Chunk = try - start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) - catch - error:badarg -> {error, badarg} - end, - Incremental = try - Final = lists:foldl( - fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, - decoder(jsx, [], [stream] ++ Config), - json_to_bytes(JSON) - ), - Final(end_stream) - catch - error:badarg -> {error, badarg} - end, - ?assert(Chunk == Incremental), - Chunk. +decode(JSON) -> decode(JSON, []). 
+decode(JSON, Config) -> (decoder(jsx, [], Config))(JSON). -decode_test_() -> - Data = jsx:test_cases(), - [{Title, ?_assertEqual(Events ++ [end_json], decode(JSON, []))} - || {Title, JSON, _, Events} <- Data - ]. +incremental_decode(JSON) -> incremental_decode(JSON, []). +incremental_decode(JSON, Config) -> + Final = lists:foldl( + fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end, + decoder(jsx, [], [stream] ++ Config), + json_to_bytes(JSON) + ), + Final(end_stream). %% all these numbers have different representation in erlang than in javascript and %% do not roundtrip like most integers/floats special_number_test_() -> - [ - {"-0", ?_assertEqual( - [{integer, 0}, end_json], - decode(<<"-0">>, []) - )}, - {"-0.0", ?_assertEqual( - [{float, 0.0}, end_json], - decode(<<"-0.0">>, []) - )}, - {"0e0", ?_assertEqual( - [{float, 0.0}, end_json], - decode(<<"0e0">>, []) - )}, - {"0e4", ?_assertEqual( - [{float, 0.0}, end_json], - decode(<<"0e4">>, []) - )}, - {"1e0", ?_assertEqual( - [{float, 1.0}, end_json], - decode(<<"1e0">>, []) - )}, - {"-1e0", ?_assertEqual( - [{float, -1.0}, end_json], - decode(<<"-1e0">>, []) - )}, - {"1e4", ?_assertEqual( - [{float, 1.0e4}, end_json], - decode(<<"1e4">>, []) - )}, - {"number terminated by whitespace", ?_assertEqual( + Cases = [ + % {title, expected events, json} + {"-0", [{integer, 0}, end_json], <<"-0">>}, + {"-0.0", [{float, 0.0}, end_json], <<"-0.0">>}, + {"0e0", [{float, 0.0}, end_json], <<"0e0">>}, + {"0e4", [{float, 0.0}, end_json], <<"0e4">>}, + {"1e0", [{float, 1.0}, end_json], <<"1e0">>}, + {"-1e0", [{float, -1.0}, end_json], <<"-1e0">>}, + {"1e4", [{float, 1.0e4}, end_json], <<"1e4">>}, + {"number terminated by whitespace", [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1 ]">>, []) - )}, - {"number terminated by comma", ?_assertEqual( + <<"[ 1 ]">> + }, + {"number terminated by comma", [start_array, {integer, 1}, {integer, 1}, end_array, end_json], - decode(<<"[ 1, 1 ]">>, []) - )}, - 
{"number terminated by comma in object", ?_assertEqual( + <<"[ 1, 1 ]">> + }, + {"number terminated by comma in object", [start_object, {key, <<"x">>}, {integer, 1}, {key, <<"y">>}, {integer, 1}, end_object, end_json], - decode(<<"{\"x\": 1, \"y\": 1}">>, []) - )} - ]. + <<"{\"x\": 1, \"y\": 1}">> + } + ], + [{Title, ?_assertEqual(Events, decode(JSON))} + || {Title, Events, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events, incremental_decode(JSON))} + || {Title, Events, JSON} <- Cases + ]. comments_test_() -> - [ - {"preceeding // comment", ?_assertEqual( + Cases = [ + % {title, expected events, json} + {"preceeding // comment", [start_array, end_array, end_json], - decode(<<"// comment ", ?newline, "[]">>, []) - )}, - {"preceeding /**/ comment", ?_assertEqual( + <<"// comment ", ?newline, "[]">> + }, + {"preceeding /**/ comment", [start_array, end_array, end_json], - decode(<<"/* comment */[]">>, []) - )}, - {"trailing // comment", ?_assertEqual( + <<"/* comment */[]">> + }, + {"trailing // comment", [start_array, end_array, end_json], - decode(<<"[]// comment", ?newline>>, []) - )}, - {"trailing // comment (no newline)", ?_assertEqual( + <<"[]// comment", ?newline>> + }, + {"trailing // comment (no newline)", [start_array, end_array, end_json], - decode(<<"[]// comment">>, []) - )}, - {"trailing /**/ comment", ?_assertEqual( + <<"[]// comment">> + }, + {"trailing /**/ comment", [start_array, end_array, end_json], - decode(<<"[] /* comment */">>, []) - )}, - {"// comment inside array", ?_assertEqual( + <<"[] /* comment */">> + }, + {"// comment inside array", [start_array, end_array, end_json], - decode(<<"[ // comment", ?newline, "]">>, []) - )}, - {"/**/ comment inside array", ?_assertEqual( + <<"[ // comment", ?newline, "]">> + }, + {"/**/ comment inside array", [start_array, end_array, end_json], - decode(<<"[ /* comment */ ]">>, []) - )}, - {"// comment at beginning of array", ?_assertEqual( + <<"[ /* comment */ ]">> + }, + {"// 
comment at beginning of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, []) - )}, - {"/**/ comment at beginning of array", ?_assertEqual( + <<"[ // comment", ?newline, "true", ?newline, "]">> + }, + {"/**/ comment at beginning of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment */ true ]">>, []) - )}, - {"// comment at end of array", ?_assertEqual( + <<"[ /* comment */ true ]">> + }, + {"// comment at end of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ true // comment", ?newline, "]">>, []) - )}, - {"/**/ comment at end of array", ?_assertEqual( + <<"[ true // comment", ?newline, "]">> + }, + {"/**/ comment at end of array", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ true /* comment */ ]">>, []) - )}, - {"// comment midarray (post comma)", ?_assertEqual( + <<"[ true /* comment */ ]">> + }, + {"// comment midarray (post comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true, // comment", ?newline, "false ]">>, []) - )}, - {"/**/ comment midarray (post comma)", ?_assertEqual( + <<"[ true, // comment", ?newline, "false ]">> + }, + {"/**/ comment midarray (post comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true, /* comment */ false ]">>, []) - )}, - {"// comment midarray (pre comma)", ?_assertEqual( + <<"[ true, /* comment */ false ]">> + }, + {"// comment midarray (pre comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true// comment", ?newline, ", false ]">>, []) - )}, - {"/**/ comment midarray (pre comma)", ?_assertEqual( + <<"[ true// comment", ?newline, ", false ]">> + }, + {"/**/ comment midarray (pre comma)", [start_array, {literal, true}, {literal, false}, end_array, end_json], - decode(<<"[ true/* comment */, false ]">>, []) - )}, - {"// comment inside object", 
?_assertEqual( + <<"[ true/* comment */, false ]">> + }, + {"// comment inside object", [start_object, end_object, end_json], - decode(<<"{ // comment", ?newline, "}">>, []) - )}, - {"/**/ comment inside object", ?_assertEqual( + <<"{ // comment", ?newline, "}">> + }, + {"/**/ comment inside object", [start_object, end_object, end_json], - decode(<<"{ /* comment */ }">>, []) - )}, - {"// comment at beginning of object", ?_assertEqual( + <<"{ /* comment */ }">> + }, + {"// comment at beginning of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, []) - )}, - {"/**/ comment at beginning of object", ?_assertEqual( + <<"{ // comment", ?newline, " \"key\": true", ?newline, "}">> + }, + {"/**/ comment at beginning of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ /* comment */ \"key\": true }">>, []) - )}, - {"// comment at end of object", ?_assertEqual( + <<"{ /* comment */ \"key\": true }">> + }, + {"// comment at end of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": true // comment", ?newline, "}">>, []) - )}, - {"/**/ comment at end of object", ?_assertEqual( + <<"{ \"key\": true // comment", ?newline, "}">> + }, + {"/**/ comment at end of object", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": true /* comment */ }">>, []) - )}, - {"// comment midobject (post comma)", ?_assertEqual( + <<"{ \"key\": true /* comment */ }">> + }, + {"// comment midobject (post comma)", [ start_object, {key, <<"x">>}, @@ -1136,9 +1110,9 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, []) - )}, - {"/**/ comment midobject (post comma)", ?_assertEqual( + <<"{ \"x\": true, // comment", ?newline, "\"y\": false }">> + }, + {"/**/ comment midobject (post comma)", [ 
start_object, {key, <<"x">>}, @@ -1148,9 +1122,9 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">>, []) - )}, - {"// comment midobject (pre comma)", ?_assertEqual( + <<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">> + }, + {"// comment midobject (pre comma)", [ start_object, {key, <<"x">>}, @@ -1160,9 +1134,9 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, []) - )}, - {"/**/ comment midobject (pre comma)", ?_assertEqual( + <<"{ \"x\": true// comment", ?newline, ", \"y\": false }">> + }, + {"/**/ comment midobject (pre comma)", [ start_object, {key, <<"x">>}, @@ -1172,330 +1146,168 @@ comments_test_() -> end_object, end_json ], - decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, []) - )}, - {"// comment precolon", ?_assertEqual( + <<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">> + }, + {"// comment precolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\" // comment", ?newline, ": true }">>, []) - )}, - {"/**/ comment precolon", ?_assertEqual( + <<"{ \"key\" // comment", ?newline, ": true }">> + }, + {"/**/ comment precolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\"/* comment */: true }">>, []) - )}, - {"// comment postcolon", ?_assertEqual( + <<"{ \"key\"/* comment */: true }">> + }, + {"// comment postcolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\": // comment", ?newline, " true }">>, []) - )}, - {"/**/ comment postcolon", ?_assertEqual( + <<"{ \"key\": // comment", ?newline, " true }">> + }, + {"/**/ comment postcolon", [start_object, {key, <<"key">>}, {literal, true}, end_object, end_json], - decode(<<"{ \"key\":/* comment */ true }">>, []) - )}, - {"// comment terminating zero", ?_assertEqual( + <<"{ \"key\":/* comment */ 
true }">> + }, + {"// comment terminating zero", [start_array, {integer, 0}, end_array, end_json], - decode(<<"[ 0// comment", ?newline, "]">>, []) - )}, - {"// comment terminating integer", ?_assertEqual( + <<"[ 0// comment", ?newline, "]">> + }, + {"// comment terminating integer", [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1// comment", ?newline, "]">>, []) - )}, - {"// comment terminating float", ?_assertEqual( + <<"[ 1// comment", ?newline, "]">> + }, + {"// comment terminating float", [start_array, {float, 1.0}, end_array, end_json], - decode(<<"[ 1.0// comment", ?newline, "]">>, []) - )}, - {"// comment terminating exp", ?_assertEqual( + <<"[ 1.0// comment", ?newline, "]">> + }, + {"// comment terminating exp", [start_array, {float, 1.0e1}, end_array, end_json], - decode(<<"[ 1e1// comment", ?newline, "]">>, []) - )}, - {"/**/ comment terminating zero", ?_assertEqual( + <<"[ 1e1// comment", ?newline, "]">> + }, + {"/**/ comment terminating zero", [start_array, {integer, 0}, end_array, end_json], - decode(<<"[ 0/* comment */ ]">>, []) - )}, - {"/**/ comment terminating integer", ?_assertEqual( + <<"[ 0/* comment */ ]">> + }, + {"/**/ comment terminating integer", [start_array, {integer, 1}, end_array, end_json], - decode(<<"[ 1/* comment */ ]">>, []) - )}, - {"/**/ comment terminating float", ?_assertEqual( + <<"[ 1/* comment */ ]">> + }, + {"/**/ comment terminating float", [start_array, {float, 1.0}, end_array, end_json], - decode(<<"[ 1.0/* comment */ ]">>, []) - )}, - {"/**/ comment terminating exp", ?_assertEqual( + <<"[ 1.0/* comment */ ]">> + }, + {"/**/ comment terminating exp", [start_array, {float, 1.0e1}, end_array, end_json], - decode(<<"[ 1e1/* comment */ ]">>, []) - )}, - {"/**/ comment following /**/ comment", ?_assertEqual( + <<"[ 1e1/* comment */ ]">> + }, + {"/**/ comment following /**/ comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[/* comment *//* comment */true]">>, []) - )}, - {"/**/ 
comment following // comment", ?_assertEqual( + <<"[/* comment *//* comment */true]">> + }, + {"/**/ comment following // comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[// comment", ?newline, "/* comment */true]">>, []) - )}, - {"// comment following /**/ comment", ?_assertEqual( + <<"[// comment", ?newline, "/* comment */true]">> + }, + {"// comment following /**/ comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[/* comment */// comment", ?newline, "true]">>, []) - )}, - {"// comment following // comment", ?_assertEqual( + <<"[/* comment */// comment", ?newline, "true]">> + }, + {"// comment following // comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, []) - )}, - {"/**/ comment inside /**/ comment", ?_assertEqual( + <<"[// comment", ?newline, "// comment", ?newline, "true]">> + }, + {"/**/ comment inside /**/ comment", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* /* comment */ */ true ]">>, []) - )}, - {"/**/ comment with /", ?_assertEqual( + <<"[ /* /* comment */ */ true ]">> + }, + {"/**/ comment with /", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* / */ true ]">>, []) - )}, - {"/**/ comment with *", ?_assertEqual( + <<"[ /* / */ true ]">> + }, + {"/**/ comment with *", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* * */ true ]">>, []) - )}, - {"// comment with badutf", ?_assertEqual( + <<"[ /* * */ true ]">> + }, + {"// comment with badutf", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, []) - )}, - {"/**/ comment with badutf", ?_assertEqual( + <<"[ // comment ", 16#00c0, " ", ?newline, "true]">> + }, + {"/**/ comment with badutf", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment ", 16#00c0, " */ true]">>, []) - )}, - {"/**/ comment with badutf preceeded 
by /", ?_assertEqual( + <<"[ /* comment ", 16#00c0, " */ true]">> + }, + {"/**/ comment with badutf preceeded by /", [start_array, {literal, true}, end_array, end_json], - decode(<<"[ /* comment /", 16#00c0, " */ true]">>, []) - )} + <<"[ /* comment /", 16#00c0, " */ true]">> + } + ], + [{Title, ?_assertEqual(Events, decode(JSON))} + || {Title, Events, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events, incremental_decode(JSON))} + || {Title, Events, JSON} <- Cases + ] ++ + % error when `{strict, [comments]}` is present + [{Title, ?_assertError(badarg, decode(JSON, [{strict, [comments]}]))} + || {Title, _Events, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertError( + badarg, + incremental_decode(JSON, [{strict, [comments]}]) + )} || {Title, _Events, JSON} <- Cases ]. no_comments_test_() -> - Decode = fun(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_config:parse_config(Config)) end, - [ - {"preceeding // comment", ?_assertError( + Cases = [ + {"// comment with badutf", badarg, - Decode(<<"// comment ", ?newline, "[]">>, [{strict, [comments]}]) - )}, - {"preceeding /**/ comment", ?_assertError( + <<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, + [{strict, [utf8]}] + }, + {"/**/ comment with badutf", badarg, - Decode(<<"/* comment */[]">>, [{strict, [comments]}]) - )}, - {"trailing // comment", ?_assertError( + <<"[ /* comment ", 16#00c0, " */ true]">>, + [{strict, [utf8]}] + }, + {"/**/ comment with badutf preceeded by /", badarg, - Decode(<<"[]// comment", ?newline>>, [{strict, [comments]}]) - )}, - {"trailing // comment (no newline)", ?_assertError( - badarg, - Decode(<<"[]// comment">>, [{strict, [comments]}]) - )}, - {"trailing /**/ comment", ?_assertError( - badarg, - Decode(<<"[] /* comment */">>, [{strict, [comments]}]) - )}, - {"// comment inside array", ?_assertError( - badarg, - Decode(<<"[ // comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"/**/ comment inside array", ?_assertError( - badarg, - 
Decode(<<"[ /* comment */ ]">>, [{strict, [comments]}]) - )}, - {"// comment at beginning of array", ?_assertError( - badarg, - Decode(<<"[ // comment", ?newline, "true", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"/**/ comment at beginning of array", ?_assertError( - badarg, - Decode(<<"[ /* comment */ true ]">>, [{strict, [comments]}]) - )}, - {"// comment at end of array", ?_assertError( - badarg, - Decode(<<"[ true // comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"/**/ comment at end of array", ?_assertError( - badarg, - Decode(<<"[ true /* comment */ ]">>, [{strict, [comments]}]) - )}, - {"// comment midarray (post comma)", ?_assertError( - badarg, - Decode(<<"[ true, // comment", ?newline, "false ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment midarray (post comma)", ?_assertError( - badarg, - Decode(<<"[ true, /* comment */ false ]">>, [{strict, [comments]}]) - )}, - {"// comment midarray (pre comma)", ?_assertError( - badarg, - Decode(<<"[ true// comment", ?newline, ", false ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment midarray (pre comma)", ?_assertError( - badarg, - Decode(<<"[ true/* comment */, false ]">>, [{strict, [comments]}]) - )}, - {"// comment inside object", ?_assertError( - badarg, - Decode(<<"{ // comment", ?newline, "}">>, [{strict, [comments]}]) - )}, - {"/**/ comment inside object", ?_assertError( - badarg, - Decode(<<"{ /* comment */ }">>, [{strict, [comments]}]) - )}, - {"// comment at beginning of object", ?_assertError( - badarg, - Decode(<<"{ // comment", ?newline, " \"key\": true", ?newline, "}">>, [{strict, [comments]}]) - )}, - {"/**/ comment at beginning of object", ?_assertError( - badarg, - Decode(<<"{ /* comment */ \"key\": true }">>, [{strict, [comments]}]) - )}, - {"// comment at end of object", ?_assertError( - badarg, - Decode(<<"{ \"key\": true // comment", ?newline, "}">>, [{strict, [comments]}]) - )}, - {"/**/ comment at end of object", ?_assertError( - badarg, - Decode(<<"{ \"key\": true 
/* comment */ }">>, [{strict, [comments]}]) - )}, - {"// comment midobject (post comma)", ?_assertError( - badarg, - Decode(<<"{ \"x\": true, // comment", ?newline, "\"y\": false }">>, [{strict, [comments]}]) - )}, - {"/**/ comment midobject (post comma)", ?_assertError( - badarg, - Decode(<<"{ \"x\": true, /* comment */", ?newline, "\"y\": false }">>, [{strict, [comments]}]) - )}, - {"// comment midobject (pre comma)", ?_assertError( - badarg, - Decode(<<"{ \"x\": true// comment", ?newline, ", \"y\": false }">>, [{strict, [comments]}]) - )}, - {"/**/ comment midobject (pre comma)", ?_assertError( - badarg, - Decode(<<"{ \"x\": true/* comment */", ?newline, ", \"y\": false }">>, [{strict, [comments]}]) - )}, - {"// comment precolon", ?_assertError( - badarg, - Decode(<<"{ \"key\" // comment", ?newline, ": true }">>, [{strict, [comments]}]) - )}, - {"/**/ comment precolon", ?_assertError( - badarg, - Decode(<<"{ \"key\"/* comment */: true }">>, [{strict, [comments]}]) - )}, - {"// comment postcolon", ?_assertError( - badarg, - Decode(<<"{ \"key\": // comment", ?newline, " true }">>, [{strict, [comments]}]) - )}, - {"/**/ comment postcolon", ?_assertError( - badarg, - Decode(<<"{ \"key\":/* comment */ true }">>, [{strict, [comments]}]) - )}, - {"// comment terminating zero", ?_assertError( - badarg, - Decode(<<"[ 0// comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"// comment terminating integer", ?_assertError( - badarg, - Decode(<<"[ 1// comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"// comment terminating float", ?_assertError( - badarg, - Decode(<<"[ 1.0// comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"// comment terminating exp", ?_assertError( - badarg, - Decode(<<"[ 1e1// comment", ?newline, "]">>, [{strict, [comments]}]) - )}, - {"/**/ comment terminating zero", ?_assertError( - badarg, - Decode(<<"[ 0/* comment */ ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment terminating integer", ?_assertError( - badarg, - 
Decode(<<"[ 1/* comment */ ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment terminating float", ?_assertError( - badarg, - Decode(<<"[ 1.0/* comment */ ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment terminating exp", ?_assertError( - badarg, - Decode(<<"[ 1e1/* comment */ ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment following /**/ comment", ?_assertError( - badarg, - Decode(<<"[/* comment *//* comment */true]">>, [{strict, [comments]}]) - )}, - {"/**/ comment following // comment", ?_assertError( - badarg, - Decode(<<"[// comment", ?newline, "/* comment */true]">>, [{strict, [comments]}]) - )}, - {"// comment following /**/ comment", ?_assertError( - badarg, - Decode(<<"[/* comment */// comment", ?newline, "true]">>, [{strict, [comments]}]) - )}, - {"// comment following // comment", ?_assertError( - badarg, - Decode(<<"[// comment", ?newline, "// comment", ?newline, "true]">>, [{strict, [comments]}]) - )}, - {"/**/ comment inside /**/ comment", ?_assertError( - badarg, - Decode(<<"[ /* /* comment */ */ true ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment with /", ?_assertError( - badarg, - Decode(<<"[ /* / */ true ]">>, [{strict, [comments]}]) - )}, - {"/**/ comment with *", ?_assertError( - badarg, - Decode(<<"[ /* * */ true ]">>, [{strict, [comments]}]) - )}, - {"// comment with badutf", ?_assertError( - badarg, - Decode(<<"[ // comment ", 16#00c0, " ", ?newline, "true]">>, [{strict, [comments]}]) - )}, - {"/**/ comment with badutf", ?_assertError( - badarg, - Decode(<<"[ /* comment ", 16#00c0, " */ true]">>, [{strict, [comments]}]) - )}, - {"/**/ comment with badutf preceeded by /", ?_assertError( - badarg, - Decode(<<"[ /* comment /", 16#00c0, " */ true]">>, [{strict, [comments]}]) - )} + <<"[ /* comment /", 16#00c0, " */ true]">>, + [{strict, [utf8]}] + } + ], + [{Title, ?_assertError(Error, decode(JSON, Config))} + || {Title, Error, JSON, Config} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertError(Error, 
incremental_decode(JSON, Config))} + || {Title, Error, JSON, Config} <- Cases ]. +% doing the full unicode range takes foreverrrrrrr so just do boundaries +% excludes characters that may need escaping codepoints() -> - unicode:characters_to_binary( - [32, 33] - ++ lists:seq(35, 38) - ++ lists:seq(40, 46) - ++ lists:seq(48, 91) - ++ lists:seq(93, 16#2027) - ++ lists:seq(16#202a, 16#d7ff) - ++ lists:seq(16#e000, 16#fdcf) - ++ lists:seq(16#fdf0, 16#fffd) - ). + [32, 33] ++ + lists:seq(35, 46) ++ + lists:seq(48, 91) ++ + lists:seq(93, 127) ++ + [16#2027, 16#202a, 16#d7ff, 16#e000, 16#fdcf, 16#fdf0, 16#fffd] ++ + [16#10000, 16#1fffd, 16#20000, 16#30000, 16#40000, 16#50000] ++ + [16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000] ++ + [16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000]. -extended_codepoints() -> - unicode:characters_to_binary( - lists:seq(16#10000, 16#1fffd) ++ [ - 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, - 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, - 16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000 - ] - ). +reserved_space() -> lists:seq(16#fdd0, 16#fdef). -reserved_space() -> [ to_fake_utf8(N) || N <- lists:seq(16#fdd0, 16#fdef) ]. +surrogates() -> lists:seq(16#d800, 16#dfff). -surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ]. - -noncharacters() -> [ to_fake_utf8(N) || N <- lists:seq(16#fffe, 16#ffff) ]. +noncharacters() -> lists:seq(16#fffe, 16#ffff). extended_noncharacters() -> - [ to_fake_utf8(N) || N <- [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] - ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] - ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] - ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] - ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff] - ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] - ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] - ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff] - ]. 
+ [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] ++ + [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] ++ + [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] ++ + [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] ++ + [16#9fffe, 16#9ffff, 16#afffe, 16#affff] ++ + [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] ++ + [16#dfffe, 16#dffff, 16#efffe, 16#effff] ++ + [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff]. + %% erlang refuses to decode certain codepoints, so fake them all to_fake_utf8(N) when N < 16#0080 -> <<34/utf8, N:8, 34/utf8>>; @@ -1511,540 +1323,267 @@ to_fake_utf8(N) -> clean_string_test_() -> - [ - {"clean codepoints", ?_assertEqual( - [{string, codepoints()}, end_json], - decode(<<34, (codepoints())/binary, 34>>, []) - )}, - {"clean extended codepoints", ?_assertEqual( - [{string, extended_codepoints()}, end_json], - decode(<<34, (extended_codepoints())/binary, 34>>, []) - )}, - {"error reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, reserved_space()) - )}, - {"error surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, surrogates()) - )}, - {"error noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, noncharacters()) - )}, - {"error extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> decode(Codepoint, [{strict, [utf8]}]) end, extended_noncharacters()) - )}, - {"clean reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, reserved_space()) - )}, - {"clean surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), [{string, <<16#fffd/utf8>>}, end_json]), - 
lists:map(fun(Codepoint) -> decode(Codepoint, []) end, surrogates()) - )}, - {"clean noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, noncharacters()) - )}, - {"clean extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), - lists:map(fun(Codepoint) -> decode(Codepoint, []) end, extended_noncharacters()) - )}, - {"dirty \\uwxyz", ?_assertEqual( - [{string, <<"\\uwxyz">>}, end_json], - decode(<<34, "\\uwxyz", 34>>, [dirty_strings]) - )}, - {"dirty \\x23", ?_assertEqual( - [{string, <<"\\x23">>}, end_json], - decode(<<34, "\\x23", 34>>, [dirty_strings]) - )}, - {"dirty 0", ?_assertEqual( - [{string, <<0>>}, end_json], - decode(<<34, 0, 34>>, [dirty_strings]) - )}, - {"dirty 0\"0", ?_assertEqual( - [{string, <<0, ?doublequote, 0>>}, end_json], - decode(<<34, 0, ?rsolidus, ?doublequote, 0, 34>>, [dirty_strings]) - )}, - {"dirty 0\"0", ?_assertEqual( - [{string, <<0, ?rsolidus, ?doublequote, 0>>}, end_json], - decode(<<34, 0, ?rsolidus, ?rsolidus, ?doublequote, 0, 34>>, [dirty_strings]) - )}, - {"dirty 16#d800", ?_assertEqual( - [{string, <<237, 160, 128>>}, end_json], - decode(<<34, 237, 160, 128, 34>>, [dirty_strings]) - )}, - {"dirty 16#10ffff", ?_assertEqual( - [{string, <<244, 143, 191, 191>>}, end_json], - decode(<<34, 244, 143, 191, 191, 34>>, [dirty_strings]) - )}, - {"dirty /", ?_assertEqual( - [{string, <<$/>>}, end_json], - decode(<<34, $/, 34>>, [dirty_strings, escaped_forward_slashes]) - )}, - {"dirty <<194, 129>>", ?_assertEqual( - [{string, <<194, 129>>}, end_json], - decode(<<34, 194, 129, 34>>, [dirty_strings]) - )} + Clean = codepoints(), + Dirty = reserved_space() ++ surrogates() ++ noncharacters() ++ extended_noncharacters(), + % clean codepoints + [{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual( + [{string, <<Codepoint/utf8>>}, end_json], + 
decode(<<34/utf8, Codepoint/utf8, 34/utf8>>) + )} || Codepoint <- Clean + ] ++ + % bad codepoints replaced by u+FFFD + [{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual( + [{string, <<16#fffd/utf8>>}, end_json], + decode(to_fake_utf8(Codepoint)) + )} || Codepoint <- Dirty + ] ++ + % bad codepoints that cause errors + [{"dirty u+" ++ integer_to_list(Codepoint, 16), ?_assertError( + badarg, + decode(to_fake_utf8(Codepoint), [{strict, [utf8]}]) + )} || Codepoint <- Dirty ]. -decode_bad_utf(String, Config) -> - case decode(<<34, String/binary, 34>>, Config) of - {error, badarg} -> erlang:error(badarg); - [{string, S}, end_json] -> S - end. +dirty_string_test_() -> + Cases = [ + {"dirty \\n", + [start_array, {string, <<"\\n">>}, end_array, end_json], + <<"[\"\\n\"]">>, + [dirty_strings] + }, + {"dirty \\uwxyz", + [start_array, {string, <<"\\uwxyz">>}, end_array, end_json], + <<"[\"\\uwxyz\"]">>, + [dirty_strings] + }, + {"dirty \\x23", + [start_array, {string, <<"\\x23">>}, end_array, end_json], + <<"[\"\\x23\"]">>, + [dirty_strings] + }, + {"dirty 0", + [start_array, {string, <<0>>}, end_array, end_json], + <<"[\"", 0, "\"]">>, + [dirty_strings] + }, + {"dirty 16#d800", + [start_array, {string, <<237, 160, 128>>}, end_array, end_json], + <<"[\"", 237, 160, 128, "\"]">>, + [dirty_strings] + }, + {"dirty 16#10ffff", + [start_array, {string, <<244, 143, 191, 191>>}, end_array, end_json], + <<"[\"", 244, 143, 191, 191, "\"]">>, + [dirty_strings] + }, + {"dirty /", + [start_array, {string, <<$/>>}, end_array, end_json], + <<"[\"", $/, "\"]">>, + [dirty_strings, escaped_forward_slashes] + }, + {"dirty <<194, 129>>", + [start_array, {string, <<194, 129>>}, end_array, end_json], + <<"[\"", 194, 129, "\"]">>, + [dirty_strings] + } + ], + [{Title, ?_assertEqual(Events, decode(JSON, Config))} + || {Title, Events, JSON, Config} <- Cases + ] ++ + % ensure `dirty_strings` and `strict` interact properly + [{Title, ?_assertEqual(Events, decode(JSON, Config ++ [strict]))} + 
|| {Title, Events, JSON, Config} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual(Events, incremental_decode(JSON, Config))} + || {Title, Events, JSON, Config} <- Cases + ]. + bad_utf8_test_() -> - [ - {"noncharacter u+fffe", ?_assertError( - badarg, - decode_bad_utf(<<239, 191, 190>>, [{strict, [utf8]}]) - )}, - {"noncharacter u+fffe replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<239, 191, 190>>, []) - )}, - {"noncharacter u+ffff", ?_assertError( - badarg, - decode_bad_utf(<<239, 191, 191>>, [{strict, [utf8]}]) - )}, - {"noncharacter u+ffff replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<239, 191, 191>>, []) - )}, - {"orphan continuation byte u+0080", ?_assertError( - badarg, - decode_bad_utf(<<16#0080>>, [{strict, [utf8]}]) - )}, - {"orphan continuation byte u+0080 replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<16#0080>>, []) - )}, - {"orphan continuation byte u+00bf", ?_assertError( - badarg, - decode_bad_utf(<<16#00bf>>, [{strict, [utf8]}]) - )}, - {"orphan continuation byte u+00bf replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<16#00bf>>, []) - )}, - {"2 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, [{strict, [utf8]}]) - )}, - {"2 continuation bytes replaced", ?_assertEqual( + Cases = [ + {"noncharacter u+fffe", <<16#fffd/utf8>>, <<239, 191, 190>>}, + {"noncharacter u+ffff", <<16#fffd/utf8>>, <<239, 191, 191>>}, + {"orphan continuation byte u+0080", <<16#fffd/utf8>>, <<16#0080>>}, + {"orphan continuation byte u+00bf", <<16#fffd/utf8>>, <<16#00bf>>}, + {"2 continuation bytes", binary:copy(<<16#fffd/utf8>>, 2), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 2))/binary>>, []) - )}, - {"3 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, [{strict, [utf8]}]) - )}, - {"3 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 2))/binary>> + }, + {"3 
continuation bytes", binary:copy(<<16#fffd/utf8>>, 3), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 3))/binary>>, []) - )}, - {"4 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, [{strict, [utf8]}]) - )}, - {"4 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 3))/binary>> + }, + {"4 continuation bytes", binary:copy(<<16#fffd/utf8>>, 4), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 4))/binary>>, []) - )}, - {"5 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, [{strict, [utf8]}]) - )}, - {"5 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 4))/binary>> + }, + {"5 continuation bytes", binary:copy(<<16#fffd/utf8>>, 5), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 5))/binary>>, []) - )}, - {"6 continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, [{strict, [utf8]}]) - )}, - {"6 continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 5))/binary>> + }, + {"6 continuation bytes", binary:copy(<<16#fffd/utf8>>, 6), - decode_bad_utf(<<(binary:copy(<<16#0080>>, 6))/binary>>, []) - )}, - {"all continuation bytes", ?_assertError( - badarg, - decode_bad_utf(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [{strict, [utf8]}]) - )}, - {"all continuation bytes replaced", ?_assertEqual( + <<(binary:copy(<<16#0080>>, 6))/binary>> + }, + {"all continuation bytes", binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), - decode_bad_utf( - <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, - [] - ) - )}, - {"lonely start byte", ?_assertError( - badarg, - decode_bad_utf(<<16#00c0>>, [{strict, [utf8]}]) - )}, - {"lonely start byte replaced", ?_assertEqual( - <<16#fffd/utf8>>, - decode_bad_utf(<<16#00c0>>, []) - )}, - {"lonely start bytes (2 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#00c0, 32, 16#00df>>, [{strict, 
[utf8]}]) - )}, - {"lonely start bytes (2 byte) replaced", ?_assertEqual( + <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>> + }, + {"lonely start byte", <<16#fffd/utf8>>, <<16#00c0>>}, + {"lonely start bytes (2 byte)", <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00c0, 32, 16#00df>>, []) - )}, - {"lonely start bytes (3 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#00e0, 32, 16#00ef>>, [{strict, [utf8]}]) - )}, - {"lonely start bytes (3 byte) replaced", ?_assertEqual( + <<16#00c0, 32, 16#00df>> + }, + {"lonely start bytes (3 byte)", <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00e0, 32, 16#00ef>>, []) - )}, - {"lonely start bytes (4 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#00f0, 32, 16#00f7>>, [{strict, [utf8]}]) - )}, - {"lonely start bytes (4 byte) replaced", ?_assertEqual( + <<16#00e0, 32, 16#00ef>> + }, + {"lonely start bytes (4 byte)", <<16#fffd/utf8, 32, 16#fffd/utf8>>, - decode_bad_utf(<<16#00f0, 32, 16#00f7>>, []) - )}, - {"missing continuation byte (3 byte)", ?_assertError( - badarg, - decode_bad_utf(<<224, 160, 32>>, [{strict, [utf8]}]) - )}, - {"missing continuation byte (3 byte) replaced", ?_assertEqual( + <<16#00f0, 32, 16#00f7>> + }, + {"missing continuation byte (3 byte)", <<16#fffd/utf8, 32>>, <<224, 160, 32>>}, + {"missing continuation byte (4 byte missing one)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<224, 160, 32>>, []) - )}, - {"missing continuation byte (4 byte missing one)", ?_assertError( - badarg, - decode_bad_utf(<<240, 144, 128, 32>>, [{strict, [utf8]}]) - )}, - {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( + <<240, 144, 128, 32>> + }, + {"missing continuation byte (4 byte missing two)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<240, 144, 128, 32>>, []) - )}, - {"missing continuation byte (4 byte missing two)", ?_assertError( - badarg, - decode_bad_utf(<<240, 144, 32>>, [{strict, [utf8]}]) - )}, - {"missing continuation byte (4 byte missing two) 
replaced", ?_assertEqual( + <<240, 144, 32>> + }, + {"overlong encoding of u+002f (2 byte)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<240, 144, 32>>, []) - )}, - {"overlong encoding of u+002f (2 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#c0, 16#af, 32>>, [{strict, [utf8]}]) - )}, - {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( + <<16#c0, 16#af, 32>> + }, + {"overlong encoding of u+002f (3 byte)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#c0, 16#af, 32>>, []) - )}, - {"overlong encoding of u+002f (3 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, [{strict, [utf8]}]) - )}, - {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( + <<16#e0, 16#80, 16#af, 32>> + }, + {"overlong encoding of u+002f (4 byte)", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#e0, 16#80, 16#af, 32>>, []) - )}, - {"overlong encoding of u+002f (4 byte)", ?_assertError( - badarg, - decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, [{strict, [utf8]}]) - )}, - {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( + <<16#f0, 16#80, 16#80, 16#af, 32>> + }, + {"highest overlong 2 byte sequence", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#f0, 16#80, 16#80, 16#af, 32>>, []) - )}, - {"highest overlong 2 byte sequence", ?_assertError( - badarg, - decode_bad_utf(<<16#c1, 16#bf, 32>>, [{strict, [utf8]}]) - )}, - {"highest overlong 2 byte sequence replaced", ?_assertEqual( + <<16#c1, 16#bf, 32>> + }, + {"highest overlong 3 byte sequence", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#c1, 16#bf, 32>>, []) - )}, - {"highest overlong 3 byte sequence", ?_assertError( - badarg, - decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, [{strict, [utf8]}]) - )}, - {"highest overlong 3 byte sequence replaced", ?_assertEqual( + <<16#e0, 16#9f, 16#bf, 32>> + }, + {"highest overlong 4 byte sequence", <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#e0, 16#9f, 16#bf, 32>>, []) - )}, - {"highest overlong 4 byte sequence", ?_assertError( + <<16#f0, 16#8f, 
16#bf, 16#bf, 32>> + } + ], + [{Title, ?_assertError( badarg, - decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [{strict, [utf8]}]) - )}, - {"highest overlong 4 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - decode_bad_utf(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, []) - )} + decode(<<34, JSON/binary, 34>>, [{strict, [utf8]}]) + )} || {Title, _, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertError( + badarg, + incremental_decode(<<34, JSON/binary, 34>>, [{strict, [utf8]}]) + )} || {Title, _, JSON} <- Cases + ] ++ + [{Title ++ " replaced", ?_assertEqual( + [{string, Replacement}, end_json], + decode(<<34, JSON/binary, 34>>) + )} || {Title, Replacement, JSON} <- Cases + ] ++ + [{Title ++ " replaced (incremental)", ?_assertEqual( + [{string, Replacement}, end_json], + incremental_decode(<<34, JSON/binary, 34>>) + )} || {Title, Replacement, JSON} <- Cases ]. -unescape(Bin, Config) -> - case decode(<<34, Bin/binary, 34>>, Config) of - [{string, String}, end_json] -> String; - {error, badarg} -> erlang:error(badarg) - end. 
- unescape_test_() -> - [ - {"unescape backspace", ?_assertEqual( - <<"\b">>, - unescape(<<"\\b"/utf8>>, []) - )}, - {"unescape tab", ?_assertEqual( - <<"\t">>, - unescape(<<"\\t"/utf8>>, []) - )}, - {"unescape newline", ?_assertEqual( - <<"\n">>, - unescape(<<"\\n"/utf8>>, []) - )}, - {"unescape formfeed", ?_assertEqual( - <<"\f">>, - unescape(<<"\\f"/utf8>>, []) - )}, - {"unescape carriage return", ?_assertEqual( - <<"\r">>, - unescape(<<"\\r"/utf8>>, []) - )}, - {"unescape quote", ?_assertEqual( - <<"\"">>, - unescape(<<"\\\""/utf8>>, []) - )}, - {"unescape solidus", ?_assertEqual( - <<"/">>, - unescape(<<"\\/"/utf8>>, []) - )}, - {"unescape reverse solidus", ?_assertEqual( - <<"\\">>, - unescape(<<"\\\\"/utf8>>, []) - )}, - {"unescape control", ?_assertEqual( - <<0>>, - unescape(<<"\\u0000"/utf8>>, []) - )}, - {"unescape surrogate pair", ?_assertEqual( - <<16#10000/utf8>>, - unescape(<<"\\ud800\\udc00"/utf8>>, []) - )}, - {"replace bad high surrogate", ?_assertEqual( - <<16#fffd/utf8>>, - unescape(<<"\\udc00"/utf8>>, []) - )}, - {"do not unescape bad high surrogate", ?_assertError( - badarg, - unescape(<<"\\udc00"/utf8>>, [{strict, [utf8]}]) - )}, - {"replace naked high surrogate", ?_assertEqual( + Cases = [ + {"unescape backspace", <<"\b">>, <<"\\b"/utf8>>}, + {"unescape tab", <<"\t">>, <<"\\t"/utf8>>}, + {"unescape newline", <<"\n">>, <<"\\n"/utf8>>}, + {"unescape formfeed", <<"\f">>, <<"\\f"/utf8>>}, + {"unescape carriage return", <<"\r">>, <<"\\r"/utf8>>}, + {"unescape quote", <<"\"">>, <<"\\\""/utf8>>}, + {"unescape solidus", <<"/">>, <<"\\/"/utf8>>}, + {"unescape reverse solidus", <<"\\">>, <<"\\\\"/utf8>>}, + {"unescape control", <<0>>, <<"\\u0000"/utf8>>}, + {"unescape surrogate pair", <<16#10000/utf8>>, <<"\\ud800\\udc00"/utf8>>}, + {"replace bad high surrogate", <<16#fffd/utf8>>, <<"\\udc00"/utf8>>}, + {"replace naked high surrogate", <<16#fffd/utf8, "hello world">>, - unescape(<<"\\ud800hello world"/utf8>>, []) - )}, - {"do not unescape naked high 
surrogate", ?_assertError( - badarg, - unescape(<<"\\ud800hello world"/utf8>>, [{strict, [utf8]}]) - )}, - {"replace naked low surrogate", ?_assertEqual( + <<"\\ud800hello world"/utf8>> + }, + {"replace naked low surrogate", <<16#fffd/utf8, "hello world">>, - unescape(<<"\\udc00hello world"/utf8>>, []) - )}, - {"do not unescape naked low surrogate", ?_assertError( - badarg, - unescape(<<"\\udc00hello world"/utf8>>, [{strict, [utf8]}]) - )}, - {"replace bad surrogate pair", ?_assertEqual( - <<16#fffd/utf8, 16#fffd/utf8>>, - unescape(<<"\\ud800\\u0000">>, []) - )}, - {"do not unescape bad surrogate pair", ?_assertError( - badarg, - unescape(<<"\\ud800\\u0000">>, [{strict, [utf8]}]) - )}, - {"bad pseudo escape sequence", ?_assertError( - badarg, - unescape(<<"\\uabcg">>, [strict]) - )} + <<"\\udc00hello world"/utf8>> + }, + {"replace bad surrogate pair", <<16#fffd/utf8, 16#fffd/utf8>>, <<"\\ud800\\u0000">>} + ], + [{Title, ?_assertEqual([{string, Escaped}, end_json], decode(<<34, JSON/binary, 34>>))} + || {Title, Escaped, JSON} <- Cases + ] ++ + [{Title ++ " (incremental)", ?_assertEqual( + [{string, Escaped}, end_json], + incremental_decode(<<34, JSON/binary, 34>>) + )} || {Title, Escaped, JSON} <- Cases ]. -maybe_escape(Bin, Config) -> - [{string, String}, end_json] = decode(Bin, Config), - String. +bad_escaped_surrogate_test_() -> + Cases = [ + {"do not unescape bad high surrogate", <<"\\udc00">>}, + {"do not unescape naked high surrogate", <<"\\ud800hello world">>}, + {"do not unescape naked low surrogate", <<"\\udc00hello world">>}, + {"do not unescape bad surrogate pair", <<"\\ud800\\u0000">>} + ], + [{Title, ?_assertError(badarg, decode(<<34, JSON/binary, 34>>, [{strict, [utf8]}]))} + || {Title, JSON} <- Cases + ]. 
+ escape_test_() -> + Cases = [ + {"backspace", <<"\b">>, <<"\\b">>}, + {"tab", <<"\t">>, <<"\\t">>}, + {"newline", <<"\n">>, <<"\\n">>}, + {"formfeed", <<"\f">>, <<"\\f">>}, + {"carriage return", <<"\r">>, <<"\\r">>}, + {"quote", <<"\"">>, <<"\\\"">>}, + {"backslash", <<"\\">>, <<"\\\\">>}, + {"control", <<0>>, <<"\\u0000">>} + ], + [{"escape " ++ Title, ?_assertEqual( + [{string, Escaped}, end_json], + decode(<<34, Escaped/binary, 34>>, [escaped_strings]) + )} || {Title, _Unescaped, Escaped} <- Cases + ] ++ + [{"do not escape " ++ Title, ?_assertEqual( + [{string, Unescaped}, end_json], + decode(<<34, Escaped/binary, 34>>) + )} || {Title, Unescaped, Escaped} <- Cases + ]. + + +special_escape_test_() -> [ - {"maybe_escape backspace", ?_assertEqual( - <<"\\b">>, - maybe_escape(<<34, "\\b"/utf8, 34>>, [escaped_strings]) + {"escape forward slash", ?_assertEqual( + [{string, <<"\\/">>}, end_json], + decode(<<34, "/"/utf8, 34>>, [escaped_strings, escaped_forward_slashes]) )}, - {"don't escape backspace", ?_assertEqual( - <<"\b">>, - maybe_escape(<<34, "\\b"/utf8, 34>>, []) + {"do not escape forward slash", ?_assertEqual( + [{string, <<"/">>}, end_json], + decode(<<34, "/"/utf8, 34>>, [escaped_strings]) )}, - {"maybe_escape tab", ?_assertEqual( - <<"\\t">>, - maybe_escape(<<34, "\\t"/utf8, 34>>, [escaped_strings]) + {"escape jsonp", ?_assertEqual( + [{string, <<"\\u2028">>}, end_json], + decode(<<34, 16#2028/utf8, 34>>, [escaped_strings]) )}, - {"maybe_escape newline", ?_assertEqual( - <<"\\n">>, - maybe_escape(<<34, "\\n"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape formfeed", ?_assertEqual( - <<"\\f">>, - maybe_escape(<<34, "\\f"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape carriage return", ?_assertEqual( - <<"\\r">>, - maybe_escape(<<34, "\\r"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape quote", ?_assertEqual( - <<"\\\"">>, - maybe_escape(<<34, "\\\""/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape forward slash", ?_assertEqual( - 
<<"\\/">>, - maybe_escape(<<34, "/"/utf8, 34>>, [escaped_strings, escaped_forward_slashes]) - )}, - {"do not maybe_escape forward slash", ?_assertEqual( - <<"/">>, - maybe_escape(<<34, "/"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape backslash", ?_assertEqual( - <<"\\\\">>, - maybe_escape(<<34, "\\\\"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape jsonp (u2028)", ?_assertEqual( - <<"\\u2028">>, - maybe_escape(<<34, 16#2028/utf8, 34>>, [escaped_strings]) - )}, - {"do not maybe_escape jsonp (u2028)", ?_assertEqual( - <<16#2028/utf8>>, - maybe_escape(<<34, 16#2028/utf8, 34>>, [escaped_strings, unescaped_jsonp]) - )}, - {"maybe_escape jsonp (u2029)", ?_assertEqual( - <<"\\u2029">>, - maybe_escape(<<34, 16#2029/utf8, 34>>, [escaped_strings]) - )}, - {"do not maybe_escape jsonp (u2029)", ?_assertEqual( - <<16#2029/utf8>>, - maybe_escape(<<34, 16#2029/utf8, 34>>, [escaped_strings, unescaped_jsonp]) - )}, - {"maybe_escape u0000", ?_assertEqual( - <<"\\u0000">>, - maybe_escape(<<34, "\\u0000"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0001", ?_assertEqual( - <<"\\u0001">>, - maybe_escape(<<34, "\\u0001"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0002", ?_assertEqual( - <<"\\u0002">>, - maybe_escape(<<34, "\\u0002"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0003", ?_assertEqual( - <<"\\u0003">>, - maybe_escape(<<34, "\\u0003"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0004", ?_assertEqual( - <<"\\u0004">>, - maybe_escape(<<34, "\\u0004"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0005", ?_assertEqual( - <<"\\u0005">>, - maybe_escape(<<34, "\\u0005"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0006", ?_assertEqual( - <<"\\u0006">>, - maybe_escape(<<34, "\\u0006"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0007", ?_assertEqual( - <<"\\u0007">>, - maybe_escape(<<34, "\\u0007"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u000b", ?_assertEqual( - <<"\\u000b">>, - 
maybe_escape(<<34, "\\u000b"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u000e", ?_assertEqual( - <<"\\u000e">>, - maybe_escape(<<34, "\\u000e"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u000f", ?_assertEqual( - <<"\\u000f">>, - maybe_escape(<<34, "\\u000f"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0010", ?_assertEqual( - <<"\\u0010">>, - maybe_escape(<<34, "\\u0010"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0011", ?_assertEqual( - <<"\\u0011">>, - maybe_escape(<<34, "\\u0011"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0012", ?_assertEqual( - <<"\\u0012">>, - maybe_escape(<<34, "\\u0012"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0013", ?_assertEqual( - <<"\\u0013">>, - maybe_escape(<<34, "\\u0013"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0014", ?_assertEqual( - <<"\\u0014">>, - maybe_escape(<<34, "\\u0014"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0015", ?_assertEqual( - <<"\\u0015">>, - maybe_escape(<<34, "\\u0015"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0016", ?_assertEqual( - <<"\\u0016">>, - maybe_escape(<<34, "\\u0016"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0017", ?_assertEqual( - <<"\\u0017">>, - maybe_escape(<<34, "\\u0017"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0018", ?_assertEqual( - <<"\\u0018">>, - maybe_escape(<<34, "\\u0018"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u0019", ?_assertEqual( - <<"\\u0019">>, - maybe_escape(<<34, "\\u0019"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001a", ?_assertEqual( - <<"\\u001a">>, - maybe_escape(<<34, "\\u001a"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001b", ?_assertEqual( - <<"\\u001b">>, - maybe_escape(<<34, "\\u001b"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001c", ?_assertEqual( - <<"\\u001c">>, - maybe_escape(<<34, "\\u001c"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001d", ?_assertEqual( - <<"\\u001d">>, - 
maybe_escape(<<34, "\\u001d"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001e", ?_assertEqual( - <<"\\u001e">>, - maybe_escape(<<34, "\\u001e"/utf8, 34>>, [escaped_strings]) - )}, - {"maybe_escape u001f", ?_assertEqual( - <<"\\u001f">>, - maybe_escape(<<34, "\\u001f"/utf8, 34>>, [escaped_strings]) + {"do not escape jsonp", ?_assertEqual( + [{string, <<16#2028/utf8>>}, end_json], + decode(<<34, 16#2028/utf8, 34>>, [escaped_strings, unescaped_jsonp]) )} ]. @@ -2055,8 +1594,8 @@ single_quoted_string_test_() -> [{string, <<"hello world">>}, end_json], decode(<<39, "hello world", 39>>, []) )}, - {"single quoted string error", ?_assertEqual( - {error, badarg}, + {"single quoted string error", ?_assertError( + badarg, decode(<<39, "hello world", 39>>, [{strict, [single_quotes]}]) )}, {"single quoted string with embedded double quotes", ?_assertEqual( @@ -2078,8 +1617,8 @@ single_quoted_string_test_() -> end_object, end_json], decode(<<"{'key':'value','another key':'another value'}">>, []) )}, - {"single quoted key error", ?_assertEqual( - {error, badarg}, + {"single quoted key error", ?_assertError( + badarg, decode(<<"{'key':'value','another key':'another value'}">>, [{strict, [single_quotes]}]) )} ]. diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 1c22e46..f4e8699 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -61,24 +61,12 @@ unzip([{K, V}|Rest], Acc) when is_binary(K); is_atom(K) -> unzip(Rest, [V, K] ++ -include_lib("eunit/include/eunit.hrl"). -encode_test_() -> - Data = jsx:test_cases(), - Encode = encoder(jsx, [], []), - [ - { - Title, ?_assertEqual( - Events, - Encode(Term) -- [end_json] - ) - } || {Title, _, Term, Events} <- Data - ]. - -err(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term). +parser(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term). 
error_test_() -> [ - {"value error", ?_assertError(badarg, err(self(), []))}, - {"string error", ?_assertError(badarg, err(<<239, 191, 191>>, [strict]))} + {"value error", ?_assertError(badarg, parser(self(), []))}, + {"string error", ?_assertError(badarg, parser(<<239, 191, 191>>, [strict]))} ]. custom_error_handler_test_() -> @@ -86,11 +74,11 @@ custom_error_handler_test_() -> [ {"value error", ?_assertEqual( {value, [self()]}, - err(self(), [{error_handler, Error}]) + parser(self(), [{error_handler, Error}]) )}, {"string error", ?_assertEqual( {string, [{string, <<239, 191, 191>>}]}, - err(<<239, 191, 191>>, [{error_handler, Error}, strict]) + parser(<<239, 191, 191>>, [{error_handler, Error}, strict]) )} ]. diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index e59ff1e..dc2d436 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -26,10 +26,6 @@ -export([parser/3, resume/5]). -export([init/1, handle_event/2]). --ifdef(TEST). --export([clean_string/2, json_escape_sequence/1]). --endif. - -spec parser(Handler::module(), State::any(), Config::jsx:config()) -> jsx:parser(). @@ -455,47 +451,15 @@ handle_event(Event, State) -> [Event] ++ State. -include_lib("eunit/include/eunit.hrl"). -parse(Events, Config) -> - Chunk = try - value(Events ++ [end_json], {jsx, []}, [], jsx_config:parse_config(Config)) - catch - error:badarg -> {error, badarg} - end, - Incremental = try - Final = lists:foldl( - fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end, - parser(jsx, [], [stream] ++ Config), - lists:map(fun(X) -> [X] end, Events) - ), - Final(end_stream) - catch - error:badarg -> {error, badarg} - end, - ?assert(Chunk == Incremental), - Chunk. - - -parse_test_() -> - Data = jsx:test_cases(), - [ - { - Title, ?_assertEqual( - Events ++ [end_json], - parse(Events, []) - ) - } || {Title, _, _, Events} <- Data - ]. - - -parse_error(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)). 
+parse(Events, Config) -> value(Events, {jsx, []}, [], jsx_config:parse_config(Config)). error_test_() -> [ - {"value error", ?_assertError(badarg, parse_error([self()], []))}, - {"maybe_done error", ?_assertError(badarg, parse_error([start_array, end_array, start_array, end_json], []))}, - {"done error", ?_assertError(badarg, parse_error([{string, <<"">>}, {literal, true}, end_json], []))}, - {"string error", ?_assertError(badarg, parse_error([{string, <<239, 191, 191>>}, end_json], [strict_utf8]))} + {"value error", ?_assertError(badarg, parse([self()], []))}, + {"maybe_done error", ?_assertError(badarg, parse([start_array, end_array, start_array, end_json], []))}, + {"done error", ?_assertError(badarg, parse([{string, <<"">>}, {literal, true}, end_json], []))}, + {"string error", ?_assertError(badarg, parse([{string, <<239, 191, 191>>}, end_json], [strict_utf8]))} ]. @@ -504,47 +468,533 @@ custom_error_handler_test_() -> [ {"value error", ?_assertEqual( {value, [self()]}, - parse_error([self()], [{error_handler, Error}]) + parse([self()], [{error_handler, Error}]) )}, {"maybe_done error", ?_assertEqual( {maybe_done, [start_array, end_json]}, - parse_error([start_array, end_array, start_array, end_json], [{error_handler, Error}]) + parse([start_array, end_array, start_array, end_json], [{error_handler, Error}]) )}, {"done error", ?_assertEqual( {maybe_done, [{literal, true}, end_json]}, - parse_error([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}]) + parse([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}]) )}, {"string error", ?_assertEqual( {string, [{string, <<239, 191, 191>>}, end_json]}, - parse_error([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict]) + parse([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict]) )} ]. 
+incomplete_test_() -> + Cases = [ + {"incomplete value", []}, + {"incomplete object", [start_object]}, + {"incomplete array", [start_array]}, + {"incomplete maybe_done", [start_array, end_array]} + ], + [{Title, ?_assertError(badarg, parse(Events, []))} + || {Title, Events} <- Cases + ]. + + custom_incomplete_handler_test_() -> [ {"custom incomplete handler", ?_assertError( badarg, - parse_error([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}]) + parse([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}]) )} ]. raw_test_() -> + Parse = fun(Events, Config) -> (parser(?MODULE, [], Config))(Events ++ [end_json]) end, [ {"raw empty list", ?_assertEqual( - [start_array, end_array, end_json], - parse([{raw, <<"[]">>}], []) + [start_array, end_array], + Parse([{raw, <<"[]">>}], []) )}, {"raw empty object", ?_assertEqual( - [start_object, end_object, end_json], - parse([{raw, <<"{}">>}], []) + [start_object, end_object], + Parse([{raw, <<"{}">>}], []) )}, {"raw chunk inside stream", ?_assertEqual( - [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object, end_json], - parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], []) + [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object], + Parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], []) )} ]. +%% erlang refuses to encode certain codepoints, so fake them +to_fake_utf8(N) when N < 16#0080 -> <<N:8>>; +to_fake_utf8(N) when N < 16#0800 -> + <<0:5, Y:5, X:6>> = <<N:16>>, + <<2#110:3, Y:5, 2#10:2, X:6>>; +to_fake_utf8(N) when N < 16#10000 -> + <<Z:4, Y:6, X:6>> = <<N:16>>, + <<2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6>>; +to_fake_utf8(N) -> + <<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>, + <<2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6>>. 
+ + +codepoints() -> + unicode:characters_to_binary( + [32, 33] + ++ lists:seq(35, 46) + ++ lists:seq(48, 91) + ++ lists:seq(93, 16#2027) + ++ lists:seq(16#202a, 16#d7ff) + ++ lists:seq(16#e000, 16#fdcf) + ++ lists:seq(16#fdf0, 16#fffd) + ). + +extended_codepoints() -> + unicode:characters_to_binary( + lists:seq(16#10000, 16#1fffd) ++ [ + 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, + 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, + 16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000 + ] + ). + +reserved_space() -> [ to_fake_utf8(N) || N <- lists:seq(16#fdd0, 16#fdef) ]. + +surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ]. + +noncharacters() -> [ to_fake_utf8(N) || N <- lists:seq(16#fffe, 16#ffff) ]. + +extended_noncharacters() -> + [ to_fake_utf8(N) || N <- [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] + ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] + ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] + ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] + ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff] + ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] + ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] + ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff] + ]. 
+ + +clean_string_test_() -> + [ + {"clean codepoints", ?_assertEqual( + codepoints(), + clean_string(codepoints(), #config{}) + )}, + {"clean extended codepoints", ?_assertEqual( + extended_codepoints(), + clean_string(extended_codepoints(), #config{}) + )}, + {"escape path codepoints", ?_assertEqual( + codepoints(), + clean_string(codepoints(), #config{escaped_strings=true}) + )}, + {"escape path extended codepoints", ?_assertEqual( + extended_codepoints(), + clean_string(extended_codepoints(), #config{escaped_strings=true}) + )}, + {"error reserved space", ?_assertEqual( + lists:duplicate(length(reserved_space()), {error, badarg}), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, reserved_space()) + )}, + {"error surrogates", ?_assertEqual( + lists:duplicate(length(surrogates()), {error, badarg}), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, surrogates()) + )}, + {"error noncharacters", ?_assertEqual( + lists:duplicate(length(noncharacters()), {error, badarg}), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, noncharacters()) + )}, + {"error extended noncharacters", ?_assertEqual( + lists:duplicate(length(extended_noncharacters()), {error, badarg}), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, extended_noncharacters()) + )}, + {"clean reserved space", ?_assertEqual( + lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, reserved_space()) + )}, + {"clean surrogates", ?_assertEqual( + lists:duplicate(length(surrogates()), <<16#fffd/utf8>>), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, surrogates()) + )}, + {"clean noncharacters", ?_assertEqual( + lists:duplicate(length(noncharacters()), <<16#fffd/utf8>>), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, noncharacters()) + )}, + {"clean 
extended noncharacters", ?_assertEqual( + lists:duplicate(length(extended_noncharacters()), <<16#fffd/utf8>>), + lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{}) end, extended_noncharacters()) + )} + ]. + + +escape_test_() -> + [ + {"maybe_escape backspace", ?_assertEqual( + <<"\\b">>, + clean_string(<<16#0008/utf8>>, #config{escaped_strings=true}) + )}, + {"don't escape backspace", ?_assertEqual( + <<"\b">>, + clean_string(<<16#0008/utf8>>, #config{}) + )}, + {"maybe_escape tab", ?_assertEqual( + <<"\\t">>, + clean_string(<<16#0009/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape newline", ?_assertEqual( + <<"\\n">>, + clean_string(<<16#000a/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape formfeed", ?_assertEqual( + <<"\\f">>, + clean_string(<<16#000c/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape carriage return", ?_assertEqual( + <<"\\r">>, + clean_string(<<16#000d/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape quote", ?_assertEqual( + <<"\\\"">>, + clean_string(<<16#0022/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape forward slash", ?_assertEqual( + <<"\\/">>, + clean_string(<<16#002f/utf8>>, #config{escaped_strings=true, escaped_forward_slashes=true}) + )}, + {"do not maybe_escape forward slash", ?_assertEqual( + <<"/">>, + clean_string(<<16#002f/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape backslash", ?_assertEqual( + <<"\\\\">>, + clean_string(<<16#005c/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape jsonp (u2028)", ?_assertEqual( + <<"\\u2028">>, + clean_string(<<16#2028/utf8>>, #config{escaped_strings=true}) + )}, + {"do not maybe_escape jsonp (u2028)", ?_assertEqual( + <<16#2028/utf8>>, + clean_string(<<16#2028/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true}) + )}, + {"maybe_escape jsonp (u2029)", ?_assertEqual( + <<"\\u2029">>, + clean_string(<<16#2029/utf8>>, #config{escaped_strings=true}) + )}, + {"do not maybe_escape jsonp 
(u2029)", ?_assertEqual( + <<16#2029/utf8>>, + clean_string(<<16#2029/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true}) + )}, + {"maybe_escape u0000", ?_assertEqual( + <<"\\u0000">>, + clean_string(<<16#0000/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0001", ?_assertEqual( + <<"\\u0001">>, + clean_string(<<16#0001/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0002", ?_assertEqual( + <<"\\u0002">>, + clean_string(<<16#0002/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0003", ?_assertEqual( + <<"\\u0003">>, + clean_string(<<16#0003/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0004", ?_assertEqual( + <<"\\u0004">>, + clean_string(<<16#0004/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0005", ?_assertEqual( + <<"\\u0005">>, + clean_string(<<16#0005/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0006", ?_assertEqual( + <<"\\u0006">>, + clean_string(<<16#0006/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0007", ?_assertEqual( + <<"\\u0007">>, + clean_string(<<16#0007/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u000b", ?_assertEqual( + <<"\\u000b">>, + clean_string(<<16#000b/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u000e", ?_assertEqual( + <<"\\u000e">>, + clean_string(<<16#000e/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u000f", ?_assertEqual( + <<"\\u000f">>, + clean_string(<<16#000f/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0010", ?_assertEqual( + <<"\\u0010">>, + clean_string(<<16#0010/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0011", ?_assertEqual( + <<"\\u0011">>, + clean_string(<<16#0011/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0012", ?_assertEqual( + <<"\\u0012">>, + clean_string(<<16#0012/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0013", ?_assertEqual( + <<"\\u0013">>, + 
clean_string(<<16#0013/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0014", ?_assertEqual( + <<"\\u0014">>, + clean_string(<<16#0014/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0015", ?_assertEqual( + <<"\\u0015">>, + clean_string(<<16#0015/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0016", ?_assertEqual( + <<"\\u0016">>, + clean_string(<<16#0016/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0017", ?_assertEqual( + <<"\\u0017">>, + clean_string(<<16#0017/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0018", ?_assertEqual( + <<"\\u0018">>, + clean_string(<<16#0018/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u0019", ?_assertEqual( + <<"\\u0019">>, + clean_string(<<16#0019/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001a", ?_assertEqual( + <<"\\u001a">>, + clean_string(<<16#001a/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001b", ?_assertEqual( + <<"\\u001b">>, + clean_string(<<16#001b/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001c", ?_assertEqual( + <<"\\u001c">>, + clean_string(<<16#001c/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001d", ?_assertEqual( + <<"\\u001d">>, + clean_string(<<16#001d/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001e", ?_assertEqual( + <<"\\u001e">>, + clean_string(<<16#001e/utf8>>, #config{escaped_strings=true}) + )}, + {"maybe_escape u001f", ?_assertEqual( + <<"\\u001f">>, + clean_string(<<16#001f/utf8>>, #config{escaped_strings=true}) + )} + ]. 
+ + +bad_utf8_test_() -> + [ + {"noncharacter u+fffe", ?_assertEqual( + {error, badarg}, + clean_string(to_fake_utf8(16#fffe), #config{strict_utf8=true}) + )}, + {"noncharacter u+fffe replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(to_fake_utf8(16#fffe), #config{}) + )}, + {"noncharacter u+ffff", ?_assertEqual( + {error, badarg}, + clean_string(to_fake_utf8(16#ffff), #config{strict_utf8=true}) + )}, + {"noncharacter u+ffff replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(to_fake_utf8(16#ffff), #config{}) + )}, + {"orphan continuation byte u+0080", ?_assertEqual( + {error, badarg}, + clean_string(<<16#0080>>, #config{strict_utf8=true}) + )}, + {"orphan continuation byte u+0080 replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#0080>>, #config{}) + )}, + {"orphan continuation byte u+00bf", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00bf>>, #config{strict_utf8=true}) + )}, + {"orphan continuation byte u+00bf replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#00bf>>, #config{}) + )}, + {"2 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{strict_utf8=true}) + )}, + {"2 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 2), + clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) + )}, + {"3 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{strict_utf8=true}) + )}, + {"3 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 3), + clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) + )}, + {"4 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{strict_utf8=true}) + )}, + {"4 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 4), + clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) + )}, + 
{"5 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{strict_utf8=true}) + )}, + {"5 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 5), + clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) + )}, + {"6 continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{strict_utf8=true}) + )}, + {"6 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 6), + clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) + )}, + {"all continuation bytes", ?_assertEqual( + {error, badarg}, + clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{strict_utf8=true}) + )}, + {"all continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), + clean_string( + <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, + #config{} + ) + )}, + {"lonely start byte", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00c0>>, #config{strict_utf8=true}) + )}, + {"lonely start byte replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#00c0>>, #config{}) + )}, + {"lonely start bytes (2 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00c0, 32, 16#00df>>, #config{strict_utf8=true}) + )}, + {"lonely start bytes (2 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00c0, 32, 16#00df>>, #config{}) + )}, + {"lonely start bytes (3 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00e0, 32, 16#00ef>>, #config{strict_utf8=true}) + )}, + {"lonely start bytes (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) + )}, + {"lonely start bytes (4 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#00f0, 32, 16#00f7>>, #config{strict_utf8=true}) + )}, + {"lonely start 
bytes (4 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) + )}, + {"missing continuation byte (3 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<224, 160, 32>>, #config{strict_utf8=true}) + )}, + {"missing continuation byte (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<224, 160, 32>>, #config{}) + )}, + {"missing continuation byte (4 byte missing one)", ?_assertEqual( + {error, badarg}, + clean_string(<<240, 144, 128, 32>>, #config{strict_utf8=true}) + )}, + {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<240, 144, 128, 32>>, #config{}) + )}, + {"missing continuation byte (4 byte missing two)", ?_assertEqual( + {error, badarg}, + clean_string(<<240, 144, 32>>, #config{strict_utf8=true}) + )}, + {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<240, 144, 32>>, #config{}) + )}, + {"overlong encoding of u+002f (2 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#c0, 16#af, 32>>, #config{strict_utf8=true}) + )}, + {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#c0, 16#af, 32>>, #config{}) + )}, + {"overlong encoding of u+002f (3 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{strict_utf8=true}) + )}, + {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) + )}, + {"overlong encoding of u+002f (4 byte)", ?_assertEqual( + {error, badarg}, + clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{strict_utf8=true}) + )}, + {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) + )}, + {"highest overlong 2 byte sequence", 
?_assertEqual( + {error, badarg}, + clean_string(<<16#c1, 16#bf, 32>>, #config{strict_utf8=true}) + )}, + {"highest overlong 2 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#c1, 16#bf, 32>>, #config{}) + )}, + {"highest overlong 3 byte sequence", ?_assertEqual( + {error, badarg}, + clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{strict_utf8=true}) + )}, + {"highest overlong 3 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) + )}, + {"highest overlong 4 byte sequence", ?_assertEqual( + {error, badarg}, + clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{strict_utf8=true}) + )}, + {"highest overlong 4 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) + )} + ]. + + +json_escape_sequence_test_() -> + [ + {"json escape sequence test - 16#0000", ?_assertEqual(json_escape_sequence(16#0000), "\\u0000")}, + {"json escape sequence test - 16#abc", ?_assertEqual(json_escape_sequence(16#abc), "\\u0abc")}, + {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} + ]. + + -endif. \ No newline at end of file diff --git a/src/jsx_tests.hrl b/src/jsx_tests.hrl deleted file mode 100644 index e6ec7aa..0000000 --- a/src/jsx_tests.hrl +++ /dev/null @@ -1,688 +0,0 @@ -%% data and helper functions for tests - --export([init/1, handle_event/2]). --export([test_cases/0]). - - --include_lib("eunit/include/eunit.hrl"). - - -%% test handler -init([]) -> []. - -handle_event(end_json, State) -> lists:reverse([end_json] ++ State); -handle_event(Event, State) -> [Event] ++ State. - - -test_cases() -> - empty_array() - ++ nested_array() - ++ empty_object() - ++ nested_object() - ++ strings() - ++ literals() - ++ integers() - ++ floats() - ++ compound_object(). - - -empty_array() -> [{"[]", <<"[]">>, [], [start_array, end_array]}]. 
- -nested_array() -> - [{ - "[[[]]]", - <<"[[[]]]">>, - [[[]]], - [start_array, start_array, start_array, end_array, end_array, end_array] - }]. - - -empty_object() -> [{"{}", <<"{}">>, [{}], [start_object, end_object]}]. - -nested_object() -> - [{ - "{\"key\":{\"key\":{}}}", - <<"{\"key\":{\"key\":{}}}">>, - [{<<"key">>, [{<<"key">>, [{}]}]}], - [ - start_object, - {key, <<"key">>}, - start_object, - {key, <<"key">>}, - start_object, - end_object, - end_object, - end_object - ] - }]. - - -naked_strings() -> - Raw = [ - "", - "hello world" - ], - [ - { - String, - <<"\"", (list_to_binary(String))/binary, "\"">>, - list_to_binary(String), - [{string, list_to_binary(String)}] - } - || String <- Raw - ]. - -strings() -> - naked_strings() - ++ [ wrap_with_array(Test) || Test <- naked_strings() ] - ++ [ wrap_with_object(Test) || Test <- naked_strings() ]. - - -naked_integers() -> - Raw = [ - 1, 2, 3, - 127, 128, 129, - 255, 256, 257, - 65534, 65535, 65536, - 18446744073709551616, - 18446744073709551617 - ], - [ - { - integer_to_list(X), - list_to_binary(integer_to_list(X)), - X, - [{integer, X}] - } - || X <- Raw ++ [ -1 * Y || Y <- Raw ] ++ [0] - ]. - -integers() -> - naked_integers() - ++ [ wrap_with_array(Test) || Test <- naked_integers() ] - ++ [ wrap_with_object(Test) || Test <- naked_integers() ]. - - -naked_floats() -> - Raw = [ - 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, - 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, - 1234567890.0987654321, - 0.0e0, - 1234567890.0987654321e16, - 0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308, - 1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308, - 2.2250738585072014e-308, %% min normalized float - 1.7976931348623157e308, %% max normalized float - 5.0e-324, %% min denormalized float - 2.225073858507201e-308 %% max denormalized float - ], - [ - { - sane_float_to_list(X), - list_to_binary(sane_float_to_list(X)), - X, - [{float, X}] - } - || X <- Raw ++ [ -1 * Y || Y <- Raw ] - ]. 
- -floats() -> - naked_floats() - ++ [ wrap_with_array(Test) || Test <- naked_floats() ] - ++ [ wrap_with_object(Test) || Test <- naked_floats() ]. - - -naked_literals() -> - [ - { - atom_to_list(Literal), - atom_to_binary(Literal, unicode), - Literal, - [{literal, Literal}] - } - || Literal <- [true, false, null] - ]. - -literals() -> - naked_literals() - ++ [ wrap_with_array(Test) || Test <- naked_literals() ] - ++ [ wrap_with_object(Test) || Test <- naked_literals() ]. - - -compound_object() -> - [{ - "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]", - <<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>, - [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]], - [ - start_array, - start_object, - {key, <<"alpha">>}, - start_array, - {integer, 1}, - {integer, 2}, - {integer, 3}, - end_array, - {key, <<"beta">>}, - start_object, - {key, <<"alpha">>}, - start_array, - {float, 1.0}, - {float, 2.0}, - {float, 3.0}, - end_array, - {key, <<"beta">>}, - start_array, - {literal, true}, - {literal, false}, - end_array, - end_object, - end_object, - start_array, - start_object, - end_object, - end_array, - end_array - ] - }]. - - -wrap_with_array({Title, JSON, Term, Events}) -> - { - "[" ++ Title ++ "]", - <<"[", JSON/binary, "]">>, - [Term], - [start_array] ++ Events ++ [end_array] - }. - - -wrap_with_object({Title, JSON, Term, Events}) -> - { - "{\"key\":" ++ Title ++ "}", - <<"{\"key\":", JSON/binary, "}">>, - [{<<"key">>, Term}], - [start_object, {key, <<"key">>}] ++ Events ++ [end_object] - }. - - -sane_float_to_list(X) -> - [Output] = io_lib:format("~p", [X]), - Output. - --include("jsx_config.hrl"). 
- - -%% erlang refuses to encode certain codepoints, so fake them -to_fake_utf8(N) when N < 16#0080 -> <<N:8>>; -to_fake_utf8(N) when N < 16#0800 -> - <<0:5, Y:5, X:6>> = <<N:16>>, - <<2#110:3, Y:5, 2#10:2, X:6>>; -to_fake_utf8(N) when N < 16#10000 -> - <<Z:4, Y:6, X:6>> = <<N:16>>, - <<2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6>>; -to_fake_utf8(N) -> - <<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>, - <<2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6>>. - - -codepoints() -> - unicode:characters_to_binary( - [32, 33] - ++ lists:seq(35, 46) - ++ lists:seq(48, 91) - ++ lists:seq(93, 16#2027) - ++ lists:seq(16#202a, 16#d7ff) - ++ lists:seq(16#e000, 16#fdcf) - ++ lists:seq(16#fdf0, 16#fffd) - ). - -extended_codepoints() -> - unicode:characters_to_binary( - lists:seq(16#10000, 16#1fffd) ++ [ - 16#20000, 16#30000, 16#40000, 16#50000, 16#60000, - 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000, - 16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000 - ] - ). - -reserved_space() -> [ to_fake_utf8(N) || N <- lists:seq(16#fdd0, 16#fdef) ]. - -surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ]. - -noncharacters() -> [ to_fake_utf8(N) || N <- lists:seq(16#fffe, 16#ffff) ]. - -extended_noncharacters() -> - [ to_fake_utf8(N) || N <- [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] - ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] - ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] - ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] - ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff] - ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] - ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] - ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff] - ]. 
- - -clean_string_test_() -> - [ - {"clean codepoints", ?_assertEqual( - codepoints(), - jsx_parser:clean_string(codepoints(), #config{}) - )}, - {"clean extended codepoints", ?_assertEqual( - extended_codepoints(), - jsx_parser:clean_string(extended_codepoints(), #config{}) - )}, - {"escape path codepoints", ?_assertEqual( - codepoints(), - jsx_parser:clean_string(codepoints(), #config{escaped_strings=true}) - )}, - {"escape path extended codepoints", ?_assertEqual( - extended_codepoints(), - jsx_parser:clean_string(extended_codepoints(), #config{escaped_strings=true}) - )}, - {"error reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, reserved_space()) - )}, - {"error surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, surrogates()) - )}, - {"error noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, noncharacters()) - )}, - {"error extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), {error, badarg}), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{strict_utf8=true}) end, extended_noncharacters()) - )}, - {"clean reserved space", ?_assertEqual( - lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, reserved_space()) - )}, - {"clean surrogates", ?_assertEqual( - lists:duplicate(length(surrogates()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, surrogates()) - )}, - {"clean noncharacters", ?_assertEqual( - lists:duplicate(length(noncharacters()), 
<<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, noncharacters()) - )}, - {"clean extended noncharacters", ?_assertEqual( - lists:duplicate(length(extended_noncharacters()), <<16#fffd/utf8>>), - lists:map(fun(Codepoint) -> jsx_parser:clean_string(Codepoint, #config{}) end, extended_noncharacters()) - )} - ]. - - -maybe_escape(Bin, Config) -> jsx_parser:clean_string(Bin, Config). - -escape_test_() -> - [ - {"maybe_escape backspace", ?_assertEqual( - <<"\\b">>, - maybe_escape(<<16#0008/utf8>>, #config{escaped_strings=true}) - )}, - {"don't escape backspace", ?_assertEqual( - <<"\b">>, - maybe_escape(<<16#0008/utf8>>, #config{}) - )}, - {"maybe_escape tab", ?_assertEqual( - <<"\\t">>, - maybe_escape(<<16#0009/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape newline", ?_assertEqual( - <<"\\n">>, - maybe_escape(<<16#000a/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape formfeed", ?_assertEqual( - <<"\\f">>, - maybe_escape(<<16#000c/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape carriage return", ?_assertEqual( - <<"\\r">>, - maybe_escape(<<16#000d/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape quote", ?_assertEqual( - <<"\\\"">>, - maybe_escape(<<16#0022/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape forward slash", ?_assertEqual( - <<"\\/">>, - maybe_escape(<<16#002f/utf8>>, #config{escaped_strings=true, escaped_forward_slashes=true}) - )}, - {"do not maybe_escape forward slash", ?_assertEqual( - <<"/">>, - maybe_escape(<<16#002f/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape backslash", ?_assertEqual( - <<"\\\\">>, - maybe_escape(<<16#005c/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape jsonp (u2028)", ?_assertEqual( - <<"\\u2028">>, - maybe_escape(<<16#2028/utf8>>, #config{escaped_strings=true}) - )}, - {"do not maybe_escape jsonp (u2028)", ?_assertEqual( - <<16#2028/utf8>>, - maybe_escape(<<16#2028/utf8>>, 
#config{escaped_strings=true, unescaped_jsonp=true}) - )}, - {"maybe_escape jsonp (u2029)", ?_assertEqual( - <<"\\u2029">>, - maybe_escape(<<16#2029/utf8>>, #config{escaped_strings=true}) - )}, - {"do not maybe_escape jsonp (u2029)", ?_assertEqual( - <<16#2029/utf8>>, - maybe_escape(<<16#2029/utf8>>, #config{escaped_strings=true, unescaped_jsonp=true}) - )}, - {"maybe_escape u0000", ?_assertEqual( - <<"\\u0000">>, - maybe_escape(<<16#0000/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0001", ?_assertEqual( - <<"\\u0001">>, - maybe_escape(<<16#0001/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0002", ?_assertEqual( - <<"\\u0002">>, - maybe_escape(<<16#0002/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0003", ?_assertEqual( - <<"\\u0003">>, - maybe_escape(<<16#0003/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0004", ?_assertEqual( - <<"\\u0004">>, - maybe_escape(<<16#0004/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0005", ?_assertEqual( - <<"\\u0005">>, - maybe_escape(<<16#0005/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0006", ?_assertEqual( - <<"\\u0006">>, - maybe_escape(<<16#0006/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0007", ?_assertEqual( - <<"\\u0007">>, - maybe_escape(<<16#0007/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u000b", ?_assertEqual( - <<"\\u000b">>, - maybe_escape(<<16#000b/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u000e", ?_assertEqual( - <<"\\u000e">>, - maybe_escape(<<16#000e/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u000f", ?_assertEqual( - <<"\\u000f">>, - maybe_escape(<<16#000f/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0010", ?_assertEqual( - <<"\\u0010">>, - maybe_escape(<<16#0010/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0011", ?_assertEqual( - <<"\\u0011">>, - maybe_escape(<<16#0011/utf8>>, 
#config{escaped_strings=true}) - )}, - {"maybe_escape u0012", ?_assertEqual( - <<"\\u0012">>, - maybe_escape(<<16#0012/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0013", ?_assertEqual( - <<"\\u0013">>, - maybe_escape(<<16#0013/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0014", ?_assertEqual( - <<"\\u0014">>, - maybe_escape(<<16#0014/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0015", ?_assertEqual( - <<"\\u0015">>, - maybe_escape(<<16#0015/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0016", ?_assertEqual( - <<"\\u0016">>, - maybe_escape(<<16#0016/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0017", ?_assertEqual( - <<"\\u0017">>, - maybe_escape(<<16#0017/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0018", ?_assertEqual( - <<"\\u0018">>, - maybe_escape(<<16#0018/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u0019", ?_assertEqual( - <<"\\u0019">>, - maybe_escape(<<16#0019/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001a", ?_assertEqual( - <<"\\u001a">>, - maybe_escape(<<16#001a/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001b", ?_assertEqual( - <<"\\u001b">>, - maybe_escape(<<16#001b/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001c", ?_assertEqual( - <<"\\u001c">>, - maybe_escape(<<16#001c/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001d", ?_assertEqual( - <<"\\u001d">>, - maybe_escape(<<16#001d/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001e", ?_assertEqual( - <<"\\u001e">>, - maybe_escape(<<16#001e/utf8>>, #config{escaped_strings=true}) - )}, - {"maybe_escape u001f", ?_assertEqual( - <<"\\u001f">>, - maybe_escape(<<16#001f/utf8>>, #config{escaped_strings=true}) - )} - ]. 
- - -bad_utf8_test_() -> - [ - {"noncharacter u+fffe", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{strict_utf8=true}) - )}, - {"noncharacter u+fffe replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(to_fake_utf8(16#fffe), #config{}) - )}, - {"noncharacter u+ffff", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{strict_utf8=true}) - )}, - {"noncharacter u+ffff replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(to_fake_utf8(16#ffff), #config{}) - )}, - {"orphan continuation byte u+0080", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#0080>>, #config{strict_utf8=true}) - )}, - {"orphan continuation byte u+0080 replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(<<16#0080>>, #config{}) - )}, - {"orphan continuation byte u+00bf", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00bf>>, #config{strict_utf8=true}) - )}, - {"orphan continuation byte u+00bf replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00bf>>, #config{}) - )}, - {"2 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{strict_utf8=true}) - )}, - {"2 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 2), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) - )}, - {"3 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{strict_utf8=true}) - )}, - {"3 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 3), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) - )}, - {"4 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{strict_utf8=true}) - )}, - {"4 continuation 
bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 4), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) - )}, - {"5 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{strict_utf8=true}) - )}, - {"5 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 5), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) - )}, - {"6 continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{strict_utf8=true}) - )}, - {"6 continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, 6), - jsx_parser:clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) - )}, - {"all continuation bytes", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{strict_utf8=true}) - )}, - {"all continuation bytes replaced", ?_assertEqual( - binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), - jsx_parser:clean_string( - <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, - #config{} - ) - )}, - {"lonely start byte", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00c0>>, #config{strict_utf8=true}) - )}, - {"lonely start byte replaced", ?_assertEqual( - <<16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00c0>>, #config{}) - )}, - {"lonely start bytes (2 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{strict_utf8=true}) - )}, - {"lonely start bytes (2 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32, 16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00c0, 32, 16#00df>>, #config{}) - )}, - {"lonely start bytes (3 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, #config{strict_utf8=true}) - )}, - {"lonely start bytes (3 byte) 
replaced", ?_assertEqual( - <<16#fffd/utf8, 32, 16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) - )}, - {"lonely start bytes (4 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{strict_utf8=true}) - )}, - {"lonely start bytes (4 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32, 16#fffd/utf8>>, - jsx_parser:clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) - )}, - {"missing continuation byte (3 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<224, 160, 32>>, #config{strict_utf8=true}) - )}, - {"missing continuation byte (3 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<224, 160, 32>>, #config{}) - )}, - {"missing continuation byte (4 byte missing one)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{strict_utf8=true}) - )}, - {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<240, 144, 128, 32>>, #config{}) - )}, - {"missing continuation byte (4 byte missing two)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<240, 144, 32>>, #config{strict_utf8=true}) - )}, - {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<240, 144, 32>>, #config{}) - )}, - {"overlong encoding of u+002f (2 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#c0, 16#af, 32>>, #config{strict_utf8=true}) - )}, - {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#c0, 16#af, 32>>, #config{}) - )}, - {"overlong encoding of u+002f (3 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{strict_utf8=true}) - )}, - {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - 
jsx_parser:clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) - )}, - {"overlong encoding of u+002f (4 byte)", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{strict_utf8=true}) - )}, - {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) - )}, - {"highest overlong 2 byte sequence", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{strict_utf8=true}) - )}, - {"highest overlong 2 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#c1, 16#bf, 32>>, #config{}) - )}, - {"highest overlong 3 byte sequence", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{strict_utf8=true}) - )}, - {"highest overlong 3 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) - )}, - {"highest overlong 4 byte sequence", ?_assertEqual( - {error, badarg}, - jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{strict_utf8=true}) - )}, - {"highest overlong 4 byte sequence replaced", ?_assertEqual( - <<16#fffd/utf8, 32>>, - jsx_parser:clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{}) - )} - ]. - - -json_escape_sequence_test_() -> - [ - {"json escape sequence test - 16#0000", ?_assertEqual(jsx_parser:json_escape_sequence(16#0000), "\\u0000")}, - {"json escape sequence test - 16#abc", ?_assertEqual(jsx_parser:json_escape_sequence(16#abc), "\\u0abc")}, - {"json escape sequence test - 16#def", ?_assertEqual(jsx_parser:json_escape_sequence(16#def), "\\u0def")} - ]. \ No newline at end of file