prevent json objects with repeated keys from being emitted from the parser
This commit is contained in:
alisdair sullivan 2014-06-15 22:12:07 +00:00
parent 4df8e5092a
commit 4ad9c38cb9
3 changed files with 98 additions and 77 deletions

View file

@ -250,7 +250,9 @@ to understand and they prevented evolution of the encoding and decoding code
the shorthand form of property lists using atoms as properties so all the shorthand form of property lists using atoms as properties so all
properties must be tuples. all keys must be encoded as in `string` or as properties must be tuples. all keys must be encoded as in `string` or as
atoms or integers (which will be escaped and converted to binaries for atoms or integers (which will be escaped and converted to binaries for
presentation to handlers). values should be valid json values presentation to handlers). values should be valid json values. repeated
keys are tolerated in json text decoded to erlang terms but are not allowed
in erlang terms encoded to json
### incomplete input ### ### incomplete input ###

View file

@ -103,7 +103,7 @@ custom_error_handler_test_() ->
parser(self(), [{error_handler, Error}]) parser(self(), [{error_handler, Error}])
)}, )},
{"string error", ?_assertEqual( {"string error", ?_assertEqual(
{string, [{string, <<239, 191, 191>>}]}, {value, [{string, <<239, 191, 191>>}]},
parser(<<239, 191, 191>>, [{error_handler, Error}, strict]) parser(<<239, 191, 191>>, [{error_handler, Error}, strict])
)} )}
]. ].

View file

@ -88,7 +88,7 @@ handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event
value([start_object|Tokens], Handler, Stack, Config) -> value([start_object|Tokens], Handler, Stack, Config) ->
object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config); object(Tokens, handle_event(start_object, Handler, Config), [{object, sets:new()}|Stack], Config);
value([start_array|Tokens], Handler, Stack, Config) -> value([start_array|Tokens], Handler, Stack, Config) ->
array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config); array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config);
value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null -> value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null ->
@ -108,10 +108,10 @@ value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) ->
value([Number|Tokens], Handler, Stack, Config) when is_float(Number) -> value([Number|Tokens], Handler, Stack, Config) when is_float(Number) ->
value([{float, Number}] ++ Tokens, Handler, Stack, Config); value([{float, Number}] ++ Tokens, Handler, Stack, Config);
value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) -> value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) ->
case clean_string(String, Tokens, Handler, Stack, Config) of try clean_string(String, Config) of Clean ->
Clean when is_binary(Clean) -> maybe_done(Tokens, handle_event({string, Clean}, Handler, Config), Stack, Config)
maybe_done(Tokens, handle_event({string, Clean}, Handler, Config), Stack, Config); catch error:badarg ->
Error -> Error ?error(value, [{string, String}|Tokens], Handler, Stack, Config)
end; end;
value([String|Tokens], Handler, Stack, Config) when is_binary(String) -> value([String|Tokens], Handler, Stack, Config) when is_binary(String) ->
value([{string, String}] ++ Tokens, Handler, Stack, Config); value([{string, String}] ++ Tokens, Handler, Stack, Config);
@ -126,19 +126,26 @@ value(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
value(Token, Handler, Stack, Config) -> value(Token, Handler, Stack, Config) ->
value([Token], Handler, Stack, Config). value([Token], Handler, Stack, Config).
object([end_object|Tokens], Handler, [object|Stack], Config) -> object([end_object|Tokens], Handler, [{object, _}|Stack], Config) ->
maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config); maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config);
object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) -> object([{key, Key}|Tokens], Handler, Stack, Config)
case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of when is_atom(Key); is_binary(Key); is_integer(Key) ->
Clean when is_binary(Clean) -> object([Key|Tokens], Handler, Stack, Config);
value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config); object([Key|Tokens], Handler, [{object, Keys}|Stack], Config)
Error -> Error when is_atom(Key); is_binary(Key); is_integer(Key) ->
end; try
object([Key|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) -> CleanKey = clean_string(fix_key(Key), Config),
case clean_string(fix_key(Key), Tokens, Handler, Stack, Config) of case sets:is_element(CleanKey, Keys) of true -> erlang:error(badarg); _ -> ok end,
Clean when is_binary(Clean) -> CleanKey
value(Tokens, handle_event({key, Clean}, Handler, Config), Stack, Config); of K ->
Error -> Error value(
Tokens,
handle_event({key, K}, Handler, Config),
[{object, sets:add_element(K, Keys)}|Stack],
Config
)
catch error:badarg ->
?error(object, [{string, Key}|Tokens], Handler, Stack, Config)
end; end;
object([], Handler, Stack, Config) -> object([], Handler, Stack, Config) ->
incomplete(object, Handler, Stack, Config); incomplete(object, Handler, Stack, Config);
@ -156,7 +163,7 @@ array(Token, Handler, Stack, Config) ->
maybe_done([end_json], Handler, [], Config) -> maybe_done([end_json], Handler, [], Config) ->
done([end_json], Handler, [], Config); done([end_json], Handler, [], Config);
maybe_done(Tokens, Handler, [object|_] = Stack, Config) when is_list(Tokens) -> maybe_done(Tokens, Handler, [{object, _}|_] = Stack, Config) when is_list(Tokens) ->
object(Tokens, Handler, Stack, Config); object(Tokens, Handler, Stack, Config);
maybe_done(Tokens, Handler, [array|_] = Stack, Config) when is_list(Tokens) -> maybe_done(Tokens, Handler, [array|_] = Stack, Config) when is_list(Tokens) ->
array(Tokens, Handler, Stack, Config); array(Tokens, Handler, Stack, Config);
@ -183,15 +190,13 @@ fix_key(Key) when is_integer(Key) -> list_to_binary(integer_to_list(Key));
fix_key(Key) when is_binary(Key) -> Key. fix_key(Key) when is_binary(Key) -> Key.
clean_string(Bin, Tokens, Handler, Stack, Config) -> clean_string(Bin, #config{dirty_strings=true}) -> Bin;
case clean_string(Bin, Config) of clean_string(Bin, Config) ->
{error, badarg} -> ?error(string, [{string, Bin}|Tokens], Handler, Stack, Config); case clean(Bin, [], Config) of
{error, badarg} -> erlang:error(badarg);
String -> String String -> String
end. end.
clean_string(Bin, #config{dirty_strings=true}) -> Bin;
clean_string(Bin, Config) -> clean(Bin, [], Config).
%% escape and/or replace bad codepoints if requested %% escape and/or replace bad codepoints if requested
clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc)); clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc));
@ -484,7 +489,7 @@ custom_error_handler_test_() ->
parse([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}]) parse([{string, <<"">>}, {literal, true}, end_json], [{error_handler, Error}])
)}, )},
{"string error", ?_assertEqual( {"string error", ?_assertEqual(
{string, [{string, <<239, 191, 191>>}, end_json]}, {value, [{string, <<239, 191, 191>>}, end_json]},
parse([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict]) parse([{string, <<239, 191, 191>>}, end_json], [{error_handler, Error}, strict])
)} )}
]. ].
@ -579,6 +584,10 @@ extended_noncharacters() ->
++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff] ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff]
]. ].
clean_string_helper(String) ->
try clean_string(String, #config{strict_utf8=true}) of Clean -> Clean
catch error:badarg -> {error, badarg}
end.
clean_string_test_() -> clean_string_test_() ->
[ [
@ -600,19 +609,19 @@ clean_string_test_() ->
)}, )},
{"error reserved space", ?_assertEqual( {"error reserved space", ?_assertEqual(
lists:duplicate(length(reserved_space()), {error, badarg}), lists:duplicate(length(reserved_space()), {error, badarg}),
lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, reserved_space()) lists:map(fun(Codepoint) -> clean_string_helper(Codepoint) end, reserved_space())
)}, )},
{"error surrogates", ?_assertEqual( {"error surrogates", ?_assertEqual(
lists:duplicate(length(surrogates()), {error, badarg}), lists:duplicate(length(surrogates()), {error, badarg}),
lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, surrogates()) lists:map(fun(Codepoint) -> clean_string_helper(Codepoint) end, surrogates())
)}, )},
{"error noncharacters", ?_assertEqual( {"error noncharacters", ?_assertEqual(
lists:duplicate(length(noncharacters()), {error, badarg}), lists:duplicate(length(noncharacters()), {error, badarg}),
lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, noncharacters()) lists:map(fun(Codepoint) -> clean_string_helper(Codepoint) end, noncharacters())
)}, )},
{"error extended noncharacters", ?_assertEqual( {"error extended noncharacters", ?_assertEqual(
lists:duplicate(length(extended_noncharacters()), {error, badarg}), lists:duplicate(length(extended_noncharacters()), {error, badarg}),
lists:map(fun(Codepoint) -> clean_string(Codepoint, #config{strict_utf8=true}) end, extended_noncharacters()) lists:map(fun(Codepoint) -> clean_string_helper(Codepoint) end, extended_noncharacters())
)}, )},
{"clean reserved space", ?_assertEqual( {"clean reserved space", ?_assertEqual(
lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>), lists:duplicate(length(reserved_space()), <<16#fffd/utf8>>),
@ -804,80 +813,80 @@ escape_test_() ->
bad_utf8_test_() -> bad_utf8_test_() ->
[ [
{"noncharacter u+fffe", ?_assertEqual( {"noncharacter u+fffe", ?_assertError(
{error, badarg}, badarg,
clean_string(to_fake_utf8(16#fffe), #config{strict_utf8=true}) clean_string(to_fake_utf8(16#fffe), #config{strict_utf8=true})
)}, )},
{"noncharacter u+fffe replaced", ?_assertEqual( {"noncharacter u+fffe replaced", ?_assertEqual(
<<16#fffd/utf8>>, <<16#fffd/utf8>>,
clean_string(to_fake_utf8(16#fffe), #config{}) clean_string(to_fake_utf8(16#fffe), #config{})
)}, )},
{"noncharacter u+ffff", ?_assertEqual( {"noncharacter u+ffff", ?_assertError(
{error, badarg}, badarg,
clean_string(to_fake_utf8(16#ffff), #config{strict_utf8=true}) clean_string(to_fake_utf8(16#ffff), #config{strict_utf8=true})
)}, )},
{"noncharacter u+ffff replaced", ?_assertEqual( {"noncharacter u+ffff replaced", ?_assertEqual(
<<16#fffd/utf8>>, <<16#fffd/utf8>>,
clean_string(to_fake_utf8(16#ffff), #config{}) clean_string(to_fake_utf8(16#ffff), #config{})
)}, )},
{"orphan continuation byte u+0080", ?_assertEqual( {"orphan continuation byte u+0080", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#0080>>, #config{strict_utf8=true}) clean_string(<<16#0080>>, #config{strict_utf8=true})
)}, )},
{"orphan continuation byte u+0080 replaced", ?_assertEqual( {"orphan continuation byte u+0080 replaced", ?_assertEqual(
<<16#fffd/utf8>>, <<16#fffd/utf8>>,
clean_string(<<16#0080>>, #config{}) clean_string(<<16#0080>>, #config{})
)}, )},
{"orphan continuation byte u+00bf", ?_assertEqual( {"orphan continuation byte u+00bf", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#00bf>>, #config{strict_utf8=true}) clean_string(<<16#00bf>>, #config{strict_utf8=true})
)}, )},
{"orphan continuation byte u+00bf replaced", ?_assertEqual( {"orphan continuation byte u+00bf replaced", ?_assertEqual(
<<16#fffd/utf8>>, <<16#fffd/utf8>>,
clean_string(<<16#00bf>>, #config{}) clean_string(<<16#00bf>>, #config{})
)}, )},
{"2 continuation bytes", ?_assertEqual( {"2 continuation bytes", ?_assertError(
{error, badarg}, badarg,
clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{strict_utf8=true}) clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{strict_utf8=true})
)}, )},
{"2 continuation bytes replaced", ?_assertEqual( {"2 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 2), binary:copy(<<16#fffd/utf8>>, 2),
clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{}) clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #config{})
)}, )},
{"3 continuation bytes", ?_assertEqual( {"3 continuation bytes", ?_assertError(
{error, badarg}, badarg,
clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{strict_utf8=true}) clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{strict_utf8=true})
)}, )},
{"3 continuation bytes replaced", ?_assertEqual( {"3 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 3), binary:copy(<<16#fffd/utf8>>, 3),
clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{}) clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #config{})
)}, )},
{"4 continuation bytes", ?_assertEqual( {"4 continuation bytes", ?_assertError(
{error, badarg}, badarg,
clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{strict_utf8=true}) clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{strict_utf8=true})
)}, )},
{"4 continuation bytes replaced", ?_assertEqual( {"4 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 4), binary:copy(<<16#fffd/utf8>>, 4),
clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{}) clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #config{})
)}, )},
{"5 continuation bytes", ?_assertEqual( {"5 continuation bytes", ?_assertError(
{error, badarg}, badarg,
clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{strict_utf8=true}) clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{strict_utf8=true})
)}, )},
{"5 continuation bytes replaced", ?_assertEqual( {"5 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 5), binary:copy(<<16#fffd/utf8>>, 5),
clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{}) clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #config{})
)}, )},
{"6 continuation bytes", ?_assertEqual( {"6 continuation bytes", ?_assertError(
{error, badarg}, badarg,
clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{strict_utf8=true}) clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{strict_utf8=true})
)}, )},
{"6 continuation bytes replaced", ?_assertEqual( {"6 continuation bytes replaced", ?_assertEqual(
binary:copy(<<16#fffd/utf8>>, 6), binary:copy(<<16#fffd/utf8>>, 6),
clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{}) clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #config{})
)}, )},
{"all continuation bytes", ?_assertEqual( {"all continuation bytes", ?_assertError(
{error, badarg}, badarg,
clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{strict_utf8=true}) clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #config{strict_utf8=true})
)}, )},
{"all continuation bytes replaced", ?_assertEqual( {"all continuation bytes replaced", ?_assertEqual(
@ -887,104 +896,104 @@ bad_utf8_test_() ->
#config{} #config{}
) )
)}, )},
{"lonely start byte", ?_assertEqual( {"lonely start byte", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#00c0>>, #config{strict_utf8=true}) clean_string(<<16#00c0>>, #config{strict_utf8=true})
)}, )},
{"lonely start byte replaced", ?_assertEqual( {"lonely start byte replaced", ?_assertEqual(
<<16#fffd/utf8>>, <<16#fffd/utf8>>,
clean_string(<<16#00c0>>, #config{}) clean_string(<<16#00c0>>, #config{})
)}, )},
{"lonely start bytes (2 byte)", ?_assertEqual( {"lonely start bytes (2 byte)", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#00c0, 32, 16#00df>>, #config{strict_utf8=true}) clean_string(<<16#00c0, 32, 16#00df>>, #config{strict_utf8=true})
)}, )},
{"lonely start bytes (2 byte) replaced", ?_assertEqual( {"lonely start bytes (2 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32, 16#fffd/utf8>>, <<16#fffd/utf8, 32, 16#fffd/utf8>>,
clean_string(<<16#00c0, 32, 16#00df>>, #config{}) clean_string(<<16#00c0, 32, 16#00df>>, #config{})
)}, )},
{"lonely start bytes (3 byte)", ?_assertEqual( {"lonely start bytes (3 byte)", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#00e0, 32, 16#00ef>>, #config{strict_utf8=true}) clean_string(<<16#00e0, 32, 16#00ef>>, #config{strict_utf8=true})
)}, )},
{"lonely start bytes (3 byte) replaced", ?_assertEqual( {"lonely start bytes (3 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32, 16#fffd/utf8>>, <<16#fffd/utf8, 32, 16#fffd/utf8>>,
clean_string(<<16#00e0, 32, 16#00ef>>, #config{}) clean_string(<<16#00e0, 32, 16#00ef>>, #config{})
)}, )},
{"lonely start bytes (4 byte)", ?_assertEqual( {"lonely start bytes (4 byte)", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#00f0, 32, 16#00f7>>, #config{strict_utf8=true}) clean_string(<<16#00f0, 32, 16#00f7>>, #config{strict_utf8=true})
)}, )},
{"lonely start bytes (4 byte) replaced", ?_assertEqual( {"lonely start bytes (4 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32, 16#fffd/utf8>>, <<16#fffd/utf8, 32, 16#fffd/utf8>>,
clean_string(<<16#00f0, 32, 16#00f7>>, #config{}) clean_string(<<16#00f0, 32, 16#00f7>>, #config{})
)}, )},
{"missing continuation byte (3 byte)", ?_assertEqual( {"missing continuation byte (3 byte)", ?_assertError(
{error, badarg}, badarg,
clean_string(<<224, 160, 32>>, #config{strict_utf8=true}) clean_string(<<224, 160, 32>>, #config{strict_utf8=true})
)}, )},
{"missing continuation byte (3 byte) replaced", ?_assertEqual( {"missing continuation byte (3 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>, <<16#fffd/utf8, 32>>,
clean_string(<<224, 160, 32>>, #config{}) clean_string(<<224, 160, 32>>, #config{})
)}, )},
{"missing continuation byte (4 byte missing one)", ?_assertEqual( {"missing continuation byte (4 byte missing one)", ?_assertError(
{error, badarg}, badarg,
clean_string(<<240, 144, 128, 32>>, #config{strict_utf8=true}) clean_string(<<240, 144, 128, 32>>, #config{strict_utf8=true})
)}, )},
{"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>, <<16#fffd/utf8, 32>>,
clean_string(<<240, 144, 128, 32>>, #config{}) clean_string(<<240, 144, 128, 32>>, #config{})
)}, )},
{"missing continuation byte (4 byte missing two)", ?_assertEqual( {"missing continuation byte (4 byte missing two)", ?_assertError(
{error, badarg}, badarg,
clean_string(<<240, 144, 32>>, #config{strict_utf8=true}) clean_string(<<240, 144, 32>>, #config{strict_utf8=true})
)}, )},
{"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>, <<16#fffd/utf8, 32>>,
clean_string(<<240, 144, 32>>, #config{}) clean_string(<<240, 144, 32>>, #config{})
)}, )},
{"overlong encoding of u+002f (2 byte)", ?_assertEqual( {"overlong encoding of u+002f (2 byte)", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#c0, 16#af, 32>>, #config{strict_utf8=true}) clean_string(<<16#c0, 16#af, 32>>, #config{strict_utf8=true})
)}, )},
{"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>, <<16#fffd/utf8, 32>>,
clean_string(<<16#c0, 16#af, 32>>, #config{}) clean_string(<<16#c0, 16#af, 32>>, #config{})
)}, )},
{"overlong encoding of u+002f (3 byte)", ?_assertEqual( {"overlong encoding of u+002f (3 byte)", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{strict_utf8=true}) clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{strict_utf8=true})
)}, )},
{"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>, <<16#fffd/utf8, 32>>,
clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{}) clean_string(<<16#e0, 16#80, 16#af, 32>>, #config{})
)}, )},
{"overlong encoding of u+002f (4 byte)", ?_assertEqual( {"overlong encoding of u+002f (4 byte)", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{strict_utf8=true}) clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{strict_utf8=true})
)}, )},
{"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>, <<16#fffd/utf8, 32>>,
clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{}) clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #config{})
)}, )},
{"highest overlong 2 byte sequence", ?_assertEqual( {"highest overlong 2 byte sequence", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#c1, 16#bf, 32>>, #config{strict_utf8=true}) clean_string(<<16#c1, 16#bf, 32>>, #config{strict_utf8=true})
)}, )},
{"highest overlong 2 byte sequence replaced", ?_assertEqual( {"highest overlong 2 byte sequence replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>, <<16#fffd/utf8, 32>>,
clean_string(<<16#c1, 16#bf, 32>>, #config{}) clean_string(<<16#c1, 16#bf, 32>>, #config{})
)}, )},
{"highest overlong 3 byte sequence", ?_assertEqual( {"highest overlong 3 byte sequence", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{strict_utf8=true}) clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{strict_utf8=true})
)}, )},
{"highest overlong 3 byte sequence replaced", ?_assertEqual( {"highest overlong 3 byte sequence replaced", ?_assertEqual(
<<16#fffd/utf8, 32>>, <<16#fffd/utf8, 32>>,
clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{}) clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #config{})
)}, )},
{"highest overlong 4 byte sequence", ?_assertEqual( {"highest overlong 4 byte sequence", ?_assertError(
{error, badarg}, badarg,
clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{strict_utf8=true}) clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #config{strict_utf8=true})
)}, )},
{"highest overlong 4 byte sequence replaced", ?_assertEqual( {"highest overlong 4 byte sequence replaced", ?_assertEqual(
@ -1009,4 +1018,14 @@ fix_key_test_() ->
{"integer key", ?_assertEqual(fix_key(123), <<"123">>)} {"integer key", ?_assertEqual(fix_key(123), <<"123">>)}
]. ].
repeated_key_test_() ->
Parse = fun(Events, Config) -> (parser(?MODULE, [], Config))(Events ++ [end_json]) end,
[
{"repeated key", ?_assertError(
badarg,
Parse([start_object, <<"key">>, true, <<"key">>, true, end_object], [])
)}
].
-endif. -endif.