From 052a92d3253722e10b6086da7e457d05dff56700 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 5 Feb 2013 12:24:56 -0800 Subject: [PATCH] bad utf8 tests for clean_string/2 --- src/jsx_utils.erl | 317 ++++++++++++++++++++-------------------------- 1 file changed, 140 insertions(+), 177 deletions(-) diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index 19be8f9..2be2c7a 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -589,204 +589,167 @@ opts_test_() -> ]. -xcode(Bin) -> xcode(Bin, #opts{}). - -xcode(Bin, [replaced_bad_utf8]) -> xcode(Bin, #opts{replaced_bad_utf8=true}); -xcode(Bin, Opts) -> - try clean_string(Bin, Opts) - catch error:badarg -> {error, badarg} - end. - - -is_bad({error, badarg}) -> true; -is_bad(_) -> false. - - bad_utf8_test_() -> [ {"orphan continuation byte u+0080", - ?_assert(is_bad(xcode(<<16#0080>>))) + ?_assertError(badarg, clean_string(<<16#0080>>, #opts{})) }, {"orphan continuation byte u+0080 replaced", - ?_assertEqual(xcode(<<16#0080>>, [replaced_bad_utf8]), <<16#fffd/utf8>>) + ?_assertEqual(<<16#fffd/utf8>>, clean_string(<<16#0080>>, #opts{replaced_bad_utf8=true})) }, {"orphan continuation byte u+00bf", - ?_assert(is_bad(xcode(<<16#00bf>>))) + ?_assertError(badarg, clean_string(<<16#00bf>>, #opts{})) }, {"orphan continuation byte u+00bf replaced", - ?_assertEqual(xcode(<<16#00bf>>, [replaced_bad_utf8]), <<16#fffd/utf8>>) + ?_assertEqual(<<16#fffd/utf8>>, clean_string(<<16#00bf>>, #opts{replaced_bad_utf8=true})) }, {"2 continuation bytes", - ?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>))) - }, - {"2 continuation bytes replaced", - ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [replaced_bad_utf8]), - binary:copy(<<16#fffd/utf8>>, 2) - ) + ?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #opts{})) }, + {"2 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 2), + clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #opts{replaced_bad_utf8=true}) + )}, {"3 continuation bytes", - ?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>))) - }, - {"3 continuation bytes replaced", - ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [replaced_bad_utf8]), - binary:copy(<<16#fffd/utf8>>, 3) - ) + ?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #opts{})) }, + {"3 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 3), + clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #opts{replaced_bad_utf8=true}) + )}, {"4 continuation bytes", - ?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>))) - }, - {"4 continuation bytes replaced", - ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>, [replaced_bad_utf8]), - binary:copy(<<16#fffd/utf8>>, 4) - ) + ?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #opts{})) }, + {"4 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 4), + clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #opts{replaced_bad_utf8=true}) + )}, {"5 continuation bytes", - ?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>))) - }, - {"5 continuation bytes replaced", - ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [replaced_bad_utf8]), - binary:copy(<<16#fffd/utf8>>, 5) - ) + ?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #opts{})) }, + {"5 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 5), + clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #opts{replaced_bad_utf8=true}) + )}, {"6 continuation bytes", - ?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>))) + ?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #opts{})) }, - {"6 continuation bytes replaced", - ?_assertEqual( - xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [replaced_bad_utf8]), - binary:copy(<<16#fffd/utf8>>, 6) + {"6 continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, 6), + clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #opts{replaced_bad_utf8=true}) + )}, + {"all continuation bytes", ?_assertError( + badarg, + clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #opts{}) + )}, + {"all continuation bytes replaced", ?_assertEqual( + binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))), + clean_string( + <<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, + #opts{replaced_bad_utf8=true} ) - }, - {"all continuation bytes", - ?_assert(is_bad(xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>))) - }, - {"all continuation bytes replaced", - ?_assertEqual( - xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [replaced_bad_utf8]), - binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))) - ) - }, - {"lonely start byte", - ?_assert(is_bad(xcode(<<16#00c0>>))) - }, - {"lonely start byte replaced", - ?_assertEqual( - xcode(<<16#00c0>>, [replaced_bad_utf8]), - <<16#fffd/utf8>> - ) - }, - {"lonely start bytes (2 byte)", - ?_assert(is_bad(xcode(<<16#00c0, 32, 16#00df>>))) - }, - {"lonely start bytes (2 byte) replaced", - ?_assertEqual( - xcode(<<16#00c0, 32, 16#00df>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32, 16#fffd/utf8>> - ) - }, - {"lonely start bytes (3 byte)", - ?_assert(is_bad(xcode(<<16#00e0, 32, 16#00ef>>))) - }, - {"lonely start bytes (3 byte) replaced", - ?_assertEqual( - xcode(<<16#00e0, 32, 16#00ef>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32, 16#fffd/utf8>> - ) - }, - {"lonely start bytes (4 byte)", - ?_assert(is_bad(xcode(<<16#00f0, 32, 16#00f7>>))) - }, - {"lonely start bytes (4 byte) replaced", - ?_assertEqual( - xcode(<<16#00f0, 32, 16#00f7>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32, 16#fffd/utf8>> - ) - }, - {"missing continuation byte (3 byte)", - ?_assert(is_bad(xcode(<<224, 160, 32>>))) - }, - {"missing continuation byte (3 byte) replaced", - ?_assertEqual( - xcode(<<224, 160, 32>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32>> - ) - }, - {"missing continuation byte (4 byte missing one)", - ?_assert(is_bad(xcode(<<240, 144, 128, 32>>))) - }, - {"missing continuation byte (4 byte missing one) replaced", - ?_assertEqual( - xcode(<<240, 144, 128, 32>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32>> - ) - }, - {"missing continuation byte (4 byte missing two)", - ?_assert(is_bad(xcode(<<240, 144, 32>>))) - }, - {"missing continuation byte (4 byte missing two) replaced", - ?_assertEqual( - xcode(<<240, 144, 32>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32>> - ) - }, - {"overlong encoding of u+002f (2 byte)", - ?_assert(is_bad(xcode(<<16#c0, 16#af, 32>>))) - }, - {"overlong encoding of u+002f (2 byte) replaced", - ?_assertEqual( - xcode(<<16#c0, 16#af, 32>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32>> - ) - }, - {"overlong encoding of u+002f (3 byte)", - ?_assert(is_bad(xcode(<<16#e0, 16#80, 16#af, 32>>))) - }, - {"overlong encoding of u+002f (3 byte) replaced", - ?_assertEqual( - xcode(<<16#e0, 16#80, 16#af, 32>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32>> - ) - }, - {"overlong encoding of u+002f (4 byte)", - ?_assert(is_bad(xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>))) - }, - {"overlong encoding of u+002f (4 byte) replaced", - ?_assertEqual( - xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32>> - ) - }, - {"highest overlong 2 byte sequence", - ?_assert(is_bad(xcode(<<16#c1, 16#bf, 32>>))) - }, - {"highest overlong 2 byte sequence replaced", - ?_assertEqual( - xcode(<<16#c1, 16#bf, 32>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32>> - ) - }, - {"highest overlong 3 byte sequence", - ?_assert(is_bad(xcode(<<16#e0, 16#9f, 16#bf, 32>>))) - }, - {"highest overlong 3 byte sequence replaced", - ?_assertEqual( - xcode(<<16#e0, 16#9f, 16#bf, 32>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32>> - ) - }, - {"highest overlong 4 byte sequence", - ?_assert(is_bad(xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>))) - }, - {"highest overlong 4 byte sequence replaced", - ?_assertEqual( - xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [replaced_bad_utf8]), - <<16#fffd/utf8, 32>> - ) - } + )}, + {"lonely start byte", ?_assertError(badarg, clean_string(<<16#00c0>>, #opts{}))}, + {"lonely start byte replaced", ?_assertEqual( + <<16#fffd/utf8>>, + clean_string(<<16#00c0>>, #opts{replaced_bad_utf8=true}) + )}, + {"lonely start bytes (2 byte)", ?_assertError( + badarg, + clean_string(<<16#00c0, 32, 16#00df>>, #opts{}) + )}, + {"lonely start bytes (2 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00c0, 32, 16#00df>>, #opts{replaced_bad_utf8=true}) + )}, + {"lonely start bytes (3 byte)", ?_assertError( + badarg, + clean_string(<<16#00e0, 32, 16#00ef>>, #opts{}) + )}, + {"lonely start bytes (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00e0, 32, 16#00ef>>, #opts{replaced_bad_utf8=true}) + )}, + {"lonely start bytes (4 byte)", ?_assertError( + badarg, + clean_string(<<16#00f0, 32, 16#00f7>>, #opts{}) + )}, + {"lonely start bytes (4 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32, 16#fffd/utf8>>, + clean_string(<<16#00f0, 32, 16#00f7>>, #opts{replaced_bad_utf8=true}) + )}, + {"missing continuation byte (3 byte)", ?_assertError( + badarg, + clean_string(<<224, 160, 32>>, #opts{}) + )}, + {"missing continuation byte (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<224, 160, 32>>, #opts{replaced_bad_utf8=true}) + )}, + {"missing continuation byte (4 byte missing one)", ?_assertError( + badarg, + clean_string(<<240, 144, 128, 32>>, #opts{}) + )}, + {"missing continuation byte (4 byte missing one) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<240, 144, 128, 32>>, #opts{replaced_bad_utf8=true}) + )}, + {"missing continuation byte (4 byte missing two)", ?_assertError( + badarg, + clean_string(<<240, 144, 32>>, #opts{}) + )}, + {"missing continuation byte (4 byte missing two) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<240, 144, 32>>, #opts{replaced_bad_utf8=true}) + )}, + {"overlong encoding of u+002f (2 byte)", ?_assertError( + badarg, + clean_string(<<16#c0, 16#af, 32>>, #opts{}) + )}, + {"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#c0, 16#af, 32>>, #opts{replaced_bad_utf8=true}) + )}, + {"overlong encoding of u+002f (3 byte)", ?_assertError( + badarg, + clean_string(<<16#e0, 16#80, 16#af, 32>>, #opts{}) + )}, + {"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#e0, 16#80, 16#af, 32>>, #opts{replaced_bad_utf8=true}) + )}, + {"overlong encoding of u+002f (4 byte)", ?_assertError( + badarg, + clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #opts{}) + )}, + {"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #opts{replaced_bad_utf8=true}) + )}, + {"highest overlong 2 byte sequence", ?_assertError( + badarg, + clean_string(<<16#c1, 16#bf, 32>>, #opts{}) + )}, + {"highest overlong 2 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#c1, 16#bf, 32>>, #opts{replaced_bad_utf8=true}) + )}, + {"highest overlong 3 byte sequence", ?_assertError( + badarg, + clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #opts{}) + )}, + {"highest overlong 3 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #opts{replaced_bad_utf8=true}) + )}, + {"highest overlong 4 byte sequence", ?_assertError( + badarg, + clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #opts{}) + )}, + {"highest overlong 4 byte sequence replaced", ?_assertEqual( + <<16#fffd/utf8, 32>>, + clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #opts{replaced_bad_utf8=true}) + )} ].