bad utf8 tests for clean_string/2
This commit is contained in:
parent
d8cde35e45
commit
052a92d325
1 changed files with 140 additions and 177 deletions
|
@ -589,204 +589,167 @@ opts_test_() ->
|
||||||
].
|
].
|
||||||
|
|
||||||
|
|
||||||
xcode(Bin) -> xcode(Bin, #opts{}).
|
|
||||||
|
|
||||||
xcode(Bin, [replaced_bad_utf8]) -> xcode(Bin, #opts{replaced_bad_utf8=true});
|
|
||||||
xcode(Bin, Opts) ->
|
|
||||||
try clean_string(Bin, Opts)
|
|
||||||
catch error:badarg -> {error, badarg}
|
|
||||||
end.
|
|
||||||
|
|
||||||
|
|
||||||
is_bad({error, badarg}) -> true;
|
|
||||||
is_bad(_) -> false.
|
|
||||||
|
|
||||||
|
|
||||||
bad_utf8_test_() ->
|
bad_utf8_test_() ->
|
||||||
[
|
[
|
||||||
{"orphan continuation byte u+0080",
|
{"orphan continuation byte u+0080",
|
||||||
?_assert(is_bad(xcode(<<16#0080>>)))
|
?_assertError(badarg, clean_string(<<16#0080>>, #opts{}))
|
||||||
},
|
},
|
||||||
{"orphan continuation byte u+0080 replaced",
|
{"orphan continuation byte u+0080 replaced",
|
||||||
?_assertEqual(xcode(<<16#0080>>, [replaced_bad_utf8]), <<16#fffd/utf8>>)
|
?_assertEqual(<<16#fffd/utf8>>, clean_string(<<16#0080>>, #opts{replaced_bad_utf8=true}))
|
||||||
},
|
},
|
||||||
{"orphan continuation byte u+00bf",
|
{"orphan continuation byte u+00bf",
|
||||||
?_assert(is_bad(xcode(<<16#00bf>>)))
|
?_assertError(badarg, clean_string(<<16#00bf>>, #opts{}))
|
||||||
},
|
},
|
||||||
{"orphan continuation byte u+00bf replaced",
|
{"orphan continuation byte u+00bf replaced",
|
||||||
?_assertEqual(xcode(<<16#00bf>>, [replaced_bad_utf8]), <<16#fffd/utf8>>)
|
?_assertEqual(<<16#fffd/utf8>>, clean_string(<<16#00bf>>, #opts{replaced_bad_utf8=true}))
|
||||||
},
|
},
|
||||||
{"2 continuation bytes",
|
{"2 continuation bytes",
|
||||||
?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>)))
|
?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #opts{}))
|
||||||
},
|
|
||||||
{"2 continuation bytes replaced",
|
|
||||||
?_assertEqual(
|
|
||||||
xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [replaced_bad_utf8]),
|
|
||||||
binary:copy(<<16#fffd/utf8>>, 2)
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
|
{"2 continuation bytes replaced", ?_assertEqual(
|
||||||
|
binary:copy(<<16#fffd/utf8>>, 2),
|
||||||
|
clean_string(<<(binary:copy(<<16#0080>>, 2))/binary>>, #opts{replaced_bad_utf8=true})
|
||||||
|
)},
|
||||||
{"3 continuation bytes",
|
{"3 continuation bytes",
|
||||||
?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>)))
|
?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #opts{}))
|
||||||
},
|
|
||||||
{"3 continuation bytes replaced",
|
|
||||||
?_assertEqual(
|
|
||||||
xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [replaced_bad_utf8]),
|
|
||||||
binary:copy(<<16#fffd/utf8>>, 3)
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
|
{"3 continuation bytes replaced", ?_assertEqual(
|
||||||
|
binary:copy(<<16#fffd/utf8>>, 3),
|
||||||
|
clean_string(<<(binary:copy(<<16#0080>>, 3))/binary>>, #opts{replaced_bad_utf8=true})
|
||||||
|
)},
|
||||||
{"4 continuation bytes",
|
{"4 continuation bytes",
|
||||||
?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>)))
|
?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #opts{}))
|
||||||
},
|
|
||||||
{"4 continuation bytes replaced",
|
|
||||||
?_assertEqual(
|
|
||||||
xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>, [replaced_bad_utf8]),
|
|
||||||
binary:copy(<<16#fffd/utf8>>, 4)
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
|
{"4 continuation bytes replaced", ?_assertEqual(
|
||||||
|
binary:copy(<<16#fffd/utf8>>, 4),
|
||||||
|
clean_string(<<(binary:copy(<<16#0080>>, 4))/binary>>, #opts{replaced_bad_utf8=true})
|
||||||
|
)},
|
||||||
{"5 continuation bytes",
|
{"5 continuation bytes",
|
||||||
?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>)))
|
?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #opts{}))
|
||||||
},
|
|
||||||
{"5 continuation bytes replaced",
|
|
||||||
?_assertEqual(
|
|
||||||
xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [replaced_bad_utf8]),
|
|
||||||
binary:copy(<<16#fffd/utf8>>, 5)
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
|
{"5 continuation bytes replaced", ?_assertEqual(
|
||||||
|
binary:copy(<<16#fffd/utf8>>, 5),
|
||||||
|
clean_string(<<(binary:copy(<<16#0080>>, 5))/binary>>, #opts{replaced_bad_utf8=true})
|
||||||
|
)},
|
||||||
{"6 continuation bytes",
|
{"6 continuation bytes",
|
||||||
?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>)))
|
?_assertError(badarg, clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #opts{}))
|
||||||
},
|
},
|
||||||
{"6 continuation bytes replaced",
|
{"6 continuation bytes replaced", ?_assertEqual(
|
||||||
?_assertEqual(
|
binary:copy(<<16#fffd/utf8>>, 6),
|
||||||
xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [replaced_bad_utf8]),
|
clean_string(<<(binary:copy(<<16#0080>>, 6))/binary>>, #opts{replaced_bad_utf8=true})
|
||||||
binary:copy(<<16#fffd/utf8>>, 6)
|
)},
|
||||||
|
{"all continuation bytes", ?_assertError(
|
||||||
|
badarg,
|
||||||
|
clean_string(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, #opts{})
|
||||||
|
)},
|
||||||
|
{"all continuation bytes replaced", ?_assertEqual(
|
||||||
|
binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))),
|
||||||
|
clean_string(
|
||||||
|
<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>,
|
||||||
|
#opts{replaced_bad_utf8=true}
|
||||||
)
|
)
|
||||||
},
|
)},
|
||||||
{"all continuation bytes",
|
{"lonely start byte", ?_assertError(badarg, clean_string(<<16#00c0>>, #opts{}))},
|
||||||
?_assert(is_bad(xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>)))
|
{"lonely start byte replaced", ?_assertEqual(
|
||||||
},
|
<<16#fffd/utf8>>,
|
||||||
{"all continuation bytes replaced",
|
clean_string(<<16#00c0>>, #opts{replaced_bad_utf8=true})
|
||||||
?_assertEqual(
|
)},
|
||||||
xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [replaced_bad_utf8]),
|
{"lonely start bytes (2 byte)", ?_assertError(
|
||||||
binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf)))
|
badarg,
|
||||||
)
|
clean_string(<<16#00c0, 32, 16#00df>>, #opts{})
|
||||||
},
|
)},
|
||||||
{"lonely start byte",
|
{"lonely start bytes (2 byte) replaced", ?_assertEqual(
|
||||||
?_assert(is_bad(xcode(<<16#00c0>>)))
|
<<16#fffd/utf8, 32, 16#fffd/utf8>>,
|
||||||
},
|
clean_string(<<16#00c0, 32, 16#00df>>, #opts{replaced_bad_utf8=true})
|
||||||
{"lonely start byte replaced",
|
)},
|
||||||
?_assertEqual(
|
{"lonely start bytes (3 byte)", ?_assertError(
|
||||||
xcode(<<16#00c0>>, [replaced_bad_utf8]),
|
badarg,
|
||||||
<<16#fffd/utf8>>
|
clean_string(<<16#00e0, 32, 16#00ef>>, #opts{})
|
||||||
)
|
)},
|
||||||
},
|
{"lonely start bytes (3 byte) replaced", ?_assertEqual(
|
||||||
{"lonely start bytes (2 byte)",
|
<<16#fffd/utf8, 32, 16#fffd/utf8>>,
|
||||||
?_assert(is_bad(xcode(<<16#00c0, 32, 16#00df>>)))
|
clean_string(<<16#00e0, 32, 16#00ef>>, #opts{replaced_bad_utf8=true})
|
||||||
},
|
)},
|
||||||
{"lonely start bytes (2 byte) replaced",
|
{"lonely start bytes (4 byte)", ?_assertError(
|
||||||
?_assertEqual(
|
badarg,
|
||||||
xcode(<<16#00c0, 32, 16#00df>>, [replaced_bad_utf8]),
|
clean_string(<<16#00f0, 32, 16#00f7>>, #opts{})
|
||||||
<<16#fffd/utf8, 32, 16#fffd/utf8>>
|
)},
|
||||||
)
|
{"lonely start bytes (4 byte) replaced", ?_assertEqual(
|
||||||
},
|
<<16#fffd/utf8, 32, 16#fffd/utf8>>,
|
||||||
{"lonely start bytes (3 byte)",
|
clean_string(<<16#00f0, 32, 16#00f7>>, #opts{replaced_bad_utf8=true})
|
||||||
?_assert(is_bad(xcode(<<16#00e0, 32, 16#00ef>>)))
|
)},
|
||||||
},
|
{"missing continuation byte (3 byte)", ?_assertError(
|
||||||
{"lonely start bytes (3 byte) replaced",
|
badarg,
|
||||||
?_assertEqual(
|
clean_string(<<224, 160, 32>>, #opts{})
|
||||||
xcode(<<16#00e0, 32, 16#00ef>>, [replaced_bad_utf8]),
|
)},
|
||||||
<<16#fffd/utf8, 32, 16#fffd/utf8>>
|
{"missing continuation byte (3 byte) replaced", ?_assertEqual(
|
||||||
)
|
<<16#fffd/utf8, 32>>,
|
||||||
},
|
clean_string(<<224, 160, 32>>, #opts{replaced_bad_utf8=true})
|
||||||
{"lonely start bytes (4 byte)",
|
)},
|
||||||
?_assert(is_bad(xcode(<<16#00f0, 32, 16#00f7>>)))
|
{"missing continuation byte (4 byte missing one)", ?_assertError(
|
||||||
},
|
badarg,
|
||||||
{"lonely start bytes (4 byte) replaced",
|
clean_string(<<240, 144, 128, 32>>, #opts{})
|
||||||
?_assertEqual(
|
)},
|
||||||
xcode(<<16#00f0, 32, 16#00f7>>, [replaced_bad_utf8]),
|
{"missing continuation byte (4 byte missing one) replaced", ?_assertEqual(
|
||||||
<<16#fffd/utf8, 32, 16#fffd/utf8>>
|
<<16#fffd/utf8, 32>>,
|
||||||
)
|
clean_string(<<240, 144, 128, 32>>, #opts{replaced_bad_utf8=true})
|
||||||
},
|
)},
|
||||||
{"missing continuation byte (3 byte)",
|
{"missing continuation byte (4 byte missing two)", ?_assertError(
|
||||||
?_assert(is_bad(xcode(<<224, 160, 32>>)))
|
badarg,
|
||||||
},
|
clean_string(<<240, 144, 32>>, #opts{})
|
||||||
{"missing continuation byte (3 byte) replaced",
|
)},
|
||||||
?_assertEqual(
|
{"missing continuation byte (4 byte missing two) replaced", ?_assertEqual(
|
||||||
xcode(<<224, 160, 32>>, [replaced_bad_utf8]),
|
<<16#fffd/utf8, 32>>,
|
||||||
<<16#fffd/utf8, 32>>
|
clean_string(<<240, 144, 32>>, #opts{replaced_bad_utf8=true})
|
||||||
)
|
)},
|
||||||
},
|
{"overlong encoding of u+002f (2 byte)", ?_assertError(
|
||||||
{"missing continuation byte (4 byte missing one)",
|
badarg,
|
||||||
?_assert(is_bad(xcode(<<240, 144, 128, 32>>)))
|
clean_string(<<16#c0, 16#af, 32>>, #opts{})
|
||||||
},
|
)},
|
||||||
{"missing continuation byte (4 byte missing one) replaced",
|
{"overlong encoding of u+002f (2 byte) replaced", ?_assertEqual(
|
||||||
?_assertEqual(
|
<<16#fffd/utf8, 32>>,
|
||||||
xcode(<<240, 144, 128, 32>>, [replaced_bad_utf8]),
|
clean_string(<<16#c0, 16#af, 32>>, #opts{replaced_bad_utf8=true})
|
||||||
<<16#fffd/utf8, 32>>
|
)},
|
||||||
)
|
{"overlong encoding of u+002f (3 byte)", ?_assertError(
|
||||||
},
|
badarg,
|
||||||
{"missing continuation byte (4 byte missing two)",
|
clean_string(<<16#e0, 16#80, 16#af, 32>>, #opts{})
|
||||||
?_assert(is_bad(xcode(<<240, 144, 32>>)))
|
)},
|
||||||
},
|
{"overlong encoding of u+002f (3 byte) replaced", ?_assertEqual(
|
||||||
{"missing continuation byte (4 byte missing two) replaced",
|
<<16#fffd/utf8, 32>>,
|
||||||
?_assertEqual(
|
clean_string(<<16#e0, 16#80, 16#af, 32>>, #opts{replaced_bad_utf8=true})
|
||||||
xcode(<<240, 144, 32>>, [replaced_bad_utf8]),
|
)},
|
||||||
<<16#fffd/utf8, 32>>
|
{"overlong encoding of u+002f (4 byte)", ?_assertError(
|
||||||
)
|
badarg,
|
||||||
},
|
clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #opts{})
|
||||||
{"overlong encoding of u+002f (2 byte)",
|
)},
|
||||||
?_assert(is_bad(xcode(<<16#c0, 16#af, 32>>)))
|
{"overlong encoding of u+002f (4 byte) replaced", ?_assertEqual(
|
||||||
},
|
<<16#fffd/utf8, 32>>,
|
||||||
{"overlong encoding of u+002f (2 byte) replaced",
|
clean_string(<<16#f0, 16#80, 16#80, 16#af, 32>>, #opts{replaced_bad_utf8=true})
|
||||||
?_assertEqual(
|
)},
|
||||||
xcode(<<16#c0, 16#af, 32>>, [replaced_bad_utf8]),
|
{"highest overlong 2 byte sequence", ?_assertError(
|
||||||
<<16#fffd/utf8, 32>>
|
badarg,
|
||||||
)
|
clean_string(<<16#c1, 16#bf, 32>>, #opts{})
|
||||||
},
|
)},
|
||||||
{"overlong encoding of u+002f (3 byte)",
|
{"highest overlong 2 byte sequence replaced", ?_assertEqual(
|
||||||
?_assert(is_bad(xcode(<<16#e0, 16#80, 16#af, 32>>)))
|
<<16#fffd/utf8, 32>>,
|
||||||
},
|
clean_string(<<16#c1, 16#bf, 32>>, #opts{replaced_bad_utf8=true})
|
||||||
{"overlong encoding of u+002f (3 byte) replaced",
|
)},
|
||||||
?_assertEqual(
|
{"highest overlong 3 byte sequence", ?_assertError(
|
||||||
xcode(<<16#e0, 16#80, 16#af, 32>>, [replaced_bad_utf8]),
|
badarg,
|
||||||
<<16#fffd/utf8, 32>>
|
clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #opts{})
|
||||||
)
|
)},
|
||||||
},
|
{"highest overlong 3 byte sequence replaced", ?_assertEqual(
|
||||||
{"overlong encoding of u+002f (4 byte)",
|
<<16#fffd/utf8, 32>>,
|
||||||
?_assert(is_bad(xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>)))
|
clean_string(<<16#e0, 16#9f, 16#bf, 32>>, #opts{replaced_bad_utf8=true})
|
||||||
},
|
)},
|
||||||
{"overlong encoding of u+002f (4 byte) replaced",
|
{"highest overlong 4 byte sequence", ?_assertError(
|
||||||
?_assertEqual(
|
badarg,
|
||||||
xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [replaced_bad_utf8]),
|
clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #opts{})
|
||||||
<<16#fffd/utf8, 32>>
|
)},
|
||||||
)
|
{"highest overlong 4 byte sequence replaced", ?_assertEqual(
|
||||||
},
|
<<16#fffd/utf8, 32>>,
|
||||||
{"highest overlong 2 byte sequence",
|
clean_string(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, #opts{replaced_bad_utf8=true})
|
||||||
?_assert(is_bad(xcode(<<16#c1, 16#bf, 32>>)))
|
)}
|
||||||
},
|
|
||||||
{"highest overlong 2 byte sequence replaced",
|
|
||||||
?_assertEqual(
|
|
||||||
xcode(<<16#c1, 16#bf, 32>>, [replaced_bad_utf8]),
|
|
||||||
<<16#fffd/utf8, 32>>
|
|
||||||
)
|
|
||||||
},
|
|
||||||
{"highest overlong 3 byte sequence",
|
|
||||||
?_assert(is_bad(xcode(<<16#e0, 16#9f, 16#bf, 32>>)))
|
|
||||||
},
|
|
||||||
{"highest overlong 3 byte sequence replaced",
|
|
||||||
?_assertEqual(
|
|
||||||
xcode(<<16#e0, 16#9f, 16#bf, 32>>, [replaced_bad_utf8]),
|
|
||||||
<<16#fffd/utf8, 32>>
|
|
||||||
)
|
|
||||||
},
|
|
||||||
{"highest overlong 4 byte sequence",
|
|
||||||
?_assert(is_bad(xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>)))
|
|
||||||
},
|
|
||||||
{"highest overlong 4 byte sequence replaced",
|
|
||||||
?_assertEqual(
|
|
||||||
xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [replaced_bad_utf8]),
|
|
||||||
<<16#fffd/utf8, 32>>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
].
|
].
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue