From 7e243bafd1416ca2795cc4429d6dc0e74a733c30 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 28 Mar 2012 21:23:35 -0700 Subject: [PATCH] remove tests for malformed 5 and 6 byte sequences --- src/jsx_decoder.erl | 57 ++------------------------------------------ src/jsx_encoder.erl | 34 -------------------------- src/jsx_utils.erl | 58 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 56 insertions(+), 93 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 0fe7645..7c5d295 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -517,8 +517,8 @@ string(<>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) ; _ -> case Opts#opts.loose_unicode of - true -> noncharacter(<>, Handler, [Acc|Stack], Opts) - ; false -> ?error([<>, Handler, [Acc|Stack], Opts]) + true -> noncharacter(<>, Handler, [Acc|Stack], Opts) + ; false -> ?error([<>, Handler, [Acc|Stack], Opts]) end end; string(Bin, Handler, Stack, Opts) -> @@ -551,12 +551,6 @@ noncharacter(<>, Handler, Stack, Opts) when X >= 224, X =< 239 - %% overlong encodings and missing continuations of a 4 byte sequence noncharacter(<>, Handler, Stack, Opts) when X >= 240, X =< 247 -> strip_continuations(Rest, Handler, [3|Stack], Opts); -%% overlong encodings and missing continuations of a 5 byte sequence -noncharacter(<>, Handler, Stack, Opts) when X >= 248, X =< 251 -> - strip_continuations(Rest, Handler, [4|Stack], Opts); -%% overlong encodings and missing continuations of a 6 byte sequence -noncharacter(<>, Handler, Stack, Opts) when X == 252, X == 253 -> - strip_continuations(Rest, Handler, [5|Stack], Opts); %% unexpected bytes, including orphan continuations noncharacter(<<_, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts); @@ -1295,51 +1289,6 @@ good_characters_test_() -> ?_assertEqual(check_good(good_extended()), []) } ]. - -malformed_test_() -> - [ - {"malformed codepoint with 1 byte", - ?_assertEqual({error, badjson}, decode(<<128>>)) - }, - {"malformed codepoint with 2 bytes", - ?_assertEqual({error, badjson}, decode(<<128, 192>>)) - }, - {"malformed codepoint with 3 bytes", - ?_assertEqual({error, badjson}, decode(<<128, 192, 192>>)) - }, - {"malformed codepoint with 4 bytes", - ?_assertEqual({error, badjson}, decode(<<128, 192, 192, 192>>)) - } - ]. - -malformed_replaced_test_() -> - F = <<16#fffd/utf8>>, - [ - {"malformed codepoint with 1 byte", - ?_assertEqual( - [{string, <>}, end_json], - decode(<<34, 128, 34>>, [loose_unicode]) - ) - }, - {"malformed codepoint with 2 bytes", - ?_assertEqual( - [{string, <>}, end_json], - decode(<<34, 128, 192, 34>>, [loose_unicode]) - ) - }, - {"malformed codepoint with 3 bytes", - ?_assertEqual( - [{string, <>}, end_json], - decode(<<34, 128, 192, 192, 34>>, [loose_unicode]) - ) - }, - {"malformed codepoint with 4 bytes", - ?_assertEqual( - [{string, <>}, end_json], - decode(<<34, 128, 192, 192, 192, 34>>, [loose_unicode]) - ) - } - ]. check_bad(List) -> @@ -1365,8 +1314,6 @@ check([H|T], Opts, Acc) -> check(T, Opts, [{H, R}] ++ Acc). -decode(JSON) -> decode(JSON, []). - decode(JSON, Opts) -> try (decoder(jsx, [], Opts))(JSON) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 6f48804..f4655bc 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -181,12 +181,6 @@ clean_string(<>, Acc, Opts) when X >= 224, X =< 239 -> %% overlong encodings and missing continuations of a 4 byte sequence clean_string(<>, Acc, Opts) when X >= 240, X =< 247 -> clean_string(strip_continuations(Rest, 3), [16#fffd] ++ Acc, Opts); -%% overlong encodings and missing continuations of a 5 byte sequence -clean_string(<>, Acc, Opts) when X >= 248, X =< 251 -> - clean_string(strip_continuations(Rest, 4), [16#fffd] ++ Acc, Opts); -%% overlong encodings and missing continuations of a 6 byte sequence -clean_string(<>, Acc, Opts) when X == 252, X == 253 -> - clean_string(strip_continuations(Rest, 5), [16#fffd] ++ Acc, Opts); %% bad codepoints clean_string(<<_, Rest/binary>>, Acc, Opts) -> clean_string(Rest, [16#fffd] ++ Acc, Opts). @@ -341,34 +335,6 @@ malformed_test_() -> {"malformed codepoint with 4 bytes", ?_assertError(badarg, encode(<<128, 192, 192, 192>>))} ]. -malformed_replaced_test_() -> - F = <<16#fffd/utf8>>, - [ - {"malformed codepoint with 1 byte", - ?_assertEqual( - [{string, <>}, end_json], - encode(<<128>>, [loose_unicode]) - ) - }, - {"malformed codepoint with 2 bytes", - ?_assertEqual( - [{string, <>}, end_json], - encode(<<128, 192>>, [loose_unicode]) - ) - }, - {"malformed codepoint with 3 bytes", - ?_assertEqual( - [{string, <>}, end_json], - encode(<<128, 192, 192>>, [loose_unicode]) - ) - }, - {"malformed codepoint with 4 bytes", - ?_assertEqual( - [{string, <>}, end_json], - encode(<<128, 192, 192, 192>>, [loose_unicode]) - ) - } - ]. check_bad(List) -> lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end, diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index 462e31d..96a9f53 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -243,7 +243,7 @@ json_escape(Str, Opts, L, Len) when L < Len -> json_escape(<>, Opts, L + 3, Len); false -> B = unicode:characters_to_binary(json_escape_sequence(16#2028)), - json_escape(<>, Opts, L + size(B), Len + size(B) - size(<<16#2028/utf8>>)) + json_escape(<>, Opts, L + 6, Len + 3) end; <> -> case Opts#opts.no_jsonp_escapes of @@ -251,21 +251,65 @@ json_escape(Str, Opts, L, Len) when L < Len -> json_escape(<>, Opts, L + 3, Len); false -> B = unicode:characters_to_binary(json_escape_sequence(16#2029)), - json_escape(<>, Opts, L + size(B), Len + size(B) - size(<<16#2029/utf8>>)) + json_escape(<>, Opts, L + 6, Len + 3) end; <<_:L/binary, X/utf8, _/binary>> when X < 16#0080 -> json_escape(Str, Opts, L + 1, Len); <<_:L/binary, X/utf8, _/binary>> when X < 16#0800 -> json_escape(Str, Opts, L + 2, Len); - <<_:L/binary, X/utf8, _/binary>> when X < 16#10000 -> + <<_:L/binary, X/utf8, _/binary>> when X < 16#dcff -> json_escape(Str, Opts, L + 3, Len); - <<_:L/binary, _/utf8, _/binary>> -> + <<_:L/binary, X/utf8, _/binary>> when X > 16#dfff, X < 16#fdd0 -> + json_escape(Str, Opts, L + 3, Len); + <<_:L/binary, X/utf8, _/binary>> when X > 16#fdef, X < 16#fffe -> + json_escape(Str, Opts, L + 3, Len); + <> when X < 16#10000 -> + case Opts#opts.loose_unicode of + true -> json_escape(<>, Opts, L + 3, Len); + false -> erlang:error(badarg, [Str, Opts]) + end; + <> + when X == 16#1fffe; X == 16#1ffff; + X == 16#2fffe; X == 16#2ffff; + X == 16#3fffe; X == 16#3ffff; + X == 16#4fffe; X == 16#4ffff; + X == 16#5fffe; X == 16#5ffff; + X == 16#6fffe; X == 16#6ffff; + X == 16#7fffe; X == 16#7ffff; + X == 16#8fffe; X == 16#8ffff; + X == 16#9fffe; X == 16#9ffff; + X == 16#afffe; X == 16#affff; + X == 16#bfffe; X == 16#bffff; + X == 16#cfffe; X == 16#cffff; + X == 16#dfffe; X == 16#dffff; + X == 16#efffe; X == 16#effff; + X == 16#ffffe; X == 16#fffff; + X == 16#10fffe; X == 16#10ffff -> + case Opts#opts.loose_unicode of + true -> json_escape(<>, Opts, L + 3, Len - 1); + false -> erlang:error(badarg, [Str, Opts]) + end; + <<_:L/binary, X/utf8, _/binary>> when X >= 16#10000 -> json_escape(Str, Opts, L + 4, Len); <> when X >= 160 -> case Opts#opts.loose_unicode of true -> json_escape(<>, Opts, L + 3, Len); false -> erlang:error(badarg, [Str, Opts]) end; + <> when X == 190; X == 191 -> + case Opts#opts.loose_unicode of + true -> json_escape(<>, Opts, L + 3, Len); + false -> erlang:error(badarg, [Str, Opts]) + end; + <> when X >= 192, X =< 223 -> + {Rest, Stripped} = strip_continuations(T, 1, 0), + json_escape(<>, Opts, L + 3, Len + 2 - Stripped); + <> when X >= 224, X =< 239 -> + {Rest, Stripped} = strip_continuations(T, 2, 0), + json_escape(<>, Opts, L + 3, Len + 2 - Stripped); + <> when X >= 240, X =< 247 -> + {Rest, Stripped} = strip_continuations(T, 3, 0), + json_escape(<>, Opts, L + 3, Len + 2 - Stripped); <> -> case Opts#opts.loose_unicode of true -> json_escape(<>, Opts, L + 3, Len + 2); @@ -291,6 +335,12 @@ to_hex(15) -> $f; to_hex(X) -> X + 48. %% ascii "1" is [49], "2" is [50], etc... +strip_continuations(Bin, 0, N) -> {Bin, N}; +strip_continuations(<>, N, M) when X >= 128, X =< 191 -> + strip_continuations(Rest, N - 1, M + 1); +%% not a continuation byte +strip_continuations(Bin, _, N) -> {Bin, N}. + %% eunit tests -ifdef(TEST).