remove tests for malformed 5 and 6 byte sequences

2012-03-28 21:23:35 -07:00 · 2012-03-28 21:23:35 -07:00 · 7e243bafd1
commit 7e243bafd1
parent edbe4d16ab
3 changed files with 56 additions and 93 deletions
--- a/src/jsx_decoder.erl
+++ b/src/jsx_decoder.erl
@ -517,8 +517,8 @@ string(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
            string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts)
        ; _ ->
            case Opts#opts.loose_unicode of
-                true -> noncharacter(<<S, Rest/binary>>, Handler, [Acc|Stack], Opts)
+                true -> noncharacter(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts)
-                ; false -> ?error([<<S, Rest/binary>>, Handler, [Acc|Stack], Opts])
+                ; false -> ?error([<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts])
            end
    end;
 string(Bin, Handler, Stack, Opts) ->
@ -551,12 +551,6 @@ noncharacter(<<X, Rest/binary>>, Handler, Stack, Opts) when X >= 224, X =< 239 -
 %% overlong encodings and missing continuations of a 4 byte sequence
 noncharacter(<<X, Rest/binary>>, Handler, Stack, Opts) when X >= 240, X =< 247 ->
    strip_continuations(Rest, Handler, [3|Stack], Opts);
 %% overlong encodings and missing continuations of a 5 byte sequence
 noncharacter(<<X, Rest/binary>>, Handler, Stack, Opts) when X >= 248, X =< 251 ->
    strip_continuations(Rest, Handler, [4|Stack], Opts);
 %% overlong encodings and missing continuations of a 6 byte sequence
 %% (lead bytes 252 and 253). In a guard `,` means AND, so testing equality
 %% against both values at once can never succeed; the bounds are written as
 %% a range, like the sibling clauses, so this clause is actually reachable.
 %% Pushes the number of continuation bytes to drop (5) onto the stack and
 %% hands the remaining input to strip_continuations/4.
 noncharacter(<<X, Rest/binary>>, Handler, Stack, Opts) when X >= 252, X =< 253 ->
    strip_continuations(Rest, Handler, [5|Stack], Opts);
 %% unexpected bytes, including orphan continuations
 noncharacter(<<_, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
    string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts);
@ -1296,51 +1290,6 @@ good_characters_test_() ->
        }
    ].
 %% Test generator: in strict mode (no options) each malformed byte run below
 %% must make decode/1 return `{error, badjson}`.
 malformed_test_() ->
    Cases = [
        {"malformed codepoint with 1 byte", <<128>>},
        {"malformed codepoint with 2 bytes", <<128, 192>>},
        {"malformed codepoint with 3 bytes", <<128, 192, 192>>},
        {"malformed codepoint with 4 bytes", <<128, 192, 192, 192>>}
    ],
    [
        {Title, ?_assertEqual({error, badjson}, decode(Input))}
        || {Title, Input} <- Cases
    ].
 %% Test generator: with `loose_unicode` set, each malformed byte inside a
 %% quoted JSON string (34 is the double quote) is expected to decode to one
 %% U+FFFD replacement character.
 malformed_replaced_test_() ->
    Replacement = <<16#fffd/utf8>>,
    %% {Title, number of expected replacement characters, raw input}
    Cases = [
        {"malformed codepoint with 1 byte", 1, <<34, 128, 34>>},
        {"malformed codepoint with 2 bytes", 2, <<34, 128, 192, 34>>},
        {"malformed codepoint with 3 bytes", 3, <<34, 128, 192, 192, 34>>},
        {"malformed codepoint with 4 bytes", 4, <<34, 128, 192, 192, 192, 34>>}
    ],
    [
        {Title,
            ?_assertEqual(
                [{string, binary:copy(Replacement, Count)}, end_json],
                decode(Input, [loose_unicode])
            )
        }
        || {Title, Count, Input} <- Cases
    ].
 check_bad(List) ->
    lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end,
@ -1365,8 +1314,6 @@ check([H|T], Opts, Acc) ->
    check(T, Opts, [{H, R}] ++ Acc).
 %% Test helper: decode with an empty option list.
 decode(Input) ->
    decode(Input, []).
 decode(JSON, Opts) ->
    try
        (decoder(jsx, [], Opts))(JSON)
--- a/src/jsx_encoder.erl
+++ b/src/jsx_encoder.erl
@ -181,12 +181,6 @@ clean_string(<<X, Rest/binary>>, Acc, Opts) when X >= 224, X =< 239 ->
 %% overlong encodings and missing continuations of a 4 byte sequence
 clean_string(<<X, Rest/binary>>, Acc, Opts) when X >= 240, X =< 247 ->
    clean_string(strip_continuations(Rest, 3), [16#fffd] ++ Acc, Opts);
 %% overlong encodings and missing continuations of a 5 byte sequence
 clean_string(<<X, Rest/binary>>, Acc, Opts) when X >= 248, X =< 251 ->
    clean_string(strip_continuations(Rest, 4), [16#fffd] ++ Acc, Opts);
 %% overlong encodings and missing continuations of a 6 byte sequence
 %% (lead bytes 252 and 253). In a guard `,` means AND, so an equality test
 %% against both values at once is always false; the bounds are written as a
 %% range, like the sibling clauses, so this clause can actually match.
 %% Drops up to 5 continuation bytes and records a single U+FFFD for the run.
 clean_string(<<X, Rest/binary>>, Acc, Opts) when X >= 252, X =< 253 ->
    clean_string(strip_continuations(Rest, 5), [16#fffd] ++ Acc, Opts);
 %% bad codepoints
 clean_string(<<_, Rest/binary>>, Acc, Opts) ->
    clean_string(Rest, [16#fffd] ++ Acc, Opts).
@ -341,34 +335,6 @@ malformed_test_() ->
        {"malformed codepoint with 4 bytes", ?_assertError(badarg, encode(<<128, 192, 192, 192>>))}
    ].
 %% Test generator: with `loose_unicode` set, each malformed input byte is
 %% expected to be emitted by encode/2 as one U+FFFD replacement character.
 malformed_replaced_test_() ->
    Replacement = <<16#fffd/utf8>>,
    %% {Title, raw input}; one replacement is expected per input byte
    Cases = [
        {"malformed codepoint with 1 byte", <<128>>},
        {"malformed codepoint with 2 bytes", <<128, 192>>},
        {"malformed codepoint with 3 bytes", <<128, 192, 192>>},
        {"malformed codepoint with 4 bytes", <<128, 192, 192, 192>>}
    ],
    [
        {Title,
            ?_assertEqual(
                [{string, binary:copy(Replacement, byte_size(Input))}, end_json],
                encode(Input, [loose_unicode])
            )
        }
        || {Title, Input} <- Cases
    ].
 check_bad(List) ->
    lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end,
--- a/src/jsx_utils.erl
+++ b/src/jsx_utils.erl
@ -243,7 +243,7 @@ json_escape(Str, Opts, L, Len) when L < Len ->
                    json_escape(<<H/binary, 16#2028/utf8, T/binary>>, Opts, L + 3, Len);
                false ->
                    B = unicode:characters_to_binary(json_escape_sequence(16#2028)),
-                    json_escape(<<H/binary, B/binary, T/binary>>, Opts, L + size(B), Len + size(B) - size(<<16#2028/utf8>>))
+                    json_escape(<<H/binary, B/binary, T/binary>>, Opts, L + 6, Len + 3)
            end;
        <<H:L/binary, 16#2029/utf8, T/binary>> ->
            case Opts#opts.no_jsonp_escapes of
@ -251,21 +251,65 @@ json_escape(Str, Opts, L, Len) when L < Len ->
                    json_escape(<<H/binary, 16#2029/utf8, T/binary>>, Opts, L + 3, Len);
                false ->
                    B = unicode:characters_to_binary(json_escape_sequence(16#2029)),
-                    json_escape(<<H/binary, B/binary, T/binary>>, Opts, L + size(B), Len + size(B) - size(<<16#2029/utf8>>))
+                    json_escape(<<H/binary, B/binary, T/binary>>, Opts, L + 6, Len + 3)
            end;
        <<_:L/binary, X/utf8, _/binary>> when X < 16#0080 ->   
            json_escape(Str, Opts, L + 1, Len);
        <<_:L/binary, X/utf8, _/binary>> when X < 16#0800 ->
            json_escape(Str, Opts, L + 2, Len);
-        <<_:L/binary, X/utf8, _/binary>> when X < 16#10000 ->
+        <<_:L/binary, X/utf8, _/binary>> when X < 16#dcff ->
            json_escape(Str, Opts, L + 3, Len);
-        <<_:L/binary, _/utf8, _/binary>> ->
+        <<_:L/binary, X/utf8, _/binary>> when X > 16#dfff, X < 16#fdd0 ->
            json_escape(Str, Opts, L + 3, Len);
        <<_:L/binary, X/utf8, _/binary>> when X > 16#fdef, X < 16#fffe ->
            json_escape(Str, Opts, L + 3, Len);
        <<H:L/binary, X/utf8, T/binary>> when X < 16#10000 ->
            case Opts#opts.loose_unicode of
                true -> json_escape(<<H/binary, 16#fffd/utf8, T/binary>>, Opts, L + 3, Len);
                false -> erlang:error(badarg, [Str, Opts])
            end;
        <<H:L/binary, X/utf8, T/binary>>
                when X == 16#1fffe; X == 16#1ffff;
                X == 16#2fffe; X == 16#2ffff;
                X == 16#3fffe; X == 16#3ffff;
                X == 16#4fffe; X == 16#4ffff;
                X == 16#5fffe; X == 16#5ffff;
                X == 16#6fffe; X == 16#6ffff;
                X == 16#7fffe; X == 16#7ffff;
                X == 16#8fffe; X == 16#8ffff;
                X == 16#9fffe; X == 16#9ffff;
                X == 16#afffe; X == 16#affff;
                X == 16#bfffe; X == 16#bffff;
                X == 16#cfffe; X == 16#cffff;
                X == 16#dfffe; X == 16#dffff;
                X == 16#efffe; X == 16#effff;
                X == 16#ffffe; X == 16#fffff;
                X == 16#10fffe; X == 16#10ffff ->    
            case Opts#opts.loose_unicode of
                true -> json_escape(<<H/binary, 16#fffd/utf8, T/binary>>, Opts, L + 3, Len - 1);
                false -> erlang:error(badarg, [Str, Opts])
            end;
        <<_:L/binary, X/utf8, _/binary>> when X >= 16#10000 ->
            json_escape(Str, Opts, L + 4, Len);
        <<H:L/binary, 237, X, _, T/binary>> when X >= 160 ->
            case Opts#opts.loose_unicode of
                true -> json_escape(<<H/binary, 16#fffd/utf8, T/binary>>, Opts, L + 3, Len);
                false -> erlang:error(badarg, [Str, Opts])
            end;
        <<H:L/binary, 239, 191, X, T/binary>> when X == 190; X == 191 ->
            case Opts#opts.loose_unicode of
                true -> json_escape(<<H/binary, 16#fffd/utf8, T/binary>>, Opts, L + 3, Len);
                false -> erlang:error(badarg, [Str, Opts])
            end;
        %% Lead byte of a 2, 3 or 4 byte sequence that did not match any of the
        %% utf8 patterns above. As with the other malformed-input branches, the
        %% sequence is only replaced when `loose_unicode` is set; otherwise the
        %% call fails with badarg. The lead byte plus `Stripped` continuation
        %% bytes are swapped for the 3 byte encoding of U+FFFD, hence the
        %% `Len + 2 - Stripped` adjustment.
        <<H:L/binary, X, T/binary>> when X >= 192, X =< 223 ->
            case Opts#opts.loose_unicode of
                true ->
                    {Rest, Stripped} = strip_continuations(T, 1, 0),
                    json_escape(<<H:L/binary, 16#fffd/utf8, Rest/binary>>, Opts, L + 3, Len + 2 - Stripped);
                false -> erlang:error(badarg, [Str, Opts])
            end;
        <<H:L/binary, X, T/binary>> when X >= 224, X =< 239 ->
            case Opts#opts.loose_unicode of
                true ->
                    {Rest, Stripped} = strip_continuations(T, 2, 0),
                    json_escape(<<H:L/binary, 16#fffd/utf8, Rest/binary>>, Opts, L + 3, Len + 2 - Stripped);
                false -> erlang:error(badarg, [Str, Opts])
            end;
        <<H:L/binary, X, T/binary>> when X >= 240, X =< 247 ->
            case Opts#opts.loose_unicode of
                true ->
                    {Rest, Stripped} = strip_continuations(T, 3, 0),
                    json_escape(<<H:L/binary, 16#fffd/utf8, Rest/binary>>, Opts, L + 3, Len + 2 - Stripped);
                false -> erlang:error(badarg, [Str, Opts])
            end;
        <<H:L/binary, _, T/binary>> ->
            case Opts#opts.loose_unicode of
                true -> json_escape(<<H/binary, 16#fffd/utf8, T/binary>>, Opts, L + 3, Len + 2);
@ -291,6 +335,12 @@ to_hex(15) -> $f;
 to_hex(X) -> X + 48.    %% ascii "1" is [49], "2" is [50], etc...
 %% Drops leading UTF-8 continuation bytes (bit pattern 10xxxxxx, i.e.
 %% 128..191) from the front of a binary, at most as many as the second
 %% argument allows. Returns `{Remaining, Count}` where `Count` is the third
 %% argument plus the number of bytes dropped.
 strip_continuations(Remaining, 0, Count) ->
    {Remaining, Count};
 strip_continuations(<<2#10:2, _:6, Tail/binary>>, Budget, Count) ->
    strip_continuations(Tail, Budget - 1, Count + 1);
 %% next byte is not a continuation byte, or there is no input left
 strip_continuations(Remaining, _Budget, Count) ->
    {Remaining, Count}.
 %% eunit tests
 -ifdef(TEST).