surrogates that encode noncharacters are now replaced by a single u+fffd rather than two

This commit is contained in:
alisdair sullivan 2011-07-28 21:15:21 -07:00
parent f699bdeedc
commit 16c29a0070

View file

@ -490,7 +490,7 @@ noncharacter(Bin, _Stack, _Opts, _Acc) ->
-ifdef(utf16). -ifdef(utf16).
%% non-characters blah blah %% non-characters blah blah
noncharacter(<<S/utf16, Rest/binary>>, Stack, Opts, Acc) noncharacter(<<S/utf16, Rest/binary>>, Stack, Opts, Acc)
when ?is_noncontrol(S), S < 16#fffe -> when ?is_noncontrol(S) ->
string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>); string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% u+ffff and u+fffe %% u+ffff and u+fffe
noncharacter(<<255, X, Rest/binary>>, Stack, Opts, Acc) noncharacter(<<255, X, Rest/binary>>, Stack, Opts, Acc)
@ -507,7 +507,7 @@ noncharacter(Bin, _Stack, _Opts, _Acc) ->
-ifdef(utf16le). -ifdef(utf16le).
%% non-characters blah blah %% non-characters blah blah
noncharacter(<<S/utf16-little, Rest/binary>>, Stack, Opts, Acc) noncharacter(<<S/utf16-little, Rest/binary>>, Stack, Opts, Acc)
when ?is_noncontrol(S), S < 16#fffe -> when ?is_noncontrol(S) ->
string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>); string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% u+ffff and u+fffe %% u+ffff and u+fffe
noncharacter(<<X, 255, Rest/binary>>, Stack, Opts, Acc) noncharacter(<<X, 255, Rest/binary>>, Stack, Opts, Acc)
@ -1173,7 +1173,7 @@ extended_noncharacters_test_() ->
?_assertEqual(check_bad(extended_noncharacters()), []) ?_assertEqual(check_bad(extended_noncharacters()), [])
}, },
{"extended noncharacters - replaced", {"extended noncharacters - replaced",
?_assertEqual(check_extended_replaced(extended_noncharacters()), []) ?_assertEqual(check_replaced(extended_noncharacters()), [])
} }
]. ].
@ -1232,15 +1232,6 @@ check_replaced(List) ->
check(List, [loose_unicode], []) check(List, [loose_unicode], [])
). ).
check_extended_replaced(List) ->
Replace = case ?encoding of
E when E == utf16; E == utf16le -> <<16#fffd/utf8, 16#fffd/utf8>>
; _ -> <<16#fffd/utf8>>
end,
lists:dropwhile(fun({_, [{string, S}|_]}) -> S == Replace ; (_) -> false end,
check(List, [loose_unicode], [])
).
check_good(List) -> check_good(List) ->
lists:dropwhile(fun({_, [{string, _}|_]}) -> true ; (_) -> false end, lists:dropwhile(fun({_, [{string, _}|_]}) -> true ; (_) -> false end,
check(List, [], []) check(List, [], [])