Surrogates that encode noncharacters are now replaced by a single U+FFFD rather than two
This commit is contained in:
parent
f699bdeedc
commit
16c29a0070
1 changed file with 3 additions and 12 deletions
|
@@ -490,7 +490,7 @@ noncharacter(Bin, _Stack, _Opts, _Acc) ->
|
|||
-ifdef(utf16).
|
||||
%% non-characters blah blah
|
||||
noncharacter(<<S/utf16, Rest/binary>>, Stack, Opts, Acc)
|
||||
when ?is_noncontrol(S), S < 16#fffe ->
|
||||
when ?is_noncontrol(S) ->
|
||||
string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
|
||||
%% u+ffff and u+fffe
|
||||
noncharacter(<<255, X, Rest/binary>>, Stack, Opts, Acc)
|
||||
|
@@ -507,7 +507,7 @@ noncharacter(Bin, _Stack, _Opts, _Acc) ->
|
|||
-ifdef(utf16le).
|
||||
%% non-characters blah blah
|
||||
noncharacter(<<S/utf16-little, Rest/binary>>, Stack, Opts, Acc)
|
||||
when ?is_noncontrol(S), S < 16#fffe ->
|
||||
when ?is_noncontrol(S) ->
|
||||
string(Rest, Stack, Opts, <<Acc/binary, 16#fffd/utf8>>);
|
||||
%% u+ffff and u+fffe
|
||||
noncharacter(<<X, 255, Rest/binary>>, Stack, Opts, Acc)
|
||||
|
@@ -1173,7 +1173,7 @@ extended_noncharacters_test_() ->
|
|||
?_assertEqual(check_bad(extended_noncharacters()), [])
|
||||
},
|
||||
{"extended noncharacters - replaced",
|
||||
?_assertEqual(check_extended_replaced(extended_noncharacters()), [])
|
||||
?_assertEqual(check_replaced(extended_noncharacters()), [])
|
||||
}
|
||||
].
|
||||
|
||||
|
@@ -1232,15 +1232,6 @@ check_replaced(List) ->
|
|||
check(List, [loose_unicode], [])
|
||||
).
|
||||
|
||||
check_extended_replaced(List) ->
|
||||
Replace = case ?encoding of
|
||||
E when E == utf16; E == utf16le -> <<16#fffd/utf8, 16#fffd/utf8>>
|
||||
; _ -> <<16#fffd/utf8>>
|
||||
end,
|
||||
lists:dropwhile(fun({_, [{string, S}|_]}) -> S == Replace ; (_) -> false end,
|
||||
check(List, [loose_unicode], [])
|
||||
).
|
||||
|
||||
check_good(List) ->
|
||||
lists:dropwhile(fun({_, [{string, _}|_]}) -> true ; (_) -> false end,
|
||||
check(List, [], [])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue