minor fixes for illegal utf8 sequences and better testing thereof

This commit is contained in:
alisdair sullivan 2012-03-20 22:47:16 -07:00
parent 036dd72ecf
commit 11d2d0bae1

View file

@ -161,10 +161,10 @@ clean_string(<<C/utf8, Rest/binary>>, Acc)
C == 16#ffffe orelse C == 16#fffff orelse
C == 16#10fffe orelse C == 16#10ffff ->
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>);
clean_string(<<C/utf8, Rest/binary>>, Acc) ->
clean_string(Rest, <<Acc/binary, C/utf8>>);
clean_string(<<237, X, _, Rest/binary>>, Acc) when X >= 160 ->
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>);
clean_string(<<C/utf8, Rest/binary>>, Acc) ->
clean_string(Rest, <<Acc/binary, C/utf8>>);
clean_string(<<_, Rest/binary>>, Acc) ->
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>);
clean_string(<<>>, Acc) -> Acc.