loose_unicode now applies to string escaping performed on binaries converted TO json as well as from

This commit is contained in:
alisdair sullivan 2011-11-29 21:04:24 -08:00
parent d7c1ef9b4d
commit 0035cd67a7

View file

@ -245,6 +245,17 @@ json_escape(<<C/utf8, Rest/binary>>, Opts, Acc)
%% any other legal codepoint
json_escape(<<C/utf8, Rest/binary>>, Opts, Acc) ->
json_escape(Rest, Opts, <<Acc/binary, C/utf8>>);
%% if loose_unicode is true, replace illegal sequences with u+fffd
%% u+fffe and u+ffff
json_escape(<<239, 191, X, Rest/binary>>, Opts=#opts{loose_unicode=true}, Acc)
when X == 190; X == 191 ->
json_escape(Rest, Opts, <<Acc/binary, 16#fffd/utf8>>);
%% surrogates
json_escape(<<237, X, _, Rest/binary>>, Opts=#opts{loose_unicode=true}, Acc)
when X >= 160 ->
json_escape(Rest, Opts, <<Acc/binary, 16#fffd/utf8>>);
json_escape(<<_, Rest/binary>>, Opts=#opts{loose_unicode=true}, Acc) ->
json_escape(Rest, Opts, <<Acc/binary, 16#fffd/utf8>>);
json_escape(<<>>, _Opts, Acc) ->
Acc;
json_escape(Rest, Opts, Acc) ->
@ -334,6 +345,13 @@ binary_escape_test_() ->
#opts{escape_forward_slash=true}
) =:= <<"\\/Date(1303502009425)\\/">>
)
},
%% <<239, 191, 191>> is u+ffff
{"loose unicode",
?_assert(json_escape(<<"hi there "/utf8, 239, 191, 191, "!"/utf8>>,
#opts{loose_unicode=true}
) =:= <<"hi there "/utf8, 16#fffd/utf8, "!"/utf8>>
)
}
].