diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index 8f4dcf7..5c70797 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -245,6 +245,17 @@ json_escape(<>, Opts, Acc) %% any other legal codepoint json_escape(<>, Opts, Acc) -> json_escape(Rest, Opts, <>); +%% if loose_unicode is true, replace illegal sequences with u+fffd +%% u+fffe and u+ffff +json_escape(<<239, 191, X, Rest/binary>>, Opts=#opts{loose_unicode=true}, Acc) + when X == 190; X == 191 -> + json_escape(Rest, Opts, <>); +%% surrogates +json_escape(<<237, X, _, Rest/binary>>, Opts=#opts{loose_unicode=true}, Acc) + when X >= 160 -> + json_escape(Rest, Opts, <>); +json_escape(<<_, Rest/binary>>, Opts=#opts{loose_unicode=true}, Acc) -> + json_escape(Rest, Opts, <>); json_escape(<<>>, _Opts, Acc) -> Acc; json_escape(Rest, Opts, Acc) -> @@ -334,6 +345,13 @@ binary_escape_test_() -> #opts{escape_forward_slash=true} ) =:= <<"\\/Date(1303502009425)\\/">> ) + }, + %% <<239, 191, 191>> is u+ffff + {"loose unicode", + ?_assert(json_escape(<<"hi there "/utf8, 239, 191, 191, "!"/utf8>>, + #opts{loose_unicode=true} + ) =:= <<"hi there "/utf8, 16#fffd/utf8, "!"/utf8>> + ) } ].