From 16c29a0070ad14a4ba590dc5440a4ed9d3783738 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Thu, 28 Jul 2011 21:15:21 -0700 Subject: [PATCH] surrogates that encode noncharacters now are replaced by a single u+fffd rather than two --- src/jsx_decoder.hrl | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index eda032a..686ea7b 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -490,7 +490,7 @@ noncharacter(Bin, _Stack, _Opts, _Acc) -> -ifdef(utf16). %% non-characters blah blah noncharacter(<>, Stack, Opts, Acc) - when ?is_noncontrol(S), S < 16#fffe -> + when ?is_noncontrol(S) -> string(Rest, Stack, Opts, <>); %% u+ffff and u+fffe noncharacter(<<255, X, Rest/binary>>, Stack, Opts, Acc) @@ -507,7 +507,7 @@ noncharacter(Bin, _Stack, _Opts, _Acc) -> -ifdef(utf16le). %% non-characters blah blah noncharacter(<>, Stack, Opts, Acc) - when ?is_noncontrol(S), S < 16#fffe -> + when ?is_noncontrol(S) -> string(Rest, Stack, Opts, <>); %% u+ffff and u+fffe noncharacter(<>, Stack, Opts, Acc) @@ -1173,7 +1173,7 @@ extended_noncharacters_test_() -> ?_assertEqual(check_bad(extended_noncharacters()), []) }, {"extended noncharacters - replaced", - ?_assertEqual(check_extended_replaced(extended_noncharacters()), []) + ?_assertEqual(check_replaced(extended_noncharacters()), []) } ]. @@ -1232,15 +1232,6 @@ check_replaced(List) -> check(List, [loose_unicode], []) ). -check_extended_replaced(List) -> - Replace = case ?encoding of - E when E == utf16; E == utf16le -> <<16#fffd/utf8, 16#fffd/utf8>> - ; _ -> <<16#fffd/utf8>> - end, - lists:dropwhile(fun({_, [{string, S}|_]}) -> S == Replace ; (_) -> false end, - check(List, [loose_unicode], []) - ). - check_good(List) -> lists:dropwhile(fun({_, [{string, _}|_]}) -> true ; (_) -> false end, check(List, [], [])