Adds a loose_unicode option that replaces badly formed Unicode (escaped noncharacters and restricted code points, as well as partial, i.e. unpaired, surrogates) with U+FFFD (the replacement character) instead of throwing an error.

This commit is contained in:
alisdair sullivan 2011-07-27 01:59:03 -07:00
parent 51d27bb3b5
commit 653205501c
15 changed files with 121 additions and 10 deletions

View file

@ -0,0 +1 @@
"\ud83f\udfff"

View file

@ -0,0 +1,4 @@
{name, "escaped noncharacter (extended)"}.
{jsx, [{string, <<16#fffd/utf8, 16#fffd/utf8>>}, end_json]}.
{json, "escaped_noncharacter_ext.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -0,0 +1 @@
"\uffff"

View file

@ -0,0 +1,4 @@
{name, "escaped noncharacter replacement"}.
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
{json, "escaped_noncharacter_replaced.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -0,0 +1 @@
"\u0000"

View file

@ -0,0 +1,4 @@
{name, "escaped nullbyte replaced"}.
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
{json, "escaped_nullbyte_replaced.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -0,0 +1 @@
"﷐"

View file

@ -0,0 +1,3 @@
{name, "noncharacter"}.
{jsx, {error, badjson}}.
{json, "noncharacter.json"}.

View file

@ -0,0 +1 @@
["\ud801blah"]

View file

@ -0,0 +1,3 @@
{name, "unpaired_surrogate"}.
{jsx, {error, badjson}}.
{json, "unpaired_surrogate.json"}.

View file

@ -0,0 +1 @@
["\ud801blah"]

View file

@ -0,0 +1,4 @@
{name, "unpaired surrogate replaced"}.
{jsx, [start_array,{string,<<16#fffd/utf8, "blah">>},end_array,end_json]}.
{json, "unpaired_surrogate_replaced.json"}.
{jsx_flags, [loose_unicode]}.