From 16aef3b1d0aa0c8a775517f5e42e58bb419a8214 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Thu, 4 Dec 2014 14:24:54 +0000 Subject: [PATCH 1/3] README fixups add example for `return_maps` and remove note about `jsxn` --- README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f750d77..dbe1a16 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# jsx (v2.3) # +# jsx (v2.3.0) # an erlang application for consuming, producing and manipulating [json][json]. inspired by [yajl][yajl] @@ -16,11 +16,6 @@ copyright 2010-2014 alisdair sullivan there are a few changes for users upgrading from 1.x. see [CHANGES.md](CHANGES.md) for the overview or [migrating from 1.x](#migrating) for the details -## slightly less important note ## - -**jsx** supports encoding maps to json but not decoding json to a map [jsxn][jsxn] is a -thin wrapper around **jsx** that uses maps as it's object representation if you're into -that ## index ## @@ -64,7 +59,9 @@ $ rebar eunit ```erlang 1> jsx:decode(<<"{\"library\": \"jsx\", \"awesome\": true}">>). [{<<"library">>,<<"jsx">>},{<<"awesome">>,true}] -2> jsx:decode(<<"[\"a\",\"list\",\"of\",\"words\"]">>). +2> jsx:decode(<<"{\"library\": \"jsx\", \"awesome\": true}">>, [return_maps]). +#{<<"awesome">> => true,<<"library">> => <<"jsx">>} +3> jsx:decode(<<"[\"a\",\"list\",\"of\",\"words\"]">>). 
[<<"a">>, <<"list">>, <<"of">>, <<"words">>] ``` From c96d83b48832da0d1ac9b87aaf7a2ba1566d0401 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 7 Dec 2014 15:30:37 -0800 Subject: [PATCH 2/3] certain astral plane json escape sequences were inadvertently being replaced with the replacement character --- src/jsx_decoder.erl | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 81f06ba..a91c27c 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -687,31 +687,36 @@ unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, maybe_replace($\\, Config)], Stack, Config); unescape(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, maybe_replace($/, Config)], Stack, Config); -unescape(<<$u, $d, A, B, C, ?rsolidus, $u, $d, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) - when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), - (X == $c orelse X == $d orelse X == $e orelse X == $f), - ?is_hex(B), ?is_hex(C), ?is_hex(Y), ?is_hex(Z) +unescape(<<$u, F, A, B, C, ?rsolidus, $u, G, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) + when (A == $8 orelse A == $9 orelse A == $a orelse A == $b orelse A == $A orelse A == $B), + (X == $c orelse X == $d orelse X == $e orelse X == $f orelse X == $C orelse X == $D orelse X == $E orelse X == $F), + (F == $d orelse F == $D), + (G == $d orelse G == $D), + ?is_hex(B), ?is_hex(C), ?is_hex(Y), ?is_hex(Z) -> High = erlang:list_to_integer([$d, A, B, C], 16), Low = erlang:list_to_integer([$d, X, Y, Z], 16), Codepoint = (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000, string(Rest, Handler, [Acc, <<Codepoint/utf8>>], Stack, Config); -unescape(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) - when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), - ?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z) +unescape(<<$u, F, A, B, C, 
?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) + when (A == $8 orelse A == $9 orelse A == $a orelse A == $b orelse A == $A orelse A == $B), + (F == $d orelse F == $D), + ?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z) -> case Config#config.strict_utf8 of true -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config); false -> string(Rest, Handler, [Acc, <<16#fffd/utf8>>, <<16#fffd/utf8>>], Stack, Config) end; -unescape(<<$u, $d, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) - when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), - ?is_hex(B), ?is_hex(C) +unescape(<<$u, F, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) + when (A == $8 orelse A == $9 orelse A == $a orelse A == $b orelse A == $A orelse A == $B), + (F == $d orelse F == $D), + ?is_hex(B), ?is_hex(C) -> incomplete(string, <<?rsolidus, $u, $d, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config); -unescape(<<$u, $d, A, B, C>>, Handler, Acc, Stack, Config) - when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), - ?is_hex(B), ?is_hex(C) +unescape(<<$u, F, A, B, C>>, Handler, Acc, Stack, Config) + when (A == $8 orelse A == $9 orelse A == $a orelse A == $b orelse A == $A orelse A == $B), + (F == $d orelse F == $D), + ?is_hex(B), ?is_hex(C) -> incomplete(string, <<?rsolidus, $u, $d, A, B, C>>, Handler, Acc, Stack, Config); unescape(<<$u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config) @@ -1529,16 +1534,27 @@ unescape_test_() -> {"unescape reverse solidus", <<"\\">>, <<"\\\\"/utf8>>}, {"unescape control", <<0>>, <<"\\u0000"/utf8>>}, {"unescape surrogate pair", <<16#10000/utf8>>, <<"\\ud800\\udc00"/utf8>>}, + {"unescape surrogate pair", <<16#10000/utf8>>, <<"\\uD800\\uDC00"/utf8>>}, {"replace bad high surrogate", <<16#fffd/utf8>>, <<"\\udc00"/utf8>>}, + {"replace bad high surrogate", <<16#fffd/utf8>>, <<"\\uDC00"/utf8>>}, {"replace naked high surrogate", <<16#fffd/utf8, "hello world">>, <<"\\ud800hello world"/utf8>> }, + {"replace naked 
high surrogate", + <<16#fffd/utf8, "hello world">>, + <<"\\uD800hello world"/utf8>> + }, {"replace naked low surrogate", <<16#fffd/utf8, "hello world">>, <<"\\udc00hello world"/utf8>> }, - {"replace bad surrogate pair", <<16#fffd/utf8, 16#fffd/utf8>>, <<"\\ud800\\u0000">>} + {"replace naked low surrogate", + <<16#fffd/utf8, "hello world">>, + <<"\\uDC00hello world"/utf8>> + }, + {"replace bad surrogate pair", <<16#fffd/utf8, 16#fffd/utf8>>, <<"\\ud800\\u0000">>}, + {"replace bad surrogate pair", <<16#fffd/utf8, 16#fffd/utf8>>, <<"\\uD800\\u0000">>} ], [{Title, ?_assertEqual([{string, Escaped}, end_json], decode(<<34, JSON/binary, 34>>))} || {Title, Escaped, JSON} <- Cases From 5317596ebe76afc9436ec3f4e0b99f568bda719c Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 7 Dec 2014 15:38:59 -0800 Subject: [PATCH 3/3] v2.3.1 --- CHANGES.md | 6 ++++++ README.md | 2 +- src/jsx.app.src | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 424c56b..4a66843 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +v2.3.1 + +* fixes an issue where astral plane json escape sequences were + inadvertently being converted to the unicode replacement + character + v2.3 * switched to a faster implementation of string parsing in both diff --git a/README.md b/README.md index dbe1a16..0f78133 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# jsx (v2.3.0) # +# jsx (v2.3.1) # an erlang application for consuming, producing and manipulating [json][json]. inspired by [yajl][yajl] diff --git a/src/jsx.app.src b/src/jsx.app.src index e00b6e9..a81e5c0 100644 --- a/src/jsx.app.src +++ b/src/jsx.app.src @@ -1,7 +1,7 @@ {application, jsx, [ {description, "a streaming, evented json parsing toolkit"}, - {vsn, "2.3.0"}, + {vsn, "2.3.1"}, {modules, [ jsx, jsx_encoder,