From 9f2c353d892d523c01be6cb7cc216c2ca5908320 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 26 Jul 2011 13:32:15 -0700 Subject: [PATCH 1/8] add option for streaming mode for value based decoder --- src/jsx_common.hrl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/jsx_common.hrl b/src/jsx_common.hrl index 242f94d..6431434 100644 --- a/src/jsx_common.hrl +++ b/src/jsx_common.hrl @@ -113,8 +113,10 @@ -type decoder_opts() :: [decoder_opt()]. -type decoder_opt() :: {strict, true | false} + | {stream, true | false} | {encoding, supported_utf()}. + -type verify_opts() :: [verify_opt()]. -type verify_opt() :: {encoding, auto | supported_utf()} | {repeated_keys, true | false} From ed71ce7b7ffebd3c368fc580223c665f2085ef37 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 26 Jul 2011 13:32:38 -0700 Subject: [PATCH 2/8] string escape u+2028 and u+2029 for compatibility with jsonp --- src/jsx_encoder.erl | 12 +++++++----- src/jsx_terms.erl | 18 +++++++++++++----- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 3b3fae8..27f2900 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -160,6 +160,9 @@ json_escape(<<$\t, Rest/binary>>, Acc) -> %% other control characters json_escape(<>, Acc) when C >= 0, C < $\s -> json_escape(Rest, <>); +%% escape u+2028 and u+2029 to avoid problems with jsonp +json_escape(<>, Acc) when C == 16#2028; C == 16#2029 -> + json_escape(Rest, <>); %% any other legal codepoint json_escape(<>, Acc) -> json_escape(Rest, <>); @@ -169,11 +172,10 @@ json_escape(_, _) -> erlang:error(badarg). -%% convert a codepoint to it's \uXXXX equiv. for laziness, this only handles -%% codepoints this module might escape, ie, control characters -json_escape_sequence(C) when C < 16#20 -> - <<_:8, A:4, B:4>> = <>, % first two hex digits are always zero - <<$\\, $u, $0, $0, (to_hex(A)), (to_hex(B))>>. +%% convert a codepoint to it's \uXXXX equiv. +json_escape_sequence(X) -> + <> = <>, + <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>. to_hex(15) -> $f; diff --git a/src/jsx_terms.erl b/src/jsx_terms.erl index ecd947a..c1ce743 100644 --- a/src/jsx_terms.erl +++ b/src/jsx_terms.erl @@ -236,6 +236,9 @@ json_escape(<<$\t, Rest/binary>>, Acc) -> %% other control characters json_escape(<>, Acc) when C >= 0, C < $\s -> json_escape(Rest, <>); +%% escape u+2028 and u+2029 to avoid problems with jsonp +json_escape(<>, Acc) when C == 16#2028; C == 16#2029 -> + json_escape(Rest, <>); %% any other legal codepoint json_escape(<>, Acc) -> json_escape(Rest, <>); @@ -245,11 +248,10 @@ json_escape(_, _) -> erlang:error(badarg). -%% convert a codepoint to it's \uXXXX equiv. for laziness, this only handles -%% codepoints this module might escape, ie, control characters -json_escape_sequence(C) when C < 16#20 -> - <<_:8, A:4, B:4>> = <>, % first two hex digits are always zero - <<$\\, $u, $0, $0, (to_hex(A)), (to_hex(B))>>. +%% convert a codepoint to it's \uXXXX equiv. +json_escape_sequence(X) -> + <> = <>, + <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>. to_hex(15) -> $f; @@ -402,6 +404,12 @@ escape_test_() -> <<1, 2, 3, 11, 26, 30, 31>> ) =:= <<"\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">> ) + }, + {"jsonp protection", + ?_assert(json_escape( + <<226, 128, 168, 226, 128, 169>> + ) =:= <<"\\u2028\\u2029">> + ) } ]. From 0507dc38dc589e87bd8217ef78d3cddc2886597a Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 26 Jul 2011 13:34:15 -0700 Subject: [PATCH 3/8] error upon encountering escaped u+0000 to prevent malicious json --- src/jsx_decoder.hrl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index 0a893fa..5bccbab 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -468,6 +468,10 @@ escaped_unicode(<>, Stack, Opts, String, [C, B, A]) %% non-characters, you're not allowed to exchange these ; X when X == 16#fffe; X == 16#ffff -> {error, {badjson, <>}} + %% allowing interchange of null bytes allows attackers to forge + %% malicious streams + ; X when X == 16#0000 -> + {error, {badjson, <>}} %% anything else ; X -> string(Rest, Stack, Opts, <>) From 6d0d2cfb8cb9985e5d534e110686cc0b577d5e46 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 26 Jul 2011 13:34:48 -0700 Subject: [PATCH 4/8] removed wayward io:format --- src/jsx_decoder.hrl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index 5bccbab..5f19f6b 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -572,7 +572,6 @@ low_surrogate(Bin, Stack, Opts, String, Acc, High) -> %% stole this from the unicode spec surrogate_to_codepoint(High, Low) -> - io:format("~p ~p~n", [High, Low]), (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000. From 4ba8c4d57e9d4684bd8a71717b2c1c0327a03c69 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 26 Jul 2011 13:46:24 -0700 Subject: [PATCH 5/8] stricter rejection of unicode non-characters --- src/jsx_decoder.hrl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index 5f19f6b..1bb8331 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -540,14 +540,16 @@ low_surrogate(<>, Stack, Opts, String, [C, B, A], High) when ?is_hex(D) -> case erlang:list_to_integer([A, B, C, D], 16) of X when X >= 16#dc00, X =< 16#dfff -> - string(Rest, - Stack, - Opts, - <> - ) + V = surrogate_to_codepoint(High, X), + case V rem 16#10000 of + X when X == 16#fffe; X == 16#ffff -> + {error, {badjson, <>}} + ; _ -> + string(Rest, Stack, Opts, <>) + end %% not a low surrogate, bad bad bad - ; X -> - {error, {badjson, <>}} + ; _ -> + {error, {badjson, <>}} end; low_surrogate(<>, Stack, Opts, String, Acc, High) when ?is_hex(S) -> From c5c98482fc8ce03bdeeeca3cd7fc7a4cb93a47d5 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 26 Jul 2011 19:28:41 -0700 Subject: [PATCH 6/8] test for noncharacters, fixed bug discovered by test --- src/jsx.erl | 16 +++++++++++++++- src/jsx_decoder.hrl | 5 +++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/jsx.erl b/src/jsx.erl index 79c831c..b10eda0 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -232,7 +232,9 @@ decode_loop({jsx, end_json, _Next}, Acc) -> decode_loop({jsx, incomplete, More}, Acc) -> decode_loop(More(end_stream), Acc); decode_loop({jsx, E, Next}, Acc) -> - decode_loop(Next(), [E] ++ Acc). + decode_loop(Next(), [E] ++ Acc); +decode_loop({error, {badjson, _Error}}, _Acc) -> + {error, badjson}. incremental_decode(<>, Flags) -> @@ -249,6 +251,17 @@ incremental_decode_loop({jsx, Event, Next}, Rest, Acc) -> incremental_decode_loop(Next(), Rest, [Event] ++ Acc). +bad_escapes_test_() -> + [ + {"null byte", + ?_assertEqual({error, badjson}, decode(<<"\"\\u0000\"">>, [])) + }, + {"escaped noncharacter", + ?_assertEqual({error, badjson}, decode(<<"\"\\ud83f\\udfff\"">>, [])) + } + ]. + + multi_decode_test_() -> [ {"multiple values in a single stream", ?_assert( @@ -290,5 +303,6 @@ multi_test_result() -> ]. + -endif. \ No newline at end of file diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index 1bb8331..398b172 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -542,9 +542,10 @@ low_surrogate(<>, Stack, Opts, String, [C, B, A], High) X when X >= 16#dc00, X =< 16#dfff -> V = surrogate_to_codepoint(High, X), case V rem 16#10000 of - X when X == 16#fffe; X == 16#ffff -> + Y when Y == 16#fffe; Y == 16#ffff -> {error, {badjson, <>}} - ; _ -> + ; Y -> + io:format("~p ~p~n", [V, Y]), string(Rest, Stack, Opts, <>) end %% not a low surrogate, bad bad bad From 2720b2e0fb6c8d15e27c5416df14144219a770f4 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 26 Jul 2011 19:46:31 -0700 Subject: [PATCH 7/8] noncharacter tests --- src/jsx.erl | 20 +++++++++++++++++--- src/jsx_decoder.hrl | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/jsx.erl b/src/jsx.erl index b10eda0..58ada1b 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -174,10 +174,10 @@ jsx_decoder_gen([Test|_] = Tests, [Encoding|Encodings]) -> Flags = proplists:get_value(jsx_flags, Test, []), {generator, fun() -> - [{Name, ?_assert(decode(JSON, Flags) =:= JSX)} + [{Name, ?_assertEqual(decode(JSON, Flags), JSX)} | {generator, - fun() -> [{Name ++ " incremental", ?_assert( - incremental_decode(JSON, Flags) =:= JSX) + fun() -> [{Name ++ " incremental", ?_assertEqual( + incremental_decode(JSON, Flags), JSX) } | jsx_decoder_gen(Tests, Encodings)] end } @@ -258,6 +258,20 @@ bad_escapes_test_() -> }, {"escaped noncharacter", ?_assertEqual({error, badjson}, decode(<<"\"\\ud83f\\udfff\"">>, [])) + }, + {"noncharacter", + ?_assertEqual({error, badjson}, decode(<<"\"\\uffff\"">>, [])) + }, + {"more noncharacters", + ?_assertEqual({error, badjson}, decode(<<"\"\\ufdd0\"">>, [])) + }, + {"last noncharacter", + ?_assertEqual({error, badjson}, decode(<<"\"\\ufdef\"">>, [])) + }, + {"ok character", + ?_assertEqual([{string, <<239, 183, 176>>}, end_json], + decode(<<"\"\\ufdf0\"">>, []) + ) } ]. diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index 398b172..6aa5b6b 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -466,7 +466,7 @@ escaped_unicode(<>, Stack, Opts, String, [C, B, A]) X when X >= 16#d800, X =< 16#dbff -> low_surrogate(Rest, Stack, Opts, String, X) %% non-characters, you're not allowed to exchange these - ; X when X == 16#fffe; X == 16#ffff -> + ; X when X == 16#fffe; X == 16#ffff; X >= 16#fdd0, X =< 16#fdef -> {error, {badjson, <>}} %% allowing interchange of null bytes allows attackers to forge %% malicious streams From b153ce3685de61414361b65ea2f86495bfc749e3 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 26 Jul 2011 19:58:48 -0700 Subject: [PATCH 8/8] moves escaping tests to general test lib, minor refactoring of test runner to facilitate --- src/jsx.erl | 28 +++--------------------- test/cases/deep_array.test | 2 +- test/cases/escaped_noncharacter.json | 1 + test/cases/escaped_noncharacter.test | 3 +++ test/cases/escaped_noncharacter_ext.json | 1 + test/cases/escaped_noncharacter_ext.test | 3 +++ test/cases/escaped_reserved_a.json | 1 + test/cases/escaped_reserved_a.test | 3 +++ test/cases/escaped_reserved_b.json | 1 + test/cases/escaped_reserved_b.test | 3 +++ test/cases/nullbyte_forbidden.json | 1 + test/cases/nullbyte_forbidden.test | 3 +++ test/cases/unbalanced_array.json | 1 + test/cases/unbalanced_array.test | 3 +++ 14 files changed, 28 insertions(+), 26 deletions(-) create mode 100644 test/cases/escaped_noncharacter.json create mode 100644 test/cases/escaped_noncharacter.test create mode 100644 test/cases/escaped_noncharacter_ext.json create mode 100644 test/cases/escaped_noncharacter_ext.test create mode 100644 test/cases/escaped_reserved_a.json create mode 100644 test/cases/escaped_reserved_a.test create mode 100644 test/cases/escaped_reserved_b.json create mode 100644 test/cases/escaped_reserved_b.test create mode 100644 test/cases/nullbyte_forbidden.json create mode 100644 test/cases/nullbyte_forbidden.test create mode 100644 test/cases/unbalanced_array.json create mode 100644 test/cases/unbalanced_array.test diff --git a/src/jsx.erl b/src/jsx.erl index 58ada1b..5d591d0 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -248,33 +248,11 @@ incremental_decode_loop({jsx, incomplete, Next}, <>, Ac incremental_decode_loop({jsx, end_json, _Next}, _Rest, Acc) -> lists:reverse([end_json] ++ Acc); incremental_decode_loop({jsx, Event, Next}, Rest, Acc) -> - incremental_decode_loop(Next(), Rest, [Event] ++ Acc). + incremental_decode_loop(Next(), Rest, [Event] ++ Acc); +incremental_decode_loop({error, {badjson, _Error}}, _Rest, _Acc) -> + {error, badjson}. -bad_escapes_test_() -> - [ - {"null byte", - ?_assertEqual({error, badjson}, decode(<<"\"\\u0000\"">>, [])) - }, - {"escaped noncharacter", - ?_assertEqual({error, badjson}, decode(<<"\"\\ud83f\\udfff\"">>, [])) - }, - {"noncharacter", - ?_assertEqual({error, badjson}, decode(<<"\"\\uffff\"">>, [])) - }, - {"more noncharacters", - ?_assertEqual({error, badjson}, decode(<<"\"\\ufdd0\"">>, [])) - }, - {"last noncharacter", - ?_assertEqual({error, badjson}, decode(<<"\"\\ufdef\"">>, [])) - }, - {"ok character", - ?_assertEqual([{string, <<239, 183, 176>>}, end_json], - decode(<<"\"\\ufdf0\"">>, []) - ) - } - ]. - multi_decode_test_() -> [ diff --git a/test/cases/deep_array.test b/test/cases/deep_array.test index 1933fac..35ef9b9 100644 --- a/test/cases/deep_array.test +++ b/test/cases/deep_array.test @@ -1,3 +1,3 @@ -{name, "deep_array"}. +{name, "deep array"}. {jsx, [start_array,start_array,start_array,end_array,end_array,end_array,end_json]}. {json, "deep_array.json"}. diff --git a/test/cases/escaped_noncharacter.json b/test/cases/escaped_noncharacter.json new file mode 100644 index 0000000..e5c1b65 --- /dev/null +++ b/test/cases/escaped_noncharacter.json @@ -0,0 +1 @@ +"\uffff" \ No newline at end of file diff --git a/test/cases/escaped_noncharacter.test b/test/cases/escaped_noncharacter.test new file mode 100644 index 0000000..4e20bc3 --- /dev/null +++ b/test/cases/escaped_noncharacter.test @@ -0,0 +1,3 @@ +{name, "escaped noncharacter"}. +{jsx, {error, badjson}}. +{json, "escaped_noncharacter.json"}. \ No newline at end of file diff --git a/test/cases/escaped_noncharacter_ext.json b/test/cases/escaped_noncharacter_ext.json new file mode 100644 index 0000000..f10ec2b --- /dev/null +++ b/test/cases/escaped_noncharacter_ext.json @@ -0,0 +1 @@ +"\ud83f\udfff" \ No newline at end of file diff --git a/test/cases/escaped_noncharacter_ext.test b/test/cases/escaped_noncharacter_ext.test new file mode 100644 index 0000000..7049148 --- /dev/null +++ b/test/cases/escaped_noncharacter_ext.test @@ -0,0 +1,3 @@ +{name, "escaped noncharacter (extended)"}. +{jsx, {error, badjson}}. +{json, "escaped_noncharacter_ext.json"}. \ No newline at end of file diff --git a/test/cases/escaped_reserved_a.json b/test/cases/escaped_reserved_a.json new file mode 100644 index 0000000..dab850b --- /dev/null +++ b/test/cases/escaped_reserved_a.json @@ -0,0 +1 @@ +"\ufdd0" \ No newline at end of file diff --git a/test/cases/escaped_reserved_a.test b/test/cases/escaped_reserved_a.test new file mode 100644 index 0000000..8a5cba2 --- /dev/null +++ b/test/cases/escaped_reserved_a.test @@ -0,0 +1,3 @@ +{name, "escaped reserved a"}. +{jsx, {error, badjson}}. +{json, "escaped_reserved_a.json"}. \ No newline at end of file diff --git a/test/cases/escaped_reserved_b.json b/test/cases/escaped_reserved_b.json new file mode 100644 index 0000000..be11b6e --- /dev/null +++ b/test/cases/escaped_reserved_b.json @@ -0,0 +1 @@ +"\ufdef" \ No newline at end of file diff --git a/test/cases/escaped_reserved_b.test b/test/cases/escaped_reserved_b.test new file mode 100644 index 0000000..414f024 --- /dev/null +++ b/test/cases/escaped_reserved_b.test @@ -0,0 +1,3 @@ +{name, "escaped reserved b"}. +{jsx, {error, badjson}}. +{json, "escaped_reserved_b.json"}. \ No newline at end of file diff --git a/test/cases/nullbyte_forbidden.json b/test/cases/nullbyte_forbidden.json new file mode 100644 index 0000000..ed6780d --- /dev/null +++ b/test/cases/nullbyte_forbidden.json @@ -0,0 +1 @@ +"\u0000" \ No newline at end of file diff --git a/test/cases/nullbyte_forbidden.test b/test/cases/nullbyte_forbidden.test new file mode 100644 index 0000000..2feb2f2 --- /dev/null +++ b/test/cases/nullbyte_forbidden.test @@ -0,0 +1,3 @@ +{name, "nullbyte forbidden"}. +{jsx, {error, badjson}}. +{json, "nullbyte_forbidden.json"}. \ No newline at end of file diff --git a/test/cases/unbalanced_array.json b/test/cases/unbalanced_array.json new file mode 100644 index 0000000..7f3fb61 --- /dev/null +++ b/test/cases/unbalanced_array.json @@ -0,0 +1 @@ +[[[[]]] \ No newline at end of file diff --git a/test/cases/unbalanced_array.test b/test/cases/unbalanced_array.test new file mode 100644 index 0000000..c271c0d --- /dev/null +++ b/test/cases/unbalanced_array.test @@ -0,0 +1,3 @@ +{name, "unbalanced array"}. +{jsx, {error, badjson}}. +{json, "unbalanced_array.json"}.