diff --git a/src/jsx.erl b/src/jsx.erl index 79c831c..5d591d0 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -174,10 +174,10 @@ jsx_decoder_gen([Test|_] = Tests, [Encoding|Encodings]) -> Flags = proplists:get_value(jsx_flags, Test, []), {generator, fun() -> - [{Name, ?_assert(decode(JSON, Flags) =:= JSX)} + [{Name, ?_assertEqual(decode(JSON, Flags), JSX)} | {generator, - fun() -> [{Name ++ " incremental", ?_assert( - incremental_decode(JSON, Flags) =:= JSX) + fun() -> [{Name ++ " incremental", ?_assertEqual( + incremental_decode(JSON, Flags), JSX) } | jsx_decoder_gen(Tests, Encodings)] end } @@ -232,7 +232,9 @@ decode_loop({jsx, end_json, _Next}, Acc) -> decode_loop({jsx, incomplete, More}, Acc) -> decode_loop(More(end_stream), Acc); decode_loop({jsx, E, Next}, Acc) -> - decode_loop(Next(), [E] ++ Acc). + decode_loop(Next(), [E] ++ Acc); +decode_loop({error, {badjson, _Error}}, _Acc) -> + {error, badjson}. incremental_decode(<>, Flags) -> @@ -246,7 +248,10 @@ incremental_decode_loop({jsx, incomplete, Next}, <>, Ac incremental_decode_loop({jsx, end_json, _Next}, _Rest, Acc) -> lists:reverse([end_json] ++ Acc); incremental_decode_loop({jsx, Event, Next}, Rest, Acc) -> - incremental_decode_loop(Next(), Rest, [Event] ++ Acc). + incremental_decode_loop(Next(), Rest, [Event] ++ Acc); +incremental_decode_loop({error, {badjson, _Error}}, _Rest, _Acc) -> + {error, badjson}. + multi_decode_test_() -> @@ -290,5 +295,6 @@ multi_test_result() -> ]. + -endif. \ No newline at end of file diff --git a/src/jsx_common.hrl b/src/jsx_common.hrl index 242f94d..6431434 100644 --- a/src/jsx_common.hrl +++ b/src/jsx_common.hrl @@ -113,8 +113,10 @@ -type decoder_opts() :: [decoder_opt()]. -type decoder_opt() :: {strict, true | false} + | {stream, true | false} | {encoding, supported_utf()}. + -type verify_opts() :: [verify_opt()]. -type verify_opt() :: {encoding, auto | supported_utf()} | {repeated_keys, true | false} diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index 0a893fa..6aa5b6b 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -466,7 +466,11 @@ escaped_unicode(<>, Stack, Opts, String, [C, B, A]) X when X >= 16#d800, X =< 16#dbff -> low_surrogate(Rest, Stack, Opts, String, X) %% non-characters, you're not allowed to exchange these - ; X when X == 16#fffe; X == 16#ffff -> + ; X when X == 16#fffe; X == 16#ffff; X >= 16#fdd0, X =< 16#fdef -> + {error, {badjson, <>}} + %% allowing interchange of null bytes allows attackers to forge + %% malicious streams + ; X when X == 16#0000 -> {error, {badjson, <>}} %% anything else ; X -> @@ -536,14 +540,17 @@ low_surrogate(<>, Stack, Opts, String, [C, B, A], High) when ?is_hex(D) -> case erlang:list_to_integer([A, B, C, D], 16) of X when X >= 16#dc00, X =< 16#dfff -> - string(Rest, - Stack, - Opts, - <> - ) + V = surrogate_to_codepoint(High, X), + case V rem 16#10000 of + Y when Y == 16#fffe; Y == 16#ffff -> + {error, {badjson, <>}} + ; Y -> + io:format("~p ~p~n", [V, Y]), + string(Rest, Stack, Opts, <>) + end %% not a low surrogate, bad bad bad - ; X -> - {error, {badjson, <>}} + ; _ -> + {error, {badjson, <>}} end; low_surrogate(<>, Stack, Opts, String, Acc, High) when ?is_hex(S) -> @@ -568,7 +575,6 @@ low_surrogate(Bin, Stack, Opts, String, Acc, High) -> %% stole this from the unicode spec surrogate_to_codepoint(High, Low) -> - io:format("~p ~p~n", [High, Low]), (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000. diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 3b3fae8..27f2900 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -160,6 +160,9 @@ json_escape(<<$\t, Rest/binary>>, Acc) -> %% other control characters json_escape(<>, Acc) when C >= 0, C < $\s -> json_escape(Rest, <>); +%% escape u+2028 and u+2029 to avoid problems with jsonp +json_escape(<>, Acc) when C == 16#2028; C == 16#2029 -> + json_escape(Rest, <>); %% any other legal codepoint json_escape(<>, Acc) -> json_escape(Rest, <>); @@ -169,11 +172,10 @@ json_escape(_, _) -> erlang:error(badarg). -%% convert a codepoint to it's \uXXXX equiv. for laziness, this only handles -%% codepoints this module might escape, ie, control characters -json_escape_sequence(C) when C < 16#20 -> - <<_:8, A:4, B:4>> = <>, % first two hex digits are always zero - <<$\\, $u, $0, $0, (to_hex(A)), (to_hex(B))>>. +%% convert a codepoint to it's \uXXXX equiv. +json_escape_sequence(X) -> + <> = <>, + <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>. to_hex(15) -> $f; diff --git a/src/jsx_terms.erl b/src/jsx_terms.erl index ecd947a..c1ce743 100644 --- a/src/jsx_terms.erl +++ b/src/jsx_terms.erl @@ -236,6 +236,9 @@ json_escape(<<$\t, Rest/binary>>, Acc) -> %% other control characters json_escape(<>, Acc) when C >= 0, C < $\s -> json_escape(Rest, <>); +%% escape u+2028 and u+2029 to avoid problems with jsonp +json_escape(<>, Acc) when C == 16#2028; C == 16#2029 -> + json_escape(Rest, <>); %% any other legal codepoint json_escape(<>, Acc) -> json_escape(Rest, <>); @@ -245,11 +248,10 @@ json_escape(_, _) -> erlang:error(badarg). -%% convert a codepoint to it's \uXXXX equiv. for laziness, this only handles -%% codepoints this module might escape, ie, control characters -json_escape_sequence(C) when C < 16#20 -> - <<_:8, A:4, B:4>> = <>, % first two hex digits are always zero - <<$\\, $u, $0, $0, (to_hex(A)), (to_hex(B))>>. +%% convert a codepoint to it's \uXXXX equiv. +json_escape_sequence(X) -> + <> = <>, + <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>. to_hex(15) -> $f; @@ -402,6 +404,12 @@ escape_test_() -> <<1, 2, 3, 11, 26, 30, 31>> ) =:= <<"\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">> ) + }, + {"jsonp protection", + ?_assert(json_escape( + <<226, 128, 168, 226, 128, 169>> + ) =:= <<"\\u2028\\u2029">> + ) } ]. diff --git a/test/cases/deep_array.test b/test/cases/deep_array.test index 1933fac..35ef9b9 100644 --- a/test/cases/deep_array.test +++ b/test/cases/deep_array.test @@ -1,3 +1,3 @@ -{name, "deep_array"}. +{name, "deep array"}. {jsx, [start_array,start_array,start_array,end_array,end_array,end_array,end_json]}. {json, "deep_array.json"}. diff --git a/test/cases/escaped_noncharacter.json b/test/cases/escaped_noncharacter.json new file mode 100644 index 0000000..e5c1b65 --- /dev/null +++ b/test/cases/escaped_noncharacter.json @@ -0,0 +1 @@ +"\uffff" \ No newline at end of file diff --git a/test/cases/escaped_noncharacter.test b/test/cases/escaped_noncharacter.test new file mode 100644 index 0000000..4e20bc3 --- /dev/null +++ b/test/cases/escaped_noncharacter.test @@ -0,0 +1,3 @@ +{name, "escaped noncharacter"}. +{jsx, {error, badjson}}. +{json, "escaped_noncharacter.json"}. \ No newline at end of file diff --git a/test/cases/escaped_noncharacter_ext.json b/test/cases/escaped_noncharacter_ext.json new file mode 100644 index 0000000..f10ec2b --- /dev/null +++ b/test/cases/escaped_noncharacter_ext.json @@ -0,0 +1 @@ +"\ud83f\udfff" \ No newline at end of file diff --git a/test/cases/escaped_noncharacter_ext.test b/test/cases/escaped_noncharacter_ext.test new file mode 100644 index 0000000..7049148 --- /dev/null +++ b/test/cases/escaped_noncharacter_ext.test @@ -0,0 +1,3 @@ +{name, "escaped noncharacter (extended)"}. +{jsx, {error, badjson}}. +{json, "escaped_noncharacter_ext.json"}. \ No newline at end of file diff --git a/test/cases/escaped_reserved_a.json b/test/cases/escaped_reserved_a.json new file mode 100644 index 0000000..dab850b --- /dev/null +++ b/test/cases/escaped_reserved_a.json @@ -0,0 +1 @@ +"\ufdd0" \ No newline at end of file diff --git a/test/cases/escaped_reserved_a.test b/test/cases/escaped_reserved_a.test new file mode 100644 index 0000000..8a5cba2 --- /dev/null +++ b/test/cases/escaped_reserved_a.test @@ -0,0 +1,3 @@ +{name, "escaped reserved a"}. +{jsx, {error, badjson}}. +{json, "escaped_reserved_a.json"}. \ No newline at end of file diff --git a/test/cases/escaped_reserved_b.json b/test/cases/escaped_reserved_b.json new file mode 100644 index 0000000..be11b6e --- /dev/null +++ b/test/cases/escaped_reserved_b.json @@ -0,0 +1 @@ +"\ufdef" \ No newline at end of file diff --git a/test/cases/escaped_reserved_b.test b/test/cases/escaped_reserved_b.test new file mode 100644 index 0000000..414f024 --- /dev/null +++ b/test/cases/escaped_reserved_b.test @@ -0,0 +1,3 @@ +{name, "escaped reserved b"}. +{jsx, {error, badjson}}. +{json, "escaped_reserved_b.json"}. \ No newline at end of file diff --git a/test/cases/nullbyte_forbidden.json b/test/cases/nullbyte_forbidden.json new file mode 100644 index 0000000..ed6780d --- /dev/null +++ b/test/cases/nullbyte_forbidden.json @@ -0,0 +1 @@ +"\u0000" \ No newline at end of file diff --git a/test/cases/nullbyte_forbidden.test b/test/cases/nullbyte_forbidden.test new file mode 100644 index 0000000..2feb2f2 --- /dev/null +++ b/test/cases/nullbyte_forbidden.test @@ -0,0 +1,3 @@ +{name, "nullbyte forbidden"}. +{jsx, {error, badjson}}. +{json, "nullbyte_forbidden.json"}. \ No newline at end of file diff --git a/test/cases/unbalanced_array.json b/test/cases/unbalanced_array.json new file mode 100644 index 0000000..7f3fb61 --- /dev/null +++ b/test/cases/unbalanced_array.json @@ -0,0 +1 @@ +[[[[]]] \ No newline at end of file diff --git a/test/cases/unbalanced_array.test b/test/cases/unbalanced_array.test new file mode 100644 index 0000000..c271c0d --- /dev/null +++ b/test/cases/unbalanced_array.test @@ -0,0 +1,3 @@ +{name, "unbalanced array"}. +{jsx, {error, badjson}}. +{json, "unbalanced_array.json"}.