Merge branch 'feature/cleanup' into develop

This commit is contained in:
alisdair sullivan 2011-07-26 19:59:05 -07:00
commit 51d27bb3b5
18 changed files with 73 additions and 25 deletions

View file

@ -174,10 +174,10 @@ jsx_decoder_gen([Test|_] = Tests, [Encoding|Encodings]) ->
Flags = proplists:get_value(jsx_flags, Test, []), Flags = proplists:get_value(jsx_flags, Test, []),
{generator, {generator,
fun() -> fun() ->
[{Name, ?_assert(decode(JSON, Flags) =:= JSX)} [{Name, ?_assertEqual(decode(JSON, Flags), JSX)}
| {generator, | {generator,
fun() -> [{Name ++ " incremental", ?_assert( fun() -> [{Name ++ " incremental", ?_assertEqual(
incremental_decode(JSON, Flags) =:= JSX) incremental_decode(JSON, Flags), JSX)
} | jsx_decoder_gen(Tests, Encodings)] } | jsx_decoder_gen(Tests, Encodings)]
end end
} }
@ -232,7 +232,9 @@ decode_loop({jsx, end_json, _Next}, Acc) ->
decode_loop({jsx, incomplete, More}, Acc) -> decode_loop({jsx, incomplete, More}, Acc) ->
decode_loop(More(end_stream), Acc); decode_loop(More(end_stream), Acc);
decode_loop({jsx, E, Next}, Acc) -> decode_loop({jsx, E, Next}, Acc) ->
decode_loop(Next(), [E] ++ Acc). decode_loop(Next(), [E] ++ Acc);
decode_loop({error, {badjson, _Error}}, _Acc) ->
{error, badjson}.
incremental_decode(<<C:1/binary, Rest/binary>>, Flags) -> incremental_decode(<<C:1/binary, Rest/binary>>, Flags) ->
@ -246,7 +248,10 @@ incremental_decode_loop({jsx, incomplete, Next}, <<C:1/binary, Rest/binary>>, Ac
incremental_decode_loop({jsx, end_json, _Next}, _Rest, Acc) -> incremental_decode_loop({jsx, end_json, _Next}, _Rest, Acc) ->
lists:reverse([end_json] ++ Acc); lists:reverse([end_json] ++ Acc);
incremental_decode_loop({jsx, Event, Next}, Rest, Acc) -> incremental_decode_loop({jsx, Event, Next}, Rest, Acc) ->
incremental_decode_loop(Next(), Rest, [Event] ++ Acc). incremental_decode_loop(Next(), Rest, [Event] ++ Acc);
incremental_decode_loop({error, {badjson, _Error}}, _Rest, _Acc) ->
{error, badjson}.
multi_decode_test_() -> multi_decode_test_() ->
@ -290,5 +295,6 @@ multi_test_result() ->
]. ].
-endif. -endif.

View file

@ -113,8 +113,10 @@
-type decoder_opts() :: [decoder_opt()]. -type decoder_opts() :: [decoder_opt()].
-type decoder_opt() :: {strict, true | false} -type decoder_opt() :: {strict, true | false}
| {stream, true | false}
| {encoding, supported_utf()}. | {encoding, supported_utf()}.
-type verify_opts() :: [verify_opt()]. -type verify_opts() :: [verify_opt()].
-type verify_opt() :: {encoding, auto | supported_utf()} -type verify_opt() :: {encoding, auto | supported_utf()}
| {repeated_keys, true | false} | {repeated_keys, true | false}

View file

@ -466,7 +466,11 @@ escaped_unicode(<<D/?utfx, Rest/binary>>, Stack, Opts, String, [C, B, A])
X when X >= 16#d800, X =< 16#dbff -> X when X >= 16#d800, X =< 16#dbff ->
low_surrogate(Rest, Stack, Opts, String, X) low_surrogate(Rest, Stack, Opts, String, X)
%% non-characters, you're not allowed to exchange these %% non-characters, you're not allowed to exchange these
; X when X == 16#fffe; X == 16#ffff -> ; X when X == 16#fffe; X == 16#ffff; X >= 16#fdd0, X =< 16#fdef ->
{error, {badjson, <<D/?utfx, Rest/binary>>}}
%% allowing interchange of null bytes allows attackers to forge
%% malicious streams
; X when X == 16#0000 ->
{error, {badjson, <<D/?utfx, Rest/binary>>}} {error, {badjson, <<D/?utfx, Rest/binary>>}}
%% anything else %% anything else
; X -> ; X ->
@ -536,14 +540,17 @@ low_surrogate(<<D/?utfx, Rest/binary>>, Stack, Opts, String, [C, B, A], High)
when ?is_hex(D) -> when ?is_hex(D) ->
case erlang:list_to_integer([A, B, C, D], 16) of case erlang:list_to_integer([A, B, C, D], 16) of
X when X >= 16#dc00, X =< 16#dfff -> X when X >= 16#dc00, X =< 16#dfff ->
string(Rest, V = surrogate_to_codepoint(High, X),
Stack, case V rem 16#10000 of
Opts, Y when Y == 16#fffe; Y == 16#ffff ->
<<String/binary, (surrogate_to_codepoint(High, X))/utf8>> {error, {badjson, <<D/?utfx, Rest/binary>>}}
) ; Y ->
io:format("~p ~p~n", [V, Y]),
string(Rest, Stack, Opts, <<String/binary, V/utf8>>)
end
%% not a low surrogate, bad bad bad %% not a low surrogate, bad bad bad
; X -> ; _ ->
{error, {badjson, <<X/?utfx, Rest/binary>>}} {error, {badjson, <<D/?utfx, Rest/binary>>}}
end; end;
low_surrogate(<<S/?utfx, Rest/binary>>, Stack, Opts, String, Acc, High) low_surrogate(<<S/?utfx, Rest/binary>>, Stack, Opts, String, Acc, High)
when ?is_hex(S) -> when ?is_hex(S) ->
@ -568,7 +575,6 @@ low_surrogate(Bin, Stack, Opts, String, Acc, High) ->
%% stole this from the unicode spec %% stole this from the unicode spec
surrogate_to_codepoint(High, Low) -> surrogate_to_codepoint(High, Low) ->
io:format("~p ~p~n", [High, Low]),
(High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000. (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000.

View file

@ -160,6 +160,9 @@ json_escape(<<$\t, Rest/binary>>, Acc) ->
%% other control characters %% other control characters
json_escape(<<C/utf8, Rest/binary>>, Acc) when C >= 0, C < $\s -> json_escape(<<C/utf8, Rest/binary>>, Acc) when C >= 0, C < $\s ->
json_escape(Rest, <<Acc/binary, (json_escape_sequence(C))/binary>>); json_escape(Rest, <<Acc/binary, (json_escape_sequence(C))/binary>>);
%% escape u+2028 and u+2029 to avoid problems with jsonp
json_escape(<<C/utf8, Rest/binary>>, Acc) when C == 16#2028; C == 16#2029 ->
json_escape(Rest, <<Acc/binary, (json_escape_sequence(C))/binary>>);
%% any other legal codepoint %% any other legal codepoint
json_escape(<<C/utf8, Rest/binary>>, Acc) -> json_escape(<<C/utf8, Rest/binary>>, Acc) ->
json_escape(Rest, <<Acc/binary, C/utf8>>); json_escape(Rest, <<Acc/binary, C/utf8>>);
@ -169,11 +172,10 @@ json_escape(_, _) ->
erlang:error(badarg). erlang:error(badarg).
%% convert a codepoint to it's \uXXXX equiv. for laziness, this only handles %% convert a codepoint to it's \uXXXX equiv.
%% codepoints this module might escape, ie, control characters json_escape_sequence(X) ->
json_escape_sequence(C) when C < 16#20 -> <<A:4, B:4, C:4, D:4>> = <<X:16>>,
<<_:8, A:4, B:4>> = <<C:16>>, % first two hex digits are always zero <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>.
<<$\\, $u, $0, $0, (to_hex(A)), (to_hex(B))>>.
to_hex(15) -> $f; to_hex(15) -> $f;

View file

@ -236,6 +236,9 @@ json_escape(<<$\t, Rest/binary>>, Acc) ->
%% other control characters %% other control characters
json_escape(<<C/utf8, Rest/binary>>, Acc) when C >= 0, C < $\s -> json_escape(<<C/utf8, Rest/binary>>, Acc) when C >= 0, C < $\s ->
json_escape(Rest, <<Acc/binary, (json_escape_sequence(C))/binary>>); json_escape(Rest, <<Acc/binary, (json_escape_sequence(C))/binary>>);
%% escape u+2028 and u+2029 to avoid problems with jsonp
json_escape(<<C/utf8, Rest/binary>>, Acc) when C == 16#2028; C == 16#2029 ->
json_escape(Rest, <<Acc/binary, (json_escape_sequence(C))/binary>>);
%% any other legal codepoint %% any other legal codepoint
json_escape(<<C/utf8, Rest/binary>>, Acc) -> json_escape(<<C/utf8, Rest/binary>>, Acc) ->
json_escape(Rest, <<Acc/binary, C/utf8>>); json_escape(Rest, <<Acc/binary, C/utf8>>);
@ -245,11 +248,10 @@ json_escape(_, _) ->
erlang:error(badarg). erlang:error(badarg).
%% convert a codepoint to it's \uXXXX equiv. for laziness, this only handles %% convert a codepoint to it's \uXXXX equiv.
%% codepoints this module might escape, ie, control characters json_escape_sequence(X) ->
json_escape_sequence(C) when C < 16#20 -> <<A:4, B:4, C:4, D:4>> = <<X:16>>,
<<_:8, A:4, B:4>> = <<C:16>>, % first two hex digits are always zero <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>.
<<$\\, $u, $0, $0, (to_hex(A)), (to_hex(B))>>.
to_hex(15) -> $f; to_hex(15) -> $f;
@ -402,6 +404,12 @@ escape_test_() ->
<<1, 2, 3, 11, 26, 30, 31>> <<1, 2, 3, 11, 26, 30, 31>>
) =:= <<"\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">> ) =:= <<"\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">>
) )
},
{"jsonp protection",
?_assert(json_escape(
<<226, 128, 168, 226, 128, 169>>
) =:= <<"\\u2028\\u2029">>
)
} }
]. ].

View file

@ -1,3 +1,3 @@
{name, "deep_array"}. {name, "deep array"}.
{jsx, [start_array,start_array,start_array,end_array,end_array,end_array,end_json]}. {jsx, [start_array,start_array,start_array,end_array,end_array,end_array,end_json]}.
{json, "deep_array.json"}. {json, "deep_array.json"}.

View file

@ -0,0 +1 @@
"\uffff"

View file

@ -0,0 +1,3 @@
{name, "escaped noncharacter"}.
{jsx, {error, badjson}}.
{json, "escaped_noncharacter.json"}.

View file

@ -0,0 +1 @@
"\ud83f\udfff"

View file

@ -0,0 +1,3 @@
{name, "escaped noncharacter (extended)"}.
{jsx, {error, badjson}}.
{json, "escaped_noncharacter_ext.json"}.

View file

@ -0,0 +1 @@
"\ufdd0"

View file

@ -0,0 +1,3 @@
{name, "escaped reserved a"}.
{jsx, {error, badjson}}.
{json, "escaped_reserved_a.json"}.

View file

@ -0,0 +1 @@
"\ufdef"

View file

@ -0,0 +1,3 @@
{name, "escaped reserved b"}.
{jsx, {error, badjson}}.
{json, "escaped_reserved_b.json"}.

View file

@ -0,0 +1 @@
"\u0000"

View file

@ -0,0 +1,3 @@
{name, "nullbyte forbidden"}.
{jsx, {error, badjson}}.
{json, "nullbyte_forbidden.json"}.

View file

@ -0,0 +1 @@
[[[[]]]

View file

@ -0,0 +1,3 @@
{name, "unbalanced array"}.
{jsx, {error, badjson}}.
{json, "unbalanced_array.json"}.