fixes wrongheaded and stupid escaping of strings

This commit is contained in:
alisdair sullivan 2012-03-20 19:36:27 -07:00
parent 1870a74d76
commit 0c04e485a3
2 changed files with 82 additions and 25 deletions

View file

@ -25,7 +25,6 @@
-export([encoder/3]).
-spec encoder(Handler::module(), State::any(), Opts::jsx:opts()) -> jsx:encoder().
encoder(Handler, State, Opts) ->
@ -54,7 +53,7 @@ start(Term, {Handler, State}, Opts) ->
value(String, {Handler, State}, Opts) when is_binary(String) ->
Handler:handle_event({string, escape(String, {Handler, State}, Opts)}, State);
Handler:handle_event({string, check_string(String, {Handler, State}, Opts)}, State);
value(Float, {Handler, State}, _Opts) when is_float(Float) ->
Handler:handle_event({float, Float}, State);
value(Int, {Handler, State}, _Opts) when is_integer(Int) ->
@ -84,7 +83,7 @@ object([{Key, Value}|Rest], {Handler, State}, Opts) ->
Handler,
value(
Value,
{Handler, Handler:handle_event({key, escape(fix_key(Key), {Handler, State}, Opts)}, State)},
{Handler, Handler:handle_event({key, check_string(fix_key(Key), {Handler, State}, Opts)}, State)},
Opts
)
},
@ -104,17 +103,79 @@ fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8));
fix_key(Key) when is_binary(Key) -> Key.
escape(String, Handler, Opts) ->
try jsx_utils:json_escape(String, Opts)
catch error:badarg -> erlang:error(badarg, [String, Handler, Opts])
%% Validate String before it is emitted as a string or key event.
%%
%% Returns String unchanged when check_string/1 accepts it. When it is
%% rejected, the result depends on the loose_unicode field of the #opts{}
%% record: if true, the result of clean_string/2 (started with an empty
%% accumulator) is returned; if false, badarg is raised.
%%
%% Handler is not used for validation; it is only included in the argument
%% list attached to the badarg error.
check_string(String, Handler, Opts) ->
case check_string(String) of
true -> String;
false ->
%% the option is consulted only after validation has failed
case Opts#opts.loose_unicode of
true -> clean_string(String, <<>>);
false -> erlang:error(badarg, [String, Handler, Opts])
end
end.
%% Returns true when the binary is well-formed utf8 that contains no unicode
%% noncharacters, false otherwise.
%%
%% Noncharacters are u+fdd0..u+fdef plus the last two codepoints of every
%% plane (u+fffe, u+ffff, u+1fffe, u+1ffff, ... u+10fffe, u+10ffff), i.e. any
%% codepoint whose low 16 bits are 16#fffe or 16#ffff.
check_string(<<C/utf8, Rest/binary>>) when C < 16#fdd0 ->
    check_string(Rest);
%% the lower bound matters: without it u+fdd0..u+fdef would be accepted here
check_string(<<C/utf8, Rest/binary>>)
        when C > 16#fdef, (C band 16#fffe) =/= 16#fffe ->
    check_string(Rest);
check_string(<<>>) -> true;
%% anything left is either a noncharacter or a byte that does not begin a
%% valid utf8 sequence
check_string(<<_, _/binary>>) -> false.
%% Copies the input binary onto Acc, substituting u+fffd (the replacement
%% character) for everything check_string/1 rejects:
%%   * noncharacters u+fdd0..u+fdef
%%   * noncharacters whose low 16 bits are 16#fffe or 16#ffff
%%   * individual bytes that do not begin a valid utf8 sequence
%% Returns the accumulated binary once the input is exhausted.
clean_string(<<C/utf8, Rest/binary>>, Acc)
        when C >= 16#fdd0, C =< 16#fdef;
             (C band 16#fffe) =:= 16#fffe ->
    clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>);
%% any other legal codepoint is copied through untouched
clean_string(<<C/utf8, Rest/binary>>, Acc) ->
    clean_string(Rest, <<Acc/binary, C/utf8>>);
%% a byte that is not part of valid utf8: replace it and resync on the next byte
clean_string(<<_, Rest/binary>>, Acc) ->
    clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>);
clean_string(<<>>, Acc) -> Acc.
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
%% test helper: builds an encoder with jsx as the handler, an empty list as
%% the initial state and no options, then applies it to Term
encode(Term) ->
    Encode = encoder(jsx, [], []),
    Encode(Term).
%% test helper: same as encode/1 but passes Opts through to encoder/3
encode(Term, Opts) ->
    Encode = encoder(jsx, [], Opts),
    Encode(Term).
encode_test_() ->
[
@ -184,6 +245,16 @@ encode_test_() ->
encode([{key, <<"value">>}]),
[start_object, {key, <<"key">>}, {string, <<"value">>}, end_object, end_json]
)
},
{"bad string", ?_assertError(
badarg,
encode([<<"a bad string: ", 16#ffff/utf8>>])
)
},
{"allow bad string", ?_assertEqual(
encode([<<"a bad string: ", 16#1ffff/utf8>>], [loose_unicode]),
[start_array, {string, <<"a bad string: ", 16#fffd/utf8>>}, end_array, end_json]
)
}
].

View file

@ -110,10 +110,7 @@ json_escape(<<$\t, Rest/binary>>, Opts, Acc) ->
json_escape(Rest, Opts, <<Acc/binary, $\\, $t>>);
%% other control characters
json_escape(<<C/utf8, Rest/binary>>, Opts, Acc) when C >= 0, C < $\s ->
json_escape(Rest,
Opts,
<<Acc/binary, (json_escape_sequence(C))/binary>>
);
json_escape(Rest, Opts, <<Acc/binary, (json_escape_sequence(C))/binary>>);
%% escape forward slashes -- optionally -- to facilitate microsoft's nonstandard
%% date format
json_escape(<<$/, Rest/binary>>, Opts=#opts{escape_forward_slash=true}, Acc) ->
@ -125,19 +122,14 @@ json_escape(<<C/utf8, Rest/binary>>, Opts=#opts{no_jsonp_escapes=true}, Acc)
%% escape u+2028 and u+2029 to avoid problems with jsonp
json_escape(<<C/utf8, Rest/binary>>, Opts, Acc)
when C == 16#2028; C == 16#2029 ->
json_escape(Rest,
Opts,
<<Acc/binary, (json_escape_sequence(C))/binary>>
);
json_escape(Rest, Opts, <<Acc/binary, (json_escape_sequence(C))/binary>>);
%% any other legal codepoint
json_escape(<<C/utf8, Rest/binary>>, Opts, Acc) ->
json_escape(Rest, Opts, <<Acc/binary, C/utf8>>);
json_escape(<<_, Rest/binary>>, Opts=#opts{loose_unicode=true}, Acc) ->
json_escape(Rest, Opts, <<Acc/binary, 16#fffd/utf8>>);
json_escape(<<>>, _Opts, Acc) ->
Acc;
json_escape(Rest, Opts, Acc) ->
erlang:error(badarg, [Rest, Opts, Acc]).
json_escape(Bin, Opts, Acc) ->
erlang:error(badarg, [Bin, Opts, Acc]).
%% convert a codepoint to its \uXXXX equiv.
@ -175,12 +167,6 @@ binary_escape_test_() ->
<<"\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">>
)
},
{"json string loose unicode escaping",
?_assertEqual(
json_escape(<<16#ffff>>, #opts{loose_unicode=true}),
<<16#fffd/utf8>>
)
},
{"jsonp protection",
?_assertEqual(
json_escape(<<226, 128, 168, 226, 128, 169>>, #opts{}),