encoder now only performs a single pass on strings and can optionally json encode them

This commit is contained in:
alisdair sullivan 2012-03-24 19:42:00 -07:00
parent 821a8ac06e
commit 5bc8bfdf45
4 changed files with 70 additions and 62 deletions

View file

@ -23,7 +23,7 @@
-module(jsx_encoder). -module(jsx_encoder).
-export([encoder/3]). -export([encoder/3, clean_string/1]).
-spec encoder(Handler::module(), State::any(), Opts::jsx:opts()) -> jsx:encoder(). -spec encoder(Handler::module(), State::any(), Opts::jsx:opts()) -> jsx:encoder().
@ -53,7 +53,7 @@ start(Term, {Handler, State}, Opts) ->
value(String, {Handler, State}, Opts) when is_binary(String) -> value(String, {Handler, State}, Opts) when is_binary(String) ->
Handler:handle_event({string, check_string(String, {Handler, State}, Opts)}, State); Handler:handle_event({string, clean_string(String, <<>>, Opts)}, State);
value(Float, {Handler, State}, _Opts) when is_float(Float) -> value(Float, {Handler, State}, _Opts) when is_float(Float) ->
Handler:handle_event({float, Float}, State); Handler:handle_event({float, Float}, State);
value(Int, {Handler, State}, _Opts) when is_integer(Int) -> value(Int, {Handler, State}, _Opts) when is_integer(Int) ->
@ -83,7 +83,7 @@ object([{Key, Value}|Rest], {Handler, State}, Opts) ->
Handler, Handler,
value( value(
Value, Value,
{Handler, Handler:handle_event({key, check_string(fix_key(Key), {Handler, State}, Opts)}, State)}, {Handler, Handler:handle_event({key, clean_string(fix_key(Key), <<>>, Opts)}, State)},
Opts Opts
) )
}, },
@ -103,21 +103,39 @@ fix_key(Key) when is_atom(Key) -> fix_key(atom_to_binary(Key, utf8));
fix_key(Key) when is_binary(Key) -> Key. fix_key(Key) when is_binary(Key) -> Key.
check_string(String, Handler, Opts) -> clean_string(Bin) -> clean_string(Bin, <<>>, #opts{json_escape=true}).
case check_string(String) of
true -> String;
false ->
case Opts#opts.loose_unicode of
true -> clean_string(String, <<>>);
false -> erlang:error(badarg, [String, Handler, Opts])
end
end.
check_string(<<C/utf8, Rest/binary>>) when C < 16#fdd0 -> clean_string(<<$\", Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
check_string(Rest); clean_string(Rest, <<Acc/binary, $\\, $\">>, Opts);
check_string(<<C/utf8, Rest/binary>>) when C > 16#fdef, C < 16#fffe -> clean_string(<<$\\, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
check_string(Rest); clean_string(Rest, <<Acc/binary, $\\, $\\>>, Opts);
check_string(<<C/utf8, Rest/binary>>) clean_string(<<$\b, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
clean_string(Rest, <<Acc/binary, $\\, $b>>, Opts);
clean_string(<<$\f, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
clean_string(Rest, <<Acc/binary, $\\, $f>>, Opts);
clean_string(<<$\n, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
clean_string(Rest, <<Acc/binary, $\\, $n>>, Opts);
clean_string(<<$\r, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
clean_string(Rest, <<Acc/binary, $\\, $r>>, Opts);
clean_string(<<$\t, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
clean_string(Rest, <<Acc/binary, $\\, $t>>, Opts);
clean_string(<<$/, Rest/binary>>, Acc, Opts=#opts{json_escape=true, escape_forward_slash=true}) ->
clean_string(Rest, <<Acc/binary, $\\, $/>>, Opts);
clean_string(<<16#2028/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true, no_jsonp_escapes=true}) ->
clean_string(Rest, <<Acc/binary, 16#2028/utf8>>, Opts);
clean_string(<<16#2029/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true, no_jsonp_escapes=true}) ->
clean_string(Rest, <<Acc/binary, 16#2029/utf8>>, Opts);
clean_string(<<16#2028/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
clean_string(Rest, <<Acc/binary, (json_escape_sequence(16#2028))/binary>>, Opts);
clean_string(<<16#2029/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) ->
clean_string(Rest, <<Acc/binary, (json_escape_sequence(16#2029))/binary>>, Opts);
clean_string(<<C/utf8, Rest/binary>>, Acc, Opts=#opts{json_escape=true}) when C < 32 ->
clean_string(Rest, <<Acc/binary, (json_escape_sequence(C))/binary>>, Opts);
clean_string(<<C/utf8, Rest/binary>>, Acc, Opts) when C < 16#fdd0 ->
clean_string(Rest, <<Acc/binary, C/utf8>>, Opts);
clean_string(<<C/utf8, Rest/binary>>, Acc, Opts) when C > 16#fdef, C < 16#fffe ->
clean_string(Rest, <<Acc/binary, C/utf8>>, Opts);
clean_string(<<C/utf8, Rest/binary>>, Acc, Opts)
when C > 16#ffff andalso when C > 16#ffff andalso
C =/= 16#1fffe andalso C =/= 16#1ffff andalso C =/= 16#1fffe andalso C =/= 16#1ffff andalso
C =/= 16#2fffe andalso C =/= 16#2ffff andalso C =/= 16#2fffe andalso C =/= 16#2ffff andalso
@ -135,46 +153,18 @@ check_string(<<C/utf8, Rest/binary>>)
C =/= 16#efffe andalso C =/= 16#effff andalso C =/= 16#efffe andalso C =/= 16#effff andalso
C =/= 16#ffffe andalso C =/= 16#fffff andalso C =/= 16#ffffe andalso C =/= 16#fffff andalso
C =/= 16#10fffe andalso C =/= 16#10ffff -> C =/= 16#10fffe andalso C =/= 16#10ffff ->
check_string(Rest); clean_string(Rest, <<Acc/binary, C/utf8>>, Opts);
check_string(<<>>) -> true;
check_string(<<_, _/binary>>) -> false.
clean_string(<<C/utf8, Rest/binary>>, Acc) when C < 16#fdd0 ->
clean_string(Rest, <<Acc/binary, C/utf8>>);
clean_string(<<C/utf8, Rest/binary>>, Acc) when C > 16#fdef, C < 16#fffe ->
clean_string(Rest, <<Acc/binary, C/utf8>>);
clean_string(<<C/utf8, Rest/binary>>, Acc)
when C > 16#ffff andalso
C =/= 16#1fffe andalso C =/= 16#1ffff andalso
C =/= 16#2fffe andalso C =/= 16#2ffff andalso
C =/= 16#3fffe andalso C =/= 16#3ffff andalso
C =/= 16#4fffe andalso C =/= 16#4ffff andalso
C =/= 16#5fffe andalso C =/= 16#5ffff andalso
C =/= 16#6fffe andalso C =/= 16#6ffff andalso
C =/= 16#7fffe andalso C =/= 16#7ffff andalso
C =/= 16#8fffe andalso C =/= 16#8ffff andalso
C =/= 16#9fffe andalso C =/= 16#9ffff andalso
C =/= 16#afffe andalso C =/= 16#affff andalso
C =/= 16#bfffe andalso C =/= 16#bffff andalso
C =/= 16#cfffe andalso C =/= 16#cffff andalso
C =/= 16#dfffe andalso C =/= 16#dffff andalso
C =/= 16#efffe andalso C =/= 16#effff andalso
C =/= 16#ffffe andalso C =/= 16#fffff andalso
C =/= 16#10fffe andalso C =/= 16#10ffff ->
clean_string(Rest, <<Acc/binary, C/utf8>>);
%% surrogates %% surrogates
clean_string(<<237, X, _, Rest/binary>>, Acc) when X >= 160 -> clean_string(<<237, X, _, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) when X >= 160 ->
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>); clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
%% private use noncharacters %% private use noncharacters
clean_string(<<239, 183, X, Rest/binary>>, Acc) when X >= 143, X =< 175 -> clean_string(<<239, 183, X, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) when X >= 143, X =< 175 ->
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>); clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
%% u+fffe and u+ffff %% u+fffe and u+ffff
clean_string(<<239, 191, X, Rest/binary>>, Acc) when X == 190; X == 191 -> clean_string(<<239, 191, X, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) when X == 190; X == 191 ->
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>); clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
%% the u+Xfffe and u+Xffff noncharacters %% the u+Xfffe and u+Xffff noncharacters
clean_string(<<X, Y, 191, Z, Rest/binary>>, Acc) clean_string(<<X, Y, 191, Z, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) when (
when (
(X == 240 andalso Y == 159) orelse (X == 240 andalso Y == 159) orelse
(X == 240 andalso Y == 175) orelse (X == 240 andalso Y == 175) orelse
(X == 240 andalso Y == 191) orelse (X == 240 andalso Y == 191) orelse
@ -184,13 +174,27 @@ clean_string(<<X, Y, 191, Z, Rest/binary>>, Acc)
) orelse ) orelse
(X == 244 andalso Y == 143) (X == 244 andalso Y == 143)
) andalso (Z == 190 orelse Z == 191) -> ) andalso (Z == 190 orelse Z == 191) ->
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>); clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
clean_string(<<_, Rest/binary>>, Acc) -> clean_string(<<_, Rest/binary>>, Acc, Opts=#opts{loose_unicode=true}) ->
clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>); clean_string(Rest, <<Acc/binary, 16#fffd/utf8>>, Opts);
clean_string(<<>>, Acc) -> Acc. clean_string(<<>>, Acc, _) -> Acc;
clean_string(Bin, _Acc, Opts) -> erlang:error(badarg, [Bin, Opts]).
%% Render a codepoint as its JSON `\uXXXX` escape sequence.
%%
%% The codepoint is packed into a 16-bit field and split into four 4-bit
%% nibbles, most significant first; each nibble becomes one lowercase hex
%% digit via to_hex/1. The result is a binary: a backslash, a `u`, and the
%% four hex digits.
json_escape_sequence(Codepoint) ->
    <<N1:4, N2:4, N3:4, N4:4>> = <<Codepoint:16>>,
    Digits = [to_hex(Nibble) || Nibble <- [N1, N2, N3, N4]],
    unicode:characters_to_binary([$\\, $u | Digits]).
%% Map a nibble value to its lowercase ASCII hex digit.
%%
%% 10..15 map to $a..$f; every other value is offset from ASCII "0" (48),
%% so 0..9 map to $0..$9. The is_integer/1 test keeps the first clause
%% restricted to exactly the integers 10 through 15.
to_hex(X) when is_integer(X), X >= 10, X =< 15 -> X - 10 + $a;
to_hex(X) -> X + $0.
-ifdef(TEST). -ifdef(TEST).
-include_lib("eunit/include/eunit.hrl"). -include_lib("eunit/include/eunit.hrl").

View file

@ -4,5 +4,6 @@
explicit_end = false, explicit_end = false,
single_quotes = false, single_quotes = false,
no_jsonp_escapes = false, no_jsonp_escapes = false,
comments = false comments = false,
json_escape = false
}). }).

View file

@ -39,7 +39,7 @@
-spec to_json(Source::any(), Opts::opts()) -> binary(). -spec to_json(Source::any(), Opts::opts()) -> binary().
to_json(Source, Opts) when is_list(Opts) -> to_json(Source, Opts) when is_list(Opts) ->
(jsx:encoder(?MODULE, Opts, jsx_utils:extract_opts(Opts)))(Source). (jsx:encoder(?MODULE, Opts, jsx_utils:extract_opts([json_escape] ++ Opts)))(Source).
-spec format(Source::binary(), Opts::opts()) -> binary(). -spec format(Source::binary(), Opts::opts()) -> binary().
@ -135,8 +135,8 @@ handle_event(Event, {[array|Stack], Acc, Opts = #opts{depth = Depth}}) ->
handle_event(end_json, {[], Acc, _Opts}) -> unicode:characters_to_binary(Acc, utf8). handle_event(end_json, {[], Acc, _Opts}) -> unicode:characters_to_binary(Acc, utf8).
encode(string, String, Opts) -> encode(string, String, _Opts) ->
[?quote, jsx_utils:json_escape(String, Opts), ?quote]; [?quote, String, ?quote];
encode(literal, Literal, _Opts) -> encode(literal, Literal, _Opts) ->
erlang:atom_to_list(Literal); erlang:atom_to_list(Literal);
encode(integer, Integer, _Opts) -> encode(integer, Integer, _Opts) ->

View file

@ -49,6 +49,8 @@ parse_opts([no_jsonp_escapes|Rest], Opts) ->
parse_opts(Rest, Opts#opts{no_jsonp_escapes=true}); parse_opts(Rest, Opts#opts{no_jsonp_escapes=true});
parse_opts([comments|Rest], Opts) -> parse_opts([comments|Rest], Opts) ->
parse_opts(Rest, Opts#opts{comments=true}); parse_opts(Rest, Opts#opts{comments=true});
parse_opts([json_escape|Rest], Opts) ->
parse_opts(Rest, Opts#opts{json_escape=true});
parse_opts(_, _) -> parse_opts(_, _) ->
{error, badarg}. {error, badarg}.
@ -60,7 +62,8 @@ valid_flags() ->
explicit_end, explicit_end,
single_quotes, single_quotes,
no_jsonp_escapes, no_jsonp_escapes,
comments comments,
json_escape
]. ].