reorder parser clauses and simplify escaping for performance

This commit is contained in:
alisdair sullivan 2014-12-05 03:16:58 +00:00
parent 9842a00b4d
commit 56c3bdb578
2 changed files with 79 additions and 85 deletions

View file

@ -23,7 +23,7 @@
-module(jsx_encoder). -module(jsx_encoder).
-export([encoder/3, encode/1, encode/2, unzip/1]). -export([encoder/3, encode/1, encode/2]).
-spec encoder(Handler::module(), State::any(), Config::list()) -> jsx:encoder(). -spec encoder(Handler::module(), State::any(), Config::list()) -> jsx:encoder().
@ -44,11 +44,10 @@ encode(Term, EntryPoint) -> encode_(Term, EntryPoint).
-endif. -endif.
-ifdef(maps_support). -ifdef(maps_support).
encode(Map, _EntryPoint) when is_map(Map), map_size(Map) < 1 -> [start_object, end_object]; encode(Map, _EntryPoint) when is_map(Map), map_size(Map) < 1 ->
[start_object, end_object];
encode(Term, EntryPoint) when is_map(Term) -> encode(Term, EntryPoint) when is_map(Term) ->
lists:flatten( [start_object] ++ unpack(Term, EntryPoint);
[start_object] ++ [ EntryPoint:encode(T, EntryPoint) || T <- unpack(Term) ] ++ [end_object]
);
encode(Term, EntryPoint) -> encode_(Term, EntryPoint). encode(Term, EntryPoint) -> encode_(Term, EntryPoint).
-endif. -endif.
@ -56,28 +55,29 @@ encode_([], _EntryPoint) -> [start_array, end_array];
encode_([{}], _EntryPoint) -> [start_object, end_object]; encode_([{}], _EntryPoint) -> [start_object, end_object];
encode_([{_, _}|_] = Term, EntryPoint) -> encode_([{_, _}|_] = Term, EntryPoint) ->
lists:flatten( [start_object] ++ unzip(Term, EntryPoint);
[start_object] ++ [ EntryPoint:encode(T, EntryPoint) || T <- unzip(Term) ] ++ [end_object]
);
encode_(Term, EntryPoint) when is_list(Term) -> encode_(Term, EntryPoint) when is_list(Term) ->
lists:flatten( [start_array] ++ unhitch(Term, EntryPoint);
[start_array] ++ [ EntryPoint:encode(T, EntryPoint) || T <- Term ] ++ [end_array]
);
encode_(Else, _EntryPoint) -> [Else]. encode_(Else, _EntryPoint) -> [Else].
unzip(List) -> unzip(List, []). unzip([{K, V}|Rest], EntryPoint) when is_integer(K); is_binary(K); is_atom(K) ->
[K] ++ EntryPoint:encode(V, EntryPoint) ++ unzip(Rest, EntryPoint);
unzip([], _) -> [end_object].
unhitch([V|Rest], EntryPoint) ->
EntryPoint:encode(V, EntryPoint) ++ unhitch(Rest, EntryPoint);
unhitch([], _) -> [end_array].
unzip([], Acc) -> lists:reverse(Acc);
unzip([{K, V}|Rest], Acc) when is_binary(K); is_atom(K); is_integer(K) -> unzip(Rest, [V, K] ++ Acc).
-ifdef(maps_support). -ifdef(maps_support).
unpack(Map) -> unpack(maps:keys(Map), Map, []). unpack(Map, EntryPoint) -> unpack(Map, maps:keys(Map), EntryPoint).
unpack([], _, Acc) -> lists:reverse(Acc); unpack(Map, [K|Rest], EntryPoint) when is_integer(K); is_binary(K); is_atom(K) ->
unpack([K|Rest], Map, Acc) when is_binary(K); is_atom(K); is_integer(K) -> [K] ++ EntryPoint:encode(maps:get(K, Map), EntryPoint) ++ unpack(Map, Rest, EntryPoint);
unpack(Rest, Map, [maps:get(K, Map), K] ++ Acc). unpack(_, [], _) -> [end_object].
-endif. -endif.

View file

@ -87,36 +87,26 @@ incomplete(State, Handler, Stack, Config=#config{incomplete_handler=F}) ->
handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}. handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}.
value([start_object|Tokens], Handler, Stack, Config) -> value([String|Tokens], Handler, Stack, Config) when is_binary(String) ->
object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config);
value([start_array|Tokens], Handler, Stack, Config) ->
array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config);
value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null ->
maybe_done(Tokens, handle_event({literal, Literal}, Handler, Config), Stack, Config);
value([Literal|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null ->
value([{literal, Literal}] ++ Tokens, Handler, Stack, Config);
value([{integer, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) ->
maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config);
value([{float, Number}|Tokens], Handler, Stack, Config) when is_float(Number) ->
maybe_done(Tokens, handle_event({float, Number}, Handler, Config), Stack, Config);
value([{number, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) ->
value([{integer, Number}] ++ Tokens, Handler, Stack, Config);
value([{number, Number}|Tokens], Handler, Stack, Config) when is_float(Number) ->
value([{float, Number}] ++ Tokens, Handler, Stack, Config);
value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) ->
value([{integer, Number}] ++ Tokens, Handler, Stack, Config);
value([Number|Tokens], Handler, Stack, Config) when is_float(Number) ->
value([{float, Number}] ++ Tokens, Handler, Stack, Config);
value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) ->
try clean_string(String, Config) of Clean -> try clean_string(String, Config) of Clean ->
maybe_done(Tokens, handle_event({string, Clean}, Handler, Config), Stack, Config) maybe_done(Tokens, handle_event({string, Clean}, Handler, Config), Stack, Config)
catch error:badarg -> catch error:badarg ->
?error(value, [{string, String}|Tokens], Handler, Stack, Config) ?error(value, [{string, String}|Tokens], Handler, Stack, Config)
end; end;
value([String|Tokens], Handler, Stack, Config) when is_binary(String) -> value([true|Tokens], Handler, Stack, Config) ->
value([{string, String}] ++ Tokens, Handler, Stack, Config); maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config);
value([String|Tokens], Handler, Stack, Config) when is_atom(String) -> value([false|Tokens], Handler, Stack, Config) ->
value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config); maybe_done(Tokens, handle_event({literal, false}, Handler, Config), Stack, Config);
value([null|Tokens], Handler, Stack, Config) ->
maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config);
value([start_object|Tokens], Handler, Stack, Config) ->
object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config);
value([start_array|Tokens], Handler, Stack, Config) ->
array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config);
value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) ->
maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config);
value([Number|Tokens], Handler, Stack, Config) when is_float(Number) ->
maybe_done(Tokens, handle_event({float, Number}, Handler, Config), Stack, Config);
value([{raw, Raw}|Tokens], Handler, Stack, Config) when is_binary(Raw) -> value([{raw, Raw}|Tokens], Handler, Stack, Config) when is_binary(Raw) ->
value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config); value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config);
value([{{Year, Month, Day}, {Hour, Min, Sec}}|Tokens], Handler, Stack, Config) value([{{Year, Month, Day}, {Hour, Min, Sec}}|Tokens], Handler, Stack, Config)
@ -129,6 +119,10 @@ when is_integer(Year), is_integer(Month), is_integer(Day), is_integer(Hour), is_
Stack, Stack,
Config Config
); );
value([{_, Value}|Tokens], Handler, Stack, Config) ->
value([Value] ++ Tokens, Handler, Stack, Config);
value([String|Tokens], Handler, Stack, Config) when is_atom(String) ->
value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config);
value([], Handler, Stack, Config) -> value([], Handler, Stack, Config) ->
incomplete(value, Handler, Stack, Config); incomplete(value, Handler, Stack, Config);
value(BadTokens, Handler, Stack, Config) when is_list(BadTokens) -> value(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
@ -203,35 +197,35 @@ clean_string(Bin, Config) -> clean(Bin, [], Config).
clean(<<>>, Acc, _) -> iolist_to_binary(Acc); clean(<<>>, Acc, _) -> iolist_to_binary(Acc);
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#20 -> clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#20 ->
maybe_replace(X, Rest, Acc, Config); clean(Rest, [Acc, maybe_replace(X, Config)], Config);
clean(<<34, Rest/binary>>, Acc, Config) -> clean(<<34, Rest/binary>>, Acc, Config) ->
maybe_replace(34, Rest, Acc, Config); clean(Rest, [Acc, maybe_replace(34, Config)], Config);
clean(<<47, Rest/binary>>, Acc, Config) -> clean(<<47, Rest/binary>>, Acc, Config) ->
maybe_replace(47, Rest, Acc, Config); clean(Rest, [Acc, maybe_replace(47, Config)], Config);
clean(<<92, Rest/binary>>, Acc, Config) -> clean(<<92, Rest/binary>>, Acc, Config) ->
maybe_replace(92, Rest, Acc, Config); clean(Rest, [Acc, maybe_replace(92, Config)], Config);
clean(<<X/utf8, Rest/binary>>, Acc, Config=#config{uescape=true}) when X >= 16#80 -> clean(<<X/utf8, Rest/binary>>, Acc, Config=#config{uescape=true}) when X >= 16#80 ->
maybe_replace(X, Rest, Acc, Config); clean(Rest, [Acc, maybe_replace(X, Config)], Config);
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 -> clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
maybe_replace(X, Rest, Acc, Config); clean(Rest, [Acc, maybe_replace(X, Config)], Config);
clean(<<_/utf8, _/binary>> = Bin, Acc, Config) -> clean(<<_/utf8, _/binary>> = Bin, Acc, Config) ->
Size = count(Bin, 0, Config), Size = count(Bin, 0, Config),
<<Clean:Size/binary, Rest/binary>> = Bin, <<Clean:Size/binary, Rest/binary>> = Bin,
clean(Rest, [Acc, Clean], Config); clean(Rest, [Acc, Clean], Config);
%% surrogates %% surrogates
clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 -> clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
maybe_replace(surrogate, Rest, Acc, Config); clean(Rest, [Acc, maybe_replace(surrogate, Config)], Config);
%% overlong encodings and missing continuations of a 2 byte sequence %% overlong encodings and missing continuations of a 2 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 -> clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 ->
maybe_replace(badutf, strip_continuations(Rest, 1), Acc, Config); clean(strip_continuations(Rest, 1), [Acc, maybe_replace(badutf, Config)], Config);
%% overlong encodings and missing continuations of a 3 byte sequence %% overlong encodings and missing continuations of a 3 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 -> clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
maybe_replace(badutf, strip_continuations(Rest, 2), Acc, Config); clean(strip_continuations(Rest, 2), [Acc, maybe_replace(badutf, Config)], Config);
%% overlong encodings and missing continuations of a 4 byte sequence %% overlong encodings and missing continuations of a 4 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 -> clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
maybe_replace(badutf, strip_continuations(Rest, 3), Acc, Config); clean(strip_continuations(Rest, 3), [Acc, maybe_replace(badutf, Config)], Config);
clean(<<_, Rest/binary>>, Acc, Config) -> clean(<<_, Rest/binary>>, Acc, Config) ->
maybe_replace(badutf, Rest, Acc, Config). clean(Rest, [Acc, maybe_replace(badutf, Config)], Config).
count(<<>>, N, _) -> N; count(<<>>, N, _) -> N;
@ -474,43 +468,43 @@ strip_continuations(<<X, Rest/binary>>, N) when X >= 128, X =< 191 ->
strip_continuations(Bin, _) -> Bin. strip_continuations(Bin, _) -> Bin.
maybe_replace($\b, Rest, Acc, Config=#config{escaped_strings=true}) -> maybe_replace($\b, #config{escaped_strings=true}) ->
clean(Rest, [Acc, $\\, $b], Config); [$\\, $b];
maybe_replace($\t, Rest, Acc, Config=#config{escaped_strings=true}) -> maybe_replace($\t, #config{escaped_strings=true}) ->
clean(Rest, [Acc, $\\, $t], Config); [$\\, $t];
maybe_replace($\n, Rest, Acc, Config=#config{escaped_strings=true}) -> maybe_replace($\n, #config{escaped_strings=true}) ->
clean(Rest, [Acc, $\\, $n], Config); [$\\, $n];
maybe_replace($\f, Rest, Acc, Config=#config{escaped_strings=true}) -> maybe_replace($\f, #config{escaped_strings=true}) ->
clean(Rest, [Acc, $\\, $f], Config); [$\\, $f];
maybe_replace($\r, Rest, Acc, Config=#config{escaped_strings=true}) -> maybe_replace($\r, #config{escaped_strings=true}) ->
clean(Rest, [Acc, $\\, $r], Config); [$\\, $r];
maybe_replace($\", Rest, Acc, Config=#config{escaped_strings=true}) -> maybe_replace($\", #config{escaped_strings=true}) ->
clean(Rest, [Acc, $\\, $\"], Config); [$\\, $\"];
maybe_replace($/, Rest, Acc, Config=#config{escaped_strings=true}) -> maybe_replace($/, Config=#config{escaped_strings=true}) ->
case Config#config.escaped_forward_slashes of case Config#config.escaped_forward_slashes of
true -> clean(Rest, [Acc, $\\, $/], Config); true -> [$\\, $/];
false -> clean(Rest, [Acc, $/], Config) false -> [$/]
end; end;
maybe_replace($\\, Rest, Acc, Config=#config{escaped_strings=true}) -> maybe_replace($\\, #config{escaped_strings=true}) ->
clean(Rest, [Acc, $\\, $\\], Config); [$\\, $\\];
maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X < 32 -> maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
clean(Rest, [Acc, json_escape_sequence(X)], Config); json_escape_sequence(X);
%% escaped even if no other escaping was requested! %% escaped even if no other escaping was requested!
maybe_replace(X, Rest, Acc, Config=#config{uescape=true}) when X >= 16#80 -> maybe_replace(X, #config{uescape=true}) when X >= 16#80 ->
clean(Rest, [Acc, json_escape_sequence(X)], Config); json_escape_sequence(X);
maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
case Config#config.unescaped_jsonp of case Config#config.unescaped_jsonp of
true -> clean(Rest, [Acc, <<X/utf8>>], Config); true -> [<<X/utf8>>];
false -> clean(Rest, [Acc, json_escape_sequence(X)], Config) false -> json_escape_sequence(X)
end; end;
maybe_replace(Atom, _, _, #config{strict_utf8=true}) when is_atom(Atom) -> maybe_replace(Atom, #config{strict_utf8=true}) when is_atom(Atom) ->
erlang:error(badarg); erlang:error(badarg);
maybe_replace(surrogate, Rest, Acc, Config) -> maybe_replace(surrogate, _Config) ->
clean(Rest, [Acc, <<16#fffd/utf8>>], Config); [<<16#fffd/utf8>>];
maybe_replace(badutf, Rest, Acc, Config) -> maybe_replace(badutf, _Config) ->
clean(Rest, [Acc, <<16#fffd/utf8>>], Config); [<<16#fffd/utf8>>];
maybe_replace(X, Rest, Acc, Config) -> maybe_replace(X, _Config) ->
clean(Rest, [Acc, <<X/utf8>>], Config). [<<X/utf8>>].
%% convert a codepoint to its \uXXXX equiv. %% convert a codepoint to its \uXXXX equiv.