From 9842a00b4d0b3a9bef734db5962d6a354c7c45cc Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 7 Dec 2014 07:59:51 +0000 Subject: [PATCH 01/19] strip out checks for repeated keys in generated json. no one cares about repeated keys --- README.md | 4 --- src/jsx_config.erl | 9 +++-- src/jsx_config.hrl | 1 - src/jsx_parser.erl | 38 ++++---------------- src/jsx_verify.erl | 88 +++++++++------------------------------------- 5 files changed, 28 insertions(+), 112 deletions(-) diff --git a/README.md b/README.md index dbe1a16..35ce425 100644 --- a/README.md +++ b/README.md @@ -387,10 +387,6 @@ additional options beyond these. see control codes and problematic codepoints and replacing them with the appropriate escapes -- `repeat_keys` - - this flag circumvents checking for repeated keys in generated json - - `stream` see [incomplete input](#incomplete-input) diff --git a/src/jsx_config.erl b/src/jsx_config.erl index f7b8d7a..b720033 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -63,12 +63,14 @@ parse_config([unescaped_jsonp|Rest], Config) -> parse_config(Rest, Config#config{unescaped_jsonp=true}); parse_config([dirty_strings|Rest], Config) -> parse_config(Rest, Config#config{dirty_strings=true}); +%% retained for backwards compat, now does nothing however parse_config([repeat_keys|Rest], Config) -> - parse_config(Rest, Config#config{repeat_keys=true}); + parse_config(Rest, Config); parse_config([uescape|Rest], Config) -> parse_config(Rest, Config#config{uescape=true}); parse_config([strict|Rest], Config) -> - parse_config(Rest, Config#config{strict_comments=true, + parse_config(Rest, Config#config{ + strict_comments=true, strict_commas=true, strict_utf8=true, strict_single_quotes=true, @@ -190,7 +192,6 @@ config_test_() -> escaped_strings = true, unescaped_jsonp = true, dirty_strings = true, - repeat_keys = true, strict_comments = true, strict_commas = true, strict_utf8 = true, @@ -274,7 +275,6 @@ config_to_list_test_() -> escaped_strings, 
unescaped_jsonp, dirty_strings, - repeat_keys, stream, uescape, strict @@ -284,7 +284,6 @@ config_to_list_test_() -> escaped_strings = true, unescaped_jsonp = true, dirty_strings = true, - repeat_keys = true, strict_comments = true, strict_utf8 = true, strict_single_quotes = true, diff --git a/src/jsx_config.hrl b/src/jsx_config.hrl index e72247a..3b87da1 100644 --- a/src/jsx_config.hrl +++ b/src/jsx_config.hrl @@ -3,7 +3,6 @@ escaped_strings = false :: boolean(), unescaped_jsonp = false :: boolean(), dirty_strings = false :: boolean(), - repeat_keys = false :: boolean(), strict_comments = false :: boolean(), strict_commas = false :: boolean(), strict_utf8 = false :: boolean(), diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index da128ff..4fea49d 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -88,7 +88,7 @@ handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event value([start_object|Tokens], Handler, Stack, Config) -> - object(Tokens, handle_event(start_object, Handler, Config), [{object, sets:new()}|Stack], Config); + object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config); value([start_array|Tokens], Handler, Stack, Config) -> array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config); value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null -> @@ -136,35 +136,19 @@ value(BadTokens, Handler, Stack, Config) when is_list(BadTokens) -> value(Token, Handler, Stack, Config) -> value([Token], Handler, Stack, Config). 
-object([end_object|Tokens], Handler, [{object, _}|Stack], Config) -> +object([end_object|Tokens], Handler, [object|Stack], Config) -> maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config); object([{key, Key}|Tokens], Handler, Stack, Config) when is_atom(Key); is_binary(Key); is_integer(Key) -> object([Key|Tokens], Handler, Stack, Config); -object([Key|Tokens], Handler, [{object, _Keys}|Stack], Config=#config{repeat_keys=true}) +object([Key|Tokens], Handler, [object|Stack], Config) when is_atom(Key); is_binary(Key); is_integer(Key) -> try clean_string(fix_key(Key), Config) of K -> value( Tokens, handle_event({key, K}, Handler, Config), - [{object, []}|Stack], - Config - ) - catch error:badarg -> - ?error(object, [{string, Key}|Tokens], Handler, Stack, Config) - end; -object([Key|Tokens], Handler, [{object, Keys}|Stack], Config) -when is_atom(Key); is_binary(Key); is_integer(Key) -> - try - CleanKey = clean_string(fix_key(Key), Config), - case sets:is_element(CleanKey, Keys) of true -> erlang:error(badarg); _ -> ok end, - CleanKey - of K -> - value( - Tokens, - handle_event({key, K}, Handler, Config), - [{object, sets:add_element(K, Keys)}|Stack], + [object|Stack], Config ) catch error:badarg -> @@ -186,7 +170,7 @@ array(Token, Handler, Stack, Config) -> maybe_done([end_json], Handler, [], Config) -> done([end_json], Handler, [], Config); -maybe_done(Tokens, Handler, [{object, _}|_] = Stack, Config) when is_list(Tokens) -> +maybe_done(Tokens, Handler, [object|_] = Stack, Config) when is_list(Tokens) -> object(Tokens, Handler, Stack, Config); maybe_done(Tokens, Handler, [array|_] = Stack, Config) when is_list(Tokens) -> array(Tokens, Handler, Stack, Config); @@ -1057,6 +1041,7 @@ json_escape_sequence_test_() -> {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} ]. + uescape_test_() -> [ {"\"\\u0080\"", ?_assertEqual( @@ -1080,6 +1065,7 @@ uescape_test_() -> )} ]. 
+ fix_key_test_() -> [ {"binary key", ?_assertEqual(fix_key(<<"foo">>), <<"foo">>)}, @@ -1088,16 +1074,6 @@ fix_key_test_() -> ]. -repeated_key_test_() -> - Parse = fun(Events, Config) -> (parser(?MODULE, [], Config))(Events ++ [end_json]) end, - [ - {"repeated key", ?_assertError( - badarg, - Parse([start_object, <<"key">>, true, <<"key">>, true, end_object], []) - )} - ]. - - datetime_test_() -> [ {"datetime", ?_assertEqual( diff --git a/src/jsx_verify.erl b/src/jsx_verify.erl index 09a668d..2073b99 100644 --- a/src/jsx_verify.erl +++ b/src/jsx_verify.erl @@ -27,15 +27,7 @@ -export([init/1, handle_event/2]). --record(config, { - repeated_keys = true -}). - --type config() :: []. --export_type([config/0]). - - --spec is_json(Source::binary(), Config::config()) -> true | false | {incomplete, jsx:decoder()}. +-spec is_json(Source::binary(), Config::jsx_config:config()) -> true | false | {incomplete, jsx:decoder()}. is_json(Source, Config) when is_list(Config) -> try (jsx:decoder(?MODULE, Config, jsx_config:extract_config(Config)))(Source) @@ -43,7 +35,7 @@ is_json(Source, Config) when is_list(Config) -> end. --spec is_term(Source::any(), Config::config()) -> true | false | {incomplete, jsx:encoder()}. +-spec is_term(Source::any(), Config::jsx_config:config()) -> true | false | {incomplete, jsx:encoder()}. is_term(Source, Config) when is_list(Config) -> try (jsx:encoder(?MODULE, Config, jsx_config:extract_config(Config)))(Source) @@ -51,15 +43,15 @@ is_term(Source, Config) when is_list(Config) -> end. -parse_config(Config) -> parse_config(Config, #config{}). +parse_config(Config) -> parse_config(Config, []). 
+%% ignore deprecated flags parse_config([no_repeated_keys|Rest], Config) -> - parse_config(Rest, Config#config{repeated_keys=false}); -%% deprecated, use `no_repeated_keys` + parse_config(Rest, Config); parse_config([{repeated_keys, Val}|Rest], Config) when Val == true; Val == false -> - parse_config(Rest, Config#config{repeated_keys=Val}); + parse_config(Rest, Config); parse_config([repeated_keys|Rest], Config) -> - parse_config(Rest, Config#config{repeated_keys=true}); + parse_config(Rest, Config); parse_config([{K, _}|Rest] = Options, Config) -> case lists:member(K, jsx_config:valid_flags()) of true -> parse_config(Rest, Config); @@ -73,27 +65,18 @@ parse_config([K|Rest] = Options, Config) -> parse_config([], Config) -> Config. --type state() :: {#config{}, any()}. + +%% we don't actually need any state for this +-type state() :: []. -spec init(Config::proplists:proplist()) -> state(). -init(Config) -> {parse_config(Config), []}. +init(Config) -> parse_config(Config). -spec handle_event(Event::any(), State::state()) -> state(). handle_event(end_json, _) -> true; -handle_event(_, {Config, _} = State) when Config#config.repeated_keys == true -> State; - -handle_event(start_object, {Config, Keys}) -> {Config, [dict:new()] ++ Keys}; -handle_event(end_object, {Config, [_|Keys]}) -> {Config, Keys}; - -handle_event({key, Key}, {Config, [CurrentKeys|Keys]}) -> - case dict:is_key(Key, CurrentKeys) of - true -> erlang:error(badarg); - false -> {Config, [dict:store(Key, blah, CurrentKeys)|Keys]} - end; - handle_event(_, State) -> State. @@ -105,15 +88,15 @@ handle_event(_, State) -> State. 
config_test_() -> [ - {"empty config", ?_assertEqual(#config{}, parse_config([]))}, - {"no repeat keys", ?_assertEqual(#config{repeated_keys=false}, parse_config([no_repeated_keys]))}, - {"bare repeated keys", ?_assertEqual(#config{}, parse_config([repeated_keys]))}, + {"empty config", ?_assertEqual([], parse_config([]))}, + {"no repeat keys", ?_assertEqual([], parse_config([no_repeated_keys]))}, + {"bare repeated keys", ?_assertEqual([], parse_config([repeated_keys]))}, {"repeated keys true", ?_assertEqual( - #config{}, + [], parse_config([{repeated_keys, true}]) )}, {"repeated keys false", ?_assertEqual( - #config{repeated_keys=false}, + [], parse_config([{repeated_keys, false}]) )}, {"invalid opt flag", ?_assertError(badarg, parse_config([error]))}, @@ -121,50 +104,13 @@ config_test_() -> ]. -repeated_keys_test_() -> - RepeatedKey = [ - start_object, - {key, <<"alpha">>}, - {literal, true}, - {key, <<"alpha">>}, - {literal, false}, - end_object, - end_json - ], - NestedKey = [ - start_object, - {key, <<"alpha">>}, - start_object, - {key, <<"alpha">>}, - start_object, - {key, <<"alpha">>}, - {literal, true}, - end_object, - end_object, - end_object, - end_json - ], - [ - {"repeated key", ?_assert( - lists:foldl(fun handle_event/2, {#config{}, []}, RepeatedKey) - )}, - {"no repeated key", ?_assertError( - badarg, - lists:foldl(fun handle_event/2, {#config{repeated_keys=false}, []}, RepeatedKey) - )}, - {"nested key", ?_assert( - lists:foldl(fun handle_event/2, {#config{repeated_keys=false}, []}, NestedKey) - )} - ]. - - handle_event_test_() -> Data = jsx:test_cases() ++ jsx:special_test_cases(), [ { Title, ?_assertEqual( true, - lists:foldl(fun handle_event/2, {#config{}, []}, Events ++ [end_json]) + lists:foldl(fun handle_event/2, [], Events ++ [end_json]) ) } || {Title, _, _, Events} <- Data ]. 
From 56c3bdb57821e6b5e797d65be5639cb079601107 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Fri, 5 Dec 2014 03:16:58 +0000 Subject: [PATCH 02/19] reorder parser clauses and simplify escaping for performance --- src/jsx_encoder.erl | 36 ++++++------- src/jsx_parser.erl | 128 +++++++++++++++++++++----------------------- 2 files changed, 79 insertions(+), 85 deletions(-) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index d39d49a..12b1c67 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -23,7 +23,7 @@ -module(jsx_encoder). --export([encoder/3, encode/1, encode/2, unzip/1]). +-export([encoder/3, encode/1, encode/2]). -spec encoder(Handler::module(), State::any(), Config::list()) -> jsx:encoder(). @@ -44,11 +44,10 @@ encode(Term, EntryPoint) -> encode_(Term, EntryPoint). -endif. -ifdef(maps_support). -encode(Map, _EntryPoint) when is_map(Map), map_size(Map) < 1 -> [start_object, end_object]; +encode(Map, _EntryPoint) when is_map(Map), map_size(Map) < 1 -> + [start_object, end_object]; encode(Term, EntryPoint) when is_map(Term) -> - lists:flatten( - [start_object] ++ [ EntryPoint:encode(T, EntryPoint) || T <- unpack(Term) ] ++ [end_object] - ); + [start_object] ++ unpack(Term, EntryPoint); encode(Term, EntryPoint) -> encode_(Term, EntryPoint). -endif. @@ -56,28 +55,29 @@ encode_([], _EntryPoint) -> [start_array, end_array]; encode_([{}], _EntryPoint) -> [start_object, end_object]; encode_([{_, _}|_] = Term, EntryPoint) -> - lists:flatten( - [start_object] ++ [ EntryPoint:encode(T, EntryPoint) || T <- unzip(Term) ] ++ [end_object] - ); + [start_object] ++ unzip(Term, EntryPoint); encode_(Term, EntryPoint) when is_list(Term) -> - lists:flatten( - [start_array] ++ [ EntryPoint:encode(T, EntryPoint) || T <- Term ] ++ [end_array] - ); + [start_array] ++ unhitch(Term, EntryPoint); encode_(Else, _EntryPoint) -> [Else]. -unzip(List) -> unzip(List, []). 
+unzip([{K, V}|Rest], EntryPoint) when is_integer(K); is_binary(K); is_atom(K) -> + [K] ++ EntryPoint:encode(V, EntryPoint) ++ unzip(Rest, EntryPoint); +unzip([], _) -> [end_object]. + + +unhitch([V|Rest], EntryPoint) -> + EntryPoint:encode(V, EntryPoint) ++ unhitch(Rest, EntryPoint); +unhitch([], _) -> [end_array]. -unzip([], Acc) -> lists:reverse(Acc); -unzip([{K, V}|Rest], Acc) when is_binary(K); is_atom(K); is_integer(K) -> unzip(Rest, [V, K] ++ Acc). -ifdef(maps_support). -unpack(Map) -> unpack(maps:keys(Map), Map, []). +unpack(Map, EntryPoint) -> unpack(Map, maps:keys(Map), EntryPoint). -unpack([], _, Acc) -> lists:reverse(Acc); -unpack([K|Rest], Map, Acc) when is_binary(K); is_atom(K); is_integer(K) -> - unpack(Rest, Map, [maps:get(K, Map), K] ++ Acc). +unpack(Map, [K|Rest], EntryPoint) when is_integer(K); is_binary(K); is_atom(K) -> + [K] ++ EntryPoint:encode(maps:get(K, Map), EntryPoint) ++ unpack(Map, Rest, EntryPoint); +unpack(_, [], _) -> [end_object]. -endif. diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 4fea49d..7a8f230 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -87,36 +87,26 @@ incomplete(State, Handler, Stack, Config=#config{incomplete_handler=F}) -> handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}. 
-value([start_object|Tokens], Handler, Stack, Config) -> - object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config); -value([start_array|Tokens], Handler, Stack, Config) -> - array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config); -value([{literal, Literal}|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null -> - maybe_done(Tokens, handle_event({literal, Literal}, Handler, Config), Stack, Config); -value([Literal|Tokens], Handler, Stack, Config) when Literal == true; Literal == false; Literal == null -> - value([{literal, Literal}] ++ Tokens, Handler, Stack, Config); -value([{integer, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) -> - maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config); -value([{float, Number}|Tokens], Handler, Stack, Config) when is_float(Number) -> - maybe_done(Tokens, handle_event({float, Number}, Handler, Config), Stack, Config); -value([{number, Number}|Tokens], Handler, Stack, Config) when is_integer(Number) -> - value([{integer, Number}] ++ Tokens, Handler, Stack, Config); -value([{number, Number}|Tokens], Handler, Stack, Config) when is_float(Number) -> - value([{float, Number}] ++ Tokens, Handler, Stack, Config); -value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) -> - value([{integer, Number}] ++ Tokens, Handler, Stack, Config); -value([Number|Tokens], Handler, Stack, Config) when is_float(Number) -> - value([{float, Number}] ++ Tokens, Handler, Stack, Config); -value([{string, String}|Tokens], Handler, Stack, Config) when is_binary(String) -> +value([String|Tokens], Handler, Stack, Config) when is_binary(String) -> try clean_string(String, Config) of Clean -> maybe_done(Tokens, handle_event({string, Clean}, Handler, Config), Stack, Config) catch error:badarg -> ?error(value, [{string, String}|Tokens], Handler, Stack, Config) end; -value([String|Tokens], Handler, Stack, Config) 
when is_binary(String) -> - value([{string, String}] ++ Tokens, Handler, Stack, Config); -value([String|Tokens], Handler, Stack, Config) when is_atom(String) -> - value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config); +value([true|Tokens], Handler, Stack, Config) -> + maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config); +value([false|Tokens], Handler, Stack, Config) -> + maybe_done(Tokens, handle_event({literal, false}, Handler, Config), Stack, Config); +value([null|Tokens], Handler, Stack, Config) -> + maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config); +value([start_object|Tokens], Handler, Stack, Config) -> + object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config); +value([start_array|Tokens], Handler, Stack, Config) -> + array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config); +value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) -> + maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config); +value([Number|Tokens], Handler, Stack, Config) when is_float(Number) -> + maybe_done(Tokens, handle_event({float, Number}, Handler, Config), Stack, Config); value([{raw, Raw}|Tokens], Handler, Stack, Config) when is_binary(Raw) -> value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config); value([{{Year, Month, Day}, {Hour, Min, Sec}}|Tokens], Handler, Stack, Config) @@ -129,6 +119,10 @@ when is_integer(Year), is_integer(Month), is_integer(Day), is_integer(Hour), is_ Stack, Config ); +value([{_, Value}|Tokens], Handler, Stack, Config) -> + value([Value] ++ Tokens, Handler, Stack, Config); +value([String|Tokens], Handler, Stack, Config) when is_atom(String) -> + value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config); value([], Handler, Stack, Config) -> incomplete(value, Handler, Stack, Config); value(BadTokens, Handler, Stack, Config) when 
is_list(BadTokens) -> @@ -203,35 +197,35 @@ clean_string(Bin, Config) -> clean(Bin, [], Config). clean(<<>>, Acc, _) -> iolist_to_binary(Acc); clean(<>, Acc, Config) when X < 16#20 -> - maybe_replace(X, Rest, Acc, Config); + clean(Rest, [Acc, maybe_replace(X, Config)], Config); clean(<<34, Rest/binary>>, Acc, Config) -> - maybe_replace(34, Rest, Acc, Config); + clean(Rest, [Acc, maybe_replace(34, Config)], Config); clean(<<47, Rest/binary>>, Acc, Config) -> - maybe_replace(47, Rest, Acc, Config); + clean(Rest, [Acc, maybe_replace(47, Config)], Config); clean(<<92, Rest/binary>>, Acc, Config) -> - maybe_replace(92, Rest, Acc, Config); + clean(Rest, [Acc, maybe_replace(92, Config)], Config); clean(<>, Acc, Config=#config{uescape=true}) when X >= 16#80 -> - maybe_replace(X, Rest, Acc, Config); + clean(Rest, [Acc, maybe_replace(X, Config)], Config); clean(<>, Acc, Config) when X == 16#2028; X == 16#2029 -> - maybe_replace(X, Rest, Acc, Config); + clean(Rest, [Acc, maybe_replace(X, Config)], Config); clean(<<_/utf8, _/binary>> = Bin, Acc, Config) -> Size = count(Bin, 0, Config), <> = Bin, clean(Rest, [Acc, Clean], Config); %% surrogates clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 -> - maybe_replace(surrogate, Rest, Acc, Config); + clean(Rest, [Acc, maybe_replace(surrogate, Config)], Config); %% overlong encodings and missing continuations of a 2 byte sequence clean(<>, Acc, Config) when X >= 192, X =< 223 -> - maybe_replace(badutf, strip_continuations(Rest, 1), Acc, Config); + clean(strip_continuations(Rest, 1), [Acc, maybe_replace(badutf, Config)], Config); %% overlong encodings and missing continuations of a 3 byte sequence clean(<>, Acc, Config) when X >= 224, X =< 239 -> - maybe_replace(badutf, strip_continuations(Rest, 2), Acc, Config); + clean(strip_continuations(Rest, 2), [Acc, maybe_replace(badutf, Config)], Config); %% overlong encodings and missing continuations of a 4 byte sequence clean(<>, Acc, Config) when X >= 240, X =< 247 -> - 
maybe_replace(badutf, strip_continuations(Rest, 3), Acc, Config); + clean(strip_continuations(Rest, 3), [Acc, maybe_replace(badutf, Config)], Config); clean(<<_, Rest/binary>>, Acc, Config) -> - maybe_replace(badutf, Rest, Acc, Config). + clean(Rest, [Acc, maybe_replace(badutf, Config)], Config). count(<<>>, N, _) -> N; @@ -474,43 +468,43 @@ strip_continuations(<>, N) when X >= 128, X =< 191 -> strip_continuations(Bin, _) -> Bin. -maybe_replace($\b, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [Acc, $\\, $b], Config); -maybe_replace($\t, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [Acc, $\\, $t], Config); -maybe_replace($\n, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [Acc, $\\, $n], Config); -maybe_replace($\f, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [Acc, $\\, $f], Config); -maybe_replace($\r, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [Acc, $\\, $r], Config); -maybe_replace($\", Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [Acc, $\\, $\"], Config); -maybe_replace($/, Rest, Acc, Config=#config{escaped_strings=true}) -> +maybe_replace($\b, #config{escaped_strings=true}) -> + [$\\, $b]; +maybe_replace($\t, #config{escaped_strings=true}) -> + [$\\, $t]; +maybe_replace($\n, #config{escaped_strings=true}) -> + [$\\, $n]; +maybe_replace($\f, #config{escaped_strings=true}) -> + [$\\, $f]; +maybe_replace($\r, #config{escaped_strings=true}) -> + [$\\, $r]; +maybe_replace($\", #config{escaped_strings=true}) -> + [$\\, $\"]; +maybe_replace($/, Config=#config{escaped_strings=true}) -> case Config#config.escaped_forward_slashes of - true -> clean(Rest, [Acc, $\\, $/], Config); - false -> clean(Rest, [Acc, $/], Config) + true -> [$\\, $/]; + false -> [$/] end; -maybe_replace($\\, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [Acc, $\\, $\\], Config); -maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X < 
32 -> - clean(Rest, [Acc, json_escape_sequence(X)], Config); +maybe_replace($\\, #config{escaped_strings=true}) -> + [$\\, $\\]; +maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> + json_escape_sequence(X); %% escaped even if no other escaping was requested! -maybe_replace(X, Rest, Acc, Config=#config{uescape=true}) when X >= 16#80 -> - clean(Rest, [Acc, json_escape_sequence(X)], Config); -maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> +maybe_replace(X, #config{uescape=true}) when X >= 16#80 -> + json_escape_sequence(X); +maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> case Config#config.unescaped_jsonp of - true -> clean(Rest, [Acc, <>], Config); - false -> clean(Rest, [Acc, json_escape_sequence(X)], Config) + true -> [<>]; + false -> json_escape_sequence(X) end; -maybe_replace(Atom, _, _, #config{strict_utf8=true}) when is_atom(Atom) -> +maybe_replace(Atom, #config{strict_utf8=true}) when is_atom(Atom) -> erlang:error(badarg); -maybe_replace(surrogate, Rest, Acc, Config) -> - clean(Rest, [Acc, <<16#fffd/utf8>>], Config); -maybe_replace(badutf, Rest, Acc, Config) -> - clean(Rest, [Acc, <<16#fffd/utf8>>], Config); -maybe_replace(X, Rest, Acc, Config) -> - clean(Rest, [Acc, <>], Config). +maybe_replace(surrogate, _Config) -> + [<<16#fffd/utf8>>]; +maybe_replace(badutf, _Config) -> + [<<16#fffd/utf8>>]; +maybe_replace(X, _Config) -> + [<>]. %% convert a codepoint to it's \uXXXX equiv. 
From 43ba093ec51a0effcf2dd663cafee41f075b25ae Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 7 Dec 2014 22:04:41 +0000 Subject: [PATCH 03/19] build maps immediately when returning maps instead of building a proplist and converting --- src/jsx_to_term.erl | 189 +++++++++++++++++++++++++++++++++----------- 1 file changed, 143 insertions(+), 46 deletions(-) diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 25cb227..ad7aae0 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -25,8 +25,15 @@ -export([to_term/2]). -export([init/1, handle_event/2]). --export([start_term/0, start_term/1]). --export([start_object/1, start_array/1, finish/1, insert/2, insert/3, get_key/1, get_value/1]). +-export([ + start_term/1, + start_object/1, + start_array/1, + finish/1, + insert/2, + get_key/1, + get_value/1 +]). -record(config, { @@ -37,15 +44,27 @@ -type config() :: list(). -export_type([config/0]). - --type json_value() :: list({binary(), json_value()}) - | list(json_value()) +-ifndef(maps_support). +-type json_value() :: list(json_value()) + | list({binary() | atom(), json_value()}) | true | false | null | integer() | float() | binary(). +-endif. + +-ifdef(maps_support). +-type json_value() :: list(json_value()) + | map() + | true + | false + | null + | integer() + | float() + | binary(). +-endif. -spec to_term(Source::binary(), Config::config()) -> json_value(). @@ -79,10 +98,11 @@ parse_config([K|Rest] = Options, Config) -> parse_config([], Config) -> Config. --type state() :: {[any()], #config{}}. + +-type state() :: {list(), #config{}}. -spec init(Config::proplists:proplist()) -> state(). -init(Config) -> {[], parse_config(Config)}. +init(Config) -> start_term(Config). -spec handle_event(Event::any(), State::state()) -> state(). 
@@ -118,47 +138,46 @@ format_key(Key, Config) -> %% the stack is a list of in progress objects/arrays %% `[Current, Parent, Grandparent,...OriginalAncestor]` %% an object has the representation on the stack of -%% `{object, [{NthKey, NthValue}, {NMinus1Key, NthMinus1Value},...{FirstKey, FirstValue}]}` -%% of if there's a key with a yet to be matched value -%% `{object, Key, [{NthKey, NthValue},...]}` +%% `{object, [ +%% {NthKey, NthValue}, +%% {NMinus1Key, NthMinus1Value}, +%% ..., +%% {FirstKey, FirstValue} +%% ]}` +%% or if returning maps +%% `{object, #{ +%% FirstKey => FirstValue, +%% SecondKey => SecondValue, +%% ..., +%% NthKey => NthValue +%% }}` +%% or if there's a key with a yet to be matched value +%% `{object, Key, ...}` %% an array looks like %% `{array, [NthValue, NthMinus1Value,...FirstValue]}` -start_term() -> {[], #config{}}. - start_term(Config) when is_list(Config) -> {[], parse_config(Config)}. + +-ifndef(maps_support). %% allocate a new object on top of the stack start_object({Stack, Config}) -> {[{object, []}] ++ Stack, Config}. + %% allocate a new array on top of the stack start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}. --ifndef(maps_support). -finish(Any) -> finish0(Any). --endif. - --ifdef(maps_support). -finish({[{object, []}], Config=#config{return_maps=true}}) -> - {#{}, Config}; -finish({[{object, []}|Rest], Config=#config{return_maps=true}}) -> - insert(#{}, {Rest, Config}); -finish({[{object, Pairs}], Config=#config{return_maps=true}}) -> - {maps:from_list(Pairs), Config}; -finish({[{object, Pairs}|Rest], Config=#config{return_maps=true}}) -> - insert(maps:from_list(Pairs), {Rest, Config}); -finish(Else) -> finish0(Else). --endif. 
%% finish an object or array and insert it into the parent object if it exists or %% return it if it is the root object -finish0({[{object, []}], Config}) -> {[{}], Config}; -finish0({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); -finish0({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; -finish0({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config}); -finish0({[{array, Values}], Config}) -> {lists:reverse(Values), Config}; -finish0({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config}); -finish0(_) -> erlang:error(badarg). +finish({[{object, []}], Config}) -> {[{}], Config}; +finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config}); +finish({[{array, Values}], Config}) -> {lists:reverse(Values), Config}; +finish({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config}); +finish(_) -> erlang:error(badarg). + %% insert a value when there's no parent object or array insert(Value, {[], Config}) -> {Value, Config}; @@ -170,11 +189,51 @@ insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> insert(Value, {[{array, Values}|Rest], Config}) -> {[{array, [Value] ++ Values}] ++ Rest, Config}; insert(_, _) -> erlang:error(badarg). +-endif. -%% insert a key/value pair into an object -insert(Key, Value, {[{object, Pairs}|Rest], Config}) -> + +-ifdef(maps_support). +%% allocate a new object on top of the stack +start_object({Stack, Config=#config{return_maps=true}}) -> + {[{object, #{}}] ++ Stack, Config}; +start_object({Stack, Config}) -> + {[{object, []}] ++ Stack, Config}. + + +%% allocate a new array on top of the stack +start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}. 
+ + +%% finish an object or array and insert it into the parent object if it exists or +%% return it if it is the root object +finish({[{object, Map}], Config=#config{return_maps=true}}) -> + {Map, Config}; +finish({[{object, Map}|Rest], Config=#config{return_maps=true}}) -> + insert(Map, {Rest, Config}); +finish({[{object, []}], Config}) -> {[{}], Config}; +finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config}); +finish({[{array, Values}], Config}) -> {lists:reverse(Values), Config}; +finish({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config}); +finish(_) -> erlang:error(badarg). + + +%% insert a value when there's no parent object or array +insert(Value, {[], Config}) -> {Value, Config}; +%% insert a key or value into an object or array, autodetects the 'right' thing +insert(Key, {[{object, Map}|Rest], Config=#config{return_maps=true}}) -> + {[{object, Key, Map}] ++ Rest, Config}; +insert(Key, {[{object, Pairs}|Rest], Config}) -> + {[{object, Key, Pairs}] ++ Rest, Config}; +insert(Value, {[{object, Key, Map}|Rest], Config=#config{return_maps=true}}) -> + {[{object, maps:put(Key, Value, Map)}] ++ Rest, Config}; +insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config}; -insert(_, _, _) -> erlang:error(badarg). +insert(Value, {[{array, Values}|Rest], Config}) -> + {[{array, [Value] ++ Values}] ++ Rest, Config}; +insert(_, _) -> erlang:error(badarg). +-endif. 
get_key({[{object, Key, _}|_], _}) -> Key; @@ -235,10 +294,6 @@ format_key_test_() -> rep_manipulation_test_() -> [ - {"allocate a new context", ?_assertEqual( - {[], #config{}}, - start_term() - )}, {"allocate a new context with option", ?_assertEqual( {[], #config{labels=atom}}, start_term([{labels, atom}]) @@ -283,10 +338,6 @@ rep_manipulation_test_() -> {[{array, [value]}, junk], #config{}}, insert(value, {[{array, []}, junk], #config{}}) )}, - {"insert a key/value pair into an object", ?_assertEqual( - {[{object, [{key, value}, {x, y}]}, junk], #config{}}, - insert(key, value, {[{object, [{x, y}]}, junk], #config{}}) - )}, {"finish an object with no ancestor", ?_assertEqual( {[{a, b}, {x, y}], #config{}}, finish({[{object, [{x, y}, {a, b}]}], #config{}}) @@ -309,7 +360,54 @@ rep_manipulation_test_() -> )} ]. + -ifdef(maps_support). +rep_manipulation_with_maps_test_() -> + [ + {"allocate a new object on an empty stack", ?_assertEqual( + {[{object, #{}}], #config{return_maps=true}}, + start_object({[], #config{return_maps=true}}) + )}, + {"allocate a new object on a stack", ?_assertEqual( + {[{object, #{}}, {object, #{}}], #config{return_maps=true}}, + start_object({[{object, #{}}], #config{return_maps=true}}) + )}, + {"insert a key into an object", ?_assertEqual( + {[{object, key, #{}}, junk], #config{return_maps=true}}, + insert(key, {[{object, #{}}, junk], #config{return_maps=true}}) + )}, + {"get current key", ?_assertEqual( + key, + get_key({[{object, key, #{}}], #config{return_maps=true}}) + )}, + {"try to get non-key from object", ?_assertError( + badarg, + get_key({[{object, #{}}], #config{return_maps=true}}) + )}, + {"insert a value into an object", ?_assertEqual( + {[{object, #{key => value}}, junk], #config{return_maps=true}}, + insert(value, {[{object, key, #{}}, junk], #config{return_maps=true}}) + )}, + {"finish an object with no ancestor", ?_assertEqual( + {#{a => b, x => y}, #config{return_maps=true}}, + finish({[{object, #{x => y, a => b}}], 
#config{return_maps=true}}) + )}, + {"finish an empty object", ?_assertEqual( + {#{}, #config{return_maps=true}}, + finish({[{object, #{}}], #config{return_maps=true}}) + )}, + {"finish an object with an ancestor", ?_assertEqual( + { + [{object, #{key => #{a => b, x => y}, foo => bar}}], + #config{return_maps=true} + }, + finish({ + [{object, #{x => y, a => b}}, {object, key, #{foo => bar}}], + #config{return_maps=true} + }) + )} + ]. + return_maps_test_() -> [ @@ -334,7 +432,6 @@ return_maps_test_() -> jsx:decode(<<"[{}]">>, [return_maps]) )} ]. - -endif. From 8a4f92d8e65028ee58d1e786b0eb9fe2af5c3cf9 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 7 Dec 2014 22:21:55 +0000 Subject: [PATCH 04/19] skip redundant check for `uescape` escaping --- src/jsx_decoder.erl | 5 +---- src/jsx_parser.erl | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 81f06ba..de8e1af 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -311,7 +311,7 @@ string(<>, Handler, Acc, Stack, Config) -> string(<>, Handler, Acc, Stack, Config) -> unescape(Rest, Handler, Acc, Stack, Config); string(<>, Handler, Acc, Stack, Config=#config{uescape=true}) when X >= 16#80 -> - string(Rest, Handler, [Acc, maybe_replace(X, Config)], Stack, Config); + string(Rest, Handler, [Acc, json_escape_sequence(X)], Stack, Config); string(<>, Handler, Acc, Stack, Config) when X == 16#2028; X == 16#2029 -> string(Rest, Handler, [Acc, maybe_replace(X, Config)], Stack, Config); string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) -> @@ -761,9 +761,6 @@ maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == end; maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> json_escape_sequence(X); -%% escaped even if no other escaping requested! -maybe_replace(X, #config{uescape=true}) when X >= 16#80 -> - json_escape_sequence(X); maybe_replace(X, _Config) -> <>. 
diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 7a8f230..e99ef53 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -205,7 +205,7 @@ clean(<<47, Rest/binary>>, Acc, Config) -> clean(<<92, Rest/binary>>, Acc, Config) -> clean(Rest, [Acc, maybe_replace(92, Config)], Config); clean(<>, Acc, Config=#config{uescape=true}) when X >= 16#80 -> - clean(Rest, [Acc, maybe_replace(X, Config)], Config); + clean(Rest, [Acc, json_escape_sequence(X)], Config); clean(<>, Acc, Config) when X == 16#2028; X == 16#2029 -> clean(Rest, [Acc, maybe_replace(X, Config)], Config); clean(<<_/utf8, _/binary>> = Bin, Acc, Config) -> @@ -489,9 +489,6 @@ maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\]; maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> json_escape_sequence(X); -%% escaped even if no other escaping was requested! -maybe_replace(X, #config{uescape=true}) when X >= 16#80 -> - json_escape_sequence(X); maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> case Config#config.unescaped_jsonp of true -> [<>]; From a9982687c63101bac482489ea12a4a3c8d8c4b2f Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 7 Dec 2014 15:30:37 -0800 Subject: [PATCH 05/19] certain astral plane json escape sequences were inadvertently being replaced with the replacement character --- src/jsx_decoder.erl | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index de8e1af..0f4f3f2 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -687,31 +687,36 @@ unescape(<>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, maybe_replace($\\, Config)], Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, maybe_replace($/, Config)], Stack, Config); -unescape(<<$u, $d, A, B, C, ?rsolidus, $u, $d, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) - when (A == $8 orelse A == $9 
orelse A == $a orelse A == $b), - (X == $c orelse X == $d orelse X == $e orelse X == $f), - ?is_hex(B), ?is_hex(C), ?is_hex(Y), ?is_hex(Z) +unescape(<<$u, F, A, B, C, ?rsolidus, $u, G, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) + when (A == $8 orelse A == $9 orelse A == $a orelse A == $b orelse A == $A orelse A == $B), + (X == $c orelse X == $d orelse X == $e orelse X == $f orelse X == $C orelse X == $D orelse X == $E orelse X == $F), + (F == $d orelse F == $D), + (G == $d orelse G == $D), + ?is_hex(B), ?is_hex(C), ?is_hex(Y), ?is_hex(Z) -> High = erlang:list_to_integer([$d, A, B, C], 16), Low = erlang:list_to_integer([$d, X, Y, Z], 16), Codepoint = (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000, string(Rest, Handler, [Acc, <>], Stack, Config); -unescape(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) - when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), - ?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z) +unescape(<<$u, F, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) + when (A == $8 orelse A == $9 orelse A == $a orelse A == $b orelse A == $A orelse A == $B), + (F == $d orelse F == $D), + ?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z) -> case Config#config.strict_utf8 of true -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config); false -> string(Rest, Handler, [Acc, <<16#fffd/utf8>>, <<16#fffd/utf8>>], Stack, Config) end; -unescape(<<$u, $d, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) - when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), - ?is_hex(B), ?is_hex(C) +unescape(<<$u, F, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) + when (A == $8 orelse A == $9 orelse A == $a orelse A == $b orelse A == $A orelse A == $B), + (F == $d orelse F == $D), + ?is_hex(B), ?is_hex(C) -> incomplete(string, <>, Handler, Acc, Stack, Config); 
-unescape(<<$u, $d, A, B, C>>, Handler, Acc, Stack, Config) - when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), - ?is_hex(B), ?is_hex(C) +unescape(<<$u, F, A, B, C>>, Handler, Acc, Stack, Config) + when (A == $8 orelse A == $9 orelse A == $a orelse A == $b orelse A == $A orelse A == $B), + (F == $d orelse F == $D), + ?is_hex(B), ?is_hex(C) -> incomplete(string, <>, Handler, Acc, Stack, Config); unescape(<<$u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config) @@ -1526,16 +1531,27 @@ unescape_test_() -> {"unescape reverse solidus", <<"\\">>, <<"\\\\"/utf8>>}, {"unescape control", <<0>>, <<"\\u0000"/utf8>>}, {"unescape surrogate pair", <<16#10000/utf8>>, <<"\\ud800\\udc00"/utf8>>}, + {"unescape surrogate pair", <<16#10000/utf8>>, <<"\\uD800\\uDC00"/utf8>>}, {"replace bad high surrogate", <<16#fffd/utf8>>, <<"\\udc00"/utf8>>}, + {"replace bad high surrogate", <<16#fffd/utf8>>, <<"\\uDC00"/utf8>>}, {"replace naked high surrogate", <<16#fffd/utf8, "hello world">>, <<"\\ud800hello world"/utf8>> }, + {"replace naked high surrogate", + <<16#fffd/utf8, "hello world">>, + <<"\\uD800hello world"/utf8>> + }, {"replace naked low surrogate", <<16#fffd/utf8, "hello world">>, <<"\\udc00hello world"/utf8>> }, - {"replace bad surrogate pair", <<16#fffd/utf8, 16#fffd/utf8>>, <<"\\ud800\\u0000">>} + {"replace naked low surrogate", + <<16#fffd/utf8, "hello world">>, + <<"\\uDC00hello world"/utf8>> + }, + {"replace bad surrogate pair", <<16#fffd/utf8, 16#fffd/utf8>>, <<"\\ud800\\u0000">>}, + {"replace bad surrogate pair", <<16#fffd/utf8, 16#fffd/utf8>>, <<"\\uD800\\u0000">>} ], [{Title, ?_assertEqual([{string, Escaped}, end_json], decode(<<34, JSON/binary, 34>>))} || {Title, Escaped, JSON} <- Cases From 8e2d7a02539150f019d03ddb184ff813d8457de5 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 7 Dec 2014 16:31:29 -0800 Subject: [PATCH 06/19] decoder clause reordering and unrolling to reduce `when` clauses --- src/jsx_decoder.erl | 102 
++++++++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 27 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 0f4f3f2..1ba5c07 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -119,10 +119,6 @@ resume(Rest, State, Handler, Acc, Stack, Config) -> Symbol >= $1 andalso Symbol =< $9 ). --define(is_whitespace(Symbol), - Symbol =:= ?space; Symbol =:= ?tab; Symbol =:= ?cr; Symbol =:= ?newline -). - %% error is a macro so the stack trace shows the error site when possible -ifndef(error). @@ -177,8 +173,12 @@ start(Bin, Handler, Stack, Config) -> value(<>, Handler, Stack, Config) -> string(Rest, Handler, Stack, Config); -value(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> - string(Rest, Handler, [singlequote|Stack], Config); +value(<>, Handler, Stack, Config) -> + value(Rest, Handler, Stack, Config); +value(<>, Handler, Stack, Config) -> + object(Rest, handle_event(start_object, Handler, Config), [key|Stack], Config); +value(<>, Handler, Stack, Config) -> + array(Rest, handle_event(start_array, Handler, Config), [array|Stack], Config); value(<<$t, Rest/binary>>, Handler, Stack, Config) -> true(Rest, Handler, Stack, Config); value(<<$f, Rest/binary>>, Handler, Stack, Config) -> @@ -189,14 +189,16 @@ value(<>, Handler, Stack, Config) -> negative(Rest, Handler, [$-], Stack, Config); value(<>, Handler, Stack, Config) -> zero(Rest, Handler, [$0], Stack, Config); +value(<>, Handler, Stack, Config) -> + value(Rest, Handler, Stack, Config); +value(<>, Handler, Stack, Config) -> + value(Rest, Handler, Stack, Config); +value(<>, Handler, Stack, Config) -> + value(Rest, Handler, Stack, Config); +value(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> + string(Rest, Handler, [singlequote|Stack], Config); value(<>, Handler, Stack, Config) when ?is_nonzero(S) -> integer(Rest, Handler, [S], Stack, Config); -value(<>, Handler, Stack, Config) -> - object(Rest, handle_event(start_object, Handler, 
Config), [key|Stack], Config); -value(<>, Handler, Stack, Config) -> - array(Rest, handle_event(start_array, Handler, Config), [array|Stack], Config); -value(<>, Handler, Stack, Config) when ?is_whitespace(S) -> - value(Rest, Handler, Stack, Config); value(<> = Rest, Handler, Stack, Config=#config{strict_commas=false}) -> maybe_done(Rest, Handler, Stack, Config); value(<>, Handler, Stack, Config=#config{strict_comments=true}) -> @@ -215,12 +217,18 @@ value(Bin, Handler, Stack, Config) -> object(<>, Handler, Stack, Config) -> string(Rest, Handler, Stack, Config); -object(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> - string(Rest, Handler, [singlequote|Stack], Config); +object(<>, Handler, Stack, Config) -> + object(Rest, Handler, Stack, Config); object(<>, Handler, [key|Stack], Config) -> maybe_done(Rest, handle_event(end_object, Handler, Config), Stack, Config); -object(<>, Handler, Stack, Config) when ?is_whitespace(S) -> +object(<>, Handler, Stack, Config) -> object(Rest, Handler, Stack, Config); +object(<>, Handler, Stack, Config) -> + object(Rest, Handler, Stack, Config); +object(<>, Handler, Stack, Config) -> + object(Rest, Handler, Stack, Config); +object(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> + string(Rest, Handler, [singlequote|Stack], Config); object(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(object, <>, Handler, Stack, Config); object(<>, Handler, Stack, Config) -> @@ -237,7 +245,13 @@ object(Bin, Handler, Stack, Config) -> array(<>, Handler, [array|Stack], Config) -> maybe_done(Rest, handle_event(end_array, Handler, Config), Stack, Config); -array(<>, Handler, Stack, Config) when ?is_whitespace(S) -> +array(<>, Handler, Stack, Config) -> + array(Rest, Handler, Stack, Config); +array(<>, Handler, Stack, Config) -> + array(Rest, Handler, Stack, Config); +array(<>, Handler, Stack, Config) -> + array(Rest, Handler, Stack, Config); +array(<>, Handler, Stack, Config) -> array(Rest, 
Handler, Stack, Config); array(<>, Handler, Stack, Config=#config{strict_comments=true}) -> value(<>, Handler, Stack, Config); @@ -255,7 +269,13 @@ array(Bin, Handler, Stack, Config) -> colon(<>, Handler, [key|Stack], Config) -> value(Rest, Handler, [object|Stack], Config); -colon(<>, Handler, Stack, Config) when ?is_whitespace(S) -> +colon(<>, Handler, Stack, Config) -> + colon(Rest, Handler, Stack, Config); +colon(<>, Handler, Stack, Config) -> + colon(Rest, Handler, Stack, Config); +colon(<>, Handler, Stack, Config) -> + colon(Rest, Handler, Stack, Config); +colon(<>, Handler, Stack, Config) -> colon(Rest, Handler, Stack, Config); colon(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(colon, <>, Handler, Stack, Config); @@ -273,12 +293,18 @@ colon(Bin, Handler, Stack, Config) -> key(<>, Handler, Stack, Config) -> string(Rest, Handler, Stack, Config); -key(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> - string(Rest, Handler, [singlequote|Stack], Config); -key(<>, Handler, Stack, Config) when ?is_whitespace(S) -> +key(<>, Handler, Stack, Config) -> key(Rest, Handler, Stack, Config); key(<>, Handler, [key|Stack], Config=#config{strict_commas=false}) -> maybe_done(<>, Handler, [object|Stack], Config); +key(<>, Handler, Stack, Config) -> + key(Rest, Handler, Stack, Config); +key(<>, Handler, Stack, Config) -> + key(Rest, Handler, Stack, Config); +key(<>, Handler, Stack, Config) -> + key(Rest, Handler, Stack, Config); +key(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> + string(Rest, Handler, [singlequote|Stack], Config); key(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(key, <>, Handler, Stack, Config); key(<>, Handler, Stack, Config) -> @@ -310,10 +336,17 @@ string(<>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, maybe_replace(?solidus, Config)], Stack, Config); string(<>, Handler, Acc, Stack, Config) -> unescape(Rest, Handler, Acc, Stack, Config); -string(<>, Handler, 
Acc, Stack, Config=#config{uescape=true}) when X >= 16#80 -> - string(Rest, Handler, [Acc, json_escape_sequence(X)], Stack, Config); -string(<>, Handler, Acc, Stack, Config) when X == 16#2028; X == 16#2029 -> - string(Rest, Handler, [Acc, maybe_replace(X, Config)], Stack, Config); +string(<>, Handler, Acc, Stack, Config=#config{uescape=true}) -> + case X of + X when X < 16#80 -> string(Rest, Handler, [Acc, X], Stack, Config); + X -> string(Rest, Handler, [Acc, json_escape_sequence(X)], Stack, Config) + end; +%% u+2028 +string(<<226, 128, 168, Rest/binary>>, Handler, Acc, Stack, Config) -> + string(Rest, Handler, [Acc, maybe_replace(16#2028, Config)], Stack, Config); +%% u+2029 +string(<<226, 128, 169, Rest/binary>>, Handler, Acc, Stack, Config) -> + string(Rest, Handler, [Acc, maybe_replace(16#2029, Config)], Stack, Config); string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) -> Size = count(Bin, 0, Config), <> = Bin, @@ -616,7 +649,10 @@ count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) -> count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N; count(<>, N, Config) when X < 16#800 -> count(Rest, N + 2, Config); -count(<>, N, _) when X == 16#2028; X == 16#2029 -> N; +%% u+2028 +count(<<226, 128, 168, _/binary>>, N, _) -> N; +%% u+2029 +count(<<226, 128, 169, _/binary>>, N, _) -> N; count(<>, N, Config) when X < 16#10000 -> count(Rest, N + 3, Config); count(<<_/utf8, Rest/binary>>, N, Config) -> @@ -944,6 +980,8 @@ comment(Bin, Handler, Resume, Stack, Config) -> maybe_done(<>, Handler, [], Config) -> done(Rest, handle_event(end_json, Handler, Config), [], Config); +maybe_done(<>, Handler, Stack, Config) -> + maybe_done(Rest, Handler, Stack, Config); maybe_done(<>, Handler, [object|Stack], Config) -> maybe_done(Rest, handle_event(end_object, Handler, Config), Stack, Config); maybe_done(<>, Handler, [array|Stack], Config) -> @@ -952,7 +990,11 @@ maybe_done(<>, Handler, [object|Stack], Config) -> key(Rest, Handler, [key|Stack], Config); 
maybe_done(<>, Handler, [array|_] = Stack, Config) -> value(Rest, Handler, Stack, Config); -maybe_done(<>, Handler, Stack, Config) when ?is_whitespace(S) -> +maybe_done(<>, Handler, Stack, Config) -> + maybe_done(Rest, Handler, Stack, Config); +maybe_done(<>, Handler, Stack, Config) -> + maybe_done(Rest, Handler, Stack, Config); +maybe_done(<>, Handler, Stack, Config) -> maybe_done(Rest, Handler, Stack, Config); maybe_done(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(maybe_done, <>, Handler, Stack, Config); @@ -968,7 +1010,13 @@ maybe_done(Bin, Handler, Stack, Config) -> ?error(maybe_done, Bin, Handler, Stack, Config). -done(<>, Handler, [], Config) when ?is_whitespace(S) -> +done(<>, Handler, [], Config) -> + done(Rest, Handler, [], Config); +done(<>, Handler, [], Config) -> + done(Rest, Handler, [], Config); +done(<>, Handler, [], Config) -> + done(Rest, Handler, [], Config); +done(<>, Handler, [], Config) -> done(Rest, Handler, [], Config); done(<>, Handler, Stack, Config=#config{strict_comments=true}) -> ?error(done, <>, Handler, Stack, Config); From 4b3fa9b903688508d3ebd6b009fbfb50ab61779d Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 7 Dec 2014 19:12:43 -0800 Subject: [PATCH 07/19] further unrolling of string parsing states and elimination of `when` guards --- src/jsx_decoder.erl | 12 ++++++------ src/jsx_parser.erl | 17 ++++++++++------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 1ba5c07..497edaf 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -647,16 +647,16 @@ count(<<127, Rest/binary>>, N, Config) -> count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) -> count(Rest, N + 1, Config); count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N; -count(<>, N, Config) when X < 16#800 -> - count(Rest, N + 2, Config); %% u+2028 count(<<226, 128, 168, _/binary>>, N, _) -> N; %% u+2029 count(<<226, 128, 169, _/binary>>, N, _) -> N; 
-count(<>, N, Config) when X < 16#10000 -> - count(Rest, N + 3, Config); -count(<<_/utf8, Rest/binary>>, N, Config) -> - count(Rest, N + 4, Config); +count(<>, N, Config) -> + case X of + X when X < 16#800 -> count(Rest, N + 2, Config); + X when X < 16#10000 -> count(Rest, N + 3, Config); + _ -> count(Rest, N + 4, Config) + end; count(_, N, _) -> N. diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index e99ef53..1773361 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -451,13 +451,16 @@ count(<<126, Rest/binary>>, N, Config) -> count(<<127, Rest/binary>>, N, Config) -> count(Rest, N + 1, Config); count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N; -count(<>, N, _) when X == 16#2028; X == 16#2029 -> N; -count(<>, N, Config) when X < 16#800 -> - count(Rest, N + 2, Config); -count(<>, N, Config) when X < 16#10000 -> - count(Rest, N + 3, Config); -count(<<_/utf8, Rest/binary>>, N, Config) -> - count(Rest, N + 4, Config); +%% u+2028 +count(<<226, 128, 168, _/binary>>, N, _) -> N; +%% u+2029 +count(<<226, 128, 169, _/binary>>, N, _) -> N; +count(<>, N, Config) -> + case X of + X when X < 16#800 -> count(Rest, N + 2, Config); + X when X < 16#10000 -> count(Rest, N + 3, Config); + _ -> count(Rest, N + 4, Config) + end; count(<<_, _/binary>>, N, _) -> N. 
From 34771087ff2efe13e2a04a2da983c3e6a30d3e14 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Mon, 8 Dec 2014 04:05:03 +0000 Subject: [PATCH 08/19] switch to iodata accumulation for number parsing --- src/jsx_decoder.erl | 77 ++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 497edaf..11fd5eb 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -60,6 +60,8 @@ resume(Rest, State, Handler, Acc, Stack, Config) -> string -> string(Rest, Handler, Acc, Stack, Config); integer -> integer(Rest, Handler, Acc, Stack, Config); decimal -> decimal(Rest, Handler, Acc, Stack, Config); + e -> e(Rest, Handler, Acc, Stack, Config); + ex -> ex(Rest, Handler, Acc, Stack, Config); exp -> exp(Rest, Handler, Acc, Stack, Config); zero -> zero(Rest, Handler, Acc, Stack, Config); true -> true(Rest, Handler, Stack, Config); @@ -826,78 +828,75 @@ to_hex(X) -> X + 48. %% like in strings, there's some pseudo states in here that will never -%% show up in errors or incompletes. some show up in value, some show -%% up in integer, decimal or exp +%% show up in incompletes. some show up in value, some show up in +%% integer, decimal or exp negative(<<$0, Rest/binary>>, Handler, Acc, Stack, Config) -> - zero(Rest, Handler, acc_seq(Acc, $0), Stack, Config); + zero(Rest, Handler, [Acc, $0], Stack, Config); negative(<>, Handler, Acc, Stack, Config) when ?is_nonzero(S) -> - integer(Rest, Handler, acc_seq(Acc, S), Stack, Config); + integer(Rest, Handler, [Acc, S], Stack, Config); negative(<<>>, Handler, [?negative], Stack, Config) -> incomplete(value, <>, Handler, Stack, Config); negative(Bin, Handler, Acc, Stack, Config) -> - ?error(value, <>, Handler, Acc, Stack, Config). + ?error(negative, Bin, Handler, Acc, Stack, Config). 
zero(<>, Handler, Acc, Stack, Config) -> - decimal(Rest, Handler, acc_seq(Acc, ?decimalpoint), Stack, Config); + decimal(Rest, Handler, [Acc, ?decimalpoint], Stack, Config); zero(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> - e(Rest, Handler, acc_seq(Acc, ".0e"), Stack, Config); + e(Rest, Handler, [Acc, ".0e"], Stack, Config); zero(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {zero, Acc}, Stack, Config). + finish_number(Bin, Handler, {zero, lists:flatten(Acc)}, Stack, Config). integer(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - integer(Rest, Handler, acc_seq(Acc, S), Stack, Config); + integer(Rest, Handler, [Acc, S], Stack, Config); integer(<>, Handler, Acc, Stack, Config) -> - initialdecimal(Rest, Handler, acc_seq(Acc, ?decimalpoint), Stack, Config); + initialdecimal(Rest, Handler, [Acc, ?decimalpoint], Stack, Config); integer(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> - e(Rest, Handler, acc_seq(Acc, ".0e"), Stack, Config); + e(Rest, Handler, [Acc, ".0e"], Stack, Config); integer(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {integer, Acc}, Stack, Config). + finish_number(Bin, Handler, {integer, lists:flatten(Acc)}, Stack, Config). initialdecimal(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - decimal(Rest, Handler, acc_seq(Acc, S), Stack, Config); -initialdecimal(<<>>, Handler, [?decimalpoint|Acc], Stack, Config) -> - incomplete(integer, <>, Handler, Acc, Stack, Config); + decimal(Rest, Handler, [Acc, S], Stack, Config); +initialdecimal(<<>>, Handler, Acc, Stack, Config) -> + [?decimalpoint|Rest] = lists:reverse(Acc), + incomplete(integer, <>, Handler, lists:reverse(Rest), Stack, Config); initialdecimal(Bin, Handler, Acc, Stack, Config) -> - ?error(decimal, Bin, Handler, Acc, Stack, Config). + ?error(initialdecimal, Bin, Handler, Acc, Stack, Config). 
decimal(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - decimal(Rest, Handler, acc_seq(Acc, S), Stack, Config); + decimal(Rest, Handler, [Acc, S], Stack, Config); decimal(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> - e(Rest, Handler, acc_seq(Acc, $e), Stack, Config); + e(Rest, Handler, [Acc, $e], Stack, Config); decimal(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {decimal, Acc}, Stack, Config). + finish_number(Bin, Handler, {decimal, lists:flatten(Acc)}, Stack, Config). e(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - exp(Rest, Handler, acc_seq(Acc, S), Stack, Config); + exp(Rest, Handler, [Acc, S], Stack, Config); e(<>, Handler, Acc, Stack, Config) when Sign =:= ?positive; Sign =:= ?negative -> - ex(Rest, Handler, acc_seq(Acc, Sign), Stack, Config); -e(<<>>, Handler, [$e|Acc], Stack, Config) -> - incomplete(decimal, <<$e>>, Handler, Acc, Stack, Config); + ex(Rest, Handler, [Acc, Sign], Stack, Config); +e(<<>>, Handler, Acc, Stack, Config) -> + incomplete(e, <<>>, Handler, Acc, Stack, Config); e(Bin, Handler, Acc, Stack, Config) -> - ?error(decimal, <<$e, Bin/binary>>, Handler, Acc, Stack, Config). + ?error(e, Bin, Handler, Acc, Stack, Config). ex(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - exp(Rest, Handler, acc_seq(Acc, S), Stack, Config); -ex(<<>>, Handler, [S, $e|Acc], Stack, Config) -> - incomplete(decimal, <<$e, S/utf8>>, Handler, Acc, Stack, Config); -ex(Bin, Handler, [S, $e|Acc], Stack, Config) -> - ?error(decimal, <<$e, S, Bin/binary>>, Handler, Acc, Stack, Config). + exp(Rest, Handler, [Acc, S], Stack, Config); +ex(<<>>, Handler, Acc, Stack, Config) -> + incomplete(ex, <<>>, Handler, Acc, Stack, Config); +ex(Bin, Handler, Acc, Stack, Config) -> + ?error(ex, Bin, Handler, Acc, Stack, Config). 
exp(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - exp(Rest, Handler, acc_seq(Acc, S), Stack, Config); + exp(Rest, Handler, [Acc, S], Stack, Config); exp(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {exp, Acc}, Stack, Config). - - -acc_seq(Seq, C) when is_list(C) -> lists:reverse(C) ++ Seq; -acc_seq(Seq, C) -> [C] ++ Seq. + finish_number(Bin, Handler, {exp, lists:flatten(Acc)}, Stack, Config). finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) -> @@ -908,10 +907,10 @@ finish_number(Rest, Handler, Acc, Stack, Config) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config). -format_number({zero, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))}; -format_number({integer, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))}; -format_number({decimal, Acc}) -> {float, list_to_float(lists:reverse(Acc))}; -format_number({exp, Acc}) -> {float, list_to_float(lists:reverse(Acc))}. +format_number({zero, Acc}) -> {integer, list_to_integer(Acc)}; +format_number({integer, Acc}) -> {integer, list_to_integer(Acc)}; +format_number({decimal, Acc}) -> {float, list_to_float(Acc)}; +format_number({exp, Acc}) -> {float, list_to_float(Acc)}. 
true(<<$r, $u, $e, Rest/binary>>, Handler, Stack, Config) -> From c39515bd60c898b7233e42d469f1875337a105c8 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 01:46:57 +0000 Subject: [PATCH 09/19] early exit from value when encountering literals and less list allocation for number parsing --- src/jsx_decoder.erl | 54 +++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 11fd5eb..ddbbfad 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -181,26 +181,48 @@ value(<>, Handler, Stack, Config) -> object(Rest, handle_event(start_object, Handler, Config), [key|Stack], Config); value(<>, Handler, Stack, Config) -> array(Rest, handle_event(start_array, Handler, Config), [array|Stack], Config); +value(<<$t, $r, $u, $e, Rest/binary>>, Handler, Stack, Config) -> + maybe_done(Rest, handle_event({literal, true}, Handler, Config), Stack, Config); +value(<<$f, $a, $l, $s, $e, Rest/binary>>, Handler, Stack, Config) -> + maybe_done(Rest, handle_event({literal, false}, Handler, Config), Stack, Config); +value(<<$n, $u, $l, $l, Rest/binary>>, Handler, Stack, Config) -> + maybe_done(Rest, handle_event({literal, null}, Handler, Config), Stack, Config); +value(<>, Handler, Stack, Config) -> + zero(Rest, Handler, [$0], Stack, Config); +value(<<$1, Rest/binary>>, Handler, Stack, Config) -> + integer(Rest, Handler, [$1], Stack, Config); +value(<<$2, Rest/binary>>, Handler, Stack, Config) -> + integer(Rest, Handler, [$2], Stack, Config); +value(<<$3, Rest/binary>>, Handler, Stack, Config) -> + integer(Rest, Handler, [$3], Stack, Config); +value(<<$4, Rest/binary>>, Handler, Stack, Config) -> + integer(Rest, Handler, [$4], Stack, Config); +value(<<$5, Rest/binary>>, Handler, Stack, Config) -> + integer(Rest, Handler, [$5], Stack, Config); +value(<<$6, Rest/binary>>, Handler, Stack, Config) -> + integer(Rest, Handler, [$6], Stack, Config); +value(<<$7, 
Rest/binary>>, Handler, Stack, Config) -> + integer(Rest, Handler, [$7], Stack, Config); +value(<<$8, Rest/binary>>, Handler, Stack, Config) -> + integer(Rest, Handler, [$8], Stack, Config); +value(<<$9, Rest/binary>>, Handler, Stack, Config) -> + integer(Rest, Handler, [$9], Stack, Config); +value(<>, Handler, Stack, Config) -> + negative(Rest, Handler, [$-], Stack, Config); +value(<>, Handler, Stack, Config) -> + value(Rest, Handler, Stack, Config); value(<<$t, Rest/binary>>, Handler, Stack, Config) -> true(Rest, Handler, Stack, Config); value(<<$f, Rest/binary>>, Handler, Stack, Config) -> false(Rest, Handler, Stack, Config); value(<<$n, Rest/binary>>, Handler, Stack, Config) -> null(Rest, Handler, Stack, Config); -value(<>, Handler, Stack, Config) -> - negative(Rest, Handler, [$-], Stack, Config); -value(<>, Handler, Stack, Config) -> - zero(Rest, Handler, [$0], Stack, Config); -value(<>, Handler, Stack, Config) -> - value(Rest, Handler, Stack, Config); value(<>, Handler, Stack, Config) -> value(Rest, Handler, Stack, Config); value(<>, Handler, Stack, Config) -> value(Rest, Handler, Stack, Config); value(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> string(Rest, Handler, [singlequote|Stack], Config); -value(<>, Handler, Stack, Config) when ?is_nonzero(S) -> - integer(Rest, Handler, [S], Stack, Config); value(<> = Rest, Handler, Stack, Config=#config{strict_commas=false}) -> maybe_done(Rest, Handler, Stack, Config); value(<>, Handler, Stack, Config=#config{strict_comments=true}) -> @@ -845,7 +867,7 @@ zero(<>, Handler, Acc, Stack, Config) -> zero(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> e(Rest, Handler, [Acc, ".0e"], Stack, Config); zero(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {zero, lists:flatten(Acc)}, Stack, Config). + finish_number(Bin, Handler, {zero, iolist_to_binary(Acc)}, Stack, Config). 
integer(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> @@ -855,7 +877,7 @@ integer(<>, Handler, Acc, Stack, Config) -> integer(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> e(Rest, Handler, [Acc, ".0e"], Stack, Config); integer(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {integer, lists:flatten(Acc)}, Stack, Config). + finish_number(Bin, Handler, {integer, iolist_to_binary(Acc)}, Stack, Config). initialdecimal(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> @@ -872,7 +894,7 @@ decimal(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_n decimal(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> e(Rest, Handler, [Acc, $e], Stack, Config); decimal(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {decimal, lists:flatten(Acc)}, Stack, Config). + finish_number(Bin, Handler, {decimal, iolist_to_binary(Acc)}, Stack, Config). e(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> @@ -896,7 +918,7 @@ ex(Bin, Handler, Acc, Stack, Config) -> exp(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Handler, [Acc, S], Stack, Config); exp(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {exp, lists:flatten(Acc)}, Stack, Config). + finish_number(Bin, Handler, {exp, iolist_to_binary(Acc)}, Stack, Config). finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) -> @@ -907,10 +929,10 @@ finish_number(Rest, Handler, Acc, Stack, Config) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config). -format_number({zero, Acc}) -> {integer, list_to_integer(Acc)}; -format_number({integer, Acc}) -> {integer, list_to_integer(Acc)}; -format_number({decimal, Acc}) -> {float, list_to_float(Acc)}; -format_number({exp, Acc}) -> {float, list_to_float(Acc)}. 
+format_number({zero, Acc}) -> {integer, binary_to_integer(Acc)}; +format_number({integer, Acc}) -> {integer, binary_to_integer(Acc)}; +format_number({decimal, Acc}) -> {float, binary_to_float(Acc)}; +format_number({exp, Acc}) -> {float, binary_to_float(Acc)}. true(<<$r, $u, $e, Rest/binary>>, Handler, Stack, Config) -> From 261574ee039da6a59c6c080875f741bf6766abbe Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 01:47:40 +0000 Subject: [PATCH 10/19] less list allocation in `jsx_to_term` --- src/jsx_to_term.erl | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index ad7aae0..5a628ea 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -172,10 +172,10 @@ start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}. %% return it if it is the root object finish({[{object, []}], Config}) -> {[{}], Config}; finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); -finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; -finish({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config}); -finish({[{array, Values}], Config}) -> {lists:reverse(Values), Config}; -finish({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {Pairs, Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(Pairs, {Rest, Config}); +finish({[{array, Values}], Config}) -> {Values, Config}; +finish({[{array, Values}|Rest], Config}) -> insert(Values, {Rest, Config}); finish(_) -> erlang:error(badarg). 
@@ -185,9 +185,9 @@ insert(Value, {[], Config}) -> {Value, Config}; insert(Key, {[{object, Pairs}|Rest], Config}) -> {[{object, Key, Pairs}] ++ Rest, Config}; insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> - {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config}; + {[{object, Pairs ++ [{Key, Value}]}] ++ Rest, Config}; insert(Value, {[{array, Values}|Rest], Config}) -> - {[{array, [Value] ++ Values}] ++ Rest, Config}; + {[{array, Values ++ [Value]}] ++ Rest, Config}; insert(_, _) -> erlang:error(badarg). -endif. @@ -212,10 +212,10 @@ finish({[{object, Map}|Rest], Config=#config{return_maps=true}}) -> insert(Map, {Rest, Config}); finish({[{object, []}], Config}) -> {[{}], Config}; finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); -finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; -finish({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config}); -finish({[{array, Values}], Config}) -> {lists:reverse(Values), Config}; -finish({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {Pairs, Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(Pairs, {Rest, Config}); +finish({[{array, Values}], Config}) -> {Values, Config}; +finish({[{array, Values}|Rest], Config}) -> insert(Values, {Rest, Config}); finish(_) -> erlang:error(badarg). @@ -229,9 +229,9 @@ insert(Key, {[{object, Pairs}|Rest], Config}) -> insert(Value, {[{object, Key, Map}|Rest], Config=#config{return_maps=true}}) -> {[{object, maps:put(Key, Value, Map)}] ++ Rest, Config}; insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> - {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config}; + {[{object, Pairs ++ [{Key, Value}]}] ++ Rest, Config}; insert(Value, {[{array, Values}|Rest], Config}) -> - {[{array, [Value] ++ Values}] ++ Rest, Config}; + {[{array, Values ++ [Value]}] ++ Rest, Config}; insert(_, _) -> erlang:error(badarg). -endif. 
@@ -339,7 +339,7 @@ rep_manipulation_test_() -> insert(value, {[{array, []}, junk], #config{}}) )}, {"finish an object with no ancestor", ?_assertEqual( - {[{a, b}, {x, y}], #config{}}, + {[{x, y}, {a, b}], #config{}}, finish({[{object, [{x, y}, {a, b}]}], #config{}}) )}, {"finish an empty object", ?_assertEqual( @@ -347,16 +347,16 @@ rep_manipulation_test_() -> finish({[{object, []}], #config{}}) )}, {"finish an object with an ancestor", ?_assertEqual( - {[{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], #config{}}, + {[{object, [{foo, bar}, {key, [{x, y}, {a, b}]}]}], #config{}}, finish({[{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}], #config{}}) )}, {"finish an array with no ancestor", ?_assertEqual( {[a, b, c], #config{}}, - finish({[{array, [c, b, a]}], #config{}}) + finish({[{array, [a, b, c]}], #config{}}) )}, {"finish an array with an ancestor", ?_assertEqual( - {[{array, [[a, b, c], d, e, f]}], #config{}}, - finish({[{array, [c, b, a]}, {array, [d, e, f]}], #config{}}) + {[{array, [d, e, f, [a, b, c]]}], #config{}}, + finish({[{array, [a, b, c]}, {array, [d, e, f]}], #config{}}) )} ]. From b1320a2e67998a805e9f45e840404df7680e0be5 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 05:00:18 +0000 Subject: [PATCH 11/19] remove `package.exs`, use `mix` branch instead --- package.exs | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 package.exs diff --git a/package.exs b/package.exs deleted file mode 100644 index d5708a6..0000000 --- a/package.exs +++ /dev/null @@ -1,30 +0,0 @@ -defmodule JSX.Mixfile do -use Mix.Project - - def project do - [ - app: :jsx, - version: "2.3.0", - description: "an erlang application for consuming, producing and manipulating json. 
inspired by yajl", - package: package - ] - end - - defp package do - [ - files: [ - "CHANGES.md", - "LICENSE", - "package.exs", - "README.md", - "rebar.config", - "rebar.config.script", - "config", - "src" - ], - contributors: ["alisdair sullivan"], - links: %{"github" => "https://github.com/talentdeficit/jsx"}, - licenses: ["MIT"] - ] - end -end \ No newline at end of file From 1033245da3bfdc80cdb7a67597030222563754f4 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 05:08:05 +0000 Subject: [PATCH 12/19] binary_to_whatever is not available on R15? fixed --- rebar.config | 4 +++- src/jsx_decoder.erl | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index c59e185..628e738 100644 --- a/rebar.config +++ b/rebar.config @@ -1,2 +1,4 @@ % uncomment to disable encoding support for erlang maps -% {jsx_nomaps, true}. \ No newline at end of file +% {jsx_nomaps, true}. + +{erl_opts, [{platform_define, "R15", 'no_binary_to_whatever'}]}. \ No newline at end of file diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index ddbbfad..b16199b 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -929,10 +929,19 @@ finish_number(Rest, Handler, Acc, Stack, Config) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config). +-ifndef(no_binary_to_whatever). format_number({zero, Acc}) -> {integer, binary_to_integer(Acc)}; format_number({integer, Acc}) -> {integer, binary_to_integer(Acc)}; format_number({decimal, Acc}) -> {float, binary_to_float(Acc)}; format_number({exp, Acc}) -> {float, binary_to_float(Acc)}. +-endif. + +-ifdef(no_binary_to_whatever). 
+format_number({zero, Acc}) -> {integer, list_to_integer(unicode:characters_to_list(Acc))}; +format_number({integer, Acc}) -> {integer, list_to_integer(unicode:characters_to_list(Acc))}; +format_number({decimal, Acc}) -> {float, list_to_float(unicode:characters_to_list(Acc))}; +format_number({exp, Acc}) -> {float, list_to_float(unicode:characters_to_list(Acc))}. +-endif. true(<<$r, $u, $e, Rest/binary>>, Handler, Stack, Config) -> From 371b49a9e39b06cd625fb4a509ae614d1a7a599c Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 05:16:26 +0000 Subject: [PATCH 13/19] extend `binary_to_whatever` to R14 even though it's not really supported anymore --- rebar.config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 628e738..35ddaa6 100644 --- a/rebar.config +++ b/rebar.config @@ -1,4 +1,6 @@ % uncomment to disable encoding support for erlang maps % {jsx_nomaps, true}. -{erl_opts, [{platform_define, "R15", 'no_binary_to_whatever'}]}. \ No newline at end of file +{erl_opts, [ + {platform_define, "R14|R15", 'no_binary_to_whatever'} +]}. \ No newline at end of file From 5843dbc56a6da9e6a186dc571b0d2bd1c11ec35c Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 08:05:48 +0000 Subject: [PATCH 14/19] actually i can use flatten in `jsx_to_term`! sweet --- src/jsx_to_term.erl | 51 ++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 5a628ea..6dc0c85 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -23,7 +23,7 @@ -module(jsx_to_term). --export([to_term/2]). +-export([to_term/2, flatify/1]). -export([init/1, handle_event/2]). -export([ start_term/1, @@ -172,10 +172,10 @@ start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}. 
%% return it if it is the root object finish({[{object, []}], Config}) -> {[{}], Config}; finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); -finish({[{object, Pairs}], Config}) -> {Pairs, Config}; -finish({[{object, Pairs}|Rest], Config}) -> insert(Pairs, {Rest, Config}); -finish({[{array, Values}], Config}) -> {Values, Config}; -finish({[{array, Values}|Rest], Config}) -> insert(Values, {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {flatify(Pairs), Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(flatify(Pairs), {Rest, Config}); +finish({[{array, Values}], Config}) -> {flatify(Values), Config}; +finish({[{array, Values}|Rest], Config}) -> insert(flatify(Values), {Rest, Config}); finish(_) -> erlang:error(badarg). @@ -185,9 +185,9 @@ insert(Value, {[], Config}) -> {Value, Config}; insert(Key, {[{object, Pairs}|Rest], Config}) -> {[{object, Key, Pairs}] ++ Rest, Config}; insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> - {[{object, Pairs ++ [{Key, Value}]}] ++ Rest, Config}; + {[{object, [Pairs, {Key, Value}]}] ++ Rest, Config}; insert(Value, {[{array, Values}|Rest], Config}) -> - {[{array, Values ++ [Value]}] ++ Rest, Config}; + {[{array, [Values, Value]}] ++ Rest, Config}; insert(_, _) -> erlang:error(badarg). -endif. 
@@ -212,10 +212,10 @@ finish({[{object, Map}|Rest], Config=#config{return_maps=true}}) -> insert(Map, {Rest, Config}); finish({[{object, []}], Config}) -> {[{}], Config}; finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); -finish({[{object, Pairs}], Config}) -> {Pairs, Config}; -finish({[{object, Pairs}|Rest], Config}) -> insert(Pairs, {Rest, Config}); -finish({[{array, Values}], Config}) -> {Values, Config}; -finish({[{array, Values}|Rest], Config}) -> insert(Values, {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {flatify(Pairs), Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(flatify(Pairs), {Rest, Config}); +finish({[{array, Values}], Config}) -> {flatify(Values), Config}; +finish({[{array, Values}|Rest], Config}) -> insert(flatify(Values), {Rest, Config}); finish(_) -> erlang:error(badarg). @@ -229,9 +229,9 @@ insert(Key, {[{object, Pairs}|Rest], Config}) -> insert(Value, {[{object, Key, Map}|Rest], Config=#config{return_maps=true}}) -> {[{object, maps:put(Key, Value, Map)}] ++ Rest, Config}; insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> - {[{object, Pairs ++ [{Key, Value}]}] ++ Rest, Config}; + {[{object, [Pairs, {Key, Value}]}] ++ Rest, Config}; insert(Value, {[{array, Values}|Rest], Config}) -> - {[{array, Values ++ [Value]}] ++ Rest, Config}; + {[{array, [Values, Value]}] ++ Rest, Config}; insert(_, _) -> erlang:error(badarg). -endif. @@ -244,6 +244,15 @@ get_value({Value, _Config}) -> Value; get_value(_) -> erlang:error(badarg). +%% we know the structure of our accumulator so we can safely +%% flatten like this +flatify(List) -> flatify(List, []). +%% head of list should always be [] +flatify([], Tail) -> Tail; +flatify([H, T], Tail) -> flatify(H, [T] ++ Tail). + + + %% eunit tests -ifdef(TEST). 
@@ -331,32 +340,32 @@ rep_manipulation_test_() -> get_key({[{array, []}], #config{}}) )}, {"insert a value into an object", ?_assertEqual( - {[{object, [{key, value}]}, junk], #config{}}, + {[{object, [[], {key, value}]}, junk], #config{}}, insert(value, {[{object, key, []}, junk], #config{}}) )}, {"insert a value into an array", ?_assertEqual( - {[{array, [value]}, junk], #config{}}, + {[{array, [[], value]}, junk], #config{}}, insert(value, {[{array, []}, junk], #config{}}) )}, {"finish an object with no ancestor", ?_assertEqual( {[{x, y}, {a, b}], #config{}}, - finish({[{object, [{x, y}, {a, b}]}], #config{}}) + finish({[{object, [[[], {x, y}], {a, b}]}], #config{}}) )}, {"finish an empty object", ?_assertEqual( {[{}], #config{}}, finish({[{object, []}], #config{}}) )}, {"finish an object with an ancestor", ?_assertEqual( - {[{object, [{foo, bar}, {key, [{x, y}, {a, b}]}]}], #config{}}, - finish({[{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}], #config{}}) + {[{object, [[[], {foo, bar}], {key, [{x, y}, {a, b}]}]}], #config{}}, + finish({[{object, [[[], {x, y}], {a, b}]}, {object, key, [[], {foo, bar}]}], #config{}}) )}, {"finish an array with no ancestor", ?_assertEqual( {[a, b, c], #config{}}, - finish({[{array, [a, b, c]}], #config{}}) + finish({[{array, [[[[], a], b], c]}], #config{}}) )}, {"finish an array with an ancestor", ?_assertEqual( - {[{array, [d, e, f, [a, b, c]]}], #config{}}, - finish({[{array, [a, b, c]}, {array, [d, e, f]}], #config{}}) + {[{array,[[[[[],d],e],f],[a,b,c]]}], #config{}}, + finish({[{array, [[[[], a], b], c]}, {array, [[[[], d], e], f]}], #config{}}) )} ]. 
From e5e04db58eae7e62e979f47321804c82dee875c6 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 02:19:13 -0800 Subject: [PATCH 15/19] produce binaries in escaped strings in decoder/parser instead of lists --- src/jsx_decoder.erl | 32 ++++++++++++++++---------------- src/jsx_parser.erl | 43 ++++++++++++++++++------------------------- 2 files changed, 34 insertions(+), 41 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index b16199b..e3daebd 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -726,9 +726,9 @@ strip_continuations(<>, Handler, Acc, Stack, Config, _) -> %% this all gets really gross and should probably eventually be folded into %% but for now it fakes being part of string on incompletes and errors unescape(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> - string(<>, Handler, [Acc, ?rsolidus], Stack, Config); + string(<>, Handler, [Acc, <>], Stack, Config); unescape(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> - string(Rest, Handler, [Acc, ?rsolidus, C], Stack, Config); + string(Rest, Handler, [Acc, <>], Stack, Config); unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, maybe_replace($\b, Config)], Stack, Config); unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) -> @@ -742,7 +742,7 @@ unescape(<<$t, Rest/binary>>, Handler, Acc, Stack, Config) -> unescape(<>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, maybe_replace($\", Config)], Stack, Config); unescape(<>, Handler, Acc, Stack, Config=#config{strict_single_quotes=false}) -> - string(Rest, Handler, [Acc, ?singlequote], Stack, Config); + string(Rest, Handler, [Acc, <>], Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, maybe_replace($\\, Config)], Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> @@ -793,7 +793,7 @@ unescape(Bin, Handler, Acc, Stack, Config) -> true -> incomplete(string, <>, Handler, Acc, 
Stack, Config); false -> case Config#config.strict_escapes of true -> ?error(string, <>, Handler, Acc, Stack, Config); - false -> string(Bin, Handler, [Acc, ?rsolidus], Stack, Config) + false -> string(Bin, Handler, [Acc, <>], Stack, Config) end end. @@ -806,19 +806,19 @@ is_partial_escape(<<>>) -> true; is_partial_escape(_) -> false. -maybe_replace(C, #config{dirty_strings=true}) -> C; -maybe_replace($\b, #config{escaped_strings=true}) -> [$\\, $b]; -maybe_replace($\t, #config{escaped_strings=true}) -> [$\\, $t]; -maybe_replace($\n, #config{escaped_strings=true}) -> [$\\, $n]; -maybe_replace($\f, #config{escaped_strings=true}) -> [$\\, $f]; -maybe_replace($\r, #config{escaped_strings=true}) -> [$\\, $r]; -maybe_replace($\", #config{escaped_strings=true}) -> [$\\, $\"]; +maybe_replace(C, #config{dirty_strings=true}) -> <>; +maybe_replace($\b, #config{escaped_strings=true}) -> <<$\\, $b>>; +maybe_replace($\t, #config{escaped_strings=true}) -> <<$\\, $t>>; +maybe_replace($\n, #config{escaped_strings=true}) -> <<$\\, $n>>; +maybe_replace($\f, #config{escaped_strings=true}) -> <<$\\, $f>>; +maybe_replace($\r, #config{escaped_strings=true}) -> <<$\\, $r>>; +maybe_replace($\", #config{escaped_strings=true}) -> <<$\\, $\">>; maybe_replace($/, Config=#config{escaped_strings=true}) -> case Config#config.escaped_forward_slashes of - true -> [$\\, $/] - ; false -> $/ + true -> <<$\\, $/>> + ; false -> <<$/>> end; -maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\]; +maybe_replace($\\, #config{escaped_strings=true}) -> <<$\\, $\\>>; maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> case Config#config.unescaped_jsonp of true -> <> @@ -832,11 +832,11 @@ maybe_replace(X, _Config) -> <>. %% convert a codepoint to it's \uXXXX equiv. 
json_escape_sequence(X) when X < 65536 -> <> = <>, - [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]; + <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>; json_escape_sequence(X) -> Adjusted = X - 16#10000, <> = <>, - json_escape_sequence(A + 16#d800) ++ json_escape_sequence(B + 16#dc00). + [json_escape_sequence(A + 16#d800), json_escape_sequence(B + 16#dc00)]. %% ascii "1" is [49], "2" is [50], etc... diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index 1773361..7113c71 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -471,50 +471,43 @@ strip_continuations(<>, N) when X >= 128, X =< 191 -> strip_continuations(Bin, _) -> Bin. -maybe_replace($\b, #config{escaped_strings=true}) -> - [$\\, $b]; -maybe_replace($\t, #config{escaped_strings=true}) -> - [$\\, $t]; -maybe_replace($\n, #config{escaped_strings=true}) -> - [$\\, $n]; -maybe_replace($\f, #config{escaped_strings=true}) -> - [$\\, $f]; -maybe_replace($\r, #config{escaped_strings=true}) -> - [$\\, $r]; -maybe_replace($\", #config{escaped_strings=true}) -> - [$\\, $\"]; +maybe_replace($\b, #config{escaped_strings=true}) -> <<$\\, $b>>; +maybe_replace($\t, #config{escaped_strings=true}) -> <<$\\, $t>>; +maybe_replace($\n, #config{escaped_strings=true}) -> <<$\\, $n>>; +maybe_replace($\f, #config{escaped_strings=true}) -> <<$\\, $f>>; +maybe_replace($\r, #config{escaped_strings=true}) -> <<$\\, $r>>; +maybe_replace($\", #config{escaped_strings=true}) -> <<$\\, $\">>; maybe_replace($/, Config=#config{escaped_strings=true}) -> case Config#config.escaped_forward_slashes of - true -> [$\\, $/]; - false -> [$/] + true -> <<$\\, $/>>; + false -> <<$/>> end; -maybe_replace($\\, #config{escaped_strings=true}) -> - [$\\, $\\]; +maybe_replace($\\, #config{escaped_strings=true}) -> <<$\\, $\\>>; maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> json_escape_sequence(X); maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> case 
Config#config.unescaped_jsonp of - true -> [<>]; + true -> <>; false -> json_escape_sequence(X) end; maybe_replace(Atom, #config{strict_utf8=true}) when is_atom(Atom) -> erlang:error(badarg); maybe_replace(surrogate, _Config) -> - [<<16#fffd/utf8>>]; + <<16#fffd/utf8>>; maybe_replace(badutf, _Config) -> - [<<16#fffd/utf8>>]; + <<16#fffd/utf8>>; maybe_replace(X, _Config) -> - [<>]. + <>. %% convert a codepoint to it's \uXXXX equiv. json_escape_sequence(X) when X < 65536 -> <> = <>, - [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]; + <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>; json_escape_sequence(X) -> Adjusted = X - 16#10000, <> = <>, - json_escape_sequence(A + 16#d800) ++ json_escape_sequence(B + 16#dc00). + [json_escape_sequence(A + 16#d800), json_escape_sequence(B + 16#dc00)]. to_hex(10) -> $a; @@ -1030,9 +1023,9 @@ bad_utf8_test_() -> json_escape_sequence_test_() -> [ - {"json escape sequence test - 16#0000", ?_assertEqual(json_escape_sequence(16#0000), "\\u0000")}, - {"json escape sequence test - 16#abc", ?_assertEqual(json_escape_sequence(16#abc), "\\u0abc")}, - {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} + {"json escape sequence test - 16#0000", ?_assertEqual(<<"\\u0000"/utf8>>, json_escape_sequence(16#0000))}, + {"json escape sequence test - 16#abc", ?_assertEqual(<<"\\u0abc"/utf8>>, json_escape_sequence(16#abc))}, + {"json escape sequence test - 16#def", ?_assertEqual(<<"\\u0def"/utf8>>, json_escape_sequence(16#def))} ]. 
From e2ba87f1cb7d32faac612a25a061332783d24a5a Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 01:47:55 -0800 Subject: [PATCH 16/19] switch to a counting implementation of number parsing a la strings --- src/jsx_decoder.erl | 238 ++++++++++++++++++++++++++++---------------- 1 file changed, 154 insertions(+), 84 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index e3daebd..4a049ed 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -58,12 +58,7 @@ resume(Rest, State, Handler, Acc, Stack, Config) -> colon -> colon(Rest, Handler, Stack, Config); key -> key(Rest, Handler, Stack, Config); string -> string(Rest, Handler, Acc, Stack, Config); - integer -> integer(Rest, Handler, Acc, Stack, Config); - decimal -> decimal(Rest, Handler, Acc, Stack, Config); - e -> e(Rest, Handler, Acc, Stack, Config); - ex -> ex(Rest, Handler, Acc, Stack, Config); - exp -> exp(Rest, Handler, Acc, Stack, Config); - zero -> zero(Rest, Handler, Acc, Stack, Config); + number -> number(Rest, Handler, Acc, Stack, Config); true -> true(Rest, Handler, Stack, Config); false -> false(Rest, Handler, Stack, Config); null -> null(Rest, Handler, Stack, Config); @@ -188,27 +183,27 @@ value(<<$f, $a, $l, $s, $e, Rest/binary>>, Handler, Stack, Config) -> value(<<$n, $u, $l, $l, Rest/binary>>, Handler, Stack, Config) -> maybe_done(Rest, handle_event({literal, null}, Handler, Config), Stack, Config); value(<>, Handler, Stack, Config) -> - zero(Rest, Handler, [$0], Stack, Config); + number(Rest, Handler, [?zero], [zero|Stack], Config); value(<<$1, Rest/binary>>, Handler, Stack, Config) -> - integer(Rest, Handler, [$1], Stack, Config); + number(Rest, Handler, [$1], [integer|Stack], Config); value(<<$2, Rest/binary>>, Handler, Stack, Config) -> - integer(Rest, Handler, [$2], Stack, Config); + number(Rest, Handler, [$2], [integer|Stack], Config); value(<<$3, Rest/binary>>, Handler, Stack, Config) -> - integer(Rest, Handler, [$3], Stack, Config); + number(Rest, 
Handler, [$3], [integer|Stack], Config); value(<<$4, Rest/binary>>, Handler, Stack, Config) -> - integer(Rest, Handler, [$4], Stack, Config); + number(Rest, Handler, [$4], [integer|Stack], Config); value(<<$5, Rest/binary>>, Handler, Stack, Config) -> - integer(Rest, Handler, [$5], Stack, Config); + number(Rest, Handler, [$5], [integer|Stack], Config); value(<<$6, Rest/binary>>, Handler, Stack, Config) -> - integer(Rest, Handler, [$6], Stack, Config); + number(Rest, Handler, [$6], [integer|Stack], Config); value(<<$7, Rest/binary>>, Handler, Stack, Config) -> - integer(Rest, Handler, [$7], Stack, Config); + number(Rest, Handler, [$7], [integer|Stack], Config); value(<<$8, Rest/binary>>, Handler, Stack, Config) -> - integer(Rest, Handler, [$8], Stack, Config); + number(Rest, Handler, [$8], [integer|Stack], Config); value(<<$9, Rest/binary>>, Handler, Stack, Config) -> - integer(Rest, Handler, [$9], Stack, Config); + number(Rest, Handler, [$9], [integer|Stack], Config); value(<>, Handler, Stack, Config) -> - negative(Rest, Handler, [$-], Stack, Config); + number(Rest, Handler, [$-], [negative|Stack], Config); value(<>, Handler, Stack, Config) -> value(Rest, Handler, Stack, Config); value(<<$t, Rest/binary>>, Handler, Stack, Config) -> @@ -384,9 +379,9 @@ string(<<239, 191, 190, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, <<16#fffe/utf8>>], Stack, Config); string(<<239, 191, 191, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, <<16#ffff/utf8>>], Stack, Config); -%% partial utf8 codepoints string(<<>>, Handler, Acc, Stack, Config) -> incomplete(string, <<>>, Handler, Acc, Stack, Config); +%% partial utf8 codepoints string(<>, Handler, Acc, Stack, Config) when X >= 2#11000000 -> incomplete(string, <>, Handler, Acc, Stack, Config); string(<>, Handler, Acc, Stack, Config) when X >= 2#11100000, Y >= 2#10000000 -> @@ -849,98 +844,173 @@ to_hex(15) -> $f; to_hex(X) -> X + 48. 
-%% like in strings, there's some pseudo states in here that will never -%% show up in incompletes. some show up in value, some show up in -%% integer, decimal or exp -negative(<<$0, Rest/binary>>, Handler, Acc, Stack, Config) -> - zero(Rest, Handler, [Acc, $0], Stack, Config); -negative(<>, Handler, Acc, Stack, Config) when ?is_nonzero(S) -> - integer(Rest, Handler, [Acc, S], Stack, Config); -negative(<<>>, Handler, [?negative], Stack, Config) -> - incomplete(value, <>, Handler, Stack, Config); -negative(Bin, Handler, Acc, Stack, Config) -> - ?error(negative, Bin, Handler, Acc, Stack, Config). +number(<<$e, Rest/binary>>, Handler, Acc, [integer|Stack], Config) -> + number(Rest, Handler, [Acc, $., $0, $e], [e|Stack], Config); +number(<<$E, Rest/binary>>, Handler, Acc, [integer|Stack], Config) -> + number(Rest, Handler, [Acc, $., $0, $e], [e|Stack], Config); +number(<<$e, Rest/binary>>, Handler, Acc, [zero|Stack], Config) -> + number(Rest, Handler, [Acc, $., $0, $e], [e|Stack], Config); +number(<<$E, Rest/binary>>, Handler, Acc, [zero|Stack], Config) -> + number(Rest, Handler, [Acc, $., $0, $e], [e|Stack], Config); +number(<<>>, Handler, Acc, [State|Stack], Config=#config{stream=false}) -> + NumType = case State of + zero -> integer; + integer -> integer; + decimal -> float; + exp -> float + end, + finish_number(<<>>, Handler, {NumType, iolist_to_binary(Acc)}, Stack, Config); +number(<<>>, Handler, Acc, Stack, Config) -> + incomplete(number, <<>>, Handler, Acc, Stack, Config); +number(Bin, Handler, Acc, [State|Stack], Config) -> + Counted = case State of + zero -> zero(Bin, 0); + integer -> integer(Bin, 0); + negative -> negative(Bin, 0); + initialdecimal -> initialdecimal(Bin, 0); + decimal -> decimal(Bin, 0); + e -> e(Bin, 0); + ex -> ex(Bin, 0); + exp -> exp(Bin, 0) + end, + case Counted of + {finish_integer, Size} -> + <> = Bin, + finish_number(Rest, Handler, {integer, iolist_to_binary([Acc, Clean])}, Stack, Config); + {finish_float, Size} -> + <> = Bin, + 
finish_number(Rest, Handler, {float, iolist_to_binary([Acc, Clean])}, Stack, Config); + {error, Size} -> + <> = Bin, + ?error(number, Rest, Handler, [Acc, Clean], Stack, Config); + {NewState, Size} -> + <> = Bin, + number(Rest, Handler, [Acc, Clean], [NewState|Stack], Config) + end. -zero(<>, Handler, Acc, Stack, Config) -> - decimal(Rest, Handler, [Acc, ?decimalpoint], Stack, Config); -zero(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> - e(Rest, Handler, [Acc, ".0e"], Stack, Config); -zero(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {zero, iolist_to_binary(Acc)}, Stack, Config). +zero(<>, N) -> initialdecimal(Rest, N + 1); +zero(<<>>, N) -> {zero, N}; +zero(_, N) -> {finish_integer, N}. -integer(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - integer(Rest, Handler, [Acc, S], Stack, Config); -integer(<>, Handler, Acc, Stack, Config) -> - initialdecimal(Rest, Handler, [Acc, ?decimalpoint], Stack, Config); -integer(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> - e(Rest, Handler, [Acc, ".0e"], Stack, Config); -integer(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {integer, iolist_to_binary(Acc)}, Stack, Config). 
+integer(<<$0, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<<$1, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<<$2, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<<$3, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<<$4, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<<$5, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<<$6, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<<$7, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<<$8, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<<$9, Rest/binary>>, N) -> integer(Rest, N + 1); +integer(<>, N) -> initialdecimal(Rest, N + 1); +integer(<<$e, _/binary>>, N) -> {integer, N}; +integer(<<$E, _/binary>>, N) -> {integer, N}; +integer(<<>>, N) -> {integer, N}; +integer(_, N) -> {finish_integer, N}. -initialdecimal(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - decimal(Rest, Handler, [Acc, S], Stack, Config); -initialdecimal(<<>>, Handler, Acc, Stack, Config) -> - [?decimalpoint|Rest] = lists:reverse(Acc), - incomplete(integer, <>, Handler, lists:reverse(Rest), Stack, Config); -initialdecimal(Bin, Handler, Acc, Stack, Config) -> - ?error(initialdecimal, Bin, Handler, Acc, Stack, Config). +negative(<<$0, Rest/binary>>, N) -> zero(Rest, N + 1); +negative(<<$1, Rest/binary>>, N) -> integer(Rest, N + 1); +negative(<<$2, Rest/binary>>, N) -> integer(Rest, N + 1); +negative(<<$3, Rest/binary>>, N) -> integer(Rest, N + 1); +negative(<<$4, Rest/binary>>, N) -> integer(Rest, N + 1); +negative(<<$5, Rest/binary>>, N) -> integer(Rest, N + 1); +negative(<<$6, Rest/binary>>, N) -> integer(Rest, N + 1); +negative(<<$7, Rest/binary>>, N) -> integer(Rest, N + 1); +negative(<<$8, Rest/binary>>, N) -> integer(Rest, N + 1); +negative(<<$9, Rest/binary>>, N) -> integer(Rest, N + 1); +negative(<<>>, N) -> {negative, N}; +negative(_, N) -> {error, N}. 
-decimal(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - decimal(Rest, Handler, [Acc, S], Stack, Config); -decimal(<>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E -> - e(Rest, Handler, [Acc, $e], Stack, Config); -decimal(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {decimal, iolist_to_binary(Acc)}, Stack, Config). +initialdecimal(<<$0, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<$1, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<$2, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<$3, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<$4, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<$5, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<$6, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<$7, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<$8, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<$9, Rest/binary>>, N) -> decimal(Rest, N + 1); +initialdecimal(<<>>, N) -> {initialdecimal, N}; +initialdecimal(_, N) -> {error, N}. -e(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - exp(Rest, Handler, [Acc, S], Stack, Config); -e(<>, Handler, Acc, Stack, Config) when Sign =:= ?positive; Sign =:= ?negative -> - ex(Rest, Handler, [Acc, Sign], Stack, Config); -e(<<>>, Handler, Acc, Stack, Config) -> - incomplete(e, <<>>, Handler, Acc, Stack, Config); -e(Bin, Handler, Acc, Stack, Config) -> - ?error(e, Bin, Handler, Acc, Stack, Config). 
+decimal(<<$0, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$1, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$2, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$3, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$4, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$5, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$6, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$7, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$8, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$9, Rest/binary>>, N) -> decimal(Rest, N + 1); +decimal(<<$e, Rest/binary>>, N) -> e(Rest, N + 1); +decimal(<<$E, Rest/binary>>, N) -> e(Rest, N + 1); +decimal(<<>>, N) -> {decimal, N}; +decimal(_, N) -> {finish_float, N}. -ex(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - exp(Rest, Handler, [Acc, S], Stack, Config); -ex(<<>>, Handler, Acc, Stack, Config) -> - incomplete(ex, <<>>, Handler, Acc, Stack, Config); -ex(Bin, Handler, Acc, Stack, Config) -> - ?error(ex, Bin, Handler, Acc, Stack, Config). +e(<<$0, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<<$1, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<<$2, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<<$3, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<<$4, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<<$5, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<<$6, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<<$7, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<<$8, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<<$9, Rest/binary>>, N) -> exp(Rest, N + 1); +e(<>, N) -> ex(Rest, N + 1); +e(<>, N) -> ex(Rest, N + 1); +e(<<>>, N) -> {e, N}; +e(_, N) -> {error, N}. -exp(<>, Handler, Acc, Stack, Config) when S =:= ?zero; ?is_nonzero(S) -> - exp(Rest, Handler, [Acc, S], Stack, Config); -exp(Bin, Handler, Acc, Stack, Config) -> - finish_number(Bin, Handler, {exp, iolist_to_binary(Acc)}, Stack, Config). 
+ex(<<$0, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<$1, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<$2, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<$3, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<$4, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<$5, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<$6, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<$7, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<$8, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<$9, Rest/binary>>, N) -> exp(Rest, N + 1); +ex(<<>>, N) -> {ex, N}; +ex(_, N) -> {error, N}. + + +exp(<<$0, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<$1, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<$2, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<$3, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<$4, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<$5, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<$6, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<$7, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<$8, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<$9, Rest/binary>>, N) -> exp(Rest, N + 1); +exp(<<>>, N) -> {exp, N}; +exp(_, N) -> {finish_float, N}. -finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) -> - maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), [], Config); -finish_number(<<>>, Handler, {NumType, Acc}, Stack, Config) -> - incomplete(NumType, <<>>, Handler, Acc, Stack, Config); finish_number(Rest, Handler, Acc, Stack, Config) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), Stack, Config). -ifndef(no_binary_to_whatever). -format_number({zero, Acc}) -> {integer, binary_to_integer(Acc)}; format_number({integer, Acc}) -> {integer, binary_to_integer(Acc)}; -format_number({decimal, Acc}) -> {float, binary_to_float(Acc)}; -format_number({exp, Acc}) -> {float, binary_to_float(Acc)}. +format_number({float, Acc}) -> {float, binary_to_float(Acc)}. -endif. -ifdef(no_binary_to_whatever). 
-format_number({zero, Acc}) -> {integer, list_to_integer(unicode:characters_to_list(Acc))}; format_number({integer, Acc}) -> {integer, list_to_integer(unicode:characters_to_list(Acc))}; -format_number({decimal, Acc}) -> {float, list_to_float(unicode:characters_to_list(Acc))}; -format_number({exp, Acc}) -> {float, list_to_float(unicode:characters_to_list(Acc))}. +format_number({float, Acc}) -> {float, list_to_float(unicode:characters_to_list(Acc))}. -endif. From a9a2c2ebe6f02f86c626979d151c18347e301ef2 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 22:16:55 +0000 Subject: [PATCH 17/19] revert a `jsx_to_term` optimization that micro benchmarked well but didn't macro benchmark well --- src/jsx_to_term.erl | 52 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/src/jsx_to_term.erl b/src/jsx_to_term.erl index 6dc0c85..b23450c 100644 --- a/src/jsx_to_term.erl +++ b/src/jsx_to_term.erl @@ -169,13 +169,13 @@ start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}. 
%% finish an object or array and insert it into the parent object if it exists or -%% return it if it is the root object +%% return it if it is the root object finish({[{object, []}], Config}) -> {[{}], Config}; finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); -finish({[{object, Pairs}], Config}) -> {flatify(Pairs), Config}; -finish({[{object, Pairs}|Rest], Config}) -> insert(flatify(Pairs), {Rest, Config}); -finish({[{array, Values}], Config}) -> {flatify(Values), Config}; -finish({[{array, Values}|Rest], Config}) -> insert(flatify(Values), {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config}); +finish({[{array, Values}], Config}) -> {lists:reverse(Values), Config}; +finish({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config}); finish(_) -> erlang:error(badarg). @@ -185,9 +185,9 @@ insert(Value, {[], Config}) -> {Value, Config}; insert(Key, {[{object, Pairs}|Rest], Config}) -> {[{object, Key, Pairs}] ++ Rest, Config}; insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> - {[{object, [Pairs, {Key, Value}]}] ++ Rest, Config}; + {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config}; insert(Value, {[{array, Values}|Rest], Config}) -> - {[{array, [Values, Value]}] ++ Rest, Config}; + {[{array, [Value] ++ Values}] ++ Rest, Config}; insert(_, _) -> erlang:error(badarg). -endif. @@ -205,17 +205,15 @@ start_array({Stack, Config}) -> {[{array, []}] ++ Stack, Config}. 
%% finish an object or array and insert it into the parent object if it exists or -%% return it if it is the root object -finish({[{object, Map}], Config=#config{return_maps=true}}) -> - {Map, Config}; -finish({[{object, Map}|Rest], Config=#config{return_maps=true}}) -> - insert(Map, {Rest, Config}); +%% return it if it is the root object +finish({[{object, Map}], Config=#config{return_maps=true}}) -> {Map, Config}; +finish({[{object, Map}|Rest], Config=#config{return_maps=true}}) -> insert(Map, {Rest, Config}); finish({[{object, []}], Config}) -> {[{}], Config}; finish({[{object, []}|Rest], Config}) -> insert([{}], {Rest, Config}); -finish({[{object, Pairs}], Config}) -> {flatify(Pairs), Config}; -finish({[{object, Pairs}|Rest], Config}) -> insert(flatify(Pairs), {Rest, Config}); -finish({[{array, Values}], Config}) -> {flatify(Values), Config}; -finish({[{array, Values}|Rest], Config}) -> insert(flatify(Values), {Rest, Config}); +finish({[{object, Pairs}], Config}) -> {lists:reverse(Pairs), Config}; +finish({[{object, Pairs}|Rest], Config}) -> insert(lists:reverse(Pairs), {Rest, Config}); +finish({[{array, Values}], Config}) -> {lists:reverse(Values), Config}; +finish({[{array, Values}|Rest], Config}) -> insert(lists:reverse(Values), {Rest, Config}); finish(_) -> erlang:error(badarg). @@ -229,9 +227,9 @@ insert(Key, {[{object, Pairs}|Rest], Config}) -> insert(Value, {[{object, Key, Map}|Rest], Config=#config{return_maps=true}}) -> {[{object, maps:put(Key, Value, Map)}] ++ Rest, Config}; insert(Value, {[{object, Key, Pairs}|Rest], Config}) -> - {[{object, [Pairs, {Key, Value}]}] ++ Rest, Config}; + {[{object, [{Key, Value}] ++ Pairs}] ++ Rest, Config}; insert(Value, {[{array, Values}|Rest], Config}) -> - {[{array, [Values, Value]}] ++ Rest, Config}; + {[{array, [Value] ++ Values}] ++ Rest, Config}; insert(_, _) -> erlang:error(badarg). -endif. 
@@ -340,32 +338,32 @@ rep_manipulation_test_() -> get_key({[{array, []}], #config{}}) )}, {"insert a value into an object", ?_assertEqual( - {[{object, [[], {key, value}]}, junk], #config{}}, + {[{object, [{key, value}]}, junk], #config{}}, insert(value, {[{object, key, []}, junk], #config{}}) )}, {"insert a value into an array", ?_assertEqual( - {[{array, [[], value]}, junk], #config{}}, + {[{array, [value]}, junk], #config{}}, insert(value, {[{array, []}, junk], #config{}}) )}, {"finish an object with no ancestor", ?_assertEqual( - {[{x, y}, {a, b}], #config{}}, - finish({[{object, [[[], {x, y}], {a, b}]}], #config{}}) + {[{a, b}, {x, y}], #config{}}, + finish({[{object, [{x, y}, {a, b}]}], #config{}}) )}, {"finish an empty object", ?_assertEqual( {[{}], #config{}}, finish({[{object, []}], #config{}}) )}, {"finish an object with an ancestor", ?_assertEqual( - {[{object, [[[], {foo, bar}], {key, [{x, y}, {a, b}]}]}], #config{}}, - finish({[{object, [[[], {x, y}], {a, b}]}, {object, key, [[], {foo, bar}]}], #config{}}) + {[{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], #config{}}, + finish({[{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}], #config{}}) )}, {"finish an array with no ancestor", ?_assertEqual( {[a, b, c], #config{}}, - finish({[{array, [[[[], a], b], c]}], #config{}}) + finish({[{array, [c, b, a]}], #config{}}) )}, {"finish an array with an ancestor", ?_assertEqual( - {[{array,[[[[[],d],e],f],[a,b,c]]}], #config{}}, - finish({[{array, [[[[], a], b], c]}, {array, [[[[], d], e], f]}], #config{}}) + {[{array, [[a, b, c], d, e, f]}], #config{}}, + finish({[{array, [c, b, a]}, {array, [d, e, f]}], #config{}}) )} ]. 
From 8797ff6d9b6357acfcb6c1e78e53a661a4ee1c25 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 14:41:30 -0800 Subject: [PATCH 18/19] disable maps on R14, R15 and R16 --- config/maps | 1 - rebar.config | 6 ++---- rebar.config.script | 9 --------- 3 files changed, 2 insertions(+), 14 deletions(-) delete mode 100644 config/maps delete mode 100644 rebar.config.script diff --git a/config/maps b/config/maps deleted file mode 100644 index 8aa3993..0000000 --- a/config/maps +++ /dev/null @@ -1 +0,0 @@ -maps:keys(#{0 => false, 1 => true}) == [0,1]. \ No newline at end of file diff --git a/rebar.config b/rebar.config index 35ddaa6..b2653a2 100644 --- a/rebar.config +++ b/rebar.config @@ -1,6 +1,4 @@ -% uncomment to disable encoding support for erlang maps -% {jsx_nomaps, true}. - {erl_opts, [ - {platform_define, "R14|R15", 'no_binary_to_whatever'} + {platform_define, "R14|R15", 'no_binary_to_whatever'}, + {platform_define, "^((?!R1[456]).)*$", 'maps_support'} ]}. \ No newline at end of file diff --git a/rebar.config.script b/rebar.config.script deleted file mode 100644 index c5d8d92..0000000 --- a/rebar.config.script +++ /dev/null @@ -1,9 +0,0 @@ -case os:getenv("JSX_NOMAPS") or proplists:get_value(jsx_nomaps, CONFIG, false) of - false -> - try file:script("config/maps") of - {ok, true} -> [{erl_opts, [{d, maps_support}]}] ++ CONFIG; - _ -> CONFIG - catch _:_ -> CONFIG - end; - _ -> CONFIG -end. \ No newline at end of file From 8f62b3adf112f282fefffb905ef16af6c1b189e2 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 9 Dec 2014 16:24:09 -0800 Subject: [PATCH 19/19] v2.4.0 --- CHANGES.md | 5 +++++ README.md | 2 +- src/jsx.app.src | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4a66843..0de2a3a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +v2.4.0 + +* enough performance improvements to justify a new version. 
2-3x + speedup depending on mode of operation + v2.3.1 * fixes an issue where astral plane json escape sequences were diff --git a/README.md b/README.md index fbab6ce..857043a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# jsx (v2.3.1) # +# jsx (v2.4) # an erlang application for consuming, producing and manipulating [json][json]. inspired by [yajl][yajl] diff --git a/src/jsx.app.src b/src/jsx.app.src index a81e5c0..f0090de 100644 --- a/src/jsx.app.src +++ b/src/jsx.app.src @@ -1,7 +1,7 @@ {application, jsx, [ {description, "a streaming, evented json parsing toolkit"}, - {vsn, "2.3.1"}, + {vsn, "2.4.0"}, {modules, [ jsx, jsx_encoder,