From cff4d17561d7dd21210f21761dd643081fb3727c Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 20 Jun 2010 17:00:08 -0700 Subject: [PATCH] substantial refactoring of api and decoder internals --- examples/jsx_parser.erl | 115 --------- examples/jsx_prettify.erl | 112 --------- examples/jsx_stream_parser.erl | 67 ------ examples/jsx_verify.erl | 52 ---- src/jsx.erl | 134 +++++++---- src/jsx_decoder.erl | 418 ++++++++++++++++----------------- src/jsx_decoder.hrl | 10 +- src/jsx_types.hrl | 7 +- test/jsx_test.erl | 107 --------- 9 files changed, 304 insertions(+), 718 deletions(-) delete mode 100644 examples/jsx_parser.erl delete mode 100644 examples/jsx_prettify.erl delete mode 100644 examples/jsx_stream_parser.erl delete mode 100644 examples/jsx_verify.erl delete mode 100644 test/jsx_test.erl diff --git a/examples/jsx_parser.erl b/examples/jsx_parser.erl deleted file mode 100644 index 37f05ef..0000000 --- a/examples/jsx_parser.erl +++ /dev/null @@ -1,115 +0,0 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. - - -%% this module is an example of how to use the raw parser api - --module(jsx_parser). --author("alisdairsullivan@yahoo.ca"). - --export([decode/1, event/2]). - -%% export to allow the dirty hack below --export([literal/1, string/1, float/1, integer/1]). - - -%% this is a strict parser, no comments, no naked values and only one key per object. it -%% also is not streaming, though it could be modified to parse partial objects/lists. - -%% event takes two arguments, the result of calling the parser on a json argument (or the -%% generator returned by the parser) and a term that holds the erlang representation of -%% the json. - -decode(JSON) -> - F = jsx:parser(), - try event(F(JSON), []) - catch error:badjson -> {error, badjson} - end. - - -%% erlang representation is dicts for objects and lists for arrays. - -event({start_object, Next}, Stack) -> - event(Next(), [dict:new()] ++ Stack); -event({start_array, Next}, Stack) -> - event(Next(), [[]] ++ Stack); - -event({end_object, Next}, [Object, {key, Key}, Parent|Stack]) when is_tuple(Parent) -> - event(Next(), [insert(Key, Object, Parent)] ++ Stack); -event({end_array, Next}, [Array, {key, Key}, Parent|Stack]) when is_tuple(Parent) -> - event(Next(), [insert(Key, lists:reverse(Array), Parent)] ++ Stack); -event({end_object, Next}, [Object, Parent|Stack]) when is_list(Parent) -> - event(Next(), [[Object] ++ Parent] ++ Stack); -event({end_array, Next}, [Array, Parent|Stack]) when is_list(Parent) -> - event(Next(), [[lists:reverse(Array)] ++ Parent] ++ Stack); - -%% special cases for closing the root objects - -event({end_object, Next}, [Object]) -> - event(Next(), [Object]); -event({end_array, Next}, [Array]) -> - event(Next(), [lists:reverse(Array)]); - -%% keys are just pushed onto the stack until their corresponding value is -%% encountered - -event({{key, Key}, Next}, [Stack]) -> - event(Next(), [{key, Key}] ++ Stack); - -%% reject values that aren't wrapped by an array or object - -event({{_Type, _Value}, _Next}, []) -> - {error, badjson}; - -%% this is kind of a dirty hack, but erlang will interpret atoms when applied to (Args) -%% as a function. so naming our formatting functions string, integer, float and literal will -%% allow the following shortcut - -event({{Type, Value}, Next}, [{key, Key}, Object|Stack]) -> - event(Next(), [insert(Key, ?MODULE:Type(Value), Object)] ++ Stack); -event({{Type, Value}, Next}, [Array|Stack]) when is_list(Array) -> - event(Next(), [[?MODULE:Type(Value)] ++ Array] ++ Stack); - -event({end_json, _}, [Stack]) -> - Stack. - - -%% we're restricting keys to one occurence per object, as the spec implies. - -insert(Key, Val, Dict) -> - case dict:is_key(Key, Dict) of - false -> dict:store(Key, Val, Dict) - ; true -> erlang:error(badjson) - end. - - -%% strings and literals we just return with no post-processing, numbers we convert -%% from strings to integers/floats as appropriate - -string(String) -> - String. -integer(Number) -> - list_to_integer(Number). -float(Number) -> - list_to_float(Number). -literal(Literal) -> - Literal. \ No newline at end of file diff --git a/examples/jsx_prettify.erl b/examples/jsx_prettify.erl deleted file mode 100644 index 0765811..0000000 --- a/examples/jsx_prettify.erl +++ /dev/null @@ -1,112 +0,0 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. - - --module(jsx_prettify). --author("alisdairsullivan@yahoo.ca"). - - --export([pretty/2, prettify/2]). - --record(opts, { - indent = " " -}). - - -pretty(JSON, Opts) -> - Init = init(parse_opts(Opts, #opts{})), - P = jsx:parser({jsx_prettify, prettify, Init}, []), - case P(JSON) of - {incomplete, _} -> {error, badjson} - ; {error, badjson} -> {error, badjson} - ; {Result, _} -> Result - end. - - -parse_opts([{indent, Val}|Rest], Opts) -> - parse_opts(Rest, Opts#opts{indent = [ 16#20 || _ <- lists:seq(1, Val) ]}); -parse_opts([], Opts) -> - Opts. - - -init(Opts) -> - {[], Opts#opts.indent, 0, new}. - - -prettify(start_object, {Acc, Indent, Level, value}) -> - {Acc ++ ",\n" ++ indent(Indent, Level) ++ "{", Indent, Level + 1, new}; -prettify(start_object, {Acc, Indent, Level, new}) -> - {Acc ++ ",\n" ++ indent(Indent, Level) ++ "{", Indent, Level + 1, new}; -prettify(start_object, {Acc, Indent, Level, _}) -> - {Acc ++ "{", Indent, Level + 1, new}; - -prettify(start_array, {Acc, Indent, Level, value}) -> - {Acc ++ ",\n" ++ indent(Indent, Level) ++ "[", Indent, Level + 1, new}; -prettify(start_array, {Acc, Indent, Level, new}) -> - {Acc ++ ",\n" ++ indent(Indent, Level) ++ "[", Indent, Level + 1, new}; -prettify(start_array, {Acc, Indent, Level, _}) -> - {Acc ++ "[", Indent, Level + 1, new}; - -prettify(end_object, {Acc, Indent, Level, value}) -> - {Acc ++ "\n" ++ indent(Indent, Level - 1) ++ "}", Indent, Level - 1, value}; -prettify(end_object, {Acc, Indent, Level, new}) -> - {Acc ++ "}", Indent, Level - 1, value}; - -prettify(end_array, {Acc, Indent, Level, value}) -> - {Acc ++ "\n" ++ indent(Indent, Level - 1) ++ "]", Indent, Level - 1, value}; -prettify(end_array, {Acc, Indent, Level, new}) -> - {Acc ++ "]", Indent, Level - 1, value}; - -prettify({key, Key}, {Acc, Indent, Level, value}) -> - {Acc ++ ",\n" ++ indent(Indent, Level) ++ "\"" ++ Key ++ "\": ", Indent, Level, key}; -prettify({key, Key}, {Acc, Indent, Level, _}) -> - {Acc ++ "\n" ++ indent(Indent, Level) ++ "\"" ++ Key ++ "\": ", Indent, Level, key}; - -prettify({Type, Value}, {Acc, Indent, Level, value}) -> - {Acc ++ ",\n" ++ indent(Indent, Level) ++ format(Type, Value), Indent, Level, value}; -prettify({Type, Value}, {Acc, Indent, Level, new}) -> - {Acc ++ "\n" ++ indent(Indent, Level) ++ format(Type, Value), Indent, Level, value}; -prettify({Type, Value}, {Acc, Indent, Level, key}) -> - {Acc ++ format(Type, Value), Indent, Level, value}; - -prettify(reset, {_, Indent, _, _}) -> - {[], Indent, 0, new}; -prettify(end_json, {Acc, _, _, _}) -> - Acc. - - -format(string, String) -> - "\"" ++ String ++ "\""; -format(literal, Literal) -> - erlang:atom_to_list(Literal); -format(_, Number) -> - Number. - - -indent(Indent, Level) -> - indent(Indent, Level, ""). - -indent(Indent, 0, Acc) -> - Acc; -indent(Indent, N, Acc) -> - Indent ++ Acc. - \ No newline at end of file diff --git a/examples/jsx_stream_parser.erl b/examples/jsx_stream_parser.erl deleted file mode 100644 index a653311..0000000 --- a/examples/jsx_stream_parser.erl +++ /dev/null @@ -1,67 +0,0 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. - - --module(jsx_stream_parser). --author("alisdairsullivan@yahoo.ca"). - --export([decoder/1, event/2]). - -decoder(Opts) -> - Decoder = jsx:parser({jsx_stream_parser, event, 0}, Opts), - fun(Stream) -> try - case Decoder(Stream) of - {incomplete, F} -> {incomplete, F} - ; {error, badjson} -> {error, badjson} - end - catch - throw:{ok, Result} -> {ok, Result} - ; throw:not_found -> {error, not_found} - end - end. - -event(start_object, Level) -> - Level + 1; - -event(start_array, 0) -> - throw(not_found); -event(start_array, Level) -> - Level + 1; - -event(end_object, Level) -> - Level - 1; -event(end_array, Level) -> - Level - 1; - -event({key, "_id"}, 1) -> - capture; - -event({string, String}, capture) -> - throw({ok, String}); - -event(reset, _) -> - 0; -event(end_json, _) -> - throw(not_found); - -event(_, Level) -> - Level. \ No newline at end of file diff --git a/examples/jsx_verify.erl b/examples/jsx_verify.erl deleted file mode 100644 index 7d6776d..0000000 --- a/examples/jsx_verify.erl +++ /dev/null @@ -1,52 +0,0 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. - - --module(jsx_verify). --author("alisdairsullivan@yahoo.ca"). - --export([is_json/1, event/2]). - - - -%% this is a strict parser, no comments, no naked values and only one key per object. it -%% also is not streaming, though it could be modified to parse partial objects/lists. - -is_json(JSON) -> - P = jsx:parser({jsx_verify, event, ok}, []), - case P(JSON) of - {incomplete, _} -> - false - ; {error, badjson} -> - false - ; _ -> - true - end. - - -%% erlang representation is dicts for objects and lists for arrays. these are pushed -%% onto a stack, the top of which is our current level, deeper levels represent parent -%% and grandparent levels in the json structure. keys are also stored on top of the array -%% during parsing of their associated values. - -event(_, ok) -> - ok. diff --git a/src/jsx.erl b/src/jsx.erl index ec7df00..ce78e4f 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -24,35 +24,17 @@ -module(jsx). -author("alisdairsullivan@yahoo.ca"). --export([decode/1, decode/2, parser/0, parser/1]). +%% the core parser api +-export([parser/0, parser/1]). +%% example usage of core api +-export([decode/1, decode/2]). +-export([is_json/1, is_json/2]). +-export([fold/3, fold/4]). + +%% types for function specifications -include("jsx_types.hrl"). - - -%% decode takes a json binary (and optionally, a proplist of options) and returns a list -%% of events corresponding to the json structure/contents. it converts incompletes into -%% errors - --spec decode(JSON::json()) -> {ok, [jsx_event(),...]} | {error, badjson}. --spec decode(JSON::json(), Opts::jsx_opts()) -> {ok, [jsx_event(),...]} | {error, badjson}. - -decode(JSON) -> - decode(JSON, []). - -decode(JSON, Opts) -> - F = parser(Opts), - decode_loop(F(JSON), []). -decode_loop({incomplete, _}, _) -> {error, badjson}; -decode_loop({error, badjson}, _) -> {error, badjson}; -decode_loop({end_json, _}, State) -> {ok, lists:reverse(State)}; -decode_loop({Event, F}, State) -> decode_loop(F(), [Event] ++ State). - - -%% parser returns an anonymous function of arity 1 that takes a json binary as it's -%% argument and returns a tuple containing an error, incomplete and a new parser that -%% can be handed more input to resume parsing or a single event and a function that -%% can be called to get the next result -spec parser() -> jsx_parser(). -spec parser(Opts::jsx_opts()) -> jsx_parser(). @@ -76,6 +58,72 @@ start(F, OptsList) -> fun(Stream) -> F(Stream, Opts) end. +%% decode is an example decoder using the jsx api. it converts the events into a simple +%% list and converts incomplete parses into errors. + +-spec decode(JSON::json()) -> {ok, [jsx_event(),...]} | {error, badjson}. +-spec decode(JSON::json(), Opts::jsx_opts()) -> {ok, [jsx_event(),...]} | {error, badjson}. + +decode(JSON) -> + decode(JSON, []). + +decode(JSON, Opts) -> + fold(fun(end_json, State) -> + lists:reverse(State) + ;(Event, State) -> [Event] ++ State end, + [], JSON, Opts). + + +-spec is_json(JSON::json()) -> true | false. +-spec is_json(JSON::json(), Opts::jsx_opts()) -> true | false. + +is_json(JSON) -> + is_json(JSON, []). + +is_json(JSON, Opts) -> + case fold(fun(end_json, ok) -> true ;(_, _) -> ok end, ok, JSON, Opts) of + {incomplete, _} -> false + ; {error, _} -> false + ; {ok, true} -> true + end. + + +-spec fold(F::fun((jsx_event(), any()) -> any()), + Acc::any(), + JSON::json()) -> + {ok, any()} | {incomplete, jsx_parser()} | {error, atom()}. +-spec fold(F::fun((jsx_event(), any()) -> any()), + Acc::any(), + JSON::json(), + Opts::jsx_opts()) -> + {ok, any()} | {incomplete, jsx_parser()} | {error, atom()} + ; (F::fun((jsx_event(), any()) -> any()), + Acc::any(), + JSON::json(), + Parser::jsx_parser()) -> + {ok, any()} | {incomplete, jsx_parser()} | {error, atom()}. + +fold(F, Acc, JSON) -> + P = jsx:parser(), + fold(F, Acc, JSON, P). + +fold(F, Acc, JSON, Opts) when is_list(Opts) -> + P = jsx:parser(Opts), + fold(F, Acc, JSON, P); +fold(F, Acc, JSON, P) -> + fold_loop(F, Acc, P(JSON)). + +fold_loop(F, Acc, {incomplete, Next}) -> + {incomplete, fun(Bin) -> fold_loop(F, Acc, Next(Bin)) end}; +fold_loop(_, _, {error, Error}) -> {error, Error}; +fold_loop(F, Acc, {end_json, _}) -> {ok, F(end_json, Acc)}; +fold_loop(F, Acc, {Event, Next}) -> fold_loop(F, F(Event, Acc), Next()). + + + +%% option parsing + +%% converts a proplist into a tuple parse_opts(Opts) -> parse_opts(Opts, {false, codepoint, false}). @@ -93,6 +141,8 @@ parse_opts([{stream_mode, Value}|Rest], {Comments, EscapedUnicode, _Stream}) -> parse_opts([{encoding, _}|Rest], Opts) -> parse_opts(Rest, Opts). + +%% encoding detection %% first check to see if there's a bom, if not, use the rfc4627 method for determining %% encoding. this function makes some assumptions about the validity of the stream @@ -142,31 +192,29 @@ detect_encoding(<> = JSON, Opts) when X =/= 0, Y =/= 0 -> %% a problem, to autodetect naked single digits' encoding, there is not enough data %% to conclusively determine the encoding correctly. below is an attempt to solve %% the problem - detect_encoding(<>, Opts) when X =/= 0 -> - {try {Result, _} = jsx_utf8:parse(<>, Opts), Result - catch error:function_clause -> incomplete end, - fun(Stream) -> + try jsx_utf8:parse(<>, Opts) + catch error:function_clause -> + {incomplete, fun(Stream) -> detect_encoding(<>, Opts) - end - }; + end} + end; detect_encoding(<<0, X>>, Opts) when X =/= 0 -> - {try {Result, _} = jsx_utf16:parse(<<0, X>>, Opts), Result - catch error:function_clause -> incomplete end, - fun(Stream) -> + try jsx_utf16:parse(<<0, X>>, Opts) + catch error:function_clause -> + {incomplete, fun(Stream) -> detect_encoding(<<0, X, Stream/binary>>, Opts) - end - }; + end} + end; detect_encoding(<>, Opts) when X =/= 0 -> - {try {Result, _} = jsx_utf16le:parse(<>, Opts), Result - catch error:function_clause -> incomplete end, - fun(Stream) -> + try jsx_utf16le:parse(<>, Opts) + catch error:function_clause -> + {incomplete, fun(Stream) -> detect_encoding(<>, Opts) - end - }; + end} + end; %% not enough input, request more - detect_encoding(Bin, Opts) -> {incomplete, fun(Stream) -> diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index fcdbfa3..0e93ef9 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -33,7 +33,7 @@ -include("jsx_types.hrl"). --spec parse(JSON::json(), Opts::jsx_opts()) -> parser_result(). +-spec parse(JSON::json(), Opts::jsx_opts()) -> jsx_parser_result(). parse(JSON, Opts) -> start(JSON, [], Opts). @@ -45,14 +45,26 @@ parse(JSON, Opts) -> %% returning from a value or a key inside objects. all pops, peeks and pushes are %% inlined. the code that handles naked values and comments is not optimized by the %% compiler for efficient matching, but you shouldn't be using naked values or comments -%% anyways, they are horrible and contrary to the spec. +%% anyways, they are horrible and contrary to the spec + + +%% two macros to simplify incomplete handling +-define(incomplete(Valid, Incomplete, Finish), + case Valid of + true -> {error, badjson} + ; false -> {incomplete, Incomplete, Finish} + end +). + +-define(ferror, fun() -> {error, badjson} end). + start(<>, Stack, Opts) when ?is_whitespace(S) -> start(Rest, Stack, Opts); start(<>, Stack, Opts) -> - {start_object, fun() -> object(Rest, [key|Stack], Opts) end}; + {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; start(<>, Stack, Opts) -> - {start_array, fun() -> array(Rest, [array|Stack], Opts) end}; + {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; start(<>, Stack, Opts) -> string(Rest, Stack, Opts, []); start(<<$t/?encoding, Rest/binary>>, Stack, Opts) -> @@ -70,33 +82,30 @@ start(<>, Stack, Opts) when ?is_nonzero(S) -> start(<>, Stack, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Opts) end); start(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> start(<>, Stack, Opts) end} - end. - + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> start(<>, Stack, Opts) end, + ?ferror + ). maybe_done(<>, Stack, Opts) when ?is_whitespace(S) -> maybe_done(Rest, Stack, Opts); maybe_done(<>, [object|Stack], Opts) -> - {end_object, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}; maybe_done(<>, [array|Stack], Opts) -> - {end_array, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}; maybe_done(<>, [object|Stack], Opts) -> key(Rest, [key|Stack], Opts); maybe_done(<>, [array|_] = Stack, Opts) -> value(Rest, Stack, Opts); maybe_done(<>, Stack, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Opts) end); -maybe_done(Bin, [], ?stream_mode(Opts)) -> - {end_json, fun() -> start(Bin, [], Opts) end}; maybe_done(<<>>, [], Opts) -> - {end_json, fun() -> {incomplete, fun(Stream) -> maybe_done(Stream, [], Opts) end} end}; + {event, end_json, fun(Stream) -> maybe_done(Stream, [], Opts) end}; maybe_done(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> maybe_done(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> maybe_done(<>, Stack, Opts) end, + ?ferror + ). object(<>, Stack, Opts) when ?is_whitespace(S) -> @@ -104,14 +113,14 @@ object(<>, Stack, Opts) when ?is_whitespace(S) -> object(<>, Stack, Opts) -> string(Rest, Stack, Opts, []); object(<>, [key|Stack], Opts) -> - {end_object, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}; object(<>, Stack, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Opts) end); object(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> object(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> object(<>, Stack, Opts) end, + ?ferror + ). array(<>, Stack, Opts) when ?is_whitespace(S) -> @@ -131,18 +140,18 @@ array(<>, Stack, Opts) -> array(<>, Stack, Opts) when ?is_nonzero(S) -> integer(Rest, Stack, Opts, [S]); array(<>, Stack, Opts) -> - {start_object, fun() -> object(Rest, [key|Stack], Opts) end}; + {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; array(<>, Stack, Opts) -> - {start_array, fun() -> array(Rest, [array|Stack], Opts) end}; + {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; array(<>, [array|Stack], Opts) -> - {end_array, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}; array(<>, Stack, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Opts) end); array(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> array(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> array(<>, Stack, Opts) end, + ?ferror + ). value(<>, Stack, Opts) when ?is_whitespace(S) -> @@ -162,16 +171,16 @@ value(<>, Stack, Opts) -> value(<>, Stack, Opts) when ?is_nonzero(S) -> integer(Rest, Stack, Opts, [S]); value(<>, Stack, Opts) -> - {start_object, fun() -> object(Rest, [key|Stack], Opts) end}; + {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; value(<>, Stack, Opts) -> - {start_array, fun() -> array(Rest, [array|Stack], Opts) end}; + {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; value(<>, Stack, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Opts) end); value(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> value(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> value(<>, Stack, Opts) end, + ?ferror + ). colon(<>, Stack, Opts) when ?is_whitespace(S) -> @@ -181,10 +190,10 @@ colon(<>, [key|Stack], Opts) -> colon(<>, Stack, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Opts) end); colon(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> colon(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> colon(<>, Stack, Opts) end, + ?ferror + ). key(<>, Stack, Opts) when ?is_whitespace(S) -> @@ -194,21 +203,24 @@ key(<>, Stack, Opts) -> key(<>, Stack, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Opts) end); key(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> key(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> key(<>, Stack, Opts) end, + ?ferror + ). %% string has an additional parameter, an accumulator (Acc) used to hold the intermediate %% representation of the string being parsed. using a list of integers representing %% unicode codepoints is faster than constructing binaries, many of which will be -%% converted back to lists by the user anyways. +%% converted back to lists by the user anyways + +%% string uses partial_utf/1 to cease parsing when invalid encodings are encountered +%% rather than just checking remaining binary size like other states string(<>, [key|_] = Stack, Opts, Acc) -> - {{key, lists:reverse(Acc)}, fun() -> colon(Rest, Stack, Opts) end}; + {event, {key, lists:reverse(Acc)}, fun() -> colon(Rest, Stack, Opts) end}; string(<>, Stack, Opts, Acc) -> - {{string, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, {string, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; string(<>, Stack, Opts, Acc) -> escape(Rest, Stack, Opts, Acc); string(<>, Stack, Opts, Acc) when ?is_noncontrol(S) -> @@ -270,7 +282,7 @@ partial_utf(_) -> true. %% only thing to note here is the additional accumulator passed to escaped_unicode used %% to hold the codepoint sequence. unescessary, but nicer than using the string -%% accumulator. +%% accumulator escape(<<$b/?encoding, Rest/binary>>, Stack, Opts, Acc) -> string(Rest, Stack, Opts, "\b" ++ Acc); @@ -288,10 +300,10 @@ escape(<>, Stack, Opts, Acc) when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus -> string(Rest, Stack, Opts, [S] ++ Acc); escape(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> escape(<>, Stack, Opts, Acc) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> escape(<>, Stack, Opts, Acc) end, + ?ferror + ). %% this code is ugly and unfortunate, but so is json's handling of escaped unicode @@ -300,7 +312,7 @@ escape(Bin, Stack, Opts, Acc) -> %% the codepoint option is present the sequence is converted and inserted as long %% as it represents a valid unicode codepoint. this means non-characters %% representable in 16 bits are not converted (the utf16 surrogates and the two -%% special non-characters). any other option and no conversion is done. +%% special non-characters). any other option and no conversion is done escaped_unicode(<>, Stack, @@ -338,15 +350,13 @@ escaped_unicode(<>, Stack, Opts, String, [C, B, A]) wh escaped_unicode(<>, Stack, Opts, String, Acc) when ?is_hex(S) -> escaped_unicode(Rest, Stack, Opts, String, [S] ++ Acc); escaped_unicode(Bin, Stack, Opts, String, Acc) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> - escaped_unicode(<>, Stack, Opts, String, Acc) - end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> escaped_unicode(<>, Stack, Opts, String, Acc) end, + ?ferror + ). %% upon encountering a low pair json/hex encoded value, check to see if there's a high -%% value already in the accumulator. +%% value already in the accumulator check_acc_for_surrogate([D, C, B, A, $u, ?rsolidus|Rest]) when ?is_hex(D), ?is_hex(C), ?is_hex(B), ?is_hex(A) -> @@ -366,67 +376,63 @@ surrogate_to_codepoint(High, Low) -> %% like strings, numbers are collected in an intermediate accumulator before -%% being emitted to the callback handler. +%% being emitted to the callback handler negative(<<$0/?encoding, Rest/binary>>, Stack, Opts, Acc) -> zero(Rest, Stack, Opts, "0" ++ Acc); negative(<>, Stack, Opts, Acc) when ?is_nonzero(S) -> integer(Rest, Stack, Opts, [S] ++ Acc); negative(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> negative(<>, Stack, Opts, Acc) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> negative(<>, Stack, Opts, Acc) end, + ?ferror + ). zero(<>, [object|Stack], Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> - {end_object, fun() -> maybe_done(Rest, Stack, Opts) end} + {event, {integer, lists:reverse(Acc)}, fun() -> + {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} end}; zero(<>, [array|Stack], Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> - {end_array, fun() -> maybe_done(Rest, Stack, Opts) end} + {event, {integer, lists:reverse(Acc)}, fun() -> + {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} end}; zero(<>, [object|Stack], Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end}; + {event, {integer, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end}; zero(<>, [array|_] = Stack, Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end}; + {event, {integer, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end}; zero(<>, Stack, Opts, Acc) -> initial_decimal(Rest, Stack, Opts, [?decimalpoint] ++ Acc); zero(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> - {{integer, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, {integer, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; zero(<>, Stack, ?comments_enabled(Opts), Acc) -> maybe_comment(Rest, fun(Resume) -> zero(Resume, Stack, Opts, Acc) end); -zero(Bin, [], ?stream_mode(Opts), Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> - {end_json, fun() -> start(Bin, [], Opts) end} - end}; zero(<<>>, [], Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> - {end_json, fun() -> {incomplete, fun(Stream) -> maybe_done(Stream, [], Opts) end} - end} - end}; -zero(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> zero(<>, Stack, Opts, Acc) end} - end. + {incomplete, + fun(Stream) -> zero(Stream, [], Opts, Acc) end, + fun() -> {event, {integer, lists:reverse(Acc)}, fun() -> maybe_done(<<>>, [], Opts) end} end + }; +zero(Bin, Stack, Opts, Acc) -> + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> zero(<>, Stack, Opts, Acc) end, + ?ferror + ). integer(<>, Stack, Opts, Acc) when ?is_nonzero(S) -> integer(Rest, Stack, Opts, [S] ++ Acc); integer(<>, [object|Stack], Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> - {end_object, fun() -> maybe_done(Rest, Stack, Opts) end} + {event, {integer, lists:reverse(Acc)}, fun() -> + {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} end}; integer(<>, [array|Stack], Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> - {end_array, fun() -> maybe_done(Rest, Stack, Opts) end} + {event, {integer, lists:reverse(Acc)}, fun() -> + {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} end}; integer(<>, [object|Stack], Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end}; + {event, {integer, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end}; integer(<>, [array|_] = Stack, Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end}; + {event, {integer, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end}; integer(<>, Stack, Opts, Acc) -> initial_decimal(Rest, Stack, Opts, [?decimalpoint] ++ Acc); integer(<>, Stack, Opts, Acc) -> @@ -436,23 +442,19 @@ integer(<<$e/?encoding, Rest/binary>>, Stack, Opts, Acc) -> integer(<<$E/?encoding, Rest/binary>>, Stack, Opts, Acc) -> e(Rest, Stack, Opts, "e0." ++ Acc); integer(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> - {{integer, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, {integer, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; integer(<>, Stack, ?comments_enabled(Opts), Acc) -> maybe_comment(Rest, fun(Resume) -> integer(Resume, Stack, Opts, Acc) end); -integer(Bin, [], ?stream_mode(Opts), Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> - {end_json, fun() -> start(Bin, [], Opts) end} - end}; integer(<<>>, [], Opts, Acc) -> - {{integer, lists:reverse(Acc)}, fun() -> - {end_json, fun() -> {incomplete, fun(Stream) -> maybe_done(Stream, [], Opts) end} - end} - end}; + {incomplete, + fun(Stream) -> zero(Stream, [], Opts, Acc) end, + fun() -> {event, {integer, lists:reverse(Acc)}, fun() -> maybe_done(<<>>, [], Opts) end} end + }; integer(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> integer(<>, Stack, Opts, Acc) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> integer(<>, Stack, Opts, Acc) end, + ?ferror + ). initial_decimal(<>, Stack, Opts, Acc) when ?is_nonzero(S) -> @@ -460,26 +462,26 @@ initial_decimal(<>, Stack, Opts, Acc) when ?is_nonzero initial_decimal(<>, Stack, Opts, Acc) -> decimal(Rest, Stack, Opts, [?zero] ++ Acc); initial_decimal(Bin, Stack, Opts, Acc) -> -case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Opts, Acc) end} -end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> initial_decimal(<>, Stack, Opts, Acc) end, + ?ferror + ). decimal(<>, Stack, Opts, Acc) when ?is_nonzero(S) -> decimal(Rest, Stack, Opts, [S] ++ Acc); decimal(<>, [object|Stack], Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> - {end_object, fun() -> maybe_done(Rest, Stack, Opts) end} + {event, {float, lists:reverse(Acc)}, fun() -> + {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} end}; decimal(<>, [array|Stack], Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> - {end_array, fun() -> maybe_done(Rest, Stack, Opts) end} + {event, {float, lists:reverse(Acc)}, fun() -> + {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} end}; decimal(<>, [object|Stack], Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end}; + {event, {float, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end}; decimal(<>, [array|_] = Stack, Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end}; + {event, {float, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end}; decimal(<>, Stack, Opts, Acc) -> decimal(Rest, Stack, Opts, [?zero] ++ Acc); decimal(<<$e/?encoding, Rest/binary>>, Stack, Opts, Acc) -> @@ -487,23 +489,19 @@ decimal(<<$e/?encoding, Rest/binary>>, Stack, Opts, Acc) -> decimal(<<$E/?encoding, Rest/binary>>, Stack, Opts, Acc) -> e(Rest, Stack, Opts, "e" ++ Acc); decimal(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> - {{float, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, {float, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; decimal(<>, Stack, ?comments_enabled(Opts), Acc) -> maybe_comment(Rest, fun(Resume) -> decimal(Resume, Stack, Opts, Acc) end); -decimal(Bin, [], ?stream_mode(Opts), Acc) -> - {{float, lists:reverse(Acc)}, fun() -> - {end_json, fun() -> start(Bin, [], Opts) end} - end}; decimal(<<>>, [], Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> - {end_json, fun() -> {incomplete, fun(Stream) -> maybe_done(Stream, [], Opts) end} - end} - end}; + {incomplete, + fun(Stream) -> decimal(Stream, [], Opts, Acc) end, + fun() -> {event, {float, lists:reverse(Acc)}, fun() -> maybe_done(<<>>, [], Opts) end} end + }; decimal(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> decimal(<>, Stack, Opts, Acc) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> decimal(<>, Stack, Opts, Acc) end, + ?ferror + ). e(<>, Stack, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> @@ -511,145 +509,141 @@ e(<>, Stack, Opts, Acc) when S =:= ?zero; ?is_nonzero( e(<>, Stack, Opts, Acc) when S =:= ?positive; S =:= ?negative -> ex(Rest, Stack, Opts, [S] ++ Acc); e(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> e(<>, Stack, Opts, Acc) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> e(<>, Stack, Opts, Acc) end, + ?ferror + ). ex(<>, Stack, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Opts, [S] ++ Acc); ex(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> ex(<>, Stack, Opts, Acc) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> ex(<>, Stack, Opts, Acc) end, + ?ferror + ). exp(<>, Stack, Opts, Acc) when ?is_nonzero(S) -> exp(Rest, Stack, Opts, [S] ++ Acc); exp(<>, [object|Stack], Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> - {end_object, fun() -> maybe_done(Rest, Stack, Opts) end} + {event, {float, lists:reverse(Acc)}, fun() -> + {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} end}; exp(<>, [array|Stack], Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> - {end_array, fun() -> maybe_done(Rest, Stack, Opts) end} + {event, {float, lists:reverse(Acc)}, fun() -> + {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} end}; exp(<>, [object|Stack], Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end}; + {event, {float, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end}; exp(<>, [array|_] = Stack, Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end}; + {event, {float, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end}; exp(<>, Stack, Opts, Acc) -> exp(Rest, Stack, Opts, [?zero] ++ Acc); exp(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> - {{float, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, {float, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; exp(<>, Stack, ?comments_enabled(Opts), Acc) -> maybe_comment(Rest, fun(Resume) -> exp(Resume, Stack, Opts, Acc) end); -exp(Bin, [], ?stream_mode(Opts), Acc) -> - {{float, lists:reverse(Acc)}, fun() -> - {end_json, fun() -> start(Bin, [], Opts) end} - end}; exp(<<>>, [], Opts, Acc) -> - {{float, lists:reverse(Acc)}, fun() -> - {end_json, fun() -> {incomplete, fun(Stream) -> maybe_done(Stream, [], Opts) end} - end} - end}; + {incomplete, + fun(Stream) -> exp(Stream, [], Opts, Acc) end, + fun() -> {event, {float, lists:reverse(Acc)}, fun() -> maybe_done(<<>>, [], Opts) end} end + }; exp(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> exp(<>, Stack, Opts, Acc) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> exp(<>, Stack, Opts, Acc) end, + ?ferror + ). tr(<<$r/?encoding, Rest/binary>>, Stack, Opts) -> tru(Rest, Stack, Opts); tr(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> tr(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> tr(<>, Stack, Opts) end, + ?ferror + ). tru(<<$u/?encoding, Rest/binary>>, Stack, Opts) -> true(Rest, Stack, Opts); tru(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> tru(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> tru(<>, Stack, Opts) end, + ?ferror + ). true(<<$e/?encoding, Rest/binary>>, Stack, Opts) -> - {{literal, true}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, {literal, true}, fun() -> maybe_done(Rest, Stack, Opts) end}; true(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> true(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> true(<>, Stack, Opts) end, + ?ferror + ). fa(<<$a/?encoding, Rest/binary>>, Stack, Opts) -> fal(Rest, Stack, Opts); fa(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> fa(<>, Stack, Opts) end} - end. - + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> fa(<>, Stack, Opts) end, + ?ferror + ). + fal(<<$l/?encoding, Rest/binary>>, Stack, Opts) -> fals(Rest, Stack, Opts); fal(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> fal(<>, Stack, Opts) end} - end. - + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> fal(<>, Stack, Opts) end, + ?ferror + ). + fals(<<$s/?encoding, Rest/binary>>, Stack, Opts) -> false(Rest, Stack, Opts); fals(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> fals(<>, Stack, Opts) end} - end. - + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> fals(<>, Stack, Opts) end, + ?ferror + ). + false(<<$e/?encoding, Rest/binary>>, Stack, Opts) -> - {{literal, false}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, {literal, false}, fun() -> maybe_done(Rest, Stack, Opts) end}; false(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> false(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> false(<>, Stack, Opts) end, + ?ferror + ). nu(<<$u/?encoding, Rest/binary>>, Stack, Opts) -> nul(Rest, Stack, Opts); nu(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> nu(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> nu(<>, Stack, Opts) end, + ?ferror + ). nul(<<$l/?encoding, Rest/binary>>, Stack, Opts) -> null(Rest, Stack, Opts); nul(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> nul(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> nul(<>, Stack, Opts) end, + ?ferror + ). null(<<$l/?encoding, Rest/binary>>, Stack, Opts) -> - {{literal, null}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {event, {literal, null}, fun() -> maybe_done(Rest, Stack, Opts) end}; null(Bin, Stack, Opts) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> null(<>, Stack, Opts) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> null(<>, Stack, Opts) end, + ?ferror + ). %% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode @@ -661,10 +655,10 @@ null(Bin, Stack, Opts) -> maybe_comment(<>, Resume) -> comment(Rest, Resume); maybe_comment(Bin, Resume) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> maybe_comment(<>, Resume) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> maybe_comment(<>, Resume) end, + ?ferror + ). comment(<>, Resume) -> @@ -672,10 +666,10 @@ comment(<>, Resume) -> comment(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); comment(Bin, Resume) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> comment(<>, Resume) end} - end. + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> comment(<>, Resume) end, + ?ferror + ). maybe_comment_done(<>, Resume) -> @@ -683,7 +677,7 @@ maybe_comment_done(<>, Resume) -> maybe_comment_done(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); maybe_comment_done(Bin, Resume) -> - case byte_size(Bin) >= ?symbol_size of - true -> {error, badjson} - ; _ -> {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end} - end. \ No newline at end of file + ?incomplete(?partial_codepoint(Bin), + fun(Stream) -> maybe_comment_done(<>, Resume) end, + ?ferror + ). \ No newline at end of file diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index 8c6e8c3..b4f9c88 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -86,25 +86,25 @@ %% compilation macros for unified decoder -ifdef(utf8). -define(encoding, utf8). --define(symbol_size, 1). +-define(partial_codepoint(Bin), byte_size(Bin) >= 1). -endif. -ifdef(utf16). -define(encoding, utf16). --define(symbol_size, 2). +-define(partial_codepoint(Bin), byte_size(Bin) >= 2). -endif. -ifdef(utf16le). -define(encoding, utf16-little). --define(symbol_size, 2). +-define(partial_codepoint(Bin), byte_size(Bin) >= 2). -endif. -ifdef(utf32). -define(encoding, utf32). --define(symbol_size, 4). +-define(partial_codepoint(Bin), byte_size(Bin) >= 4). -endif. -ifdef(utf32le). -define(encoding, utf32-little). --define(symbol_size, 4). +-define(partial_codepoint(Bin), byte_size(Bin) >= 4). -endif. \ No newline at end of file diff --git a/src/jsx_types.hrl b/src/jsx_types.hrl index d0a1677..d85a23b 100644 --- a/src/jsx_types.hrl +++ b/src/jsx_types.hrl @@ -55,12 +55,9 @@ %% this probably doesn't work properly --type jsx_parser() :: fun((json()) -> {[jsx_event(),...], jsx_parser()} - | {incomplete, jsx_parser()} - | {error, badjson} -). +-type jsx_parser() :: fun((json()) -> jsx_parser_result()). --type parser_result() :: {jsx_event(), fun(() -> parser_result())} +-type jsx_parser_result() :: {event, jsx_event(), fun(() -> jsx_parser_result())} | {incomplete, jsx_parser()} | {error, badjson}. \ No newline at end of file diff --git a/test/jsx_test.erl b/test/jsx_test.erl deleted file mode 100644 index 6496384..0000000 --- a/test/jsx_test.erl +++ /dev/null @@ -1,107 +0,0 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. - - --module(jsx_test). --author("alisdairsullivan@yahoo.ca"). - --export([test/0, test/1, test_event/2]). - -test() -> - F = decoder([]), - incremental_decode(F, unicode:characters_to_binary(<<"0">>, utf8, utf16)). - -test(Dir) -> - ValidJSONTests = load_tests(Dir), - - etap:plan(length(ValidJSONTests) * 10), - run_tests(ValidJSONTests), - etap:end_tests(). - - -decoder(Flags) -> - jsx:decoder({jsx_test, test_event, []}, Flags). - -test_event(end_json, Acc) -> - lists:reverse(Acc); -test_event(Event, Acc) -> - [Event] ++ Acc. - - -load_tests(Dir) -> - TestSpecs = filelib:wildcard("*.test", Dir), - load_tests(TestSpecs, Dir, []). - -load_tests([], _Dir, Acc) -> - lists:reverse(Acc); -load_tests([Test|Rest], Dir, Acc) -> - try - TestName = filename:basename(Test, ".test"), - {ok, JSON} = file:read_file(Dir ++ "/" ++ TestName ++ ".json"), - case file:consult(Dir ++ "/" ++ Test) of - {ok, [Events]} -> - load_tests(Rest, Dir, [{TestName, JSON, Events, []}] ++ Acc) - ; {ok, [Events, Flags]} -> - load_tests(Rest, Dir, [{TestName, JSON, Events, Flags}] ++ Acc) - end - catch _:_ -> load_tests(Rest, Dir, Acc) end. - -run_tests([]) -> - ok; -run_tests([{TestName, JSON, Events, Flags}|Rest]) -> - F = decoder(Flags), - etap:is(decode(F, JSON), Events, TestName ++ ": utf8"), - etap:is(incremental_decode(F, JSON), Events, TestName ++ ": incremental utf8"), - etap:is(decode(F, to_utf16(JSON)), Events, TestName ++ ": utf16"), - etap:is(incremental_decode(F, to_utf16(JSON)), Events, TestName ++ ": incremental utf16"), - etap:is(decode(F, to_utf16le(JSON)), Events, TestName ++ ": utf16le"), - etap:is(incremental_decode(F, to_utf16le(JSON)), Events, TestName ++ ": incremental utf16le"), - etap:is(decode(F, to_utf32(JSON)), Events, TestName ++ ": utf32"), - etap:is(incremental_decode(F, to_utf32(JSON)), Events, TestName ++ ": incremental utf32"), - etap:is(decode(F, to_utf32le(JSON)), Events, TestName ++ ": utf32le"), - etap:is(incremental_decode(F, to_utf32le(JSON)), Events, TestName ++ ": incremental utf32le"), - run_tests(Rest). - - -incremental_decode(F, <<>>) -> - case F(<<>>) of - {incomplete, G} -> G - ; {Result, _} -> Result - end; -incremental_decode(F, <>) -> - {_, G} = F(<>), - incremental_decode(G, Rest). - -decode(F, JSON) -> - case F(JSON) of - {incomplete, G} when is_function(G) -> - throw(badjson) - ; {Result, _} -> - Result - end. - -to_utf16(Bin) -> unicode:characters_to_binary(Bin, utf8, utf16). -to_utf16le(Bin) -> unicode:characters_to_binary(Bin, utf8, {utf16,little}). -to_utf32(Bin) -> unicode:characters_to_binary(Bin, utf8, utf32). -to_utf32le(Bin) -> unicode:characters_to_binary(Bin, utf8, {utf32,little}). - - \ No newline at end of file