diff --git a/src/jsx_utf16b.erl b/src/jsx_utf16b.erl
new file mode 100644
index 0000000..009a820
--- /dev/null
+++ b/src/jsx_utf16b.erl
@@ -0,0 +1,436 @@
+-module(jsx_utf16b).
+
+-export([start/4]).
+
+-include("jsx_common.hrl").
+
+
+%% this code is mostly autogenerated and mostly ugly. apologies. for more insight on
+%% Callbacks or Opts, see the comments accompanying decoder/2 (in jsx.erl). Stack
+%% is a stack of flags used to track depth and to keep track of whether we are
+%% returning from a value or a key inside objects. all pops, peeks and pushes are
+%% inlined. the code that handles naked values and comments is not optimized by the
+%% compiler for efficient matching, but you shouldn't be using naked values or comments
+%% anyways, they are horrible and contrary to the spec.
+
+%% NOTE(review): the ?start_object, ?start_array, ?end_object, ?end_array and ?comma
+%% segment names below were reconstructed from the events each clause emits; confirm
+%% them against jsx_common.hrl.
+
+%% start/4 is the entry point. it dispatches on the first utf16 (big endian) codepoint
+%% of the stream. objects and arrays are always accepted; strings, literals and numbers
+%% are only accepted when the naked_values option is true. a solidus opens a comment
+%% when the comments option is true. on an empty binary a fun is returned that resumes
+%% parsing when applied to more input. any other input raises function_clause.
+start(<<?start_object/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+start(<<?start_array/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+start(<<?quote/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    string(Rest, Stack, Callbacks, Opts, []);
+start(<<$t/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    tr(Rest, Stack, Callbacks, Opts);
+start(<<$f/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    fa(Rest, Stack, Callbacks, Opts);
+start(<<$n/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    nu(Rest, Stack, Callbacks, Opts);
+start(<<?negative/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+start(<<?zero/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+start(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S), Opts#opts.naked_values == true ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+start(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end);
+start(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    start(Rest, Stack, Callbacks, Opts);
+start(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> start(Stream, Stack, Callbacks, Opts) end.
+
+
+%% maybe_done/4 runs after a complete value. a closing brace or bracket pops the
+%% matching flag and emits end_object/end_array; a comma continues with the next key
+%% (inside an object) or the next value (inside an array). when the input is exhausted
+%% and the stack is empty the eof event is dispatched, otherwise a resume fun is returned.
+maybe_done(<<?end_object/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
+maybe_done(<<?end_array/utf16-big, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
+maybe_done(<<?comma/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
+    key(Rest, [key|Stack], Callbacks, Opts);
+maybe_done(<<?comma/utf16-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts) ->
+    value(Rest, Stack, Callbacks, Opts);
+maybe_done(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end);
+maybe_done(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, Callbacks, Opts);
+maybe_done(<<>>, [], Callbacks, _Opts) ->
+    callback(eof, Callbacks);
+maybe_done(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> maybe_done(Stream, Stack, Callbacks, Opts) end.
+
+
+%% object/4 runs right after an opening brace (the key flag is on top of the stack).
+%% it accepts either an immediate closing brace (empty object) or the quote that
+%% opens the first key.
+object(<<?end_object/utf16-big, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
+object(<<?quote/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+object(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end);
+object(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    object(Rest, Stack, Callbacks, Opts);
+object(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> object(Stream, Stack, Callbacks, Opts) end.
+
+
+%% array/4 runs right after an opening bracket. it accepts any value start or an
+%% immediate closing bracket (empty array). on an empty binary it returns a resume fun.
+%% NOTE(review): the ?start_object, ?start_array and ?end_array segment names were
+%% reconstructed from the events each clause emits; confirm against jsx_common.hrl.
+array(<<?quote/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+array(<<?start_object/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+array(<<?start_array/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+array(<<?end_array/utf16-big, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
+array(<<$t/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tr(Rest, Stack, Callbacks, Opts);
+array(<<$f/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fa(Rest, Stack, Callbacks, Opts);
+array(<<$n/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    nu(Rest, Stack, Callbacks, Opts);
+array(<<?negative/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+array(<<?zero/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+array(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+array(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end);
+array(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    array(Rest, Stack, Callbacks, Opts);
+array(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> array(Stream, Stack, Callbacks, Opts) end.
+
+
+%% value/4 expects the start of a value (after a colon or after a comma inside an
+%% array). unlike array/4 it does not accept a closing bracket.
+%% NOTE(review): the ?start_object, ?start_array and ?colon segment names were
+%% reconstructed from context; confirm against jsx_common.hrl.
+value(<<?quote/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+value(<<?start_object/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+value(<<?start_array/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+value(<<$t/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tr(Rest, Stack, Callbacks, Opts);
+value(<<$f/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fa(Rest, Stack, Callbacks, Opts);
+value(<<$n/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    nu(Rest, Stack, Callbacks, Opts);
+value(<<?negative/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+value(<<?zero/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+value(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+value(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end);
+value(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    value(Rest, Stack, Callbacks, Opts);
+value(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> value(Stream, Stack, Callbacks, Opts) end.
+
+
+%% colon/4 runs after a key string. on the colon it swaps the key flag on top of the
+%% stack for the object flag and moves on to the value.
+colon(<<?colon/utf16-big, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
+    value(Rest, [object|Stack], Callbacks, Opts);
+colon(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end);
+colon(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    colon(Rest, Stack, Callbacks, Opts);
+colon(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> colon(Stream, Stack, Callbacks, Opts) end.
+
+
+%% key/4 runs after a comma inside an object and expects the quote that opens the
+%% next key.
+key(<<?quote/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+key(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end);
+key(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    key(Rest, Stack, Callbacks, Opts);
+key(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> key(Stream, Stack, Callbacks, Opts) end.
+
+
+%% string has an additional parameter, an accumulator (Acc) used to hold the intermediate
+%% representation of the string being parsed. using a list of integers representing
+%% unicode codepoints is faster than constructing binaries, many of which will be
+%% converted back to lists by the user anyways.
+
+%% Acc is built in reverse and reversed once when the closing quote is seen. the
+%% closing quote emits a key event when the key flag is on top of the stack and a
+%% string event otherwise.
+string(<<?quote/utf16-big, Rest/binary>>, [key|_] = Stack, Callbacks, Opts, Acc) ->
+    colon(Rest, Stack, callback({key, lists:reverse(Acc)}, Callbacks), Opts);
+string(<<?quote/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback({string, lists:reverse(Acc)}, Callbacks), Opts);
+string(<<?rsolidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    escape(Rest, Stack, Callbacks, Opts, Acc);
+string(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
+    string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+string(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> string(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% only thing to note here is the additional accumulator passed to escaped_unicode used
+%% to hold the codepoint sequence. unnecessary, but nicer than using the string
+%% accumulator.
+
+%% escape/5 handles the codepoint following a reverse solidus inside a string. the
+%% single letter escapes push the corresponding control character onto Acc, `u` starts
+%% a four digit hex sequence, and quote, solidus and reverse solidus are pushed as is.
+escape(<<"b"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\b" ++ Acc);
+escape(<<"f"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\f" ++ Acc);
+escape(<<"n"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\n" ++ Acc);
+escape(<<"r"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\r" ++ Acc);
+escape(<<"t"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\t" ++ Acc);
+escape(<<"u"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    escaped_unicode(Rest, Stack, Callbacks, Opts, Acc, []);
+escape(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc)
+        when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus ->
+    string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+escape(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> escape(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% this code is ugly and unfortunate, but so is json's handling of escaped unicode
+%% codepoint sequences. if the ascii option is present, the sequence is converted
+%% to a codepoint and inserted into the string if it represents an ascii value. if
+%% the codepoint option is present the sequence is converted and inserted as long
+%% as it represents a valid 16 bit integer value (this is where json's spec gets
+%% insane). any other option and the sequence is converted back to an erlang string
+%% and appended to the string in place.
+
+%% escaped_unicode/6 collects the four hex digits after `\u`. the last argument holds
+%% the digits seen so far in reverse order, so [C, B, A] plus the incoming D spell the
+%% sequence A B C D. the fourth digit is not guarded with ?is_hex; a non hex value
+%% there makes erlang:list_to_integer/2 fail with badarg.
+escaped_unicode(<<D/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, String, [C, B, A]) ->
+    X = erlang:list_to_integer([A, B, C, D], 16),
+    case Opts#opts.escaped_unicode of
+        ascii when X < 16#0080 ->
+            string(Rest, Stack, Callbacks, Opts, [X] ++ String)
+        ; codepoint ->
+            string(Rest, Stack, Callbacks, Opts, [X] ++ String)
+        ; _ ->
+            %% String is accumulated in reverse and only reversed when the closing
+            %% quote is seen, so the literal escape has to be pushed back to front
+            %% to come out as \uABCD.
+            string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String)
+    end;
+escaped_unicode(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) ->
+    escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc);
+escaped_unicode(<<>>, Stack, Callbacks, Opts, String, Acc) ->
+    fun(Stream) -> escaped_unicode(Stream, Stack, Callbacks, Opts, String, Acc) end.
+
+
+%% like strings, numbers are collected in an intermediate accumulator before
+%% being emitted to the callback handler. no processing of numbers is done in
+%% process, it's left for the user, though there are convenience functions to
+%% convert them into erlang floats/integers in jsx_utils.erl.
+
+%% TODO: actually write that jsx_utils.erl module mentioned above...
+
+%% negative/5 runs after a leading minus sign and requires a digit: a zero goes to
+%% zero/5, any other digit to integer/5.
+negative(<<"0"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc);
+negative(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+negative(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> negative(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% zero/5 runs after a leading zero. the number ends at a closing brace or bracket,
+%% a comma, whitespace, or the end of input with an empty stack; in each case a
+%% number event carrying the reversed accumulator is emitted first. a decimal point
+%% continues into fraction/5. no further digit is accepted after a leading zero.
+%% NOTE(review): the ?end_object, ?end_array and ?comma segment names were
+%% reconstructed from the events each clause emits; confirm against jsx_common.hrl.
+zero(<<?end_object/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+zero(<<?end_array/utf16-big, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+zero(<<?comma/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?comma/utf16-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?decimalpoint/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
+zero(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> zero(Resume, Stack, Callbacks, Opts, Acc) end);
+zero(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+zero(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> zero(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% integer/5 accumulates the digits of the integer part of a number. the terminating
+%% cases mirror zero/5; in addition further digits are accepted, and `e` or `E`
+%% (both stored as a lower case "e") start an exponent.
+%% NOTE(review): the ?end_object, ?end_array and ?comma segment names were
+%% reconstructed from the events each clause emits; confirm against jsx_common.hrl.
+integer(<<?end_object/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+integer(<<?end_array/utf16-big, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+integer(<<?comma/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?comma/utf16-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?decimalpoint/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
+integer(<<?zero/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    integer(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+integer(<<"e"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+integer(<<"E"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+integer(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+integer(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> integer(Resume, Stack, Callbacks, Opts, Acc) end);
+integer(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+integer(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> integer(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% fraction/5 accumulates the digits after the decimal point. termination and
+%% exponent handling are the same as in integer/5; a second decimal point is not
+%% accepted.
+%% NOTE(review): the ?end_object, ?end_array and ?comma segment names were
+%% reconstructed from the events each clause emits; confirm against jsx_common.hrl.
+fraction(<<?end_object/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+fraction(<<?end_array/utf16-big, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+fraction(<<?comma/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+fraction(<<?comma/utf16-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+fraction(<<?zero/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    fraction(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+fraction(<<"e"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+fraction(<<"E"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+fraction(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    fraction(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+fraction(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+fraction(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> fraction(Resume, Stack, Callbacks, Opts, Acc) end);
+fraction(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+fraction(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> fraction(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% e/5 runs right after the exponent marker. a sign moves to ex/5 (which then
+%% requires a digit); a digit moves straight to exp/5.
+e(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative ->
+    ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+e(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+e(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> e(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% ex/5 runs after the sign of an exponent and requires at least one digit.
+ex(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+ex(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> ex(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% exp/5 accumulates the remaining exponent digits. the number ends the same way as
+%% in zero/5: closing brace or bracket, comma, whitespace, or end of input with an
+%% empty stack.
+%% NOTE(review): the ?end_object, ?end_array and ?comma segment names were
+%% reconstructed from the events each clause emits; confirm against jsx_common.hrl.
+exp(<<?end_object/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+exp(<<?end_array/utf16-big, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+exp(<<?comma/utf16-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?comma/utf16-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?zero/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    exp(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+exp(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+exp(<<S/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?solidus/utf16-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> exp(Resume, Stack, Callbacks, Opts, Acc) end);
+exp(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+exp(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> exp(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% tr/4 and tru/4 match the second and third letters of the literal `true`, one
+%% codepoint per state so that the input may be split anywhere.
+tr(<<"r"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tru(Rest, Stack, Callbacks, Opts);
+tr(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> tr(Stream, Stack, Callbacks, Opts) end.
+
+
+tru(<<"u"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    true(Rest, Stack, Callbacks, Opts);
+tru(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> tru(Stream, Stack, Callbacks, Opts) end.
+
+
+%% the remaining literal states. each one consumes a single expected letter; the
+%% final state of each literal emits {literal, true | false | null} and hands over
+%% to maybe_done/4.
+true(<<"e"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback({literal, true}, Callbacks), Opts);
+true(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> true(Stream, Stack, Callbacks, Opts) end.
+
+
+fa(<<"a"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fal(Rest, Stack, Callbacks, Opts);
+fa(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> fa(Stream, Stack, Callbacks, Opts) end.
+
+
+fal(<<"l"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fals(Rest, Stack, Callbacks, Opts);
+fal(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> fal(Stream, Stack, Callbacks, Opts) end.
+
+
+fals(<<"s"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    false(Rest, Stack, Callbacks, Opts);
+fals(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> fals(Stream, Stack, Callbacks, Opts) end.
+
+
+false(<<"e"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback({literal, false}, Callbacks), Opts);
+false(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> false(Stream, Stack, Callbacks, Opts) end.
+
+
+nu(<<"u"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    nul(Rest, Stack, Callbacks, Opts);
+nu(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> nu(Stream, Stack, Callbacks, Opts) end.
+
+
+nul(<<"l"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    null(Rest, Stack, Callbacks, Opts);
+nul(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> nul(Stream, Stack, Callbacks, Opts) end.
+
+
+null(<<"l"/utf16-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback({literal, null}, Callbacks), Opts);
+null(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> null(Stream, Stack, Callbacks, Opts) end.
+
+
+%% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode
+%% character is valid in a comment, except, obviously the */ sequence which ends
+%% the comment. they're implemented as a closure called when the comment ends that
+%% returns execution to the point where the comment began. comments are not
+%% recorded in any way, simply parsed.
+
+%% NOTE(review): the ?star segment name was reconstructed from context; confirm it
+%% against jsx_common.hrl.
+
+%% maybe_comment/2 runs after a solidus and requires the star that completes the
+%% comment opener.
+maybe_comment(<<?star/utf16-big, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment(<<>>, Resume) ->
+    fun(Stream) -> maybe_comment(Stream, Resume) end.
+
+
+%% comment/2 skips codepoints until it sees a star, which may start the closer.
+comment(<<?star/utf16-big, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+comment(<<_/utf16-big, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+comment(<<>>, Resume) ->
+    fun(Stream) -> comment(Stream, Resume) end.
+
+
+%% maybe_comment_done/2 runs after a star inside a comment. a solidus ends the
+%% comment and calls Resume with the remaining input. another star may itself be
+%% the start of the closer, so the state is kept. any other codepoint means the
+%% star was ordinary comment text and scanning continues in comment/2.
+maybe_comment_done(<<?solidus/utf16-big, Rest/binary>>, Resume) ->
+    Resume(Rest);
+maybe_comment_done(<<?star/utf16-big, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+maybe_comment_done(<<_/utf16-big, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment_done(<<>>, Resume) ->
+    fun(Stream) -> maybe_comment_done(Stream, Resume) end.
+
+
+%% helper function for dispatching of parser events
+
+%% with the {none, Events} handler, events are collected in reverse and the eof
+%% event returns them in order of occurrence. with a module handler the event is
+%% passed to Mod:jsx_event/2, with a fun handler to F/2; both return the handler
+%% paired with the new state.
+callback(eof, {none, Callbacks}) ->
+    lists:reverse(Callbacks);
+callback(Event, {none, Callbacks}) ->
+    {none, [Event] ++ Callbacks};
+callback(Event, {Mod, State}) when is_atom(Mod) ->
+    {Mod, Mod:jsx_event(Event, State)};
+callback(Event, {F, State}) when is_function(F) ->
+    {F, F(Event, State)}.
+
+
+
+
+
+
diff --git a/src/jsx_utf16l.erl b/src/jsx_utf16l.erl
new file mode 100644
index 0000000..8d1b504
--- /dev/null
+++ b/src/jsx_utf16l.erl
@@ -0,0 +1,436 @@
+-module(jsx_utf16l).
+
+-export([start/4]).
+
+-include("jsx_common.hrl").
+
+
+%% this code is mostly autogenerated and mostly ugly. apologies. for more insight on
+%% Callbacks or Opts, see the comments accompanying decoder/2 (in jsx.erl). Stack
+%% is a stack of flags used to track depth and to keep track of whether we are
+%% returning from a value or a key inside objects. all pops, peeks and pushes are
+%% inlined. the code that handles naked values and comments is not optimized by the
+%% compiler for efficient matching, but you shouldn't be using naked values or comments
+%% anyways, they are horrible and contrary to the spec.
+
+%% NOTE(review): the ?start_object and ?start_array segment names below were
+%% reconstructed from the events each clause emits; confirm against jsx_common.hrl.
+
+%% start/4 is the entry point. it dispatches on the first utf16 (little endian)
+%% codepoint of the stream. objects and arrays are always accepted; strings, literals
+%% and numbers are only accepted when the naked_values option is true. a solidus opens
+%% a comment when the comments option is true. on an empty binary a fun is returned
+%% that resumes parsing when applied to more input.
+start(<<?start_object/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+start(<<?start_array/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+start(<<?quote/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    string(Rest, Stack, Callbacks, Opts, []);
+start(<<$t/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    tr(Rest, Stack, Callbacks, Opts);
+start(<<$f/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    fa(Rest, Stack, Callbacks, Opts);
+start(<<$n/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    nu(Rest, Stack, Callbacks, Opts);
+start(<<?negative/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+start(<<?zero/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+start(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S), Opts#opts.naked_values == true ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+start(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end);
+start(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    start(Rest, Stack, Callbacks, Opts);
+start(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> start(Stream, Stack, Callbacks, Opts) end.
+
+
+%% maybe_done/4 runs after a complete value. a closing brace or bracket pops the
+%% matching flag and emits end_object/end_array; a comma continues with the next key
+%% (inside an object) or the next value (inside an array). when the input is exhausted
+%% and the stack is empty the eof event is dispatched, otherwise a resume fun is returned.
+%% NOTE(review): the ?end_object, ?end_array and ?comma segment names were
+%% reconstructed from the events each clause emits; confirm against jsx_common.hrl.
+maybe_done(<<?end_object/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
+maybe_done(<<?end_array/utf16-little, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
+maybe_done(<<?comma/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
+    key(Rest, [key|Stack], Callbacks, Opts);
+maybe_done(<<?comma/utf16-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts) ->
+    value(Rest, Stack, Callbacks, Opts);
+maybe_done(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end);
+maybe_done(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, Callbacks, Opts);
+maybe_done(<<>>, [], Callbacks, _Opts) ->
+    callback(eof, Callbacks);
+maybe_done(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> maybe_done(Stream, Stack, Callbacks, Opts) end.
+
+
+%% object/4 runs right after an opening brace (the key flag is on top of the stack).
+%% it accepts either an immediate closing brace (empty object) or the quote that
+%% opens the first key.
+object(<<?end_object/utf16-little, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
+object(<<?quote/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+object(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end);
+object(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    object(Rest, Stack, Callbacks, Opts);
+object(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> object(Stream, Stack, Callbacks, Opts) end.
+
+
+%% array/4 runs right after an opening bracket. it accepts any value start or an
+%% immediate closing bracket (empty array). on an empty binary it returns a resume fun.
+%% NOTE(review): the ?start_object, ?start_array and ?end_array segment names were
+%% reconstructed from the events each clause emits; confirm against jsx_common.hrl.
+array(<<?quote/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+array(<<?start_object/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+array(<<?start_array/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+array(<<?end_array/utf16-little, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
+array(<<$t/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tr(Rest, Stack, Callbacks, Opts);
+array(<<$f/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fa(Rest, Stack, Callbacks, Opts);
+array(<<$n/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    nu(Rest, Stack, Callbacks, Opts);
+array(<<?negative/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+array(<<?zero/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+array(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+array(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end);
+array(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    array(Rest, Stack, Callbacks, Opts);
+array(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> array(Stream, Stack, Callbacks, Opts) end.
+
+
+%% value/4 expects the start of a value (after a colon or after a comma inside an
+%% array). unlike array/4 it does not accept a closing bracket.
+%% NOTE(review): the ?start_object, ?start_array and ?colon segment names were
+%% reconstructed from context; confirm against jsx_common.hrl.
+value(<<?quote/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+value(<<?start_object/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+value(<<?start_array/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+value(<<$t/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tr(Rest, Stack, Callbacks, Opts);
+value(<<$f/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fa(Rest, Stack, Callbacks, Opts);
+value(<<$n/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    nu(Rest, Stack, Callbacks, Opts);
+value(<<?negative/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+value(<<?zero/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+value(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+value(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end);
+value(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    value(Rest, Stack, Callbacks, Opts);
+value(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> value(Stream, Stack, Callbacks, Opts) end.
+
+
+%% colon/4 runs after a key string. on the colon it swaps the key flag on top of the
+%% stack for the object flag and moves on to the value.
+colon(<<?colon/utf16-little, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
+    value(Rest, [object|Stack], Callbacks, Opts);
+colon(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end);
+colon(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    colon(Rest, Stack, Callbacks, Opts);
+colon(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> colon(Stream, Stack, Callbacks, Opts) end.
+
+
+%% key/4 runs after a comma inside an object and expects the quote that opens the
+%% next key.
+key(<<?quote/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+key(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end);
+key(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    key(Rest, Stack, Callbacks, Opts);
+key(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> key(Stream, Stack, Callbacks, Opts) end.
+
+
+%% string has an additional parameter, an accumulator (Acc) used to hold the intermediate
+%% representation of the string being parsed. using a list of integers representing
+%% unicode codepoints is faster than constructing binaries, many of which will be
+%% converted back to lists by the user anyways.
+
+%% Acc is built in reverse and reversed once when the closing quote is seen. the
+%% closing quote emits a key event when the key flag is on top of the stack and a
+%% string event otherwise.
+string(<<?quote/utf16-little, Rest/binary>>, [key|_] = Stack, Callbacks, Opts, Acc) ->
+    colon(Rest, Stack, callback({key, lists:reverse(Acc)}, Callbacks), Opts);
+string(<<?quote/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback({string, lists:reverse(Acc)}, Callbacks), Opts);
+string(<<?rsolidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    escape(Rest, Stack, Callbacks, Opts, Acc);
+string(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
+    string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+string(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> string(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% only thing to note here is the additional accumulator passed to escaped_unicode used
+%% to hold the codepoint sequence. unnecessary, but nicer than using the string
+%% accumulator.
+
+%% escape/5 handles the codepoint following a reverse solidus inside a string. the
+%% single letter escapes push the corresponding control character onto Acc, `u` starts
+%% a four digit hex sequence, and quote, solidus and reverse solidus are pushed as is.
+escape(<<"b"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\b" ++ Acc);
+escape(<<"f"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\f" ++ Acc);
+escape(<<"n"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\n" ++ Acc);
+escape(<<"r"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\r" ++ Acc);
+escape(<<"t"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\t" ++ Acc);
+escape(<<"u"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    escaped_unicode(Rest, Stack, Callbacks, Opts, Acc, []);
+escape(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc)
+        when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus ->
+    string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+escape(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> escape(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% this code is ugly and unfortunate, but so is json's handling of escaped unicode
+%% codepoint sequences. if the ascii option is present, the sequence is converted
+%% to a codepoint and inserted into the string if it represents an ascii value. if
+%% the codepoint option is present the sequence is converted and inserted as long
+%% as it represents a valid 16 bit integer value (this is where json's spec gets
+%% insane). any other option and the sequence is converted back to an erlang string
+%% and appended to the string in place.
+
+%% escaped_unicode/6 collects the four hex digits after `\u`. the last argument holds
+%% the digits seen so far in reverse order, so [C, B, A] plus the incoming D spell the
+%% sequence A B C D. the fourth digit is not guarded with ?is_hex; a non hex value
+%% there makes erlang:list_to_integer/2 fail with badarg.
+escaped_unicode(<<D/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, String, [C, B, A]) ->
+    X = erlang:list_to_integer([A, B, C, D], 16),
+    case Opts#opts.escaped_unicode of
+        ascii when X < 16#0080 ->
+            string(Rest, Stack, Callbacks, Opts, [X] ++ String)
+        ; codepoint ->
+            string(Rest, Stack, Callbacks, Opts, [X] ++ String)
+        ; _ ->
+            %% String is accumulated in reverse and only reversed when the closing
+            %% quote is seen, so the literal escape has to be pushed back to front
+            %% to come out as \uABCD.
+            string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String)
+    end;
+escaped_unicode(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) ->
+    escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc);
+escaped_unicode(<<>>, Stack, Callbacks, Opts, String, Acc) ->
+    fun(Stream) -> escaped_unicode(Stream, Stack, Callbacks, Opts, String, Acc) end.
+
+
+%% like strings, numbers are collected in an intermediate accumulator before
+%% being emitted to the callback handler. no processing of numbers is done in
+%% process, it's left for the user, though there are convenience functions to
+%% convert them into erlang floats/integers in jsx_utils.erl.
+
+%% TODO: actually write that jsx_utils.erl module mentioned above...
+
+%% negative/5 runs after a leading minus sign and requires a digit: a zero goes to
+%% zero/5, any other digit to integer/5.
+negative(<<"0"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc);
+negative(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+negative(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> negative(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% a leading zero has been read. it may be followed by a decimal point (continuing
+%% in fraction/5) or by something that ends the number: the end of the enclosing
+%% object or array, a comma, whitespace, a comment (when the comments option is
+%% true) or, with an empty stack, the end of the stream. ending the number emits
+%% {number, lists:reverse(Acc)} before anything else.
+%% NOTE(review): ?end_object, ?end_array and ?comma are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+zero(<<?end_object/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+zero(<<?end_array/utf16-little, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+zero(<<?comma/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?comma/utf16-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?decimalpoint/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
+zero(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> zero(Resume, Stack, Callbacks, Opts, Acc) end);
+zero(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+zero(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> zero(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% collects the digits of the integer part of a number. a decimal point continues
+%% in fraction/5 and an e or E (stored as a lowercase e) continues in e/5. the end
+%% of the enclosing object or array, a comma, whitespace, a comment (when the
+%% comments option is true) or, with an empty stack, the end of the stream end the
+%% number, which is emitted as {number, lists:reverse(Acc)}.
+%% NOTE(review): ?end_object, ?end_array and ?comma are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+integer(<<?end_object/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+integer(<<?end_array/utf16-little, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+integer(<<?comma/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?comma/utf16-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?decimalpoint/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
+integer(<<?zero/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    integer(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+integer(<<"e"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+integer(<<"E"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+integer(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+integer(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> integer(Resume, Stack, Callbacks, Opts, Acc) end);
+integer(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+integer(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> integer(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% collects the digits that follow the decimal point. an e or E (stored as a
+%% lowercase e) continues in e/5; the end of the enclosing object or array, a
+%% comma, whitespace, a comment (when the comments option is true) or, with an
+%% empty stack, the end of the stream end the number, which is emitted as
+%% {number, lists:reverse(Acc)}.
+%% NOTE(review): ?end_object, ?end_array and ?comma are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+fraction(<<?end_object/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+fraction(<<?end_array/utf16-little, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+fraction(<<?comma/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+fraction(<<?comma/utf16-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+fraction(<<?zero/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    fraction(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+fraction(<<"e"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+fraction(<<"E"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+fraction(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    fraction(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+fraction(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+fraction(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> fraction(Resume, Stack, Callbacks, Opts, Acc) end);
+fraction(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+fraction(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> fraction(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% an exponent marker has been read. either a sign follows (continuing in ex/5,
+%% which then insists on a digit) or the first exponent digit (continuing in exp/5).
+e(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative ->
+    ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+e(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+e(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> e(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% the sign of an exponent has been read; at least one digit has to follow.
+ex(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+ex(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> ex(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% collects the remaining exponent digits. the end of the enclosing object or
+%% array, a comma, whitespace, a comment (when the comments option is true) or,
+%% with an empty stack, the end of the stream end the number, which is emitted as
+%% {number, lists:reverse(Acc)}.
+%% NOTE(review): ?end_object, ?end_array and ?comma are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+exp(<<?end_object/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+exp(<<?end_array/utf16-little, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+exp(<<?comma/utf16-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?comma/utf16-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?zero/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    exp(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+exp(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+exp(<<S/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?solidus/utf16-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> exp(Resume, Stack, Callbacks, Opts, Acc) end);
+exp(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+exp(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> exp(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% the literal true is matched one character per function so that it can be split
+%% anywhere across stream chunks. tr/4 expects the r that follows the leading t.
+tr(<<"r"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tru(Rest, Stack, Callbacks, Opts);
+tr(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> tr(Stream, Stack, Callbacks, Opts) end.
+
+
+%% expects the u of true.
+tru(<<"u"/utf16-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    true(Rest, Stack, Callbacks, Opts);
+tru(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> tru(Stream, Stack, Callbacks, Opts) end.
+
+
+%% last character of the literal true: emits {literal, true} and moves on to
+%% maybe_done/4. every function below returns a fun awaiting more input when the
+%% stream ends before its character has arrived.
+true(<<$e/utf16-little, Tail/binary>>, Stack, Callbacks, Opts) ->
+    Emitted = callback({literal, true}, Callbacks),
+    maybe_done(Tail, Stack, Emitted, Opts);
+true(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> true(More, Stack, Callbacks, Opts) end.
+
+
+%% the literal false, one character per function: a, l, s, then e.
+fa(<<$a/utf16-little, Tail/binary>>, Stack, Callbacks, Opts) ->
+    fal(Tail, Stack, Callbacks, Opts);
+fa(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> fa(More, Stack, Callbacks, Opts) end.
+
+
+fal(<<$l/utf16-little, Tail/binary>>, Stack, Callbacks, Opts) ->
+    fals(Tail, Stack, Callbacks, Opts);
+fal(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> fal(More, Stack, Callbacks, Opts) end.
+
+
+fals(<<$s/utf16-little, Tail/binary>>, Stack, Callbacks, Opts) ->
+    false(Tail, Stack, Callbacks, Opts);
+fals(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> fals(More, Stack, Callbacks, Opts) end.
+
+
+%% final e of false: emits {literal, false}.
+false(<<$e/utf16-little, Tail/binary>>, Stack, Callbacks, Opts) ->
+    Emitted = callback({literal, false}, Callbacks),
+    maybe_done(Tail, Stack, Emitted, Opts);
+false(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> false(More, Stack, Callbacks, Opts) end.
+
+
+%% the literal null, one character per function: u, l, then the closing l.
+nu(<<$u/utf16-little, Tail/binary>>, Stack, Callbacks, Opts) ->
+    nul(Tail, Stack, Callbacks, Opts);
+nu(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> nu(More, Stack, Callbacks, Opts) end.
+
+
+nul(<<$l/utf16-little, Tail/binary>>, Stack, Callbacks, Opts) ->
+    null(Tail, Stack, Callbacks, Opts);
+nul(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> nul(More, Stack, Callbacks, Opts) end.
+
+
+%% closing l of null: emits {literal, null}.
+null(<<$l/utf16-little, Tail/binary>>, Stack, Callbacks, Opts) ->
+    Emitted = callback({literal, null}, Callbacks),
+    maybe_done(Tail, Stack, Emitted, Opts);
+null(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> null(More, Stack, Callbacks, Opts) end.
+
+
+%% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode
+%% character is valid in a comment, except, obviously the */ sequence which ends
+%% the comment.
+%% they're implemented as a closure called when the comment ends that
+%% returns execution to the point where the comment began. comments are not
+%% recorded in any way, simply parsed.
+
+%% a solidus has been read where a comment may start; a star has to follow it.
+%% Resume is the closure that continues the interrupted state once the comment is
+%% over.
+%% NOTE(review): ?star is assumed to be the macro for the asterisk defined in
+%% jsx_common.hrl -- confirm the name against that header.
+maybe_comment(<<?star/utf16-little, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment(<<>>, Resume) ->
+    fun(Stream) -> maybe_comment(Stream, Resume) end.
+
+
+%% skips the body of a comment. a star might be the first half of the closing
+%% sequence, so it hands over to maybe_comment_done/2.
+comment(<<?star/utf16-little, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+comment(<<_/utf16-little, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+comment(<<>>, Resume) ->
+    fun(Stream) -> comment(Stream, Resume) end.
+
+
+%% a star has been read inside a comment. a solidus ends the comment and resumes
+%% the interrupted state. another star may still be followed by the solidus, so
+%% it stays here. anything else means the star was ordinary comment text and the
+%% comment goes on (without these two clauses a comment such as /* a * b */ could
+%% not be parsed).
+maybe_comment_done(<<?solidus/utf16-little, Rest/binary>>, Resume) ->
+    Resume(Rest);
+maybe_comment_done(<<?star/utf16-little, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+maybe_comment_done(<<_/utf16-little, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment_done(<<>>, Resume) ->
+    fun(Stream) -> maybe_comment_done(Stream, Resume) end.
+
+
+%% helper function for dispatching of parser events
+
+%% with the none handler events are pushed onto a list which eof returns in the
+%% order the events occurred. with a module handler Mod:jsx_event/2 is called and
+%% with a fun handler the fun is applied to the event and the current state; in
+%% both cases the result becomes the new state (eof included).
+callback(eof, {none, Callbacks}) ->
+    lists:reverse(Callbacks);
+callback(Event, {none, Callbacks}) ->
+    {none, [Event] ++ Callbacks};
+callback(Event, {Mod, State}) when is_atom(Mod) ->
+    {Mod, Mod:jsx_event(Event, State)};
+callback(Event, {F, State}) when is_function(F) ->
+    {F, F(Event, State)}.
+
+
+
+
+
+
diff --git a/src/jsx_utf32b.erl b/src/jsx_utf32b.erl
new file mode 100644
index 0000000..3c33407
--- /dev/null
+++ b/src/jsx_utf32b.erl
@@ -0,0 +1,436 @@
+-module(jsx_utf32b).
+
+-export([start/4]).
+
+-include("jsx_common.hrl").
+
+
+%% this code is mostly autogenerated and mostly ugly. apologies. for more insight on
+%% Callbacks or Opts, see the comments accompanying decoder/2 (in jsx.erl). Stack
+%% is a stack of flags used to track depth and to keep track of whether we are
+%% returning from a value or a key inside objects. all pops, peeks and pushes are
+%% inlined. the code that handles naked values and comments is not optimized by the
+%% compiler for efficient matching, but you shouldn't be using naked values or comments
+%% anyways, they are horrible and contrary to the spec.
+
+%% entry point of the decoder. the start of an object or array is always accepted;
+%% a string, literal or number at the top level is only accepted when the
+%% naked_values option is true. leading whitespace is skipped and so are comments
+%% when the comments option is true. when the stream is exhausted a fun is returned
+%% that resumes here once it is applied to more input.
+%% NOTE(review): ?start_object and ?start_array are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+start(<<?start_object/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+start(<<?start_array/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+start(<<?quote/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    string(Rest, Stack, Callbacks, Opts, []);
+start(<<$t/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    tr(Rest, Stack, Callbacks, Opts);
+start(<<$f/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    fa(Rest, Stack, Callbacks, Opts);
+start(<<$n/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    nu(Rest, Stack, Callbacks, Opts);
+start(<<?negative/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+start(<<?zero/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+start(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S), Opts#opts.naked_values == true ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+start(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end);
+start(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    start(Rest, Stack, Callbacks, Opts);
+start(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> start(Stream, Stack, Callbacks, Opts) end.
+
+
+%% entered after a complete value. the end of the object or array on top of the
+%% stack pops it and emits end_object or end_array; a comma moves on to the next
+%% key (inside an object, pushing the key flag again) or the next value (inside an
+%% array). when the stream runs out with an empty stack the eof event is dispatched
+%% and its result returned, otherwise a fun waiting for more input is returned.
+%% NOTE(review): ?end_object, ?end_array and ?comma are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+maybe_done(<<?end_object/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
+maybe_done(<<?end_array/utf32-big, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
+maybe_done(<<?comma/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
+    key(Rest, [key|Stack], Callbacks, Opts);
+maybe_done(<<?comma/utf32-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts) ->
+    value(Rest, Stack, Callbacks, Opts);
+maybe_done(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end);
+maybe_done(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, Callbacks, Opts);
+maybe_done(<<>>, [], Callbacks, _Opts) ->
+    callback(eof, Callbacks);
+maybe_done(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> maybe_done(Stream, Stack, Callbacks, Opts) end.
+
+
+%% entered right after the start of an object, with the key flag on top of the
+%% stack. the object either ends at once (the key flag is popped and end_object
+%% emitted) or a quote opens its first key.
+%% NOTE(review): ?end_object is assumed to be the delimiter macro defined in
+%% jsx_common.hrl -- confirm the name against that header.
+object(<<?end_object/utf32-big, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
+object(<<?quote/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+object(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end);
+object(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    object(Rest, Stack, Callbacks, Opts);
+object(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> object(Stream, Stack, Callbacks, Opts) end.
+
+
+%% entered right after the start of an array, with the array flag on top of the
+%% stack. accepts the start of any value and, unlike value/4, also the end of the
+%% array, which pops the flag and emits end_array.
+%% NOTE(review): ?start_object, ?start_array and ?end_array are assumed to be the
+%% delimiter macros defined in jsx_common.hrl -- confirm the names against that
+%% header.
+array(<<?quote/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+array(<<?start_object/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+array(<<?start_array/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+array(<<?end_array/utf32-big, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
+    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
+array(<<$t/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tr(Rest, Stack, Callbacks, Opts);
+array(<<$f/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fa(Rest, Stack, Callbacks, Opts);
+array(<<$n/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    nu(Rest, Stack, Callbacks, Opts);
+array(<<?negative/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+array(<<?zero/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+array(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+array(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end);
+array(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    array(Rest, Stack, Callbacks, Opts);
+array(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> array(Stream, Stack, Callbacks, Opts) end.
+
+
+%% expects the start of a value: a string, object, array, literal or number.
+%% whitespace is skipped and so are comments when the comments option is true.
+%% NOTE(review): ?start_object and ?start_array are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+value(<<?quote/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+value(<<?start_object/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+value(<<?start_array/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+value(<<$t/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tr(Rest, Stack, Callbacks, Opts);
+value(<<$f/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fa(Rest, Stack, Callbacks, Opts);
+value(<<$n/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    nu(Rest, Stack, Callbacks, Opts);
+value(<<?negative/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+value(<<?zero/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+value(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+value(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end);
+value(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    value(Rest, Stack, Callbacks, Opts);
+value(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> value(Stream, Stack, Callbacks, Opts) end.
+
+
+%% entered after a key. skips to the colon, then swaps the key flag on top of the
+%% stack for object and goes on to parse the value.
+%% NOTE(review): ?colon is assumed to be the separator macro defined in
+%% jsx_common.hrl -- confirm the name against that header.
+colon(<<?colon/utf32-big, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
+    value(Rest, [object|Stack], Callbacks, Opts);
+colon(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end);
+colon(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    colon(Rest, Stack, Callbacks, Opts);
+colon(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> colon(Stream, Stack, Callbacks, Opts) end.
+
+
+%% expects the quote that opens a key; whitespace is skipped and so are comments
+%% when the comments option is true.
+key(<<?quote/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    string(Rest, Stack, Callbacks, Opts, []);
+key(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end);
+key(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    key(Rest, Stack, Callbacks, Opts);
+key(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> key(Stream, Stack, Callbacks, Opts) end.
+
+
+%% string has an additional parameter, an accumulator (Acc) used to hold the intermediate
+%% representation of the string being parsed. using a list of integers representing
+%% unicode codepoints is faster than constructing binaries, many of which will be
+%% converted back to lists by the user anyways.
+
+%% Acc is built in reverse and reversed when the closing quote is reached. with
+%% the key flag on top of the stack the string is emitted as {key, String} and a
+%% colon is expected next, otherwise it is emitted as {string, String}. a reverse
+%% solidus hands over to escape/5.
+string(<<?quote/utf32-big, Rest/binary>>, [key|_] = Stack, Callbacks, Opts, Acc) ->
+    colon(Rest, Stack, callback({key, lists:reverse(Acc)}, Callbacks), Opts);
+string(<<?quote/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback({string, lists:reverse(Acc)}, Callbacks), Opts);
+string(<<?rsolidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    escape(Rest, Stack, Callbacks, Opts, Acc);
+string(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
+    string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+string(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> string(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% only thing to note here is the additional accumulator passed to escaped_unicode used
+%% to hold the codepoint sequence. unnecessary, but nicer than using the string
+%% accumulator.
+
+%% handles the character that follows a reverse solidus inside a string. b, f, n, r
+%% and t push the control character they name, u starts a unicode escape whose hex
+%% digits escaped_unicode/6 collects in a fresh accumulator, and an escaped quote,
+%% solidus or reverse solidus is pushed as is. when the stream is exhausted a fun
+%% is returned that resumes here once it is applied to more input.
+escape(<<"b"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\b" ++ Acc);
+escape(<<"f"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\f" ++ Acc);
+escape(<<"n"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\n" ++ Acc);
+escape(<<"r"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\r" ++ Acc);
+escape(<<"t"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    string(Rest, Stack, Callbacks, Opts, "\t" ++ Acc);
+escape(<<"u"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    escaped_unicode(Rest, Stack, Callbacks, Opts, Acc, []);
+escape(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc)
+        when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus ->
+    string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+escape(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> escape(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% this code is ugly and unfortunate, but so is json's handling of escaped unicode
+%% codepoint sequences. if the ascii option is present, the sequence is converted
+%% to a codepoint and inserted into the string if it represents an ascii value. if
+%% the codepoint option is present the sequence is converted and inserted as long
+%% as it represents a valid 16 bit integer value (this is where json's spec gets
+%% insane). any other option and the sequence is converted back to an erlang string
+%% and appended to the string in place.
+
+%% String is the string accumulator, which string/5 reverses before emitting it,
+%% so everything pushed here goes on back to front. the last argument collects the
+%% hex digits read so far, most recent first. when the fourth digit (D) arrives the
+%% sequence is either replaced by its codepoint or, when the escaped_unicode option
+%% does not allow that, pushed back as the original \uXXXX text (reversed, like the
+%% rest of the accumulator). the fourth digit is checked with ?is_hex like the
+%% first three, so a malformed escape fails to match any clause instead of
+%% reaching list_to_integer/2.
+escaped_unicode(<<D/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, String, [C, B, A]) when ?is_hex(D) ->
+    X = erlang:list_to_integer([A, B, C, D], 16),
+    case Opts#opts.escaped_unicode of
+        ascii when X < 16#0080 ->
+            string(Rest, Stack, Callbacks, Opts, [X] ++ String)
+        ; codepoint ->
+            string(Rest, Stack, Callbacks, Opts, [X] ++ String)
+        ; _ ->
+            string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String)
+    end;
+escaped_unicode(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) ->
+    escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc);
+escaped_unicode(<<>>, Stack, Callbacks, Opts, String, Acc) ->
+    fun(Stream) -> escaped_unicode(Stream, Stack, Callbacks, Opts, String, Acc) end.
+
+
+%% like strings, numbers are collected in an intermediate accumulator before
+%% being emitted to the callback handler. no processing of numbers is done in
+%% process, it's left for the user, though there are convenience functions to
+%% convert them into erlang floats/integers in jsx_utils.erl.
+
+%% TODO: actually write that jsx_utils.erl module mentioned above...
+
+%% a minus sign has been read and is already in Acc. a digit has to follow: 0
+%% continues in zero/5, a digit accepted by ?is_nonzero continues in integer/5.
+negative(<<"0"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc);
+negative(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+negative(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> negative(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% a leading zero has been read. it may be followed by a decimal point (continuing
+%% in fraction/5) or by something that ends the number: the end of the enclosing
+%% object or array, a comma, whitespace, a comment (when the comments option is
+%% true) or, with an empty stack, the end of the stream. ending the number emits
+%% {number, lists:reverse(Acc)} before anything else.
+%% NOTE(review): ?end_object, ?end_array and ?comma are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+zero(<<?end_object/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+zero(<<?end_array/utf32-big, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+zero(<<?comma/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?comma/utf32-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?decimalpoint/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
+zero(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> zero(Resume, Stack, Callbacks, Opts, Acc) end);
+zero(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+zero(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> zero(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% collects the digits of the integer part of a number. a decimal point continues
+%% in fraction/5 and an e or E (stored as a lowercase e) continues in e/5. the end
+%% of the enclosing object or array, a comma, whitespace, a comment (when the
+%% comments option is true) or, with an empty stack, the end of the stream end the
+%% number, which is emitted as {number, lists:reverse(Acc)}.
+%% NOTE(review): ?end_object, ?end_array and ?comma are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+integer(<<?end_object/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+integer(<<?end_array/utf32-big, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+integer(<<?comma/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?comma/utf32-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?decimalpoint/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
+integer(<<?zero/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    integer(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+integer(<<"e"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+integer(<<"E"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+integer(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+integer(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> integer(Resume, Stack, Callbacks, Opts, Acc) end);
+integer(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+integer(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> integer(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% collects the digits that follow the decimal point. an e or E (stored as a
+%% lowercase e) continues in e/5; the end of the enclosing object or array, a
+%% comma, whitespace, a comment (when the comments option is true) or, with an
+%% empty stack, the end of the stream end the number, which is emitted as
+%% {number, lists:reverse(Acc)}.
+%% NOTE(review): ?end_object, ?end_array and ?comma are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+fraction(<<?end_object/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+fraction(<<?end_array/utf32-big, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+fraction(<<?comma/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+fraction(<<?comma/utf32-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+fraction(<<?zero/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    fraction(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+fraction(<<"e"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+fraction(<<"E"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+fraction(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    fraction(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+fraction(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+fraction(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> fraction(Resume, Stack, Callbacks, Opts, Acc) end);
+fraction(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+fraction(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> fraction(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% an exponent marker has been read. either a sign follows (continuing in ex/5,
+%% which then insists on a digit) or the first exponent digit (continuing in exp/5).
+e(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative ->
+    ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+e(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+e(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> e(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% the sign of an exponent has been read; at least one digit has to follow.
+ex(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+ex(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> ex(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% collects the remaining exponent digits. the end of the enclosing object or
+%% array, a comma, whitespace, a comment (when the comments option is true) or,
+%% with an empty stack, the end of the stream end the number, which is emitted as
+%% {number, lists:reverse(Acc)}.
+%% NOTE(review): ?end_object, ?end_array and ?comma are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+exp(<<?end_object/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+exp(<<?end_array/utf32-big, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
+exp(<<?comma/utf32-big, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?comma/utf32-big, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?zero/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    exp(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+exp(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+exp(<<S/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?solidus/utf32-big, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> exp(Resume, Stack, Callbacks, Opts, Acc) end);
+exp(<<>>, [], Callbacks, _Opts, Acc) ->
+    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
+exp(<<>>, Stack, Callbacks, Opts, Acc) ->
+    fun(Stream) -> exp(Stream, Stack, Callbacks, Opts, Acc) end.
+
+
+%% the literal true is matched one character per function so that it can be split
+%% anywhere across stream chunks. tr/4 expects the r that follows the leading t.
+tr(<<"r"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tru(Rest, Stack, Callbacks, Opts);
+tr(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> tr(Stream, Stack, Callbacks, Opts) end.
+
+
+%% expects the u of true.
+tru(<<"u"/utf32-big, Rest/binary>>, Stack, Callbacks, Opts) ->
+    true(Rest, Stack, Callbacks, Opts);
+tru(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> tru(Stream, Stack, Callbacks, Opts) end.
+
+
+%% last character of the literal true: emits {literal, true} and moves on to
+%% maybe_done/4. every function below returns a fun awaiting more input when the
+%% stream ends before its character has arrived.
+true(<<$e/utf32-big, Tail/binary>>, Stack, Callbacks, Opts) ->
+    Emitted = callback({literal, true}, Callbacks),
+    maybe_done(Tail, Stack, Emitted, Opts);
+true(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> true(More, Stack, Callbacks, Opts) end.
+
+
+%% the literal false, one character per function: a, l, s, then e.
+fa(<<$a/utf32-big, Tail/binary>>, Stack, Callbacks, Opts) ->
+    fal(Tail, Stack, Callbacks, Opts);
+fa(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> fa(More, Stack, Callbacks, Opts) end.
+
+
+fal(<<$l/utf32-big, Tail/binary>>, Stack, Callbacks, Opts) ->
+    fals(Tail, Stack, Callbacks, Opts);
+fal(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> fal(More, Stack, Callbacks, Opts) end.
+
+
+fals(<<$s/utf32-big, Tail/binary>>, Stack, Callbacks, Opts) ->
+    false(Tail, Stack, Callbacks, Opts);
+fals(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> fals(More, Stack, Callbacks, Opts) end.
+
+
+%% final e of false: emits {literal, false}.
+false(<<$e/utf32-big, Tail/binary>>, Stack, Callbacks, Opts) ->
+    Emitted = callback({literal, false}, Callbacks),
+    maybe_done(Tail, Stack, Emitted, Opts);
+false(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> false(More, Stack, Callbacks, Opts) end.
+
+
+%% the literal null, one character per function: u, l, then the closing l.
+nu(<<$u/utf32-big, Tail/binary>>, Stack, Callbacks, Opts) ->
+    nul(Tail, Stack, Callbacks, Opts);
+nu(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> nu(More, Stack, Callbacks, Opts) end.
+
+
+nul(<<$l/utf32-big, Tail/binary>>, Stack, Callbacks, Opts) ->
+    null(Tail, Stack, Callbacks, Opts);
+nul(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> nul(More, Stack, Callbacks, Opts) end.
+
+
+%% closing l of null: emits {literal, null}.
+null(<<$l/utf32-big, Tail/binary>>, Stack, Callbacks, Opts) ->
+    Emitted = callback({literal, null}, Callbacks),
+    maybe_done(Tail, Stack, Emitted, Opts);
+null(<<>>, Stack, Callbacks, Opts) ->
+    fun(More) -> null(More, Stack, Callbacks, Opts) end.
+
+
+%% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode
+%% character is valid in a comment, except, obviously the */ sequence which ends
+%% the comment.
+%% they're implemented as a closure called when the comment ends that
+%% returns execution to the point where the comment began. comments are not
+%% recorded in any way, simply parsed.
+
+%% a solidus has been read where a comment may start; a star has to follow it.
+%% Resume is the closure that continues the interrupted state once the comment is
+%% over.
+%% NOTE(review): ?star is assumed to be the macro for the asterisk defined in
+%% jsx_common.hrl -- confirm the name against that header.
+maybe_comment(<<?star/utf32-big, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment(<<>>, Resume) ->
+    fun(Stream) -> maybe_comment(Stream, Resume) end.
+
+
+%% skips the body of a comment. a star might be the first half of the closing
+%% sequence, so it hands over to maybe_comment_done/2.
+comment(<<?star/utf32-big, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+comment(<<_/utf32-big, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+comment(<<>>, Resume) ->
+    fun(Stream) -> comment(Stream, Resume) end.
+
+
+%% a star has been read inside a comment. a solidus ends the comment and resumes
+%% the interrupted state. another star may still be followed by the solidus, so
+%% it stays here. anything else means the star was ordinary comment text and the
+%% comment goes on (without these two clauses a comment such as /* a * b */ could
+%% not be parsed).
+maybe_comment_done(<<?solidus/utf32-big, Rest/binary>>, Resume) ->
+    Resume(Rest);
+maybe_comment_done(<<?star/utf32-big, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+maybe_comment_done(<<_/utf32-big, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment_done(<<>>, Resume) ->
+    fun(Stream) -> maybe_comment_done(Stream, Resume) end.
+
+
+%% helper function for dispatching of parser events
+
+%% with the none handler events are pushed onto a list which eof returns in the
+%% order the events occurred. with a module handler Mod:jsx_event/2 is called and
+%% with a fun handler the fun is applied to the event and the current state; in
+%% both cases the result becomes the new state (eof included).
+callback(eof, {none, Callbacks}) ->
+    lists:reverse(Callbacks);
+callback(Event, {none, Callbacks}) ->
+    {none, [Event] ++ Callbacks};
+callback(Event, {Mod, State}) when is_atom(Mod) ->
+    {Mod, Mod:jsx_event(Event, State)};
+callback(Event, {F, State}) when is_function(F) ->
+    {F, F(Event, State)}.
+
+
+
+
+
+
diff --git a/src/jsx_utf32l.erl b/src/jsx_utf32l.erl
new file mode 100644
index 0000000..1a59219
--- /dev/null
+++ b/src/jsx_utf32l.erl
@@ -0,0 +1,436 @@
+-module(jsx_utf32l).
+
+-export([start/4]).
+
+-include("jsx_common.hrl").
+
+
+%% this code is mostly autogenerated and mostly ugly. apologies. for more insight on
+%% Callbacks or Opts, see the comments accompanying decoder/2 (in jsx.erl). Stack
+%% is a stack of flags used to track depth and to keep track of whether we are
+%% returning from a value or a key inside objects. all pops, peeks and pushes are
+%% inlined. the code that handles naked values and comments is not optimized by the
+%% compiler for efficient matching, but you shouldn't be using naked values or comments
+%% anyways, they are horrible and contrary to the spec.
+
+%% entry point of the decoder. the start of an object or array is always accepted;
+%% a string, literal or number at the top level is only accepted when the
+%% naked_values option is true. leading whitespace is skipped and so are comments
+%% when the comments option is true. when the stream is exhausted a fun is returned
+%% that resumes here once it is applied to more input.
+%% NOTE(review): ?start_object and ?start_array are assumed to be the delimiter
+%% macros defined in jsx_common.hrl -- confirm the names against that header.
+start(<<?start_object/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
+start(<<?start_array/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
+    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
+start(<<?quote/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    string(Rest, Stack, Callbacks, Opts, []);
+start(<<$t/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    tr(Rest, Stack, Callbacks, Opts);
+start(<<$f/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    fa(Rest, Stack, Callbacks, Opts);
+start(<<$n/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    nu(Rest, Stack, Callbacks, Opts);
+start(<<?negative/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    negative(Rest, Stack, Callbacks, Opts, "-");
+start(<<?zero/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
+    zero(Rest, Stack, Callbacks, Opts, "0");
+start(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S), Opts#opts.naked_values == true ->
+    integer(Rest, Stack, Callbacks, Opts, [S]);
+start(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
+    maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end);
+start(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
+    start(Rest, Stack, Callbacks, Opts);
+start(<<>>, Stack, Callbacks, Opts) ->
+    fun(Stream) -> start(Stream, Stack, Callbacks, Opts) end.


%% state entered after a complete value has been emitted. `}` and `]` pop the
%% matching flag off Stack and emit end_object / end_array; `,` continues with
%% the next key (top of stack is object) or the next value (top of stack is
%% array). when the input is exhausted and Stack is empty the eof event is
%% dispatched and its result returned; with a non-empty Stack a fun that
%% resumes in this state is returned instead.

maybe_done(<<"}"/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
maybe_done(<<"]"/utf32-little, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
maybe_done(<<","/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
    key(Rest, [key|Stack], Callbacks, Opts);
maybe_done(<<","/utf32-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts) ->
    value(Rest, Stack, Callbacks, Opts);
maybe_done(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end);
maybe_done(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, Callbacks, Opts);
maybe_done(<<>>, [], Callbacks, _Opts) ->
    callback(eof, Callbacks);
maybe_done(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> maybe_done(Stream, Stack, Callbacks, Opts) end.


%% state entered right after `{`, with the key flag on top of Stack. either
%% the object is empty (`}` pops the flag and emits end_object) or a quote
%% starts the first key.

object(<<"}"/utf32-little, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
object(<<?quote/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    string(Rest, Stack, Callbacks, Opts, []);
object(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end);
object(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    object(Rest, Stack, Callbacks, Opts);
object(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> object(Stream, Stack, Callbacks, Opts) end.


%% state entered right after `[`, with the array flag on top of Stack. accepts
%% the start of any value, or `]` for an empty array (pops the flag and emits
%% end_array).

array(<<?quote/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    string(Rest, Stack, Callbacks, Opts, []);
array(<<"{"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
array(<<"["/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
array(<<"]"/utf32-little, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
array(<<$t/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    tr(Rest, Stack, Callbacks, Opts);
array(<<$f/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    fa(Rest, Stack, Callbacks, Opts);
array(<<$n/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    nu(Rest, Stack, Callbacks, Opts);
array(<<?negative/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    negative(Rest, Stack, Callbacks, Opts, "-");
array(<<?zero/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    zero(Rest, Stack, Callbacks, Opts, "0");
array(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
    integer(Rest, Stack, Callbacks, Opts, [S]);
array(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end);
array(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    array(Rest, Stack, Callbacks, Opts);
array(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> array(Stream, Stack, Callbacks, Opts) end.


%% state that expects the start of a value (after a `:` or after a `,` inside
%% an array). same dispatch as array/4 except that `]` is not accepted here.

value(<<?quote/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    string(Rest, Stack, Callbacks, Opts, []);
value(<<"{"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
value(<<"["/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
value(<<$t/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    tr(Rest, Stack, Callbacks, Opts);
value(<<$f/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    fa(Rest, Stack, Callbacks, Opts);
value(<<$n/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    nu(Rest, Stack, Callbacks, Opts);
value(<<?negative/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    negative(Rest, Stack, Callbacks, Opts, "-");
value(<<?zero/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    zero(Rest, Stack, Callbacks, Opts, "0");
value(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
    integer(Rest, Stack, Callbacks, Opts, [S]);
value(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end);
value(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    value(Rest, Stack, Callbacks, Opts);
value(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> value(Stream, Stack, Callbacks, Opts) end.


%% state entered after a key has been emitted. the `:` replaces the key flag
%% on top of Stack with the object flag and moves on to the value.

colon(<<":"/utf32-little, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
    value(Rest, [object|Stack], Callbacks, Opts);
colon(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end);
colon(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    colon(Rest, Stack, Callbacks, Opts);
colon(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> colon(Stream, Stack, Callbacks, Opts) end.


%% state entered after a `,` inside an object: only a quote (the start of the
%% next key), whitespace or a comment may follow.

key(<<?quote/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    string(Rest, Stack, Callbacks, Opts, []);
key(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end);
key(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    key(Rest, Stack, Callbacks, Opts);
key(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> key(Stream, Stack, Callbacks, Opts) end.


%% string has an additional parameter, an accumulator (Acc) used to hold the intermediate
%% representation of the string being parsed. using a list of integers representing
%% unicode codepoints is faster than constructing binaries, many of which will be
%% converted back to lists by the user anyways.
%%
%% Acc is built in reverse (newest codepoint first) and reversed once when the
%% closing quote is seen. with the key flag on top of Stack the string is
%% emitted as {key, String} and a colon is expected next; otherwise it is
%% emitted as {string, String}.

string(<<?quote/utf32-little, Rest/binary>>, [key|_] = Stack, Callbacks, Opts, Acc) ->
    colon(Rest, Stack, callback({key, lists:reverse(Acc)}, Callbacks), Opts);
string(<<?quote/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback({string, lists:reverse(Acc)}, Callbacks), Opts);
string(<<?rsolidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    escape(Rest, Stack, Callbacks, Opts, Acc);
string(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
    string(Rest, Stack, Callbacks, Opts, [S|Acc]);
string(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> string(Stream, Stack, Callbacks, Opts, Acc) end.


%% only thing to note here is the additional accumulator passed to escaped_unicode used
%% to hold the codepoint sequence. unnecessary, but nicer than using the string
%% accumulator.

%% state entered after a backslash inside a string. the single character
%% escapes push the character they stand for onto the (reversed) string
%% accumulator, `u` starts a four digit escaped codepoint, and an escaped
%% quote, solidus or reverse solidus is pushed as is.

escape(<<"b"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\b" ++ Acc);
escape(<<"f"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\f" ++ Acc);
escape(<<"n"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\n" ++ Acc);
escape(<<"r"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\r" ++ Acc);
escape(<<"t"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\t" ++ Acc);
escape(<<"u"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    escaped_unicode(Rest, Stack, Callbacks, Opts, Acc, []);
escape(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc)
        when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus ->
    string(Rest, Stack, Callbacks, Opts, [S|Acc]);
escape(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> escape(Stream, Stack, Callbacks, Opts, Acc) end.


%% this code is ugly and unfortunate, but so is json's handling of escaped unicode
%% codepoint sequences. if the ascii option is present, the sequence is converted
%% to a codepoint and inserted into the string if it represents an ascii value. if
%% the codepoint option is present the sequence is converted and inserted as long
%% as it represents a valid 16 bit integer value (this is where json's spec gets
%% insane). any other option and the sequence is converted back to an erlang string
%% and appended to the string in place.

%% Acc collects the hex digits newest first, so with three digits collected it
%% is [C, B, A] and the incoming digit D completes the sequence A B C D.
%% String is the string accumulator, which is also kept newest first.

escaped_unicode(<<D/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, String, [C, B, A])
        when ?is_hex(D) ->
    X = erlang:list_to_integer([A, B, C, D], 16),
    case Opts#opts.escaped_unicode of
        ascii when X < 16#0080 ->
            string(Rest, Stack, Callbacks, Opts, [X|String]);
        codepoint ->
            string(Rest, Stack, Callbacks, Opts, [X|String]);
        _ ->
            %% String is reversed when the string is emitted, so the escape
            %% sequence has to be pushed back to front to come out as \uABCD.
            string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String)
    end;
escaped_unicode(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, String, Acc)
        when ?is_hex(S) ->
    escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S|Acc]);
escaped_unicode(<<>>, Stack, Callbacks, Opts, String, Acc) ->
    fun(Stream) -> escaped_unicode(Stream, Stack, Callbacks, Opts, String, Acc) end.


%% like strings, numbers are collected in an intermediate accumulator before
%% being emitted to the callback handler. no processing of numbers is done in
%% process, it's left for the user, though there are convenience functions to
%% convert them into erlang floats/integers in jsx_utils.erl.

%% TODO: actually write that jsx_utils.erl module mentioned above...

%% state entered after a leading `-`: a digit must follow. Acc already holds
%% the sign and, like the string accumulator, is kept newest first.

negative(<<"0"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc);
negative(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
    integer(Rest, Stack, Callbacks, Opts, [S|Acc]);
negative(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> negative(Stream, Stack, Callbacks, Opts, Acc) end.


%% state entered after a leading zero. no further integer digits are accepted:
%% the number either ends here (delimiter, whitespace or end of input) or
%% continues with a decimal point. when the number ends, {number, Digits} is
%% emitted with the accumulator reversed into reading order, followed by the
%% end_object / end_array event when the delimiter closes a container. end of
%% input only terminates the number when Stack is empty; otherwise a fun that
%% resumes in this state is returned.

zero(<<"}"/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest,
               Stack,
               callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)),
               Opts);
zero(<<"]"/utf32-little, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest,
               Stack,
               callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)),
               Opts);
zero(<<","/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
zero(<<","/utf32-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
zero(<<?decimalpoint/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint|Acc]);
zero(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
zero(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> zero(Resume, Stack, Callbacks, Opts, Acc) end);
zero(<<>>, [], Callbacks, _Opts, Acc) ->
    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
zero(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> zero(Stream, Stack, Callbacks, Opts, Acc) end.


%% state for the integer part of a number that started with a nonzero digit.
%% further digits are accumulated, a decimal point moves to fraction/5 and
%% `e` / `E` to e/5 (both are recorded as a lowercase "e"). delimiters,
%% whitespace and end of input terminate the number the same way as in zero/5.

integer(<<"}"/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest,
               Stack,
               callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)),
               Opts);
integer(<<"]"/utf32-little, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest,
               Stack,
               callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)),
               Opts);
integer(<<","/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
integer(<<","/utf32-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
integer(<<?decimalpoint/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint|Acc]);
integer(<<?zero/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    integer(Rest, Stack, Callbacks, Opts, [?zero|Acc]);
integer(<<"e"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
integer(<<"E"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
integer(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
    integer(Rest, Stack, Callbacks, Opts, [S|Acc]);
integer(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
integer(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> integer(Resume, Stack, Callbacks, Opts, Acc) end);
integer(<<>>, [], Callbacks, _Opts, Acc) ->
    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
integer(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> integer(Stream, Stack, Callbacks, Opts, Acc) end.


%% state for the digits after the decimal point. digits are accumulated,
%% `e` / `E` move to e/5 (recorded as a lowercase "e"), and delimiters,
%% whitespace and end of input terminate the number the same way as in
%% integer/5.

fraction(<<"}"/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest,
               Stack,
               callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)),
               Opts);
fraction(<<"]"/utf32-little, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest,
               Stack,
               callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)),
               Opts);
fraction(<<","/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
fraction(<<","/utf32-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
fraction(<<?zero/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    fraction(Rest, Stack, Callbacks, Opts, [?zero|Acc]);
fraction(<<"e"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
fraction(<<"E"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
fraction(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
    fraction(Rest, Stack, Callbacks, Opts, [S|Acc]);
fraction(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
fraction(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> fraction(Resume, Stack, Callbacks, Opts, Acc) end);
fraction(<<>>, [], Callbacks, _Opts, Acc) ->
    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
fraction(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> fraction(Stream, Stack, Callbacks, Opts, Acc) end.


%% state entered after the exponent marker: either a sign (then ex/5 requires
%% a digit) or the first exponent digit must follow.

e(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc)
        when S =:= ?positive; S =:= ?negative ->
    ex(Rest, Stack, Callbacks, Opts, [S|Acc]);
e(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc)
        when S =:= ?zero; ?is_nonzero(S) ->
    exp(Rest, Stack, Callbacks, Opts, [S|Acc]);
e(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> e(Stream, Stack, Callbacks, Opts, Acc) end.


%% state entered after the sign of an exponent: a digit must follow.

ex(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc)
        when S =:= ?zero; ?is_nonzero(S) ->
    exp(Rest, Stack, Callbacks, Opts, [S|Acc]);
ex(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> ex(Stream, Stack, Callbacks, Opts, Acc) end.


%% state for the digits of an exponent. digits are accumulated; delimiters,
%% whitespace and end of input emit {number, Digits} (accumulator reversed
%% into reading order) the same way as the other number states.

exp(<<"}"/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest,
               Stack,
               callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)),
               Opts);
exp(<<"]"/utf32-little, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest,
               Stack,
               callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)),
               Opts);
exp(<<","/utf32-little, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
exp(<<","/utf32-little, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
exp(<<?zero/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    exp(Rest, Stack, Callbacks, Opts, [?zero|Acc]);
exp(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
    exp(Rest, Stack, Callbacks, Opts, [S|Acc]);
exp(<<S/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
exp(<<?solidus/utf32-little, Rest/binary>>, Stack, Callbacks, Opts, Acc)
        when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> exp(Resume, Stack, Callbacks, Opts, Acc) end);
exp(<<>>, [], Callbacks, _Opts, Acc) ->
    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
exp(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> exp(Stream, Stack, Callbacks, Opts, Acc) end.


%% the literals true, false and null are matched one character per state; each
%% state accepts exactly the next expected letter or, on empty input, returns
%% a fun that resumes in the same state.

tr(<<"r"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    tru(Rest, Stack, Callbacks, Opts);
tr(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> tr(Stream, Stack, Callbacks, Opts) end.


tru(<<"u"/utf32-little, Rest/binary>>, Stack, Callbacks, Opts) ->
    true(Rest, Stack, Callbacks, Opts);
tru(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> tru(Stream, Stack, Callbacks, Opts) end.


%% last letter of `true`: emits {literal, true} and moves to maybe_done/4.
%% on empty input a fun that resumes in this state is returned.

true(<<$e/utf32-little, Tail/binary>>, Stack, Callbacks, Opts) ->
    Emitted = callback({literal, true}, Callbacks),
    maybe_done(Tail, Stack, Emitted, Opts);
true(<<>>, Stack, Callbacks, Opts) ->
    fun(More) -> true(More, Stack, Callbacks, Opts) end.


%% the states below walk through `false` one letter at a time (the leading
%% `f` has already been consumed by the caller).

fa(<<$a/utf32-little, Tail/binary>>, Stack, Callbacks, Opts) ->
    fal(Tail, Stack, Callbacks, Opts);
fa(<<>>, Stack, Callbacks, Opts) ->
    fun(More) -> fa(More, Stack, Callbacks, Opts) end.


fal(<<$l/utf32-little, Tail/binary>>, Stack, Callbacks, Opts) ->
    fals(Tail, Stack, Callbacks, Opts);
fal(<<>>, Stack, Callbacks, Opts) ->
    fun(More) -> fal(More, Stack, Callbacks, Opts) end.


fals(<<$s/utf32-little, Tail/binary>>, Stack, Callbacks, Opts) ->
    false(Tail, Stack, Callbacks, Opts);
fals(<<>>, Stack, Callbacks, Opts) ->
    fun(More) -> fals(More, Stack, Callbacks, Opts) end.


%% last letter of `false`: emits {literal, false} and moves to maybe_done/4.

false(<<$e/utf32-little, Tail/binary>>, Stack, Callbacks, Opts) ->
    Emitted = callback({literal, false}, Callbacks),
    maybe_done(Tail, Stack, Emitted, Opts);
false(<<>>, Stack, Callbacks, Opts) ->
    fun(More) -> false(More, Stack, Callbacks, Opts) end.


%% the states below walk through `null` one letter at a time (the leading
%% `n` has already been consumed by the caller).

nu(<<$u/utf32-little, Tail/binary>>, Stack, Callbacks, Opts) ->
    nul(Tail, Stack, Callbacks, Opts);
nu(<<>>, Stack, Callbacks, Opts) ->
    fun(More) -> nu(More, Stack, Callbacks, Opts) end.


nul(<<$l/utf32-little, Tail/binary>>, Stack, Callbacks, Opts) ->
    null(Tail, Stack, Callbacks, Opts);
nul(<<>>, Stack, Callbacks, Opts) ->
    fun(More) -> nul(More, Stack, Callbacks, Opts) end.


%% last letter of `null`: emits {literal, null} and moves to maybe_done/4.

null(<<$l/utf32-little, Tail/binary>>, Stack, Callbacks, Opts) ->
    Emitted = callback({literal, null}, Callbacks),
    maybe_done(Tail, Stack, Emitted, Opts);
null(<<>>, Stack, Callbacks, Opts) ->
    fun(More) -> null(More, Stack, Callbacks, Opts) end.


%% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode
%% character is valid in a comment, except, obviously the */ sequence which ends
%% the comment.
%% they're implemented as a closure called when the comment ends that
%% returns execution to the point where the comment began. comments are not
%% recorded in any way, simply parsed.

%% entered after the `/` that may open a comment: a `*` confirms it. empty
%% input returns a fun that resumes here when given more of the stream.
maybe_comment(<<"*"/utf32-little, Rest/binary>>, Resume) ->
    comment(Rest, Resume);
maybe_comment(<<>>, Resume) ->
    fun(Stream) -> maybe_comment(Stream, Resume) end.


%% inside a comment: skip codepoints until a `*`, which may start the
%% closing `*/` sequence.
comment(<<"*"/utf32-little, Rest/binary>>, Resume) ->
    maybe_comment_done(Rest, Resume);
comment(<<_/utf32-little, Rest/binary>>, Resume) ->
    comment(Rest, Resume);
comment(<<>>, Resume) ->
    fun(Stream) -> comment(Stream, Resume) end.


%% entered after a `*` inside a comment. `/` ends the comment and hands the
%% remaining input to Resume. another `*` may itself be followed by `/`, so we
%% stay in this state; any other codepoint means the `*` was ordinary comment
%% text and we go back to skipping (previously such input failed with a
%% function_clause error).
maybe_comment_done(<<"/"/utf32-little, Rest/binary>>, Resume) ->
    Resume(Rest);
maybe_comment_done(<<"*"/utf32-little, Rest/binary>>, Resume) ->
    maybe_comment_done(Rest, Resume);
maybe_comment_done(<<_/utf32-little, Rest/binary>>, Resume) ->
    comment(Rest, Resume);
maybe_comment_done(<<>>, Resume) ->
    fun(Stream) -> maybe_comment_done(Stream, Resume) end.


%% helper function for dispatching of parser events. with the `none` handler
%% events are accumulated in a list (newest first) that is reversed and
%% returned on eof; otherwise the event and the current state are passed to
%% Mod:jsx_event/2 or to the fun, and the result becomes the new state.

callback(eof, {none, Callbacks}) ->
    lists:reverse(Callbacks);
callback(Event, {none, Callbacks}) ->
    {none, [Event|Callbacks]};
callback(Event, {Mod, State}) when is_atom(Mod) ->
    {Mod, Mod:jsx_event(Event, State)};
callback(Event, {F, State}) when is_function(F) ->
    {F, F(Event, State)}.