From 3b3e33c6e33833f74c5b0c8d885990391c6a2a4d Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Mon, 24 May 2010 14:59:20 -0700 Subject: [PATCH] fixes revealed by cover --- README | 0 src/jsx_decoder.erl | 436 +++++++++++++++++++++++++++ test/cases/array.json | 2 +- test/cases/array.test | 2 +- test/cases/comments.json | 2 +- test/cases/comments.test | 2 +- test/cases/naked_number_a.json | 1 + test/cases/naked_number_a.test | 2 + test/cases/naked_number_b.json | 1 + test/cases/naked_number_b.test | 2 + test/cases/naked_number_c.json | 1 + test/cases/naked_number_c.test | 2 + test/cases/naked_number_d.json | 1 + test/cases/naked_number_d.test | 2 + test/cases/object.json | 1 + test/cases/object.test | 1 + test/cases/unicode_to_codepoint.json | 1 + test/cases/unicode_to_codepoint.test | 2 + test/cases/unicode_unconverted.json | 1 + test/cases/unicode_unconverted.test | 1 + test/cases/whitespace.json | 4 + test/cases/whitespace.test | 1 + 22 files changed, 464 insertions(+), 4 deletions(-) create mode 100644 README create mode 100644 src/jsx_decoder.erl create mode 100644 test/cases/naked_number_a.json create mode 100644 test/cases/naked_number_a.test create mode 100644 test/cases/naked_number_b.json create mode 100644 test/cases/naked_number_b.test create mode 100644 test/cases/naked_number_c.json create mode 100644 test/cases/naked_number_c.test create mode 100644 test/cases/naked_number_d.json create mode 100644 test/cases/naked_number_d.test create mode 100644 test/cases/object.json create mode 100644 test/cases/object.test create mode 100644 test/cases/unicode_to_codepoint.json create mode 100644 test/cases/unicode_to_codepoint.test create mode 100644 test/cases/unicode_unconverted.json create mode 100644 test/cases/unicode_unconverted.test create mode 100644 test/cases/whitespace.json create mode 100644 test/cases/whitespace.test diff --git a/README b/README new file mode 100644 index 0000000..e69de29 diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl 
new file mode 100644 index 0000000..1a8951a --- /dev/null +++ b/src/jsx_decoder.erl @@ -0,0 +1,436 @@
-module(jsx_decoder).

-export([start/4]).

-include("jsx_common.hrl").


%% this code is mostly autogenerated and mostly ugly. apologies. for more insight on
%% Callbacks or Opts, see the comments accompanying decoder/2 (in jsx.erl). Stack
%% is a stack of flags (key, object, array) used to track depth and to keep track of
%% whether we are returning from a value or a key inside objects. all pops, peeks and
%% pushes are inlined. the code that handles naked values and comments is not
%% optimized by the compiler for efficient matching, but you shouldn't be using naked
%% values or comments anyways, they are horrible and contrary to the spec.
%%
%% NOTE(review): the binary patterns in the clause heads were restored after being
%% stripped from this copy of the patch. ?start_object, ?start_array, ?end_object,
%% ?end_array and ?comma are assumed to be defined in jsx_common.hrl next to ?quote
%% and ?solidus -- confirm against the header.

%% start/4 is the entry state. objects and arrays are always accepted; strings,
%% literals and numbers are accepted at the top level only when the naked_values
%% option is true. when the input is exhausted a fun/1 is returned that resumes
%% parsing in the same state once it is given the next chunk.

start(<<?start_object/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
start(<<?start_array/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
start(<<?quote/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
    string(Rest, Stack, Callbacks, Opts, []);
start(<<$t/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
    tr(Rest, Stack, Callbacks, Opts);
start(<<$f/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
    fa(Rest, Stack, Callbacks, Opts);
start(<<$n/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
    nu(Rest, Stack, Callbacks, Opts);
start(<<?negative/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
    negative(Rest, Stack, Callbacks, Opts, "-");
start(<<?zero/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.naked_values == true ->
    zero(Rest, Stack, Callbacks, Opts, "0");
start(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S), Opts#opts.naked_values == true ->
    integer(Rest, Stack, Callbacks, Opts, [S]);
start(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end);
start(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    start(Rest, Stack, Callbacks, Opts);
start(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> start(Stream, Stack, Callbacks, Opts) end.


%% maybe_done/4 is the state entered after a complete value. it closes the enclosing
%% object or array, or moves on to the next key/value after a comma. with an empty
%% stack and no input left the eof event is dispatched and its result returned.

maybe_done(<<?end_object/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
maybe_done(<<?end_array/utf8, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
maybe_done(<<?comma/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts) ->
    key(Rest, [key|Stack], Callbacks, Opts);
maybe_done(<<?comma/utf8, Rest/binary>>, [array|_] = Stack, Callbacks, Opts) ->
    value(Rest, Stack, Callbacks, Opts);
maybe_done(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end);
maybe_done(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, Callbacks, Opts);
maybe_done(<<>>, [], Callbacks, _Opts) ->
    callback(eof, Callbacks);
maybe_done(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> maybe_done(Stream, Stack, Callbacks, Opts) end.


%% object/4 is the state directly after an opening brace: either the object is
%% closed immediately (empty object) or the first key string begins.

object(<<?end_object/utf8, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback(end_object, Callbacks), Opts);
object(<<?quote/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    string(Rest, Stack, Callbacks, Opts, []);
object(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end);
object(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    object(Rest, Stack, Callbacks, Opts);
object(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> object(Stream, Stack, Callbacks, Opts) end.


%% array/4 is the state directly after an opening bracket. it accepts any value or
%% an immediate closing bracket (empty array). it differs from value/4 only in that
%% the closing bracket is legal here.
%%
%% NOTE(review): the binary patterns in the clause heads were restored after being
%% stripped from this copy of the patch. ?start_object, ?start_array and ?end_array
%% are assumed to be defined in jsx_common.hrl -- confirm against the header.

array(<<?quote/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    string(Rest, Stack, Callbacks, Opts, []);
array(<<?start_object/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
array(<<?start_array/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
array(<<?end_array/utf8, Rest/binary>>, [array|Stack], Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback(end_array, Callbacks), Opts);
array(<<$t/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    tr(Rest, Stack, Callbacks, Opts);
array(<<$f/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    fa(Rest, Stack, Callbacks, Opts);
array(<<$n/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    nu(Rest, Stack, Callbacks, Opts);
array(<<?negative/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    negative(Rest, Stack, Callbacks, Opts, "-");
array(<<?zero/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    zero(Rest, Stack, Callbacks, Opts, "0");
array(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
    integer(Rest, Stack, Callbacks, Opts, [S]);
array(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end);
array(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    array(Rest, Stack, Callbacks, Opts);
array(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> array(Stream, Stack, Callbacks, Opts) end.


%% value/4 is the state where a value is required: after a colon inside an object
%% or after a comma inside an array. it dispatches on the first significant
%% character to the matching value state.
%%
%% NOTE(review): the binary patterns in the clause heads were restored after being
%% stripped from this copy of the patch. ?start_object, ?start_array and ?colon are
%% assumed to be defined in jsx_common.hrl -- confirm against the header.

value(<<?quote/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    string(Rest, Stack, Callbacks, Opts, []);
value(<<?start_object/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    object(Rest, [key|Stack], callback(start_object, Callbacks), Opts);
value(<<?start_array/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    array(Rest, [array|Stack], callback(start_array, Callbacks), Opts);
value(<<$t/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    tr(Rest, Stack, Callbacks, Opts);
value(<<$f/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    fa(Rest, Stack, Callbacks, Opts);
value(<<$n/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    nu(Rest, Stack, Callbacks, Opts);
value(<<?negative/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    negative(Rest, Stack, Callbacks, Opts, "-");
value(<<?zero/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    zero(Rest, Stack, Callbacks, Opts, "0");
value(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_nonzero(S) ->
    integer(Rest, Stack, Callbacks, Opts, [S]);
value(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end);
value(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    value(Rest, Stack, Callbacks, Opts);
value(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> value(Stream, Stack, Callbacks, Opts) end.


%% colon/4 is the state after an object key: it expects the colon separator, then
%% replaces the key flag on top of the stack with object and moves on to the value.

colon(<<?colon/utf8, Rest/binary>>, [key|Stack], Callbacks, Opts) ->
    value(Rest, [object|Stack], Callbacks, Opts);
colon(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end);
colon(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    colon(Rest, Stack, Callbacks, Opts);
colon(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> colon(Stream, Stack, Callbacks, Opts) end.


%% key/4 is the state after a comma inside an object: only a key string (or
%% whitespace/comments before it) is legal here.
%%
%% NOTE(review): the binary patterns in the clause heads of key/4 and string/5 were
%% restored after being stripped from this copy of the patch.

key(<<?quote/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    string(Rest, Stack, Callbacks, Opts, []);
key(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end);
key(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts) when ?is_whitespace(S) ->
    key(Rest, Stack, Callbacks, Opts);
key(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> key(Stream, Stack, Callbacks, Opts) end.


%% string has an additional parameter, an accumulator (Acc) used to hold the intermediate
%% representation of the string being parsed. using a list of integers representing
%% unicode codepoints is faster than constructing binaries, many of which will be
%% converted back to lists by the user anyways.
%%
%% the accumulator is built in reverse and reversed once when the closing quote is
%% seen. with the key flag on top of the stack the string is emitted as a {key, _}
%% event and a colon is expected next; otherwise it is emitted as {string, _}.

string(<<?quote/utf8, Rest/binary>>, [key|_] = Stack, Callbacks, Opts, Acc) ->
    colon(Rest, Stack, callback({key, lists:reverse(Acc)}, Callbacks), Opts);
string(<<?quote/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback({string, lists:reverse(Acc)}, Callbacks), Opts);
string(<<?rsolidus/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    escape(Rest, Stack, Callbacks, Opts, Acc);
string(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) ->
    string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
string(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> string(Stream, Stack, Callbacks, Opts, Acc) end.


%% only thing to note here is the additional accumulator passed to escaped_unicode used
%% to hold the codepoint sequence. unnecessary, but nicer than using the string
%% accumulator.

%% escape/5 handles the character following a reverse solidus inside a string. the
%% single letter escapes are translated to the control character they stand for,
%% "u" starts a four digit hex sequence, and an escaped quote, solidus or reverse
%% solidus is inserted as is. any other character matches no clause.
%%
%% NOTE(review): the head of the quote/solidus/reverse solidus clause was restored
%% after being stripped from this copy of the patch.

escape(<<"b"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\b" ++ Acc);
escape(<<"f"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\f" ++ Acc);
escape(<<"n"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\n" ++ Acc);
escape(<<"r"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\r" ++ Acc);
escape(<<"t"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    string(Rest, Stack, Callbacks, Opts, "\t" ++ Acc);
escape(<<"u"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    escaped_unicode(Rest, Stack, Callbacks, Opts, Acc, []);
escape(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc)
        when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus ->
    string(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
escape(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> escape(Stream, Stack, Callbacks, Opts, Acc) end.


%% this code is ugly and unfortunate, but so is json's handling of escaped unicode
%% codepoint sequences. if the ascii option is present, the sequence is converted
%% to a codepoint and inserted into the string if it represents an ascii value. if
%% the codepoint option is present the sequence is converted and inserted as long
%% as it represents a valid 16 bit integer value (this is where json's spec gets
%% insane). any other option and the sequence is converted back to an erlang string
%% and appended to the string in place.

%% escaped_unicode/6 collects the four hex digits that follow "\u". Acc holds the
%% digits seen so far, most recent first; String is the enclosing string's
%% accumulator. once the fourth digit arrives the sequence is either converted to
%% a codepoint or pushed back onto String unconverted, depending on
%% Opts#opts.escaped_unicode.
%%
%% the fourth digit is guarded with ?is_hex like the first three, so a malformed
%% sequence fails with function_clause like all other invalid input instead of a
%% badarg out of list_to_integer/2.
%%
%% NOTE(review): the binary patterns in the clause heads were restored after being
%% stripped from this copy of the patch.

escaped_unicode(<<D/utf8, Rest/binary>>, Stack, Callbacks, Opts, String, [C, B, A]) when ?is_hex(D) ->
    X = erlang:list_to_integer([A, B, C, D], 16),
    case Opts#opts.escaped_unicode of
        ascii when X < 16#0080 ->
            string(Rest, Stack, Callbacks, Opts, [X] ++ String)
        ; codepoint ->
            string(Rest, Stack, Callbacks, Opts, [X] ++ String)
        ; _ ->
            %% String is reversed, so the literal "\uABCD" is pushed back to front
            string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String)
    end;
escaped_unicode(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) ->
    escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc);
escaped_unicode(<<>>, Stack, Callbacks, Opts, String, Acc) ->
    fun(Stream) -> escaped_unicode(Stream, Stack, Callbacks, Opts, String, Acc) end.


%% like strings, numbers are collected in an intermediate accumulator before
%% being emitted to the callback handler. no processing of numbers is done in
%% process, it's left for the user, though there are convenience functions to
%% convert them into erlang floats/integers in jsx_utils.erl.

%% TODO: actually write that jsx_utils.erl module mentioned above...

%% negative/5 is the state after a leading minus sign: a digit must follow.

negative(<<"0"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc);
negative(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
negative(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> negative(Stream, Stack, Callbacks, Opts, Acc) end.


%% zero/5 is the state after a leading zero (optionally preceded by a minus sign).
%% only a decimal point may extend the number; a closing brace/bracket, a comma,
%% whitespace, a comment or the end of a naked value terminates it, at which point
%% the reversed accumulator is emitted as a {number, _} event.
%%
%% a comment terminates the number exactly like whitespace does. resuming in this
%% state after the comment would glue the text on both sides of the comment into
%% one number (0/**/.5 would decode as 0.5).
%%
%% NOTE(review): the binary patterns in the clause heads were restored after being
%% stripped from this copy of the patch. ?end_object, ?end_array and ?comma are
%% assumed to be defined in jsx_common.hrl -- confirm against the header.

zero(<<?end_object/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
zero(<<?end_array/utf8, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
zero(<<?comma/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
zero(<<?comma/utf8, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
zero(<<?decimalpoint/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
zero(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
zero(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) ->
        maybe_done(Resume, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts)
    end);
zero(<<>>, [], Callbacks, _Opts, Acc) ->
    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
zero(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> zero(Stream, Stack, Callbacks, Opts, Acc) end.


%% integer/5 is the state inside the integer part of a number that started with a
%% nonzero digit. further digits extend it, a decimal point moves to fraction/5 and
%% an exponent marker moves to e/5 (an upper case "E" is recorded as "e"). a closing
%% brace/bracket, a comma, whitespace, a comment or the end of a naked value
%% terminates the number, which is then emitted as a {number, _} event.
%%
%% a comment terminates the number exactly like whitespace does. resuming in this
%% state after the comment would glue the digits on both sides of the comment into
%% one number (1/**/2 would decode as 12).
%%
%% NOTE(review): the binary patterns in the clause heads were restored after being
%% stripped from this copy of the patch. ?end_object, ?end_array and ?comma are
%% assumed to be defined in jsx_common.hrl -- confirm against the header.

integer(<<?end_object/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
integer(<<?end_array/utf8, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
integer(<<?comma/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
integer(<<?comma/utf8, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
integer(<<?decimalpoint/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    fraction(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
integer(<<?zero/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    integer(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
integer(<<"e"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
integer(<<"E"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
integer(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
integer(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
integer(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) ->
        maybe_done(Resume, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts)
    end);
integer(<<>>, [], Callbacks, _Opts, Acc) ->
    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
integer(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> integer(Stream, Stack, Callbacks, Opts, Acc) end.


%% fraction/5 is the state after the decimal point of a number. digits extend the
%% fraction and an exponent marker moves to e/5 (an upper case "E" is recorded as
%% "e"). a closing brace/bracket, a comma, whitespace, a comment or the end of a
%% naked value terminates the number, which is then emitted as a {number, _} event.
%%
%% a comment terminates the number exactly like whitespace does. resuming in this
%% state after the comment would glue the digits on both sides of the comment into
%% one number.
%%
%% NOTE(review): the binary patterns in the clause heads were restored after being
%% stripped from this copy of the patch. ?end_object, ?end_array and ?comma are
%% assumed to be defined in jsx_common.hrl -- confirm against the header.

fraction(<<?end_object/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
fraction(<<?end_array/utf8, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
fraction(<<?comma/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
fraction(<<?comma/utf8, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
fraction(<<?zero/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    fraction(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
fraction(<<"e"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
fraction(<<"E"/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
fraction(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
    fraction(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
fraction(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
fraction(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) ->
        maybe_done(Resume, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts)
    end);
fraction(<<>>, [], Callbacks, _Opts, Acc) ->
    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
fraction(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> fraction(Stream, Stack, Callbacks, Opts, Acc) end.


%% e/5 is the state directly after the exponent marker: either a sign (then ex/5
%% requires a digit) or the first exponent digit must follow.

e(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative ->
    ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
e(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
e(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> e(Stream, Stack, Callbacks, Opts, Acc) end.


%% ex/5 is the state after the sign of an exponent: a digit must follow.
%%
%% NOTE(review): the binary patterns in the clause heads of ex/5 and exp/5 were
%% restored after being stripped from this copy of the patch. ?end_object,
%% ?end_array and ?comma are assumed to be defined in jsx_common.hrl -- confirm
%% against the header.

ex(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
ex(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> ex(Stream, Stack, Callbacks, Opts, Acc) end.


%% exp/5 is the state inside the digits of an exponent. digits extend it; a closing
%% brace/bracket, a comma, whitespace, a comment or the end of a naked value
%% terminates the number, which is then emitted as a {number, _} event.
%%
%% a comment terminates the number exactly like whitespace does. resuming in this
%% state after the comment would glue the digits on both sides of the comment into
%% one number.

exp(<<?end_object/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback(end_object, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
exp(<<?end_array/utf8, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
    maybe_done(Rest, Stack, callback(end_array, callback({number, lists:reverse(Acc)}, Callbacks)), Opts);
exp(<<?comma/utf8, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
    key(Rest, [key|Stack], callback({number, lists:reverse(Acc)}, Callbacks), Opts);
exp(<<?comma/utf8, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
    value(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
exp(<<?zero/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
    exp(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
exp(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
exp(<<S/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
    maybe_done(Rest, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts);
exp(<<?solidus/utf8, Rest/binary>>, Stack, Callbacks, Opts, Acc) when Opts#opts.comments == true ->
    maybe_comment(Rest, fun(Resume) ->
        maybe_done(Resume, Stack, callback({number, lists:reverse(Acc)}, Callbacks), Opts)
    end);
exp(<<>>, [], Callbacks, _Opts, Acc) ->
    callback(eof, callback({number, lists:reverse(Acc)}, Callbacks));
exp(<<>>, Stack, Callbacks, Opts, Acc) ->
    fun(Stream) -> exp(Stream, Stack, Callbacks, Opts, Acc) end.


%% tr/4 and tru/4 match the second and third character of the literal true, one
%% character per state so the literal may be split across input chunks.

tr(<<"r"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    tru(Rest, Stack, Callbacks, Opts);
tr(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> tr(Stream, Stack, Callbacks, Opts) end.


tru(<<"u"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    true(Rest, Stack, Callbacks, Opts);
tru(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> tru(Stream, Stack, Callbacks, Opts) end.


%% the remaining literal states each match one character of true, false or null;
%% the final state of each literal emits the {literal, _} event.

true(<<"e"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback({literal, true}, Callbacks), Opts);
true(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> true(Stream, Stack, Callbacks, Opts) end.


fa(<<"a"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    fal(Rest, Stack, Callbacks, Opts);
fa(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> fa(Stream, Stack, Callbacks, Opts) end.


fal(<<"l"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    fals(Rest, Stack, Callbacks, Opts);
fal(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> fal(Stream, Stack, Callbacks, Opts) end.


fals(<<"s"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    false(Rest, Stack, Callbacks, Opts);
fals(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> fals(Stream, Stack, Callbacks, Opts) end.


false(<<"e"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback({literal, false}, Callbacks), Opts);
false(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> false(Stream, Stack, Callbacks, Opts) end.


nu(<<"u"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    nul(Rest, Stack, Callbacks, Opts);
nu(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> nu(Stream, Stack, Callbacks, Opts) end.


nul(<<"l"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    null(Rest, Stack, Callbacks, Opts);
nul(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> nul(Stream, Stack, Callbacks, Opts) end.


null(<<"l"/utf8, Rest/binary>>, Stack, Callbacks, Opts) ->
    maybe_done(Rest, Stack, callback({literal, null}, Callbacks), Opts);
null(<<>>, Stack, Callbacks, Opts) ->
    fun(Stream) -> null(Stream, Stack, Callbacks, Opts) end.


%% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode
%% character is valid in a comment, except, obviously the */ sequence which ends
%% the comment. they're implemented as a closure called when the comment ends that
%% returns execution to the point where the comment began. comments are not
%% recorded in any way, simply parsed.
%%
%% NOTE(review): the binary patterns in the clause heads were restored after being
%% stripped from this copy of the patch. ?star is assumed to be defined in
%% jsx_common.hrl -- confirm against the header.

%% maybe_comment/2 is entered after a solidus: a star must follow to open a comment.

maybe_comment(<<?star/utf8, Rest/binary>>, Resume) ->
    comment(Rest, Resume);
maybe_comment(<<>>, Resume) ->
    fun(Stream) -> maybe_comment(Stream, Resume) end.


%% comment/2 skips the comment body until a star is seen.

comment(<<?star/utf8, Rest/binary>>, Resume) ->
    maybe_comment_done(Rest, Resume);
comment(<<_/utf8, Rest/binary>>, Resume) ->
    comment(Rest, Resume);
comment(<<>>, Resume) ->
    fun(Stream) -> comment(Stream, Resume) end.


%% maybe_comment_done/2 is entered after a star inside a comment. a solidus ends
%% the comment and hands the remaining input to Resume. another star may still be
%% the first half of the terminator, so the state is kept. any other character
%% means the star was part of the comment text and scanning continues in comment/2.
%% without the last two cases a star inside a comment (/* a * b */) matched no
%% clause and crashed the decoder.

maybe_comment_done(<<?solidus/utf8, Rest/binary>>, Resume) ->
    Resume(Rest);
maybe_comment_done(<<?star/utf8, Rest/binary>>, Resume) ->
    maybe_comment_done(Rest, Resume);
maybe_comment_done(<<_/utf8, Rest/binary>>, Resume) ->
    comment(Rest, Resume);
maybe_comment_done(<<>>, Resume) ->
    fun(Stream) -> maybe_comment_done(Stream, Resume) end.


%% helper function for dispatching of parser events. the second argument is a
%% {Handler, State} pair: with the handler none events are collected in a list
%% that is returned in order of occurrence on eof; with a module its jsx_event/2
%% is called; with a fun the fun itself is called. in the latter two cases the
%% handler is paired with the new state it returned.

callback(eof, {none, Callbacks}) ->
    lists:reverse(Callbacks);
callback(Event, {none, Callbacks}) ->
    {none, [Event] ++ Callbacks};
callback(Event, {Mod, State}) when is_atom(Mod) ->
    {Mod, Mod:jsx_event(Event, State)};
callback(Event, {F, State}) when is_function(F) ->
    {F, F(Event, State)}.

diff --git a/test/cases/array.json b/test/cases/array.json index 2f4b3a3..ee1041c 100644 --- a/test/cases/array.json +++ b/test/cases/array.json @@ -1 +1 @@ -["foo","bar", "baz",true,false,null,{"key":"value"},[null,null,null,[]],"\n\r\\"] +["foo","bar", "baz",[true],[false],[null],true, false, null, 0.7, {"key":"value"},[{}, null,null,null,[]],"\n\r\\", [-1]] diff --git a/test/cases/array.test b/test/cases/array.test index 7f7111e..145a795 100644 --- a/test/cases/array.test +++ b/test/cases/array.test @@ -1 +1 @@ -[start_array, {string, "foo"}, {string, "bar"}, {string, "baz"}, {literal, true}, {literal, false}, {literal, null}, start_object, {key, "key"}, {string, "value"}, end_object, start_array, {literal, null}, {literal, null}, {literal, null}, start_array, end_array, end_array, {string, "\n\r\\"}, end_array]. 
\ No newline at end of file +[start_array, {string, "foo"}, {string, "bar"}, {string, "baz"}, start_array, {literal, true}, end_array, start_array, {literal, false}, end_array, start_array, {literal, null}, end_array, {literal, true}, {literal, false}, {literal, null}, {number, "0.7"}, start_object, {key, "key"}, {string, "value"}, end_object, start_array, start_object, end_object, {literal, null}, {literal, null}, {literal, null}, start_array, end_array, end_array, {string, "\n\r\\"}, start_array, {number, "-1"}, end_array, end_array]. \ No newline at end of file diff --git a/test/cases/comments.json b/test/cases/comments.json index d9d54ef..f82d177 100644 --- a/test/cases/comments.json +++ b/test/cases/comments.json @@ -1 +1 @@ -[/*array open comment*/ "a string"/*string comment*/, 1/*number comment*/, {/*object open comment*/"key"/*post key comment*/:/*colon comment*/[]/*object close comment*/}, true/*literal comment*/] \ No newline at end of file +/*preceding comment*/ [/*array open comment*/ "a string"/*string comment*/, 1/*number comment*/, {/*object open comment*/"key"/*post key comment*/:/*colon comment*/[], /*pre key comment*/"another key": 0/*object close comment*/}, true/*literal comment*/] /*concluding comment*/ \ No newline at end of file diff --git a/test/cases/comments.test b/test/cases/comments.test index db90001..1870be2 100644 --- a/test/cases/comments.test +++ b/test/cases/comments.test @@ -1,2 +1,2 @@ -[start_array, {string, "a string"}, {number, "1"}, start_object, {key, "key"}, start_array, end_array, end_object, {literal, true}, end_array]. +[start_array, {string, "a string"}, {number, "1"}, start_object, {key, "key"}, start_array, end_array, {key, "another key"}, {number, "0"}, end_object, {literal, true}, end_array]. [{comments, true}]. 
\ No newline at end of file diff --git a/test/cases/naked_number_a.json b/test/cases/naked_number_a.json new file mode 100644 index 0000000..f70d7bb --- /dev/null +++ b/test/cases/naked_number_a.json @@ -0,0 +1 @@ +42 \ No newline at end of file diff --git a/test/cases/naked_number_a.test b/test/cases/naked_number_a.test new file mode 100644 index 0000000..e165868 --- /dev/null +++ b/test/cases/naked_number_a.test @@ -0,0 +1,2 @@ +[{number, "42"}]. +[{naked_values, true}]. \ No newline at end of file diff --git a/test/cases/naked_number_b.json b/test/cases/naked_number_b.json new file mode 100644 index 0000000..67f7ad0 --- /dev/null +++ b/test/cases/naked_number_b.json @@ -0,0 +1 @@ +-42 \ No newline at end of file diff --git a/test/cases/naked_number_b.test b/test/cases/naked_number_b.test new file mode 100644 index 0000000..4f4d80f --- /dev/null +++ b/test/cases/naked_number_b.test @@ -0,0 +1,2 @@ +[{number, "-42"}]. +[{naked_values, true}]. \ No newline at end of file diff --git a/test/cases/naked_number_c.json b/test/cases/naked_number_c.json new file mode 100644 index 0000000..017da84 --- /dev/null +++ b/test/cases/naked_number_c.json @@ -0,0 +1 @@ +-0.7 \ No newline at end of file diff --git a/test/cases/naked_number_c.test b/test/cases/naked_number_c.test new file mode 100644 index 0000000..1257b2b --- /dev/null +++ b/test/cases/naked_number_c.test @@ -0,0 +1,2 @@ +[{number, "-0.7"}]. +[{naked_values, true}]. \ No newline at end of file diff --git a/test/cases/naked_number_d.json b/test/cases/naked_number_d.json new file mode 100644 index 0000000..0e2c939 --- /dev/null +++ b/test/cases/naked_number_d.json @@ -0,0 +1 @@ +0.7 \ No newline at end of file diff --git a/test/cases/naked_number_d.test b/test/cases/naked_number_d.test new file mode 100644 index 0000000..69e7453 --- /dev/null +++ b/test/cases/naked_number_d.test @@ -0,0 +1,2 @@ +[{number, "0.7"}]. +[{naked_values, true}]. 
\ No newline at end of file diff --git a/test/cases/object.json b/test/cases/object.json new file mode 100644 index 0000000..f59c630 --- /dev/null +++ b/test/cases/object.json @@ -0,0 +1 @@ +{"foo":"bar", "baz":true, "false":null,"object":{ "key" : "value" },"list":[null,null,null,[],"\n\r\\"]} \ No newline at end of file diff --git a/test/cases/object.test b/test/cases/object.test new file mode 100644 index 0000000..7e65efb --- /dev/null +++ b/test/cases/object.test @@ -0,0 +1 @@ +[start_object, {key, "foo"}, {string, "bar"}, {key, "baz"}, {literal, true}, {key, "false"}, {literal, null}, {key, "object"}, start_object, {key, "key"}, {string, "value"}, end_object, {key, "list"}, start_array, {literal, null}, {literal, null}, {literal, null}, start_array, end_array, {string, "\n\r\\"}, end_array, end_object]. \ No newline at end of file diff --git a/test/cases/unicode_to_codepoint.json b/test/cases/unicode_to_codepoint.json new file mode 100644 index 0000000..7984511 --- /dev/null +++ b/test/cases/unicode_to_codepoint.json @@ -0,0 +1 @@ +[ "arabic letter alef: ", "\u0627" ] \ No newline at end of file diff --git a/test/cases/unicode_to_codepoint.test b/test/cases/unicode_to_codepoint.test new file mode 100644 index 0000000..c257a0d --- /dev/null +++ b/test/cases/unicode_to_codepoint.test @@ -0,0 +1,2 @@ +[start_array, {string, "arabic letter alef: "}, {string, [16#0627]}, end_array]. +[{escaped_unicode, codepoint}]. 
\ No newline at end of file diff --git a/test/cases/unicode_unconverted.json b/test/cases/unicode_unconverted.json new file mode 100644 index 0000000..7984511 --- /dev/null +++ b/test/cases/unicode_unconverted.json @@ -0,0 +1 @@ +[ "arabic letter alef: ", "\u0627" ] \ No newline at end of file diff --git a/test/cases/unicode_unconverted.test b/test/cases/unicode_unconverted.test new file mode 100644 index 0000000..c70dd06 --- /dev/null +++ b/test/cases/unicode_unconverted.test @@ -0,0 +1 @@ +[start_array, {string, "arabic letter alef: "}, {string, "\\u0627"}, end_array]. \ No newline at end of file diff --git a/test/cases/whitespace.json b/test/cases/whitespace.json new file mode 100644 index 0000000..7d61621 --- /dev/null +++ b/test/cases/whitespace.json @@ -0,0 +1,4 @@ + + [0.3] + + \ No newline at end of file diff --git a/test/cases/whitespace.test b/test/cases/whitespace.test new file mode 100644 index 0000000..f200b42 --- /dev/null +++ b/test/cases/whitespace.test @@ -0,0 +1 @@ +[start_array, {number, "0.3"}, end_array].