From 0d7a6f97206d2c40582e02f7d65eb3e43596e3ea Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 8 Jun 2010 20:21:18 -0700 Subject: [PATCH] fixed embarrassing non-termination bug --- examples/jsx_prettify.erl | 49 +-- src/jsx.erl | 18 +- src/jsx_decoder.erl | 122 +++--- src/jsx_utf8.erl | 764 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 862 insertions(+), 91 deletions(-) create mode 100644 src/jsx_utf8.erl diff --git a/examples/jsx_prettify.erl b/examples/jsx_prettify.erl index f353040..020f6f6 100644 --- a/examples/jsx_prettify.erl +++ b/examples/jsx_prettify.erl @@ -25,16 +25,16 @@ -author("alisdairsullivan@yahoo.ca"). --export([pretty/2, jsx_event/2]). +-export([pretty/2, prettify/2]). -record(opts, { - indent = 4 + indent = " " }). pretty(JSON, Opts) -> Init = init(parse_opts(Opts, #opts{})), - P = jsx:decoder({jsx_prettify, jsx_event, Init}, []), + P = jsx:decoder({jsx_prettify, prettify, Init}, []), case P(JSON) of {incomplete, _} -> {error, badjson} ; {error, badjson} -> {error, badjson} @@ -43,7 +43,7 @@ pretty(JSON, Opts) -> parse_opts([{indent, Val}|Rest], Opts) -> - parse_opts(Rest, Opts#opts{indent = Val}); + parse_opts(Rest, Opts#opts{indent = [ 16#20 || _ <- lists:seq(1, Val) ]}); parse_opts([], Opts) -> Opts. @@ -52,45 +52,45 @@ init(Opts) -> {[], Opts#opts.indent, 0, new}. 
-jsx_event(start_object, {Acc, Indent, Level, value}) -> +prettify(start_object, {Acc, Indent, Level, value}) -> {Acc ++ ",\n" ++ indent(Indent, Level) ++ "{", Indent, Level + 1, new}; -jsx_event(start_object, {Acc, Indent, Level, new}) -> +prettify(start_object, {Acc, Indent, Level, new}) -> {Acc ++ ",\n" ++ indent(Indent, Level) ++ "{", Indent, Level + 1, new}; -jsx_event(start_object, {Acc, Indent, Level, _}) -> +prettify(start_object, {Acc, Indent, Level, _}) -> {Acc ++ "{", Indent, Level + 1, new}; -jsx_event(start_array, {Acc, Indent, Level, value}) -> +prettify(start_array, {Acc, Indent, Level, value}) -> {Acc ++ ",\n" ++ indent(Indent, Level) ++ "[", Indent, Level + 1, new}; -jsx_event(start_array, {Acc, Indent, Level, new}) -> +prettify(start_array, {Acc, Indent, Level, new}) -> {Acc ++ ",\n" ++ indent(Indent, Level) ++ "[", Indent, Level + 1, new}; -jsx_event(start_array, {Acc, Indent, Level, _}) -> +prettify(start_array, {Acc, Indent, Level, _}) -> {Acc ++ "[", Indent, Level + 1, new}; -jsx_event(end_object, {Acc, Indent, Level, value}) -> +prettify(end_object, {Acc, Indent, Level, value}) -> {Acc ++ "\n" ++ indent(Indent, Level - 1) ++ "}", Indent, Level - 1, value}; -jsx_event(end_object, {Acc, Indent, Level, new}) -> +prettify(end_object, {Acc, Indent, Level, new}) -> {Acc ++ "}", Indent, Level - 1, value}; -jsx_event(end_array, {Acc, Indent, Level, value}) -> +prettify(end_array, {Acc, Indent, Level, value}) -> {Acc ++ "\n" ++ indent(Indent, Level - 1) ++ "]", Indent, Level - 1, value}; -jsx_event(end_array, {Acc, Indent, Level, new}) -> +prettify(end_array, {Acc, Indent, Level, new}) -> {Acc ++ "]", Indent, Level - 1, value}; -jsx_event({key, Key}, {Acc, Indent, Level, value}) -> +prettify({key, Key}, {Acc, Indent, Level, value}) -> {Acc ++ ",\n" ++ indent(Indent, Level) ++ "\"" ++ Key ++ "\": ", Indent, Level, key}; -jsx_event({key, Key}, {Acc, Indent, Level, _}) -> +prettify({key, Key}, {Acc, Indent, Level, _}) -> {Acc ++ "\n" ++ indent(Indent, 
Level) ++ "\"" ++ Key ++ "\": ", Indent, Level, key}; -jsx_event({Type, Value}, {Acc, Indent, Level, value}) -> +prettify({Type, Value}, {Acc, Indent, Level, value}) -> {Acc ++ ",\n" ++ indent(Indent, Level) ++ format(Type, Value), Indent, Level, value}; -jsx_event({Type, Value}, {Acc, Indent, Level, new}) -> +prettify({Type, Value}, {Acc, Indent, Level, new}) -> {Acc ++ "\n" ++ indent(Indent, Level) ++ format(Type, Value), Indent, Level, value}; -jsx_event({Type, Value}, {Acc, Indent, Level, key}) -> +prettify({Type, Value}, {Acc, Indent, Level, key}) -> {Acc ++ format(Type, Value), Indent, Level, value}; -jsx_event(reset, {_, Indent, _, _}) -> +prettify(reset, {_, Indent, _, _}) -> {[], Indent, 0, new}; -jsx_event(end_of_json, {Acc, _, _, _}) -> +prettify(end_of_json, {Acc, _, _, _}) -> Acc. @@ -103,5 +103,10 @@ format(_, Number) -> indent(Indent, Level) -> - [ 16#20 || _ <- lists:seq(1, Indent * Level) ]. + indent(Indent, Level, ""). + +indent(_Indent, 0, Acc) -> + Acc; +indent(Indent, N, Acc) when N > 0 -> + indent(Indent, N - 1, Indent ++ Acc). \ No newline at end of file diff --git a/src/jsx.erl b/src/jsx.erl index f6da35a..f101f24 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -27,7 +27,12 @@ -export([decode/1, decoder/0, decoder/1, decoder/2, detect_encoding/4]). decode(JSON) -> - (jsx:decoder())(JSON). + F = decoder(), + case F(JSON) of + {incomplete, _} -> {error, badjson} + ; {error, badjson} -> {error, badjson} + ; {Result, _} -> {ok, Result} + end. decoder() -> decoder([]). @@ -45,7 +50,6 @@ decoder({Mod, Fun, State}, OptsList) when is_list(OptsList), is_atom(Mod), is_at start({fun(E, S) -> Mod:Fun(E, S) end, State}, OptsList). start(Callbacks, OptsList) -> - Opts = parse_opts(OptsList), F = case proplists:get_value(encoding, OptsList, auto) of utf8 -> fun jsx_utf8:start/4 ; utf16 -> fun jsx_utf16:start/4 @@ -54,13 +58,11 @@ start(Callbacks, OptsList) -> ; {utf32, little} -> fun jsx_utf32le:start/4 ; auto -> fun jsx:detect_encoding/4 end, - start(Callbacks, Opts, F). 
+ start(Callbacks, OptsList, F). -start(Callbacks, Opts, F) -> - fun(Stream) -> - try F(Stream, [], Callbacks, Opts) - catch error:badjson -> {error, badjson} end - end. +start(Callbacks, OptsList, F) -> + Opts = parse_opts(OptsList), + fun(Stream) -> F(Stream, [], Callbacks, Opts) end. parse_opts(Opts) -> parse_opts(Opts, {false, codepoint, false}). diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 83d8310..c409514 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -72,8 +72,8 @@ start(<>, Stack, Callbacks, Opts) when ?is_nonzero(S) start(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end); start(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> start(<>, Stack, Callbacks, Opts) end @@ -99,9 +99,9 @@ maybe_done(Bin, [], Callbacks, ?stream_mode(Opts)) -> end}; maybe_done(<<>>, [], Callbacks, Opts) -> {fold(end_of_json, Callbacks), fun(Stream) -> maybe_done(Stream, [], Callbacks, Opts) end}; -maybe_done(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} +maybe_done(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end @@ -118,8 +118,8 @@ object(<>, [key|Stack], Callbacks, Opts) -> object(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end); object(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> object(<>, Stack, Callbacks, Opts) end @@ -152,8 +152,8 @@ array(<>, [array|Stack], Callbacks, Opts) -> array(<>, Stack, Callbacks, 
?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end); array(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> array(<>, Stack, Callbacks, Opts) end @@ -184,8 +184,8 @@ value(<>, Stack, Callbacks, Opts) -> value(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end); value(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> value(<>, Stack, Callbacks, Opts) end @@ -200,8 +200,8 @@ colon(<>, [key|Stack], Callbacks, Opts) -> colon(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end); colon(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end @@ -216,8 +216,8 @@ key(<>, Stack, Callbacks, Opts) -> key(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end); key(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> key(<>, Stack, Callbacks, Opts) end @@ -317,8 +317,8 @@ escape(<>, Stack, Callbacks, Opts, Acc) when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus -> string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); escape(Bin, Stack, Callbacks, Opts, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, 
fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end @@ -372,8 +372,8 @@ escaped_unicode(<>, Stack, Callbacks, Opts, String, [C escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) -> escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc); escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) end @@ -408,8 +408,8 @@ negative(<<$0/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> negative(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc); negative(Bin, Stack, Callbacks, Opts, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> negative(<>, Stack, Callbacks, Opts, Acc) end @@ -440,8 +440,8 @@ zero(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_json, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> zero(Stream, [], Callbacks, Opts, Acc) end}; zero(Bin, Stack, Callbacks, Opts, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> zero(<>, Stack, Callbacks, Opts, Acc) end @@ -480,8 +480,8 @@ integer(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_json, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> integer(Stream, [], Callbacks, Opts, Acc) end}; integer(Bin, Stack, Callbacks, Opts, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> integer(<>, Stack, Callbacks, Opts, Acc) end @@ -494,8 +494,8 @@ initial_decimal(<>, Stack, Callbacks, Opts, Acc) when 
initial_decimal(<>, Stack, Callbacks, Opts, Acc) -> decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc); initial_decimal(Bin, Stack, Callbacks, Opts, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end @@ -532,8 +532,8 @@ decimal(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_json, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> decimal(Stream, [], Callbacks, Opts, Acc) end}; decimal(Bin, Stack, Callbacks, Opts, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> decimal(<>, Stack, Callbacks, Opts, Acc) end @@ -546,8 +546,8 @@ e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ? e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative -> ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc); e(Bin, Stack, Callbacks, Opts, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> e(<>, Stack, Callbacks, Opts, Acc) end @@ -558,8 +558,8 @@ e(Bin, Stack, Callbacks, Opts, Acc) -> ex(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc); ex(Bin, Stack, Callbacks, Opts, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> ex(<>, Stack, Callbacks, Opts, Acc) end @@ -592,8 +592,8 @@ exp(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_json, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> exp(Stream, [], Callbacks, Opts, Acc) end}; exp(Bin, Stack, Callbacks, Opts, Acc) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= 
?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> exp(<>, Stack, Callbacks, Opts, Acc) end @@ -604,8 +604,8 @@ exp(Bin, Stack, Callbacks, Opts, Acc) -> tr(<<$r/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> tru(Rest, Stack, Callbacks, Opts); tr(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> tr(<>, Stack, Callbacks, Opts) end @@ -616,8 +616,8 @@ tr(Bin, Stack, Callbacks, Opts) -> tru(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> true(Rest, Stack, Callbacks, Opts); tru(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> tru(<>, Stack, Callbacks, Opts) end @@ -628,8 +628,8 @@ tru(Bin, Stack, Callbacks, Opts) -> true(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, true}, Callbacks), Opts); true(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> true(<>, Stack, Callbacks, Opts) end @@ -640,8 +640,8 @@ true(Bin, Stack, Callbacks, Opts) -> fa(<<$a/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fal(Rest, Stack, Callbacks, Opts); fa(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> fa(<>, Stack, Callbacks, Opts) end @@ -652,8 +652,8 @@ fa(Bin, Stack, Callbacks, Opts) -> fal(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fals(Rest, Stack, Callbacks, Opts); fal(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + 
true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> fal(<>, Stack, Callbacks, Opts) end @@ -664,8 +664,8 @@ fal(Bin, Stack, Callbacks, Opts) -> fals(<<$s/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> false(Rest, Stack, Callbacks, Opts); fals(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> fals(<>, Stack, Callbacks, Opts) end @@ -676,8 +676,8 @@ fals(Bin, Stack, Callbacks, Opts) -> false(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, false}, Callbacks), Opts); false(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> false(<>, Stack, Callbacks, Opts) end @@ -688,8 +688,8 @@ false(Bin, Stack, Callbacks, Opts) -> nu(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> nul(Rest, Stack, Callbacks, Opts); nu(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> nu(<>, Stack, Callbacks, Opts) end @@ -700,8 +700,8 @@ nu(Bin, Stack, Callbacks, Opts) -> nul(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> null(Rest, Stack, Callbacks, Opts); nul(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> nul(<>, Stack, Callbacks, Opts) end @@ -712,8 +712,8 @@ nul(Bin, Stack, Callbacks, Opts) -> null(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, null}, Callbacks), Opts); null(Bin, Stack, Callbacks, Opts) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= 
?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> null(<>, Stack, Callbacks, Opts) end @@ -730,8 +730,8 @@ null(Bin, Stack, Callbacks, Opts) -> maybe_comment(<>, Resume) -> comment(Rest, Resume); maybe_comment(Bin, Resume) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> maybe_comment(<>, Resume) end @@ -744,8 +744,8 @@ comment(<>, Resume) -> comment(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); comment(Bin, Resume) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> comment(<>, Resume) end} end. @@ -756,8 +756,8 @@ maybe_comment_done(<>, Resume) -> maybe_comment_done(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); maybe_comment_done(Bin, Resume) -> - case byte_size(Bin) of - ?symbol_size -> {error, badjson} + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} ; _ -> {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end} end. \ No newline at end of file diff --git a/src/jsx_utf8.erl b/src/jsx_utf8.erl new file mode 100644 index 0000000..a58042e --- /dev/null +++ b/src/jsx_utf8.erl @@ -0,0 +1,764 @@ +%% The MIT License + +%% Copyright (c) 2010 Alisdair Sullivan + +%% Permission is hereby granted, free of charge, to any person obtaining a copy +%% of this software and associated documentation files (the "Software"), to deal +%% in the Software without restriction, including without limitation the rights +%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the Software is +%% furnished to do so, subject to the following conditions: + +%% The above copyright notice and this permission notice shall be included in +%% all copies or substantial portions of the Software. 
+ +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +%% THE SOFTWARE. + + +%% this is a template for the utf8, utf16, utf16le, utf32 and utf32le decoders. it should +%% not be compiled directly, see the build script in /priv for details + +-module(jsx_utf8). +-author("alisdairsullivan@yahoo.ca"). + +-define(utf8, true). +-include("jsx_decoder.hrl"). + +-export([start/4]). + + +%% callbacks to our handler are roughly equivalent to a fold over the events, incremental +%% rather than all at once. + +fold(end_of_json, {F, State}) -> + F(end_of_json, State); +fold(Event, {F, State}) when is_function(F) -> + {F, F(Event, State)}. + + +%% this code is mostly autogenerated and mostly ugly. apologies. for more insight on +%% Callbacks or Opts, see the comments accompanying decoder/2 (in jsx.erl). Stack +%% is a stack of flags used to track depth and to keep track of whether we are +%% returning from a value or a key inside objects. all pops, peeks and pushes are +%% inlined. the code that handles naked values and comments is not optimized by the +%% compiler for efficient matching, but you shouldn't be using naked values or comments +%% anyways, they are horrible and contrary to the spec. 
+ +start(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> + start(Rest, Stack, Callbacks, Opts); +start(<>, Stack, Callbacks, Opts) -> + object(Rest, [key|Stack], fold(start_object, Callbacks), Opts); +start(<>, Stack, Callbacks, Opts) -> + array(Rest, [array|Stack], fold(start_array, Callbacks), Opts); +start(<>, Stack, Callbacks, Opts) -> + string(Rest, Stack, Callbacks, Opts, []); +start(<<$t/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> + tr(Rest, Stack, Callbacks, Opts); +start(<<$f/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> + fa(Rest, Stack, Callbacks, Opts); +start(<<$n/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> + nu(Rest, Stack, Callbacks, Opts); +start(<>, Stack, Callbacks, Opts) -> + negative(Rest, Stack, Callbacks, Opts, "-"); +start(<>, Stack, Callbacks, Opts) -> + zero(Rest, Stack, Callbacks, Opts, "0"); +start(<>, Stack, Callbacks, Opts) when ?is_nonzero(S) -> + integer(Rest, Stack, Callbacks, Opts, [S]); +start(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> + maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end); +start(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> start(<>, Stack, Callbacks, Opts) end + } + end. 
+ + +maybe_done(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> + maybe_done(Rest, Stack, Callbacks, Opts); +maybe_done(<>, [object|Stack], Callbacks, Opts) -> + maybe_done(Rest, Stack, fold(end_object, Callbacks), Opts); +maybe_done(<>, [array|Stack], Callbacks, Opts) -> + maybe_done(Rest, Stack, fold(end_array, Callbacks), Opts); +maybe_done(<>, [object|Stack], Callbacks, Opts) -> + key(Rest, [key|Stack], Callbacks, Opts); +maybe_done(<>, [array|_] = Stack, Callbacks, Opts) -> + value(Rest, Stack, Callbacks, Opts); +maybe_done(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> + maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end); +maybe_done(Bin, [], Callbacks, ?stream_mode(Opts)) -> + {fold(end_of_json, Callbacks), fun(Stream) -> + start(<>, [], fold(reset, Callbacks), Opts) + end}; +maybe_done(<<>>, [], Callbacks, Opts) -> + {fold(end_of_json, Callbacks), fun(Stream) -> maybe_done(Stream, [], Callbacks, Opts) end}; +maybe_done(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end + } + end. + + +object(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> + object(Rest, Stack, Callbacks, Opts); +object(<>, Stack, Callbacks, Opts) -> + string(Rest, Stack, Callbacks, Opts, []); +object(<>, [key|Stack], Callbacks, Opts) -> + maybe_done(Rest, Stack, fold(end_object, Callbacks), Opts); +object(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> + maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end); +object(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> object(<>, Stack, Callbacks, Opts) end + } + end. 
+ + +array(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> + array(Rest, Stack, Callbacks, Opts); +array(<>, Stack, Callbacks, Opts) -> + string(Rest, Stack, Callbacks, Opts, []); +array(<<$t/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> + tr(Rest, Stack, Callbacks, Opts); +array(<<$f/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> + fa(Rest, Stack, Callbacks, Opts); +array(<<$n/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> + nu(Rest, Stack, Callbacks, Opts); +array(<>, Stack, Callbacks, Opts) -> + negative(Rest, Stack, Callbacks, Opts, "-"); +array(<>, Stack, Callbacks, Opts) -> + zero(Rest, Stack, Callbacks, Opts, "0"); +array(<>, Stack, Callbacks, Opts) when ?is_nonzero(S) -> + integer(Rest, Stack, Callbacks, Opts, [S]); +array(<>, Stack, Callbacks, Opts) -> + object(Rest, [key|Stack], fold(start_object, Callbacks), Opts); +array(<>, Stack, Callbacks, Opts) -> + array(Rest, [array|Stack], fold(start_array, Callbacks), Opts); +array(<>, [array|Stack], Callbacks, Opts) -> + maybe_done(Rest, Stack, fold(end_array, Callbacks), Opts); +array(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> + maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end); +array(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> array(<>, Stack, Callbacks, Opts) end + } + end. 
+ + +value(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> + value(Rest, Stack, Callbacks, Opts); +value(<>, Stack, Callbacks, Opts) -> + string(Rest, Stack, Callbacks, Opts, []); +value(<<$t/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> + tr(Rest, Stack, Callbacks, Opts); +value(<<$f/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> + fa(Rest, Stack, Callbacks, Opts); +value(<<$n/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> + nu(Rest, Stack, Callbacks, Opts); +value(<>, Stack, Callbacks, Opts) -> + negative(Rest, Stack, Callbacks, Opts, "-"); +value(<>, Stack, Callbacks, Opts) -> + zero(Rest, Stack, Callbacks, Opts, "0"); +value(<>, Stack, Callbacks, Opts) when ?is_nonzero(S) -> + integer(Rest, Stack, Callbacks, Opts, [S]); +value(<>, Stack, Callbacks, Opts) -> + object(Rest, [key|Stack], fold(start_object, Callbacks), Opts); +value(<>, Stack, Callbacks, Opts) -> + array(Rest, [array|Stack], fold(start_array, Callbacks), Opts); +value(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> + maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end); +value(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> value(<>, Stack, Callbacks, Opts) end + } + end. + + +colon(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> + colon(Rest, Stack, Callbacks, Opts); +colon(<>, [key|Stack], Callbacks, Opts) -> + value(Rest, [object|Stack], Callbacks, Opts); +colon(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> + maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end); +colon(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end + } + end. 
+ + +key(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> + key(Rest, Stack, Callbacks, Opts); +key(<>, Stack, Callbacks, Opts) -> + string(Rest, Stack, Callbacks, Opts, []); +key(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> + maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end); +key(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> key(<>, Stack, Callbacks, Opts) end + } + end. + + +%% string has an additional parameter, an accumulator (Acc) used to hold the intermediate +%% representation of the string being parsed. using a list of integers representing +%% unicode codepoints is faster than constructing binaries, many of which will be +%% converted back to lists by the user anyways. + +string(<>, [key|_] = Stack, Callbacks, Opts, Acc) -> + colon(Rest, Stack, fold({key, lists:reverse(Acc)}, Callbacks), Opts); +string(<>, Stack, Callbacks, Opts, Acc) -> + maybe_done(Rest, Stack, fold({string, lists:reverse(Acc)}, Callbacks), Opts); +string(<>, Stack, Callbacks, Opts, Acc) -> + escape(Rest, Stack, Callbacks, Opts, Acc); +string(<>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) -> + string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); +string(Bin, Stack, Callbacks, Opts, Acc) -> + case partial_utf(Bin) of + true -> + {incomplete, + fun(Stream) -> string(<>, Stack, Callbacks, Opts, Acc) end + } + ; false -> + {error, badjson} + end. + + +-ifdef(utf8). +partial_utf(<<>>) -> true; +partial_utf(<>) when X >= 16#c2, X =< 16#df -> true; +partial_utf(<>) when X >= 16#e0, X =< 16#ef -> + case Rest of + <<>> -> true + ; <> when Y >= 16#80, Y =< 16#bf -> true ; _ -> false + end; +partial_utf(<>) when X >= 16#f0, X =< 16#f4 -> + case Rest of + <<>> -> true + ; <> when Y >= 16#80, Y =< 16#bf -> true + ; <> when Y >= 16#80, Y =< 16#bf, Z >= 16#80, Z =< 16#bf -> true ; _ -> false + end; +partial_utf(_) -> false. +-endif. + +-ifdef(utf16). 
+partial_utf(<<>>) -> true; +%% this case is not strictly true, there are single bytes that should be rejected, but +%% they're rare enough they can be ignored +partial_utf(<<_X>>) -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df, Z >= 16#dc, Z =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf16le). +partial_utf(<<>>) -> true; +%% this case is not strictly true, there are single bytes that should be rejected, but +%% they're rare enough they can be ignored +partial_utf(<<_X>>) -> true; +partial_utf(<<_Y, X>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<<_Y, X, _Z>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf32). +partial_utf(<<_:32, _/binary>>) -> false; +partial_utf(_) -> true. +-endif. + +-ifdef(utf32le). +partial_utf(<<_:32, _/binary>>) -> false; +partial_utf(_) -> true. +-endif. + + +%% only thing to note here is the additional accumulator passed to escaped_unicode used +%% to hold the codepoint sequence. unnecessary, but nicer than using the string +%% accumulator. 
+ +escape(<<$b/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> + string(Rest, Stack, Callbacks, Opts, "\b" ++ Acc); +escape(<<$f/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> + string(Rest, Stack, Callbacks, Opts, "\f" ++ Acc); +escape(<<$n/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> + string(Rest, Stack, Callbacks, Opts, "\n" ++ Acc); +escape(<<$r/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> + string(Rest, Stack, Callbacks, Opts, "\r" ++ Acc); +escape(<<$t/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> + string(Rest, Stack, Callbacks, Opts, "\t" ++ Acc); +escape(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> + escaped_unicode(Rest, Stack, Callbacks, Opts, Acc, []); +escape(<>, Stack, Callbacks, Opts, Acc) + when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus -> + string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); +escape(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end + } + end. + + +%% this code is ugly and unfortunate, but so is json's handling of escaped unicode +%% codepoint sequences. if the ascii option is present, the sequence is converted +%% to a codepoint and inserted into the string if it represents an ascii value. if +%% the codepoint option is present the sequence is converted and inserted as long +%% as it represents a valid unicode codepoint. this means non-characters +%% representable in 16 bits are not converted (the utf16 surrogates and the two +%% special non-characters). any other option and no conversion is done. 
+ +escaped_unicode(<>, + Stack, + Callbacks, + ?escaped_unicode_to_ascii(Opts), + String, + [C, B, A]) + when ?is_hex(D) -> + case erlang:list_to_integer([A, B, C, D], 16) of + X when X < 128 -> + string(Rest, Stack, Callbacks, Opts, [X] ++ String) + ; _ -> + string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String) + end; +escaped_unicode(<>, + Stack, + Callbacks, + ?escaped_unicode_to_codepoint(Opts), + String, + [C, B, A]) + when ?is_hex(D) -> + case erlang:list_to_integer([A, B, C, D], 16) of + X when X >= 16#dc00, X =< 16#dfff -> + case check_acc_for_surrogate(String) of + false -> + string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String) + ; {Y, NewString} -> + string(Rest, Stack, Callbacks, Opts, [surrogate_to_codepoint(Y, X)] ++ NewString) + end + ; X when X < 16#d800; X > 16#dfff, X < 16#fffe -> + string(Rest, Stack, Callbacks, Opts, [X] ++ String) + ; _ -> + string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String) + end; +escaped_unicode(<>, Stack, Callbacks, Opts, String, [C, B, A]) when ?is_hex(D) -> + string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String); +escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) -> + escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc); +escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) -> + case byte_size(Bin) >= ?symbol_size of + true -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) end + } + end. + +%% upon encountering a low pair json/hex encoded value, check to see if there's a high +%% value already in the accumulator. + +check_acc_for_surrogate([D, C, B, A, $u, ?rsolidus|Rest]) + when ?is_hex(D), ?is_hex(C), ?is_hex(B), ?is_hex(A) -> + case erlang:list_to_integer([A, B, C, D], 16) of + X when X >=16#d800, X =< 16#dbff -> + {X, Rest}; + _ -> + false + end; +check_acc_for_surrogate(_) -> + false. 
+
+%% stole this from the unicode spec. combines a utf16 high and low surrogate
+%% into the single codepoint the pair encodes.
+
+surrogate_to_codepoint(High, Low) ->
+    (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000.
+
+
+%% like strings, numbers are collected in an intermediate accumulator before
+%% being emitted to the callback handler. the accumulator is built most recent
+%% character first and reversed when the number is emitted.
+
+%% a minus sign has been read and must be followed by a digit.
+
+negative(<<$0/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc);
+negative(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+negative(Bin, Stack, Callbacks, Opts, Acc) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> negative(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end
+            }
+    end.
+
+
+%% a leading zero has been read. no further digits may follow it directly: the
+%% number either ends here (container close, comma, whitespace, comment, end of
+%% input) or continues with a decimal point.
+
+zero(<<$}/?encoding, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, fold(end_object, fold({integer, lists:reverse(Acc)}, Callbacks)), Opts);
+zero(<<$]/?encoding, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, fold(end_array, fold({integer, lists:reverse(Acc)}, Callbacks)), Opts);
+zero(<<$,/?encoding, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], fold({integer, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<$,/?encoding, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, fold({integer, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?decimalpoint/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    initial_decimal(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
+zero(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, fold({integer, lists:reverse(Acc)}, Callbacks), Opts);
+zero(<<?solidus/?encoding, Rest/binary>>, Stack, Callbacks, ?comments_enabled(Opts), Acc) ->
+    maybe_comment(Rest, fun(Resume) -> zero(Resume, Stack, Callbacks, Opts, Acc) end);
+%% stream mode with nothing left on the stack: emit the number and end_of_json,
+%% then restart on whatever input is left over once more arrives
+zero(Bin, [], Callbacks, ?stream_mode(Opts), Acc) ->
+    CB = fold({integer, lists:reverse(Acc)}, Callbacks),
+    {fold(end_of_json, CB), fun(Stream) ->
+        start(<<Bin/binary, Stream/binary>>, [], fold(reset, CB), Opts)
+    end};
+%% out of input at the top level: report the number as complete, but return a
+%% fun that resumes this state (with the original callbacks) if more input turns
+%% up
+zero(<<>>, [], Callbacks, Opts, Acc) ->
+    {fold(end_of_json, fold({integer, lists:reverse(Acc)}, Callbacks)),
+        fun(Stream) -> zero(Stream, [], Callbacks, Opts, Acc) end};
+zero(Bin, Stack, Callbacks, Opts, Acc) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> zero(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end
+            }
+    end.
+
+
+%% one or more digits of an integer, the first of them nonzero, have been read.
+
+integer(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+integer(<<$}/?encoding, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, fold(end_object, fold({integer, lists:reverse(Acc)}, Callbacks)), Opts);
+integer(<<$]/?encoding, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, fold(end_array, fold({integer, lists:reverse(Acc)}, Callbacks)), Opts);
+integer(<<$,/?encoding, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], fold({integer, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<$,/?encoding, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, fold({integer, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?decimalpoint/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    initial_decimal(Rest, Stack, Callbacks, Opts, [?decimalpoint] ++ Acc);
+integer(<<?zero/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    integer(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+%% an exponent directly after an integer: ".0" is spliced in ahead of the "e"
+%% (the accumulator is reversed, hence "e0.") so the text reads as a float
+integer(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e0." ++ Acc);
+integer(<<$E/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e0." ++ Acc);
+integer(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, fold({integer, lists:reverse(Acc)}, Callbacks), Opts);
+integer(<<?solidus/?encoding, Rest/binary>>, Stack, Callbacks, ?comments_enabled(Opts), Acc) ->
+    maybe_comment(Rest, fun(Resume) -> integer(Resume, Stack, Callbacks, Opts, Acc) end);
+%% the next three clauses mirror the final clauses of zero/5
+integer(Bin, [], Callbacks, ?stream_mode(Opts), Acc) ->
+    CB = fold({integer, lists:reverse(Acc)}, Callbacks),
+    {fold(end_of_json, CB), fun(Stream) ->
+        start(<<Bin/binary, Stream/binary>>, [], fold(reset, CB), Opts)
+    end};
+integer(<<>>, [], Callbacks, Opts, Acc) ->
+    {fold(end_of_json, fold({integer, lists:reverse(Acc)}, Callbacks)),
+        fun(Stream) -> integer(Stream, [], Callbacks, Opts, Acc) end};
+integer(Bin, Stack, Callbacks, Opts, Acc) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> integer(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end
+            }
+    end.
+
+
+%% a decimal point has been read and must be followed by at least one digit.
+
+initial_decimal(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    decimal(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+initial_decimal(<<?zero/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+initial_decimal(Bin, Stack, Callbacks, Opts, Acc) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> initial_decimal(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end
+            }
+    end.
+
+
+%% at least one digit after the decimal point has been read. further digits are
+%% collected, `e' or `E' starts an exponent and anything that ends a value emits
+%% the accumulated text (reversed back into reading order) as a float event.
+
+decimal(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    decimal(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+decimal(<<$}/?encoding, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, fold(end_object, fold({float, lists:reverse(Acc)}, Callbacks)), Opts);
+decimal(<<$]/?encoding, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, fold(end_array, fold({float, lists:reverse(Acc)}, Callbacks)), Opts);
+decimal(<<$,/?encoding, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], fold({float, lists:reverse(Acc)}, Callbacks), Opts);
+decimal(<<$,/?encoding, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, fold({float, lists:reverse(Acc)}, Callbacks), Opts);
+decimal(<<?zero/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+%% both spellings of the exponent marker are stored as a lowercase e
+decimal(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+decimal(<<$E/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    e(Rest, Stack, Callbacks, Opts, "e" ++ Acc);
+decimal(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, fold({float, lists:reverse(Acc)}, Callbacks), Opts);
+decimal(<<?solidus/?encoding, Rest/binary>>, Stack, Callbacks, ?comments_enabled(Opts), Acc) ->
+    maybe_comment(Rest, fun(Resume) -> decimal(Resume, Stack, Callbacks, Opts, Acc) end);
+%% stream mode with nothing left on the stack: emit the number and end_of_json,
+%% then restart on whatever input is left over once more arrives
+decimal(Bin, [], Callbacks, ?stream_mode(Opts), Acc) ->
+    CB = fold({float, lists:reverse(Acc)}, Callbacks),
+    {fold(end_of_json, CB), fun(Stream) ->
+        start(<<Bin/binary, Stream/binary>>, [], fold(reset, CB), Opts)
+    end};
+%% out of input at the top level: report the number as complete, but return a
+%% fun that resumes this state if more input turns up
+decimal(<<>>, [], Callbacks, Opts, Acc) ->
+    {fold(end_of_json, fold({float, lists:reverse(Acc)}, Callbacks)),
+        fun(Stream) -> decimal(Stream, [], Callbacks, Opts, Acc) end};
+%% a full symbol that fits nowhere above is an error, less than ?symbol_size
+%% bytes means more input is needed
+decimal(Bin, Stack, Callbacks, Opts, Acc) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> decimal(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end
+            }
+    end.
+
+
+%% the exponent marker has been read. it must be followed by a digit or by a
+%% sign.
+
+e(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+e(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative ->
+    ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+e(Bin, Stack, Callbacks, Opts, Acc) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> e(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end
+            }
+    end.
+
+
+%% the exponent's sign has been read. it must be followed by a digit.
+
+ex(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+ex(Bin, Stack, Callbacks, Opts, Acc) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> ex(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end
+            }
+    end.
+
+
+%% at least one exponent digit has been read. further digits are collected and
+%% anything that ends a value emits the accumulated text as a float event.
+
+exp(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) ->
+    exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc);
+exp(<<$}/?encoding, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, fold(end_object, fold({float, lists:reverse(Acc)}, Callbacks)), Opts);
+exp(<<$]/?encoding, Rest/binary>>, [array|Stack], Callbacks, Opts, Acc) ->
+    maybe_done(Rest, Stack, fold(end_array, fold({float, lists:reverse(Acc)}, Callbacks)), Opts);
+exp(<<$,/?encoding, Rest/binary>>, [object|Stack], Callbacks, Opts, Acc) ->
+    key(Rest, [key|Stack], fold({float, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<$,/?encoding, Rest/binary>>, [array|_] = Stack, Callbacks, Opts, Acc) ->
+    value(Rest, Stack, fold({float, lists:reverse(Acc)}, Callbacks), Opts);
+exp(<<?zero/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) ->
+    exp(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc);
+exp(<<?solidus/?encoding, Rest/binary>>, Stack, Callbacks, ?comments_enabled(Opts), Acc) ->
+    maybe_comment(Rest, fun(Resume) -> exp(Resume, Stack, Callbacks, Opts, Acc) end);
+exp(<<S/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, fold({float, lists:reverse(Acc)}, Callbacks), Opts);
+%% stream mode with nothing left on the stack: emit the number and end_of_json,
+%% then restart on whatever input is left over once more arrives
+exp(Bin, [], Callbacks, ?stream_mode(Opts), Acc) ->
+    CB = fold({float, lists:reverse(Acc)}, Callbacks),
+    {fold(end_of_json, CB), fun(Stream) ->
+        start(<<Bin/binary, Stream/binary>>, [], fold(reset, CB), Opts)
+    end};
+%% out of input at the top level: report the number as complete, but return a
+%% fun that resumes this state if more input turns up
+exp(<<>>, [], Callbacks, Opts, Acc) ->
+    {fold(end_of_json, fold({float, lists:reverse(Acc)}, Callbacks)),
+        fun(Stream) -> exp(Stream, [], Callbacks, Opts, Acc) end};
+exp(Bin, Stack, Callbacks, Opts, Acc) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> exp(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts, Acc) end
+            }
+    end.
+
+
+%% the literals are matched one character per function so that the input may be
+%% split at any point. each function expects the next character of its literal;
+%% any other complete symbol is an error and a partial symbol asks for more
+%% input.
+
+tr(<<$r/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    tru(Rest, Stack, Callbacks, Opts);
+tr(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> tr(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+tru(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    true(Rest, Stack, Callbacks, Opts);
+tru(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> tru(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+%% the final character of `true' emits the literal
+true(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    maybe_done(Rest, Stack, fold({literal, true}, Callbacks), Opts);
+true(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> true(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+fa(<<$a/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fal(Rest, Stack, Callbacks, Opts);
+fa(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> fa(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+fal(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    fals(Rest, Stack, Callbacks, Opts);
+fal(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> fal(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+%% remaining steps of the `false' and `null' literals. each function expects the
+%% next character of its literal; any other complete symbol is an error and a
+%% partial symbol asks for more input.
+
+fals(<<$s/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    false(Rest, Stack, Callbacks, Opts);
+fals(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> fals(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+%% the final character of `false' emits the literal
+false(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    maybe_done(Rest, Stack, fold({literal, false}, Callbacks), Opts);
+false(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> false(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+nu(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    nul(Rest, Stack, Callbacks, Opts);
+nu(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> nu(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+nul(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    null(Rest, Stack, Callbacks, Opts);
+nul(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> nul(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+%% the final character of `null' emits the literal
+null(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) ->
+    maybe_done(Rest, Stack, fold({literal, null}, Callbacks), Opts);
+null(Bin, Stack, Callbacks, Opts) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> null(<<Bin/binary, Stream/binary>>, Stack, Callbacks, Opts) end
+            }
+    end.
+
+
+%% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode
+%% character is valid in a comment, except, obviously the */ sequence which ends
+%% the comment. they're implemented as a closure called when the comment ends that
+%% returns execution to the point where the comment began. comments are not
+%% recorded in any way, simply parsed.
+
+%% a solidus has been read where a comment may start. it only opens a comment
+%% when a star follows. Resume is a fun taking the input that remains after the
+%% comment and continuing from the state the comment interrupted.
+
+maybe_comment(<<$*/?encoding, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment(Bin, Resume) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete,
+                fun(Stream) -> maybe_comment(<<Bin/binary, Stream/binary>>, Resume) end
+            }
+    end.
+
+
+%% inside a comment. everything is skipped until a star, which may be the first
+%% half of the closing sequence.
+
+comment(<<$*/?encoding, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+comment(<<_/?encoding, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+comment(Bin, Resume) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete, fun(Stream) -> comment(<<Bin/binary, Stream/binary>>, Resume) end}
+    end.
+
+
+%% a star has been read inside a comment. a solidus closes the comment and hands
+%% the remaining input back to Resume.
+
+maybe_comment_done(<<?solidus/?encoding, Rest/binary>>, Resume) ->
+    Resume(Rest);
+%% another star may itself be the first half of the closing sequence (as in
+%% "**/"), so stay in this state rather than dropping back into the comment body
+%% and swallowing the solidus that follows
+maybe_comment_done(<<$*/?encoding, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+maybe_comment_done(<<_/?encoding, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment_done(Bin, Resume) ->
+    case byte_size(Bin) >= ?symbol_size of
+        true -> {error, badjson}
+        ; _ ->
+            {incomplete, fun(Stream) -> maybe_comment_done(<<Bin/binary, Stream/binary>>, Resume) end}
+    end.
\ No newline at end of file