From 8c308159e1850e2b652fa055d9b323c5c8681164 Mon Sep 17 00:00:00 2001
From: alisdair
Date: Sun, 22 Aug 2010 02:01:22 +0000
Subject: [PATCH] interstitial commit to preserve experiment

---
 include/jsx_decoder_template.hrl | 751 +++++++++++++++++
 src/jsx_utf8.erl | 1299 +-----------------------------
 2 files changed, 753 insertions(+), 1297 deletions(-)
 create mode 100644 include/jsx_decoder_template.hrl

diff --git a/include/jsx_decoder_template.hrl b/include/jsx_decoder_template.hrl
new file mode 100644
index 0000000..c63e119
--- /dev/null
+++ b/include/jsx_decoder_template.hrl
@@ -0,0 +1,751 @@
+-include("./include/jsx_common.hrl").
+
+
+
+-spec parse(JSON::eep0018(), Opts::jsx_opts()) -> jsx_parser_result().
+
+
+%% option flags
+%% each macro expands to a pattern over a five element options tuple and binds
+%% the whole tuple to X, so it can be used directly in a function head
+
+-define(comments_enabled(X), {_, true, _, _, _} = X).
+-define(escaped_unicode_to_ascii(X), {_, _, ascii, _, _} = X).
+-define(escaped_unicode_to_codepoint(X), {_, _, codepoint, _, _} = X).
+-define(multi_term(X), {_, _, _, true, _} = X).
+
+%% whitespace
+-define(space, 16#20).
+-define(tab, 16#09).
+-define(cr, 16#0D).
+-define(newline, 16#0A).
+
+%% object delimiters
+-define(start_object, 16#7B).
+-define(end_object, 16#7D).
+
+%% array delimiters
+-define(start_array, 16#5B).
+-define(end_array, 16#5D).
+
+%% kv separators
+-define(comma, 16#2C).
+-define(quote, 16#22).
+-define(colon, 16#3A).
+
+%% string escape sequences
+%% note: escape and rsolidus are the same codepoint (the backslash)
+-define(escape, 16#5C).
+-define(rsolidus, 16#5C).
+-define(solidus, 16#2F).
+-define(formfeed, 16#0C).
+-define(backspace, 16#08).
+-define(unicode, 16#75).
+
+%% math
+-define(zero, 16#30).
+-define(decimalpoint, 16#2E).
+-define(negative, 16#2D).
+-define(positive, 16#2B).
+
+%% comments
+-define(star, 16#2a).
+
+
+%% some useful guards
+%% every guard macro expands to one parenthesised boolean expression. a macro
+%% body containing a top level `;` would split the surrounding guard sequence
+%% when several macros are joined with `,` (as in ?is_hex(D), ?is_hex(C)), so
+%% orelse is used instead
+%% is_hex accepts exactly the digits erlang:list_to_integer(_, 16) understands
+-define(is_hex(Symbol),
+    ((Symbol >= $a andalso Symbol =< $f) orelse (Symbol >= $A andalso Symbol =< $F)
+        orelse (Symbol >= $0 andalso Symbol =< $9))
+).
+
+-define(is_nonzero(Symbol),
+    (Symbol >= $1 andalso Symbol =< $9)
+).
+
+-define(is_noncontrol(Symbol),
+    (Symbol >= ?space)
+).
+
+%% true for the four whitespace codepoints defined above. written as a single
+%% parenthesised expression so it composes safely with other guard tests
+-define(is_whitespace(Symbol),
+    (Symbol =:= ?space orelse Symbol =:= ?tab orelse Symbol =:= ?cr orelse Symbol =:= ?newline)
+).
+
+
+
+%% compilation macros for unified decoder
+%% encoding is the bit syntax type used to read one codepoint, partial_codepoint
+%% is true when the remaining binary is too short to hold one code unit
+-ifdef(utf8).
+-define(encoding, utf8).
+-define(partial_codepoint(Bin), byte_size(Bin) < 1).
+-endif.
+
+-ifdef(utf16).
+-define(encoding, utf16).
+-define(partial_codepoint(Bin), byte_size(Bin) < 2).
+-endif.
+
+-ifdef(utf16le).
+-define(encoding, utf16-little).
+-define(partial_codepoint(Bin), byte_size(Bin) < 2).
+-endif.
+
+-ifdef(utf32).
+-define(encoding, utf32).
+-define(partial_codepoint(Bin), byte_size(Bin) < 4).
+-endif.
+
+-ifdef(utf32le).
+-define(encoding, utf32-little).
+-define(partial_codepoint(Bin), byte_size(Bin) < 4).
+-endif.
+
+
+%% this is the template for the utf backends for the jsx decoder. it's included
+%% by the various jsx_utfxx.erl frontends and all modifications to this file
+%% should take that into account
+
+
+-export([parse/2]).
+
+
+
+%% entry point: begins decoding with an empty nesting stack
+parse(JSON, Opts) ->
+    start(JSON, [], Opts).
+
+
+%% start/3: initial state, dispatches on the first significant codepoint.
+%% every state returns {event, Event, Next}, {incomplete, More} or
+%% {error, badjson}; More(end_stream) signals that no more input will arrive
+start(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_whitespace(S) ->
+    start(Rest, Stack, Opts);
+start(<<?start_object/?encoding, Rest/binary>>, Stack, Opts) ->
+    {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end};
+start(<<?start_array/?encoding, Rest/binary>>, Stack, Opts) ->
+    {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end};
+start(<<?quote/?encoding, Rest/binary>>, Stack, Opts) ->
+    string(Rest, Stack, Opts, []);
+start(<<$t/?encoding, Rest/binary>>, Stack, Opts) ->
+    tr(Rest, Stack, Opts);
+start(<<$f/?encoding, Rest/binary>>, Stack, Opts) ->
+    fa(Rest, Stack, Opts);
+start(<<$n/?encoding, Rest/binary>>, Stack, Opts) ->
+    nu(Rest, Stack, Opts);
+start(<<?negative/?encoding, Rest/binary>>, Stack, Opts) ->
+    negative(Rest, Stack, Opts, "-");
+start(<<?zero/?encoding, Rest/binary>>, Stack, Opts) ->
+    zero(Rest, Stack, Opts, "0");
+start(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Opts, [S]);
+start(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts)) ->
+    maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Opts) end);
+start(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> start(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% maybe_done/3: state entered after a complete value. the top of the stack
+%% decides whether a closing delimiter or a comma is acceptable; an empty stack
+%% means the top level term is finished
+maybe_done(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_whitespace(S) ->
+    maybe_done(Rest, Stack, Opts);
+maybe_done(<<?end_object/?encoding, Rest/binary>>, [object|Stack], Opts) ->
+    {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end};
+maybe_done(<<?end_array/?encoding, Rest/binary>>, [array|Stack], Opts) ->
+    {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end};
+maybe_done(<<?comma/?encoding, Rest/binary>>, [object|Stack], Opts) ->
+    key(Rest, [key|Stack], Opts);
+maybe_done(<<?comma/?encoding, Rest/binary>>, [array|_] = Stack, Opts) ->
+    value(Rest, Stack, Opts);
+maybe_done(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts)) ->
+    maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Opts) end);
+maybe_done(Rest, [], ?multi_term(Opts)) ->
+    {event, end_json, fun() -> start(Rest, [], Opts) end};
+maybe_done(Rest, [], Opts) ->
+    done(Rest, Opts);
+maybe_done(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> maybe_done(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% done/2: only whitespace (and comments, when enabled) may follow the term.
+%% end_json is emitted once the input is exhausted
+done(<<S/?encoding, Rest/binary>>, Opts) when ?is_whitespace(S) ->
+    done(Rest, Opts);
+done(<<?solidus/?encoding, Rest/binary>>, ?comments_enabled(Opts)) ->
+    maybe_comment(Rest, fun(Resume) -> done(Resume, Opts) end);
+done(<<>>, Opts) ->
+    {event, end_json, fun() -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> done(Stream, Opts) end} end};
+done(Bin, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> done(<<Bin/binary, Stream/binary>>, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% object/3: state entered directly after an opening brace, with `key` on top
+%% of the stack. accepts the first key or an immediate closing brace
+object(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_whitespace(S) ->
+    object(Rest, Stack, Opts);
+object(<<?quote/?encoding, Rest/binary>>, Stack, Opts) ->
+    string(Rest, Stack, Opts, []);
+object(<<?end_object/?encoding, Rest/binary>>, [key|Stack], Opts) ->
+    {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end};
+object(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts)) ->
+    maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Opts) end);
+object(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> object(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% array/3: state entered directly after an opening bracket, with `array` on
+%% top of the stack. accepts any value or an immediate closing bracket
+array(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_whitespace(S) ->
+    array(Rest, Stack, Opts);
+array(<<?quote/?encoding, Rest/binary>>, Stack, Opts) ->
+    string(Rest, Stack, Opts, []);
+array(<<$t/?encoding, Rest/binary>>, Stack, Opts) ->
+    tr(Rest, Stack, Opts);
+array(<<$f/?encoding, Rest/binary>>, Stack, Opts) ->
+    fa(Rest, Stack, Opts);
+array(<<$n/?encoding, Rest/binary>>, Stack, Opts) ->
+    nu(Rest, Stack, Opts);
+array(<<?negative/?encoding, Rest/binary>>, Stack, Opts) ->
+    negative(Rest, Stack, Opts, "-");
+array(<<?zero/?encoding, Rest/binary>>, Stack, Opts) ->
+    zero(Rest, Stack, Opts, "0");
+array(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Opts, [S]);
+array(<<?start_object/?encoding, Rest/binary>>, Stack, Opts) ->
+    {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end};
+array(<<?start_array/?encoding, Rest/binary>>, Stack, Opts) ->
+    {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end};
+array(<<?end_array/?encoding, Rest/binary>>, [array|Stack], Opts) ->
+    {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end};
+array(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts)) ->
+    maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Opts) end);
+array(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> array(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% value/3: expects a value, used after a colon inside an object and after a
+%% comma inside an array. unlike array/3 a closing delimiter is not accepted
+value(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_whitespace(S) ->
+    value(Rest, Stack, Opts);
+value(<<?quote/?encoding, Rest/binary>>, Stack, Opts) ->
+    string(Rest, Stack, Opts, []);
+value(<<$t/?encoding, Rest/binary>>, Stack, Opts) ->
+    tr(Rest, Stack, Opts);
+value(<<$f/?encoding, Rest/binary>>, Stack, Opts) ->
+    fa(Rest, Stack, Opts);
+value(<<$n/?encoding, Rest/binary>>, Stack, Opts) ->
+    nu(Rest, Stack, Opts);
+value(<<?negative/?encoding, Rest/binary>>, Stack, Opts) ->
+    negative(Rest, Stack, Opts, "-");
+value(<<?zero/?encoding, Rest/binary>>, Stack, Opts) ->
+    zero(Rest, Stack, Opts, "0");
+value(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Opts, [S]);
+value(<<?start_object/?encoding, Rest/binary>>, Stack, Opts) ->
+    {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end};
+value(<<?start_array/?encoding, Rest/binary>>, Stack, Opts) ->
+    {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end};
+value(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts)) ->
+    maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Opts) end);
+value(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> value(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% colon/3: expects the colon that separates a key from its value. on success
+%% the `key` marker on the stack is replaced by `object`
+colon(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_whitespace(S) ->
+    colon(Rest, Stack, Opts);
+colon(<<?colon/?encoding, Rest/binary>>, [key|Stack], Opts) ->
+    value(Rest, [object|Stack], Opts);
+colon(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts)) ->
+    maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Opts) end);
+colon(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> colon(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% key/3: expects the opening quote of an object key, used after a comma
+%% inside an object
+key(<<S/?encoding, Rest/binary>>, Stack, Opts) when ?is_whitespace(S) ->
+    key(Rest, Stack, Opts);
+key(<<?quote/?encoding, Rest/binary>>, Stack, Opts) ->
+    string(Rest, Stack, Opts, []);
+key(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts)) ->
+    maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Opts) end);
+key(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> key(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% string has an additional parameter, an accumulator (Acc) used to hold the intermediate
+%% representation of the string being parsed. using a list of integers representing
+%% unicode codepoints is faster than constructing binaries, many of which will be
+%% converted back to lists by the user anyways
+%% string uses partial_utf/1 to cease parsing when invalid encodings are encountered
+%% rather than just checking remaining binary size like other states
+%% Acc is built in reverse and reversed once when the closing quote is seen
+string(<<?quote/?encoding, Rest/binary>>, [key|_] = Stack, Opts, Acc) ->
+    {event, {key, lists:reverse(Acc)}, fun() -> colon(Rest, Stack, Opts) end};
+string(<<?quote/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    {event, {string, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end};
+string(<<?rsolidus/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    escape(Rest, Stack, Opts, Acc);
+string(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_noncontrol(S) ->
+    string(Rest, Stack, Opts, [S|Acc]);
+string(Bin, Stack, Opts, Acc) ->
+    case partial_utf(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> string(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% partial_utf/1 returns true when Bin could be the beginning of a codepoint
+%% that was cut off by the end of the input, false otherwise. every case
+%% expression has a fallthrough so that a lead byte followed by an invalid
+%% continuation byte yields false (and so badjson) instead of a case_clause
+%% crash
+-ifdef(utf8).
+partial_utf(<<>>) -> true;
+partial_utf(<<X>>) when X >= 16#c2, X =< 16#df -> true;
+partial_utf(<<X, Rest/binary>>) when X >= 16#e0, X =< 16#ef ->
+    case Rest of
+        <<>> -> true
+        ; <<Y>> when Y >= 16#80, Y =< 16#bf -> true
+        ; _ -> false
+    end;
+partial_utf(<<X, Rest/binary>>) when X >= 16#f0, X =< 16#f4 ->
+    case Rest of
+        <<>> -> true
+        ; <<Y>> when Y >= 16#80, Y =< 16#bf -> true
+        ; <<Y, Z>> when Y >= 16#80, Y =< 16#bf, Z >= 16#80, Z =< 16#bf -> true
+        ; _ -> false
+    end;
+partial_utf(_) -> false.
+-endif.
+
+-ifdef(utf16).
+partial_utf(<<>>) -> true;
+%% this case is not strictly true, there are single bytes that should be rejected, but
+%% they're rare enough they can be ignored
+partial_utf(<<_X>>) -> true;
+%% NOTE(review): the two patterns below were reconstructed as the big endian
+%% mirror of the utf16le clauses; confirm against the original commit
+partial_utf(<<X, _Y>>) when X >= 16#d8, X =< 16#df -> true;
+partial_utf(<<X, _Y, Z>>) when X >= 16#d8, X =< 16#df, Z >= 16#dc, Z =< 16#df -> true;
+partial_utf(_) -> false.
+-endif.
+
+-ifdef(utf16le).
+partial_utf(<<>>) -> true;
+%% this case is not strictly true, there are single bytes that should be rejected, but
+%% they're rare enough they can be ignored
+partial_utf(<<_X>>) -> true;
+partial_utf(<<_Y, X>>) when X >= 16#d8, X =< 16#df -> true;
+partial_utf(<<_Y, X, _Z>>) when X >= 16#d8, X =< 16#df -> true;
+partial_utf(_) -> false.
+-endif.
+
+%% a utf32 codepoint is always four bytes, so only a shorter remainder can be a
+%% truncated codepoint. four or more bytes that string/4 refused to match are
+%% invalid and must not be reported as incomplete
+-ifdef(utf32).
+partial_utf(Bin) -> byte_size(Bin) < 4.
+-endif.
+
+-ifdef(utf32le).
+partial_utf(Bin) -> byte_size(Bin) < 4.
+-endif.
+
+
+%% only thing to note here is the additional accumulator passed to escaped_unicode used
+%% to hold the codepoint sequence. unnecessary, but nicer than using the string
+%% accumulator
+escape(<<$b/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    string(Rest, Stack, Opts, "\b" ++ Acc);
+escape(<<$f/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    string(Rest, Stack, Opts, "\f" ++ Acc);
+escape(<<$n/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    string(Rest, Stack, Opts, "\n" ++ Acc);
+escape(<<$r/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    string(Rest, Stack, Opts, "\r" ++ Acc);
+escape(<<$t/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    string(Rest, Stack, Opts, "\t" ++ Acc);
+escape(<<$u/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    escaped_unicode(Rest, Stack, Opts, Acc, []);
+escape(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc)
+        when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus ->
+    string(Rest, Stack, Opts, [S] ++ Acc);
+escape(Bin, Stack, Opts, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> escape(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% this code is ugly and unfortunate, but so is json's handling of escaped unicode
+%% codepoint sequences. if the ascii option is present, the sequence is converted
+%% to a codepoint and inserted into the string if it represents an ascii value. if
+%% the codepoint option is present the sequence is converted and inserted as long
+%% as it represents a valid unicode codepoint. this means non-characters
+%% representable in 16 bits are not converted (the utf16 surrogates and the two
+%% special non-characters). any other option and no conversion is done
+%% the last argument collects the hex digits in reverse order; the first three
+%% clauses fire on the fourth digit
+escaped_unicode(<<D/?encoding, Rest/binary>>,
+        Stack,
+        ?escaped_unicode_to_ascii(Opts),
+        String,
+        [C, B, A])
+            when ?is_hex(D) ->
+    case erlang:list_to_integer([A, B, C, D], 16) of
+        X when X < 128 ->
+            string(Rest, Stack, Opts, [X] ++ String)
+        ; _ ->
+            string(Rest, Stack, Opts, [D, C, B, A, $u, ?rsolidus] ++ String)
+    end;
+escaped_unicode(<<D/?encoding, Rest/binary>>,
+        Stack,
+        ?escaped_unicode_to_codepoint(Opts),
+        String,
+        [C, B, A])
+            when ?is_hex(D) ->
+    case erlang:list_to_integer([A, B, C, D], 16) of
+        X when X >= 16#dc00, X =< 16#dfff ->
+            case check_acc_for_surrogate(String) of
+                false ->
+                    string(Rest, Stack, Opts, [D, C, B, A, $u, ?rsolidus] ++ String)
+                ; {Y, NewString} ->
+                    string(Rest, Stack, Opts, [surrogate_to_codepoint(Y, X)] ++ NewString)
+            end
+        ; X when X < 16#d800; X > 16#dfff, X < 16#fffe ->
+            string(Rest, Stack, Opts, [X] ++ String)
+        ; _ ->
+            string(Rest, Stack, Opts, [D, C, B, A, $u, ?rsolidus] ++ String)
+    end;
+escaped_unicode(<<D/?encoding, Rest/binary>>, Stack, Opts, String, [C, B, A]) when ?is_hex(D) ->
+    string(Rest, Stack, Opts, [D, C, B, A, $u, ?rsolidus] ++ String);
+escaped_unicode(<<S/?encoding, Rest/binary>>, Stack, Opts, String, Acc) when ?is_hex(S) ->
+    escaped_unicode(Rest, Stack, Opts, String, [S] ++ Acc);
+escaped_unicode(Bin, Stack, Opts, String, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> escaped_unicode(<<Bin/binary, Stream/binary>>, Stack, Opts, String, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% upon encountering a low pair json/hex encoded value, check to see if there's a high
+%% value already in the accumulator
+%% returns {High, RestOfAcc} when the accumulator starts with an unconverted
+%% \uXXXX high surrogate, false otherwise. the digits are validated with an
+%% ordinary function rather than a chain of guard macros so the check cannot
+%% let a non hex character through to list_to_integer/2
+check_acc_for_surrogate([D, C, B, A, $u, ?rsolidus|Rest]) ->
+    Digits = [A, B, C, D],
+    case lists:all(fun is_hex_digit/1, Digits) of
+        true ->
+            case erlang:list_to_integer(Digits, 16) of
+                X when X >= 16#d800, X =< 16#dbff ->
+                    {X, Rest};
+                _ ->
+                    false
+            end;
+        false ->
+            false
+    end;
+check_acc_for_surrogate(_) ->
+    false.
+
+
+%% true for the characters 0-9, a-f and A-F
+is_hex_digit(C) when C >= $0, C =< $9 -> true;
+is_hex_digit(C) when C >= $a, C =< $f -> true;
+is_hex_digit(C) when C >= $A, C =< $F -> true;
+is_hex_digit(_) -> false.
+
+
+%% stole this from the unicode spec
+%% combines a utf16 high/low surrogate pair into the codepoint it encodes
+surrogate_to_codepoint(High, Low) ->
+    (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000.
+
+
+%% like strings, numbers are collected in an intermediate accumulator before
+%% being emitted to the callback handler
+%% the accumulator is kept in reverse order, which is why literals prepended to
+%% it (such as "e0.") read backwards
+negative(<<$0/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    zero(Rest, Stack, Opts, "0" ++ Acc);
+negative(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Opts, [S] ++ Acc);
+negative(Bin, Stack, Opts, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> negative(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% zero/4: a leading zero has been read. no further integer digits may follow,
+%% only a fraction, an exponent or the end of the number
+zero(<<?end_object/?encoding, Rest/binary>>, [object|Stack], Opts, Acc) ->
+    {event, {integer, lists:reverse(Acc)}, fun() ->
+        {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}
+    end};
+zero(<<?end_array/?encoding, Rest/binary>>, [array|Stack], Opts, Acc) ->
+    {event, {integer, lists:reverse(Acc)}, fun() ->
+        {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}
+    end};
+zero(<<?comma/?encoding, Rest/binary>>, [object|Stack], Opts, Acc) ->
+    {event, {integer, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end};
+zero(<<?comma/?encoding, Rest/binary>>, [array|_] = Stack, Opts, Acc) ->
+    {event, {integer, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end};
+zero(<<?decimalpoint/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    initial_decimal(Rest, Stack, Opts, [?decimalpoint] ++ Acc);
+%% an exponent may follow a zero directly (0e5), handled the same way
+%% integer/4 handles it
+zero(<<$e/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    e(Rest, Stack, Opts, "e0." ++ Acc);
+zero(<<$E/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    e(Rest, Stack, Opts, "e0." ++ Acc);
+zero(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_whitespace(S) ->
+    {event, {integer, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end};
+zero(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts), Acc) ->
+    maybe_comment(Rest, fun(Resume) -> zero(Resume, Stack, Opts, Acc) end);
+zero(<<>>, [], Opts, Acc) ->
+    {incomplete, fun(end_stream) ->
+            {event, {integer, lists:reverse(Acc)}, fun() ->
+                {event, end_json, fun() -> zero(<<>>, [], Opts, Acc) end}
+            end}
+        ; (Stream) -> zero(Stream, [], Opts, Acc)
+    end};
+zero(Bin, Stack, Opts, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> zero(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% integer/4: one or more digits of an integer that does not start with zero.
+%% when an exponent follows without a fraction, ".0" is inserted so the
+%% resulting {float, _} text always contains a decimal point
+integer(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_nonzero(S) ->
+    integer(Rest, Stack, Opts, [S] ++ Acc);
+integer(<<?end_object/?encoding, Rest/binary>>, [object|Stack], Opts, Acc) ->
+    {event, {integer, lists:reverse(Acc)}, fun() ->
+        {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}
+    end};
+integer(<<?end_array/?encoding, Rest/binary>>, [array|Stack], Opts, Acc) ->
+    {event, {integer, lists:reverse(Acc)}, fun() ->
+        {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}
+    end};
+integer(<<?comma/?encoding, Rest/binary>>, [object|Stack], Opts, Acc) ->
+    {event, {integer, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end};
+integer(<<?comma/?encoding, Rest/binary>>, [array|_] = Stack, Opts, Acc) ->
+    {event, {integer, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end};
+integer(<<?decimalpoint/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    initial_decimal(Rest, Stack, Opts, [?decimalpoint] ++ Acc);
+integer(<<?zero/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    integer(Rest, Stack, Opts, [?zero] ++ Acc);
+integer(<<$e/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    e(Rest, Stack, Opts, "e0." ++ Acc);
+integer(<<$E/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    e(Rest, Stack, Opts, "e0." ++ Acc);
+integer(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_whitespace(S) ->
+    {event, {integer, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end};
+integer(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts), Acc) ->
+    maybe_comment(Rest, fun(Resume) -> integer(Resume, Stack, Opts, Acc) end);
+integer(<<>>, [], Opts, Acc) ->
+    {incomplete, fun(end_stream) ->
+            {event, {integer, lists:reverse(Acc)}, fun() ->
+                {event, end_json, fun() -> integer(<<>>, [], Opts, Acc) end}
+            end}
+        ; (Stream) -> integer(Stream, [], Opts, Acc)
+    end};
+integer(Bin, Stack, Opts, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> integer(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% NOTE(review): the binary patterns in this section were lost from the patch
+%% text and have been reconstructed from the variables each clause uses and
+%% from the parallel clauses of integer/4; confirm against the original commit
+
+%% initial_decimal/4: at least one digit must follow the decimal point
+initial_decimal(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_nonzero(S) ->
+    decimal(Rest, Stack, Opts, [S] ++ Acc);
+initial_decimal(<<?zero/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    decimal(Rest, Stack, Opts, [?zero] ++ Acc);
+initial_decimal(Bin, Stack, Opts, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> initial_decimal(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% decimal/4: remaining digits of the fraction, emitted as a {float, _} event
+decimal(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_nonzero(S) ->
+    decimal(Rest, Stack, Opts, [S] ++ Acc);
+decimal(<<?end_object/?encoding, Rest/binary>>, [object|Stack], Opts, Acc) ->
+    {event, {float, lists:reverse(Acc)}, fun() ->
+        {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}
+    end};
+decimal(<<?end_array/?encoding, Rest/binary>>, [array|Stack], Opts, Acc) ->
+    {event, {float, lists:reverse(Acc)}, fun() ->
+        {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}
+    end};
+decimal(<<?comma/?encoding, Rest/binary>>, [object|Stack], Opts, Acc) ->
+    {event, {float, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end};
+decimal(<<?comma/?encoding, Rest/binary>>, [array|_] = Stack, Opts, Acc) ->
+    {event, {float, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end};
+decimal(<<?zero/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    decimal(Rest, Stack, Opts, [?zero] ++ Acc);
+decimal(<<$e/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    e(Rest, Stack, Opts, "e" ++ Acc);
+decimal(<<$E/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    e(Rest, Stack, Opts, "e" ++ Acc);
+decimal(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_whitespace(S) ->
+    {event, {float, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end};
+decimal(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts), Acc) ->
+    maybe_comment(Rest, fun(Resume) -> decimal(Resume, Stack, Opts, Acc) end);
+decimal(<<>>, [], Opts, Acc) ->
+    {incomplete, fun(end_stream) ->
+            {event, {float, lists:reverse(Acc)}, fun() ->
+                {event, end_json, fun() -> decimal(<<>>, [], Opts, Acc) end}
+            end}
+        ; (Stream) -> decimal(Stream, [], Opts, Acc)
+    end};
+decimal(Bin, Stack, Opts, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> decimal(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% e/4: directly after the exponent marker; a sign or a digit must follow
+e(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Opts, [S] ++ Acc);
+e(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when S =:= ?positive; S =:= ?negative ->
+    ex(Rest, Stack, Opts, [S] ++ Acc);
+e(Bin, Stack, Opts, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> e(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% ex/4: after the exponent sign; at least one digit must follow
+ex(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) ->
+    exp(Rest, Stack, Opts, [S] ++ Acc);
+ex(Bin, Stack, Opts, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> ex(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% exp/4: remaining exponent digits, emitted as a {float, _} event
+exp(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_nonzero(S) ->
+    exp(Rest, Stack, Opts, [S] ++ Acc);
+exp(<<?end_object/?encoding, Rest/binary>>, [object|Stack], Opts, Acc) ->
+    {event, {float, lists:reverse(Acc)}, fun() ->
+        {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}
+    end};
+exp(<<?end_array/?encoding, Rest/binary>>, [array|Stack], Opts, Acc) ->
+    {event, {float, lists:reverse(Acc)}, fun() ->
+        {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}
+    end};
+exp(<<?comma/?encoding, Rest/binary>>, [object|Stack], Opts, Acc) ->
+    {event, {float, lists:reverse(Acc)}, fun() -> key(Rest, [key|Stack], Opts) end};
+exp(<<?comma/?encoding, Rest/binary>>, [array|_] = Stack, Opts, Acc) ->
+    {event, {float, lists:reverse(Acc)}, fun() -> value(Rest, Stack, Opts) end};
+exp(<<?zero/?encoding, Rest/binary>>, Stack, Opts, Acc) ->
+    exp(Rest, Stack, Opts, [?zero] ++ Acc);
+exp(<<S/?encoding, Rest/binary>>, Stack, Opts, Acc) when ?is_whitespace(S) ->
+    {event, {float, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end};
+exp(<<?solidus/?encoding, Rest/binary>>, Stack, ?comments_enabled(Opts), Acc) ->
+    maybe_comment(Rest, fun(Resume) -> exp(Resume, Stack, Opts, Acc) end);
+exp(<<>>, [], Opts, Acc) ->
+    {incomplete, fun(end_stream) ->
+            {event, {float, lists:reverse(Acc)}, fun() ->
+                {event, end_json, fun() -> exp(<<>>, [], Opts, Acc) end}
+            end}
+        ; (Stream) -> exp(Stream, [], Opts, Acc)
+    end};
+exp(Bin, Stack, Opts, Acc) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> exp(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% the literals true and false are matched one character per state so that
+%% the parser can suspend and resume between any two characters
+tr(<<$r/?encoding, Rest/binary>>, Stack, Opts) ->
+    tru(Rest, Stack, Opts);
+tr(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> tr(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+tru(<<$u/?encoding, Rest/binary>>, Stack, Opts) ->
+    true(Rest, Stack, Opts);
+tru(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> tru(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+true(<<$e/?encoding, Rest/binary>>, Stack, Opts) ->
+    {event, {literal, true}, fun() -> maybe_done(Rest, Stack, Opts) end};
+true(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> true(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+fa(<<$a/?encoding, Rest/binary>>, Stack, Opts) ->
+    fal(Rest, Stack, Opts);
+fa(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> fa(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+fal(<<$l/?encoding, Rest/binary>>, Stack, Opts) ->
+    fals(Rest, Stack, Opts);
+fal(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> fal(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+fals(<<$s/?encoding, Rest/binary>>, Stack, Opts) ->
+    false(Rest, Stack, Opts);
+fals(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> fals(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% NOTE(review): the resume patterns and the maybe_comment head in this
+%% section were lost from the patch text and have been reconstructed from the
+%% variables each clause uses; confirm against the original commit
+
+%% final character of the literal false
+false(<<$e/?encoding, Rest/binary>>, Stack, Opts) ->
+    {event, {literal, false}, fun() -> maybe_done(Rest, Stack, Opts) end};
+false(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> false(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% the literal null, matched one character per state
+nu(<<$u/?encoding, Rest/binary>>, Stack, Opts) ->
+    nul(Rest, Stack, Opts);
+nu(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> nu(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+nul(<<$l/?encoding, Rest/binary>>, Stack, Opts) ->
+    null(Rest, Stack, Opts);
+nul(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> nul(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+null(<<$l/?encoding, Rest/binary>>, Stack, Opts) ->
+    {event, {literal, null}, fun() -> maybe_done(Rest, Stack, Opts) end};
+null(Bin, Stack, Opts) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> null(<<Bin/binary, Stream/binary>>, Stack, Opts) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% comments are c style, ex: /* blah blah */
+%% any unicode character is valid in a comment except the */ sequence which ends
+%% the comment. they're implemented as a closure called when the comment ends that
+%% returns execution to the point where the comment began. comments are not
+%% reported in any way, simply parsed.
+%% maybe_comment/2 is entered after the opening solidus; a star must follow
+maybe_comment(<<?star/?encoding, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment(Bin, Resume) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> maybe_comment(<<Bin/binary, Stream/binary>>, Resume) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% NOTE(review): the binary patterns in this section were lost from the patch
+%% text and have been reconstructed from the variables each clause uses;
+%% confirm against the original commit
+
+%% comment/2: inside a comment body, skips codepoints until a star is seen
+comment(<<?star/?encoding, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+comment(<<_/?encoding, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+comment(Bin, Resume) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> comment(<<Bin/binary, Stream/binary>>, Resume) end}
+        ; false -> {error, badjson}
+    end.
+
+
+%% maybe_comment_done/2: a star has just been read. a solidus ends the comment
+%% and hands the remaining input to Resume. another star keeps the parser in
+%% this state so that a run of stars before the solidus (as in **/) still
+%% terminates the comment; anything else returns to the comment body
+maybe_comment_done(<<?solidus/?encoding, Rest/binary>>, Resume) ->
+    Resume(Rest);
+maybe_comment_done(<<?star/?encoding, Rest/binary>>, Resume) ->
+    maybe_comment_done(Rest, Resume);
+maybe_comment_done(<<_/?encoding, Rest/binary>>, Resume) ->
+    comment(Rest, Resume);
+maybe_comment_done(Bin, Resume) ->
+    case ?partial_codepoint(Bin) of
+        true -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> maybe_comment_done(<<Bin/binary, Stream/binary>>, Resume) end}
+        ; false -> {error, badjson}
+    end.
\ No newline at end of file
diff --git a/src/jsx_utf8.erl b/src/jsx_utf8.erl index 98b4252..6d8850b 100644 --- a/src/jsx_utf8.erl +++ b/src/jsx_utf8.erl @@ -1,1302 +1,7 @@ --file("priv/jsx_decoder_template.erl", 1). - - -%% @hidden hide this module from edoc, exported functions are internal to jsx -%% and may be altered or removed without notice - - -module(jsx_utf8). --author("alisdairsullivan@yahoo.ca"). - --export([parse/2]). - --file("./include/jsx_decoder.hrl", 1). - --file("./include/jsx_common.hrl", 1). - --type jsx_opts() :: [jsx_opt()]. - --type jsx_opt() :: {comments, true | false} - | {escaped_unicode, ascii | codepoint | none} - | {multi_term, true | false} - | {encoding, - auto | - utf8 | - utf16 | - {utf16, little} | - utf32 | - {utf32, little}}. - --type unicode_codepoint() :: 0..1114111. - --type unicode_string() :: [unicode_codepoint()]. - --type jsx_event() :: start_object - | end_object - | start_array - | end_array - | end_json - | {key, unicode_string()} - | {string, unicode_string()} - | {integer, unicode_string()} - | {float, unicode_string()} - | {literal, true} - | {literal, false} - | {literal, null}. - --type jsx_parser() :: fun((binary()) -> jsx_parser_result()). 
- --type jsx_parser_result() :: {event, - jsx_event(), - fun(() -> jsx_parser_result())} - | {incomplete, jsx_parser()} - | {error, badjson} - | ok. - --type supported_utf() :: utf8 - | utf16 - | {utf16, little} - | utf32 - | {utf32, little}. - --type eep0018() :: eep0018_object() | eep0018_array(). - --type eep0018_array() :: [eep0018_term()]. - --type eep0018_object() :: [{eep0018_key(), eep0018_term()}]. - --type eep0018_key() :: binary() | atom(). - --type eep0018_term() :: eep0018_array() - | eep0018_object() - | eep0018_string() - | eep0018_number() - | true - | false - | null. - --type eep0018_string() :: binary(). - --type eep0018_number() :: float() | integer(). - --type encoder_opts() :: [encoder_opt()]. - --type encoder_opt() :: {strict, true | false} - | {encoding, supported_utf()} - | {space, integer()} - | space - | {indent, integer()} - | indent. - --type decoder_opts() :: [decoder_opt()]. - --type decoder_opt() :: {strict, true | false} - | {comments, true | false} - | {encoding, supported_utf()} - | {label, atom | binary | existing_atom} - | {float, true | false}. - --type verify_opts() :: [verify_opt()]. - --type verify_opt() :: {strict, true | false} - | {encoding, auto | supported_utf()} - | {comments, true | false}. - --type format_opts() :: [format_opt()]. - --type format_opt() :: {strict, true | false} - | {encoding, auto | supported_utf()} - | {comments, true | false} - | {space, integer()} - | space - | {indent, integer()} - | indent - | {output_encoding, supported_utf()}. - --file("./include/jsx_decoder.hrl", 24). - --spec parse(JSON :: eep0018(), Opts :: jsx_opts()) -> - jsx_parser_result(). - --file("priv/jsx_decoder_template.erl", 35). - -parse(JSON, Opts) -> - start(JSON, [], Opts). 
- -start(<>, Stack, Opts) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - start(Rest, Stack, Opts); -start(<<123/utf8,Rest/binary>>, Stack, Opts) -> - {event, - start_object, - fun() -> - object(Rest, [key|Stack], Opts) - end}; -start(<<91/utf8,Rest/binary>>, Stack, Opts) -> - {event, - start_array, - fun() -> - array(Rest, [array|Stack], Opts) - end}; -start(<<34/utf8,Rest/binary>>, Stack, Opts) -> - string(Rest, Stack, Opts, []); -start(<<$t/utf8,Rest/binary>>, Stack, Opts) -> - tr(Rest, Stack, Opts); -start(<<$f/utf8,Rest/binary>>, Stack, Opts) -> - fa(Rest, Stack, Opts); -start(<<$n/utf8,Rest/binary>>, Stack, Opts) -> - nu(Rest, Stack, Opts); -start(<<45/utf8,Rest/binary>>, Stack, Opts) -> - negative(Rest, Stack, Opts, "-"); -start(<<48/utf8,Rest/binary>>, Stack, Opts) -> - zero(Rest, Stack, Opts, "0"); -start(<>, Stack, Opts) - when - S >= $1 - andalso - S =< $9 -> - integer(Rest, Stack, Opts, [S]); -start(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts) -> - maybe_comment(Rest, - fun(Resume) -> - start(Resume, Stack, Opts) - end); -start(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - start(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. 
- -maybe_done(<>, Stack, Opts) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - maybe_done(Rest, Stack, Opts); -maybe_done(<<125/utf8,Rest/binary>>, [object|Stack], Opts) -> - {event, - end_object, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -maybe_done(<<93/utf8,Rest/binary>>, [array|Stack], Opts) -> - {event, - end_array, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -maybe_done(<<44/utf8,Rest/binary>>, [object|Stack], Opts) -> - key(Rest, [key|Stack], Opts); -maybe_done(<<44/utf8,Rest/binary>>, [array|_] = Stack, Opts) -> - value(Rest, Stack, Opts); -maybe_done(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts) -> - maybe_comment(Rest, - fun(Resume) -> - maybe_done(Resume, Stack, Opts) - end); -maybe_done(Rest, [], {_,_,_,true,_} = Opts) -> - {event, - end_json, - fun() -> - start(Rest, [], Opts) - end}; -maybe_done(Rest, [], Opts) -> - done(Rest, Opts); -maybe_done(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - maybe_done(<>, - Stack, - Opts) - end}; - false -> - {error,badjson} - end. - -done(<>, Opts) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - done(Rest, Opts); -done(<<47/utf8,Rest/binary>>, {_,true,_,_,_} = Opts) -> - maybe_comment(Rest, - fun(Resume) -> - done(Resume, Opts) - end); -done(<<>>, Opts) -> - {event, - end_json, - fun() -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - done(Stream, Opts) - end} - end}; -done(Bin, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - done(<>, Opts) - end}; - false -> - {error,badjson} - end. 
- -object(<>, Stack, Opts) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - object(Rest, Stack, Opts); -object(<<34/utf8,Rest/binary>>, Stack, Opts) -> - string(Rest, Stack, Opts, []); -object(<<125/utf8,Rest/binary>>, [key|Stack], Opts) -> - {event, - end_object, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -object(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts) -> - maybe_comment(Rest, - fun(Resume) -> - object(Resume, Stack, Opts) - end); -object(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - object(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -array(<>, Stack, Opts) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - array(Rest, Stack, Opts); -array(<<34/utf8,Rest/binary>>, Stack, Opts) -> - string(Rest, Stack, Opts, []); -array(<<$t/utf8,Rest/binary>>, Stack, Opts) -> - tr(Rest, Stack, Opts); -array(<<$f/utf8,Rest/binary>>, Stack, Opts) -> - fa(Rest, Stack, Opts); -array(<<$n/utf8,Rest/binary>>, Stack, Opts) -> - nu(Rest, Stack, Opts); -array(<<45/utf8,Rest/binary>>, Stack, Opts) -> - negative(Rest, Stack, Opts, "-"); -array(<<48/utf8,Rest/binary>>, Stack, Opts) -> - zero(Rest, Stack, Opts, "0"); -array(<>, Stack, Opts) - when - S >= $1 - andalso - S =< $9 -> - integer(Rest, Stack, Opts, [S]); -array(<<123/utf8,Rest/binary>>, Stack, Opts) -> - {event, - start_object, - fun() -> - object(Rest, [key|Stack], Opts) - end}; -array(<<91/utf8,Rest/binary>>, Stack, Opts) -> - {event, - start_array, - fun() -> - array(Rest, [array|Stack], Opts) - end}; -array(<<93/utf8,Rest/binary>>, [array|Stack], Opts) -> - {event, - end_array, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -array(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts) -> - maybe_comment(Rest, - fun(Resume) -> - array(Resume, Stack, Opts) - end); -array(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - 
array(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -value(<>, Stack, Opts) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - value(Rest, Stack, Opts); -value(<<34/utf8,Rest/binary>>, Stack, Opts) -> - string(Rest, Stack, Opts, []); -value(<<$t/utf8,Rest/binary>>, Stack, Opts) -> - tr(Rest, Stack, Opts); -value(<<$f/utf8,Rest/binary>>, Stack, Opts) -> - fa(Rest, Stack, Opts); -value(<<$n/utf8,Rest/binary>>, Stack, Opts) -> - nu(Rest, Stack, Opts); -value(<<45/utf8,Rest/binary>>, Stack, Opts) -> - negative(Rest, Stack, Opts, "-"); -value(<<48/utf8,Rest/binary>>, Stack, Opts) -> - zero(Rest, Stack, Opts, "0"); -value(<>, Stack, Opts) - when - S >= $1 - andalso - S =< $9 -> - integer(Rest, Stack, Opts, [S]); -value(<<123/utf8,Rest/binary>>, Stack, Opts) -> - {event, - start_object, - fun() -> - object(Rest, [key|Stack], Opts) - end}; -value(<<91/utf8,Rest/binary>>, Stack, Opts) -> - {event, - start_array, - fun() -> - array(Rest, [array|Stack], Opts) - end}; -value(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts) -> - maybe_comment(Rest, - fun(Resume) -> - value(Resume, Stack, Opts) - end); -value(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - value(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -colon(<>, Stack, Opts) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - colon(Rest, Stack, Opts); -colon(<<58/utf8,Rest/binary>>, [key|Stack], Opts) -> - value(Rest, [object|Stack], Opts); -colon(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts) -> - maybe_comment(Rest, - fun(Resume) -> - colon(Resume, Stack, Opts) - end); -colon(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - colon(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. 
- -key(<>, Stack, Opts) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - key(Rest, Stack, Opts); -key(<<34/utf8,Rest/binary>>, Stack, Opts) -> - string(Rest, Stack, Opts, []); -key(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts) -> - maybe_comment(Rest, - fun(Resume) -> - key(Resume, Stack, Opts) - end); -key(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - key(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -string(<<34/utf8,Rest/binary>>, [key|_] = Stack, Opts, Acc) -> - {event, - {key,lists:reverse(Acc)}, - fun() -> - colon(Rest, Stack, Opts) - end}; -string(<<34/utf8,Rest/binary>>, Stack, Opts, Acc) -> - {event, - {string,lists:reverse(Acc)}, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -string(<<92/utf8,Rest/binary>>, Stack, Opts, Acc) -> - escape(Rest, Stack, Opts, Acc); -string(<>, Stack, Opts, Acc) when S >= 32 -> - string(Rest, Stack, Opts, [S] ++ Acc); -string(Bin, Stack, Opts, Acc) -> - case partial_utf(Bin) of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - string(<>, - Stack, - Opts, - Acc) - end}; - false -> - {error,badjson} - end. - -partial_utf(<<>>) -> - true; -partial_utf(<>) when X >= 194, X =< 223 -> - true; -partial_utf(<>) when X >= 224, X =< 239 -> - case Rest of - <<>> -> - true; - <> when Y >= 128, Y =< 191 -> - true - end; -partial_utf(<>) when X >= 240, X =< 244 -> - case Rest of - <<>> -> - true; - <> when Y >= 128, Y =< 191 -> - true; - <> when Y >= 128, Y =< 191, Z >= 128, Z =< 191 -> - true - end; -partial_utf(_) -> - false. 
- -escape(<<$b/utf8,Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\b" ++ Acc); -escape(<<$f/utf8,Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\f" ++ Acc); -escape(<<$n/utf8,Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\n" ++ Acc); -escape(<<$r/utf8,Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\r" ++ Acc); -escape(<<$t/utf8,Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\t" ++ Acc); -escape(<<$u/utf8,Rest/binary>>, Stack, Opts, Acc) -> - escaped_unicode(Rest, Stack, Opts, Acc, []); -escape(<>, Stack, Opts, Acc) - when S =:= 34; S =:= 47; S =:= 92 -> - string(Rest, Stack, Opts, [S] ++ Acc); -escape(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - escape(<>, - Stack, - Opts, - Acc) - end}; - false -> - {error,badjson} - end. - -escaped_unicode(<>, - Stack, - {_,_,ascii,_,_} = Opts, - String, - [C,B,A]) - when - D >= $a - andalso - D =< $z; - D >= $A - andalso - D =< $Z; - D >= $0 - andalso - D =< $9 -> - case erlang:list_to_integer([A,B,C,D], 16) of - X when X < 128 -> - string(Rest, Stack, Opts, [X] ++ String); - _ -> - string(Rest, Stack, Opts, [D,C,B,A,$u,92] ++ String) - end; -escaped_unicode(<>, - Stack, - {_,_,codepoint,_,_} = Opts, - String, - [C,B,A]) - when - D >= $a - andalso - D =< $z; - D >= $A - andalso - D =< $Z; - D >= $0 - andalso - D =< $9 -> - case erlang:list_to_integer([A,B,C,D], 16) of - X when X >= 56320, X =< 57343 -> - case check_acc_for_surrogate(String) of - false -> - string(Rest, Stack, Opts, [D,C,B,A,$u,92] ++ String); - {Y,NewString} -> - string(Rest, - Stack, - Opts, - [surrogate_to_codepoint(Y, X)] ++ NewString) - end; - X when X < 55296; X > 57343, X < 65534 -> - string(Rest, Stack, Opts, [X] ++ String); - _ -> - string(Rest, Stack, Opts, [D,C,B,A,$u,92] ++ String) - end; -escaped_unicode(<>, Stack, Opts, String, [C,B,A]) - when - D >= $a - andalso - D =< 
$z; - D >= $A - andalso - D =< $Z; - D >= $0 - andalso - D =< $9 -> - string(Rest, Stack, Opts, [D,C,B,A,$u,92] ++ String); -escaped_unicode(<>, Stack, Opts, String, Acc) - when - S >= $a - andalso - S =< $z; - S >= $A - andalso - S =< $Z; - S >= $0 - andalso - S =< $9 -> - escaped_unicode(Rest, Stack, Opts, String, [S] ++ Acc); -escaped_unicode(Bin, Stack, Opts, String, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - escaped_unicode(<>, - Stack, - Opts, - String, - Acc) - end}; - false -> - {error,badjson} - end. - -check_acc_for_surrogate([D,C,B,A,$u,92|Rest]) - when - D >= $a - andalso - D =< $z; - D >= $A - andalso - D =< $Z; - D >= $0 - andalso - D =< $9, - C >= $a - andalso - C =< $z; - C >= $A - andalso - C =< $Z; - C >= $0 - andalso - C =< $9, - B >= $a - andalso - B =< $z; - B >= $A - andalso - B =< $Z; - B >= $0 - andalso - B =< $9, - A >= $a - andalso - A =< $z; - A >= $A - andalso - A =< $Z; - A >= $0 - andalso - A =< $9 -> - case erlang:list_to_integer([A,B,C,D], 16) of - X when X >= 55296, X =< 56319 -> - {X,Rest}; - _ -> - false - end; -check_acc_for_surrogate(_) -> - false. - -surrogate_to_codepoint(High, Low) -> - (High - 55296) * 1024 + (Low - 56320) + 65536. - -negative(<<$0/utf8,Rest/binary>>, Stack, Opts, Acc) -> - zero(Rest, Stack, Opts, "0" ++ Acc); -negative(<>, Stack, Opts, Acc) - when - S >= $1 - andalso - S =< $9 -> - integer(Rest, Stack, Opts, [S] ++ Acc); -negative(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - negative(<>, - Stack, - Opts, - Acc) - end}; - false -> - {error,badjson} - end. 
- -zero(<<125/utf8,Rest/binary>>, [object|Stack], Opts, Acc) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - {event, - end_object, - fun() -> - maybe_done(Rest, Stack, Opts) - end} - end}; -zero(<<93/utf8,Rest/binary>>, [array|Stack], Opts, Acc) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - {event, - end_array, - fun() -> - maybe_done(Rest, Stack, Opts) - end} - end}; -zero(<<44/utf8,Rest/binary>>, [object|Stack], Opts, Acc) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - key(Rest, [key|Stack], Opts) - end}; -zero(<<44/utf8,Rest/binary>>, [array|_] = Stack, Opts, Acc) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - value(Rest, Stack, Opts) - end}; -zero(<<46/utf8,Rest/binary>>, Stack, Opts, Acc) -> - initial_decimal(Rest, Stack, Opts, [46] ++ Acc); -zero(<>, Stack, Opts, Acc) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -zero(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts, Acc) -> - maybe_comment(Rest, - fun(Resume) -> - zero(Resume, Stack, Opts, Acc) - end); -zero(<<>>, [], Opts, Acc) -> - {incomplete, - fun(end_stream) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - {event, - end_json, - fun() -> - zero(<<>>, [], Opts, Acc) - end} - end}; - (Stream) -> - zero(Stream, [], Opts, Acc) - end}; -zero(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - zero(<>, Stack, Opts, Acc) - end}; - false -> - {error,badjson} - end. 
- -integer(<>, Stack, Opts, Acc) - when - S >= $1 - andalso - S =< $9 -> - integer(Rest, Stack, Opts, [S] ++ Acc); -integer(<<125/utf8,Rest/binary>>, [object|Stack], Opts, Acc) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - {event, - end_object, - fun() -> - maybe_done(Rest, Stack, Opts) - end} - end}; -integer(<<93/utf8,Rest/binary>>, [array|Stack], Opts, Acc) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - {event, - end_array, - fun() -> - maybe_done(Rest, Stack, Opts) - end} - end}; -integer(<<44/utf8,Rest/binary>>, [object|Stack], Opts, Acc) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - key(Rest, [key|Stack], Opts) - end}; -integer(<<44/utf8,Rest/binary>>, [array|_] = Stack, Opts, Acc) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - value(Rest, Stack, Opts) - end}; -integer(<<46/utf8,Rest/binary>>, Stack, Opts, Acc) -> - initial_decimal(Rest, Stack, Opts, [46] ++ Acc); -integer(<<48/utf8,Rest/binary>>, Stack, Opts, Acc) -> - integer(Rest, Stack, Opts, [48] ++ Acc); -integer(<<$e/utf8,Rest/binary>>, Stack, Opts, Acc) -> - e(Rest, Stack, Opts, "e0." ++ Acc); -integer(<<$E/utf8,Rest/binary>>, Stack, Opts, Acc) -> - e(Rest, Stack, Opts, "e0." 
++ Acc); -integer(<>, Stack, Opts, Acc) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -integer(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts, Acc) -> - maybe_comment(Rest, - fun(Resume) -> - integer(Resume, Stack, Opts, Acc) - end); -integer(<<>>, [], Opts, Acc) -> - {incomplete, - fun(end_stream) -> - {event, - {integer,lists:reverse(Acc)}, - fun() -> - {event, - end_json, - fun() -> - integer(<<>>, [], Opts, Acc) - end} - end}; - (Stream) -> - integer(Stream, [], Opts, Acc) - end}; -integer(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - integer(<>, - Stack, - Opts, - Acc) - end}; - false -> - {error,badjson} - end. - -initial_decimal(<>, Stack, Opts, Acc) - when - S >= $1 - andalso - S =< $9 -> - decimal(Rest, Stack, Opts, [S] ++ Acc); -initial_decimal(<<48/utf8,Rest/binary>>, Stack, Opts, Acc) -> - decimal(Rest, Stack, Opts, [48] ++ Acc); -initial_decimal(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - initial_decimal(<>, - Stack, - Opts, - Acc) - end}; - false -> - {error,badjson} - end. 
- -decimal(<>, Stack, Opts, Acc) - when - S >= $1 - andalso - S =< $9 -> - decimal(Rest, Stack, Opts, [S] ++ Acc); -decimal(<<125/utf8,Rest/binary>>, [object|Stack], Opts, Acc) -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - {event, - end_object, - fun() -> - maybe_done(Rest, Stack, Opts) - end} - end}; -decimal(<<93/utf8,Rest/binary>>, [array|Stack], Opts, Acc) -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - {event, - end_array, - fun() -> - maybe_done(Rest, Stack, Opts) - end} - end}; -decimal(<<44/utf8,Rest/binary>>, [object|Stack], Opts, Acc) -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - key(Rest, [key|Stack], Opts) - end}; -decimal(<<44/utf8,Rest/binary>>, [array|_] = Stack, Opts, Acc) -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - value(Rest, Stack, Opts) - end}; -decimal(<<48/utf8,Rest/binary>>, Stack, Opts, Acc) -> - decimal(Rest, Stack, Opts, [48] ++ Acc); -decimal(<<$e/utf8,Rest/binary>>, Stack, Opts, Acc) -> - e(Rest, Stack, Opts, "e" ++ Acc); -decimal(<<$E/utf8,Rest/binary>>, Stack, Opts, Acc) -> - e(Rest, Stack, Opts, "e" ++ Acc); -decimal(<>, Stack, Opts, Acc) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -decimal(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts, Acc) -> - maybe_comment(Rest, - fun(Resume) -> - decimal(Resume, Stack, Opts, Acc) - end); -decimal(<<>>, [], Opts, Acc) -> - {incomplete, - fun(end_stream) -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - {event, - end_json, - fun() -> - decimal(<<>>, [], Opts, Acc) - end} - end}; - (Stream) -> - decimal(Stream, [], Opts, Acc) - end}; -decimal(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - decimal(<>, - Stack, - Opts, - Acc) - end}; - false -> - {error,badjson} - end. 
- -e(<>, Stack, Opts, Acc) - when - S =:= 48; - S >= $1 - andalso - S =< $9 -> - exp(Rest, Stack, Opts, [S] ++ Acc); -e(<>, Stack, Opts, Acc) when S =:= 43; S =:= 45 -> - ex(Rest, Stack, Opts, [S] ++ Acc); -e(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - e(<>, Stack, Opts, Acc) - end}; - false -> - {error,badjson} - end. - -ex(<>, Stack, Opts, Acc) - when - S =:= 48; - S >= $1 - andalso - S =< $9 -> - exp(Rest, Stack, Opts, [S] ++ Acc); -ex(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - ex(<>, Stack, Opts, Acc) - end}; - false -> - {error,badjson} - end. - -exp(<>, Stack, Opts, Acc) - when - S >= $1 - andalso - S =< $9 -> - exp(Rest, Stack, Opts, [S] ++ Acc); -exp(<<125/utf8,Rest/binary>>, [object|Stack], Opts, Acc) -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - {event, - end_object, - fun() -> - maybe_done(Rest, Stack, Opts) - end} - end}; -exp(<<93/utf8,Rest/binary>>, [array|Stack], Opts, Acc) -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - {event, - end_array, - fun() -> - maybe_done(Rest, Stack, Opts) - end} - end}; -exp(<<44/utf8,Rest/binary>>, [object|Stack], Opts, Acc) -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - key(Rest, [key|Stack], Opts) - end}; -exp(<<44/utf8,Rest/binary>>, [array|_] = Stack, Opts, Acc) -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - value(Rest, Stack, Opts) - end}; -exp(<<48/utf8,Rest/binary>>, Stack, Opts, Acc) -> - exp(Rest, Stack, Opts, [48] ++ Acc); -exp(<>, Stack, Opts, Acc) - when S =:= 32; S =:= 9; S =:= 13; S =:= 10 -> - {event, - {float,lists:reverse(Acc)}, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -exp(<<47/utf8,Rest/binary>>, Stack, {_,true,_,_,_} = Opts, Acc) -> - maybe_comment(Rest, - fun(Resume) -> - exp(Resume, Stack, Opts, Acc) - end); -exp(<<>>, [], Opts, Acc) -> - {incomplete, - fun(end_stream) -> 
- {event, - {float,lists:reverse(Acc)}, - fun() -> - {event, - end_json, - fun() -> - exp(<<>>, [], Opts, Acc) - end} - end}; - (Stream) -> - exp(Stream, [], Opts, Acc) - end}; -exp(Bin, Stack, Opts, Acc) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - exp(<>, Stack, Opts, Acc) - end}; - false -> - {error,badjson} - end. - -tr(<<$r/utf8,Rest/binary>>, Stack, Opts) -> - tru(Rest, Stack, Opts); -tr(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - tr(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -tru(<<$u/utf8,Rest/binary>>, Stack, Opts) -> - true(Rest, Stack, Opts); -tru(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - tru(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -true(<<$e/utf8,Rest/binary>>, Stack, Opts) -> - {event, - {literal,true}, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -true(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - true(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -fa(<<$a/utf8,Rest/binary>>, Stack, Opts) -> - fal(Rest, Stack, Opts); -fa(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - fa(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -fal(<<$l/utf8,Rest/binary>>, Stack, Opts) -> - fals(Rest, Stack, Opts); -fal(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - fal(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. 
- -fals(<<$s/utf8,Rest/binary>>, Stack, Opts) -> - false(Rest, Stack, Opts); -fals(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - fals(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -false(<<$e/utf8,Rest/binary>>, Stack, Opts) -> - {event, - {literal,false}, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -false(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - false(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -nu(<<$u/utf8,Rest/binary>>, Stack, Opts) -> - nul(Rest, Stack, Opts); -nu(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - nu(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -nul(<<$l/utf8,Rest/binary>>, Stack, Opts) -> - null(Rest, Stack, Opts); -nul(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - nul(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -null(<<$l/utf8,Rest/binary>>, Stack, Opts) -> - {event, - {literal,null}, - fun() -> - maybe_done(Rest, Stack, Opts) - end}; -null(Bin, Stack, Opts) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - null(<>, Stack, Opts) - end}; - false -> - {error,badjson} - end. - -maybe_comment(<<42/utf8,Rest/binary>>, Resume) -> - comment(Rest, Resume); -maybe_comment(Bin, Resume) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - maybe_comment(<>, Resume) - end}; - false -> - {error,badjson} - end. 
- -comment(<<42/utf8,Rest/binary>>, Resume) -> - maybe_comment_done(Rest, Resume); -comment(<<_/utf8,Rest/binary>>, Resume) -> - comment(Rest, Resume); -comment(Bin, Resume) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - comment(<>, Resume) - end}; - false -> - {error,badjson} - end. - -maybe_comment_done(<<47/utf8,Rest/binary>>, Resume) -> - Resume(Rest); -maybe_comment_done(<<_/utf8,Rest/binary>>, Resume) -> - comment(Rest, Resume); -maybe_comment_done(Bin, Resume) -> - case byte_size(Bin) < 1 of - true -> - {incomplete, - fun(end_stream) -> - {error,badjson}; - (Stream) -> - maybe_comment_done(<>, - Resume) - end}; - false -> - {error,badjson} - end. +-define(utf8, true). +-include("./include/jsx_decoder_template.hrl"). \ No newline at end of file