diff --git a/src/jsx_utf16.erl b/src/jsx_utf16.erl index 879e0a4..ea01fcf 100644 --- a/src/jsx_utf16.erl +++ b/src/jsx_utf16.erl @@ -27,8 +27,10 @@ -export([start/4]). + +-define(utf16, true). -define(encoding, utf16). --define(partial_size(Bin), byte_size(Bin) < 2). +-define(char_size, 2). %% callbacks to our handler are roughly equivalent to a fold over the events, incremental %% rather than all at once. @@ -69,9 +71,14 @@ start(<>, Stack, Callbacks, Opts) when ?is_nonzero(S) integer(Rest, Stack, Callbacks, Opts, [S]); start(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end); -start(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> start(<>, Stack, Callbacks, Opts) end}; -start(_, _, _, _) -> {error, badjson}. +start(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> start(<>, Stack, Callbacks, Opts) end + } + end. maybe_done(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -88,9 +95,14 @@ maybe_done(<>, Stack, Callbacks, ?comments_enab maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end); maybe_done(<<>>, [], Callbacks, Opts) -> {fold(end_of_stream, Callbacks), fun(Stream) -> maybe_done(Stream, [], Callbacks, Opts) end}; -maybe_done(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end}; -maybe_done(_, _, _, _) -> {error, badjson}. +maybe_done(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end + } + end. object(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -101,9 +113,14 @@ object(<>, [key|Stack], Callbacks, Opts) -> maybe_done(Rest, Stack, fold(end_object, Callbacks), Opts); object(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end); -object(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> object(<>, Stack, Callbacks, Opts) end}; -object(_, _, _, _) -> {error, badjson}. +object(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> object(<>, Stack, Callbacks, Opts) end + } + end. array(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -130,9 +147,14 @@ array(<>, [array|Stack], Callbacks, Opts) -> maybe_done(Rest, Stack, fold(end_array, Callbacks), Opts); array(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end); -array(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> array(<>, Stack, Callbacks, Opts) end}; -array(_, _, _, _) -> {error, badjson}. +array(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> array(<>, Stack, Callbacks, Opts) end + } + end. value(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -157,9 +179,14 @@ value(<>, Stack, Callbacks, Opts) -> array(Rest, [array|Stack], fold(start_array, Callbacks), Opts); value(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end); -value(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> value(<>, Stack, Callbacks, Opts) end}; -value(_, _, _, _) -> {error, badjson}. +value(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> value(<>, Stack, Callbacks, Opts) end + } + end. colon(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -168,9 +195,14 @@ colon(<>, [key|Stack], Callbacks, Opts) -> value(Rest, [object|Stack], Callbacks, Opts); colon(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end); -colon(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end}; -colon(_, _, _, _) -> {error, badjson}. +colon(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end + } + end. key(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -179,9 +211,14 @@ key(<>, Stack, Callbacks, Opts) -> string(Rest, Stack, Callbacks, Opts, []); key(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end); -key(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> key(<>, Stack, Callbacks, Opts) end}; -key(_, _, _, _) -> {error, badjson}. +key(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> key(<>, Stack, Callbacks, Opts) end + } + end. %% string has an additional parameter, an accumulator (Acc) used to hold the intermediate @@ -199,22 +236,41 @@ string(<>, Stack, Callbacks, Opts, Acc) -> string(<>, Stack, Callbacks, Opts, Acc) -> escape(Rest, Stack, Callbacks, Opts, Acc); string(<>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) -> - string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); + string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); string(Bin, Stack, Callbacks, Opts, Acc) -> - case partial_utf16(Bin) of + case partial_utf(Bin) of true -> - {incomplete, fun(Stream) -> string(<>, Stack, Callbacks, Opts, Acc) end} + {incomplete, + fun(Stream) -> string(<>, Stack, Callbacks, Opts, Acc) end + } ; false -> {error, badjson} end. - -partial_utf16(<<>>) -> true; + +-ifdef(utf16). +partial_utf(<<>>) -> true; %% this case is not strictly true, there are single bytes that should be rejected, but %% they're rare enough they can be ignored -partial_utf16(<<_X>>) -> true; -partial_utf16(<>) when X >= 16#d8, X =< 16#df -> true; -partial_utf16(<>) when X >= 16#d8, X =< 16#df, Z >= 16#dc, Z =< 16#df -> true; -partial_utf16(_) -> false. +partial_utf(<<_X>>) -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df, Z >= 16#dc, Z =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf16le). +partial_utf(<<>>) -> true; +%% this case is not strictly true, there are single bytes that should be rejected, but +%% they're rare enough they can be ignored +partial_utf(<<_X>>) -> true; +partial_utf(<<_Y, X>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<<_Y, X, _Z>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf32). +partial_utf(<<_:32>>) -> false; +partial_utf(_) -> true. +-endif. %% only thing to note here is the additional accumulator passed to escaped_unicode used @@ -236,9 +292,14 @@ escape(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> escape(<>, Stack, Callbacks, Opts, Acc) when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus -> string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -escape(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end}; -escape(_, _, _, _, _) -> {error, badjson}. +escape(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end + } + end. %% this code is ugly and unfortunate, but so is json's handling of escaped unicode @@ -286,12 +347,14 @@ escaped_unicode(<>, Stack, Callbacks, Opts, String, [C string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String); escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) -> escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc); -escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> - escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) - end - }; -escaped_unicode(_, _, _, _, _, _) -> {error, badjson}. +escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) end + } + end. %% upon encountering a low pair json/hex encoded value, check to see if there's a high %% value already in the accumulator. @@ -314,19 +377,20 @@ surrogate_to_codepoint(High, Low) -> %% like strings, numbers are collected in an intermediate accumulator before -%% being emitted to the callback handler. no processing of numbers is done in -%% process, it's left for the user, though there are convenience functions to -%% convert them into erlang floats/integers in jsx_utils.erl. - -%% TODO: actually write that jsx_utils.erl module mentioned above... +%% being emitted to the callback handler. negative(<<$0/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc); negative(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -negative(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> negative(<>, Stack, Callbacks, Opts, Acc) end}; -negative(_, _, _, _, _) -> {error, badjson}. +negative(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> negative(<>, Stack, Callbacks, Opts, Acc) end + } + end. zero(<>, [object|Stack], Callbacks, Opts, Acc) -> @@ -346,9 +410,14 @@ zero(<>, Stack, Callbacks, ?comments_enabled(Op zero(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> zero(Stream, [], Callbacks, Opts, Acc) end}; -zero(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> zero(<>, Stack, Callbacks, Opts, Acc) end}; -zero(_, _, _, _, _) -> {error, badjson}. +zero(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> zero(<>, Stack, Callbacks, Opts, Acc) end + } + end. integer(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -376,18 +445,28 @@ integer(<>, Stack, Callbacks, ?comments_enabled integer(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> integer(Stream, [], Callbacks, Opts, Acc) end}; -integer(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> integer(<>, Stack, Callbacks, Opts, Acc) end}; -integer(_, _, _, _, _) -> {error, badjson}. +integer(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> integer(<>, Stack, Callbacks, Opts, Acc) end + } + end. initial_decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> decimal(Rest, Stack, Callbacks, Opts, [S] ++ Acc); initial_decimal(<>, Stack, Callbacks, Opts, Acc) -> decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc); -initial_decimal(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end}; -initial_decimal(_, _, _, _, _) -> {error, badjson}. +initial_decimal(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end + } + end. decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -413,25 +492,40 @@ decimal(<>, Stack, Callbacks, ?comments_enabled decimal(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> decimal(Stream, [], Callbacks, Opts, Acc) end}; -decimal(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> decimal(<>, Stack, Callbacks, Opts, Acc) end}; -decimal(_, _, _, _, _) -> {error, badjson}. +decimal(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> decimal(<>, Stack, Callbacks, Opts, Acc) end + } + end. e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc); e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative -> ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -e(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> e(<>, Stack, Callbacks, Opts, Acc) end}; -e(_, _, _, _, _) -> {error, badjson}. +e(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> e(<>, Stack, Callbacks, Opts, Acc) end + } + end. ex(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -ex(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> ex(<>, Stack, Callbacks, Opts, Acc) end}; -ex(_, _, _, _, _) -> {error, badjson}. +ex(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> ex(<>, Stack, Callbacks, Opts, Acc) end + } + end. exp(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -453,79 +547,134 @@ exp(<>, Stack, Callbacks, Opts, Acc) when ?is_whitespa exp(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> exp(Stream, [], Callbacks, Opts, Acc) end}; -exp(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> exp(<>, Stack, Callbacks, Opts, Acc) end}; -exp(_, _, _, _, _) -> {error, badjson}. +exp(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> exp(<>, Stack, Callbacks, Opts, Acc) end + } + end. tr(<<$r/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> tru(Rest, Stack, Callbacks, Opts); -tr(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> tr(<>, Stack, Callbacks, Opts) end}; -tr(_, _, _, _) -> {error, badjson}. +tr(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> tr(<>, Stack, Callbacks, Opts) end + } + end. tru(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> true(Rest, Stack, Callbacks, Opts); -tru(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> tru(<>, Stack, Callbacks, Opts) end}; -tru(_, _, _, _) -> {error, badjson}. +tru(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> tru(<>, Stack, Callbacks, Opts) end + } + end. true(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, true}, Callbacks), Opts); -true(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> true(<>, Stack, Callbacks, Opts) end}; -true(_, _, _, _) -> {error, badjson}. +true(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> true(<>, Stack, Callbacks, Opts) end + } + end. fa(<<$a/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fal(Rest, Stack, Callbacks, Opts); -fa(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fa(<>, Stack, Callbacks, Opts) end}; -fa(_, _, _, _) -> {error, badjson}. +fa(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fa(<>, Stack, Callbacks, Opts) end + } + end. fal(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fals(Rest, Stack, Callbacks, Opts); -fal(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fal(<>, Stack, Callbacks, Opts) end}; -fal(_, _, _, _) -> {error, badjson}. +fal(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fal(<>, Stack, Callbacks, Opts) end + } + end. fals(<<$s/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> false(Rest, Stack, Callbacks, Opts); -fals(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fals(<>, Stack, Callbacks, Opts) end}; -fals(_, _, _, _) -> {error, badjson}. +fals(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fals(<>, Stack, Callbacks, Opts) end + } + end. false(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, false}, Callbacks), Opts); -false(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> false(<>, Stack, Callbacks, Opts) end}; -false(_, _, _, _) -> {error, badjson}. +false(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> false(<>, Stack, Callbacks, Opts) end + } + end. nu(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> nul(Rest, Stack, Callbacks, Opts); -nu(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> nu(<>, Stack, Callbacks, Opts) end}; -nu(_, _, _, _) -> {error, badjson}. +nu(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> nu(<>, Stack, Callbacks, Opts) end + } + end. nul(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> null(Rest, Stack, Callbacks, Opts); -nul(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> nul(<>, Stack, Callbacks, Opts) end}; -nul(_, _, _, _) -> {error, badjson}. +nul(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> nul(<>, Stack, Callbacks, Opts) end + } + end. null(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, null}, Callbacks), Opts); -null(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> null(<>, Stack, Callbacks, Opts) end}; -null(_, _, _, _) -> {error, badjson}. +null(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> null(<>, Stack, Callbacks, Opts) end + } + end. %% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode @@ -536,22 +685,35 @@ null(_, _, _, _) -> {error, badjson}. maybe_comment(<>, Resume) -> comment(Rest, Resume); -maybe_comment(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_comment(<>, Resume) end}; -maybe_comment(_, _) -> {error, badjson}. +maybe_comment(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> maybe_comment(<>, Resume) end + } + end. comment(<>, Resume) -> maybe_comment_done(Rest, Resume); comment(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); -comment(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> comment(<>, Resume) end}. +comment(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, fun(Stream) -> comment(<>, Resume) end} + end. maybe_comment_done(<>, Resume) -> Resume(Rest); maybe_comment_done(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); -maybe_comment_done(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end}. \ No newline at end of file +maybe_comment_done(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end} + end. \ No newline at end of file diff --git a/src/jsx_utf16le.erl b/src/jsx_utf16le.erl index ea20cc3..ab2708b 100644 --- a/src/jsx_utf16le.erl +++ b/src/jsx_utf16le.erl @@ -27,8 +27,10 @@ -export([start/4]). + +-define(utf16le, true). -define(encoding, utf16-little). --define(partial_size(Bin), byte_size(Bin) < 2). +-define(char_size, 2). %% callbacks to our handler are roughly equivalent to a fold over the events, incremental %% rather than all at once. @@ -69,9 +71,14 @@ start(<>, Stack, Callbacks, Opts) when ?is_nonzero(S) integer(Rest, Stack, Callbacks, Opts, [S]); start(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end); -start(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> start(<>, Stack, Callbacks, Opts) end}; -start(_, _, _, _) -> {error, badjson}. +start(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> start(<>, Stack, Callbacks, Opts) end + } + end. maybe_done(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -88,9 +95,14 @@ maybe_done(<>, Stack, Callbacks, ?comments_enab maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end); maybe_done(<<>>, [], Callbacks, Opts) -> {fold(end_of_stream, Callbacks), fun(Stream) -> maybe_done(Stream, [], Callbacks, Opts) end}; -maybe_done(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end}; -maybe_done(_, _, _, _) -> {error, badjson}. +maybe_done(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end + } + end. object(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -101,9 +113,14 @@ object(<>, [key|Stack], Callbacks, Opts) -> maybe_done(Rest, Stack, fold(end_object, Callbacks), Opts); object(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end); -object(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> object(<>, Stack, Callbacks, Opts) end}; -object(_, _, _, _) -> {error, badjson}. +object(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> object(<>, Stack, Callbacks, Opts) end + } + end. array(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -130,9 +147,14 @@ array(<>, [array|Stack], Callbacks, Opts) -> maybe_done(Rest, Stack, fold(end_array, Callbacks), Opts); array(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end); -array(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> array(<>, Stack, Callbacks, Opts) end}; -array(_, _, _, _) -> {error, badjson}. +array(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> array(<>, Stack, Callbacks, Opts) end + } + end. value(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -157,9 +179,14 @@ value(<>, Stack, Callbacks, Opts) -> array(Rest, [array|Stack], fold(start_array, Callbacks), Opts); value(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end); -value(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> value(<>, Stack, Callbacks, Opts) end}; -value(_, _, _, _) -> {error, badjson}. +value(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> value(<>, Stack, Callbacks, Opts) end + } + end. colon(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -168,9 +195,14 @@ colon(<>, [key|Stack], Callbacks, Opts) -> value(Rest, [object|Stack], Callbacks, Opts); colon(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end); -colon(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end}; -colon(_, _, _, _) -> {error, badjson}. +colon(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end + } + end. key(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -179,9 +211,14 @@ key(<>, Stack, Callbacks, Opts) -> string(Rest, Stack, Callbacks, Opts, []); key(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end); -key(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> key(<>, Stack, Callbacks, Opts) end}; -key(_, _, _, _) -> {error, badjson}. +key(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> key(<>, Stack, Callbacks, Opts) end + } + end. %% string has an additional parameter, an accumulator (Acc) used to hold the intermediate @@ -199,22 +236,41 @@ string(<>, Stack, Callbacks, Opts, Acc) -> string(<>, Stack, Callbacks, Opts, Acc) -> escape(Rest, Stack, Callbacks, Opts, Acc); string(<>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) -> - string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); + string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); string(Bin, Stack, Callbacks, Opts, Acc) -> - case partial_utf16(Bin) of + case partial_utf(Bin) of true -> - {incomplete, fun(Stream) -> string(<>, Stack, Callbacks, Opts, Acc) end} + {incomplete, + fun(Stream) -> string(<>, Stack, Callbacks, Opts, Acc) end + } ; false -> {error, badjson} end. - -partial_utf16(<<>>) -> true; + +-ifdef(utf16). +partial_utf(<<>>) -> true; %% this case is not strictly true, there are single bytes that should be rejected, but %% they're rare enough they can be ignored -partial_utf16(<<_X>>) -> true; -partial_utf16(<<_Y, X>>) when X >= 16#d8, X =< 16#df -> true; -partial_utf16(<<_Y, X, _Z>>) when X >= 16#d8, X =< 16#df -> true; -partial_utf16(_) -> false. +partial_utf(<<_X>>) -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df, Z >= 16#dc, Z =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf16le). +partial_utf(<<>>) -> true; +%% this case is not strictly true, there are single bytes that should be rejected, but +%% they're rare enough they can be ignored +partial_utf(<<_X>>) -> true; +partial_utf(<<_Y, X>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<<_Y, X, _Z>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf32). +partial_utf(<<_:32>>) -> false; +partial_utf(_) -> true. +-endif. %% only thing to note here is the additional accumulator passed to escaped_unicode used @@ -236,9 +292,14 @@ escape(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> escape(<>, Stack, Callbacks, Opts, Acc) when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus -> string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -escape(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end}; -escape(_, _, _, _, _) -> {error, badjson}. +escape(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end + } + end. %% this code is ugly and unfortunate, but so is json's handling of escaped unicode @@ -286,12 +347,14 @@ escaped_unicode(<>, Stack, Callbacks, Opts, String, [C string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String); escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) -> escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc); -escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> - escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) - end - }; -escaped_unicode(_, _, _, _, _, _) -> {error, badjson}. +escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) end + } + end. %% upon encountering a low pair json/hex encoded value, check to see if there's a high %% value already in the accumulator. @@ -314,19 +377,20 @@ surrogate_to_codepoint(High, Low) -> %% like strings, numbers are collected in an intermediate accumulator before -%% being emitted to the callback handler. no processing of numbers is done in -%% process, it's left for the user, though there are convenience functions to -%% convert them into erlang floats/integers in jsx_utils.erl. - -%% TODO: actually write that jsx_utils.erl module mentioned above... +%% being emitted to the callback handler. negative(<<$0/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc); negative(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -negative(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> negative(<>, Stack, Callbacks, Opts, Acc) end}; -negative(_, _, _, _, _) -> {error, badjson}. +negative(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> negative(<>, Stack, Callbacks, Opts, Acc) end + } + end. zero(<>, [object|Stack], Callbacks, Opts, Acc) -> @@ -346,9 +410,14 @@ zero(<>, Stack, Callbacks, ?comments_enabled(Op zero(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> zero(Stream, [], Callbacks, Opts, Acc) end}; -zero(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> zero(<>, Stack, Callbacks, Opts, Acc) end}; -zero(_, _, _, _, _) -> {error, badjson}. +zero(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> zero(<>, Stack, Callbacks, Opts, Acc) end + } + end. integer(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -376,18 +445,28 @@ integer(<>, Stack, Callbacks, ?comments_enabled integer(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> integer(Stream, [], Callbacks, Opts, Acc) end}; -integer(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> integer(<>, Stack, Callbacks, Opts, Acc) end}; -integer(_, _, _, _, _) -> {error, badjson}. +integer(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> integer(<>, Stack, Callbacks, Opts, Acc) end + } + end. initial_decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> decimal(Rest, Stack, Callbacks, Opts, [S] ++ Acc); initial_decimal(<>, Stack, Callbacks, Opts, Acc) -> decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc); -initial_decimal(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end}; -initial_decimal(_, _, _, _, _) -> {error, badjson}. +initial_decimal(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end + } + end. decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -413,25 +492,40 @@ decimal(<>, Stack, Callbacks, ?comments_enabled decimal(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> decimal(Stream, [], Callbacks, Opts, Acc) end}; -decimal(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> decimal(<>, Stack, Callbacks, Opts, Acc) end}; -decimal(_, _, _, _, _) -> {error, badjson}. +decimal(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> decimal(<>, Stack, Callbacks, Opts, Acc) end + } + end. e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc); e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative -> ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -e(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> e(<>, Stack, Callbacks, Opts, Acc) end}; -e(_, _, _, _, _) -> {error, badjson}. +e(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> e(<>, Stack, Callbacks, Opts, Acc) end + } + end. ex(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -ex(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> ex(<>, Stack, Callbacks, Opts, Acc) end}; -ex(_, _, _, _, _) -> {error, badjson}. +ex(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> ex(<>, Stack, Callbacks, Opts, Acc) end + } + end. exp(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -453,79 +547,134 @@ exp(<>, Stack, Callbacks, Opts, Acc) when ?is_whitespa exp(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> exp(Stream, [], Callbacks, Opts, Acc) end}; -exp(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> exp(<>, Stack, Callbacks, Opts, Acc) end}; -exp(_, _, _, _, _) -> {error, badjson}. +exp(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> exp(<>, Stack, Callbacks, Opts, Acc) end + } + end. tr(<<$r/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> tru(Rest, Stack, Callbacks, Opts); -tr(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> tr(<>, Stack, Callbacks, Opts) end}; -tr(_, _, _, _) -> {error, badjson}. +tr(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> tr(<>, Stack, Callbacks, Opts) end + } + end. tru(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> true(Rest, Stack, Callbacks, Opts); -tru(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> tru(<>, Stack, Callbacks, Opts) end}; -tru(_, _, _, _) -> {error, badjson}. +tru(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> tru(<>, Stack, Callbacks, Opts) end + } + end. true(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, true}, Callbacks), Opts); -true(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> true(<>, Stack, Callbacks, Opts) end}; -true(_, _, _, _) -> {error, badjson}. +true(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> true(<>, Stack, Callbacks, Opts) end + } + end. fa(<<$a/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fal(Rest, Stack, Callbacks, Opts); -fa(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fa(<>, Stack, Callbacks, Opts) end}; -fa(_, _, _, _) -> {error, badjson}. +fa(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fa(<>, Stack, Callbacks, Opts) end + } + end. fal(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fals(Rest, Stack, Callbacks, Opts); -fal(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fal(<>, Stack, Callbacks, Opts) end}; -fal(_, _, _, _) -> {error, badjson}. +fal(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fal(<>, Stack, Callbacks, Opts) end + } + end. fals(<<$s/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> false(Rest, Stack, Callbacks, Opts); -fals(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fals(<>, Stack, Callbacks, Opts) end}; -fals(_, _, _, _) -> {error, badjson}. +fals(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fals(<>, Stack, Callbacks, Opts) end + } + end. false(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, false}, Callbacks), Opts); -false(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> false(<>, Stack, Callbacks, Opts) end}; -false(_, _, _, _) -> {error, badjson}. +false(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> false(<>, Stack, Callbacks, Opts) end + } + end. nu(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> nul(Rest, Stack, Callbacks, Opts); -nu(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> nu(<>, Stack, Callbacks, Opts) end}; -nu(_, _, _, _) -> {error, badjson}. +nu(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> nu(<>, Stack, Callbacks, Opts) end + } + end. nul(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> null(Rest, Stack, Callbacks, Opts); -nul(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> nul(<>, Stack, Callbacks, Opts) end}; -nul(_, _, _, _) -> {error, badjson}. +nul(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> nul(<>, Stack, Callbacks, Opts) end + } + end. null(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, null}, Callbacks), Opts); -null(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> null(<>, Stack, Callbacks, Opts) end}; -null(_, _, _, _) -> {error, badjson}. +null(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> null(<>, Stack, Callbacks, Opts) end + } + end. %% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode @@ -536,22 +685,35 @@ null(_, _, _, _) -> {error, badjson}. maybe_comment(<>, Resume) -> comment(Rest, Resume); -maybe_comment(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_comment(<>, Resume) end}; -maybe_comment(_, _) -> {error, badjson}. +maybe_comment(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> maybe_comment(<>, Resume) end + } + end. comment(<>, Resume) -> maybe_comment_done(Rest, Resume); comment(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); -comment(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> comment(<>, Resume) end}. +comment(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, fun(Stream) -> comment(<>, Resume) end} + end. maybe_comment_done(<>, Resume) -> Resume(Rest); maybe_comment_done(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); -maybe_comment_done(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end}. \ No newline at end of file +maybe_comment_done(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end} + end. \ No newline at end of file diff --git a/src/jsx_utf32.erl b/src/jsx_utf32.erl index 3693c85..160581d 100644 --- a/src/jsx_utf32.erl +++ b/src/jsx_utf32.erl @@ -27,8 +27,10 @@ -export([start/4]). + +-define(utf32, true). -define(encoding, utf32). --define(partial_size(Bin), byte_size(Bin) < 4). +-define(char_size, 4). %% callbacks to our handler are roughly equivalent to a fold over the events, incremental %% rather than all at once. @@ -69,9 +71,14 @@ start(<>, Stack, Callbacks, Opts) when ?is_nonzero(S) integer(Rest, Stack, Callbacks, Opts, [S]); start(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end); -start(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> start(<>, Stack, Callbacks, Opts) end}; -start(_, _, _, _) -> {error, badjson}. +start(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> start(<>, Stack, Callbacks, Opts) end + } + end. maybe_done(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -88,9 +95,14 @@ maybe_done(<>, Stack, Callbacks, ?comments_enab maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end); maybe_done(<<>>, [], Callbacks, Opts) -> {fold(end_of_stream, Callbacks), fun(Stream) -> maybe_done(Stream, [], Callbacks, Opts) end}; -maybe_done(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end}; -maybe_done(_, _, _, _) -> {error, badjson}. +maybe_done(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end + } + end. object(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -101,9 +113,14 @@ object(<>, [key|Stack], Callbacks, Opts) -> maybe_done(Rest, Stack, fold(end_object, Callbacks), Opts); object(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end); -object(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> object(<>, Stack, Callbacks, Opts) end}; -object(_, _, _, _) -> {error, badjson}. +object(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> object(<>, Stack, Callbacks, Opts) end + } + end. array(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -130,9 +147,14 @@ array(<>, [array|Stack], Callbacks, Opts) -> maybe_done(Rest, Stack, fold(end_array, Callbacks), Opts); array(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end); -array(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> array(<>, Stack, Callbacks, Opts) end}; -array(_, _, _, _) -> {error, badjson}. +array(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> array(<>, Stack, Callbacks, Opts) end + } + end. value(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -157,9 +179,14 @@ value(<>, Stack, Callbacks, Opts) -> array(Rest, [array|Stack], fold(start_array, Callbacks), Opts); value(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end); -value(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> value(<>, Stack, Callbacks, Opts) end}; -value(_, _, _, _) -> {error, badjson}. +value(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> value(<>, Stack, Callbacks, Opts) end + } + end. colon(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -168,9 +195,14 @@ colon(<>, [key|Stack], Callbacks, Opts) -> value(Rest, [object|Stack], Callbacks, Opts); colon(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end); -colon(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end}; -colon(_, _, _, _) -> {error, badjson}. +colon(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end + } + end. key(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -179,9 +211,14 @@ key(<>, Stack, Callbacks, Opts) -> string(Rest, Stack, Callbacks, Opts, []); key(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end); -key(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> key(<>, Stack, Callbacks, Opts) end}; -key(_, _, _, _) -> {error, badjson}. +key(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> key(<>, Stack, Callbacks, Opts) end + } + end. %% string has an additional parameter, an accumulator (Acc) used to hold the intermediate @@ -199,10 +236,41 @@ string(<>, Stack, Callbacks, Opts, Acc) -> string(<>, Stack, Callbacks, Opts, Acc) -> escape(Rest, Stack, Callbacks, Opts, Acc); string(<>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) -> - string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -string(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> string(<>, Stack, Callbacks, Opts, Acc) end}; -string(_, _, _, _, _) -> {error, badjson}. + string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); +string(Bin, Stack, Callbacks, Opts, Acc) -> + case partial_utf(Bin) of + true -> + {incomplete, + fun(Stream) -> string(<>, Stack, Callbacks, Opts, Acc) end + } + ; false -> + {error, badjson} + end. + +-ifdef(utf16). +partial_utf(<<>>) -> true; +%% this case is not strictly true, there are single bytes that should be rejected, but +%% they're rare enough they can be ignored +partial_utf(<<_X>>) -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df, Z >= 16#dc, Z =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf16le). +partial_utf(<<>>) -> true; +%% this case is not strictly true, there are single bytes that should be rejected, but +%% they're rare enough they can be ignored +partial_utf(<<_X>>) -> true; +partial_utf(<<_Y, X>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<<_Y, X, _Z>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf32). +partial_utf(<<_:32>>) -> false; +partial_utf(_) -> true. +-endif. %% only thing to note here is the additional accumulator passed to escaped_unicode used @@ -224,9 +292,14 @@ escape(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> escape(<>, Stack, Callbacks, Opts, Acc) when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus -> string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -escape(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end}; -escape(_, _, _, _, _) -> {error, badjson}. +escape(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end + } + end. %% this code is ugly and unfortunate, but so is json's handling of escaped unicode @@ -274,12 +347,14 @@ escaped_unicode(<>, Stack, Callbacks, Opts, String, [C string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String); escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) -> escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc); -escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> - escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) - end - }; -escaped_unicode(_, _, _, _, _, _) -> {error, badjson}. +escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) end + } + end. %% upon encountering a low pair json/hex encoded value, check to see if there's a high %% value already in the accumulator. @@ -299,22 +374,23 @@ check_acc_for_surrogate(_) -> surrogate_to_codepoint(High, Low) -> (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000. - - + + %% like strings, numbers are collected in an intermediate accumulator before -%% being emitted to the callback handler. no processing of numbers is done in -%% process, it's left for the user, though there are convenience functions to -%% convert them into erlang floats/integers in jsx_utils.erl. - -%% TODO: actually write that jsx_utils.erl module mentioned above... +%% being emitted to the callback handler. negative(<<$0/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc); negative(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -negative(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> negative(<>, Stack, Callbacks, Opts, Acc) end}; -negative(_, _, _, _, _) -> {error, badjson}. +negative(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> negative(<>, Stack, Callbacks, Opts, Acc) end + } + end. zero(<>, [object|Stack], Callbacks, Opts, Acc) -> @@ -334,9 +410,14 @@ zero(<>, Stack, Callbacks, ?comments_enabled(Op zero(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> zero(Stream, [], Callbacks, Opts, Acc) end}; -zero(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> zero(<>, Stack, Callbacks, Opts, Acc) end}; -zero(_, _, _, _, _) -> {error, badjson}. +zero(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> zero(<>, Stack, Callbacks, Opts, Acc) end + } + end. integer(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -364,18 +445,28 @@ integer(<>, Stack, Callbacks, ?comments_enabled integer(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> integer(Stream, [], Callbacks, Opts, Acc) end}; -integer(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> integer(<>, Stack, Callbacks, Opts, Acc) end}; -integer(_, _, _, _, _) -> {error, badjson}. +integer(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> integer(<>, Stack, Callbacks, Opts, Acc) end + } + end. initial_decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> decimal(Rest, Stack, Callbacks, Opts, [S] ++ Acc); initial_decimal(<>, Stack, Callbacks, Opts, Acc) -> decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc); -initial_decimal(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end}; -initial_decimal(_, _, _, _, _) -> {error, badjson}. +initial_decimal(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end + } + end. decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -401,25 +492,40 @@ decimal(<>, Stack, Callbacks, ?comments_enabled decimal(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> decimal(Stream, [], Callbacks, Opts, Acc) end}; -decimal(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> decimal(<>, Stack, Callbacks, Opts, Acc) end}; -decimal(_, _, _, _, _) -> {error, badjson}. +decimal(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> decimal(<>, Stack, Callbacks, Opts, Acc) end + } + end. e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc); e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative -> ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -e(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> e(<>, Stack, Callbacks, Opts, Acc) end}; -e(_, _, _, _, _) -> {error, badjson}. +e(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> e(<>, Stack, Callbacks, Opts, Acc) end + } + end. ex(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -ex(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> ex(<>, Stack, Callbacks, Opts, Acc) end}; -ex(_, _, _, _, _) -> {error, badjson}. +ex(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> ex(<>, Stack, Callbacks, Opts, Acc) end + } + end. exp(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -441,79 +547,134 @@ exp(<>, Stack, Callbacks, Opts, Acc) when ?is_whitespa exp(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> exp(Stream, [], Callbacks, Opts, Acc) end}; -exp(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> exp(<>, Stack, Callbacks, Opts, Acc) end}; -exp(_, _, _, _, _) -> {error, badjson}. +exp(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> exp(<>, Stack, Callbacks, Opts, Acc) end + } + end. tr(<<$r/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> tru(Rest, Stack, Callbacks, Opts); -tr(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> tr(<>, Stack, Callbacks, Opts) end}; -tr(_, _, _, _) -> {error, badjson}. +tr(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> tr(<>, Stack, Callbacks, Opts) end + } + end. tru(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> true(Rest, Stack, Callbacks, Opts); -tru(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> tru(<>, Stack, Callbacks, Opts) end}; -tru(_, _, _, _) -> {error, badjson}. +tru(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> tru(<>, Stack, Callbacks, Opts) end + } + end. true(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, true}, Callbacks), Opts); -true(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> true(<>, Stack, Callbacks, Opts) end}; -true(_, _, _, _) -> {error, badjson}. +true(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> true(<>, Stack, Callbacks, Opts) end + } + end. fa(<<$a/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fal(Rest, Stack, Callbacks, Opts); -fa(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fa(<>, Stack, Callbacks, Opts) end}; -fa(_, _, _, _) -> {error, badjson}. +fa(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fa(<>, Stack, Callbacks, Opts) end + } + end. fal(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fals(Rest, Stack, Callbacks, Opts); -fal(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fal(<>, Stack, Callbacks, Opts) end}; -fal(_, _, _, _) -> {error, badjson}. +fal(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fal(<>, Stack, Callbacks, Opts) end + } + end. fals(<<$s/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> false(Rest, Stack, Callbacks, Opts); -fals(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fals(<>, Stack, Callbacks, Opts) end}; -fals(_, _, _, _) -> {error, badjson}. +fals(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fals(<>, Stack, Callbacks, Opts) end + } + end. false(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, false}, Callbacks), Opts); -false(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> false(<>, Stack, Callbacks, Opts) end}; -false(_, _, _, _) -> {error, badjson}. +false(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> false(<>, Stack, Callbacks, Opts) end + } + end. nu(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> nul(Rest, Stack, Callbacks, Opts); -nu(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> nu(<>, Stack, Callbacks, Opts) end}; -nu(_, _, _, _) -> {error, badjson}. +nu(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> nu(<>, Stack, Callbacks, Opts) end + } + end. nul(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> null(Rest, Stack, Callbacks, Opts); -nul(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> nul(<>, Stack, Callbacks, Opts) end}; -nul(_, _, _, _) -> {error, badjson}. +nul(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> nul(<>, Stack, Callbacks, Opts) end + } + end. null(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, null}, Callbacks), Opts); -null(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> null(<>, Stack, Callbacks, Opts) end}; -null(_, _, _, _) -> {error, badjson}. +null(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> null(<>, Stack, Callbacks, Opts) end + } + end. %% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode @@ -524,22 +685,35 @@ null(_, _, _, _) -> {error, badjson}. maybe_comment(<>, Resume) -> comment(Rest, Resume); -maybe_comment(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_comment(<>, Resume) end}; -maybe_comment(_, _) -> {error, badjson}. +maybe_comment(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> maybe_comment(<>, Resume) end + } + end. comment(<>, Resume) -> maybe_comment_done(Rest, Resume); comment(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); -comment(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> comment(<>, Resume) end}. +comment(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, fun(Stream) -> comment(<>, Resume) end} + end. maybe_comment_done(<>, Resume) -> Resume(Rest); maybe_comment_done(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); -maybe_comment_done(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end}. \ No newline at end of file +maybe_comment_done(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end} + end. \ No newline at end of file diff --git a/src/jsx_utf32le.erl b/src/jsx_utf32le.erl index ebd7823..242ebe5 100644 --- a/src/jsx_utf32le.erl +++ b/src/jsx_utf32le.erl @@ -27,8 +27,10 @@ -export([start/4]). + +-define(utf32, true). -define(encoding, utf32-little). --define(partial_size(Bin), byte_size(Bin) < 4). +-define(char_size, 4). %% callbacks to our handler are roughly equivalent to a fold over the events, incremental %% rather than all at once. @@ -69,9 +71,14 @@ start(<>, Stack, Callbacks, Opts) when ?is_nonzero(S) integer(Rest, Stack, Callbacks, Opts, [S]); start(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Callbacks, Opts) end); -start(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> start(<>, Stack, Callbacks, Opts) end}; -start(_, _, _, _) -> {error, badjson}. +start(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> start(<>, Stack, Callbacks, Opts) end + } + end. maybe_done(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -88,9 +95,14 @@ maybe_done(<>, Stack, Callbacks, ?comments_enab maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Callbacks, Opts) end); maybe_done(<<>>, [], Callbacks, Opts) -> {fold(end_of_stream, Callbacks), fun(Stream) -> maybe_done(Stream, [], Callbacks, Opts) end}; -maybe_done(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end}; -maybe_done(_, _, _, _) -> {error, badjson}. +maybe_done(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> maybe_done(<>, Stack, Callbacks, Opts) end + } + end. object(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -101,9 +113,14 @@ object(<>, [key|Stack], Callbacks, Opts) -> maybe_done(Rest, Stack, fold(end_object, Callbacks), Opts); object(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Callbacks, Opts) end); -object(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> object(<>, Stack, Callbacks, Opts) end}; -object(_, _, _, _) -> {error, badjson}. +object(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> object(<>, Stack, Callbacks, Opts) end + } + end. array(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -130,9 +147,14 @@ array(<>, [array|Stack], Callbacks, Opts) -> maybe_done(Rest, Stack, fold(end_array, Callbacks), Opts); array(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Callbacks, Opts) end); -array(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> array(<>, Stack, Callbacks, Opts) end}; -array(_, _, _, _) -> {error, badjson}. +array(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> array(<>, Stack, Callbacks, Opts) end + } + end. value(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -157,9 +179,14 @@ value(<>, Stack, Callbacks, Opts) -> array(Rest, [array|Stack], fold(start_array, Callbacks), Opts); value(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Callbacks, Opts) end); -value(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> value(<>, Stack, Callbacks, Opts) end}; -value(_, _, _, _) -> {error, badjson}. +value(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> value(<>, Stack, Callbacks, Opts) end + } + end. colon(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -168,9 +195,14 @@ colon(<>, [key|Stack], Callbacks, Opts) -> value(Rest, [object|Stack], Callbacks, Opts); colon(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Callbacks, Opts) end); -colon(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end}; -colon(_, _, _, _) -> {error, badjson}. +colon(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> colon(<>, Stack, Callbacks, Opts) end + } + end. key(<>, Stack, Callbacks, Opts) when ?is_whitespace(S) -> @@ -179,9 +211,14 @@ key(<>, Stack, Callbacks, Opts) -> string(Rest, Stack, Callbacks, Opts, []); key(<>, Stack, Callbacks, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Callbacks, Opts) end); -key(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> key(<>, Stack, Callbacks, Opts) end}; -key(_, _, _, _) -> {error, badjson}. +key(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> key(<>, Stack, Callbacks, Opts) end + } + end. %% string has an additional parameter, an accumulator (Acc) used to hold the intermediate @@ -199,10 +236,41 @@ string(<>, Stack, Callbacks, Opts, Acc) -> string(<>, Stack, Callbacks, Opts, Acc) -> escape(Rest, Stack, Callbacks, Opts, Acc); string(<>, Stack, Callbacks, Opts, Acc) when ?is_noncontrol(S) -> - string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -string(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> string(<>, Stack, Callbacks, Opts, Acc) end}; -string(_, _, _, _, _) -> {error, badjson}. + string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); +string(Bin, Stack, Callbacks, Opts, Acc) -> + case partial_utf(Bin) of + true -> + {incomplete, + fun(Stream) -> string(<>, Stack, Callbacks, Opts, Acc) end + } + ; false -> + {error, badjson} + end. + +-ifdef(utf16). +partial_utf(<<>>) -> true; +%% this case is not strictly true, there are single bytes that should be rejected, but +%% they're rare enough they can be ignored +partial_utf(<<_X>>) -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<>) when X >= 16#d8, X =< 16#df, Z >= 16#dc, Z =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf16le). +partial_utf(<<>>) -> true; +%% this case is not strictly true, there are single bytes that should be rejected, but +%% they're rare enough they can be ignored +partial_utf(<<_X>>) -> true; +partial_utf(<<_Y, X>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(<<_Y, X, _Z>>) when X >= 16#d8, X =< 16#df -> true; +partial_utf(_) -> false. +-endif. + +-ifdef(utf32). +partial_utf(<<_:32>>) -> false; +partial_utf(_) -> true. +-endif. %% only thing to note here is the additional accumulator passed to escaped_unicode used @@ -224,9 +292,14 @@ escape(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> escape(<>, Stack, Callbacks, Opts, Acc) when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus -> string(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -escape(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end}; -escape(_, _, _, _, _) -> {error, badjson}. +escape(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escape(<>, Stack, Callbacks, Opts, Acc) end + } + end. %% this code is ugly and unfortunate, but so is json's handling of escaped unicode @@ -274,12 +347,14 @@ escaped_unicode(<>, Stack, Callbacks, Opts, String, [C string(Rest, Stack, Callbacks, Opts, [D, C, B, A, $u, ?rsolidus] ++ String); escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) when ?is_hex(S) -> escaped_unicode(Rest, Stack, Callbacks, Opts, String, [S] ++ Acc); -escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> - escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) - end - }; -escaped_unicode(_, _, _, _, _, _) -> {error, badjson}. +escaped_unicode(Bin, Stack, Callbacks, Opts, String, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> escaped_unicode(<>, Stack, Callbacks, Opts, String, Acc) end + } + end. %% upon encountering a low pair json/hex encoded value, check to see if there's a high %% value already in the accumulator. @@ -299,22 +374,23 @@ check_acc_for_surrogate(_) -> surrogate_to_codepoint(High, Low) -> (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000. - - + + %% like strings, numbers are collected in an intermediate accumulator before -%% being emitted to the callback handler. no processing of numbers is done in -%% process, it's left for the user, though there are convenience functions to -%% convert them into erlang floats/integers in jsx_utils.erl. - -%% TODO: actually write that jsx_utils.erl module mentioned above... +%% being emitted to the callback handler. negative(<<$0/?encoding, Rest/binary>>, Stack, Callbacks, Opts, Acc) -> zero(Rest, Stack, Callbacks, Opts, "0" ++ Acc); negative(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> integer(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -negative(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> negative(<>, Stack, Callbacks, Opts, Acc) end}; -negative(_, _, _, _, _) -> {error, badjson}. +negative(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> negative(<>, Stack, Callbacks, Opts, Acc) end + } + end. zero(<>, [object|Stack], Callbacks, Opts, Acc) -> @@ -334,9 +410,14 @@ zero(<>, Stack, Callbacks, ?comments_enabled(Op zero(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> zero(Stream, [], Callbacks, Opts, Acc) end}; -zero(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> zero(<>, Stack, Callbacks, Opts, Acc) end}; -zero(_, _, _, _, _) -> {error, badjson}. +zero(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> zero(<>, Stack, Callbacks, Opts, Acc) end + } + end. integer(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -364,18 +445,28 @@ integer(<>, Stack, Callbacks, ?comments_enabled integer(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({integer, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> integer(Stream, [], Callbacks, Opts, Acc) end}; -integer(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> integer(<>, Stack, Callbacks, Opts, Acc) end}; -integer(_, _, _, _, _) -> {error, badjson}. +integer(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> integer(<>, Stack, Callbacks, Opts, Acc) end + } + end. initial_decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> decimal(Rest, Stack, Callbacks, Opts, [S] ++ Acc); initial_decimal(<>, Stack, Callbacks, Opts, Acc) -> decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc); -initial_decimal(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end}; -initial_decimal(_, _, _, _, _) -> {error, badjson}. +initial_decimal(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end + } + end. decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -401,25 +492,40 @@ decimal(<>, Stack, Callbacks, ?comments_enabled decimal(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> decimal(Stream, [], Callbacks, Opts, Acc) end}; -decimal(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> decimal(<>, Stack, Callbacks, Opts, Acc) end}; -decimal(_, _, _, _, _) -> {error, badjson}. +decimal(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> decimal(<>, Stack, Callbacks, Opts, Acc) end + } + end. e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc); e(<>, Stack, Callbacks, Opts, Acc) when S =:= ?positive; S =:= ?negative -> ex(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -e(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> e(<>, Stack, Callbacks, Opts, Acc) end}; -e(_, _, _, _, _) -> {error, badjson}. +e(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> e(<>, Stack, Callbacks, Opts, Acc) end + } + end. ex(<>, Stack, Callbacks, Opts, Acc) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Callbacks, Opts, [S] ++ Acc); -ex(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> ex(<>, Stack, Callbacks, Opts, Acc) end}; -ex(_, _, _, _, _) -> {error, badjson}. +ex(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> ex(<>, Stack, Callbacks, Opts, Acc) end + } + end. exp(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> @@ -441,79 +547,134 @@ exp(<>, Stack, Callbacks, Opts, Acc) when ?is_whitespa exp(<<>>, [], Callbacks, Opts, Acc) -> {fold(end_of_stream, fold({float, lists:reverse(Acc)}, Callbacks)), fun(Stream) -> exp(Stream, [], Callbacks, Opts, Acc) end}; -exp(Bin, Stack, Callbacks, Opts, Acc) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> exp(<>, Stack, Callbacks, Opts, Acc) end}; -exp(_, _, _, _, _) -> {error, badjson}. +exp(Bin, Stack, Callbacks, Opts, Acc) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> exp(<>, Stack, Callbacks, Opts, Acc) end + } + end. tr(<<$r/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> tru(Rest, Stack, Callbacks, Opts); -tr(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> tr(<>, Stack, Callbacks, Opts) end}; -tr(_, _, _, _) -> {error, badjson}. +tr(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> tr(<>, Stack, Callbacks, Opts) end + } + end. tru(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> true(Rest, Stack, Callbacks, Opts); -tru(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> tru(<>, Stack, Callbacks, Opts) end}; -tru(_, _, _, _) -> {error, badjson}. +tru(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> tru(<>, Stack, Callbacks, Opts) end + } + end. true(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, true}, Callbacks), Opts); -true(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> true(<>, Stack, Callbacks, Opts) end}; -true(_, _, _, _) -> {error, badjson}. +true(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> true(<>, Stack, Callbacks, Opts) end + } + end. fa(<<$a/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fal(Rest, Stack, Callbacks, Opts); -fa(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fa(<>, Stack, Callbacks, Opts) end}; -fa(_, _, _, _) -> {error, badjson}. +fa(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fa(<>, Stack, Callbacks, Opts) end + } + end. fal(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> fals(Rest, Stack, Callbacks, Opts); -fal(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fal(<>, Stack, Callbacks, Opts) end}; -fal(_, _, _, _) -> {error, badjson}. +fal(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fal(<>, Stack, Callbacks, Opts) end + } + end. fals(<<$s/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> false(Rest, Stack, Callbacks, Opts); -fals(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> fals(<>, Stack, Callbacks, Opts) end}; -fals(_, _, _, _) -> {error, badjson}. +fals(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> fals(<>, Stack, Callbacks, Opts) end + } + end. false(<<$e/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, false}, Callbacks), Opts); -false(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> false(<>, Stack, Callbacks, Opts) end}; -false(_, _, _, _) -> {error, badjson}. +false(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> false(<>, Stack, Callbacks, Opts) end + } + end. nu(<<$u/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> nul(Rest, Stack, Callbacks, Opts); -nu(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> nu(<>, Stack, Callbacks, Opts) end}; -nu(_, _, _, _) -> {error, badjson}. +nu(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> nu(<>, Stack, Callbacks, Opts) end + } + end. nul(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> null(Rest, Stack, Callbacks, Opts); -nul(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> nul(<>, Stack, Callbacks, Opts) end}; -nul(_, _, _, _) -> {error, badjson}. +nul(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> nul(<>, Stack, Callbacks, Opts) end + } + end. null(<<$l/?encoding, Rest/binary>>, Stack, Callbacks, Opts) -> maybe_done(Rest, Stack, fold({literal, null}, Callbacks), Opts); -null(Bin, Stack, Callbacks, Opts) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> null(<>, Stack, Callbacks, Opts) end}; -null(_, _, _, _) -> {error, badjson}. +null(Bin, Stack, Callbacks, Opts) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> null(<>, Stack, Callbacks, Opts) end + } + end. %% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode @@ -524,22 +685,35 @@ null(_, _, _, _) -> {error, badjson}. maybe_comment(<>, Resume) -> comment(Rest, Resume); -maybe_comment(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_comment(<>, Resume) end}; -maybe_comment(_, _) -> {error, badjson}. +maybe_comment(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, + fun(Stream) -> maybe_comment(<>, Resume) end + } + end. comment(<>, Resume) -> maybe_comment_done(Rest, Resume); comment(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); -comment(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> comment(<>, Resume) end}. +comment(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, fun(Stream) -> comment(<>, Resume) end} + end. maybe_comment_done(<>, Resume) -> Resume(Rest); maybe_comment_done(<<_/?encoding, Rest/binary>>, Resume) -> comment(Rest, Resume); -maybe_comment_done(Bin, Resume) when ?partial_size(Bin) -> - {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end}. \ No newline at end of file +maybe_comment_done(Bin, Resume) -> + case byte_size(Bin) of + ?char_size -> {error, badjson} + ; _ -> + {incomplete, fun(Stream) -> maybe_comment_done(<>, Resume) end} + end. \ No newline at end of file