diff --git a/include/jsx.hrl b/include/jsx.hrl index af38380..8bcb854 100644 --- a/include/jsx.hrl +++ b/include/jsx.hrl @@ -24,15 +24,6 @@ -%% opts record --record(opts, { - comments = false, - escaped_unicode = codepoint, - multi_term = false, - encoding = auto -}). - - -spec parser() -> jsx_parser(). -spec parser(Opts::jsx_opts()) -> jsx_parser(). diff --git a/include/jsx_decoder.hrl b/include/jsx_decoder.hrl index d9ad498..e3125d3 100644 --- a/include/jsx_decoder.hrl +++ b/include/jsx_decoder.hrl @@ -30,12 +30,20 @@ --spec parse(JSON::eep0018(), Opts::jsx_opts()) -> jsx_parser_result(). +-spec parser(OptsList::jsx_opts()) -> jsx_parser(). + + +%% opts record for decoder +-record(opts, { + comments = false, + escaped_unicode = codepoint, + multi_term = false, + encoding = auto +}). %% option flags --define(comments_enabled(X), {_, true, _, _, _} = X). -define(escaped_unicode_to_ascii(X), {_, _, ascii, _, _} = X). -define(escaped_unicode_to_codepoint(X), {_, _, codepoint, _, _} = X). -define(multi_term(X), {_, _, _, true, _} = X). @@ -123,12 +131,36 @@ -endif. --export([parse/2]). +-export([parser/1]). -parse(JSON, Opts) -> - start(JSON, [], Opts). +parser(OptsList) -> + case parse_opts(OptsList) of + {error, _} -> {error, badopt} %% parse_opts/2 reports a malformed option list as {error, badarg}; never let that tuple fall through as Opts + ; Opts -> fun(JSON) -> start(JSON, [], Opts) end + end. + + +%% converts an option proplist into an #opts{} record; a malformed list yields {error, badarg} +parse_opts(Opts) -> + parse_opts(Opts, #opts{}). 
+ +parse_opts([], Opts) -> + Opts; +parse_opts([{comments, Value}|Rest], Opts) -> + true = lists:member(Value, [true, false]), + parse_opts(Rest, Opts#opts{comments = Value}); +parse_opts([{escaped_unicode, Value}|Rest], Opts) -> + true = lists:member(Value, [ascii, codepoint, none]), + parse_opts(Rest, Opts#opts{escaped_unicode = Value}); +parse_opts([{multi_term, Value}|Rest], Opts) -> + true = lists:member(Value, [true, false]), + parse_opts(Rest, Opts#opts{multi_term = Value}); +parse_opts([{encoding, _}|Rest], Opts) -> + parse_opts(Rest, Opts); +parse_opts(_, _) -> + {error, badarg}. start(<>, Stack, Opts) when ?is_whitespace(S) -> @@ -151,7 +183,7 @@ start(<>, Stack, Opts) -> zero(Rest, Stack, Opts, "0"); start(<>, Stack, Opts) when ?is_nonzero(S) -> integer(Rest, Stack, Opts, [S]); -start(<>, Stack, ?comments_enabled(Opts)) -> +start(<>, Stack, #opts{comments = true} = Opts) -> maybe_comment(Rest, fun(Resume) -> start(Resume, Stack, Opts) end); start(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of @@ -170,7 +202,7 @@ maybe_done(<>, [object|Stack], Opts) -> key(Rest, [key|Stack], Opts); maybe_done(<>, [array|_] = Stack, Opts) -> value(Rest, Stack, Opts); -maybe_done(<>, Stack, ?comments_enabled(Opts)) -> +maybe_done(<>, Stack, #opts{comments = true} = Opts) -> maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Opts) end); maybe_done(Rest, [], ?multi_term(Opts)) -> {event, end_json, fun() -> start(Rest, [], Opts) end}; @@ -185,7 +217,7 @@ maybe_done(Bin, Stack, Opts) -> done(<>, Opts) when ?is_whitespace(S) -> done(Rest, Opts); -done(<>, ?comments_enabled(Opts)) -> +done(<>, #opts{comments = true} = Opts) -> maybe_comment(Rest, fun(Resume) -> done(Resume, Opts) end); done(<<>>, Opts) -> {event, end_json, fun() -> {incomplete, fun(end_stream) -> {error, badjson}; (Stream) -> done(Stream, Opts) end} end}; @@ -202,7 +234,7 @@ object(<>, Stack, Opts) -> string(Rest, Stack, Opts, []); object(<>, [key|Stack], Opts) -> {event, end_object, fun() -> 
maybe_done(Rest, Stack, Opts) end}; -object(<>, Stack, ?comments_enabled(Opts)) -> +object(<>, Stack, #opts{comments = true} = Opts) -> maybe_comment(Rest, fun(Resume) -> object(Resume, Stack, Opts) end); object(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of @@ -233,7 +265,7 @@ array(<>, Stack, Opts) -> {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; array(<>, [array|Stack], Opts) -> {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}; -array(<>, Stack, ?comments_enabled(Opts)) -> +array(<>, Stack, #opts{comments = true} = Opts) -> maybe_comment(Rest, fun(Resume) -> array(Resume, Stack, Opts) end); array(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of @@ -262,7 +294,7 @@ value(<>, Stack, Opts) -> {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; value(<>, Stack, Opts) -> {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; -value(<>, Stack, ?comments_enabled(Opts)) -> +value(<>, Stack, #opts{comments = true} = Opts) -> maybe_comment(Rest, fun(Resume) -> value(Resume, Stack, Opts) end); value(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of @@ -275,7 +307,7 @@ colon(<>, Stack, Opts) when ?is_whitespace(S) -> colon(Rest, Stack, Opts); colon(<>, [key|Stack], Opts) -> value(Rest, [object|Stack], Opts); -colon(<>, Stack, ?comments_enabled(Opts)) -> +colon(<>, Stack, #opts{comments = true} = Opts) -> maybe_comment(Rest, fun(Resume) -> colon(Resume, Stack, Opts) end); colon(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of @@ -288,7 +320,7 @@ key(<>, Stack, Opts) when ?is_whitespace(S) -> key(Rest, Stack, Opts); key(<>, Stack, Opts) -> string(Rest, Stack, Opts, []); -key(<>, Stack, ?comments_enabled(Opts)) -> +key(<>, Stack, #opts{comments = true} = Opts) -> maybe_comment(Rest, fun(Resume) -> key(Resume, Stack, Opts) end); key(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of @@ -488,7 +520,7 @@ zero(<>, Stack, Opts, Acc) -> initial_decimal(Rest, Stack, Opts, [?decimalpoint] ++ Acc); 
zero(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> {event, {integer, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; -zero(<>, Stack, ?comments_enabled(Opts), Acc) -> +zero(<>, Stack, #opts{comments = true} = Opts, Acc) -> maybe_comment(Rest, fun(Resume) -> zero(Resume, Stack, Opts, Acc) end); zero(<<>>, [], Opts, Acc) -> {incomplete, fun(end_stream) -> @@ -528,7 +560,7 @@ integer(<<$E/?encoding, Rest/binary>>, Stack, Opts, Acc) -> e(Rest, Stack, Opts, "e0." ++ Acc); integer(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> {event, {integer, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; -integer(<>, Stack, ?comments_enabled(Opts), Acc) -> +integer(<>, Stack, #opts{comments = true} = Opts, Acc) -> maybe_comment(Rest, fun(Resume) -> integer(Resume, Stack, Opts, Acc) end); integer(<<>>, [], Opts, Acc) -> {incomplete, fun(end_stream) -> @@ -577,7 +609,7 @@ decimal(<<$E/?encoding, Rest/binary>>, Stack, Opts, Acc) -> e(Rest, Stack, Opts, "e" ++ Acc); decimal(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> {event, {float, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; -decimal(<>, Stack, ?comments_enabled(Opts), Acc) -> +decimal(<>, Stack, #opts{comments = true} = Opts, Acc) -> maybe_comment(Rest, fun(Resume) -> decimal(Resume, Stack, Opts, Acc) end); decimal(<<>>, [], Opts, Acc) -> {incomplete, fun(end_stream) -> @@ -631,7 +663,7 @@ exp(<>, Stack, Opts, Acc) -> exp(Rest, Stack, Opts, [?zero] ++ Acc); exp(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> {event, {float, lists:reverse(Acc)}, fun() -> maybe_done(Rest, Stack, Opts) end}; -exp(<>, Stack, ?comments_enabled(Opts), Acc) -> +exp(<>, Stack, #opts{comments = true} = Opts, Acc) -> maybe_comment(Rest, fun(Resume) -> exp(Resume, Stack, Opts, Acc) end); exp(<<>>, [], Opts, Acc) -> {incomplete, fun(end_stream) -> diff --git a/src/jsx.erl b/src/jsx.erl index 222874d..70688b0 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -161,17 +161,13 @@ parser() -> %% @end 
parser(OptsList) -> - F = case proplists:get_value(encoding, OptsList, auto) of - utf8 -> fun jsx_utf8:parse/2 - ; utf16 -> fun jsx_utf16:parse/2 - ; utf32 -> fun jsx_utf32:parse/2 - ; {utf16, little} -> fun jsx_utf16le:parse/2 - ; {utf32, little} -> fun jsx_utf32le:parse/2 - ; auto -> fun detect_encoding/2 - end, - case parse_opts(OptsList) of - {error, badopt} -> {error, badopt} - ; Opts -> fun(Stream) -> F(Stream, Opts) end + case proplists:get_value(encoding, OptsList, auto) of + utf8 -> jsx_utf8:parser(OptsList) + ; utf16 -> jsx_utf16:parser(OptsList) + ; utf32 -> jsx_utf32:parser(OptsList) + ; {utf16, little} -> jsx_utf16le:parser(OptsList) + ; {utf32, little} -> jsx_utf32le:parser(OptsList) + ; auto -> detect_encoding(OptsList) end. @@ -352,64 +348,44 @@ eventify([Next|Rest]) -> fun() -> {event, Next, eventify(Rest)} end. -%% ---------------------------------------------------------------------------- + %% internal functions -%% ---------------------------------------------------------------------------- -%% option parsing - -%% converts a proplist into a tuple -parse_opts(Opts) -> - parse_opts(Opts, #opts{}). - -parse_opts([], Opts) -> - Opts; -parse_opts([{comments, Value}|Rest], Opts) -> - true = lists:member(Value, [true, false]), - parse_opts(Rest, Opts#opts{comments = Value}); -parse_opts([{escaped_unicode, Value}|Rest], Opts) -> - true = lists:member(Value, [ascii, codepoint, none]), - parse_opts(Rest, Opts#opts{escaped_unicode = Value}); -parse_opts([{multi_term, Value}|Rest], Opts) -> - true = lists:member(Value, [true, false]), - parse_opts(Rest, Opts#opts{multi_term = Value}); -parse_opts([{encoding, _}|Rest], Opts) -> - parse_opts(Rest, Opts); -parse_opts(_, _) -> - {error, badopt}. - %% encoding detection %% first check to see if there's a bom, if not, use the rfc4627 method for determining %% encoding. 
this function makes some assumptions about the validity of the stream %% which may delay failure later than if an encoding is explicitly provided + +detect_encoding(OptsList) -> + fun(Stream) -> detect_encoding(Stream, OptsList) end. %% utf8 bom detection -detect_encoding(<<16#ef, 16#bb, 16#bf, Rest/binary>>, Opts) -> jsx_utf8:parse(Rest, Opts); +detect_encoding(<<16#ef, 16#bb, 16#bf, Rest/binary>>, Opts) -> (jsx_utf8:parser(Opts))(Rest); %% utf32-little bom detection (this has to come before utf16-little or it'll match that) -detect_encoding(<<16#ff, 16#fe, 0, 0, Rest/binary>>, Opts) -> jsx_utf32le:parse(Rest, Opts); +detect_encoding(<<16#ff, 16#fe, 0, 0, Rest/binary>>, Opts) -> (jsx_utf32le:parser(Opts))(Rest); %% utf16-big bom detection -detect_encoding(<<16#fe, 16#ff, Rest/binary>>, Opts) -> jsx_utf16:parse(Rest, Opts); +detect_encoding(<<16#fe, 16#ff, Rest/binary>>, Opts) -> (jsx_utf16:parser(Opts))(Rest); %% utf16-little bom detection -detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Opts) -> jsx_utf16le:parse(Rest, Opts); +detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Opts) -> (jsx_utf16le:parser(Opts))(Rest); %% utf32-big bom detection -detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Opts) -> jsx_utf32:parse(Rest, Opts); +detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Opts) -> (jsx_utf32:parser(Opts))(Rest); %% utf32-little null order detection detect_encoding(<> = JSON, Opts) when X =/= 0 -> - jsx_utf32le:parse(JSON, Opts); + (jsx_utf32le:parser(Opts))(JSON); %% utf16-big null order detection detect_encoding(<<0, X, 0, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 -> - jsx_utf16:parse(JSON, Opts); + (jsx_utf16:parser(Opts))(JSON); %% utf16-little null order detection detect_encoding(<> = JSON, Opts) when X =/= 0, Y =/= 0 -> - jsx_utf16le:parse(JSON, Opts); + (jsx_utf16le:parser(Opts))(JSON); %% utf32-big null order detection detect_encoding(<<0, 0, 0, X, _Rest/binary>> = JSON, Opts) when X =/= 0 -> - jsx_utf32:parse(JSON, Opts); + 
(jsx_utf32:parser(Opts))(JSON); %% utf8 null order detection detect_encoding(<> = JSON, Opts) when X =/= 0, Y =/= 0 -> - jsx_utf8:parse(JSON, Opts); + (jsx_utf8:parser(Opts))(JSON); %% a problem, to autodetect naked single digits' encoding, there is not enough data %% to conclusively determine the encoding correctly. below is an attempt to solve @@ -418,7 +394,7 @@ detect_encoding(<>, Opts) when X =/= 0 -> {incomplete, fun(end_stream) -> try - {incomplete, Next} = jsx_utf8:parse(<>, Opts), + {incomplete, Next} = (jsx_utf8:parser(Opts))(<>), Next(end_stream) catch error:function_clause -> {error, badjson} end @@ -429,7 +405,7 @@ detect_encoding(<<0, X>>, Opts) when X =/= 0 -> {incomplete, fun(end_stream) -> try - {incomplete, Next} = jsx_utf16:parse(<<0, X>>, Opts), + {incomplete, Next} = (jsx_utf16:parser(Opts))(<<0, X>>), Next(end_stream) catch error:function_clause -> {error, badjson} end @@ -440,7 +416,7 @@ detect_encoding(<>, Opts) when X =/= 0 -> {incomplete, fun(end_stream) -> try - {incomplete, Next} = jsx_utf16le:parse(<>, Opts), + {incomplete, Next} = (jsx_utf16le:parser(Opts))(<>), Next(end_stream) catch error:function_clause -> {error, badjson} end