From 46df6e5a385db92d9ec7feefc65a02e80639e278 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 23 Jun 2010 16:43:09 -0700 Subject: [PATCH] incomplete work on multi term streams --- src/jsx.erl | 17 +++++++++++------ src/jsx_decoder.erl | 21 ++++++++++++++++++--- src/jsx_decoder.hrl | 2 +- src/jsx_types.hrl | 2 +- 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/jsx.erl b/src/jsx.erl index 317d0a5..843ddd2 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -160,14 +160,19 @@ parse_opts(Opts) -> parse_opts([], Opts) -> Opts; -parse_opts([{comments, Value}|Rest], {_Comments, EscapedUnicode, Stream}) -> +parse_opts([{comments, Value}|Rest], {_Comments, EscapedUnicode, Multi}) -> true = lists:member(Value, [true, false]), - parse_opts(Rest, {Value, EscapedUnicode, Stream}); -parse_opts([{escaped_unicode, Value}|Rest], {Comments, _EscapedUnicode, Stream}) -> + parse_opts(Rest, {Value, EscapedUnicode, Multi}); +parse_opts([{escaped_unicode, Value}|Rest], {Comments, _EscapedUnicode, Multi}) -> true = lists:member(Value, [ascii, codepoint, none]), - parse_opts(Rest, {Comments, Value, Stream}); -parse_opts([{stream_mode, Value}|Rest], {Comments, EscapedUnicode, _Stream}) -> - true = lists:member(Value, [true, false]), + parse_opts(Rest, {Comments, Value, Multi}); +parse_opts([{multi_term, Value}|Rest], {Comments, EscapedUnicode, _Multi}) -> + ok = case Value of + S when is_binary(S) -> ok + ; whitespace -> ok + ; true -> ok + ; false -> ok + end, parse_opts(Rest, {Comments, EscapedUnicode, Value}); parse_opts([{encoding, _}|Rest], Opts) -> parse_opts(Rest, Opts). diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 087f041..6b10bca 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -88,6 +88,10 @@ maybe_done(<>, [array|_] = Stack, Opts) -> value(Rest, Stack, Opts); maybe_done(<>, Stack, ?comments_enabled(Opts)) -> maybe_comment(Rest, fun(Resume) -> maybe_done(Resume, Stack, Opts) end); +maybe_done(Bin, [], ?multi_term(Opts)) -> + {event, end_json, fun(Stream) -> + Rest = strip(<>, Opts), + start(<>, [], Opts) end}; maybe_done(<<>>, [], Opts) -> {event, end_json, fun(Stream) -> maybe_done(Stream, [], Opts) end}; maybe_done(Bin, Stack, Opts) -> @@ -635,8 +639,8 @@ null(Bin, Stack, Opts) -> ). -%% comments are c style, /* blah blah */ and are STRONGLY discouraged. any unicode -%% character is valid in a comment, except, obviously the */ sequence which ends +%% comments are c style, ex: /* blah blah */ +%% any unicode character is valid in a comment except the */ sequence which ends %% the comment. they're implemented as a closure called when the comment ends that %% returns execution to the point where the comment began. comments are not %% recorded in any way, simply parsed. @@ -669,4 +673,15 @@ maybe_comment_done(Bin, Resume) -> ?incomplete(?partial_codepoint(Bin), fun(Stream) -> maybe_comment_done(<>, Resume) end, ?ferror - ). \ No newline at end of file + ). + + +%% strip whitespace and comments (if enabled) from a stream, returning the +%% stream when the first non-whitespace/comment character is encountered + +strip(<>, Opts) when ?is_whitespace(S) -> + strip(Rest, Opts); +strip(<>, ?comments_enabled(Opts)) -> + maybe_comment(Rest, fun(Resume) -> strip(Resume, Opts) end); +strip(Bin, _Opts) -> + Bin. \ No newline at end of file diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index 20ba02d..a7cfab3 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -25,7 +25,7 @@ -define(comments_enabled(X), {true, _, _} = X). -define(escaped_unicode_to_ascii(X), {_, ascii, _} = X). -define(escaped_unicode_to_codepoint(X), {_, codepoint, _} = X). --define(stream_mode(X), {_, _, true} = X). +-define(multi_term(X), {_, _, true} = X). %% whitespace -define(space, 16#20). diff --git a/src/jsx_types.hrl b/src/jsx_types.hrl index b0baa7d..891dc56 100644 --- a/src/jsx_types.hrl +++ b/src/jsx_types.hrl @@ -30,7 +30,7 @@ -type jsx_opts() :: [jsx_opt()]. -type jsx_opt() :: {comments, true | false} | {escaped_unicode, ascii | codepoint | none} - | {stream_mode, true | false} + | {multi_term, true | false | whitespace | binary()} | {encoding, auto | utf8 | utf16 | utf16le | utf32 | utf32le }.