major whitespace updates to get line lengths down to < 80 characters whenever possible (a few tests ignore this). src/jsx_test.erl was not touched because it's shortly to be replaced

This commit is contained in:
alisdair sullivan 2010-08-25 23:17:10 -07:00
parent ac5e0e4fe3
commit a86dec09ef
6 changed files with 998 additions and 414 deletions

View file

@ -26,7 +26,6 @@
%% @version really, really beta
%% @doc this module defines the interface to the jsx json parsing library
-module(jsx).
@ -70,7 +69,11 @@
%% | {multi_term, true | false}
%% | {encoding, auto | supported_utf()}.
%% @type supported_utf() = utf8 | utf16 | {utf16, little} | utf32 | {utf32, little}.
%% @type supported_utf() = utf8
%% | utf16
%% | {utf16, little}
%% | utf32
%% | {utf32, little}.
%% @type eep0018() = eep0018_object() | eep0018_array().
@ -79,7 +82,13 @@
%% @type eep0018_key() = binary() | atom().
%% @type eep0018_term() = eep0018_array() | eep0018_object() | eep0018_string() | eep0018_number() | true | false | null.
%% @type eep0018_term() = eep0018_array()
%% | eep0018_object()
%% | eep0018_string()
%% | eep0018_number()
%% | true
%% | false
%% | null.
%% @type eep0018_string() = binary().
@ -127,7 +136,8 @@ parser() ->
%% @spec parser(Opts::jsx_opts()) -> jsx_parser()
%% @doc
%% produces a function which takes a binary which may or may not represent an encoded json document and returns a generator
%% produces a function which takes a binary which may or may not represent an
%% encoded json document and returns a generator
%%
%% options:
%% <ul>
@ -137,17 +147,23 @@ parser() ->
%% false</p></li>
%%
%% <li>{encoded_unicode, ascii | codepoint | none}
%% <p>if a \uXXXX escape sequence is encountered within a key or string,
%% this option controls how it is interpreted. none makes no attempt
%% to interpret the value, leaving it unconverted. ascii will convert
%% any value that falls within the ascii range. codepoint will convert
%% any value that is a valid unicode codepoint. note that unicode
%% non-characters (including badly formed surrogates) will never be
%% converted. codepoint is the default</p></li>
%% <p>if a \uXXXX escape sequence is encountered within a key or
%% string, this option controls how it is interpreted. none makes no
%% attempt to interpret the value, leaving it unconverted. ascii will
%% convert any value that falls within the ascii range. codepoint will
%% convert any value that is a valid unicode codepoint. note that
%% unicode non-characters (including badly formed surrogates) will
%% never be converted. codepoint is the default</p></li>
%%
%% <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
%% <p>attempt to parse the binary using the specified encoding. auto will
%% auto detect any supported encoding and is the default</p></li>
%% <li>{encoding, auto
%% | utf8
%% | utf16
%% | {utf16, little}
%% | utf32
%% | {utf32, little}
%% }
%% <p>attempt to parse the binary using the specified encoding. auto
%% will auto detect any supported encoding and is the default</p></li>
%%
%% <li>{multi_term, true | false}
%% <p>usually, documents will be parsed in full before the end_json
@ -183,25 +199,32 @@ json_to_term(JSON) ->
%% options:
%% <ul>
%% <li>{strict, true | false}
%% <p>by default, attempting to convert unwrapped json values (numbers, strings and
%% the atoms true, false and null) result in a badarg exception. if strict equals
%% false, these are instead decoded to their equivalent eep0018 value. default is
%% false</p>
%% <p>by default, attempting to convert unwrapped json values (numbers,
%% strings and the atoms true, false and null) result in a badarg
%% exception. if strict equals false, these are instead decoded to
%% their equivalent eep0018 value. default is false</p>
%%
%% <p>note that there is a problem of ambiguity when parsing unwrapped json
%% numbers that requires special handling</p>
%% <p>note that there is a problem of ambiguity when parsing unwrapped
%% json numbers that requires special handling</p>
%%
%% <p>an unwrapped json number has no unambiguous end marker like a json object,
%% array or string. `1', `12' and `123' may all represent either a complete json
%% number or just the beginning of one. in this case, the parser will always
%% return `{incomplete, More}' rather than potentially terminate before input
%% is exhausted. to force termination, `More/1' may be called with the atom
%% `end_stream' as it's argument. note also that numbers followed by whitespace
%% will be parsed correctly</p></li>
%% <p>an unwrapped json number has no unambiguous end marker like a
%% json object, array or string. `1', `12' and `123' may all represent
%% either a complete json number or just the beginning of one. in this
%% case, the parser will always return `{incomplete, More}' rather than
%% potentially terminate before input is exhausted. to force
%% termination, `More/1' may be called with the atom `end_stream' as
%% its argument. note also that numbers followed by whitespace will be
%% parsed correctly</p></li>
%%
%% <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
%% <p>assume the binary is encoded using the specified binary. default is auto, which
%% attempts to autodetect the encoding</p></li>
%% <li>{encoding, auto
%% | utf8
%% | utf16
%% | {utf16, little}
%% | utf32
%% | {utf32, little}
%% }
%% <p>assume the binary is encoded using the specified encoding. default
%% is auto, which attempts to autodetect the encoding</p></li>
%%
%% <li>{comments, true | false}
%% <p>if true, json documents that contain c style (/* ... */) comments
@ -230,29 +253,38 @@ term_to_json(JSON) ->
%% @spec term_to_json(JSON::eep0018(), Opts::encoder_opts()) -> binary()
%% @doc
%% takes the erlang representation of a json object (as defined in eep0018) and returns a (binary encoded) json string
%% takes the erlang representation of a json object (as defined in eep0018) and
%% returns a (binary encoded) json string
%%
%% options:
%% <ul>
%% <li>{strict, true | false}
%% <p>by default, attempting to convert unwrapped json values (numbers,
%% strings and the atoms true, false and null) result in a badarg exception.
%% if strict equals false, these are instead json encoded. default is false</p></li>
%% strings and the atoms true, false and null) result in a badarg
%% exception. if strict equals false, these are instead json encoded.
%% default is false</p></li>
%%
%% <li>{encoding, utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
%% <li>{encoding, utf8
%% | utf16
%% | {utf16, little}
%% | utf32
%% | {utf32, little}
%% }
%% <p>the encoding of the resulting binary. default is utf8</p></li>
%%
%% <li>space
%% <p>space is equivalent to {space, 1}</p></li>
%%
%% <li>{space, N}
%% <p>place N spaces after each colon and comma in the resulting binary. default is zero</p></li>
%% <p>place N spaces after each colon and comma in the resulting
%% binary. default is zero</p></li>
%%
%% <li>indent
%% <p>indent is equivalent to {indent, 1}</p></li>
%%
%% <li>{indent, N}
%% <p>indent each 'level' of the json structure by N spaces. default is zero</p></li>
%% <p>indent each 'level' of the json structure by N spaces. default is
%% zero</p></li>
%% </ul>
%% @end
@ -273,13 +305,19 @@ is_json(JSON) ->
%% options:
%% <ul>
%% <li>{strict, true | false}
%% <p>by default, unwrapped json values (numbers, strings and the atoms
%% true, false and null) return false. if strict equals true, is_json
%% returns true. default is false</p></li>
%% <p>by default, unwrapped json values (numbers, strings and the
%% atoms true, false and null) return false. if strict equals true,
%% is_json returns true. default is false</p></li>
%%
%% <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
%% <p>assume the binary is encoded using the specified binary. default is auto,
%% which attempts to autodetect the encoding</p></li>
%% <li>{encoding, auto
%% | utf8
%% | utf16
%% | {utf16, little}
%% | utf32
%% | {utf32, little}
%% }
%% <p>assume the binary is encoded using the specified encoding. default
%% is auto, which attempts to autodetect the encoding</p></li>
%%
%% <li>{comments, true | false}
%% <p>if true, json documents that contain c style (/* ... */) comments
@ -300,20 +338,32 @@ format(JSON) ->
%% @spec format(JSON::binary(), Opts::format_opts()) -> binary()
%% @doc
%% formats a binary encoded json string according to the options chose. the defaults will produced a string stripped of all whitespace
%% formats a binary encoded json string according to the options chosen. the
%% defaults will produce a string stripped of all whitespace
%%
%% options:
%% <ul>
%% <li>{strict, true | false}
%% <p>by default, unwrapped json values (numbers, strings and the atoms
%% true, false and null) result in an error. if strict equals true, they
%% are treated as valid json. default is false</p></li>
%% <p>by default, unwrapped json values (numbers, strings and the
%% atoms true, false and null) result in an error. if strict equals
%% true, they are treated as valid json. default is false</p></li>
%%
%% <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
%% <p>assume the binary is encoded using the specified binary. default is auto,
%% which attempts to autodetect the encoding</p></li>
%% <li>{encoding, auto
%% | utf8
%% | utf16
%% | {utf16, little}
%% | utf32
%% | {utf32, little}
%% }
%% <p>assume the binary is encoded using the specified encoding. default
%% is auto, which attempts to autodetect the encoding</p></li>
%%
%% <li>{output_encoding, utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
%% <li>{output_encoding, utf8
%% | utf16
%% | {utf16, little}
%% | utf32
%% | {utf32, little}
%% }
%% <p>the encoding of the resulting binary. default is utf8</p></li>
%%
%% <li>{comments, true | false}
@ -325,13 +375,15 @@ format(JSON) ->
%% <p>space is equivalent to {space, 1}</p></li>
%%
%% <li>{space, N}
%% <p>place N spaces after each colon and comma in the resulting binary. default is zero</p></li>
%% <p>place N spaces after each colon and comma in the resulting
%% binary. default is zero</p></li>
%%
%% <li>indent
%% <p>indent is equivalent to {indent, 1}</p></li>
%%
%% <li>{indent, N}
%% <p>indent each 'level' of the json structure by N spaces. default is zero</p></li>
%% <p>indent each 'level' of the json structure by N spaces. default is
%% zero</p></li>
%% </ul>
%% @end
@ -340,9 +392,17 @@ format(JSON, Opts) ->
%% @spec eventify(List::list()) -> jsx_parser_result()
%% @doc fake the jsx api for any list. useful if you want to serialize a structure to json using the pretty printer, or verify a sequence could be valid json
%% @doc fake the jsx api for any list. useful if you want to serialize a
%% structure to json using the pretty printer, or verify a sequence could be
%% valid json
%% no events left: the returned generator reports `incomplete' and hands back
%% a fun that accepts a further list of events (continuing via eventify/1);
%% calling that fun with anything other than a list raises badarg
eventify([]) ->
fun() -> {incomplete, fun(List) when is_list(List) -> eventify(List); (_) -> erlang:error(badarg) end} end;
fun() ->
{incomplete, fun(List) when is_list(List) ->
eventify(List)
; (_) ->
erlang:error(badarg)
end}
end;
%% at least one event: the returned generator yields the head of the list
%% together with a generator for the remaining events
eventify([Next|Rest]) ->
fun() -> {event, Next, eventify(Rest)} end.
@ -352,43 +412,50 @@ eventify([Next|Rest]) ->
%% encoding detection
%% first check to see if there's a bom, if not, use the rfc4627 method for determining
%% encoding. this function makes some assumptions about the validity of the stream
%% which may delay failure later than if an encoding is explicitly provided
%% first check to see if there's a bom, if not, use the rfc4627 method for
%% determining encoding. this function makes some assumptions about the
%% validity of the stream which may delay failure later than if an encoding is
%% explicitly provided
%% builds a one-argument fun that captures the option list and, when given a
%% stream, hands both to detect_encoding/2
detect_encoding(OptsList) ->
    Detector = fun(Input) ->
        detect_encoding(Input, OptsList)
    end,
    Detector.
%% utf8 bom detection
detect_encoding(<<16#ef, 16#bb, 16#bf, Rest/binary>>, Opts) -> (jsx_utf8:parser(Opts))(Rest);
%% utf32-little bom detection (this has to come before utf16-little or it'll match that)
detect_encoding(<<16#ff, 16#fe, 0, 0, Rest/binary>>, Opts) -> (jsx_utf32le:parser(Opts))(Rest);
detect_encoding(<<16#ef, 16#bb, 16#bf, Rest/binary>>, Opts) ->
(jsx_utf8:parser(Opts))(Rest);
%% utf32-little bom detection (this has to come before utf16-little or it'll
%% match that)
detect_encoding(<<16#ff, 16#fe, 0, 0, Rest/binary>>, Opts) ->
(jsx_utf32le:parser(Opts))(Rest);
%% utf16-big bom detection
detect_encoding(<<16#fe, 16#ff, Rest/binary>>, Opts) -> (jsx_utf16:parser(Opts))(Rest);
detect_encoding(<<16#fe, 16#ff, Rest/binary>>, Opts) ->
(jsx_utf16:parser(Opts))(Rest);
%% utf16-little bom detection
detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Opts) -> (jsx_utf16le:parser(Opts))(Rest);
detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Opts) ->
(jsx_utf16le:parser(Opts))(Rest);
%% utf32-big bom detection
detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Opts) -> (jsx_utf32:parser(Opts))(Rest);
detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Opts) ->
(jsx_utf32:parser(Opts))(Rest);
%% utf32-little null order detection
detect_encoding(<<X, 0, 0, 0, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
(jsx_utf32le:parser(Opts))(JSON);
%% utf16-big null order detection
detect_encoding(<<0, X, 0, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
(jsx_utf16:parser(Opts))(JSON);
%% utf16-little null order detection
detect_encoding(<<X, 0, Y, 0, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
(jsx_utf16le:parser(Opts))(JSON);
%% utf32-big null order detection
detect_encoding(<<0, 0, 0, X, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
(jsx_utf32:parser(Opts))(JSON);
%% utf16-little null order detection
detect_encoding(<<X, 0, _, 0, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
(jsx_utf16le:parser(Opts))(JSON);
%% utf16-big null order detection
detect_encoding(<<0, X, 0, _, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
(jsx_utf16:parser(Opts))(JSON);
%% utf8 null order detection
detect_encoding(<<X, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
(jsx_utf8:parser(Opts))(JSON);
%% a problem, to autodetect naked single digits' encoding, there is not enough data
%% to conclusively determine the encoding correctly. below is an attempt to solve
%% the problem
%% a problem, to autodetect naked single digits' encoding, there is not enough
%% data to conclusively determine the encoding correctly. below is an attempt
%% to solve the problem
detect_encoding(<<X>>, Opts) when X =/= 0 ->
{incomplete,
fun(end_stream) ->