major whitespace updates to get line lengths down to < 80 characters whenever possible (a few tests ignore this). src/jsx_test.erl was not touched because it's shortly to be replaced
This commit is contained in:
parent
ac5e0e4fe3
commit
a86dec09ef
6 changed files with 998 additions and 414 deletions
211
src/jsx.erl
211
src/jsx.erl
|
@ -26,7 +26,6 @@
|
|||
%% @version really, really beta
|
||||
%% @doc this module defines the interface to the jsx json parsing library
|
||||
|
||||
|
||||
-module(jsx).
|
||||
|
||||
|
||||
|
@ -70,7 +69,11 @@
|
|||
%% | {multi_term, true | false}
|
||||
%% | {encoding, auto | supported_utf()}.
|
||||
|
||||
%% @type supported_utf() = utf8 | utf16 | {utf16, little} | utf32 | {utf32, little}.
|
||||
%% @type supported_utf() = utf8
|
||||
%% | utf16
|
||||
%% | {utf16, little}
|
||||
%% | utf32
|
||||
%% | {utf32, little}.
|
||||
|
||||
%% @type eep0018() = eep0018_object() | eep0018_array().
|
||||
|
||||
|
@ -79,7 +82,13 @@
|
|||
|
||||
%% @type eep0018_key() = binary() | atom().
|
||||
|
||||
%% @type eep0018_term() = eep0018_array() | eep0018_object() | eep0018_string() | eep0018_number() | true | false | null.
|
||||
%% @type eep0018_term() = eep0018_array()
|
||||
%% | eep0018_object()
|
||||
%% | eep0018_string()
|
||||
%% | eep0018_number()
|
||||
%% | true
|
||||
%% | false
|
||||
%% | null.
|
||||
|
||||
%% @type eep0018_string() = binary().
|
||||
|
||||
|
@ -127,7 +136,8 @@ parser() ->
|
|||
|
||||
%% @spec parser(Opts::jsx_opts()) -> jsx_parser()
|
||||
%% @doc
|
||||
%% produces a function which takes a binary which may or may not represent an encoded json document and returns a generator
|
||||
%% produces a function which takes a binary which may or may not represent an
|
||||
%% encoded json document and returns a generator
|
||||
%%
|
||||
%% options:
|
||||
%% <ul>
|
||||
|
@ -137,17 +147,23 @@ parser() ->
|
|||
%% false</p></li>
|
||||
%%
|
||||
%% <li>{encoded_unicode, ascii | codepoint | none}
|
||||
%% <p>if a \uXXXX escape sequence is encountered within a key or string,
|
||||
%% this option controls how it is interpreted. none makes no attempt
|
||||
%% to interpret the value, leaving it unconverted. ascii will convert
|
||||
%% any value that falls within the ascii range. codepoint will convert
|
||||
%% any value that is a valid unicode codepoint. note that unicode
|
||||
%% non-characters (including badly formed surrogates) will never be
|
||||
%% converted. codepoint is the default</p></li>
|
||||
%% <p>if a \uXXXX escape sequence is encountered within a key or
|
||||
%% string, this option controls how it is interpreted. none makes no
|
||||
%% attempt to interpret the value, leaving it unconverted. ascii will
|
||||
%% convert any value that falls within the ascii range. codepoint will
|
||||
%% convert any value that is a valid unicode codepoint. note that
|
||||
%% unicode non-characters (including badly formed surrogates) will
|
||||
%% never be converted. codepoint is the default</p></li>
|
||||
%%
|
||||
%% <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
|
||||
%% <p>attempt to parse the binary using the specified encoding. auto will
|
||||
%% auto detect any supported encoding and is the default</p></li>
|
||||
%% <li>{encoding, auto
|
||||
%% | utf8
|
||||
%% | utf16
|
||||
%% | {utf16, little}
|
||||
%% | utf32
|
||||
%% | {utf32, little}
|
||||
%% }
|
||||
%% <p>attempt to parse the binary using the specified encoding. auto
|
||||
%% will auto detect any supported encoding and is the default</p></li>
|
||||
%%
|
||||
%% <li>{multi_term, true | false}
|
||||
%% <p>usually, documents will be parsed in full before the end_json
|
||||
|
@ -183,25 +199,32 @@ json_to_term(JSON) ->
|
|||
%% options:
|
||||
%% <ul>
|
||||
%% <li>{strict, true | false}
|
||||
%% <p>by default, attempting to convert unwrapped json values (numbers, strings and
|
||||
%% the atoms true, false and null) result in a badarg exception. if strict equals
|
||||
%% false, these are instead decoded to their equivalent eep0018 value. default is
|
||||
%% false</p>
|
||||
%% <p>by default, attempting to convert unwrapped json values (numbers,
|
||||
%% strings and the atoms true, false and null) result in a badarg
|
||||
%% exception. if strict equals false, these are instead decoded to
|
||||
%% their equivalent eep0018 value. default is false</p>
|
||||
%%
|
||||
%% <p>note that there is a problem of ambiguity when parsing unwrapped json
|
||||
%% numbers that requires special handling</p>
|
||||
%% <p>note that there is a problem of ambiguity when parsing unwrapped
|
||||
%% json numbers that requires special handling</p>
|
||||
%%
|
||||
%% <p>an unwrapped json number has no unambiguous end marker like a json object,
|
||||
%% array or string. `1', `12' and `123' may all represent either a complete json
|
||||
%% number or just the beginning of one. in this case, the parser will always
|
||||
%% return `{incomplete, More}' rather than potentially terminate before input
|
||||
%% is exhausted. to force termination, `More/1' may be called with the atom
|
||||
%% `end_stream' as it's argument. note also that numbers followed by whitespace
|
||||
%% will be parsed correctly</p></li>
|
||||
%% <p>an unwrapped json number has no unambiguous end marker like a
|
||||
%% json object, array or string. `1', `12' and `123' may all represent
|
||||
%% either a complete json number or just the beginning of one. in this
|
||||
%% case, the parser will always return `{incomplete, More}' rather than
|
||||
%% potentially terminate before input is exhausted. to force
|
||||
%% termination, `More/1' may be called with the atom `end_stream' as
|
||||
%% its argument. note also that numbers followed by whitespace will be
|
||||
%% parsed correctly</p></li>
|
||||
%%
|
||||
%% <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
|
||||
%% <p>assume the binary is encoded using the specified binary. default is auto, which
|
||||
%% attempts to autodetect the encoding</p></li>
|
||||
%% <li>{encoding, auto
|
||||
%% | utf8
|
||||
%% | utf16
|
||||
%% | {utf16, little}
|
||||
%% | utf32
|
||||
%% | {utf32, little}
|
||||
%% }
|
||||
%% <p>assume the binary is encoded using the specified encoding. default
|
||||
%% is auto, which attempts to autodetect the encoding</p></li>
|
||||
%%
|
||||
%% <li>{comments, true | false}
|
||||
%% <p>if true, json documents that contain c style (/* ... */) comments
|
||||
|
@ -230,29 +253,38 @@ term_to_json(JSON) ->
|
|||
|
||||
%% @spec term_to_json(JSON::eep0018(), Opts::encoder_opts()) -> binary()
|
||||
%% @doc
|
||||
%% takes the erlang representation of a json object (as defined in eep0018) and returns a (binary encoded) json string
|
||||
%% takes the erlang representation of a json object (as defined in eep0018) and
|
||||
%% returns a (binary encoded) json string
|
||||
%%
|
||||
%% options:
|
||||
%% <ul>
|
||||
%% <li>{strict, true | false}
|
||||
%% <p>by default, attempting to convert unwrapped json values (numbers,
|
||||
%% strings and the atoms true, false and null) result in a badarg exception.
|
||||
%% if strict equals false, these are instead json encoded. default is false</p></li>
|
||||
%% strings and the atoms true, false and null) result in a badarg
|
||||
%% exception. if strict equals false, these are instead json encoded.
|
||||
%% default is false</p></li>
|
||||
%%
|
||||
%% <li>{encoding, utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
|
||||
%% <li>{encoding, utf8
|
||||
%% | utf16
|
||||
%% | {utf16, little}
|
||||
%% | utf32
|
||||
%% | {utf32, little}
|
||||
%% }
|
||||
%% <p>the encoding of the resulting binary. default is utf8</p></li>
|
||||
%%
|
||||
%% <li>space
|
||||
%% <p>space is equivalent to {space, 1}</p></li>
|
||||
%%
|
||||
%% <li>{space, N}
|
||||
%% <p>place N spaces after each colon and comma in the resulting binary. default is zero</p></li>
|
||||
%% <p>place N spaces after each colon and comma in the resulting
|
||||
%% binary. default is zero</p></li>
|
||||
%%
|
||||
%% <li>indent
|
||||
%% <p>indent is equivalent to {indent, 1}</p></li>
|
||||
%%
|
||||
%% <li>{indent, N}
|
||||
%% <p>indent each 'level' of the json structure by N spaces. default is zero</p></li>
|
||||
%% <p>indent each 'level' of the json structure by N spaces. default is
|
||||
%% zero</p></li>
|
||||
%% </ul>
|
||||
%% @end
|
||||
|
||||
|
@ -273,13 +305,19 @@ is_json(JSON) ->
|
|||
%% options:
|
||||
%% <ul>
|
||||
%% <li>{strict, true | false}
|
||||
%% <p>by default, unwrapped json values (numbers, strings and the atoms
|
||||
%% true, false and null) return false. if strict equals true, is_json
|
||||
%% returns true. default is false</p></li>
|
||||
%% <p>by default, unwrapped json values (numbers, strings and the
|
||||
%% atoms true, false and null) return false. if strict equals true,
|
||||
%% is_json returns true. default is false</p></li>
|
||||
%%
|
||||
%% <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
|
||||
%% <p>assume the binary is encoded using the specified binary. default is auto,
|
||||
%% which attempts to autodetect the encoding</p></li>
|
||||
%% <li>{encoding, auto
|
||||
%% | utf8
|
||||
%% | utf16
|
||||
%% | {utf16, little}
|
||||
%% | utf32
|
||||
%% | {utf32, little}
|
||||
%% }
|
||||
%% <p>assume the binary is encoded using the specified encoding. default
|
||||
%% is auto, which attempts to autodetect the encoding</p></li>
|
||||
%%
|
||||
%% <li>{comments, true | false}
|
||||
%% <p>if true, json documents that contain c style (/* ... */) comments
|
||||
|
@ -300,20 +338,32 @@ format(JSON) ->
|
|||
|
||||
%% @spec format(JSON::binary(), Opts::format_opts()) -> binary()
|
||||
%% @doc
|
||||
%% formats a binary encoded json string according to the options chose. the defaults will produced a string stripped of all whitespace
|
||||
%% formats a binary encoded json string according to the options chosen. the
|
||||
%% defaults will produce a string stripped of all whitespace
|
||||
%%
|
||||
%% options:
|
||||
%% <ul>
|
||||
%% <li>{strict, true | false}
|
||||
%% <p>by default, unwrapped json values (numbers, strings and the atoms
|
||||
%% true, false and null) result in an error. if strict equals true, they
|
||||
%% are treated as valid json. default is false</p></li>
|
||||
%% <p>by default, unwrapped json values (numbers, strings and the
|
||||
%% atoms true, false and null) result in an error. if strict equals
|
||||
%% true, they are treated as valid json. default is false</p></li>
|
||||
%%
|
||||
%% <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
|
||||
%% <p>assume the binary is encoded using the specified binary. default is auto,
|
||||
%% which attempts to autodetect the encoding</p></li>
|
||||
%% <li>{encoding, auto
|
||||
%% | utf8
|
||||
%% | utf16
|
||||
%% | {utf16, little}
|
||||
%% | utf32
|
||||
%% | {utf32, little}
|
||||
%% }
|
||||
%% <p>assume the binary is encoded using the specified encoding. default
|
||||
%% is auto, which attempts to autodetect the encoding</p></li>
|
||||
%%
|
||||
%% <li>{output_encoding, utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
|
||||
%% <li>{output_encoding, utf8
|
||||
%% | utf16
|
||||
%% | {utf16, little}
|
||||
%% | utf32
|
||||
%% | {utf32, little}
|
||||
%% }
|
||||
%% <p>the encoding of the resulting binary. default is utf8</p></li>
|
||||
%%
|
||||
%% <li>{comments, true | false}
|
||||
|
@ -325,13 +375,15 @@ format(JSON) ->
|
|||
%% <p>space is equivalent to {space, 1}</p></li>
|
||||
%%
|
||||
%% <li>{space, N}
|
||||
%% <p>place N spaces after each colon and comma in the resulting binary. default is zero</p></li>
|
||||
%% <p>place N spaces after each colon and comma in the resulting
|
||||
%% binary. default is zero</p></li>
|
||||
%%
|
||||
%% <li>indent
|
||||
%% <p>indent is equivalent to {indent, 1}</p></li>
|
||||
%%
|
||||
%% <li>{indent, N}
|
||||
%% <p>indent each 'level' of the json structure by N spaces. default is zero</p></li>
|
||||
%% <p>indent each 'level' of the json structure by N spaces. default is
|
||||
%% zero</p></li>
|
||||
%% </ul>
|
||||
%% @end
|
||||
|
||||
|
@ -340,9 +392,17 @@ format(JSON, Opts) ->
|
|||
|
||||
|
||||
%% @spec eventify(List::list()) -> jsx_parser_result()
|
||||
%% @doc fake the jsx api for any list. useful if you want to serialize a structure to json using the pretty printer, or verify a sequence could be valid json
|
||||
%% @doc fake the jsx api for any list. useful if you want to serialize a
|
||||
%% structure to json using the pretty printer, or verify a sequence could be
|
||||
%% valid json
|
||||
eventify([]) ->
|
||||
fun() -> {incomplete, fun(List) when is_list(List) -> eventify(List); (_) -> erlang:error(badarg) end} end;
|
||||
fun() ->
|
||||
{incomplete, fun(List) when is_list(List) ->
|
||||
eventify(List)
|
||||
; (_) ->
|
||||
erlang:error(badarg)
|
||||
end}
|
||||
end;
|
||||
eventify([Next|Rest]) ->
|
||||
fun() -> {event, Next, eventify(Rest)} end.
|
||||
|
||||
|
@ -352,43 +412,50 @@ eventify([Next|Rest]) ->
|
|||
|
||||
|
||||
%% encoding detection
|
||||
%% first check to see if there's a bom, if not, use the rfc4627 method for determining
|
||||
%% encoding. this function makes some assumptions about the validity of the stream
|
||||
%% which may delay failure later than if an encoding is explicitly provided
|
||||
%% first check to see if there's a bom, if not, use the rfc4627 method for
|
||||
%% determining encoding. this function makes some assumptions about the
|
||||
%% validity of the stream which may delay failure later than if an encoding is
|
||||
%% explicitly provided
|
||||
|
||||
detect_encoding(OptsList) ->
|
||||
fun(Stream) -> detect_encoding(Stream, OptsList) end.
|
||||
|
||||
%% utf8 bom detection
|
||||
detect_encoding(<<16#ef, 16#bb, 16#bf, Rest/binary>>, Opts) -> (jsx_utf8:parser(Opts))(Rest);
|
||||
%% utf32-little bom detection (this has to come before utf16-little or it'll match that)
|
||||
detect_encoding(<<16#ff, 16#fe, 0, 0, Rest/binary>>, Opts) -> (jsx_utf32le:parser(Opts))(Rest);
|
||||
detect_encoding(<<16#ef, 16#bb, 16#bf, Rest/binary>>, Opts) ->
|
||||
(jsx_utf8:parser(Opts))(Rest);
|
||||
%% utf32-little bom detection (this has to come before utf16-little or it'll
|
||||
%% match that)
|
||||
detect_encoding(<<16#ff, 16#fe, 0, 0, Rest/binary>>, Opts) ->
|
||||
(jsx_utf32le:parser(Opts))(Rest);
|
||||
%% utf16-big bom detection
|
||||
detect_encoding(<<16#fe, 16#ff, Rest/binary>>, Opts) -> (jsx_utf16:parser(Opts))(Rest);
|
||||
detect_encoding(<<16#fe, 16#ff, Rest/binary>>, Opts) ->
|
||||
(jsx_utf16:parser(Opts))(Rest);
|
||||
%% utf16-little bom detection
|
||||
detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Opts) -> (jsx_utf16le:parser(Opts))(Rest);
|
||||
detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Opts) ->
|
||||
(jsx_utf16le:parser(Opts))(Rest);
|
||||
%% utf32-big bom detection
|
||||
detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Opts) -> (jsx_utf32:parser(Opts))(Rest);
|
||||
detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Opts) ->
|
||||
(jsx_utf32:parser(Opts))(Rest);
|
||||
|
||||
%% utf32-little null order detection
|
||||
detect_encoding(<<X, 0, 0, 0, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
||||
(jsx_utf32le:parser(Opts))(JSON);
|
||||
%% utf16-big null order detection
|
||||
detect_encoding(<<0, X, 0, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
|
||||
(jsx_utf16:parser(Opts))(JSON);
|
||||
%% utf16-little null order detection
|
||||
detect_encoding(<<X, 0, Y, 0, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
|
||||
(jsx_utf16le:parser(Opts))(JSON);
|
||||
%% utf32-big null order detection
|
||||
detect_encoding(<<0, 0, 0, X, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
||||
(jsx_utf32:parser(Opts))(JSON);
|
||||
%% utf16-little null order detection
|
||||
detect_encoding(<<X, 0, _, 0, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
||||
(jsx_utf16le:parser(Opts))(JSON);
|
||||
%% utf16-big null order detection
|
||||
detect_encoding(<<0, X, 0, _, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
||||
(jsx_utf16:parser(Opts))(JSON);
|
||||
%% utf8 null order detection
|
||||
detect_encoding(<<X, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
|
||||
(jsx_utf8:parser(Opts))(JSON);
|
||||
|
||||
%% a problem, to autodetect naked single digits' encoding, there is not enough data
|
||||
%% to conclusively determine the encoding correctly. below is an attempt to solve
|
||||
%% the problem
|
||||
%% a problem, to autodetect naked single digits' encoding, there is not enough
|
||||
%% data to conclusively determine the encoding correctly. below is an attempt
|
||||
%% to solve the problem
|
||||
detect_encoding(<<X>>, Opts) when X =/= 0 ->
|
||||
{incomplete,
|
||||
fun(end_stream) ->
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue