major whitespace updates to get line lengths down to < 80 characters whenever possible (a few tests ignore this). src/jsx_test.erl was not touched because it's shortly to be replaced

2010-08-25 23:17:10 -07:00 · 2010-08-25 23:17:10 -07:00 · a86dec09ef
commit a86dec09ef
parent ac5e0e4fe3
6 changed files with 998 additions and 414 deletions
--- a/src/jsx.erl
+++ b/src/jsx.erl
@ -26,7 +26,6 @@
 %% @version really, really beta
 %% @doc this module defines the interface to the jsx json parsing library

-
 -module(jsx).


@ -70,7 +69,11 @@
 %%    | {multi_term, true | false}
 %%    | {encoding, auto | supported_utf()}.

-%% @type supported_utf() = utf8 | utf16 | {utf16, little} | utf32 | {utf32, little}.
+%% @type supported_utf() = utf8 
+%%    | utf16 
+%%    | {utf16, little} 
+%%    | utf32 
+%%    | {utf32, little}.

 %% @type eep0018() = eep0018_object() | eep0018_array().

@ -79,7 +82,13 @@

 %% @type eep0018_key() = binary() | atom().

-%% @type eep0018_term() = eep0018_array() | eep0018_object() | eep0018_string() | eep0018_number() | true | false | null.
+%% @type eep0018_term() = eep0018_array() 
+%%    | eep0018_object() 
+%%    | eep0018_string() 
+%%    | eep0018_number() 
+%%    | true 
+%%    | false 
+%%    | null.

 %% @type eep0018_string() = binary().

@ -127,7 +136,8 @@ parser() ->
    
 %% @spec parser(Opts::jsx_opts()) -> jsx_parser()    
 %% @doc
-%% produces a function which takes a binary which may or may not represent an encoded json document and returns a generator
+%% produces a function which takes a binary which may or may not represent an 
+%% encoded json document and returns a generator
 %%
 %%      options:
 %%      <ul>
@ -137,17 +147,23 @@ parser() ->
 %%          false</p></li>
 %%    
 %%        <li>{encoded_unicode, ascii | codepoint | none}
-%%          <p>if a \uXXXX escape sequence is encountered within a key or string,
-%%          this option controls how it is interpreted. none makes no attempt
-%%          to interpret the value, leaving it unconverted. ascii will convert
-%%          any value that falls within the ascii range. codepoint will convert
-%%          any value that is a valid unicode codepoint. note that unicode
-%%          non-characters (including badly formed surrogates) will never be
-%%          converted. codepoint is the default</p></li>
+%%          <p>if a \uXXXX escape sequence is encountered within a key or 
+%%          string, this option controls how it is interpreted. none makes no 
+%%          attempt to interpret the value, leaving it unconverted. ascii will 
+%%          convert any value that falls within the ascii range. codepoint will 
+%%          convert any value that is a valid unicode codepoint. note that 
+%%          unicode non-characters (including badly formed surrogates) will 
+%%          never be converted. codepoint is the default</p></li>
 %%
-%%        <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
-%%          <p>attempt to parse the binary using the specified encoding. auto will
-%%          auto detect any supported encoding and is the default</p></li>
+%%        <li>{encoding, auto 
+%%              | utf8 
+%%              | utf16 
+%%              | {utf16, little} 
+%%              | utf32 
+%%              | {utf32, little} 
+%%          }
+%%          <p>attempt to parse the binary using the specified encoding. auto 
+%%          will auto detect any supported encoding and is the default</p></li>
 %%
 %%        <li>{multi_term, true | false}
 %%          <p>usually, documents will be parsed in full before the end_json
@ -183,25 +199,32 @@ json_to_term(JSON) ->
 %%      options:
 %%      <ul>
 %%        <li>{strict, true | false}
-%%          <p>by default, attempting to convert unwrapped json values (numbers, strings and
-%%          the atoms true, false and null) result in a badarg exception. if strict equals
-%%          false, these are instead decoded to their equivalent eep0018 value. default is
-%%          false</p>
+%%          <p>by default, attempting to convert unwrapped json values (numbers, 
+%%          strings and the atoms true, false and null) result in a badarg 
+%%          exception. if strict equals false, these are instead decoded to 
+%%          their equivalent eep0018 value. default is false</p>
 %%        
-%%          <p>note that there is a problem of ambiguity when parsing unwrapped json
-%%          numbers that requires special handling</p>
+%%          <p>note that there is a problem of ambiguity when parsing unwrapped 
+%%          json numbers that requires special handling</p>
 %%
-%%          <p>an unwrapped json number has no unambiguous end marker like a json object,
-%%          array or string. `1', `12' and `123' may all represent either a complete json
-%%          number or just the beginning of one. in this case, the parser will always
-%%          return `{incomplete, More}' rather than potentially terminate before input
-%%          is exhausted. to force termination, `More/1' may be called with the atom
-%%          `end_stream' as it's argument. note also that numbers followed by whitespace
-%%          will be parsed correctly</p></li>
+%%          <p>an unwrapped json number has no unambiguous end marker like a 
+%%          json object, array or string. `1', `12' and `123' may all represent 
+%%          either a complete json number or just the beginning of one. in this 
+%%          case, the parser will always return `{incomplete, More}' rather than 
+%%          potentially terminate before input is exhausted. to force 
+%%          termination, `More/1' may be called with the atom `end_stream' as 
+%%          its argument. note also that numbers followed by whitespace will be 
+%%          parsed correctly</p></li>
 %%     
-%%        <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
-%%          <p>assume the binary is encoded using the specified binary. default is auto, which
-%%          attempts to autodetect the encoding</p></li>
+%%        <li>{encoding, auto 
+%%              | utf8 
+%%              | utf16 
+%%              | {utf16, little} 
+%%              | utf32 
+%%              | {utf32, little} 
+%%          }
+%%          <p>assume the binary is encoded using the specified encoding. 
+%%          default is auto, which attempts to autodetect the encoding</p></li>
 %%     
 %%        <li>{comments, true | false}
 %%          <p>if true, json documents that contain c style (/* ... */) comments
@ -230,29 +253,38 @@ term_to_json(JSON) ->
    
 %% @spec term_to_json(JSON::eep0018(), Opts::encoder_opts()) -> binary()
 %% @doc
-%% takes the erlang representation of a json object (as defined in eep0018) and returns a (binary encoded) json string
+%% takes the erlang representation of a json object (as defined in eep0018) and 
+%% returns a (binary encoded) json string
 %%   
 %%      options:
 %%      <ul>
 %%        <li>{strict, true | false}
 %%          <p>by default, attempting to convert unwrapped json values (numbers, 
-%%          strings and the atoms true, false and null) result in a badarg exception. 
-%%          if strict equals false, these are instead json encoded. default is false</p></li>
+%%          strings and the atoms true, false and null) result in a badarg 
+%%          exception. if strict equals false, these are instead json encoded. 
+%%          default is false</p></li>
 %%        
-%%        <li>{encoding, utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
+%%        <li>{encoding, utf8 
+%%              | utf16 
+%%              | {utf16, little} 
+%%              | utf32 
+%%              | {utf32, little} 
+%%          }
 %%          <p>the encoding of the resulting binary. default is utf8</p></li>
 %%        
 %%        <li>space
 %%          <p>space is equivalent to {space, 1}</p></li>
 %%
 %%        <li>{space, N}
-%%          <p>place N spaces after each colon and comma in the resulting binary. default is zero</p></li>
+%%          <p>place N spaces after each colon and comma in the resulting 
+%%          binary. default is zero</p></li>
 %%          
 %%        <li>indent
 %%          <p>indent is equivalent to {indent, 1}</p></li>
 %%
 %%        <li>{indent, N}
-%%          <p>indent each 'level' of the json structure by N spaces. default is zero</p></li>
+%%          <p>indent each 'level' of the json structure by N spaces. default is 
+%%          zero</p></li>
 %%      </ul>
 %% @end        

@ -273,13 +305,19 @@ is_json(JSON) ->
 %%      options:
 %%      <ul>
 %%        <li>{strict, true | false}
-%%          <p>by default,  unwrapped json values (numbers, strings and the atoms 
-%%          true, false and null) return false. if strict equals true, is_json
-%%          returns true. default is false</p></li>
+%%          <p>by default,  unwrapped json values (numbers, strings and the 
+%%          atoms true, false and null) return false. if strict equals true, 
+%%          is_json returns true. default is false</p></li>
 %%  
-%%        <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
-%%          <p>assume the binary is encoded using the specified binary. default is auto, 
-%%          which attempts to autodetect the encoding</p></li>
+%%        <li>{encoding, auto 
+%%              | utf8 
+%%              | utf16 
+%%              | {utf16, little} 
+%%              | utf32 
+%%              | {utf32, little} 
+%%          }
+%%          <p>assume the binary is encoded using the specified encoding. 
+%%          default is auto, which attempts to autodetect the encoding</p></li>
 %%  
 %%        <li>{comments, true | false}
 %%          <p>if true, json documents that contain c style (/* ... */) comments
@ -300,20 +338,32 @@ format(JSON) ->
    
 %% @spec format(JSON::binary(), Opts::format_opts()) -> binary()
 %% @doc
-%% formats a binary encoded json string according to the options chose. the defaults will produced a string stripped of all whitespace
+%% formats a binary encoded json string according to the options chosen. the 
+%% defaults will produce a string stripped of all whitespace
 %%
 %%      options:
 %%      <ul>
 %%        <li>{strict, true | false}
-%%          <p>by default,  unwrapped json values (numbers, strings and the atoms 
-%%          true, false and null) result in an error. if strict equals true, they
-%%          are treated as valid json. default is false</p></li>
+%%          <p>by default,  unwrapped json values (numbers, strings and the 
+%%          atoms true, false and null) result in an error. if strict equals 
+%%          true, they are treated as valid json. default is false</p></li>
 %%  
-%%        <li>{encoding, auto | utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
-%%          <p>assume the binary is encoded using the specified binary. default is auto, 
-%%          which attempts to autodetect the encoding</p></li>
+%%        <li>{encoding, auto 
+%%              | utf8 
+%%              | utf16 
+%%              | {utf16, little} 
+%%              | utf32 
+%%              | {utf32, little} 
+%%          }
+%%          <p>assume the binary is encoded using the specified encoding. 
+%%          default is auto, which attempts to autodetect the encoding</p></li>
 %%    
-%%        <li>{output_encoding, utf8 | utf16 | {utf16, little} | utf32 | {utf32, little} }
+%%        <li>{output_encoding, utf8 
+%%              | utf16 
+%%              | {utf16, little} 
+%%              | utf32 
+%%              | {utf32, little} 
+%%          }
 %%          <p>the encoding of the resulting binary. default is utf8</p></li>
 %%    
 %%        <li>{comments, true | false}
@ -325,13 +375,15 @@ format(JSON) ->
 %%          <p>space is equivalent to {space, 1}</p></li>
 %%
 %%        <li>{space, N}
-%%          <p>place N spaces after each colon and comma in the resulting binary. default is zero</p></li>
+%%          <p>place N spaces after each colon and comma in the resulting 
+%%          binary. default is zero</p></li>
 %%          
 %%        <li>indent
 %%          <p>indent is equivalent to {indent, 1}</p></li>
 %%
 %%        <li>{indent, N}
-%%          <p>indent each 'level' of the json structure by N spaces. default is zero</p></li>
+%%          <p>indent each 'level' of the json structure by N spaces. default is 
+%%          zero</p></li>
 %%      </ul>
 %% @end

@ -340,9 +392,17 @@ format(JSON, Opts) ->
    

 %% @spec eventify(List::list()) -> jsx_parser_result()
-%% @doc fake the jsx api for any list. useful if you want to serialize a structure to json using the pretty printer, or verify a sequence could be valid json
+%% @doc fake the jsx api for any list. useful if you want to serialize a 
+%% structure to json using the pretty printer, or verify a sequence could be 
+%% valid json
 eventify([]) ->
-    fun() -> {incomplete, fun(List) when is_list(List) -> eventify(List); (_) -> erlang:error(badarg) end} end;    
+    fun() -> 
+        {incomplete, fun(List) when is_list(List) -> 
+                eventify(List)
+            ; (_) ->
+                erlang:error(badarg) 
+        end}
+    end;    
 eventify([Next|Rest]) ->
    fun() -> {event, Next, eventify(Rest)} end.  

@ -352,43 +412,50 @@ eventify([Next|Rest]) ->

   
 %% encoding detection   
-%% first check to see if there's a bom, if not, use the rfc4627 method for determining
-%%   encoding. this function makes some assumptions about the validity of the stream
-%%   which may delay failure later than if an encoding is explicitly provided
+%% first check to see if there's a bom, if not, use the rfc4627 method for 
+%%   determining encoding. this function makes some assumptions about the 
+%%   validity of the stream which may delay failure later than if an encoding is 
+%%   explicitly provided

 detect_encoding(OptsList) ->
    fun(Stream) -> detect_encoding(Stream, OptsList) end.
    
 %% utf8 bom detection    
-detect_encoding(<<16#ef, 16#bb, 16#bf, Rest/binary>>, Opts) -> (jsx_utf8:parser(Opts))(Rest);    
-%% utf32-little bom detection (this has to come before utf16-little or it'll match that)
-detect_encoding(<<16#ff, 16#fe, 0, 0, Rest/binary>>, Opts) -> (jsx_utf32le:parser(Opts))(Rest);        
+detect_encoding(<<16#ef, 16#bb, 16#bf, Rest/binary>>, Opts) -> 
+    (jsx_utf8:parser(Opts))(Rest);    
+%% utf32-little bom detection (this has to come before utf16-little or it'll 
+%%   match that)
+detect_encoding(<<16#ff, 16#fe, 0, 0, Rest/binary>>, Opts) -> 
+    (jsx_utf32le:parser(Opts))(Rest);        
 %% utf16-big bom detection
-detect_encoding(<<16#fe, 16#ff, Rest/binary>>, Opts) -> (jsx_utf16:parser(Opts))(Rest);
+detect_encoding(<<16#fe, 16#ff, Rest/binary>>, Opts) -> 
+    (jsx_utf16:parser(Opts))(Rest);
 %% utf16-little bom detection
-detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Opts) -> (jsx_utf16le:parser(Opts))(Rest);
+detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Opts) -> 
+    (jsx_utf16le:parser(Opts))(Rest);
 %% utf32-big bom detection
-detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Opts) -> (jsx_utf32:parser(Opts))(Rest);
+detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Opts) -> 
+    (jsx_utf32:parser(Opts))(Rest);
    
 %% utf32-little null order detection
 detect_encoding(<<X, 0, 0, 0, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
    (jsx_utf32le:parser(Opts))(JSON);
-%% utf16-big null order detection
-detect_encoding(<<0, X, 0, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
-    (jsx_utf16:parser(Opts))(JSON);
-%% utf16-little null order detection
-detect_encoding(<<X, 0, Y, 0, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
-    (jsx_utf16le:parser(Opts))(JSON);
 %% utf32-big null order detection
 detect_encoding(<<0, 0, 0, X, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
    (jsx_utf32:parser(Opts))(JSON);
+%% utf16-little null order detection
+detect_encoding(<<X, 0, _, 0, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
+    (jsx_utf16le:parser(Opts))(JSON);
+%% utf16-big null order detection
+detect_encoding(<<0, X, 0, _, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
+    (jsx_utf16:parser(Opts))(JSON);
 %% utf8 null order detection
 detect_encoding(<<X, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
    (jsx_utf8:parser(Opts))(JSON);
    
-%% a problem, to autodetect naked single digits' encoding, there is not enough data
-%%   to conclusively determine the encoding correctly. below is an attempt to solve
-%%   the problem
+%% a problem, to autodetect naked single digits' encoding, there is not enough 
+%%   data to conclusively determine the encoding correctly. below is an attempt 
+%%   to solve the problem
 detect_encoding(<<X>>, Opts) when X =/= 0 ->
    {incomplete,
        fun(end_stream) ->