reorg of opts parsing, some minor source cleanups

This commit is contained in:
alisdair sullivan 2011-10-19 06:51:36 -07:00
parent 95dd7a67c6
commit 9c5710b97c
7 changed files with 23 additions and 210 deletions

View file

@ -1,11 +0,0 @@
parse_opts(Opts) ->
parse_opts(Opts, #opts{}).
parse_opts([], Opts) ->
Opts;
parse_opts([loose_unicode|Rest], Opts) ->
parse_opts(Rest, Opts#opts{loose_unicode=true});
parse_opts([escape_forward_slash|Rest], Opts) ->
parse_opts(Rest, Opts#opts{escape_forward_slash=true});
parse_opts(_, _) ->
{error, badarg}.

View file

@ -20,12 +20,13 @@
-type jsx_encodeable() :: jsx_event() | [jsx_encodeable()]. -type jsx_encodeable() :: jsx_event() | [jsx_encodeable()].
-type jsx_iterator() :: jsx_scanner() | jsx_tokenizer(). -type jsx_iterator() :: jsx_scanner().
-type jsx_scanner() :: fun((binary()) -> jsx_iterator_result()). -type jsx_scanner() :: jsx_decoder() | jsx_tokenizer().
-type jsx_decoder() :: fun((binary()) -> jsx_iterator_result()).
-type jsx_tokenizer() :: fun((jsx_encodeable()) -> jsx_iterator_result()). -type jsx_tokenizer() :: fun((jsx_encodeable()) -> jsx_iterator_result()).

View file

@ -5,7 +5,7 @@
{modules, [ {modules, [
jsx, jsx,
jsx_tokenizer, jsx_tokenizer,
jsx_scanner, jsx_decoder,
jsx_utils jsx_utils
]}, ]},
{registered, []}, {registered, []},

View file

@ -36,7 +36,7 @@ scanner() -> scanner([]).
-spec scanner(OptsList::jsx_opts()) -> jsx_scanner(). -spec scanner(OptsList::jsx_opts()) -> jsx_scanner().
scanner(OptsList) -> scanner(OptsList) ->
fun(Stream) when is_binary(Stream) -> fun(Stream) when is_binary(Stream) ->
(jsx_scanner:scanner(OptsList))(Stream) (jsx_decoder:decoder(OptsList))(Stream)
; (Stream) when is_list(Stream); is_tuple(Stream) -> ; (Stream) when is_list(Stream); is_tuple(Stream) ->
(jsx_tokenizer:tokenizer(OptsList))(Stream) (jsx_tokenizer:tokenizer(OptsList))(Stream)
end. end.

View file

@ -1,190 +0,0 @@
%% The MIT License
%% Copyright (c) 2010 Alisdair Sullivan <alisdairsullivan@yahoo.ca>
%% Permission is hereby granted, free of charge, to any person obtaining a copy
%% of this software and associated documentation files (the "Software"), to deal
%% in the Software without restriction, including without limitation the rights
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%% copies of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:
%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.
-module(jsx_scanner).
-export([scanner/1]).
-include("../include/jsx_types.hrl").
-spec scanner(Opts::jsx_opts()) -> jsx_scanner().
scanner(Opts) ->
fun(JSON) -> start(JSON, [], [], parse_opts(Opts)) end.
-include("../include/jsx_opts.hrl").
-include("../include/jsx_opts_parser.hrl").
-include("../include/jsx_scanner.hrl").
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
noncharacters_test_() ->
[
{"noncharacters - badjson",
?_assertEqual(check_bad(noncharacters()), [])
},
{"noncharacters - replaced",
?_assertEqual(check_replaced(noncharacters()), [])
}
].
extended_noncharacters_test_() ->
[
{"extended noncharacters - badjson",
?_assertEqual(check_bad(extended_noncharacters()), [])
},
{"extended noncharacters - replaced",
?_assertEqual(check_replaced(extended_noncharacters()), [])
}
].
surrogates_test_() ->
[
{"surrogates - badjson",
?_assertEqual(check_bad(surrogates()), [])
},
{"surrogates - replaced",
?_assertEqual(check_replaced(surrogates()), [])
}
].
control_test_() ->
[
{"control characters - badjson",
?_assertEqual(check_bad(control_characters()), [])
}
].
reserved_test_() ->
[
{"reserved noncharacters - badjson",
?_assertEqual(check_bad(reserved_space()), [])
},
{"reserved noncharacters - replaced",
?_assertEqual(check_replaced(reserved_space()), [])
}
].
zero_test_() ->
[
{"nullbyte - badjson",
?_assertEqual(check_bad(zero()), [])
}
].
good_characters_test_() ->
[
{"acceptable codepoints",
?_assertEqual(check_good(good()), [])
},
{"acceptable extended",
?_assertEqual(check_good(good_extended()), [])
}
].
check_bad(List) ->
lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end,
check(List, [], [])
).
check_replaced(List) ->
lists:dropwhile(fun({_, [{string, [16#fffd]}|_]}) ->
true
; (_) ->
false
end,
check(List, [loose_unicode], [])
).
check_good(List) ->
lists:dropwhile(fun({_, [{string, _}]}) -> true ; (_) -> false end,
check(List, [], [])
).
check([], _Opts, Acc) -> Acc;
check([H|T], Opts, Acc) ->
R = decode(to_fake_utf(H, utf8), Opts),
check(T, Opts, [{H, R}] ++ Acc).
decode(JSON, Opts) ->
try
{ok, Events, _} = (scanner(Opts))(JSON),
loop(Events, [])
catch
error:badarg -> {error, badjson}
end.
loop([end_json], Acc) -> lists:reverse(Acc);
loop([Event|Events], Acc) -> loop(Events, [Event] ++ Acc);
loop(_, _) -> {error, badjson}.
noncharacters() -> lists:seq(16#fffe, 16#ffff).
extended_noncharacters() ->
[16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff]
++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff]
++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff]
++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff]
++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff]
++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff]
++ [16#dfffe, 16#dffff, 16#efffe, 16#effff]
++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff].
surrogates() -> lists:seq(16#d800, 16#dfff).
control_characters() -> lists:seq(1, 31).
reserved_space() -> lists:seq(16#fdd0, 16#fdef).
zero() -> [0].
good() -> [32, 33]
++ lists:seq(16#23, 16#5b)
++ lists:seq(16#5d, 16#d7ff)
++ lists:seq(16#e000, 16#fdcf)
++ lists:seq(16#fdf0, 16#fffd).
good_extended() -> lists:seq(16#100000, 16#10fffd).
%% erlang refuses to encode certain codepoints, so fake them all
to_fake_utf(N, utf8) when N < 16#0080 -> <<34/utf8, N:8, 34/utf8>>;
to_fake_utf(N, utf8) when N < 16#0800 ->
<<0:5, Y:5, X:6>> = <<N:16>>,
<<34/utf8, 2#110:3, Y:5, 2#10:2, X:6, 34/utf8>>;
to_fake_utf(N, utf8) when N < 16#10000 ->
<<Z:4, Y:6, X:6>> = <<N:16>>,
<<34/utf8, 2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6, 34/utf8>>;
to_fake_utf(N, utf8) ->
<<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>,
<<34/utf8, 2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6, 34/utf8>>.
-endif.

View file

@ -32,12 +32,10 @@
-spec tokenizer(OptsList::jsx_opts()) -> jsx_tokenizer(). -spec tokenizer(OptsList::jsx_opts()) -> jsx_tokenizer().
tokenizer(OptsList) -> tokenizer(OptsList) ->
fun(Forms) -> start(Forms, [], [], parse_opts(OptsList)) end. fun(Forms) -> start(Forms, [], [], jsx_utils:parse_opts(OptsList)) end.
-include("../include/jsx_opts.hrl"). -include("../include/jsx_opts.hrl").
-include("../include/jsx_opts_parser.hrl").
-include("../include/jsx_tokenizer.hrl"). -include("../include/jsx_tokenizer.hrl").
-ifdef(TEST). -ifdef(TEST).

View file

@ -23,11 +23,26 @@
-module(jsx_utils). -module(jsx_utils).
-export([nice_decimal/1, json_escape/2]). -export([parse_opts/1, nice_decimal/1, json_escape/2]).
-include("../include/jsx_opts.hrl"). -include("../include/jsx_opts.hrl").
%% parsing of jsx opts
parse_opts(Opts) ->
parse_opts(Opts, #opts{}).
parse_opts([], Opts) ->
Opts;
parse_opts([loose_unicode|Rest], Opts) ->
parse_opts(Rest, Opts#opts{loose_unicode=true});
parse_opts([escape_forward_slash|Rest], Opts) ->
parse_opts(Rest, Opts#opts{escape_forward_slash=true});
parse_opts(_, _) ->
{error, badarg}.
%% conversion of floats to 'nice' decimal output. erlang's float implementation %% conversion of floats to 'nice' decimal output. erlang's float implementation
%% is almost but not quite ieee 754. it converts negative zero to plain zero %% is almost but not quite ieee 754. it converts negative zero to plain zero
%% silently, and throws exceptions for any operations that would produce NaN %% silently, and throws exceptions for any operations that would produce NaN
@ -39,7 +54,6 @@
%% algorithm from "Printing Floating-Point Numbers Quickly and Accurately" by %% algorithm from "Printing Floating-Point Numbers Quickly and Accurately" by
%% Burger & Dybvig %% Burger & Dybvig
-spec nice_decimal(Float::float()) -> string(). -spec nice_decimal(Float::float()) -> string().
nice_decimal(0.0) -> "0.0"; nice_decimal(0.0) -> "0.0";
@ -162,6 +176,7 @@ digits_to_list([Digit|Digits], Dpoint, Acc) ->
%% json string escaping, for utf8 binaries. escape the json control sequences to %% json string escaping, for utf8 binaries. escape the json control sequences to
%% their json equivalent, escape other control characters to \uXXXX sequences, %% their json equivalent, escape other control characters to \uXXXX sequences,
%% everything else should be a legal json string component %% everything else should be a legal json string component
json_escape(String, Opts) when is_binary(String) -> json_escape(String, Opts) when is_binary(String) ->
json_escape(String, Opts, <<>>); json_escape(String, Opts, <<>>);
json_escape(String, Opts) when is_list(String) -> json_escape(String, Opts) when is_list(String) ->