From 9c5710b97c1bc16959cd7b38d5108f894cdff77d Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 19 Oct 2011 06:51:36 -0700 Subject: [PATCH] reorg of opts parsing, some minor source cleanups --- include/jsx_opts_parser.hrl | 11 --- include/jsx_types.hrl | 5 +- src/jsx.app.src | 2 +- src/jsx.erl | 2 +- src/jsx_scanner.erl | 190 ------------------------------------ src/jsx_tokenizer.erl | 4 +- src/jsx_utils.erl | 19 +++- 7 files changed, 23 insertions(+), 210 deletions(-) delete mode 100644 include/jsx_opts_parser.hrl delete mode 100644 src/jsx_scanner.erl diff --git a/include/jsx_opts_parser.hrl b/include/jsx_opts_parser.hrl deleted file mode 100644 index e25f3e4..0000000 --- a/include/jsx_opts_parser.hrl +++ /dev/null @@ -1,11 +0,0 @@ -parse_opts(Opts) -> - parse_opts(Opts, #opts{}). - -parse_opts([], Opts) -> - Opts; -parse_opts([loose_unicode|Rest], Opts) -> - parse_opts(Rest, Opts#opts{loose_unicode=true}); -parse_opts([escape_forward_slash|Rest], Opts) -> - parse_opts(Rest, Opts#opts{escape_forward_slash=true}); -parse_opts(_, _) -> - {error, badarg}. \ No newline at end of file diff --git a/include/jsx_types.hrl b/include/jsx_types.hrl index 7d7b9fb..a928ea1 100644 --- a/include/jsx_types.hrl +++ b/include/jsx_types.hrl @@ -20,12 +20,13 @@ -type jsx_encodeable() :: jsx_event() | [jsx_encodeable()]. --type jsx_iterator() :: jsx_scanner() | jsx_tokenizer(). +-type jsx_iterator() :: jsx_scanner(). --type jsx_scanner() :: fun((binary()) -> jsx_iterator_result()). +-type jsx_scanner() :: jsx_decoder() | jsx_tokenizer(). +-type jsx_decoder() :: fun((binary()) -> jsx_iterator_result()). -type jsx_tokenizer() :: fun((jsx_encodeable()) -> jsx_iterator_result()). 
diff --git a/src/jsx.app.src b/src/jsx.app.src index 3419ac1..c12e084 100644 --- a/src/jsx.app.src +++ b/src/jsx.app.src @@ -5,7 +5,7 @@ {modules, [ jsx, jsx_tokenizer, - jsx_scanner, + jsx_decoder, jsx_utils ]}, {registered, []}, diff --git a/src/jsx.erl b/src/jsx.erl index d500fe5..5487f80 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -36,7 +36,7 @@ scanner() -> scanner([]). -spec scanner(OptsList::jsx_opts()) -> jsx_scanner(). scanner(OptsList) -> fun(Stream) when is_binary(Stream) -> - (jsx_scanner:scanner(OptsList))(Stream) + (jsx_decoder:decoder(OptsList))(Stream) ; (Stream) when is_list(Stream); is_tuple(Stream) -> (jsx_tokenizer:tokenizer(OptsList))(Stream) end. diff --git a/src/jsx_scanner.erl b/src/jsx_scanner.erl deleted file mode 100644 index 8670c2d..0000000 --- a/src/jsx_scanner.erl +++ /dev/null @@ -1,190 +0,0 @@ -%% The MIT License - -%% Copyright (c) 2010 Alisdair Sullivan - -%% Permission is hereby granted, free of charge, to any person obtaining a copy -%% of this software and associated documentation files (the "Software"), to deal -%% in the Software without restriction, including without limitation the rights -%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -%% copies of the Software, and to permit persons to whom the Software is -%% furnished to do so, subject to the following conditions: - -%% The above copyright notice and this permission notice shall be included in -%% all copies or substantial portions of the Software. - -%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -%% THE SOFTWARE. 
- - --module(jsx_scanner). - --export([scanner/1]). - --include("../include/jsx_types.hrl"). - --spec scanner(Opts::jsx_opts()) -> jsx_scanner(). -scanner(Opts) -> - fun(JSON) -> start(JSON, [], [], parse_opts(Opts)) end. - --include("../include/jsx_opts.hrl"). - --include("../include/jsx_opts_parser.hrl"). - --include("../include/jsx_scanner.hrl"). - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - - -noncharacters_test_() -> - [ - {"noncharacters - badjson", - ?_assertEqual(check_bad(noncharacters()), []) - }, - {"noncharacters - replaced", - ?_assertEqual(check_replaced(noncharacters()), []) - } - ]. - -extended_noncharacters_test_() -> - [ - {"extended noncharacters - badjson", - ?_assertEqual(check_bad(extended_noncharacters()), []) - }, - {"extended noncharacters - replaced", - ?_assertEqual(check_replaced(extended_noncharacters()), []) - } - ]. - -surrogates_test_() -> - [ - {"surrogates - badjson", - ?_assertEqual(check_bad(surrogates()), []) - }, - {"surrogates - replaced", - ?_assertEqual(check_replaced(surrogates()), []) - } - ]. - -control_test_() -> - [ - {"control characters - badjson", - ?_assertEqual(check_bad(control_characters()), []) - } - ]. - -reserved_test_() -> - [ - {"reserved noncharacters - badjson", - ?_assertEqual(check_bad(reserved_space()), []) - }, - {"reserved noncharacters - replaced", - ?_assertEqual(check_replaced(reserved_space()), []) - } - ]. - -zero_test_() -> - [ - {"nullbyte - badjson", - ?_assertEqual(check_bad(zero()), []) - } - ]. - -good_characters_test_() -> - [ - {"acceptable codepoints", - ?_assertEqual(check_good(good()), []) - }, - {"acceptable extended", - ?_assertEqual(check_good(good_extended()), []) - } - ]. - - -check_bad(List) -> - lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end, - check(List, [], []) - ). - -check_replaced(List) -> - lists:dropwhile(fun({_, [{string, [16#fffd]}|_]}) -> - true - ; (_) -> - false - end, - check(List, [loose_unicode], []) - ). 
- -check_good(List) -> - lists:dropwhile(fun({_, [{string, _}]}) -> true ; (_) -> false end, - check(List, [], []) - ). - -check([], _Opts, Acc) -> Acc; -check([H|T], Opts, Acc) -> - R = decode(to_fake_utf(H, utf8), Opts), - check(T, Opts, [{H, R}] ++ Acc). - - -decode(JSON, Opts) -> - try - {ok, Events, _} = (scanner(Opts))(JSON), - loop(Events, []) - catch - error:badarg -> {error, badjson} - end. - - -loop([end_json], Acc) -> lists:reverse(Acc); -loop([Event|Events], Acc) -> loop(Events, [Event] ++ Acc); -loop(_, _) -> {error, badjson}. - - - -noncharacters() -> lists:seq(16#fffe, 16#ffff). - -extended_noncharacters() -> - [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff] - ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff] - ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff] - ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff] - ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff] - ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff] - ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff] - ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff]. - -surrogates() -> lists:seq(16#d800, 16#dfff). - -control_characters() -> lists:seq(1, 31). - -reserved_space() -> lists:seq(16#fdd0, 16#fdef). - -zero() -> [0]. - -good() -> [32, 33] - ++ lists:seq(16#23, 16#5b) - ++ lists:seq(16#5d, 16#d7ff) - ++ lists:seq(16#e000, 16#fdcf) - ++ lists:seq(16#fdf0, 16#fffd). - -good_extended() -> lists:seq(16#100000, 16#10fffd). - -%% erlang refuses to encode certain codepoints, so fake them all -to_fake_utf(N, utf8) when N < 16#0080 -> <<34/utf8, N:8, 34/utf8>>; -to_fake_utf(N, utf8) when N < 16#0800 -> - <<0:5, Y:5, X:6>> = <<N:16>>, - <<34/utf8, 2#110:3, Y:5, 2#10:2, X:6, 34/utf8>>; -to_fake_utf(N, utf8) when N < 16#10000 -> - <<Z:4, Y:6, X:6>> = <<N:16>>, - <<34/utf8, 2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6, 34/utf8>>; -to_fake_utf(N, utf8) -> - <<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>, - <<34/utf8, 2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6, 34/utf8>>. - - --endif. 
\ No newline at end of file diff --git a/src/jsx_tokenizer.erl b/src/jsx_tokenizer.erl index fafc61c..9298345 100644 --- a/src/jsx_tokenizer.erl +++ b/src/jsx_tokenizer.erl @@ -32,12 +32,10 @@ -spec tokenizer(OptsList::jsx_opts()) -> jsx_tokenizer(). tokenizer(OptsList) -> - fun(Forms) -> start(Forms, [], [], parse_opts(OptsList)) end. + fun(Forms) -> start(Forms, [], [], jsx_utils:parse_opts(OptsList)) end. -include("../include/jsx_opts.hrl"). --include("../include/jsx_opts_parser.hrl"). - -include("../include/jsx_tokenizer.hrl"). -ifdef(TEST). diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index b8c1b85..72528b2 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -23,11 +23,26 @@ -module(jsx_utils). --export([nice_decimal/1, json_escape/2]). +-export([parse_opts/1, nice_decimal/1, json_escape/2]). -include("../include/jsx_opts.hrl"). +%% parsing of jsx opts + +parse_opts(Opts) -> + parse_opts(Opts, #opts{}). + +parse_opts([], Opts) -> + Opts; +parse_opts([loose_unicode|Rest], Opts) -> + parse_opts(Rest, Opts#opts{loose_unicode=true}); +parse_opts([escape_forward_slash|Rest], Opts) -> + parse_opts(Rest, Opts#opts{escape_forward_slash=true}); +parse_opts(_, _) -> + {error, badarg}. + + %% conversion of floats to 'nice' decimal output. erlang's float implementation %% is almost but not quite ieee 754. it converts negative zero to plain zero %% silently, and throws exceptions for any operations that would produce NaN @@ -39,7 +54,6 @@ %% algorithm from "Printing Floating-Point Numbers Quickly and Accurately" by %% Burger & Dybvig - -spec nice_decimal(Float::float()) -> string(). nice_decimal(0.0) -> "0.0"; @@ -162,6 +176,7 @@ digits_to_list([Digit|Digits], Dpoint, Acc) -> %% json string escaping, for utf8 binaries. 
escape the json control sequences to %% their json equivalent, escape other control characters to \uXXXX sequences, %% everything else should be a legal json string component + json_escape(String, Opts) when is_binary(String) -> json_escape(String, Opts, <<>>); json_escape(String, Opts) when is_list(String) ->