2010-06-01 01:03:28 -07:00
|
|
|
%% The MIT License
|
|
|
|
|
|
|
|
%% Copyright (c) 2010 Alisdair Sullivan <alisdairsullivan@yahoo.ca>
|
|
|
|
|
|
|
|
%% Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
%% of this software and associated documentation files (the "Software"), to deal
|
|
|
|
%% in the Software without restriction, including without limitation the rights
|
|
|
|
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
%% copies of the Software, and to permit persons to whom the Software is
|
|
|
|
%% furnished to do so, subject to the following conditions:
|
|
|
|
|
|
|
|
%% The above copyright notice and this permission notice shall be included in
|
|
|
|
%% all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
%% THE SOFTWARE.
|
|
|
|
|
2010-08-22 11:31:14 -07:00
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% this is the implementation of the utf backends for the jsx decoder. it's
|
|
|
|
%% included by the various jsx_utfxx.erl frontends and all modifications to
|
2010-12-28 00:30:25 -08:00
|
|
|
%% this file should take that into account
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
-export([decoder/1]).
|
2011-07-21 06:14:48 -07:00
|
|
|
%% builds a decoder from a list of options.
%%
%% OptsList is validated by parse_opts/1. on success the result is a fun of
%% one argument (the json input) that starts decoding with an empty stack.
%% when the option list is rejected, the error tuple produced by
%% parse_opts/1 is returned as is.
-spec decoder(OptsList::jsx_opts()) -> jsx_decoder().

decoder(OptsList) ->
    case parse_opts(OptsList) of
        %% match on the shape of the error tuple rather than on one specific
        %% reason: parse_opts/2 reports {error, badarg}, and a reason that
        %% is not matched here would be captured as the options of a
        %% decoder fun instead of being reported
        {error, _Reason} = Error ->
            Error;
        Opts ->
            fun(JSON) -> start(JSON, [], Opts) end
    end.
|
|
|
|
|
|
|
|
|
2010-08-23 13:36:53 -07:00
|
|
|
%% options record for the decoder; parse_opts/1 fills it in from the option
%% list handed to decoder/1
-record(opts, {
    %% switched on by the `multi_term' option
    multi_term = false,
    %% switched on by the `loose_unicode' option
    loose_unicode = false,
    encoding = auto,
    %% does nothing, used by encoder
    escape_forward_slash = false
}).
|
2010-08-19 18:22:34 -07:00
|
|
|
|
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
%% converts an option list into an #opts{} record, starting from the record
%% defaults. returns {error, badarg} when the list is not understood
parse_opts(OptsList) ->
    Defaults = #opts{},
    parse_opts(OptsList, Defaults).
|
|
|
|
|
|
|
|
%% walks the option list and updates the record for every recognised option.
%% an `{encoding, _}' pair is accepted but leaves the record untouched.
%% an unknown option, or a first argument that is not a proper list, yields
%% {error, badarg}
parse_opts([Option | Remaining], Config) ->
    case Option of
        multi_term ->
            parse_opts(Remaining, Config#opts{multi_term = true});
        loose_unicode ->
            parse_opts(Remaining, Config#opts{loose_unicode = true});
        {encoding, _} ->
            parse_opts(Remaining, Config);
        _ ->
            {error, badarg}
    end;
parse_opts([], Config) ->
    Config;
parse_opts(_, _) ->
    {error, badarg}.
|
|
|
|
|
|
|
|
|
2010-06-01 01:03:28 -07:00
|
|
|
%% whitespace symbols, written as character literals
-define(space, $\s).      %% 16#20
-define(tab, $\t).        %% 16#09
-define(cr, $\r).         %% 16#0D
-define(newline, $\n).    %% 16#0A
|
|
|
|
|
|
|
|
%% object delimiters: opening and closing curly brace
-define(start_object, ${).    %% 16#7B
-define(end_object, $}).      %% 16#7D
|
|
|
|
|
|
|
|
%% array delimiters: opening and closing square bracket
-define(start_array, $[).    %% 16#5B
-define(end_array, $]).      %% 16#5D
|
|
|
|
|
|
|
|
%% separators: comma, double quote and colon
-define(comma, $,).     %% 16#2C
-define(quote, $\").    %% 16#22
-define(colon, $:).     %% 16#3A
|
|
|
|
|
|
|
|
%% symbols used by string escape sequences. escape and rsolidus are two
%% names for the same symbol, the backslash
-define(escape, $\\).       %% 16#5C
-define(rsolidus, $\\).     %% 16#5C
-define(solidus, $/).       %% 16#2F
-define(formfeed, $\f).     %% 16#0C
-define(backspace, $\b).    %% 16#08
-define(unicode, $u).       %% 16#75
|
|
|
|
|
|
|
|
%% symbols used by numbers
-define(zero, $0).            %% 16#30
-define(decimalpoint, $.).    %% 16#2E
-define(negative, $-).        %% 16#2D
-define(positive, $+).        %% 16#2B
|
|
|
|
|
|
|
|
|
2010-06-07 16:14:22 -07:00
|
|
|
%% some useful guards

%% true when Symbol is a hexadecimal digit: 0-9, a-f or A-F. letters past
%% `f' must be rejected here because symbols that pass this guard are handed
%% to erlang:list_to_integer/2 with base 16, which raises badarg on them.
%% written as one parenthesised orelse expression so the macro remains a
%% single guard test wherever it is expanded
-define(is_hex(Symbol),
    ((Symbol >= $0 andalso Symbol =< $9) orelse
     (Symbol >= $a andalso Symbol =< $f) orelse
     (Symbol >= $A andalso Symbol =< $F))
).
|
|
|
|
|
|
|
|
%% true for the digits 1 through 9, that is any decimal digit except zero
-define(is_nonzero(Symbol),
    ($1 =< Symbol andalso $9 >= Symbol)
).
|
|
|
|
|
|
|
|
%% true for every symbol at or above the space character
-define(is_noncontrol(Symbol),
    (?space =< Symbol)
).
|
|
|
|
|
|
|
|
%% true for the four whitespace symbols: space, tab, cr and newline
-define(is_whitespace(Symbol),
    (Symbol =:= ?space orelse Symbol =:= ?tab orelse
     Symbol =:= ?cr orelse Symbol =:= ?newline)
).
|
|
|
|
|
2010-06-07 16:14:22 -07:00
|
|
|
|
2010-08-23 13:40:56 -07:00
|
|
|
%% partial codepoint max size differs across encodings. every encoding
%% variant defines the bit syntax type used to match symbols (?utfx) and
%% the test ?incomplete uses to decide whether leftover input may still be
%% completed by more data
-ifdef(utf8).
-define(encoding, utf8).
-define(utfx, utf8).
%% less than one byte left, i.e. the empty binary
-define(partial_codepoint(Bin), (byte_size(Bin) < 1)).
-endif.
|
|
|
|
|
|
|
|
%% utf16 variant: leftover input of less than two bytes is partial
-ifdef(utf16).
-define(encoding, utf16).
-define(utfx, utf16).
-define(partial_codepoint(Bin), (byte_size(Bin) < 2)).
-endif.
|
|
|
|
|
|
|
|
%% little endian utf16 variant: leftover input of less than two bytes is
%% partial
-ifdef(utf16le).
-define(encoding, utf16le).
-define(utfx, utf16-little).
-define(partial_codepoint(Bin), (byte_size(Bin) < 2)).
-endif.
|
|
|
|
|
|
|
|
%% utf32 variant: leftover input of less than four bytes is partial
-ifdef(utf32).
-define(encoding, utf32).
-define(utfx, utf32).
-define(partial_codepoint(Bin), (byte_size(Bin) < 4)).
-endif.
|
|
|
|
|
|
|
|
%% little endian utf32 variant: leftover input of less than four bytes is
%% partial
-ifdef(utf32le).
-define(encoding, utf32le).
-define(utfx, utf32-little).
-define(partial_codepoint(Bin), (byte_size(Bin) < 4)).
-endif.
|
|
|
|
|
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
%% when parsing strings, the naive detection of partial codepoints is
|
|
|
|
%% insufficient. this incredibly anal function should detect all badly formed
|
|
|
|
%% utf sequences
|
|
|
|
-ifdef(utf8).
%% true when the input is empty or looks like the beginning of a multi-byte
%% utf8 sequence whose continuation bytes have not all arrived yet
partial_utf(<<>>) ->
    true;
%% a lone lead byte of a two, three or four byte sequence
partial_utf(<<Lead>>) when Lead >= 16#c2, Lead =< 16#f4 ->
    true;
%% lead byte of a three or four byte sequence plus one continuation byte
partial_utf(<<Lead, C1>>)
        when Lead >= 16#e0, Lead =< 16#f4, C1 >= 16#80, C1 =< 16#bf ->
    true;
%% lead byte of a four byte sequence plus two continuation bytes
partial_utf(<<Lead, C1, C2>>)
        when Lead >= 16#f0, Lead =< 16#f4,
             C1 >= 16#80, C1 =< 16#bf,
             C2 >= 16#80, C2 =< 16#bf ->
    true;
%% anything else is either complete or can never become valid
partial_utf(_) ->
    false.
-endif.
|
2010-08-23 13:36:53 -07:00
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
-ifdef(utf16).
%% true when the input is empty or is a big endian utf16 sequence that more
%% data could still complete
partial_utf(<<>>) ->
    true;
%% half of a code unit
partial_utf(<<_>>) ->
    true;
%% a complete high surrogate waiting for its low surrogate. only lead bytes
%% 16#d8..16#db start a surrogate pair; 16#dc..16#df is a low surrogate,
%% which no amount of further input can turn into a valid sequence
partial_utf(<<Hi, _>>) when Hi >= 16#d8, Hi =< 16#db ->
    true;
%% high surrogate followed by the first byte of a low surrogate
partial_utf(<<Hi, _, Lo>>)
        when Hi >= 16#d8, Hi =< 16#db, Lo >= 16#dc, Lo =< 16#df ->
    true;
partial_utf(_) ->
    false.
-endif.
|
2010-08-22 11:31:14 -07:00
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
-ifdef(utf16le).
%% true when the input is empty or is a little endian utf16 sequence that
%% more data could still complete
partial_utf(<<>>) ->
    true;
%% this case is not strictly true, there are single bytes that should be
%% rejected, but they're rare enough they can be ignored
partial_utf(<<_>>) ->
    true;
%% a complete high surrogate waiting for its low surrogate. the second byte
%% is the most significant one; only 16#d8..16#db marks a high surrogate,
%% 16#dc..16#df is a low surrogate that further input cannot repair
partial_utf(<<_, Hi>>) when Hi >= 16#d8, Hi =< 16#db ->
    true;
%% high surrogate plus the first (least significant) byte of the next unit
partial_utf(<<_, Hi, _>>) when Hi >= 16#d8, Hi =< 16#db ->
    true;
partial_utf(_) ->
    false.
-endif.
|
2010-08-22 11:31:14 -07:00
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
-ifdef(utf32).
%% a utf32 code unit is four bytes wide, so any binary of zero to three
%% bytes is treated as a codepoint that has not fully arrived
partial_utf(Bin) when is_binary(Bin), byte_size(Bin) < 4 ->
    true;
partial_utf(_) ->
    false.
-endif.
|
2010-08-22 11:31:14 -07:00
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
-ifdef(utf32le).
%% a utf32 code unit is four bytes wide, so any binary of zero to three
%% bytes is treated as a codepoint that has not fully arrived
partial_utf(Bin) when is_binary(Bin), byte_size(Bin) < 4 ->
    true;
partial_utf(_) ->
    false.
-endif.
|
2010-08-23 13:36:53 -07:00
|
|
|
|
2010-08-22 11:31:14 -07:00
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
%% fallback used by the last clause of most state functions. it relies on
%% the enclosing clause binding its input to `Bin', and on `Next' referring
%% to `Stream', the variable bound by the continuation below.
%% when ?partial_codepoint says the leftover input is too short to judge,
%% the result is an incomplete tuple whose fun either reports an error on
%% end_stream or evaluates Next with the additional input. otherwise the
%% input is rejected right away
-define(incomplete(Next),
    case ?partial_codepoint(Bin) of
        false ->
            {error, {badjson, Bin}};
        true ->
            {jsx, incomplete, fun
                (end_stream) -> {error, {badjson, Bin}};
                (Stream) -> Next
            end}
    end
).
|
|
|
|
|
|
|
|
%% emit presents a non-empty list of events to client code, one event per
%% {jsx, Event, Continuation} tuple. the continuation attached to the last
%% event is Next itself; every earlier event carries a fun that emits the
%% events still pending
emit([Event | Pending], Next) ->
    Continue = case Pending of
        [] -> Next;
        _ -> fun() -> emit(Pending, Next) end
    end,
    {jsx, Event, Continue}.
|
2010-08-22 11:31:14 -07:00
|
|
|
|
2010-08-23 13:42:05 -07:00
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% entry state of the decoder. leading whitespace is skipped, then the first
%% significant symbol selects the state that handles the term it introduces.
%% input that matches no clause is handed to ?incomplete
start(<<Blank/?utfx, Tail/binary>>, Stack, Opts) when ?is_whitespace(Blank) ->
    start(Tail, Stack, Opts);
%% containers: emit the opening event and continue inside the container
start(<<?start_object/?utfx, Tail/binary>>, Stack, Opts) ->
    emit([start_object], fun() -> object(Tail, [key | Stack], Opts) end);
start(<<?start_array/?utfx, Tail/binary>>, Stack, Opts) ->
    emit([start_array], fun() -> array(Tail, [array | Stack], Opts) end);
%% strings
start(<<?quote/?utfx, Tail/binary>>, Stack, Opts) ->
    string(Tail, Stack, Opts);
%% first letter of a literal; tr/3, fa/3 and nu/3 consume the remainder
%% (presumably of true, false and null)
start(<<$t/?utfx, Tail/binary>>, Stack, Opts) ->
    tr(Tail, Stack, Opts);
start(<<$f/?utfx, Tail/binary>>, Stack, Opts) ->
    fa(Tail, Stack, Opts);
start(<<$n/?utfx, Tail/binary>>, Stack, Opts) ->
    nu(Tail, Stack, Opts);
%% numbers: the symbols consumed so far are passed along as a list
start(<<?negative/?utfx, Tail/binary>>, Stack, Opts) ->
    negative(Tail, Stack, Opts, "-");
start(<<?zero/?utfx, Tail/binary>>, Stack, Opts) ->
    zero(Tail, Stack, Opts, "0");
start(<<Digit/?utfx, Tail/binary>>, Stack, Opts) when ?is_nonzero(Digit) ->
    integer(Tail, Stack, Opts, [Digit]);
start(Bin, Stack, Opts) ->
    ?incomplete(start(<<Bin/binary, Stream/binary>>, Stack, Opts)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% state entered once a value has been parsed. the top of the stack tells
%% whether the value sits inside an object, inside an array, or at the top
%% level (empty stack)
maybe_done(<<Blank/?utfx, Tail/binary>>, Stack, Opts)
        when ?is_whitespace(Blank) ->
    maybe_done(Tail, Stack, Opts);
%% closing the enclosing container pops it off the stack
maybe_done(<<?end_object/?utfx, Tail/binary>>, [object | Outer], Opts) ->
    emit([end_object], fun() -> maybe_done(Tail, Outer, Opts) end);
maybe_done(<<?end_array/?utfx, Tail/binary>>, [array | Outer], Opts) ->
    emit([end_array], fun() -> maybe_done(Tail, Outer, Opts) end);
%% a comma is followed by another key in an object, another value in an array
maybe_done(<<?comma/?utfx, Tail/binary>>, [object | Outer], Opts) ->
    key(Tail, [key | Outer], Opts);
maybe_done(<<?comma/?utfx, Tail/binary>>, [array | _] = Stack, Opts) ->
    value(Tail, Stack, Opts);
%% top level: with multi_term the decoder starts over on the remaining
%% input, otherwise only trailing whitespace is accepted by done/2
maybe_done(Tail, [], #opts{multi_term = true} = Opts) ->
    emit([end_json], fun() -> start(Tail, [], Opts) end);
maybe_done(Tail, [], Opts) ->
    done(Tail, Opts);
maybe_done(Bin, Stack, Opts) ->
    ?incomplete(maybe_done(<<Bin/binary, Stream/binary>>, Stack, Opts)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% final state: only whitespace may follow the term. once the input is
%% exhausted end_json is emitted; the continuation after it reports
%% incomplete, and its fun answers end_stream with a badjson error or
%% resumes this state with additional input
done(<<Blank/?utfx, Tail/binary>>, Opts) when ?is_whitespace(Blank) ->
    done(Tail, Opts);
done(<<>>, Opts) ->
    Resume = fun
        (end_stream) -> {error, {badjson, <<>>}};
        (Stream) -> done(Stream, Opts)
    end,
    emit([end_json], fun() -> {jsx, incomplete, Resume} end);
done(Bin, Opts) ->
    ?incomplete(done(<<Bin/binary, Stream/binary>>, Opts)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% state entered right after an opening brace, with `key' on top of the
%% stack: accepts the first key of the object or the brace closing an empty
%% object
object(<<Blank/?utfx, Tail/binary>>, Stack, Opts) when ?is_whitespace(Blank) ->
    object(Tail, Stack, Opts);
object(<<?end_object/?utfx, Tail/binary>>, [key | Outer], Opts) ->
    emit([end_object], fun() -> maybe_done(Tail, Outer, Opts) end);
object(<<?quote/?utfx, Tail/binary>>, Stack, Opts) ->
    string(Tail, Stack, Opts);
object(Bin, Stack, Opts) ->
    ?incomplete(object(<<Bin/binary, Stream/binary>>, Stack, Opts)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% state entered right after an opening bracket, with `array' on top of the
%% stack: accepts the first element of the array or the bracket closing an
%% empty array
array(<<Blank/?utfx, Tail/binary>>, Stack, Opts) when ?is_whitespace(Blank) ->
    array(Tail, Stack, Opts);
%% empty array
array(<<?end_array/?utfx, Tail/binary>>, [array | Outer], Opts) ->
    emit([end_array], fun() -> maybe_done(Tail, Outer, Opts) end);
%% nested containers
array(<<?start_object/?utfx, Tail/binary>>, Stack, Opts) ->
    emit([start_object], fun() -> object(Tail, [key | Stack], Opts) end);
array(<<?start_array/?utfx, Tail/binary>>, Stack, Opts) ->
    emit([start_array], fun() -> array(Tail, [array | Stack], Opts) end);
%% strings
array(<<?quote/?utfx, Tail/binary>>, Stack, Opts) ->
    string(Tail, Stack, Opts);
%% first letter of a literal, handled by tr/3, fa/3 and nu/3
array(<<$t/?utfx, Tail/binary>>, Stack, Opts) ->
    tr(Tail, Stack, Opts);
array(<<$f/?utfx, Tail/binary>>, Stack, Opts) ->
    fa(Tail, Stack, Opts);
array(<<$n/?utfx, Tail/binary>>, Stack, Opts) ->
    nu(Tail, Stack, Opts);
%% numbers: the symbols consumed so far are passed along as a list
array(<<?negative/?utfx, Tail/binary>>, Stack, Opts) ->
    negative(Tail, Stack, Opts, "-");
array(<<?zero/?utfx, Tail/binary>>, Stack, Opts) ->
    zero(Tail, Stack, Opts, "0");
array(<<Digit/?utfx, Tail/binary>>, Stack, Opts) when ?is_nonzero(Digit) ->
    integer(Tail, Stack, Opts, [Digit]);
array(Bin, Stack, Opts) ->
    ?incomplete(array(<<Bin/binary, Stream/binary>>, Stack, Opts)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% state expecting a value: reached after the colon that follows an object
%% key and after a comma inside an array. unlike array/3 it has no clause
%% for a closing bracket
value(<<Blank/?utfx, Tail/binary>>, Stack, Opts) when ?is_whitespace(Blank) ->
    value(Tail, Stack, Opts);
%% nested containers
value(<<?start_object/?utfx, Tail/binary>>, Stack, Opts) ->
    emit([start_object], fun() -> object(Tail, [key | Stack], Opts) end);
value(<<?start_array/?utfx, Tail/binary>>, Stack, Opts) ->
    emit([start_array], fun() -> array(Tail, [array | Stack], Opts) end);
%% strings
value(<<?quote/?utfx, Tail/binary>>, Stack, Opts) ->
    string(Tail, Stack, Opts);
%% first letter of a literal, handled by tr/3, fa/3 and nu/3
value(<<$t/?utfx, Tail/binary>>, Stack, Opts) ->
    tr(Tail, Stack, Opts);
value(<<$f/?utfx, Tail/binary>>, Stack, Opts) ->
    fa(Tail, Stack, Opts);
value(<<$n/?utfx, Tail/binary>>, Stack, Opts) ->
    nu(Tail, Stack, Opts);
%% numbers: the symbols consumed so far are passed along as a list
value(<<?negative/?utfx, Tail/binary>>, Stack, Opts) ->
    negative(Tail, Stack, Opts, "-");
value(<<?zero/?utfx, Tail/binary>>, Stack, Opts) ->
    zero(Tail, Stack, Opts, "0");
value(<<Digit/?utfx, Tail/binary>>, Stack, Opts) when ?is_nonzero(Digit) ->
    integer(Tail, Stack, Opts, [Digit]);
value(Bin, Stack, Opts) ->
    ?incomplete(value(<<Bin/binary, Stream/binary>>, Stack, Opts)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% state entered after an object key: skips whitespace up to the colon, then
%% replaces `key' on top of the stack with `object' and expects a value
colon(<<Blank/?utfx, Tail/binary>>, Stack, Opts) when ?is_whitespace(Blank) ->
    colon(Tail, Stack, Opts);
colon(<<?colon/?utfx, Tail/binary>>, [key | Outer], Opts) ->
    value(Tail, [object | Outer], Opts);
colon(Bin, Stack, Opts) ->
    ?incomplete(colon(<<Bin/binary, Stream/binary>>, Stack, Opts)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% state entered after a comma inside an object: skips whitespace and
%% expects the quote that opens the next key
key(<<Blank/?utfx, Tail/binary>>, Stack, Opts) when ?is_whitespace(Blank) ->
    key(Tail, Stack, Opts);
key(<<?quote/?utfx, Tail/binary>>, Stack, Opts) ->
    string(Tail, Stack, Opts);
key(Bin, Stack, Opts) ->
    ?incomplete(key(<<Bin/binary, Stream/binary>>, Stack, Opts)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% string/3 starts parsing a string with an empty accumulator. the
%% accumulator (Acc in string/4) holds the intermediate representation of
%% the string being parsed: a list of integers representing unicode
%% codepoints, most recent first. building that list is faster than
%% constructing binaries, there's a branch kicking around which proves it.
%% unlike the other states, which only check the size of the remaining
%% binary, string uses partial_utf/1 to cease parsing as soon as an invalid
%% encoding is encountered, which eliminates certain incomplete states
string(Input, Stack, Opts) ->
    string(Input, Stack, Opts, []).
|
2011-07-26 00:35:17 -07:00
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% closing quote: with `key' on top of the stack the string is an object
%% key and a colon must follow, otherwise it is a string value
string(<<?quote/?utfx, Tail/binary>>, [key | _] = Stack, Opts, Acc) ->
    emit([{key, lists:reverse(Acc)}], fun() -> colon(Tail, Stack, Opts) end);
string(<<?quote/?utfx, Tail/binary>>, Stack, Opts, Acc) ->
    emit(
        [{string, lists:reverse(Acc)}],
        fun() -> maybe_done(Tail, Stack, Opts) end
    );
%% backslash: the start of an escape sequence
string(<<?rsolidus/?utfx, Tail/binary>>, Stack, Opts, Acc) ->
    escape(Tail, Stack, Opts, Acc);
%% things get dumb here. erlang doesn't properly restrict unicode
%% non-characters, so the codepoints it returns can't always be trusted.
%% the range 32..16#fdcf is safe, so allow that
string(<<Code/?utfx, Tail/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(Code), Code < 16#fdd0 ->
    string(Tail, Stack, Opts, [Code | Acc]);
%% the range 16#fdf0..16#fffd is also safe
string(<<Code/?utfx, Tail/binary>>, Stack, Opts, Acc)
        when Code > 16#fdef, Code < 16#fffe ->
    string(Tail, Stack, Opts, [Code | Acc]);
%% above the basic plane everything is allowed except the last two
%% codepoints of each plane (16#1fffe, 16#1ffff, ... 16#10fffe, 16#10ffff),
%% i.e. those whose low sixteen bits are 16#fffe or 16#ffff
string(<<Code/?utfx, Tail/binary>>, Stack, Opts, Acc)
        when Code > 16#ffff, (Code band 16#ffff) < 16#fffe ->
    string(Tail, Stack, Opts, [Code | Acc]);
%% nothing matched: either the input ends in the middle of a codepoint, in
%% which case more input is requested, or it holds something the clauses
%% above refuse. the latter is an error unless loose_unicode is set, in
%% which case noncharacter/4 gets a chance to replace it
string(Bin, Stack, Opts, Acc) ->
    case partial_utf(Bin) of
        false ->
            case Opts#opts.loose_unicode of
                false -> {error, {badjson, Bin}};
                true -> noncharacter(Bin, Stack, Opts, Acc)
            end;
        true ->
            {jsx, incomplete, fun
                (end_stream) ->
                    {error, {badjson, Bin}};
                (Stream) ->
                    string(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)
            end}
    end.
|
|
|
|
|
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
%% we don't need to guard against partial utf here, because it's already taken
|
|
|
|
%% care of in string. theoretically, the last clause of noncharacter/4 is
|
|
|
|
%% unreachable
|
2011-07-27 06:52:16 -07:00
|
|
|
-ifdef(utf8).
%% called by string/4 in loose_unicode mode for input it refused. each
%% matching clause drops one offending sequence, puts u+fffd on the
%% accumulator in its place and returns to string/4

%% non-characters erlang doesn't recognize as non-characters
noncharacter(<<Code/utf8, Tail/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(Code) ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% u+fffe and u+ffff
noncharacter(<<239, 191, Last, Tail/binary>>, Stack, Opts, Acc)
        when Last =:= 190 orelse Last =:= 191 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% surrogates
noncharacter(<<237, Second, _, Tail/binary>>, Stack, Opts, Acc)
        when Second >= 160 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% anything else stays an error
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
-endif.
|
|
|
|
|
|
|
|
-ifdef(utf16).
%% called by string/4 in loose_unicode mode for input it refused. each
%% matching clause drops one offending sequence, puts u+fffd on the
%% accumulator in its place and returns to string/4

%% non-characters erlang decodes without complaint
noncharacter(<<Code/utf16, Tail/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(Code) ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% u+fffe and u+ffff
noncharacter(<<255, Low, Tail/binary>>, Stack, Opts, Acc)
        when Low =:= 254 orelse Low =:= 255 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% surrogates
noncharacter(<<High, _, Tail/binary>>, Stack, Opts, Acc)
        when High >= 216, High =< 223 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% anything else stays an error
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
-endif.
|
|
|
|
|
|
|
|
-ifdef(utf16le).
%% called by string/4 in loose_unicode mode for input it refused. each
%% matching clause drops one offending sequence, puts u+fffd on the
%% accumulator in its place and returns to string/4

%% non-characters erlang decodes without complaint
noncharacter(<<Code/utf16-little, Tail/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(Code) ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% u+fffe and u+ffff
noncharacter(<<Low, 255, Tail/binary>>, Stack, Opts, Acc)
        when Low =:= 254 orelse Low =:= 255 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% surrogates
noncharacter(<<_, High, Tail/binary>>, Stack, Opts, Acc)
        when High >= 216, High =< 223 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% anything else stays an error
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
-endif.
|
|
|
|
|
|
|
|
-ifdef(utf32).
%% called by string/4 in loose_unicode mode for input it refused. each
%% matching clause drops one offending sequence, puts u+fffd on the
%% accumulator in its place and returns to string/4

%% non-characters erlang decodes without complaint
noncharacter(<<Code/utf32, Tail/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(Code) ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% u+fffe and u+ffff
noncharacter(<<0, 0, 255, Low, Tail/binary>>, Stack, Opts, Acc)
        when Low =:= 254 orelse Low =:= 255 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% surrogates
noncharacter(<<0, 0, High, _, Tail/binary>>, Stack, Opts, Acc)
        when High >= 216, High =< 223 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% anything else stays an error
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
-endif.
|
|
|
|
|
|
|
|
-ifdef(utf32le).
%% called by string/4 in loose_unicode mode for input it refused. each
%% matching clause drops one offending sequence, puts u+fffd on the
%% accumulator in its place and returns to string/4

%% non-characters erlang decodes without complaint
noncharacter(<<Code/utf32-little, Tail/binary>>, Stack, Opts, Acc)
        when ?is_noncontrol(Code) ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% u+fffe and u+ffff
noncharacter(<<Low, 255, 0, 0, Tail/binary>>, Stack, Opts, Acc)
        when Low =:= 254 orelse Low =:= 255 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% surrogates
noncharacter(<<_, High, 0, 0, Tail/binary>>, Stack, Opts, Acc)
        when High >= 216, High =< 223 ->
    string(Tail, Stack, Opts, [16#fffd | Acc]);
%% anything else stays an error
noncharacter(Bin, _Stack, _Opts, _Acc) ->
    {error, {badjson, Bin}}.
-endif.
|
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% state entered after a backslash inside a string. the single letter
%% escapes push the character they stand for onto the string accumulator
%% and return to string/4. `u' hands over to escaped_unicode/5, which gets
%% an additional, initially empty accumulator for the hex digits of the
%% codepoint; unnecessary, but nicer than using the string accumulator
escape(<<$b/?utfx, Tail/binary>>, Stack, Opts, Acc) ->
    string(Tail, Stack, Opts, [$\b | Acc]);
escape(<<$f/?utfx, Tail/binary>>, Stack, Opts, Acc) ->
    string(Tail, Stack, Opts, [$\f | Acc]);
escape(<<$n/?utfx, Tail/binary>>, Stack, Opts, Acc) ->
    string(Tail, Stack, Opts, [$\n | Acc]);
escape(<<$r/?utfx, Tail/binary>>, Stack, Opts, Acc) ->
    string(Tail, Stack, Opts, [$\r | Acc]);
escape(<<$t/?utfx, Tail/binary>>, Stack, Opts, Acc) ->
    string(Tail, Stack, Opts, [$\t | Acc]);
escape(<<$u/?utfx, Tail/binary>>, Stack, Opts, Acc) ->
    escaped_unicode(Tail, Stack, Opts, Acc, []);
%% quote, solidus and reverse solidus stand for themselves
escape(<<Literal/?utfx, Tail/binary>>, Stack, Opts, Acc)
        when Literal =:= ?quote orelse
             Literal =:= ?solidus orelse
             Literal =:= ?rsolidus ->
    string(Tail, Stack, Opts, [Literal | Acc]);
escape(Bin, Stack, Opts, Acc) ->
    ?incomplete(escape(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% this code is ugly and unfortunate, but so is json's handling of escaped
%% unicode codepoint sequences.
%%
%% parses the four hex digits that follow `\u'. String is the string
%% accumulator (most recent codepoint first); the last argument collects
%% the hex digits seen so far, also most recent first. once the fourth
%% digit arrives the digits are converted to an integer and classified
escaped_unicode(<<D/?utfx, Rest/binary>>, Stack, Opts, String, [C, B, A])
        when ?is_hex(D) ->
    case erlang:list_to_integer([A, B, C, D], 16) of
        %% high surrogate, we need a low surrogate next
        X when X >= 16#d800, X =< 16#dbff ->
            low_surrogate(Rest, Stack, Opts, String, X);
        %% codepoints that must not be exchanged:
        %%   - a low surrogate that no high surrogate introduced; on its
        %%     own it is not a codepoint and must not reach the result
        %%   - the non-characters u+fffe, u+ffff and u+fdd0..u+fdef
        %%   - null, because allowing interchange of null bytes allows
        %%     attackers to forge malicious streams
        %% with loose_unicode they are replaced by u+fffd, otherwise the
        %% input is rejected
        X when X >= 16#dc00, X =< 16#dfff;
               X == 16#fffe; X == 16#ffff;
               X >= 16#fdd0, X =< 16#fdef;
               X == 16#0000 ->
            case Opts#opts.loose_unicode of
                true ->
                    string(Rest, Stack, Opts, [16#fffd | String]);
                false ->
                    {error, {badjson, <<D/?utfx, Rest/binary>>}}
            end;
        %% anything else
        X ->
            string(Rest, Stack, Opts, [X | String])
    end;
%% one of the first three hex digits: remember it and keep going
escaped_unicode(<<S/?utfx, Rest/binary>>, Stack, Opts, String, Acc)
        when ?is_hex(S) ->
    escaped_unicode(Rest, Stack, Opts, String, [S | Acc]);
escaped_unicode(Bin, Stack, Opts, String, Acc) ->
    ?incomplete(
        escaped_unicode(<<Bin/binary, Stream/binary>>, Stack, Opts, String, Acc)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2011-07-26 00:35:17 -07:00
|
|
|
%% state entered after an escaped high surrogate (passed as the last
%% argument). a backslash may introduce the escaped low surrogate, so
%% low_surrogate_u/5 checks what follows it
low_surrogate(<<?rsolidus/?utfx, Tail/binary>>, Stack, Opts, String, High) ->
    low_surrogate_u(Tail, Stack, Opts, String, High);
%% not an escaped codepoint, our high codepoint is illegal. with
%% loose_unicode it is replaced by u+fffd and the unconsumed input is
%% dispatched back to string to handle; otherwise the input is rejected
low_surrogate(<<Code/?utfx, Tail/binary>> = Bin, Stack, Opts, String, _) ->
    case Opts#opts.loose_unicode of
        false ->
            {error, {badjson, <<Code/?utfx, Tail/binary>>}};
        true ->
            string(Bin, Stack, Opts, [16#fffd | String])
    end;
low_surrogate(Bin, Stack, Opts, String, High) ->
    ?incomplete(
        low_surrogate(<<Bin/binary, Stream/binary>>, Stack, Opts, String, High)
    ).
|
2011-07-26 00:35:17 -07:00
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
|
|
|
|
%% low_surrogate_u/5: a reverse solidus has been read after a high surrogate;
%% a `u' here starts the four hex digits of the expected low surrogate
low_surrogate_u(<<$u/?utfx, Tail/binary>>, Stack, Opts, String, H) ->
    low_surrogate(Tail, Stack, Opts, String, [], H);
%% some other escape follows, so the high surrogate is unpaired. with
%% loose_unicode it becomes u+fffd and the reverse solidus that was already
%% consumed is put back in front of the input before returning to string
low_surrogate_u(<<_S/?utfx, _Rest/binary>> = Bin, Stack, Opts, String, _H) ->
    case Opts#opts.loose_unicode of
        false ->
            {error, {badjson, Bin}};
        true ->
            Unread = <<?rsolidus/?utfx, Bin/binary>>,
            string(Unread, Stack, Opts, [16#fffd | String])
    end;
%% no complete codepoint available, defer to the incomplete handler
low_surrogate_u(Bin, Stack, Opts, String, H) ->
    ?incomplete(
        low_surrogate_u(<<Bin/binary, Stream/binary>>, Stack, Opts, String, H)
    ).
|
2011-07-26 00:35:17 -07:00
|
|
|
|
|
|
|
|
2011-08-02 20:06:49 -07:00
|
|
|
%% low_surrogate/6: collects the four hex digits of the escape that follows a
%% high surrogate (`H'). digits are accumulated in reverse, so [C, B, A] plus
%% the incoming digit D spell the value A B C D
low_surrogate(<<D/?utfx, Rest/binary>>, Stack, Opts, String, [C, B, A], H)
        when ?is_hex(D) ->
    case erlang:list_to_integer([A, B, C, D], 16) of
        Low when Low < 16#dc00; Low > 16#dfff ->
            %% not a low surrogate: both halves of the pair are unusable
            case Opts#opts.loose_unicode of
                false ->
                    {error, {badjson, <<D/?utfx, Rest/binary>>}};
                true ->
                    string(Rest, Stack, Opts, [16#fffd, 16#fffd | String])
            end;
        Low ->
            Codepoint = surrogate_to_codepoint(H, Low),
            %% the last two codepoints of every plane are noncharacters
            case lists:member(Codepoint rem 16#10000, [16#fffe, 16#ffff]) of
                false ->
                    string(Rest, Stack, Opts, [Codepoint | String]);
                true ->
                    case Opts#opts.loose_unicode of
                        false ->
                            {error, {badjson, <<D/?utfx, Rest/binary>>}};
                        true ->
                            string(Rest, Stack, Opts, [16#fffd | String])
                    end
            end
    end;
%% fewer than four digits so far: keep collecting
low_surrogate(<<Digit/?utfx, Rest/binary>>, Stack, Opts, String, Acc, H)
        when ?is_hex(Digit) ->
    low_surrogate(Rest, Stack, Opts, String, [Digit | Acc], H);
%% anything else is left to the incomplete handler
low_surrogate(Bin, Stack, Opts, String, Acc, H) ->
    ?incomplete(
        low_surrogate(
            <<Bin/binary, Stream/binary>>, Stack, Opts, String, Acc, H
        )
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
|
|
|
%% combines a utf16 surrogate pair into the codepoint it encodes, using the
%% formula from the unicode spec: the high surrogate contributes the upper
%% ten bits, the low surrogate the lower ten, offset by 16#10000
surrogate_to_codepoint(High, Low) ->
    UpperBits = (High - 16#d800) * 16#400,
    LowerBits = Low - 16#dc00,
    UpperBits + LowerBits + 16#10000.
|
|
|
|
|
|
|
|
|
|
|
|
%% like strings, numbers are collected in an intermediate accumulator before
|
|
|
|
%% being emitted to the callback handler
|
2010-08-25 23:17:10 -07:00
|
|
|
%% negative/4: expects the first digit of a number. a `0' moves to the zero
%% state, any other digit to the integer state; the digit is pushed onto the
%% front of the (reversed) accumulator either way
negative(<<$0/?utfx, Tail/binary>>, Stack, Opts, Acc) ->
    zero(Tail, Stack, Opts, [$0 | Acc]);
negative(<<Digit/?utfx, Tail/binary>>, Stack, Opts, Acc)
        when ?is_nonzero(Digit) ->
    integer(Tail, Stack, Opts, [Digit | Acc]);
negative(Bin, Stack, Opts, Acc) ->
    ?incomplete(
        negative(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% zero/4: a leading `0' has been collected in Acc (a reversed string). the
%% number either ends here, in which case it is formatted and passed to emit
%% together with a continuation for the next state, or continues with a
%% decimal point
zero(<<?end_object/?utfx, Rest/binary>>, [object|Stack], Opts, Acc) ->
    emit([format_number(Acc), end_object], fun() ->
        maybe_done(Rest, Stack, Opts)
    end);
zero(<<?end_array/?utfx, Rest/binary>>, [array|Stack], Opts, Acc) ->
    emit([format_number(Acc), end_array], fun() ->
        maybe_done(Rest, Stack, Opts)
    end);
zero(<<?comma/?utfx, Rest/binary>>, [object|Stack], Opts, Acc) ->
    emit([format_number(Acc)], fun() -> key(Rest, [key|Stack], Opts) end);
zero(<<?comma/?utfx, Rest/binary>>, [array|_] = Stack, Opts, Acc) ->
    emit([format_number(Acc)], fun() -> value(Rest, Stack, Opts) end);
zero(<<?decimalpoint/?utfx, Rest/binary>>, Stack, Opts, Acc) ->
    initial_decimal(Rest, Stack, Opts, {Acc, []});
zero(<<S/?utfx, Rest/binary>>, Stack, Opts, Acc) when ?is_whitespace(S) ->
    emit([format_number(Acc)], fun() -> maybe_done(Rest, Stack, Opts) end);
%% input exhausted at the top level: the number may be complete or more of it
%% may still arrive, so let the caller decide
zero(<<>>, [], Opts, Acc) ->
    {jsx, incomplete, fun(end_stream) ->
            %% after end_json, resume in this same state. resuming in
            %% decimal/4 (as this clause used to) would parse later input
            %% with the wrong rules, since Acc is a plain list here and not
            %% an {Int, Frac} pair
            emit([format_number(Acc), end_json],
                fun() -> zero(<<>>, [], Opts, Acc) end);
        (Stream) ->
            zero(Stream, [], Opts, Acc)
    end};
zero(Bin, Stack, Opts, Acc) ->
    ?incomplete(zero(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% integer/4: collects the digits of an integer into Acc (a reversed string).
%% a terminator formats the number and passes it to emit together with a
%% continuation for the next state; a decimal point or exponent marker turns
%% the accumulator into the tuple form used by the float states
integer(<<S/?utfx, Rest/binary>>, Stack, Opts, Acc) when ?is_nonzero(S) ->
    integer(Rest, Stack, Opts, [S] ++ Acc);
integer(<<?end_object/?utfx, Rest/binary>>, [object|Stack], Opts, Acc) ->
    emit([format_number(Acc), end_object], fun() ->
        maybe_done(Rest, Stack, Opts)
    end);
integer(<<?end_array/?utfx, Rest/binary>>, [array|Stack], Opts, Acc) ->
    emit([format_number(Acc), end_array], fun() ->
        maybe_done(Rest, Stack, Opts)
    end);
integer(<<?comma/?utfx, Rest/binary>>, [object|Stack], Opts, Acc) ->
    emit([format_number(Acc)], fun() -> key(Rest, [key|Stack], Opts) end);
integer(<<?comma/?utfx, Rest/binary>>, [array|_] = Stack, Opts, Acc) ->
    emit([format_number(Acc)], fun() -> value(Rest, Stack, Opts) end);
integer(<<?decimalpoint/?utfx, Rest/binary>>, Stack, Opts, Acc) ->
    initial_decimal(Rest, Stack, Opts, {Acc, []});
integer(<<?zero/?utfx, Rest/binary>>, Stack, Opts, Acc) ->
    integer(Rest, Stack, Opts, [?zero] ++ Acc);
integer(<<S/?utfx, Rest/binary>>, Stack, Opts, Acc) when S =:= $e; S =:= $E ->
    e(Rest, Stack, Opts, {Acc, [], []});
integer(<<S/?utfx, Rest/binary>>, Stack, Opts, Acc) when ?is_whitespace(S) ->
    emit([format_number(Acc)], fun() -> maybe_done(Rest, Stack, Opts) end);
%% input exhausted at the top level: the number may be complete or more
%% digits may still arrive, so let the caller decide
integer(<<>>, [], Opts, Acc) ->
    {jsx, incomplete, fun(end_stream) ->
            %% after end_json, resume in this same state. resuming in
            %% decimal/4 (as this clause used to) could not accept further
            %% digits, since Acc is a plain list here and not an
            %% {Int, Frac} pair
            emit([format_number(Acc), end_json],
                fun() -> integer(<<>>, [], Opts, Acc) end);
        (Stream) ->
            integer(Stream, [], Opts, Acc)
    end};
integer(Bin, Stack, Opts, Acc) ->
    ?incomplete(integer(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2011-07-10 10:54:04 -07:00
|
|
|
%% initial_decimal/4: entered right after a decimal point. requires one digit,
%% which starts the (reversed) fraction, before moving to the decimal state
initial_decimal(<<Digit/?utfx, Tail/binary>>, Stack, Opts, {Int, Frac})
        when ?is_nonzero(Digit); Digit =:= ?zero ->
    decimal(Tail, Stack, Opts, {Int, [Digit | Frac]});
initial_decimal(Bin, Stack, Opts, Acc) ->
    ?incomplete(
        initial_decimal(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2011-07-10 10:54:04 -07:00
|
|
|
%% decimal/4: collects fraction digits into the {Int, Frac} accumulator (both
%% parts reversed). a terminator formats the number and passes it to emit
%% with a continuation for the next state; `e' or `E' starts an exponent
decimal(<<Digit/?utfx, Tail/binary>>, Stack, Opts, {Int, Frac})
        when ?is_nonzero(Digit); Digit =:= ?zero ->
    decimal(Tail, Stack, Opts, {Int, [Digit | Frac]});
decimal(<<?end_object/?utfx, Tail/binary>>, [object|Stack], Opts, Acc) ->
    Number = format_number(Acc),
    emit([Number, end_object], fun() -> maybe_done(Tail, Stack, Opts) end);
decimal(<<?end_array/?utfx, Tail/binary>>, [array|Stack], Opts, Acc) ->
    Number = format_number(Acc),
    emit([Number, end_array], fun() -> maybe_done(Tail, Stack, Opts) end);
decimal(<<?comma/?utfx, Tail/binary>>, [object|Stack], Opts, Acc) ->
    Number = format_number(Acc),
    emit([Number], fun() -> key(Tail, [key|Stack], Opts) end);
decimal(<<?comma/?utfx, Tail/binary>>, [array|_] = Stack, Opts, Acc) ->
    Number = format_number(Acc),
    emit([Number], fun() -> value(Tail, Stack, Opts) end);
decimal(<<Marker/?utfx, Tail/binary>>, Stack, Opts, {Int, Frac})
        when Marker =:= $E; Marker =:= $e ->
    e(Tail, Stack, Opts, {Int, Frac, []});
decimal(<<Space/?utfx, Tail/binary>>, Stack, Opts, Acc)
        when ?is_whitespace(Space) ->
    Number = format_number(Acc),
    emit([Number], fun() -> maybe_done(Tail, Stack, Opts) end);
%% input exhausted at the top level: finish on end_stream, otherwise carry on
%% with whatever arrives next
decimal(<<>>, [], Opts, Acc) ->
    Resume =
        fun(end_stream) ->
                emit([format_number(Acc), end_json],
                    fun() -> decimal(<<>>, [], Opts, Acc) end);
            (Stream) ->
                decimal(Stream, [], Opts, Acc)
        end,
    {jsx, incomplete, Resume};
decimal(Bin, Stack, Opts, Acc) ->
    ?incomplete(
        decimal(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2011-07-10 10:54:04 -07:00
|
|
|
%% e/4: entered right after the exponent marker. a digit goes straight to the
%% exp state, a sign goes to ex which then insists on a digit; either is
%% pushed onto the (reversed) exponent
e(<<Digit/?utfx, Tail/binary>>, Stack, Opts, {Int, Frac, Exp})
        when ?is_nonzero(Digit); Digit =:= ?zero ->
    exp(Tail, Stack, Opts, {Int, Frac, [Digit | Exp]});
e(<<Sign/?utfx, Tail/binary>>, Stack, Opts, {Int, Frac, Exp})
        when Sign =:= ?negative; Sign =:= ?positive ->
    ex(Tail, Stack, Opts, {Int, Frac, [Sign | Exp]});
e(Bin, Stack, Opts, Acc) ->
    ?incomplete(
        e(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2011-07-10 10:54:04 -07:00
|
|
|
%% ex/4: entered after the sign of an exponent. requires one digit, pushed
%% onto the (reversed) exponent, before moving to the exp state
ex(<<Digit/?utfx, Tail/binary>>, Stack, Opts, {Int, Frac, Exp})
        when ?is_nonzero(Digit); Digit =:= ?zero ->
    exp(Tail, Stack, Opts, {Int, Frac, [Digit | Exp]});
ex(Bin, Stack, Opts, Acc) ->
    ?incomplete(
        ex(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2011-07-10 10:54:04 -07:00
|
|
|
%% exp/4: collects exponent digits into the {Int, Frac, Exp} accumulator (all
%% parts reversed). a terminator formats the number and passes it to emit
%% with a continuation for the next state
exp(<<Digit/?utfx, Tail/binary>>, Stack, Opts, {Int, Frac, Exp})
        when ?is_nonzero(Digit); Digit =:= ?zero ->
    exp(Tail, Stack, Opts, {Int, Frac, [Digit | Exp]});
exp(<<?end_object/?utfx, Tail/binary>>, [object|Stack], Opts, Acc) ->
    Number = format_number(Acc),
    emit([Number, end_object], fun() -> maybe_done(Tail, Stack, Opts) end);
exp(<<?end_array/?utfx, Tail/binary>>, [array|Stack], Opts, Acc) ->
    Number = format_number(Acc),
    emit([Number, end_array], fun() -> maybe_done(Tail, Stack, Opts) end);
exp(<<?comma/?utfx, Tail/binary>>, [object|Stack], Opts, Acc) ->
    Number = format_number(Acc),
    emit([Number], fun() -> key(Tail, [key|Stack], Opts) end);
exp(<<?comma/?utfx, Tail/binary>>, [array|_] = Stack, Opts, Acc) ->
    Number = format_number(Acc),
    emit([Number], fun() -> value(Tail, Stack, Opts) end);
exp(<<Space/?utfx, Tail/binary>>, Stack, Opts, Acc)
        when ?is_whitespace(Space) ->
    Number = format_number(Acc),
    emit([Number], fun() -> maybe_done(Tail, Stack, Opts) end);
%% input exhausted at the top level: finish on end_stream, otherwise carry on
%% with whatever arrives next
exp(<<>>, [], Opts, Acc) ->
    Resume =
        fun(end_stream) ->
                emit([format_number(Acc), end_json],
                    fun() -> exp(<<>>, [], Opts, Acc) end);
            (Stream) ->
                exp(Stream, [], Opts, Acc)
        end,
    {jsx, incomplete, Resume};
exp(Bin, Stack, Opts, Acc) ->
    ?incomplete(
        exp(<<Bin/binary, Stream/binary>>, Stack, Opts, Acc)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2011-07-10 10:54:04 -07:00
|
|
|
%% format_number/1: converts a number accumulator into a tagged value. every
%% part of the accumulator is a string held in reverse order:
%%   Int              -> {integer, _}
%%   {Int, Frac}      -> {float, _}
%%   {Int, [], Exp}   -> {float, _}, a `.0' is inserted because
%%                       list_to_float/1 requires a fraction
%%   {Int, Frac, Exp} -> {float, _}
format_number(Int) when is_list(Int) ->
    Digits = lists:reverse(Int),
    {integer, list_to_integer(Digits)};
format_number({Int, Frac}) ->
    Text = lists:reverse(Int, [$. | lists:reverse(Frac)]),
    {float, list_to_float(Text)};
format_number({Int, [], Exp}) ->
    Text = lists:reverse(Int, ".0e" ++ lists:reverse(Exp)),
    {float, list_to_float(Text)};
format_number({Int, Frac, Exp}) ->
    Exponent = [$e | lists:reverse(Exp)],
    Text = lists:reverse(Int, [$. | lists:reverse(Frac, Exponent)]),
    {float, list_to_float(Text)}.
|
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% tr/3: expects the `r' of the literal `true' (the leading `t' is presumably
%% consumed by the caller)
tr(<<$r/?utfx, Tail/binary>>, Stack, Opts) -> tru(Tail, Stack, Opts);
tr(Bin, Stack, Opts) ->
    ?incomplete(
        tr(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% tru/3: expects the `u' of the literal `true'
tru(<<$u/?utfx, Tail/binary>>, Stack, Opts) -> true(Tail, Stack, Opts);
tru(Bin, Stack, Opts) ->
    ?incomplete(
        tru(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% true/3: expects the final `e' of the literal `true', then passes
%% {literal, true} to emit with a continuation into maybe_done
true(<<$e/?utfx, Tail/binary>>, Stack, Opts) ->
    Continue = fun() -> maybe_done(Tail, Stack, Opts) end,
    emit([{literal, true}], Continue);
true(Bin, Stack, Opts) ->
    ?incomplete(
        true(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% fa/3: expects the `a' of the literal `false' (the leading `f' is presumably
%% consumed by the caller)
fa(<<$a/?utfx, Tail/binary>>, Stack, Opts) -> fal(Tail, Stack, Opts);
fa(Bin, Stack, Opts) ->
    ?incomplete(
        fa(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% fal/3: expects the `l' of the literal `false'
fal(<<$l/?utfx, Tail/binary>>, Stack, Opts) -> fals(Tail, Stack, Opts);
fal(Bin, Stack, Opts) ->
    ?incomplete(
        fal(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% fals/3: expects the `s' of the literal `false'
fals(<<$s/?utfx, Tail/binary>>, Stack, Opts) -> false(Tail, Stack, Opts);
fals(Bin, Stack, Opts) ->
    ?incomplete(
        fals(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% false/3: expects the final `e' of the literal `false', then passes
%% {literal, false} to emit with a continuation into maybe_done
false(<<$e/?utfx, Tail/binary>>, Stack, Opts) ->
    Continue = fun() -> maybe_done(Tail, Stack, Opts) end,
    emit([{literal, false}], Continue);
false(Bin, Stack, Opts) ->
    ?incomplete(
        false(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% nu/3: expects the `u' of the literal `null' (the leading `n' is presumably
%% consumed by the caller)
nu(<<$u/?utfx, Tail/binary>>, Stack, Opts) -> nul(Tail, Stack, Opts);
nu(Bin, Stack, Opts) ->
    ?incomplete(
        nu(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% nul/3: expects the first `l' of the literal `null'
nul(<<$l/?utfx, Tail/binary>>, Stack, Opts) -> null(Tail, Stack, Opts);
nul(Bin, Stack, Opts) ->
    ?incomplete(
        nul(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
2010-08-22 11:31:14 -07:00
|
|
|
|
|
|
|
|
2010-08-25 23:17:10 -07:00
|
|
|
%% null/3: expects the final `l' of the literal `null', then passes
%% {literal, null} to emit with a continuation into maybe_done
null(<<$l/?utfx, Tail/binary>>, Stack, Opts) ->
    Continue = fun() -> maybe_done(Tail, Stack, Opts) end,
    emit([{literal, null}], Continue);
null(Bin, Stack, Opts) ->
    ?incomplete(
        null(<<Bin/binary, Stream/binary>>, Stack, Opts)
    ).
|
|
|
|
|
|
|
|
|
2011-07-28 18:47:58 -07:00
|
|
|
|
|
|
|
-ifdef(TEST).
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
|
|
|
|
|
|
|
%% u+fffe and u+ffff must be rejected with default options and replaced by
%% u+fffd under loose_unicode. both checks return the list of offending
%% results, so the expected value (first argument, per eunit convention) is []
noncharacters_test_() ->
    [
        {"noncharacters - badjson",
            ?_assertEqual([], check_bad(noncharacters()))
        },
        {"noncharacters - replaced",
            ?_assertEqual([], check_replaced(noncharacters()))
        }
    ].
|
|
|
|
|
|
|
|
%% the noncharacters above u+ffff must be rejected with default options and
%% replaced by u+fffd under loose_unicode. both checks return the list of
%% offending results, so the expected value (first argument, per eunit
%% convention) is []
extended_noncharacters_test_() ->
    [
        {"extended noncharacters - badjson",
            ?_assertEqual([], check_bad(extended_noncharacters()))
        },
        {"extended noncharacters - replaced",
            ?_assertEqual([], check_replaced(extended_noncharacters()))
        }
    ].
|
|
|
|
|
|
|
|
%% raw surrogate codepoints must be rejected with default options and
%% replaced by u+fffd under loose_unicode. both checks return the list of
%% offending results, so the expected value (first argument, per eunit
%% convention) is []
surrogates_test_() ->
    [
        {"surrogates - badjson",
            ?_assertEqual([], check_bad(surrogates()))
        },
        {"surrogates - replaced",
            ?_assertEqual([], check_replaced(surrogates()))
        }
    ].
|
|
|
|
|
|
|
|
%% unescaped control characters inside a string must be rejected. the check
%% returns the list of offending results, so the expected value (first
%% argument, per eunit convention) is []
control_test_() ->
    [
        {"control characters - badjson",
            ?_assertEqual([], check_bad(control_characters()))
        }
    ].
|
|
|
|
|
|
|
|
%% the reserved range u+fdd0..u+fdef must be rejected with default options
%% and replaced by u+fffd under loose_unicode. both checks return the list of
%% offending results, so the expected value (first argument, per eunit
%% convention) is []
reserved_test_() ->
    [
        {"reserved noncharacters - badjson",
            ?_assertEqual([], check_bad(reserved_space()))
        },
        {"reserved noncharacters - replaced",
            ?_assertEqual([], check_replaced(reserved_space()))
        }
    ].
|
|
|
|
|
|
|
|
%% a null byte inside a string must be rejected. the check returns the list
%% of offending results, so the expected value (first argument, per eunit
%% convention) is []
zero_test_() ->
    [
        {"nullbyte - badjson",
            ?_assertEqual([], check_bad(zero()))
        }
    ].
|
|
|
|
|
|
|
|
%% every acceptable codepoint must decode to a string event. the check
%% returns the list of offending results, so the expected value (first
%% argument, per eunit convention) is []
good_characters_test_() ->
    [
        {"acceptable codepoints",
            ?_assertEqual([], check_good(good()))
        },
        {"acceptable extended",
            ?_assertEqual([], check_good(good_extended()))
        }
    ].
|
|
|
|
|
|
|
|
|
|
|
|
%% decodes every codepoint in List with default options and drops the leading
%% results that were rejected as badjson. returns [] when all were rejected,
%% otherwise the remaining results starting at the first that was accepted
check_bad(List) ->
    IsRejected =
        fun({_, {error, badjson}}) -> true;
            (_) -> false
        end,
    lists:dropwhile(IsRejected, check(List, [], [])).
|
|
|
|
|
|
|
|
%% decodes every codepoint in List with loose_unicode and drops the leading
%% results whose first event is a string holding only u+fffd. returns [] when
%% all were replaced, otherwise the remaining results starting at the first
%% that was not
check_replaced(List) ->
    IsReplaced =
        fun({_, [{string, [16#fffd]}|_]}) -> true;
            (_) -> false
        end,
    lists:dropwhile(IsReplaced, check(List, [loose_unicode], [])).
|
|
|
|
|
|
|
|
%% decodes every codepoint in List with default options and drops the leading
%% results whose first event is a string. returns [] when all decoded,
%% otherwise the remaining results starting at the first that did not
check_good(List) ->
    IsString =
        fun({_, [{string, _}|_]}) -> true;
            (_) -> false
        end,
    lists:dropwhile(IsString, check(List, [], [])).
|
|
|
|
|
|
|
|
%% decodes each codepoint in the list (wrapped as a json string in the
%% encoding under test) and pushes {Codepoint, Result} onto Acc, so results
%% come back in reverse input order
check(List, Opts, Acc) ->
    lists:foldl(
        fun(Codepoint, Results) ->
            Result = decode(to_fake_utf(Codepoint, ?encoding), Opts),
            [{Codepoint, Result} | Results]
        end,
        Acc,
        List
    ).
|
|
|
|
|
|
|
|
|
|
|
|
%% builds a decoder from Opts, feeds it JSON and drives it to completion,
%% returning whatever loop/2 collects
decode(JSON, Opts) ->
    Decoder = decoder(Opts),
    FirstStep = Decoder(JSON),
    loop(FirstStep, []).
|
|
|
|
|
|
|
|
|
|
|
|
%% drives a decoder to completion. events are collected until end_json and
%% returned in order; an incomplete decoder is told the stream has ended; any
%% other result is reported as {error, badjson}
loop({jsx, end_json, _Next}, Events) ->
    lists:reverse(Events);
loop({jsx, incomplete, Resume}, Events) ->
    loop(Resume(end_stream), Events);
loop({jsx, Event, Next}, Events) ->
    loop(Next(), [Event | Events]);
loop(_Other, _Events) ->
    {error, badjson}.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%% the two noncharacters at the end of the basic multilingual plane
noncharacters() -> [16#fffe, 16#ffff].
|
|
|
|
|
|
|
|
%% the last two codepoints of each of planes 1 through 16, in ascending order
extended_noncharacters() ->
    [Plane * 16#10000 + Last
        || Plane <- lists:seq(1, 16), Last <- [16#fffe, 16#ffff]].
|
|
|
|
|
|
|
|
%% every surrogate codepoint, high and low
surrogates() ->
    First = 16#d800,
    Last = 16#dfff,
    lists:seq(First, Last).
|
|
|
|
|
|
|
|
%% the control characters u+0001 through u+001f (the null byte is covered
%% separately by zero/0)
control_characters() -> lists:seq(16#01, 16#1f).
|
|
|
|
|
|
|
|
%% the 32 codepoints u+fdd0 through u+fdef
reserved_space() -> [16#fdd0 + Offset || Offset <- lists:seq(0, 31)].
|
|
|
|
|
|
|
|
%% the null byte, as a one element list of codepoints
zero() -> [16#0000].
|
|
|
|
|
|
|
|
%% codepoints up to u+fffd expected to decode as is: everything from space
%% upwards except the double quote (16#22), the reverse solidus (16#5c), the
%% surrogates and the range u+fdd0..u+fdef
good() ->
    lists:append([
        [32, 33],
        lists:seq(16#23, 16#5b),
        lists:seq(16#5d, 16#d7ff),
        lists:seq(16#e000, 16#fdcf),
        lists:seq(16#fdf0, 16#fffd)
    ]).
|
|
|
|
|
|
|
|
%% plane 16 up to, but not including, its two trailing noncharacters
good_extended() ->
    First = 16#100000,
    Last = 16#10fffd,
    lists:seq(First, Last).
|
|
|
|
|
|
|
|
%% erlang refuses to encode certain codepoints, so the encodings are put
%% together by hand. returns codepoint N between two double quotes (34) in
%% the requested encoding, without checking that N is a legal codepoint

%% utf8: one to four bytes. integer segments keep only their low bits, so
%% `N:6' is the lowest six bits of N and `(N bsr 6):6' the six above them
to_fake_utf(N, utf8) when N < 16#0080 ->
    <<34/utf8, N:8, 34/utf8>>;
to_fake_utf(N, utf8) when N < 16#0800 ->
    <<34/utf8, 2#110:3, (N bsr 6):5, 2#10:2, N:6, 34/utf8>>;
to_fake_utf(N, utf8) when N < 16#10000 ->
    <<34/utf8,
        2#1110:4, (N bsr 12):4,
        2#10:2, (N bsr 6):6,
        2#10:2, N:6,
        34/utf8>>;
to_fake_utf(N, utf8) ->
    %% the match also asserts that N fits in 21 bits
    <<0:3, Top:3, UpperMid:6, LowerMid:6, Bottom:6>> = <<N:24>>,
    <<34/utf8,
        2#11110:5, Top:3,
        2#10:2, UpperMid:6,
        2#10:2, LowerMid:6,
        2#10:2, Bottom:6,
        34/utf8>>;

%% utf16: a raw 16 bit unit below u+10000, the built in encoder above
to_fake_utf(N, utf16) when N < 16#10000 ->
    <<34/utf16, N:16, 34/utf16>>;
to_fake_utf(N, utf16) ->
    <<34/utf16, N/utf16, 34/utf16>>;

to_fake_utf(N, utf16le) when N < 16#10000 ->
    <<34, 0, N:16/little, 34, 0>>;
to_fake_utf(N, utf16le) ->
    <<34/utf16-little, N/utf16-little, 34/utf16-little>>;

%% utf32: always a raw 32 bit unit
to_fake_utf(N, utf32) ->
    <<34/utf32, N:32, 34/utf32>>;

to_fake_utf(N, utf32le) ->
    <<34/utf32-little, N:32/little, 34/utf32-little>>.
|
|
|
|
|
|
|
|
|
|
|
|
-endif.
|