updated comments in decoder
This commit is contained in:
parent
4dcb929491
commit
f8c749497f
1 changed files with 54 additions and 41 deletions
|
@ -23,7 +23,7 @@
|
||||||
|
|
||||||
-module(jsx_decoder).
|
-module(jsx_decoder).
|
||||||
|
|
||||||
%% inline sequence accumulation, handle_event and format_number
|
%% inline sequence accumulation, handle_event, format_number and maybe_replace
|
||||||
-compile({inline, [new_seq/0, new_seq/1, acc_seq/2, end_seq/1, end_seq/2]}).
|
-compile({inline, [new_seq/0, new_seq/1, acc_seq/2, end_seq/1, end_seq/2]}).
|
||||||
-compile({inline, [handle_event/3]}).
|
-compile({inline, [handle_event/3]}).
|
||||||
-compile({inline, [format_number/1]}).
|
-compile({inline, [format_number/1]}).
|
||||||
|
@ -38,6 +38,38 @@ decoder(Handler, State, Config) ->
|
||||||
fun(JSON) -> start(JSON, {Handler, Handler:init(State)}, [], jsx_utils:parse_config(Config)) end.
|
fun(JSON) -> start(JSON, {Handler, Handler:init(State)}, [], jsx_utils:parse_config(Config)) end.
|
||||||
|
|
||||||
|
|
||||||
|
%% resume allows continuation from interrupted decoding without having to explicitly export
|
||||||
|
%% all states
|
||||||
|
-spec resume(
|
||||||
|
Rest::binary(),
|
||||||
|
State::atom(),
|
||||||
|
Handler::{atom(), any()},
|
||||||
|
Acc::any(),
|
||||||
|
Stack::list(atom()),
|
||||||
|
Config::jsx:config()
|
||||||
|
) -> jsx:decoder().
|
||||||
|
|
||||||
|
resume(Rest, State, Handler, Acc, Stack, Config) ->
|
||||||
|
case State of
|
||||||
|
start -> start(Rest, Handler, Stack, Config);
|
||||||
|
value -> value(Rest, Handler, Stack, Config);
|
||||||
|
object -> object(Rest, Handler, Stack, Config);
|
||||||
|
array -> array(Rest, Handler, Stack, Config);
|
||||||
|
colon -> colon(Rest, Handler, Stack, Config);
|
||||||
|
key -> key(Rest, Handler, Stack, Config);
|
||||||
|
string -> string(Rest, Handler, Acc, Stack, Config);
|
||||||
|
integer -> integer(Rest, Handler, Acc, Stack, Config);
|
||||||
|
decimal -> decimal(Rest, Handler, Acc, Stack, Config);
|
||||||
|
exp -> exp(Rest, Handler, Acc, Stack, Config);
|
||||||
|
true -> true(Rest, Handler, Stack, Config);
|
||||||
|
false -> false(Rest, Handler, Stack, Config);
|
||||||
|
null -> null(Rest, Handler, Stack, Config);
|
||||||
|
comment -> comment(Rest, Handler, Acc, Stack, Config);
|
||||||
|
maybe_done -> maybe_done(Rest, Handler, Stack, Config);
|
||||||
|
done -> done(Rest, Handler, Stack, Config)
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
-include("jsx_config.hrl").
|
-include("jsx_config.hrl").
|
||||||
|
|
||||||
|
|
||||||
|
@ -122,27 +154,7 @@ incomplete(State, Rest, Handler, Acc, Stack, Config=#config{incomplete_handler=F
|
||||||
F(Rest, {decoder, State, Handler, Acc, Stack}, jsx_utils:config_to_list(Config)).
|
F(Rest, {decoder, State, Handler, Acc, Stack}, jsx_utils:config_to_list(Config)).
|
||||||
|
|
||||||
|
|
||||||
resume(Rest, State, Handler, Acc, Stack, Config) ->
|
%% lists are benchmarked to be faster (though higher in memory usage) than binaries
|
||||||
case State of
|
|
||||||
start -> start(Rest, Handler, Stack, Config);
|
|
||||||
value -> value(Rest, Handler, Stack, Config);
|
|
||||||
object -> object(Rest, Handler, Stack, Config);
|
|
||||||
array -> array(Rest, Handler, Stack, Config);
|
|
||||||
colon -> colon(Rest, Handler, Stack, Config);
|
|
||||||
key -> key(Rest, Handler, Stack, Config);
|
|
||||||
string -> string(Rest, Handler, Acc, Stack, Config);
|
|
||||||
integer -> integer(Rest, Handler, Acc, Stack, Config);
|
|
||||||
decimal -> decimal(Rest, Handler, Acc, Stack, Config);
|
|
||||||
exp -> exp(Rest, Handler, Acc, Stack, Config);
|
|
||||||
true -> true(Rest, Handler, Stack, Config);
|
|
||||||
false -> false(Rest, Handler, Stack, Config);
|
|
||||||
null -> null(Rest, Handler, Stack, Config);
|
|
||||||
comment -> comment(Rest, Handler, Acc, Stack, Config);
|
|
||||||
maybe_done -> maybe_done(Rest, Handler, Stack, Config);
|
|
||||||
done -> done(Rest, Handler, Stack, Config)
|
|
||||||
end.
|
|
||||||
|
|
||||||
|
|
||||||
new_seq() -> [].
|
new_seq() -> [].
|
||||||
new_seq(C) -> [C].
|
new_seq(C) -> [C].
|
||||||
|
|
||||||
|
@ -278,8 +290,10 @@ key(Bin, Handler, Stack, Config) ->
|
||||||
?error(key, Bin, Handler, Stack, Config).
|
?error(key, Bin, Handler, Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
%% explicitly whitelist ascii set for better efficiency (seriously, it's worth
|
%% explicitly whitelist ascii set for faster parsing. really? really. someone should
|
||||||
%% almost a 20% increase)
|
%% submit a patch that unrolls simple guards
|
||||||
|
%% note that if you encounter an error from string and you can't find the clause that
|
||||||
|
%% caused it here, it might be in unescape below
|
||||||
string(<<32, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<32, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 32), Stack, Config);
|
string(Rest, Handler, acc_seq(Acc, 32), Stack, Config);
|
||||||
string(<<33, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<33, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
|
@ -534,13 +548,14 @@ string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X >= 16#100000
|
||||||
string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true})
|
string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true})
|
||||||
when X >= 160 ->
|
when X >= 160 ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
||||||
%% u+fffe and u+ffff for R14BXX
|
|
||||||
string(<<239, 191, X, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true})
|
|
||||||
when X == 190; X == 191 ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
|
||||||
%% u+xfffe, u+xffff, control codes and other noncharacters
|
%% u+xfffe, u+xffff, control codes and other noncharacters
|
||||||
string(<<_/utf8, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) ->
|
string(<<_/utf8, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true}) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
||||||
|
%% u+fffe and u+ffff for R14BXX (subsequent runtimes will happily match the
|
||||||
|
%% preceding clause)
|
||||||
|
string(<<239, 191, X, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true})
|
||||||
|
when X == 190; X == 191 ->
|
||||||
|
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
||||||
%% overlong encodings and missing continuations of a 2 byte sequence
|
%% overlong encodings and missing continuations of a 2 byte sequence
|
||||||
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true})
|
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{replaced_bad_utf8=true})
|
||||||
when X >= 192, X =< 223 ->
|
when X >= 192, X =< 223 ->
|
||||||
|
@ -562,10 +577,6 @@ string(Bin, Handler, Acc, Stack, Config) ->
|
||||||
false -> ?error(string, Bin, Handler, Acc, Stack, Config)
|
false -> ?error(string, Bin, Handler, Acc, Stack, Config)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
%% string appends it's output to the term at the top of the stack. for
|
|
||||||
%% efficiency the strings are build in reverse order and reversed before
|
|
||||||
%% being added to the output stream
|
|
||||||
%% when parsing strings, the naive detection of partial codepoints is
|
%% when parsing strings, the naive detection of partial codepoints is
|
||||||
%% insufficient. this incredibly anal function should detect all badly formed
|
%% insufficient. this incredibly anal function should detect all badly formed
|
||||||
%% utf sequences
|
%% utf sequences
|
||||||
|
@ -579,28 +590,29 @@ is_partial_utf(<<X, Y, Z>>)
|
||||||
true;
|
true;
|
||||||
is_partial_utf(_) -> false.
|
is_partial_utf(_) -> false.
|
||||||
|
|
||||||
|
|
||||||
%% strips continuation bytes after bad utf bytes, guards against both too short
|
%% strips continuation bytes after bad utf bytes, guards against both too short
|
||||||
%% and overlong sequences. N is the maximum number of bytes to strip
|
%% and overlong sequences. N is the maximum number of bytes to strip
|
||||||
%% if end of input is reached before stripping the max number of continuations
|
|
||||||
%% possible magic numbers are reinserted into the stream that get us back to
|
|
||||||
%% the same state without complicated machinery
|
|
||||||
strip_continuations(Rest, Handler, Acc, Stack, Config, 0) ->
|
strip_continuations(Rest, Handler, Acc, Stack, Config, 0) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
||||||
strip_continuations(<<X, Rest/binary>>, Handler, Acc, Stack, Config, N) when X >= 128, X =< 191 ->
|
strip_continuations(<<X, Rest/binary>>, Handler, Acc, Stack, Config, N) when X >= 128, X =< 191 ->
|
||||||
strip_continuations(Rest, Handler, Acc, Stack, Config, N - 1);
|
strip_continuations(Rest, Handler, Acc, Stack, Config, N - 1);
|
||||||
%% incomplete
|
%% if end of input is reached before stripping the max number of continuations
|
||||||
|
%% possible magic numbers are reinserted into the stream that get us back to
|
||||||
|
%% the same state without complicated machinery
|
||||||
strip_continuations(<<>>, Handler, Acc, Stack, Config, N) ->
|
strip_continuations(<<>>, Handler, Acc, Stack, Config, N) ->
|
||||||
case N of
|
case N of
|
||||||
1 -> incomplete(string, <<192>>, Handler, Acc, Stack, Config);
|
1 -> incomplete(string, <<192>>, Handler, Acc, Stack, Config);
|
||||||
2 -> incomplete(string, <<224>>, Handler, Acc, Stack, Config);
|
2 -> incomplete(string, <<224>>, Handler, Acc, Stack, Config);
|
||||||
3 -> incomplete(string, <<240>>, Handler, Acc, Stack, Config)
|
3 -> incomplete(string, <<240>>, Handler, Acc, Stack, Config)
|
||||||
end;
|
end;
|
||||||
%% not a continuation byte, dispatch back to string
|
%% not a continuation byte, insert a replacement character for the sequence thus
|
||||||
|
%% far and dispatch back to string
|
||||||
strip_continuations(Rest, Handler, Acc, Stack, Config, _) ->
|
strip_continuations(Rest, Handler, Acc, Stack, Config, _) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config).
|
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
|
%% this all gets really gross and should probably eventually be folded into string,
|
||||||
|
%% but for now it fakes being part of string on incompletes and errors
|
||||||
unescape(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
unescape(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config);
|
string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config);
|
||||||
unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
|
@ -696,8 +708,9 @@ maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
|
||||||
maybe_replace(X, _Config) -> [X].
|
maybe_replace(X, _Config) -> [X].
|
||||||
|
|
||||||
|
|
||||||
%% like strings, numbers are collected in an intermediate accumulator before
|
%% like in strings, there are some pseudo states in here that will never
|
||||||
%% being emitted to the callback handler
|
%% show up in errors or incompletes. some show up in value, some show
|
||||||
|
%% up in integer, decimal or exp
|
||||||
negative(<<$0, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
negative(<<$0, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
zero(Rest, Handler, acc_seq(Acc, $0), Stack, Config);
|
zero(Rest, Handler, acc_seq(Acc, $0), Stack, Config);
|
||||||
negative(<<S, Rest/binary>>, Handler, Acc, Stack, Config) when ?is_nonzero(S) ->
|
negative(<<S, Rest/binary>>, Handler, Acc, Stack, Config) when ?is_nonzero(S) ->
|
||||||
|
@ -732,6 +745,7 @@ integer(Bin, Handler, Acc, Stack, Config) ->
|
||||||
|
|
||||||
decimal(<<S, Rest/binary>>, Handler, Acc, Stack, Config) when S=:= ?zero; ?is_nonzero(S) ->
|
decimal(<<S, Rest/binary>>, Handler, Acc, Stack, Config) when S=:= ?zero; ?is_nonzero(S) ->
|
||||||
decimal(Rest, Handler, acc_seq(Acc, S), Stack, Config);
|
decimal(Rest, Handler, acc_seq(Acc, S), Stack, Config);
|
||||||
|
%% guard against the insidious `1.e1` error
|
||||||
decimal(<<S, Rest/binary>>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E ->
|
decimal(<<S, Rest/binary>>, Handler, Acc, Stack, Config) when S =:= $e; S =:= $E ->
|
||||||
case Acc of
|
case Acc of
|
||||||
[?decimalpoint|_] -> ?error(decimal, <<S, Rest/binary>>, Handler, Acc, Stack, Config);
|
[?decimalpoint|_] -> ?error(decimal, <<S, Rest/binary>>, Handler, Acc, Stack, Config);
|
||||||
|
@ -799,7 +813,6 @@ finish_number(Bin, Handler, {NumType, Acc}, Stack, Config) ->
|
||||||
?error(value, <<$0, Bin/binary>>, Handler, OldAcc, Stack, Config)
|
?error(value, <<$0, Bin/binary>>, Handler, OldAcc, Stack, Config)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
format_number({zero, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))};
|
format_number({zero, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))};
|
||||||
format_number({integer, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))};
|
format_number({integer, Acc}) -> {integer, list_to_integer(lists:reverse(Acc))};
|
||||||
format_number({decimal, Acc}) -> {float, list_to_float(lists:reverse(Acc))};
|
format_number({decimal, Acc}) -> {float, list_to_float(lists:reverse(Acc))};
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue