switch string parsing to a count and slice implementation
This commit is contained in:
parent
9528216d15
commit
dc8e6a0c54
2 changed files with 535 additions and 407 deletions
|
@ -23,8 +23,7 @@
|
|||
|
||||
-module(jsx_decoder).
|
||||
|
||||
%% inline sequence accumulation, handle_event, format_number and maybe_replace
|
||||
-compile({inline, [new_seq/0, new_seq/1, acc_seq/2, end_seq/1, end_seq/2]}).
|
||||
%% inline handle_event, format_number and maybe_replace
|
||||
-compile({inline, [handle_event/3]}).
|
||||
-compile({inline, [format_number/1]}).
|
||||
-compile({inline, [maybe_replace/2]}).
|
||||
|
@ -161,19 +160,6 @@ incomplete(State, Rest, Handler, Acc, Stack, Config = #config{incomplete_handler
|
|||
F(Rest, {decoder, State, Handler, Acc, Stack}, jsx_config:config_to_list(Config)).
|
||||
|
||||
|
||||
%% lists are benchmarked to be faster (tho higher in memory usage) than binaries
|
||||
new_seq() -> [].
|
||||
new_seq(C) -> [C].
|
||||
|
||||
acc_seq(Seq, C) when is_list(C) -> lists:reverse(C) ++ Seq;
|
||||
acc_seq(Seq, C) -> [C] ++ Seq.
|
||||
|
||||
end_seq(Seq) -> unicode:characters_to_binary(lists:reverse(Seq)).
|
||||
|
||||
end_seq(Seq, #config{dirty_strings=true}) -> list_to_binary(lists:reverse(Seq));
|
||||
end_seq(Seq, _) -> end_seq(Seq).
|
||||
|
||||
|
||||
handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}.
|
||||
|
||||
|
||||
|
@ -190,9 +176,9 @@ start(Bin, Handler, Stack, Config) ->
|
|||
|
||||
|
||||
value(<<?doublequote, Rest/binary>>, Handler, Stack, Config) ->
|
||||
string(Rest, Handler, new_seq(), Stack, Config);
|
||||
string(Rest, Handler, Stack, Config);
|
||||
value(<<?singlequote, Rest/binary>>, Handler, Stack, Config=#config{strict_single_quotes=false}) ->
|
||||
string(Rest, Handler, new_seq(), [singlequote|Stack], Config);
|
||||
string(Rest, Handler, [singlequote|Stack], Config);
|
||||
value(<<$t, Rest/binary>>, Handler, Stack, Config) ->
|
||||
true(Rest, Handler, Stack, Config);
|
||||
value(<<$f, Rest/binary>>, Handler, Stack, Config) ->
|
||||
|
@ -200,11 +186,11 @@ value(<<$f, Rest/binary>>, Handler, Stack, Config) ->
|
|||
value(<<$n, Rest/binary>>, Handler, Stack, Config) ->
|
||||
null(Rest, Handler, Stack, Config);
|
||||
value(<<?negative, Rest/binary>>, Handler, Stack, Config) ->
|
||||
negative(Rest, Handler, new_seq($-), Stack, Config);
|
||||
negative(Rest, Handler, [$-], Stack, Config);
|
||||
value(<<?zero, Rest/binary>>, Handler, Stack, Config) ->
|
||||
zero(Rest, Handler, new_seq($0), Stack, Config);
|
||||
zero(Rest, Handler, [$0], Stack, Config);
|
||||
value(<<S, Rest/binary>>, Handler, Stack, Config) when ?is_nonzero(S) ->
|
||||
integer(Rest, Handler, new_seq(S), Stack, Config);
|
||||
integer(Rest, Handler, [S], Stack, Config);
|
||||
value(<<?start_object, Rest/binary>>, Handler, Stack, Config) ->
|
||||
object(Rest, handle_event(start_object, Handler, Config), [key|Stack], Config);
|
||||
value(<<?start_array, Rest/binary>>, Handler, Stack, Config) ->
|
||||
|
@ -228,9 +214,9 @@ value(Bin, Handler, Stack, Config) ->
|
|||
|
||||
|
||||
object(<<?doublequote, Rest/binary>>, Handler, Stack, Config) ->
|
||||
string(Rest, Handler, new_seq(), Stack, Config);
|
||||
string(Rest, Handler, Stack, Config);
|
||||
object(<<?singlequote, Rest/binary>>, Handler, Stack, Config=#config{strict_single_quotes=false}) ->
|
||||
string(Rest, Handler, new_seq(), [singlequote|Stack], Config);
|
||||
string(Rest, Handler, [singlequote|Stack], Config);
|
||||
object(<<?end_object, Rest/binary>>, Handler, [key|Stack], Config) ->
|
||||
maybe_done(Rest, handle_event(end_object, Handler, Config), Stack, Config);
|
||||
object(<<S, Rest/binary>>, Handler, Stack, Config) when ?is_whitespace(S) ->
|
||||
|
@ -286,9 +272,9 @@ colon(Bin, Handler, Stack, Config) ->
|
|||
|
||||
|
||||
key(<<?doublequote, Rest/binary>>, Handler, Stack, Config) ->
|
||||
string(Rest, Handler, new_seq(), Stack, Config);
|
||||
string(Rest, Handler, Stack, Config);
|
||||
key(<<?singlequote, Rest/binary>>, Handler, Stack, Config=#config{strict_single_quotes=false}) ->
|
||||
string(Rest, Handler, new_seq(), [singlequote|Stack], Config);
|
||||
string(Rest, Handler, [singlequote|Stack], Config);
|
||||
key(<<S, Rest/binary>>, Handler, Stack, Config) when ?is_whitespace(S) ->
|
||||
key(Rest, Handler, Stack, Config);
|
||||
key(<<?end_object, Rest/binary>>, Handler, [key|Stack], Config=#config{strict_commas=false}) ->
|
||||
|
@ -311,209 +297,34 @@ key(Bin, Handler, Stack, Config) ->
|
|||
%% submit a patch that unrolls simple guards
|
||||
%% note that if you encounter an error from string and you can't find the clause that
|
||||
%% caused it here, it might be in unescape below
|
||||
string(<<32, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 32), Stack, Config);
|
||||
string(<<33, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 33), Stack, Config);
|
||||
|
||||
string(Bin, Handler, Stack, Config) ->
|
||||
string(Bin, Handler, [], Stack, Config).
|
||||
|
||||
|
||||
string(<<?doublequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
doublequote(Rest, Handler, Acc, Stack, Config);
|
||||
string(<<35, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 35), Stack, Config);
|
||||
string(<<36, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 36), Stack, Config);
|
||||
string(<<37, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 37), Stack, Config);
|
||||
string(<<38, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 38), Stack, Config);
|
||||
string(<<?singlequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
singlequote(Rest, Handler, Acc, Stack, Config);
|
||||
string(<<40, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 40), Stack, Config);
|
||||
string(<<41, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 41), Stack, Config);
|
||||
string(<<42, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 42), Stack, Config);
|
||||
string(<<43, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 43), Stack, Config);
|
||||
string(<<44, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 44), Stack, Config);
|
||||
string(<<45, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 45), Stack, Config);
|
||||
string(<<46, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 46), Stack, Config);
|
||||
string(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace(?solidus, Config)), Stack, Config);
|
||||
string(<<48, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 48), Stack, Config);
|
||||
string(<<49, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 49), Stack, Config);
|
||||
string(<<50, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 50), Stack, Config);
|
||||
string(<<51, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 51), Stack, Config);
|
||||
string(<<52, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 52), Stack, Config);
|
||||
string(<<53, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 53), Stack, Config);
|
||||
string(<<54, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 54), Stack, Config);
|
||||
string(<<55, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 55), Stack, Config);
|
||||
string(<<56, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 56), Stack, Config);
|
||||
string(<<57, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 57), Stack, Config);
|
||||
string(<<58, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 58), Stack, Config);
|
||||
string(<<59, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 59), Stack, Config);
|
||||
string(<<60, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 60), Stack, Config);
|
||||
string(<<61, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 61), Stack, Config);
|
||||
string(<<62, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 62), Stack, Config);
|
||||
string(<<63, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 63), Stack, Config);
|
||||
string(<<64, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 64), Stack, Config);
|
||||
string(<<65, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 65), Stack, Config);
|
||||
string(<<66, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 66), Stack, Config);
|
||||
string(<<67, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 67), Stack, Config);
|
||||
string(<<68, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 68), Stack, Config);
|
||||
string(<<69, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 69), Stack, Config);
|
||||
string(<<70, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 70), Stack, Config);
|
||||
string(<<71, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 71), Stack, Config);
|
||||
string(<<72, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 72), Stack, Config);
|
||||
string(<<73, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 73), Stack, Config);
|
||||
string(<<74, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 74), Stack, Config);
|
||||
string(<<75, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 75), Stack, Config);
|
||||
string(<<76, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 76), Stack, Config);
|
||||
string(<<77, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 77), Stack, Config);
|
||||
string(<<78, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 78), Stack, Config);
|
||||
string(<<79, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 79), Stack, Config);
|
||||
string(<<80, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 80), Stack, Config);
|
||||
string(<<81, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 81), Stack, Config);
|
||||
string(<<82, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 82), Stack, Config);
|
||||
string(<<83, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 83), Stack, Config);
|
||||
string(<<84, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 84), Stack, Config);
|
||||
string(<<85, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 85), Stack, Config);
|
||||
string(<<86, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 86), Stack, Config);
|
||||
string(<<87, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 87), Stack, Config);
|
||||
string(<<88, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 88), Stack, Config);
|
||||
string(<<89, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 89), Stack, Config);
|
||||
string(<<90, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 90), Stack, Config);
|
||||
string(<<91, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 91), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace(?solidus, Config)], Stack, Config);
|
||||
string(<<?rsolidus/utf8, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
unescape(Rest, Handler, Acc, Stack, Config);
|
||||
string(<<93, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 93), Stack, Config);
|
||||
string(<<94, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 94), Stack, Config);
|
||||
string(<<95, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 95), Stack, Config);
|
||||
string(<<96, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 96), Stack, Config);
|
||||
string(<<97, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 97), Stack, Config);
|
||||
string(<<98, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 98), Stack, Config);
|
||||
string(<<99, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 99), Stack, Config);
|
||||
string(<<100, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 100), Stack, Config);
|
||||
string(<<101, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 101), Stack, Config);
|
||||
string(<<102, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 102), Stack, Config);
|
||||
string(<<103, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 103), Stack, Config);
|
||||
string(<<104, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 104), Stack, Config);
|
||||
string(<<105, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 105), Stack, Config);
|
||||
string(<<106, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 106), Stack, Config);
|
||||
string(<<107, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 107), Stack, Config);
|
||||
string(<<108, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 108), Stack, Config);
|
||||
string(<<109, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 109), Stack, Config);
|
||||
string(<<110, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 110), Stack, Config);
|
||||
string(<<111, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 111), Stack, Config);
|
||||
string(<<112, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 112), Stack, Config);
|
||||
string(<<113, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 113), Stack, Config);
|
||||
string(<<114, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 114), Stack, Config);
|
||||
string(<<115, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 115), Stack, Config);
|
||||
string(<<116, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 116), Stack, Config);
|
||||
string(<<117, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 117), Stack, Config);
|
||||
string(<<118, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 118), Stack, Config);
|
||||
string(<<119, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 119), Stack, Config);
|
||||
string(<<120, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 120), Stack, Config);
|
||||
string(<<121, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 121), Stack, Config);
|
||||
string(<<122, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 122), Stack, Config);
|
||||
string(<<123, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 123), Stack, Config);
|
||||
string(<<124, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 124), Stack, Config);
|
||||
string(<<125, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 125), Stack, Config);
|
||||
string(<<126, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 126), Stack, Config);
|
||||
string(<<127, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 127), Stack, Config);
|
||||
string(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||
string(Rest, Handler, acc_seq(Acc, C), Stack, Config);
|
||||
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X == 16#2028; X == 16#2029 ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace(X, Config)), Stack, Config);
|
||||
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X >= 16#80 ->
|
||||
string(Rest, Handler, acc_seq(Acc, X), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace(X, Config)], Stack, Config);
|
||||
string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) ->
|
||||
Size = count(Bin, 0, Config),
|
||||
<<Clean:Size/binary, Rest/binary>> = Bin,
|
||||
string(Rest, Handler, [Acc, Clean], Stack, Config);
|
||||
%% really, really dirty strings. if there's no valid utf8 we never reach `count`
|
||||
%% and things get replaced instead of ignored
|
||||
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||
string(Rest, Handler, [Acc, X], Stack, Config);
|
||||
%% u+fffe and u+ffff for R14BXX (subsequent runtimes will happily match preceeding
|
||||
string(<<239, 191, 190, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 16#fffe), Stack, Config);
|
||||
string(Rest, Handler, [Acc, <<16#fffe/utf8>>], Stack, Config);
|
||||
string(<<239, 191, 191, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 16#ffff), Stack, Config);
|
||||
string(Rest, Handler, [Acc, <<16#ffff/utf8>>], Stack, Config);
|
||||
%% partial utf8 codepoints
|
||||
string(<<>>, Handler, Acc, Stack, Config) ->
|
||||
incomplete(string, <<>>, Handler, Acc, Stack, Config);
|
||||
|
@ -527,7 +338,7 @@ string(<<X, Y, Z>>, Handler, Acc, Stack, Config)
|
|||
%% surrogates
|
||||
string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false})
|
||||
when X >= 160 ->
|
||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
||||
string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config);
|
||||
%% overlong encodings and missing continuations of a 2 byte sequence
|
||||
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false})
|
||||
when X >= 192, X =< 223 ->
|
||||
|
@ -542,32 +353,232 @@ string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false
|
|||
strip_continuations(Rest, Handler, Acc, Stack, Config, 3);
|
||||
%% incompletes and unexpected bytes, including orphan continuations
|
||||
string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
||||
string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config);
|
||||
string(Bin, Handler, Acc, Stack, Config) -> ?error(string, Bin, Handler, Acc, Stack, Config).
|
||||
|
||||
|
||||
count(<<32, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<33, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<?doublequote, _/binary>>, N, _) -> N;
|
||||
count(<<35, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<36, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<37, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<38, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<?singlequote, _/binary>>, N, _) -> N;
|
||||
count(<<40, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<41, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<42, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<43, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<44, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<45, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<46, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<?solidus, _/binary>>, N, _) -> N;
|
||||
count(<<48, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<49, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<50, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<51, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<52, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<53, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<54, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<55, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<56, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<57, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<58, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<59, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<60, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<61, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<62, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<63, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<64, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<65, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<66, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<67, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<68, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<69, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<70, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<71, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<72, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<73, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<74, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<75, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<76, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<77, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<78, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<79, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<80, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<81, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<82, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<83, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<84, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<85, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<86, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<87, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<88, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<89, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<90, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<91, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<?rsolidus, _/binary>>, N, _) -> N;
|
||||
count(<<93, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<94, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<95, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<96, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<97, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<98, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<99, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<100, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<101, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<102, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<103, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<104, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<105, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<106, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<107, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<108, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<109, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<110, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<111, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<112, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<113, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<114, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<115, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<116, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<117, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<118, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<119, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<120, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<121, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<122, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<123, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<124, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<125, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<126, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<127, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) ->
|
||||
count(Rest, N + 1, Config);
|
||||
count(<<X/utf8, Rest/binary>>, N, Config) when X < 16#800 ->
|
||||
count(Rest, N + 2, Config);
|
||||
count(<<X/utf8, _/binary>>, N, _) when X == 16#2028; X == 16#2029 -> N;
|
||||
count(<<X/utf8, Rest/binary>>, N, Config) when X < 16#10000 ->
|
||||
count(Rest, N + 3, Config);
|
||||
count(<<_/utf8, Rest/binary>>, N, Config) ->
|
||||
count(Rest, N + 4, Config);
|
||||
count(_, N, _) -> N.
|
||||
|
||||
|
||||
doublequote(Rest, Handler, Acc, [key|_] = Stack, Config) ->
|
||||
colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), Stack, Config);
|
||||
colon(Rest, handle_event({key, iolist_to_binary(Acc)}, Handler, Config), Stack, Config);
|
||||
doublequote(Rest, Handler, Acc, [singlequote|_] = Stack, Config) ->
|
||||
string(Rest, Handler,acc_seq(Acc, maybe_replace(?doublequote, Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace(?doublequote, Config)], Stack, Config);
|
||||
doublequote(<<>>, Handler, Acc, [singlequote|_] = Stack, Config) ->
|
||||
incomplete(string, <<?doublequote>>, Handler, Acc, Stack, Config);
|
||||
doublequote(Rest, Handler, Acc, Stack, Config) ->
|
||||
maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config).
|
||||
maybe_done(Rest, handle_event({string, iolist_to_binary(Acc)}, Handler, Config), Stack, Config).
|
||||
|
||||
|
||||
singlequote(Rest, Handler, Acc, [singlequote, key|Stack], Config) ->
|
||||
colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), [key|Stack], Config);
|
||||
colon(Rest, handle_event({key, iolist_to_binary(Acc)}, Handler, Config), [key|Stack], Config);
|
||||
singlequote(Rest, Handler, Acc, [singlequote|Stack], Config) ->
|
||||
maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config);
|
||||
maybe_done(Rest, handle_event({string, iolist_to_binary(Acc)}, Handler, Config), Stack, Config);
|
||||
singlequote(Rest, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config).
|
||||
string(Rest, Handler, [Acc, ?singlequote], Stack, Config).
|
||||
|
||||
|
||||
%% strips continuation bytes after bad utf bytes, guards against both too short
|
||||
%% and overlong sequences. N is the maximum number of bytes to strip
|
||||
strip_continuations(<<Rest/binary>>, Handler, Acc, Stack, Config, 0) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
||||
string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config);
|
||||
strip_continuations(<<X, Rest/binary>>, Handler, Acc, Stack, Config, N) when X >= 128, X =< 191 ->
|
||||
strip_continuations(Rest, Handler, Acc, Stack, Config, N - 1);
|
||||
%% if end of input is reached before stripping the max number of continuations
|
||||
|
@ -582,33 +593,33 @@ strip_continuations(<<>>, Handler, Acc, Stack, Config, N) ->
|
|||
%% not a continuation byte, insert a replacement character for sequence thus
|
||||
%% far and dispatch back to string
|
||||
strip_continuations(<<Rest/binary>>, Handler, Acc, Stack, Config, _) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config).
|
||||
string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config).
|
||||
|
||||
|
||||
%% this all gets really gross and should probably eventually be folded into
|
||||
%% but for now it fakes being part of string on incompletes and errors
|
||||
unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||
string(<<?rsolidus, Rest/binary>>, Handler, acc_seq(Acc, ?rsolidus), Stack, Config);
|
||||
string(<<?rsolidus, Rest/binary>>, Handler, [Acc, ?rsolidus], Stack, Config);
|
||||
unescape(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||
string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config);
|
||||
string(Rest, Handler, [Acc, ?rsolidus, C], Stack, Config);
|
||||
unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\b, Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace($\b, Config)], Stack, Config);
|
||||
unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\f, Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace($\f, Config)], Stack, Config);
|
||||
unescape(<<$n, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\n, Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace($\n, Config)], Stack, Config);
|
||||
unescape(<<$r, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\r, Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace($\r, Config)], Stack, Config);
|
||||
unescape(<<$t, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\t, Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace($\t, Config)], Stack, Config);
|
||||
unescape(<<?doublequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\", Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace($\", Config)], Stack, Config);
|
||||
unescape(<<?singlequote, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_single_quotes=false}) ->
|
||||
string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config);
|
||||
string(Rest, Handler, [Acc, ?singlequote], Stack, Config);
|
||||
unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\\, Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace($\\, Config)], Stack, Config);
|
||||
unescape(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($/, Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace($/, Config)], Stack, Config);
|
||||
unescape(<<$u, $d, A, B, C, ?rsolidus, $u, $d, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config)
|
||||
when (A == $8 orelse A == $9 orelse A == $a orelse A == $b),
|
||||
(X == $c orelse X == $d orelse X == $e orelse X == $f),
|
||||
|
@ -617,14 +628,14 @@ unescape(<<$u, $d, A, B, C, ?rsolidus, $u, $d, X, Y, Z, Rest/binary>>, Handler,
|
|||
High = erlang:list_to_integer([$d, A, B, C], 16),
|
||||
Low = erlang:list_to_integer([$d, X, Y, Z], 16),
|
||||
Codepoint = (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000,
|
||||
string(Rest, Handler, acc_seq(Acc, Codepoint), Stack, Config);
|
||||
string(Rest, Handler, [Acc, <<Codepoint/utf8>>], Stack, Config);
|
||||
unescape(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config)
|
||||
when (A == $8 orelse A == $9 orelse A == $a orelse A == $b),
|
||||
?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z)
|
||||
->
|
||||
case Config#config.strict_utf8 of
|
||||
true -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config);
|
||||
false -> string(Rest, Handler, acc_seq(Acc, [16#fffd, 16#fffd]), Stack, Config)
|
||||
false -> string(Rest, Handler, [Acc, <<16#fffd/utf8>>, <<16#fffd/utf8>>], Stack, Config)
|
||||
end;
|
||||
unescape(<<$u, $d, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config)
|
||||
when (A == $8 orelse A == $9 orelse A == $a orelse A == $b),
|
||||
|
@ -640,17 +651,17 @@ unescape(<<$u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config)
|
|||
when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D) ->
|
||||
case erlang:list_to_integer([A, B, C, D], 16) of
|
||||
Codepoint when Codepoint < 16#d800; Codepoint > 16#dfff ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace(Codepoint, Config)), Stack, Config);
|
||||
string(Rest, Handler, [Acc, maybe_replace(Codepoint, Config)], Stack, Config);
|
||||
_ when Config#config.strict_utf8 ->
|
||||
?error(string, <<?rsolidus, $u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config);
|
||||
_ -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config)
|
||||
_ -> string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config)
|
||||
end;
|
||||
unescape(Bin, Handler, Acc, Stack, Config) ->
|
||||
case is_partial_escape(Bin) of
|
||||
true -> incomplete(string, <<?rsolidus/utf8, Bin/binary>>, Handler, Acc, Stack, Config);
|
||||
false -> case Config#config.strict_escapes of
|
||||
true -> ?error(string, <<?rsolidus, Bin/binary>>, Handler, Acc, Stack, Config);
|
||||
false -> string(Bin, Handler, acc_seq(Acc, ?rsolidus), Stack, Config)
|
||||
false -> string(Bin, Handler, [Acc, ?rsolidus], Stack, Config)
|
||||
end
|
||||
end.
|
||||
|
||||
|
@ -678,15 +689,16 @@ maybe_replace($/, Config=#config{escaped_strings=true}) ->
|
|||
maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\];
|
||||
maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
|
||||
case Config#config.unescaped_jsonp of
|
||||
true -> X
|
||||
true -> <<X/utf8>>
|
||||
; false -> json_escape_sequence(X)
|
||||
end;
|
||||
maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> json_escape_sequence(X);
|
||||
maybe_replace(X, _Config) -> X.
|
||||
maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
|
||||
json_escape_sequence(X);
|
||||
maybe_replace(X, _Config) -> <<X/utf8>>.
|
||||
|
||||
|
||||
%% convert a codepoint to it's \uXXXX equiv.
|
||||
json_escape_sequence(X) ->
|
||||
json_escape_sequence(X) when X < 65536 ->
|
||||
<<A:4, B:4, C:4, D:4>> = <<X:16>>,
|
||||
[$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))].
|
||||
|
||||
|
@ -772,6 +784,10 @@ exp(Bin, Handler, Acc, Stack, Config) ->
|
|||
finish_number(Bin, Handler, {exp, Acc}, Stack, Config).
|
||||
|
||||
|
||||
acc_seq(Seq, C) when is_list(C) -> lists:reverse(C) ++ Seq;
|
||||
acc_seq(Seq, C) -> [C] ++ Seq.
|
||||
|
||||
|
||||
finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) ->
|
||||
maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), [], Config);
|
||||
finish_number(<<>>, Handler, {NumType, Acc}, Stack, Config) ->
|
||||
|
@ -1230,8 +1246,6 @@ codepoints() ->
|
|||
[16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000] ++
|
||||
[16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000].
|
||||
|
||||
controls() -> lists:seq(0, 31).
|
||||
|
||||
surrogates() -> lists:seq(16#d800, 16#dfff).
|
||||
|
||||
|
||||
|
@ -1250,7 +1264,7 @@ to_fake_utf8(N) ->
|
|||
|
||||
clean_string_test_() ->
|
||||
Clean = codepoints(),
|
||||
Dirty = surrogates() ++ controls(),
|
||||
Dirty = surrogates(),
|
||||
% clean codepoints
|
||||
[{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual(
|
||||
[{string, <<Codepoint/utf8>>}, end_json],
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue