switch string parsing to a count and slice implementation
This commit is contained in:
parent
9528216d15
commit
dc8e6a0c54
2 changed files with 535 additions and 407 deletions
|
@ -23,8 +23,7 @@
|
||||||
|
|
||||||
-module(jsx_decoder).
|
-module(jsx_decoder).
|
||||||
|
|
||||||
%% inline sequence accumulation, handle_event, format_number and maybe_replace
|
%% inline handle_event, format_number and maybe_replace
|
||||||
-compile({inline, [new_seq/0, new_seq/1, acc_seq/2, end_seq/1, end_seq/2]}).
|
|
||||||
-compile({inline, [handle_event/3]}).
|
-compile({inline, [handle_event/3]}).
|
||||||
-compile({inline, [format_number/1]}).
|
-compile({inline, [format_number/1]}).
|
||||||
-compile({inline, [maybe_replace/2]}).
|
-compile({inline, [maybe_replace/2]}).
|
||||||
|
@ -161,19 +160,6 @@ incomplete(State, Rest, Handler, Acc, Stack, Config = #config{incomplete_handler
|
||||||
F(Rest, {decoder, State, Handler, Acc, Stack}, jsx_config:config_to_list(Config)).
|
F(Rest, {decoder, State, Handler, Acc, Stack}, jsx_config:config_to_list(Config)).
|
||||||
|
|
||||||
|
|
||||||
%% lists are benchmarked to be faster (tho higher in memory usage) than binaries
|
|
||||||
new_seq() -> [].
|
|
||||||
new_seq(C) -> [C].
|
|
||||||
|
|
||||||
acc_seq(Seq, C) when is_list(C) -> lists:reverse(C) ++ Seq;
|
|
||||||
acc_seq(Seq, C) -> [C] ++ Seq.
|
|
||||||
|
|
||||||
end_seq(Seq) -> unicode:characters_to_binary(lists:reverse(Seq)).
|
|
||||||
|
|
||||||
end_seq(Seq, #config{dirty_strings=true}) -> list_to_binary(lists:reverse(Seq));
|
|
||||||
end_seq(Seq, _) -> end_seq(Seq).
|
|
||||||
|
|
||||||
|
|
||||||
handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}.
|
handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}.
|
||||||
|
|
||||||
|
|
||||||
|
@ -190,9 +176,9 @@ start(Bin, Handler, Stack, Config) ->
|
||||||
|
|
||||||
|
|
||||||
value(<<?doublequote, Rest/binary>>, Handler, Stack, Config) ->
|
value(<<?doublequote, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
string(Rest, Handler, new_seq(), Stack, Config);
|
string(Rest, Handler, Stack, Config);
|
||||||
value(<<?singlequote, Rest/binary>>, Handler, Stack, Config=#config{strict_single_quotes=false}) ->
|
value(<<?singlequote, Rest/binary>>, Handler, Stack, Config=#config{strict_single_quotes=false}) ->
|
||||||
string(Rest, Handler, new_seq(), [singlequote|Stack], Config);
|
string(Rest, Handler, [singlequote|Stack], Config);
|
||||||
value(<<$t, Rest/binary>>, Handler, Stack, Config) ->
|
value(<<$t, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
true(Rest, Handler, Stack, Config);
|
true(Rest, Handler, Stack, Config);
|
||||||
value(<<$f, Rest/binary>>, Handler, Stack, Config) ->
|
value(<<$f, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
|
@ -200,11 +186,11 @@ value(<<$f, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
value(<<$n, Rest/binary>>, Handler, Stack, Config) ->
|
value(<<$n, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
null(Rest, Handler, Stack, Config);
|
null(Rest, Handler, Stack, Config);
|
||||||
value(<<?negative, Rest/binary>>, Handler, Stack, Config) ->
|
value(<<?negative, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
negative(Rest, Handler, new_seq($-), Stack, Config);
|
negative(Rest, Handler, [$-], Stack, Config);
|
||||||
value(<<?zero, Rest/binary>>, Handler, Stack, Config) ->
|
value(<<?zero, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
zero(Rest, Handler, new_seq($0), Stack, Config);
|
zero(Rest, Handler, [$0], Stack, Config);
|
||||||
value(<<S, Rest/binary>>, Handler, Stack, Config) when ?is_nonzero(S) ->
|
value(<<S, Rest/binary>>, Handler, Stack, Config) when ?is_nonzero(S) ->
|
||||||
integer(Rest, Handler, new_seq(S), Stack, Config);
|
integer(Rest, Handler, [S], Stack, Config);
|
||||||
value(<<?start_object, Rest/binary>>, Handler, Stack, Config) ->
|
value(<<?start_object, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
object(Rest, handle_event(start_object, Handler, Config), [key|Stack], Config);
|
object(Rest, handle_event(start_object, Handler, Config), [key|Stack], Config);
|
||||||
value(<<?start_array, Rest/binary>>, Handler, Stack, Config) ->
|
value(<<?start_array, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
|
@ -228,9 +214,9 @@ value(Bin, Handler, Stack, Config) ->
|
||||||
|
|
||||||
|
|
||||||
object(<<?doublequote, Rest/binary>>, Handler, Stack, Config) ->
|
object(<<?doublequote, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
string(Rest, Handler, new_seq(), Stack, Config);
|
string(Rest, Handler, Stack, Config);
|
||||||
object(<<?singlequote, Rest/binary>>, Handler, Stack, Config=#config{strict_single_quotes=false}) ->
|
object(<<?singlequote, Rest/binary>>, Handler, Stack, Config=#config{strict_single_quotes=false}) ->
|
||||||
string(Rest, Handler, new_seq(), [singlequote|Stack], Config);
|
string(Rest, Handler, [singlequote|Stack], Config);
|
||||||
object(<<?end_object, Rest/binary>>, Handler, [key|Stack], Config) ->
|
object(<<?end_object, Rest/binary>>, Handler, [key|Stack], Config) ->
|
||||||
maybe_done(Rest, handle_event(end_object, Handler, Config), Stack, Config);
|
maybe_done(Rest, handle_event(end_object, Handler, Config), Stack, Config);
|
||||||
object(<<S, Rest/binary>>, Handler, Stack, Config) when ?is_whitespace(S) ->
|
object(<<S, Rest/binary>>, Handler, Stack, Config) when ?is_whitespace(S) ->
|
||||||
|
@ -286,9 +272,9 @@ colon(Bin, Handler, Stack, Config) ->
|
||||||
|
|
||||||
|
|
||||||
key(<<?doublequote, Rest/binary>>, Handler, Stack, Config) ->
|
key(<<?doublequote, Rest/binary>>, Handler, Stack, Config) ->
|
||||||
string(Rest, Handler, new_seq(), Stack, Config);
|
string(Rest, Handler, Stack, Config);
|
||||||
key(<<?singlequote, Rest/binary>>, Handler, Stack, Config=#config{strict_single_quotes=false}) ->
|
key(<<?singlequote, Rest/binary>>, Handler, Stack, Config=#config{strict_single_quotes=false}) ->
|
||||||
string(Rest, Handler, new_seq(), [singlequote|Stack], Config);
|
string(Rest, Handler, [singlequote|Stack], Config);
|
||||||
key(<<S, Rest/binary>>, Handler, Stack, Config) when ?is_whitespace(S) ->
|
key(<<S, Rest/binary>>, Handler, Stack, Config) when ?is_whitespace(S) ->
|
||||||
key(Rest, Handler, Stack, Config);
|
key(Rest, Handler, Stack, Config);
|
||||||
key(<<?end_object, Rest/binary>>, Handler, [key|Stack], Config=#config{strict_commas=false}) ->
|
key(<<?end_object, Rest/binary>>, Handler, [key|Stack], Config=#config{strict_commas=false}) ->
|
||||||
|
@ -311,209 +297,34 @@ key(Bin, Handler, Stack, Config) ->
|
||||||
%% submit a patch that unrolls simple guards
|
%% submit a patch that unrolls simple guards
|
||||||
%% note that if you encounter an error from string and you can't find the clause that
|
%% note that if you encounter an error from string and you can't find the clause that
|
||||||
%% caused it here, it might be in unescape below
|
%% caused it here, it might be in unescape below
|
||||||
string(<<32, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 32), Stack, Config);
|
string(Bin, Handler, Stack, Config) ->
|
||||||
string(<<33, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(Bin, Handler, [], Stack, Config).
|
||||||
string(Rest, Handler, acc_seq(Acc, 33), Stack, Config);
|
|
||||||
|
|
||||||
string(<<?doublequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<?doublequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
doublequote(Rest, Handler, Acc, Stack, Config);
|
doublequote(Rest, Handler, Acc, Stack, Config);
|
||||||
string(<<35, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 35), Stack, Config);
|
|
||||||
string(<<36, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 36), Stack, Config);
|
|
||||||
string(<<37, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 37), Stack, Config);
|
|
||||||
string(<<38, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 38), Stack, Config);
|
|
||||||
string(<<?singlequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<?singlequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
singlequote(Rest, Handler, Acc, Stack, Config);
|
singlequote(Rest, Handler, Acc, Stack, Config);
|
||||||
string(<<40, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 40), Stack, Config);
|
|
||||||
string(<<41, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 41), Stack, Config);
|
|
||||||
string(<<42, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 42), Stack, Config);
|
|
||||||
string(<<43, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 43), Stack, Config);
|
|
||||||
string(<<44, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 44), Stack, Config);
|
|
||||||
string(<<45, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 45), Stack, Config);
|
|
||||||
string(<<46, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 46), Stack, Config);
|
|
||||||
string(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace(?solidus, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace(?solidus, Config)], Stack, Config);
|
||||||
string(<<48, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 48), Stack, Config);
|
|
||||||
string(<<49, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 49), Stack, Config);
|
|
||||||
string(<<50, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 50), Stack, Config);
|
|
||||||
string(<<51, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 51), Stack, Config);
|
|
||||||
string(<<52, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 52), Stack, Config);
|
|
||||||
string(<<53, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 53), Stack, Config);
|
|
||||||
string(<<54, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 54), Stack, Config);
|
|
||||||
string(<<55, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 55), Stack, Config);
|
|
||||||
string(<<56, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 56), Stack, Config);
|
|
||||||
string(<<57, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 57), Stack, Config);
|
|
||||||
string(<<58, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 58), Stack, Config);
|
|
||||||
string(<<59, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 59), Stack, Config);
|
|
||||||
string(<<60, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 60), Stack, Config);
|
|
||||||
string(<<61, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 61), Stack, Config);
|
|
||||||
string(<<62, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 62), Stack, Config);
|
|
||||||
string(<<63, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 63), Stack, Config);
|
|
||||||
string(<<64, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 64), Stack, Config);
|
|
||||||
string(<<65, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 65), Stack, Config);
|
|
||||||
string(<<66, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 66), Stack, Config);
|
|
||||||
string(<<67, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 67), Stack, Config);
|
|
||||||
string(<<68, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 68), Stack, Config);
|
|
||||||
string(<<69, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 69), Stack, Config);
|
|
||||||
string(<<70, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 70), Stack, Config);
|
|
||||||
string(<<71, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 71), Stack, Config);
|
|
||||||
string(<<72, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 72), Stack, Config);
|
|
||||||
string(<<73, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 73), Stack, Config);
|
|
||||||
string(<<74, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 74), Stack, Config);
|
|
||||||
string(<<75, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 75), Stack, Config);
|
|
||||||
string(<<76, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 76), Stack, Config);
|
|
||||||
string(<<77, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 77), Stack, Config);
|
|
||||||
string(<<78, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 78), Stack, Config);
|
|
||||||
string(<<79, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 79), Stack, Config);
|
|
||||||
string(<<80, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 80), Stack, Config);
|
|
||||||
string(<<81, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 81), Stack, Config);
|
|
||||||
string(<<82, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 82), Stack, Config);
|
|
||||||
string(<<83, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 83), Stack, Config);
|
|
||||||
string(<<84, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 84), Stack, Config);
|
|
||||||
string(<<85, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 85), Stack, Config);
|
|
||||||
string(<<86, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 86), Stack, Config);
|
|
||||||
string(<<87, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 87), Stack, Config);
|
|
||||||
string(<<88, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 88), Stack, Config);
|
|
||||||
string(<<89, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 89), Stack, Config);
|
|
||||||
string(<<90, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 90), Stack, Config);
|
|
||||||
string(<<91, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 91), Stack, Config);
|
|
||||||
string(<<?rsolidus/utf8, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<?rsolidus/utf8, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
unescape(Rest, Handler, Acc, Stack, Config);
|
unescape(Rest, Handler, Acc, Stack, Config);
|
||||||
string(<<93, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 93), Stack, Config);
|
|
||||||
string(<<94, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 94), Stack, Config);
|
|
||||||
string(<<95, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 95), Stack, Config);
|
|
||||||
string(<<96, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 96), Stack, Config);
|
|
||||||
string(<<97, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 97), Stack, Config);
|
|
||||||
string(<<98, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 98), Stack, Config);
|
|
||||||
string(<<99, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 99), Stack, Config);
|
|
||||||
string(<<100, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 100), Stack, Config);
|
|
||||||
string(<<101, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 101), Stack, Config);
|
|
||||||
string(<<102, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 102), Stack, Config);
|
|
||||||
string(<<103, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 103), Stack, Config);
|
|
||||||
string(<<104, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 104), Stack, Config);
|
|
||||||
string(<<105, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 105), Stack, Config);
|
|
||||||
string(<<106, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 106), Stack, Config);
|
|
||||||
string(<<107, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 107), Stack, Config);
|
|
||||||
string(<<108, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 108), Stack, Config);
|
|
||||||
string(<<109, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 109), Stack, Config);
|
|
||||||
string(<<110, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 110), Stack, Config);
|
|
||||||
string(<<111, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 111), Stack, Config);
|
|
||||||
string(<<112, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 112), Stack, Config);
|
|
||||||
string(<<113, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 113), Stack, Config);
|
|
||||||
string(<<114, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 114), Stack, Config);
|
|
||||||
string(<<115, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 115), Stack, Config);
|
|
||||||
string(<<116, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 116), Stack, Config);
|
|
||||||
string(<<117, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 117), Stack, Config);
|
|
||||||
string(<<118, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 118), Stack, Config);
|
|
||||||
string(<<119, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 119), Stack, Config);
|
|
||||||
string(<<120, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 120), Stack, Config);
|
|
||||||
string(<<121, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 121), Stack, Config);
|
|
||||||
string(<<122, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 122), Stack, Config);
|
|
||||||
string(<<123, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 123), Stack, Config);
|
|
||||||
string(<<124, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 124), Stack, Config);
|
|
||||||
string(<<125, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 125), Stack, Config);
|
|
||||||
string(<<126, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 126), Stack, Config);
|
|
||||||
string(<<127, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, 127), Stack, Config);
|
|
||||||
string(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
|
||||||
string(Rest, Handler, acc_seq(Acc, C), Stack, Config);
|
|
||||||
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X == 16#2028; X == 16#2029 ->
|
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X == 16#2028; X == 16#2029 ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace(X, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace(X, Config)], Stack, Config);
|
||||||
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X >= 16#80 ->
|
string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, X), Stack, Config);
|
Size = count(Bin, 0, Config),
|
||||||
|
<<Clean:Size/binary, Rest/binary>> = Bin,
|
||||||
|
string(Rest, Handler, [Acc, Clean], Stack, Config);
|
||||||
|
%% really, really dirty strings. if there's no valid utf8 we never reach `count`
|
||||||
|
%% and things get replaced instead of ignored
|
||||||
|
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||||
|
string(Rest, Handler, [Acc, X], Stack, Config);
|
||||||
%% u+fffe and u+ffff for R14BXX (subsequent runtimes will happily match preceeding
|
%% u+fffe and u+ffff for R14BXX (subsequent runtimes will happily match preceeding
|
||||||
string(<<239, 191, 190, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<239, 191, 190, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffe), Stack, Config);
|
string(Rest, Handler, [Acc, <<16#fffe/utf8>>], Stack, Config);
|
||||||
string(<<239, 191, 191, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<239, 191, 191, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#ffff), Stack, Config);
|
string(Rest, Handler, [Acc, <<16#ffff/utf8>>], Stack, Config);
|
||||||
%% partial utf8 codepoints
|
%% partial utf8 codepoints
|
||||||
string(<<>>, Handler, Acc, Stack, Config) ->
|
string(<<>>, Handler, Acc, Stack, Config) ->
|
||||||
incomplete(string, <<>>, Handler, Acc, Stack, Config);
|
incomplete(string, <<>>, Handler, Acc, Stack, Config);
|
||||||
|
@ -527,7 +338,7 @@ string(<<X, Y, Z>>, Handler, Acc, Stack, Config)
|
||||||
%% surrogates
|
%% surrogates
|
||||||
string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false})
|
string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false})
|
||||||
when X >= 160 ->
|
when X >= 160 ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config);
|
||||||
%% overlong encodings and missing continuations of a 2 byte sequence
|
%% overlong encodings and missing continuations of a 2 byte sequence
|
||||||
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false})
|
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false})
|
||||||
when X >= 192, X =< 223 ->
|
when X >= 192, X =< 223 ->
|
||||||
|
@ -542,32 +353,232 @@ string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false
|
||||||
strip_continuations(Rest, Handler, Acc, Stack, Config, 3);
|
strip_continuations(Rest, Handler, Acc, Stack, Config, 3);
|
||||||
%% incompletes and unexpected bytes, including orphan continuations
|
%% incompletes and unexpected bytes, including orphan continuations
|
||||||
string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) ->
|
string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config);
|
||||||
string(Bin, Handler, Acc, Stack, Config) -> ?error(string, Bin, Handler, Acc, Stack, Config).
|
string(Bin, Handler, Acc, Stack, Config) -> ?error(string, Bin, Handler, Acc, Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
|
count(<<32, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<33, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<?doublequote, _/binary>>, N, _) -> N;
|
||||||
|
count(<<35, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<36, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<37, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<38, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<?singlequote, _/binary>>, N, _) -> N;
|
||||||
|
count(<<40, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<41, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<42, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<43, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<44, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<45, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<46, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<?solidus, _/binary>>, N, _) -> N;
|
||||||
|
count(<<48, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<49, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<50, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<51, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<52, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<53, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<54, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<55, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<56, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<57, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<58, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<59, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<60, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<61, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<62, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<63, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<64, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<65, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<66, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<67, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<68, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<69, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<70, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<71, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<72, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<73, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<74, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<75, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<76, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<77, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<78, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<79, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<80, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<81, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<82, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<83, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<84, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<85, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<86, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<87, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<88, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<89, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<90, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<91, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<?rsolidus, _/binary>>, N, _) -> N;
|
||||||
|
count(<<93, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<94, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<95, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<96, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<97, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<98, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<99, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<100, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<101, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<102, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<103, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<104, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<105, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<106, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<107, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<108, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<109, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<110, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<111, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<112, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<113, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<114, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<115, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<116, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<117, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<118, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<119, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<120, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<121, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<122, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<123, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<124, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<125, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<126, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<127, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) ->
|
||||||
|
count(Rest, N + 1, Config);
|
||||||
|
count(<<X/utf8, Rest/binary>>, N, Config) when X < 16#800 ->
|
||||||
|
count(Rest, N + 2, Config);
|
||||||
|
count(<<X/utf8, _/binary>>, N, _) when X == 16#2028; X == 16#2029 -> N;
|
||||||
|
count(<<X/utf8, Rest/binary>>, N, Config) when X < 16#10000 ->
|
||||||
|
count(Rest, N + 3, Config);
|
||||||
|
count(<<_/utf8, Rest/binary>>, N, Config) ->
|
||||||
|
count(Rest, N + 4, Config);
|
||||||
|
count(_, N, _) -> N.
|
||||||
|
|
||||||
|
|
||||||
doublequote(Rest, Handler, Acc, [key|_] = Stack, Config) ->
|
doublequote(Rest, Handler, Acc, [key|_] = Stack, Config) ->
|
||||||
colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), Stack, Config);
|
colon(Rest, handle_event({key, iolist_to_binary(Acc)}, Handler, Config), Stack, Config);
|
||||||
doublequote(Rest, Handler, Acc, [singlequote|_] = Stack, Config) ->
|
doublequote(Rest, Handler, Acc, [singlequote|_] = Stack, Config) ->
|
||||||
string(Rest, Handler,acc_seq(Acc, maybe_replace(?doublequote, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace(?doublequote, Config)], Stack, Config);
|
||||||
doublequote(<<>>, Handler, Acc, [singlequote|_] = Stack, Config) ->
|
doublequote(<<>>, Handler, Acc, [singlequote|_] = Stack, Config) ->
|
||||||
incomplete(string, <<?doublequote>>, Handler, Acc, Stack, Config);
|
incomplete(string, <<?doublequote>>, Handler, Acc, Stack, Config);
|
||||||
doublequote(Rest, Handler, Acc, Stack, Config) ->
|
doublequote(Rest, Handler, Acc, Stack, Config) ->
|
||||||
maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config).
|
maybe_done(Rest, handle_event({string, iolist_to_binary(Acc)}, Handler, Config), Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
singlequote(Rest, Handler, Acc, [singlequote, key|Stack], Config) ->
|
singlequote(Rest, Handler, Acc, [singlequote, key|Stack], Config) ->
|
||||||
colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), [key|Stack], Config);
|
colon(Rest, handle_event({key, iolist_to_binary(Acc)}, Handler, Config), [key|Stack], Config);
|
||||||
singlequote(Rest, Handler, Acc, [singlequote|Stack], Config) ->
|
singlequote(Rest, Handler, Acc, [singlequote|Stack], Config) ->
|
||||||
maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config);
|
maybe_done(Rest, handle_event({string, iolist_to_binary(Acc)}, Handler, Config), Stack, Config);
|
||||||
singlequote(Rest, Handler, Acc, Stack, Config) ->
|
singlequote(Rest, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config).
|
string(Rest, Handler, [Acc, ?singlequote], Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
%% strips continuation bytes after bad utf bytes, guards against both too short
|
%% strips continuation bytes after bad utf bytes, guards against both too short
|
||||||
%% and overlong sequences. N is the maximum number of bytes to strip
|
%% and overlong sequences. N is the maximum number of bytes to strip
|
||||||
strip_continuations(<<Rest/binary>>, Handler, Acc, Stack, Config, 0) ->
|
strip_continuations(<<Rest/binary>>, Handler, Acc, Stack, Config, 0) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config);
|
string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config);
|
||||||
strip_continuations(<<X, Rest/binary>>, Handler, Acc, Stack, Config, N) when X >= 128, X =< 191 ->
|
strip_continuations(<<X, Rest/binary>>, Handler, Acc, Stack, Config, N) when X >= 128, X =< 191 ->
|
||||||
strip_continuations(Rest, Handler, Acc, Stack, Config, N - 1);
|
strip_continuations(Rest, Handler, Acc, Stack, Config, N - 1);
|
||||||
%% if end of input is reached before stripping the max number of continuations
|
%% if end of input is reached before stripping the max number of continuations
|
||||||
|
@ -582,33 +593,33 @@ strip_continuations(<<>>, Handler, Acc, Stack, Config, N) ->
|
||||||
%% not a continuation byte, insert a replacement character for sequence thus
|
%% not a continuation byte, insert a replacement character for sequence thus
|
||||||
%% far and dispatch back to string
|
%% far and dispatch back to string
|
||||||
strip_continuations(<<Rest/binary>>, Handler, Acc, Stack, Config, _) ->
|
strip_continuations(<<Rest/binary>>, Handler, Acc, Stack, Config, _) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config).
|
string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
%% this all gets really gross and should probably eventually be folded into string
|
%% this all gets really gross and should probably eventually be folded into string
|
||||||
%% but for now it fakes being part of string on incompletes and errors
|
%% but for now it fakes being part of string on incompletes and errors
|
||||||
unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||||
string(<<?rsolidus, Rest/binary>>, Handler, acc_seq(Acc, ?rsolidus), Stack, Config);
|
string(<<?rsolidus, Rest/binary>>, Handler, [Acc, ?rsolidus], Stack, Config);
|
||||||
unescape(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
unescape(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config);
|
string(Rest, Handler, [Acc, ?rsolidus, C], Stack, Config);
|
||||||
unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\b, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace($\b, Config)], Stack, Config);
|
||||||
unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\f, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace($\f, Config)], Stack, Config);
|
||||||
unescape(<<$n, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
unescape(<<$n, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\n, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace($\n, Config)], Stack, Config);
|
||||||
unescape(<<$r, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
unescape(<<$r, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\r, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace($\r, Config)], Stack, Config);
|
||||||
unescape(<<$t, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
unescape(<<$t, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\t, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace($\t, Config)], Stack, Config);
|
||||||
unescape(<<?doublequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
unescape(<<?doublequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\", Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace($\", Config)], Stack, Config);
|
||||||
unescape(<<?singlequote, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_single_quotes=false}) ->
|
unescape(<<?singlequote, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_single_quotes=false}) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config);
|
string(Rest, Handler, [Acc, ?singlequote], Stack, Config);
|
||||||
unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
unescape(<<?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\\, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace($\\, Config)], Stack, Config);
|
||||||
unescape(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
unescape(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($/, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace($/, Config)], Stack, Config);
|
||||||
unescape(<<$u, $d, A, B, C, ?rsolidus, $u, $d, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config)
|
unescape(<<$u, $d, A, B, C, ?rsolidus, $u, $d, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config)
|
||||||
when (A == $8 orelse A == $9 orelse A == $a orelse A == $b),
|
when (A == $8 orelse A == $9 orelse A == $a orelse A == $b),
|
||||||
(X == $c orelse X == $d orelse X == $e orelse X == $f),
|
(X == $c orelse X == $d orelse X == $e orelse X == $f),
|
||||||
|
@ -617,14 +628,14 @@ unescape(<<$u, $d, A, B, C, ?rsolidus, $u, $d, X, Y, Z, Rest/binary>>, Handler,
|
||||||
High = erlang:list_to_integer([$d, A, B, C], 16),
|
High = erlang:list_to_integer([$d, A, B, C], 16),
|
||||||
Low = erlang:list_to_integer([$d, X, Y, Z], 16),
|
Low = erlang:list_to_integer([$d, X, Y, Z], 16),
|
||||||
Codepoint = (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000,
|
Codepoint = (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000,
|
||||||
string(Rest, Handler, acc_seq(Acc, Codepoint), Stack, Config);
|
string(Rest, Handler, [Acc, <<Codepoint/utf8>>], Stack, Config);
|
||||||
unescape(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config)
|
unescape(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config)
|
||||||
when (A == $8 orelse A == $9 orelse A == $a orelse A == $b),
|
when (A == $8 orelse A == $9 orelse A == $a orelse A == $b),
|
||||||
?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z)
|
?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z)
|
||||||
->
|
->
|
||||||
case Config#config.strict_utf8 of
|
case Config#config.strict_utf8 of
|
||||||
true -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config);
|
true -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config);
|
||||||
false -> string(Rest, Handler, acc_seq(Acc, [16#fffd, 16#fffd]), Stack, Config)
|
false -> string(Rest, Handler, [Acc, <<16#fffd/utf8>>, <<16#fffd/utf8>>], Stack, Config)
|
||||||
end;
|
end;
|
||||||
unescape(<<$u, $d, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config)
|
unescape(<<$u, $d, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config)
|
||||||
when (A == $8 orelse A == $9 orelse A == $a orelse A == $b),
|
when (A == $8 orelse A == $9 orelse A == $a orelse A == $b),
|
||||||
|
@ -640,17 +651,17 @@ unescape(<<$u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config)
|
||||||
when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D) ->
|
when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D) ->
|
||||||
case erlang:list_to_integer([A, B, C, D], 16) of
|
case erlang:list_to_integer([A, B, C, D], 16) of
|
||||||
Codepoint when Codepoint < 16#d800; Codepoint > 16#dfff ->
|
Codepoint when Codepoint < 16#d800; Codepoint > 16#dfff ->
|
||||||
string(Rest, Handler, acc_seq(Acc, maybe_replace(Codepoint, Config)), Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace(Codepoint, Config)], Stack, Config);
|
||||||
_ when Config#config.strict_utf8 ->
|
_ when Config#config.strict_utf8 ->
|
||||||
?error(string, <<?rsolidus, $u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config);
|
?error(string, <<?rsolidus, $u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config);
|
||||||
_ -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config)
|
_ -> string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config)
|
||||||
end;
|
end;
|
||||||
unescape(Bin, Handler, Acc, Stack, Config) ->
|
unescape(Bin, Handler, Acc, Stack, Config) ->
|
||||||
case is_partial_escape(Bin) of
|
case is_partial_escape(Bin) of
|
||||||
true -> incomplete(string, <<?rsolidus/utf8, Bin/binary>>, Handler, Acc, Stack, Config);
|
true -> incomplete(string, <<?rsolidus/utf8, Bin/binary>>, Handler, Acc, Stack, Config);
|
||||||
false -> case Config#config.strict_escapes of
|
false -> case Config#config.strict_escapes of
|
||||||
true -> ?error(string, <<?rsolidus, Bin/binary>>, Handler, Acc, Stack, Config);
|
true -> ?error(string, <<?rsolidus, Bin/binary>>, Handler, Acc, Stack, Config);
|
||||||
false -> string(Bin, Handler, acc_seq(Acc, ?rsolidus), Stack, Config)
|
false -> string(Bin, Handler, [Acc, ?rsolidus], Stack, Config)
|
||||||
end
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -678,15 +689,16 @@ maybe_replace($/, Config=#config{escaped_strings=true}) ->
|
||||||
maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\];
|
maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\];
|
||||||
maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
|
maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
|
||||||
case Config#config.unescaped_jsonp of
|
case Config#config.unescaped_jsonp of
|
||||||
true -> X
|
true -> <<X/utf8>>
|
||||||
; false -> json_escape_sequence(X)
|
; false -> json_escape_sequence(X)
|
||||||
end;
|
end;
|
||||||
maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> json_escape_sequence(X);
|
maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
|
||||||
maybe_replace(X, _Config) -> X.
|
json_escape_sequence(X);
|
||||||
|
maybe_replace(X, _Config) -> <<X/utf8>>.
|
||||||
|
|
||||||
|
|
||||||
%% convert a codepoint to its \uXXXX equivalent
|
%% convert a codepoint to its \uXXXX equivalent
|
||||||
json_escape_sequence(X) ->
|
json_escape_sequence(X) when X < 65536 ->
|
||||||
<<A:4, B:4, C:4, D:4>> = <<X:16>>,
|
<<A:4, B:4, C:4, D:4>> = <<X:16>>,
|
||||||
[$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))].
|
[$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))].
|
||||||
|
|
||||||
|
@ -772,6 +784,10 @@ exp(Bin, Handler, Acc, Stack, Config) ->
|
||||||
finish_number(Bin, Handler, {exp, Acc}, Stack, Config).
|
finish_number(Bin, Handler, {exp, Acc}, Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
|
acc_seq(Seq, C) when is_list(C) -> lists:reverse(C) ++ Seq;
|
||||||
|
acc_seq(Seq, C) -> [C] ++ Seq.
|
||||||
|
|
||||||
|
|
||||||
finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) ->
|
finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) ->
|
||||||
maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), [], Config);
|
maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), [], Config);
|
||||||
finish_number(<<>>, Handler, {NumType, Acc}, Stack, Config) ->
|
finish_number(<<>>, Handler, {NumType, Acc}, Stack, Config) ->
|
||||||
|
@ -1230,8 +1246,6 @@ codepoints() ->
|
||||||
[16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000] ++
|
[16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000] ++
|
||||||
[16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000].
|
[16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000].
|
||||||
|
|
||||||
controls() -> lists:seq(0, 31).
|
|
||||||
|
|
||||||
surrogates() -> lists:seq(16#d800, 16#dfff).
|
surrogates() -> lists:seq(16#d800, 16#dfff).
|
||||||
|
|
||||||
|
|
||||||
|
@ -1250,7 +1264,7 @@ to_fake_utf8(N) ->
|
||||||
|
|
||||||
clean_string_test_() ->
|
clean_string_test_() ->
|
||||||
Clean = codepoints(),
|
Clean = codepoints(),
|
||||||
Dirty = surrogates() ++ controls(),
|
Dirty = surrogates(),
|
||||||
% clean codepoints
|
% clean codepoints
|
||||||
[{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual(
|
[{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual(
|
||||||
[{string, <<Codepoint/utf8>>}, end_json],
|
[{string, <<Codepoint/utf8>>}, end_json],
|
||||||
|
|
|
@ -214,149 +214,26 @@ fix_key(Key) when is_binary(Key) -> Key.
|
||||||
|
|
||||||
|
|
||||||
clean_string(Bin, #config{dirty_strings=true}) -> Bin;
|
clean_string(Bin, #config{dirty_strings=true}) -> Bin;
|
||||||
clean_string(Bin, Config) ->
|
clean_string(Bin, Config) -> clean(Bin, [], Config).
|
||||||
case clean(Bin, [], Config) of
|
|
||||||
{error, badarg} -> erlang:error(badarg);
|
|
||||||
String -> String
|
|
||||||
end.
|
|
||||||
|
|
||||||
|
|
||||||
%% escape and/or replace bad codepoints if requested
|
clean(<<>>, Acc, _) -> iolist_to_binary(Acc);
|
||||||
clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc));
|
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#20 ->
|
||||||
clean(<<0, Rest/binary>>, Acc, Config) -> maybe_replace(0, Rest, Acc, Config);
|
maybe_replace(X, Rest, Acc, Config);
|
||||||
clean(<<1, Rest/binary>>, Acc, Config) -> maybe_replace(1, Rest, Acc, Config);
|
clean(<<34, Rest/binary>>, Acc, Config) ->
|
||||||
clean(<<2, Rest/binary>>, Acc, Config) -> maybe_replace(2, Rest, Acc, Config);
|
maybe_replace(34, Rest, Acc, Config);
|
||||||
clean(<<3, Rest/binary>>, Acc, Config) -> maybe_replace(3, Rest, Acc, Config);
|
clean(<<47, Rest/binary>>, Acc, Config) ->
|
||||||
clean(<<4, Rest/binary>>, Acc, Config) -> maybe_replace(4, Rest, Acc, Config);
|
maybe_replace(47, Rest, Acc, Config);
|
||||||
clean(<<5, Rest/binary>>, Acc, Config) -> maybe_replace(5, Rest, Acc, Config);
|
clean(<<92, Rest/binary>>, Acc, Config) ->
|
||||||
clean(<<6, Rest/binary>>, Acc, Config) -> maybe_replace(6, Rest, Acc, Config);
|
maybe_replace(92, Rest, Acc, Config);
|
||||||
clean(<<7, Rest/binary>>, Acc, Config) -> maybe_replace(7, Rest, Acc, Config);
|
clean(<<X/utf8, Rest/binary>>, Acc, Config=#config{uescape=true}) when X >= 16#80 ->
|
||||||
clean(<<8, Rest/binary>>, Acc, Config) -> maybe_replace(8, Rest, Acc, Config);
|
|
||||||
clean(<<9, Rest/binary>>, Acc, Config) -> maybe_replace(9, Rest, Acc, Config);
|
|
||||||
clean(<<10, Rest/binary>>, Acc, Config) -> maybe_replace(10, Rest, Acc, Config);
|
|
||||||
clean(<<11, Rest/binary>>, Acc, Config) -> maybe_replace(11, Rest, Acc, Config);
|
|
||||||
clean(<<12, Rest/binary>>, Acc, Config) -> maybe_replace(12, Rest, Acc, Config);
|
|
||||||
clean(<<13, Rest/binary>>, Acc, Config) -> maybe_replace(13, Rest, Acc, Config);
|
|
||||||
clean(<<14, Rest/binary>>, Acc, Config) -> maybe_replace(14, Rest, Acc, Config);
|
|
||||||
clean(<<15, Rest/binary>>, Acc, Config) -> maybe_replace(15, Rest, Acc, Config);
|
|
||||||
clean(<<16, Rest/binary>>, Acc, Config) -> maybe_replace(16, Rest, Acc, Config);
|
|
||||||
clean(<<17, Rest/binary>>, Acc, Config) -> maybe_replace(17, Rest, Acc, Config);
|
|
||||||
clean(<<18, Rest/binary>>, Acc, Config) -> maybe_replace(18, Rest, Acc, Config);
|
|
||||||
clean(<<19, Rest/binary>>, Acc, Config) -> maybe_replace(19, Rest, Acc, Config);
|
|
||||||
clean(<<20, Rest/binary>>, Acc, Config) -> maybe_replace(20, Rest, Acc, Config);
|
|
||||||
clean(<<21, Rest/binary>>, Acc, Config) -> maybe_replace(21, Rest, Acc, Config);
|
|
||||||
clean(<<22, Rest/binary>>, Acc, Config) -> maybe_replace(22, Rest, Acc, Config);
|
|
||||||
clean(<<23, Rest/binary>>, Acc, Config) -> maybe_replace(23, Rest, Acc, Config);
|
|
||||||
clean(<<24, Rest/binary>>, Acc, Config) -> maybe_replace(24, Rest, Acc, Config);
|
|
||||||
clean(<<25, Rest/binary>>, Acc, Config) -> maybe_replace(25, Rest, Acc, Config);
|
|
||||||
clean(<<26, Rest/binary>>, Acc, Config) -> maybe_replace(26, Rest, Acc, Config);
|
|
||||||
clean(<<27, Rest/binary>>, Acc, Config) -> maybe_replace(27, Rest, Acc, Config);
|
|
||||||
clean(<<28, Rest/binary>>, Acc, Config) -> maybe_replace(28, Rest, Acc, Config);
|
|
||||||
clean(<<29, Rest/binary>>, Acc, Config) -> maybe_replace(29, Rest, Acc, Config);
|
|
||||||
clean(<<30, Rest/binary>>, Acc, Config) -> maybe_replace(30, Rest, Acc, Config);
|
|
||||||
clean(<<31, Rest/binary>>, Acc, Config) -> maybe_replace(31, Rest, Acc, Config);
|
|
||||||
clean(<<32, Rest/binary>>, Acc, Config) -> clean(Rest, [32] ++ Acc, Config);
|
|
||||||
clean(<<33, Rest/binary>>, Acc, Config) -> clean(Rest, [33] ++ Acc, Config);
|
|
||||||
clean(<<34, Rest/binary>>, Acc, Config) -> maybe_replace(34, Rest, Acc, Config);
|
|
||||||
clean(<<35, Rest/binary>>, Acc, Config) -> clean(Rest, [35] ++ Acc, Config);
|
|
||||||
clean(<<36, Rest/binary>>, Acc, Config) -> clean(Rest, [36] ++ Acc, Config);
|
|
||||||
clean(<<37, Rest/binary>>, Acc, Config) -> clean(Rest, [37] ++ Acc, Config);
|
|
||||||
clean(<<38, Rest/binary>>, Acc, Config) -> clean(Rest, [38] ++ Acc, Config);
|
|
||||||
clean(<<39, Rest/binary>>, Acc, Config) -> clean(Rest, [39] ++ Acc, Config);
|
|
||||||
clean(<<40, Rest/binary>>, Acc, Config) -> clean(Rest, [40] ++ Acc, Config);
|
|
||||||
clean(<<41, Rest/binary>>, Acc, Config) -> clean(Rest, [41] ++ Acc, Config);
|
|
||||||
clean(<<42, Rest/binary>>, Acc, Config) -> clean(Rest, [42] ++ Acc, Config);
|
|
||||||
clean(<<43, Rest/binary>>, Acc, Config) -> clean(Rest, [43] ++ Acc, Config);
|
|
||||||
clean(<<44, Rest/binary>>, Acc, Config) -> clean(Rest, [44] ++ Acc, Config);
|
|
||||||
clean(<<45, Rest/binary>>, Acc, Config) -> clean(Rest, [45] ++ Acc, Config);
|
|
||||||
clean(<<46, Rest/binary>>, Acc, Config) -> clean(Rest, [46] ++ Acc, Config);
|
|
||||||
clean(<<47, Rest/binary>>, Acc, Config) -> maybe_replace(47, Rest, Acc, Config);
|
|
||||||
clean(<<48, Rest/binary>>, Acc, Config) -> clean(Rest, [48] ++ Acc, Config);
|
|
||||||
clean(<<49, Rest/binary>>, Acc, Config) -> clean(Rest, [49] ++ Acc, Config);
|
|
||||||
clean(<<50, Rest/binary>>, Acc, Config) -> clean(Rest, [50] ++ Acc, Config);
|
|
||||||
clean(<<51, Rest/binary>>, Acc, Config) -> clean(Rest, [51] ++ Acc, Config);
|
|
||||||
clean(<<52, Rest/binary>>, Acc, Config) -> clean(Rest, [52] ++ Acc, Config);
|
|
||||||
clean(<<53, Rest/binary>>, Acc, Config) -> clean(Rest, [53] ++ Acc, Config);
|
|
||||||
clean(<<54, Rest/binary>>, Acc, Config) -> clean(Rest, [54] ++ Acc, Config);
|
|
||||||
clean(<<55, Rest/binary>>, Acc, Config) -> clean(Rest, [55] ++ Acc, Config);
|
|
||||||
clean(<<56, Rest/binary>>, Acc, Config) -> clean(Rest, [56] ++ Acc, Config);
|
|
||||||
clean(<<57, Rest/binary>>, Acc, Config) -> clean(Rest, [57] ++ Acc, Config);
|
|
||||||
clean(<<58, Rest/binary>>, Acc, Config) -> clean(Rest, [58] ++ Acc, Config);
|
|
||||||
clean(<<59, Rest/binary>>, Acc, Config) -> clean(Rest, [59] ++ Acc, Config);
|
|
||||||
clean(<<60, Rest/binary>>, Acc, Config) -> clean(Rest, [60] ++ Acc, Config);
|
|
||||||
clean(<<61, Rest/binary>>, Acc, Config) -> clean(Rest, [61] ++ Acc, Config);
|
|
||||||
clean(<<62, Rest/binary>>, Acc, Config) -> clean(Rest, [62] ++ Acc, Config);
|
|
||||||
clean(<<63, Rest/binary>>, Acc, Config) -> clean(Rest, [63] ++ Acc, Config);
|
|
||||||
clean(<<64, Rest/binary>>, Acc, Config) -> clean(Rest, [64] ++ Acc, Config);
|
|
||||||
clean(<<65, Rest/binary>>, Acc, Config) -> clean(Rest, [65] ++ Acc, Config);
|
|
||||||
clean(<<66, Rest/binary>>, Acc, Config) -> clean(Rest, [66] ++ Acc, Config);
|
|
||||||
clean(<<67, Rest/binary>>, Acc, Config) -> clean(Rest, [67] ++ Acc, Config);
|
|
||||||
clean(<<68, Rest/binary>>, Acc, Config) -> clean(Rest, [68] ++ Acc, Config);
|
|
||||||
clean(<<69, Rest/binary>>, Acc, Config) -> clean(Rest, [69] ++ Acc, Config);
|
|
||||||
clean(<<70, Rest/binary>>, Acc, Config) -> clean(Rest, [70] ++ Acc, Config);
|
|
||||||
clean(<<71, Rest/binary>>, Acc, Config) -> clean(Rest, [71] ++ Acc, Config);
|
|
||||||
clean(<<72, Rest/binary>>, Acc, Config) -> clean(Rest, [72] ++ Acc, Config);
|
|
||||||
clean(<<73, Rest/binary>>, Acc, Config) -> clean(Rest, [73] ++ Acc, Config);
|
|
||||||
clean(<<74, Rest/binary>>, Acc, Config) -> clean(Rest, [74] ++ Acc, Config);
|
|
||||||
clean(<<75, Rest/binary>>, Acc, Config) -> clean(Rest, [75] ++ Acc, Config);
|
|
||||||
clean(<<76, Rest/binary>>, Acc, Config) -> clean(Rest, [76] ++ Acc, Config);
|
|
||||||
clean(<<77, Rest/binary>>, Acc, Config) -> clean(Rest, [77] ++ Acc, Config);
|
|
||||||
clean(<<78, Rest/binary>>, Acc, Config) -> clean(Rest, [78] ++ Acc, Config);
|
|
||||||
clean(<<79, Rest/binary>>, Acc, Config) -> clean(Rest, [79] ++ Acc, Config);
|
|
||||||
clean(<<80, Rest/binary>>, Acc, Config) -> clean(Rest, [80] ++ Acc, Config);
|
|
||||||
clean(<<81, Rest/binary>>, Acc, Config) -> clean(Rest, [81] ++ Acc, Config);
|
|
||||||
clean(<<82, Rest/binary>>, Acc, Config) -> clean(Rest, [82] ++ Acc, Config);
|
|
||||||
clean(<<83, Rest/binary>>, Acc, Config) -> clean(Rest, [83] ++ Acc, Config);
|
|
||||||
clean(<<84, Rest/binary>>, Acc, Config) -> clean(Rest, [84] ++ Acc, Config);
|
|
||||||
clean(<<85, Rest/binary>>, Acc, Config) -> clean(Rest, [85] ++ Acc, Config);
|
|
||||||
clean(<<86, Rest/binary>>, Acc, Config) -> clean(Rest, [86] ++ Acc, Config);
|
|
||||||
clean(<<87, Rest/binary>>, Acc, Config) -> clean(Rest, [87] ++ Acc, Config);
|
|
||||||
clean(<<88, Rest/binary>>, Acc, Config) -> clean(Rest, [88] ++ Acc, Config);
|
|
||||||
clean(<<89, Rest/binary>>, Acc, Config) -> clean(Rest, [89] ++ Acc, Config);
|
|
||||||
clean(<<90, Rest/binary>>, Acc, Config) -> clean(Rest, [90] ++ Acc, Config);
|
|
||||||
clean(<<91, Rest/binary>>, Acc, Config) -> clean(Rest, [91] ++ Acc, Config);
|
|
||||||
clean(<<92, Rest/binary>>, Acc, Config) -> maybe_replace(92, Rest, Acc, Config);
|
|
||||||
clean(<<93, Rest/binary>>, Acc, Config) -> clean(Rest, [93] ++ Acc, Config);
|
|
||||||
clean(<<94, Rest/binary>>, Acc, Config) -> clean(Rest, [94] ++ Acc, Config);
|
|
||||||
clean(<<95, Rest/binary>>, Acc, Config) -> clean(Rest, [95] ++ Acc, Config);
|
|
||||||
clean(<<96, Rest/binary>>, Acc, Config) -> clean(Rest, [96] ++ Acc, Config);
|
|
||||||
clean(<<97, Rest/binary>>, Acc, Config) -> clean(Rest, [97] ++ Acc, Config);
|
|
||||||
clean(<<98, Rest/binary>>, Acc, Config) -> clean(Rest, [98] ++ Acc, Config);
|
|
||||||
clean(<<99, Rest/binary>>, Acc, Config) -> clean(Rest, [99] ++ Acc, Config);
|
|
||||||
clean(<<100, Rest/binary>>, Acc, Config) -> clean(Rest, [100] ++ Acc, Config);
|
|
||||||
clean(<<101, Rest/binary>>, Acc, Config) -> clean(Rest, [101] ++ Acc, Config);
|
|
||||||
clean(<<102, Rest/binary>>, Acc, Config) -> clean(Rest, [102] ++ Acc, Config);
|
|
||||||
clean(<<103, Rest/binary>>, Acc, Config) -> clean(Rest, [103] ++ Acc, Config);
|
|
||||||
clean(<<104, Rest/binary>>, Acc, Config) -> clean(Rest, [104] ++ Acc, Config);
|
|
||||||
clean(<<105, Rest/binary>>, Acc, Config) -> clean(Rest, [105] ++ Acc, Config);
|
|
||||||
clean(<<106, Rest/binary>>, Acc, Config) -> clean(Rest, [106] ++ Acc, Config);
|
|
||||||
clean(<<107, Rest/binary>>, Acc, Config) -> clean(Rest, [107] ++ Acc, Config);
|
|
||||||
clean(<<108, Rest/binary>>, Acc, Config) -> clean(Rest, [108] ++ Acc, Config);
|
|
||||||
clean(<<109, Rest/binary>>, Acc, Config) -> clean(Rest, [109] ++ Acc, Config);
|
|
||||||
clean(<<110, Rest/binary>>, Acc, Config) -> clean(Rest, [110] ++ Acc, Config);
|
|
||||||
clean(<<111, Rest/binary>>, Acc, Config) -> clean(Rest, [111] ++ Acc, Config);
|
|
||||||
clean(<<112, Rest/binary>>, Acc, Config) -> clean(Rest, [112] ++ Acc, Config);
|
|
||||||
clean(<<113, Rest/binary>>, Acc, Config) -> clean(Rest, [113] ++ Acc, Config);
|
|
||||||
clean(<<114, Rest/binary>>, Acc, Config) -> clean(Rest, [114] ++ Acc, Config);
|
|
||||||
clean(<<115, Rest/binary>>, Acc, Config) -> clean(Rest, [115] ++ Acc, Config);
|
|
||||||
clean(<<116, Rest/binary>>, Acc, Config) -> clean(Rest, [116] ++ Acc, Config);
|
|
||||||
clean(<<117, Rest/binary>>, Acc, Config) -> clean(Rest, [117] ++ Acc, Config);
|
|
||||||
clean(<<118, Rest/binary>>, Acc, Config) -> clean(Rest, [118] ++ Acc, Config);
|
|
||||||
clean(<<119, Rest/binary>>, Acc, Config) -> clean(Rest, [119] ++ Acc, Config);
|
|
||||||
clean(<<120, Rest/binary>>, Acc, Config) -> clean(Rest, [120] ++ Acc, Config);
|
|
||||||
clean(<<121, Rest/binary>>, Acc, Config) -> clean(Rest, [121] ++ Acc, Config);
|
|
||||||
clean(<<122, Rest/binary>>, Acc, Config) -> clean(Rest, [122] ++ Acc, Config);
|
|
||||||
clean(<<123, Rest/binary>>, Acc, Config) -> clean(Rest, [123] ++ Acc, Config);
|
|
||||||
clean(<<124, Rest/binary>>, Acc, Config) -> clean(Rest, [124] ++ Acc, Config);
|
|
||||||
clean(<<125, Rest/binary>>, Acc, Config) -> clean(Rest, [125] ++ Acc, Config);
|
|
||||||
clean(<<126, Rest/binary>>, Acc, Config) -> clean(Rest, [126] ++ Acc, Config);
|
|
||||||
clean(<<127, Rest/binary>>, Acc, Config) -> clean(Rest, [127] ++ Acc, Config);
|
|
||||||
clean(<<X/utf8, Rest/binary>>, Acc, Config=#config{uescape=true}) ->
|
|
||||||
maybe_replace(X, Rest, Acc, Config);
|
maybe_replace(X, Rest, Acc, Config);
|
||||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
|
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
|
||||||
maybe_replace(X, Rest, Acc, Config);
|
maybe_replace(X, Rest, Acc, Config);
|
||||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) ->
|
clean(<<_/utf8, _/binary>> = Bin, Acc, Config) ->
|
||||||
clean(Rest, [X] ++ Acc, Config);
|
Size = count(Bin, 0, Config),
|
||||||
|
<<Clean:Size/binary, Rest/binary>> = Bin,
|
||||||
|
clean(Rest, [Acc, Clean], Config);
|
||||||
%% surrogates
|
%% surrogates
|
||||||
clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
|
clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
|
||||||
maybe_replace(surrogate, Rest, Acc, Config);
|
maybe_replace(surrogate, Rest, Acc, Config);
|
||||||
|
@ -373,6 +250,239 @@ clean(<<_, Rest/binary>>, Acc, Config) ->
|
||||||
maybe_replace(badutf, Rest, Acc, Config).
|
maybe_replace(badutf, Rest, Acc, Config).
|
||||||
|
|
||||||
|
|
||||||
|
%% count(Bin, N, Config) -> integer()
%%
%% scans Bin from the front and returns N plus the number of bytes that can
%% be taken verbatim. scanning stops (returning the running total) at:
%%   * the end of the input
%%   * a control byte (0..31), a double quote (34), a forward slash (47) or
%%     a backslash (92)
%%   * any non ascii codepoint when the `uescape` option is set
%%   * u+2028 and u+2029
%%   * a byte that does not begin a well formed utf8 sequence
%% every other codepoint advances the total by its encoded width in bytes
count(<<>>, N, _) -> N;
count(<<Byte, _/binary>>, N, _)
        when Byte < 32; Byte =:= 34; Byte =:= 47; Byte =:= 92 ->
    N;
%% remaining single byte (ascii) characters, 127 included
count(<<Byte, Rest/binary>>, N, Config) when Byte < 128 ->
    count(Rest, N + 1, Config);
%% only multibyte codepoints can reach the clauses below
count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N;
count(<<Codepoint/utf8, _/binary>>, N, _)
        when Codepoint =:= 16#2028; Codepoint =:= 16#2029 ->
    N;
count(<<Codepoint/utf8, Rest/binary>>, N, Config) ->
    %% utf8 width: below 16#800 is two bytes, below 16#10000 is three,
    %% everything above is four
    Width = if
        Codepoint < 16#800 -> 2;
        Codepoint < 16#10000 -> 3;
        true -> 4
    end,
    count(Rest, N + Width, Config);
%% not valid utf8 at this position
count(<<_, _/binary>>, N, _) -> N.
|
||||||
|
|
||||||
|
|
||||||
strip_continuations(Bin, 0) -> Bin;
|
strip_continuations(Bin, 0) -> Bin;
|
||||||
strip_continuations(<<X, Rest/binary>>, N) when X >= 128, X =< 191 ->
|
strip_continuations(<<X, Rest/binary>>, N) when X >= 128, X =< 191 ->
|
||||||
strip_continuations(Rest, N - 1);
|
strip_continuations(Rest, N - 1);
|
||||||
|
@ -381,38 +491,42 @@ strip_continuations(Bin, _) -> Bin.
|
||||||
|
|
||||||
|
|
||||||
%% maybe_replace(X, Rest, Acc, Config)
%%
%% X is either a codepoint (integer) or one of the atoms `surrogate` /
%% `badutf` marking an invalid sequence. appends the representation of X
%% selected by Config to Acc and resumes clean/3 on Rest. Acc grows by
%% nesting (`[Acc, New]`), never by copying
%%
%% with `escaped_strings` set, the characters below are emitted as their two
%% character json escapes
maybe_replace($\b, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [Acc, $\\, $b], Config);
maybe_replace($\t, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [Acc, $\\, $t], Config);
maybe_replace($\n, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [Acc, $\\, $n], Config);
maybe_replace($\f, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [Acc, $\\, $f], Config);
maybe_replace($\r, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [Acc, $\\, $r], Config);
maybe_replace($\", Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [Acc, $\\, $\"], Config);
%% forward slashes are only escaped when `escaped_forward_slashes` is set
maybe_replace($/, Rest, Acc, Config=#config{escaped_strings=true}) ->
    case Config#config.escaped_forward_slashes of
        true -> clean(Rest, [Acc, $\\, $/], Config);
        false -> clean(Rest, [Acc, $/], Config)
    end;
maybe_replace($\\, Rest, Acc, Config=#config{escaped_strings=true}) ->
    clean(Rest, [Acc, $\\, $\\], Config);
%% remaining control characters become \uXXXX sequences. atoms never match
%% here because an atom is never less than a number
maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X < 32 ->
    clean(Rest, [Acc, json_escape_sequence(X)], Config);
%% escaped even if no other escaping was requested!
%% the is_integer/1 test is required: in erlang term order every atom
%% compares greater than every number, so `surrogate >= 16#80` is true and
%% the error markers would otherwise be handed to json_escape_sequence/1
%% instead of reaching the strict_utf8 / replacement clauses below
maybe_replace(X, Rest, Acc, Config=#config{uescape=true}) when is_integer(X), X >= 16#80 ->
    clean(Rest, [Acc, json_escape_sequence(X)], Config);
%% u+2028 and u+2029 are escaped unless `unescaped_jsonp` is set
maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
    case Config#config.unescaped_jsonp of
        true -> clean(Rest, [Acc, <<X/utf8>>], Config);
        false -> clean(Rest, [Acc, json_escape_sequence(X)], Config)
    end;
%% with `strict_utf8` set, invalid input is an error rather than replaced
maybe_replace(Atom, _, _, #config{strict_utf8=true}) when is_atom(Atom) ->
    erlang:error(badarg);
%% otherwise invalid sequences are replaced by u+fffd
maybe_replace(surrogate, Rest, Acc, Config) ->
    clean(Rest, [Acc, <<16#fffd/utf8>>], Config);
maybe_replace(badutf, Rest, Acc, Config) ->
    clean(Rest, [Acc, <<16#fffd/utf8>>], Config);
%% no replacement requested: emit the codepoint utf8 encoded
maybe_replace(X, Rest, Acc, Config) ->
    clean(Rest, [Acc, <<X/utf8>>], Config).
|
||||||
|
|
||||||
|
|
||||||
%% convert a codepoint to its \uXXXX equivalent
|
%% convert a codepoint to its \uXXXX equivalent
|
||||||
|
@ -943,7 +1057,7 @@ json_escape_sequence_test_() ->
|
||||||
{"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")}
|
{"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")}
|
||||||
].
|
].
|
||||||
|
|
||||||
uescaped_test_() ->
|
uescape_test_() ->
|
||||||
[
|
[
|
||||||
{"\"\\u0080\"", ?_assertEqual(
|
{"\"\\u0080\"", ?_assertEqual(
|
||||||
<<"\\u0080">>,
|
<<"\\u0080">>,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue