From dc8e6a0c543019fa7f9c4040e1e3786a248a3574 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Tue, 2 Dec 2014 06:57:16 +0000 Subject: [PATCH] switch string parsing to a count and slice implementation --- src/jsx_decoder.erl | 512 +++++++++++++++++++++++--------------------- src/jsx_parser.erl | 430 +++++++++++++++++++++++-------------- 2 files changed, 535 insertions(+), 407 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index a006a0e..58c039f 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -23,8 +23,7 @@ -module(jsx_decoder). -%% inline sequence accumulation, handle_event, format_number and maybe_replace --compile({inline, [new_seq/0, new_seq/1, acc_seq/2, end_seq/1, end_seq/2]}). +%% inline handle_event, format_number and maybe_replace -compile({inline, [handle_event/3]}). -compile({inline, [format_number/1]}). -compile({inline, [maybe_replace/2]}). @@ -161,19 +160,6 @@ incomplete(State, Rest, Handler, Acc, Stack, Config = #config{incomplete_handler F(Rest, {decoder, State, Handler, Acc, Stack}, jsx_config:config_to_list(Config)). -%% lists are benchmarked to be faster (tho higher in memory usage) than binaries -new_seq() -> []. -new_seq(C) -> [C]. - -acc_seq(Seq, C) when is_list(C) -> lists:reverse(C) ++ Seq; -acc_seq(Seq, C) -> [C] ++ Seq. - -end_seq(Seq) -> unicode:characters_to_binary(lists:reverse(Seq)). - -end_seq(Seq, #config{dirty_strings=true}) -> list_to_binary(lists:reverse(Seq)); -end_seq(Seq, _) -> end_seq(Seq). - - handle_event(Event, {Handler, State}, _Config) -> {Handler, Handler:handle_event(Event, State)}. @@ -190,9 +176,9 @@ start(Bin, Handler, Stack, Config) -> value(<>, Handler, Stack, Config) -> - string(Rest, Handler, new_seq(), Stack, Config); + string(Rest, Handler, Stack, Config); value(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> - string(Rest, Handler, new_seq(), [singlequote|Stack], Config); + string(Rest, Handler, [singlequote|Stack], Config); value(<<$t, Rest/binary>>, Handler, Stack, Config) -> true(Rest, Handler, Stack, Config); value(<<$f, Rest/binary>>, Handler, Stack, Config) -> @@ -200,11 +186,11 @@ value(<<$f, Rest/binary>>, Handler, Stack, Config) -> value(<<$n, Rest/binary>>, Handler, Stack, Config) -> null(Rest, Handler, Stack, Config); value(<>, Handler, Stack, Config) -> - negative(Rest, Handler, new_seq($-), Stack, Config); + negative(Rest, Handler, [$-], Stack, Config); value(<>, Handler, Stack, Config) -> - zero(Rest, Handler, new_seq($0), Stack, Config); + zero(Rest, Handler, [$0], Stack, Config); value(<>, Handler, Stack, Config) when ?is_nonzero(S) -> - integer(Rest, Handler, new_seq(S), Stack, Config); + integer(Rest, Handler, [S], Stack, Config); value(<>, Handler, Stack, Config) -> object(Rest, handle_event(start_object, Handler, Config), [key|Stack], Config); value(<>, Handler, Stack, Config) -> @@ -228,9 +214,9 @@ value(Bin, Handler, Stack, Config) -> object(<>, Handler, Stack, Config) -> - string(Rest, Handler, new_seq(), Stack, Config); + string(Rest, Handler, Stack, Config); object(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> - string(Rest, Handler, new_seq(), [singlequote|Stack], Config); + string(Rest, Handler, [singlequote|Stack], Config); object(<>, Handler, [key|Stack], Config) -> maybe_done(Rest, handle_event(end_object, Handler, Config), Stack, Config); object(<>, Handler, Stack, Config) when ?is_whitespace(S) -> @@ -286,9 +272,9 @@ colon(Bin, Handler, Stack, Config) -> key(<>, Handler, Stack, Config) -> - string(Rest, Handler, new_seq(), Stack, Config); + string(Rest, Handler, Stack, Config); key(<>, Handler, Stack, Config=#config{strict_single_quotes=false}) -> - string(Rest, Handler, new_seq(), [singlequote|Stack], Config); + string(Rest, Handler, [singlequote|Stack], Config); key(<>, Handler, Stack, Config) when ?is_whitespace(S) -> key(Rest, Handler, Stack, Config); key(<>, Handler, [key|Stack], Config=#config{strict_commas=false}) -> @@ -311,209 +297,34 @@ key(Bin, Handler, Stack, Config) -> %% submit a patch that unrolls simple guards %% note that if you encounter an error from string and you can't find the clause that %% caused it here, it might be in unescape below -string(<<32, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 32), Stack, Config); -string(<<33, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 33), Stack, Config); + +string(Bin, Handler, Stack, Config) -> + string(Bin, Handler, [], Stack, Config). + + string(<>, Handler, Acc, Stack, Config) -> doublequote(Rest, Handler, Acc, Stack, Config); -string(<<35, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 35), Stack, Config); -string(<<36, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 36), Stack, Config); -string(<<37, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 37), Stack, Config); -string(<<38, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 38), Stack, Config); string(<>, Handler, Acc, Stack, Config) -> singlequote(Rest, Handler, Acc, Stack, Config); -string(<<40, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 40), Stack, Config); -string(<<41, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 41), Stack, Config); -string(<<42, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 42), Stack, Config); -string(<<43, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 43), Stack, Config); -string(<<44, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 44), Stack, Config); -string(<<45, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 45), Stack, Config); -string(<<46, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 46), Stack, Config); string(<>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace(?solidus, Config)), Stack, Config); -string(<<48, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 48), Stack, Config); -string(<<49, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 49), Stack, Config); -string(<<50, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 50), Stack, Config); -string(<<51, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 51), Stack, Config); -string(<<52, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 52), Stack, Config); -string(<<53, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 53), Stack, Config); -string(<<54, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 54), Stack, Config); -string(<<55, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 55), Stack, Config); -string(<<56, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 56), Stack, Config); -string(<<57, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 57), Stack, Config); -string(<<58, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 58), Stack, Config); -string(<<59, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 59), Stack, Config); -string(<<60, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 60), Stack, Config); -string(<<61, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 61), Stack, Config); -string(<<62, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 62), Stack, Config); -string(<<63, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 63), Stack, Config); -string(<<64, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 64), Stack, Config); -string(<<65, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 65), Stack, Config); -string(<<66, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 66), Stack, Config); -string(<<67, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 67), Stack, Config); -string(<<68, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 68), Stack, Config); -string(<<69, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 69), Stack, Config); -string(<<70, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 70), Stack, Config); -string(<<71, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 71), Stack, Config); -string(<<72, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 72), Stack, Config); -string(<<73, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 73), Stack, Config); -string(<<74, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 74), Stack, Config); -string(<<75, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 75), Stack, Config); -string(<<76, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 76), Stack, Config); -string(<<77, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 77), Stack, Config); -string(<<78, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 78), Stack, Config); -string(<<79, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 79), Stack, Config); -string(<<80, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 80), Stack, Config); -string(<<81, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 81), Stack, Config); -string(<<82, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 82), Stack, Config); -string(<<83, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 83), Stack, Config); -string(<<84, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 84), Stack, Config); -string(<<85, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 85), Stack, Config); -string(<<86, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 86), Stack, Config); -string(<<87, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 87), Stack, Config); -string(<<88, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 88), Stack, Config); -string(<<89, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 89), Stack, Config); -string(<<90, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 90), Stack, Config); -string(<<91, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 91), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace(?solidus, Config)], Stack, Config); string(<>, Handler, Acc, Stack, Config) -> unescape(Rest, Handler, Acc, Stack, Config); -string(<<93, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 93), Stack, Config); -string(<<94, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 94), Stack, Config); -string(<<95, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 95), Stack, Config); -string(<<96, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 96), Stack, Config); -string(<<97, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 97), Stack, Config); -string(<<98, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 98), Stack, Config); -string(<<99, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 99), Stack, Config); -string(<<100, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 100), Stack, Config); -string(<<101, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 101), Stack, Config); -string(<<102, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 102), Stack, Config); -string(<<103, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 103), Stack, Config); -string(<<104, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 104), Stack, Config); -string(<<105, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 105), Stack, Config); -string(<<106, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 106), Stack, Config); -string(<<107, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 107), Stack, Config); -string(<<108, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 108), Stack, Config); -string(<<109, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 109), Stack, Config); -string(<<110, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 110), Stack, Config); -string(<<111, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 111), Stack, Config); -string(<<112, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 112), Stack, Config); -string(<<113, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 113), Stack, Config); -string(<<114, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 114), Stack, Config); -string(<<115, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 115), Stack, Config); -string(<<116, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 116), Stack, Config); -string(<<117, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 117), Stack, Config); -string(<<118, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 118), Stack, Config); -string(<<119, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 119), Stack, Config); -string(<<120, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 120), Stack, Config); -string(<<121, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 121), Stack, Config); -string(<<122, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 122), Stack, Config); -string(<<123, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 123), Stack, Config); -string(<<124, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 124), Stack, Config); -string(<<125, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 125), Stack, Config); -string(<<126, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 126), Stack, Config); -string(<<127, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 127), Stack, Config); -string(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> - string(Rest, Handler, acc_seq(Acc, C), Stack, Config); string(<>, Handler, Acc, Stack, Config) when X == 16#2028; X == 16#2029 -> - string(Rest, Handler, acc_seq(Acc, maybe_replace(X, Config)), Stack, Config); -string(<>, Handler, Acc, Stack, Config) when X >= 16#80 -> - string(Rest, Handler, acc_seq(Acc, X), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace(X, Config)], Stack, Config); +string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) -> + Size = count(Bin, 0, Config), + <> = Bin, + string(Rest, Handler, [Acc, Clean], Stack, Config); +%% really, really dirty strings. if there's no valid utf8 we never reach `count` +%% and things get replaced instead of ignored +string(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> + string(Rest, Handler, [Acc, X], Stack, Config); %% u+fffe and u+ffff for R14BXX (subsequent runtimes will happily match preceeding string(<<239, 191, 190, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 16#fffe), Stack, Config); + string(Rest, Handler, [Acc, <<16#fffe/utf8>>], Stack, Config); string(<<239, 191, 191, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, 16#ffff), Stack, Config); + string(Rest, Handler, [Acc, <<16#ffff/utf8>>], Stack, Config); %% partial utf8 codepoints string(<<>>, Handler, Acc, Stack, Config) -> incomplete(string, <<>>, Handler, Acc, Stack, Config); @@ -527,7 +338,7 @@ string(<>, Handler, Acc, Stack, Config) %% surrogates string(<<237, X, _, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X >= 160 -> - string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); + string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config); %% overlong encodings and missing continuations of a 2 byte sequence string(<>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) when X >= 192, X =< 223 -> @@ -542,32 +353,232 @@ string(<>, Handler, Acc, Stack, Config=#config{strict_utf8=false strip_continuations(Rest, Handler, Acc, Stack, Config, 3); %% incompletes and unexpected bytes, including orphan continuations string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false}) -> - string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); + string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config); string(Bin, Handler, Acc, Stack, Config) -> ?error(string, Bin, Handler, Acc, Stack, Config). +count(<<32, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<33, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<>, N, _) -> N; +count(<<35, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<36, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<37, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<38, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<>, N, _) -> N; +count(<<40, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<41, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<42, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<43, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<44, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<45, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<46, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<>, N, _) -> N; +count(<<48, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<49, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<50, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<51, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<52, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<53, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<54, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<55, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<56, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<57, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<58, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<59, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<60, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<61, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<62, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<63, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<64, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<65, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<66, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<67, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<68, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<69, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<70, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<71, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<72, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<73, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<74, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<75, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<76, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<77, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<78, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<79, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<80, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<81, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<82, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<83, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<84, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<85, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<86, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<87, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<88, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<89, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<90, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<91, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<>, N, _) -> N; +count(<<93, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<94, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<95, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<96, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<97, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<98, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<99, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<100, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<101, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<102, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<103, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<104, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<105, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<106, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<107, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<108, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<109, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<110, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<111, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<112, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<113, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<114, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<115, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<116, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<117, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<118, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<119, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<120, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<121, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<122, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<123, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<124, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<125, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<126, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<127, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) -> + count(Rest, N + 1, Config); +count(<>, N, Config) when X < 16#800 -> + count(Rest, N + 2, Config); +count(<>, N, _) when X == 16#2028; X == 16#2029 -> N; +count(<>, N, Config) when X < 16#10000 -> + count(Rest, N + 3, Config); +count(<<_/utf8, Rest/binary>>, N, Config) -> + count(Rest, N + 4, Config); +count(_, N, _) -> N. + + doublequote(Rest, Handler, Acc, [key|_] = Stack, Config) -> - colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), Stack, Config); + colon(Rest, handle_event({key, iolist_to_binary(Acc)}, Handler, Config), Stack, Config); doublequote(Rest, Handler, Acc, [singlequote|_] = Stack, Config) -> - string(Rest, Handler,acc_seq(Acc, maybe_replace(?doublequote, Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace(?doublequote, Config)], Stack, Config); doublequote(<<>>, Handler, Acc, [singlequote|_] = Stack, Config) -> incomplete(string, <>, Handler, Acc, Stack, Config); doublequote(Rest, Handler, Acc, Stack, Config) -> - maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config). + maybe_done(Rest, handle_event({string, iolist_to_binary(Acc)}, Handler, Config), Stack, Config). singlequote(Rest, Handler, Acc, [singlequote, key|Stack], Config) -> - colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), [key|Stack], Config); + colon(Rest, handle_event({key, iolist_to_binary(Acc)}, Handler, Config), [key|Stack], Config); singlequote(Rest, Handler, Acc, [singlequote|Stack], Config) -> - maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config); + maybe_done(Rest, handle_event({string, iolist_to_binary(Acc)}, Handler, Config), Stack, Config); singlequote(Rest, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config). + string(Rest, Handler, [Acc, ?singlequote], Stack, Config). %% strips continuation bytes after bad utf bytes, guards against both too short %% and overlong sequences. N is the maximum number of bytes to strip strip_continuations(<>, Handler, Acc, Stack, Config, 0) -> - string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config); + string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config); strip_continuations(<>, Handler, Acc, Stack, Config, N) when X >= 128, X =< 191 -> strip_continuations(Rest, Handler, Acc, Stack, Config, N - 1); %% if end of input is reached before stripping the max number of continuations @@ -582,33 +593,33 @@ strip_continuations(<<>>, Handler, Acc, Stack, Config, N) -> %% not a continuation byte, insert a replacement character for sequence thus %% far and dispatch back to string strip_continuations(<>, Handler, Acc, Stack, Config, _) -> - string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config). + string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config). %% this all gets really gross and should probably eventually be folded into %% but for now it fakes being part of string on incompletes and errors unescape(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> - string(<>, Handler, acc_seq(Acc, ?rsolidus), Stack, Config); + string(<>, Handler, [Acc, ?rsolidus], Stack, Config); unescape(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> - string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config); + string(Rest, Handler, [Acc, ?rsolidus, C], Stack, Config); unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace($\b, Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace($\b, Config)], Stack, Config); unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace($\f, Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace($\f, Config)], Stack, Config); unescape(<<$n, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace($\n, Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace($\n, Config)], Stack, Config); unescape(<<$r, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace($\r, Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace($\r, Config)], Stack, Config); unescape(<<$t, Rest/binary>>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace($\t, Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace($\t, Config)], Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace($\", Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace($\", Config)], Stack, Config); unescape(<>, Handler, Acc, Stack, Config=#config{strict_single_quotes=false}) -> - string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config); + string(Rest, Handler, [Acc, ?singlequote], Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace($\\, Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace($\\, Config)], Stack, Config); unescape(<>, Handler, Acc, Stack, Config) -> - string(Rest, Handler, acc_seq(Acc, maybe_replace($/, Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace($/, Config)], Stack, Config); unescape(<<$u, $d, A, B, C, ?rsolidus, $u, $d, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), (X == $c orelse X == $d orelse X == $e orelse X == $f), @@ -617,14 +628,14 @@ unescape(<<$u, $d, A, B, C, ?rsolidus, $u, $d, X, Y, Z, Rest/binary>>, Handler, High = erlang:list_to_integer([$d, A, B, C], 16), Low = erlang:list_to_integer([$d, X, Y, Z], 16), Codepoint = (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000, - string(Rest, Handler, acc_seq(Acc, Codepoint), Stack, Config); + string(Rest, Handler, [Acc, <>], Stack, Config); unescape(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config) when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), ?is_hex(B), ?is_hex(C), ?is_hex(W), ?is_hex(X), ?is_hex(Y), ?is_hex(Z) -> case Config#config.strict_utf8 of true -> ?error(<<$u, $d, A, B, C, ?rsolidus, $u, W, X, Y, Z, Rest/binary>>, Handler, Acc, Stack, Config); - false -> string(Rest, Handler, acc_seq(Acc, [16#fffd, 16#fffd]), Stack, Config) + false -> string(Rest, Handler, [Acc, <<16#fffd/utf8>>, <<16#fffd/utf8>>], Stack, Config) end; unescape(<<$u, $d, A, B, C, ?rsolidus, Rest/binary>>, Handler, Acc, Stack, Config) when (A == $8 orelse A == $9 orelse A == $a orelse A == $b), @@ -640,17 +651,17 @@ unescape(<<$u, A, B, C, D, Rest/binary>>, Handler, Acc, Stack, Config) when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D) -> case erlang:list_to_integer([A, B, C, D], 16) of Codepoint when Codepoint < 16#d800; Codepoint > 16#dfff -> - string(Rest, Handler, acc_seq(Acc, maybe_replace(Codepoint, Config)), Stack, Config); + string(Rest, Handler, [Acc, maybe_replace(Codepoint, Config)], Stack, Config); _ when Config#config.strict_utf8 -> ?error(string, <>, Handler, Acc, Stack, Config); - _ -> string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config) + _ -> string(Rest, Handler, [Acc, <<16#fffd/utf8>>], Stack, Config) end; unescape(Bin, Handler, Acc, Stack, Config) -> case is_partial_escape(Bin) of true -> incomplete(string, <>, Handler, Acc, Stack, Config); false -> case Config#config.strict_escapes of true -> ?error(string, <>, Handler, Acc, Stack, Config); - false -> string(Bin, Handler, acc_seq(Acc, ?rsolidus), Stack, Config) + false -> string(Bin, Handler, [Acc, ?rsolidus], Stack, Config) end end. @@ -678,15 +689,16 @@ maybe_replace($/, Config=#config{escaped_strings=true}) -> maybe_replace($\\, #config{escaped_strings=true}) -> [$\\, $\\]; maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> case Config#config.unescaped_jsonp of - true -> X + true -> <> ; false -> json_escape_sequence(X) end; -maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> json_escape_sequence(X); -maybe_replace(X, _Config) -> X. +maybe_replace(X, #config{escaped_strings=true}) when X < 32 -> + json_escape_sequence(X); +maybe_replace(X, _Config) -> <>. %% convert a codepoint to it's \uXXXX equiv. -json_escape_sequence(X) -> +json_escape_sequence(X) when X < 65536 -> <> = <>, [$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))]. @@ -772,6 +784,10 @@ exp(Bin, Handler, Acc, Stack, Config) -> finish_number(Bin, Handler, {exp, Acc}, Stack, Config). +acc_seq(Seq, C) when is_list(C) -> lists:reverse(C) ++ Seq; +acc_seq(Seq, C) -> [C] ++ Seq. + + finish_number(Rest, Handler, Acc, [], Config=#config{stream=false}) -> maybe_done(Rest, handle_event(format_number(Acc), Handler, Config), [], Config); finish_number(<<>>, Handler, {NumType, Acc}, Stack, Config) -> @@ -1230,8 +1246,6 @@ codepoints() -> [16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000] ++ [16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000]. -controls() -> lists:seq(0, 31). - surrogates() -> lists:seq(16#d800, 16#dfff). @@ -1250,7 +1264,7 @@ to_fake_utf8(N) -> clean_string_test_() -> Clean = codepoints(), - Dirty = surrogates() ++ controls(), + Dirty = surrogates(), % clean codepoints [{"clean u+" ++ integer_to_list(Codepoint, 16), ?_assertEqual( [{string, <>}, end_json], diff --git a/src/jsx_parser.erl b/src/jsx_parser.erl index d33d22a..da128ff 100644 --- a/src/jsx_parser.erl +++ b/src/jsx_parser.erl @@ -214,149 +214,26 @@ fix_key(Key) when is_binary(Key) -> Key. clean_string(Bin, #config{dirty_strings=true}) -> Bin; -clean_string(Bin, Config) -> - case clean(Bin, [], Config) of - {error, badarg} -> erlang:error(badarg); - String -> String - end. +clean_string(Bin, Config) -> clean(Bin, [], Config). -%% escape and/or replace bad codepoints if requested -clean(<<>>, Acc, _Config) -> unicode:characters_to_binary(lists:reverse(Acc)); -clean(<<0, Rest/binary>>, Acc, Config) -> maybe_replace(0, Rest, Acc, Config); -clean(<<1, Rest/binary>>, Acc, Config) -> maybe_replace(1, Rest, Acc, Config); -clean(<<2, Rest/binary>>, Acc, Config) -> maybe_replace(2, Rest, Acc, Config); -clean(<<3, Rest/binary>>, Acc, Config) -> maybe_replace(3, Rest, Acc, Config); -clean(<<4, Rest/binary>>, Acc, Config) -> maybe_replace(4, Rest, Acc, Config); -clean(<<5, Rest/binary>>, Acc, Config) -> maybe_replace(5, Rest, Acc, Config); -clean(<<6, Rest/binary>>, Acc, Config) -> maybe_replace(6, Rest, Acc, Config); -clean(<<7, Rest/binary>>, Acc, Config) -> maybe_replace(7, Rest, Acc, Config); -clean(<<8, Rest/binary>>, Acc, Config) -> maybe_replace(8, Rest, Acc, Config); -clean(<<9, Rest/binary>>, Acc, Config) -> maybe_replace(9, Rest, Acc, Config); -clean(<<10, Rest/binary>>, Acc, Config) -> maybe_replace(10, Rest, Acc, Config); -clean(<<11, Rest/binary>>, Acc, Config) -> maybe_replace(11, Rest, Acc, Config); -clean(<<12, Rest/binary>>, Acc, Config) -> maybe_replace(12, Rest, Acc, Config); -clean(<<13, Rest/binary>>, Acc, Config) -> maybe_replace(13, Rest, Acc, Config); -clean(<<14, Rest/binary>>, Acc, Config) -> maybe_replace(14, Rest, Acc, Config); -clean(<<15, Rest/binary>>, Acc, Config) -> maybe_replace(15, Rest, Acc, Config); -clean(<<16, Rest/binary>>, Acc, Config) -> maybe_replace(16, Rest, Acc, Config); -clean(<<17, Rest/binary>>, Acc, Config) -> maybe_replace(17, Rest, Acc, Config); -clean(<<18, Rest/binary>>, Acc, Config) -> maybe_replace(18, Rest, Acc, Config); -clean(<<19, Rest/binary>>, Acc, Config) -> maybe_replace(19, Rest, Acc, Config); -clean(<<20, Rest/binary>>, Acc, Config) -> maybe_replace(20, Rest, Acc, Config); -clean(<<21, Rest/binary>>, Acc, Config) -> maybe_replace(21, Rest, Acc, Config); -clean(<<22, Rest/binary>>, Acc, Config) -> maybe_replace(22, Rest, Acc, Config); -clean(<<23, Rest/binary>>, Acc, Config) -> maybe_replace(23, Rest, Acc, Config); -clean(<<24, Rest/binary>>, Acc, Config) -> maybe_replace(24, Rest, Acc, Config); -clean(<<25, Rest/binary>>, Acc, Config) -> maybe_replace(25, Rest, Acc, Config); -clean(<<26, Rest/binary>>, Acc, Config) -> maybe_replace(26, Rest, Acc, Config); -clean(<<27, Rest/binary>>, Acc, Config) -> maybe_replace(27, Rest, Acc, Config); -clean(<<28, Rest/binary>>, Acc, Config) -> maybe_replace(28, Rest, Acc, Config); -clean(<<29, Rest/binary>>, Acc, Config) -> maybe_replace(29, Rest, Acc, Config); -clean(<<30, Rest/binary>>, Acc, Config) -> maybe_replace(30, Rest, Acc, Config); -clean(<<31, Rest/binary>>, Acc, Config) -> maybe_replace(31, Rest, Acc, Config); -clean(<<32, Rest/binary>>, Acc, Config) -> clean(Rest, [32] ++ Acc, Config); -clean(<<33, Rest/binary>>, Acc, Config) -> clean(Rest, [33] ++ Acc, Config); -clean(<<34, Rest/binary>>, Acc, Config) -> maybe_replace(34, Rest, Acc, Config); -clean(<<35, Rest/binary>>, Acc, Config) -> clean(Rest, [35] ++ Acc, Config); -clean(<<36, Rest/binary>>, Acc, Config) -> clean(Rest, [36] ++ Acc, Config); -clean(<<37, Rest/binary>>, Acc, Config) -> clean(Rest, [37] ++ Acc, Config); -clean(<<38, Rest/binary>>, Acc, Config) -> clean(Rest, [38] ++ Acc, Config); -clean(<<39, Rest/binary>>, Acc, Config) -> clean(Rest, [39] ++ Acc, Config); -clean(<<40, Rest/binary>>, Acc, Config) -> clean(Rest, [40] ++ Acc, Config); -clean(<<41, Rest/binary>>, Acc, Config) -> clean(Rest, [41] ++ Acc, Config); -clean(<<42, Rest/binary>>, Acc, Config) -> clean(Rest, [42] ++ Acc, Config); -clean(<<43, Rest/binary>>, Acc, Config) -> clean(Rest, [43] ++ Acc, Config); -clean(<<44, Rest/binary>>, Acc, Config) -> clean(Rest, [44] ++ Acc, Config); -clean(<<45, Rest/binary>>, Acc, Config) -> clean(Rest, [45] ++ Acc, Config); -clean(<<46, Rest/binary>>, Acc, Config) -> clean(Rest, [46] ++ Acc, Config); -clean(<<47, Rest/binary>>, Acc, Config) -> maybe_replace(47, Rest, Acc, Config); -clean(<<48, Rest/binary>>, Acc, Config) -> clean(Rest, [48] ++ Acc, Config); -clean(<<49, Rest/binary>>, Acc, Config) -> clean(Rest, [49] ++ Acc, Config); -clean(<<50, Rest/binary>>, Acc, Config) -> clean(Rest, [50] ++ Acc, Config); -clean(<<51, Rest/binary>>, Acc, Config) -> clean(Rest, [51] ++ Acc, Config); -clean(<<52, Rest/binary>>, Acc, Config) -> clean(Rest, [52] ++ Acc, Config); -clean(<<53, Rest/binary>>, Acc, Config) -> clean(Rest, [53] ++ Acc, Config); -clean(<<54, Rest/binary>>, Acc, Config) -> clean(Rest, [54] ++ Acc, Config); -clean(<<55, Rest/binary>>, Acc, Config) -> clean(Rest, [55] ++ Acc, Config); -clean(<<56, Rest/binary>>, Acc, Config) -> clean(Rest, [56] ++ Acc, Config); -clean(<<57, Rest/binary>>, Acc, Config) -> clean(Rest, [57] ++ Acc, Config); -clean(<<58, Rest/binary>>, Acc, Config) -> clean(Rest, [58] ++ Acc, Config); -clean(<<59, Rest/binary>>, Acc, Config) -> clean(Rest, [59] ++ Acc, Config); -clean(<<60, Rest/binary>>, Acc, Config) -> clean(Rest, [60] ++ Acc, Config); -clean(<<61, Rest/binary>>, Acc, Config) -> clean(Rest, [61] ++ Acc, Config); -clean(<<62, Rest/binary>>, Acc, Config) -> clean(Rest, [62] ++ Acc, Config); -clean(<<63, Rest/binary>>, Acc, Config) -> clean(Rest, [63] ++ Acc, Config); -clean(<<64, Rest/binary>>, Acc, Config) -> clean(Rest, [64] ++ Acc, Config); -clean(<<65, Rest/binary>>, Acc, Config) -> clean(Rest, [65] ++ Acc, Config); -clean(<<66, Rest/binary>>, Acc, Config) -> clean(Rest, [66] ++ Acc, Config); -clean(<<67, Rest/binary>>, Acc, Config) -> clean(Rest, [67] ++ Acc, Config); -clean(<<68, Rest/binary>>, Acc, Config) -> clean(Rest, [68] ++ Acc, Config); -clean(<<69, Rest/binary>>, Acc, Config) -> clean(Rest, [69] ++ Acc, Config); -clean(<<70, Rest/binary>>, Acc, Config) -> clean(Rest, [70] ++ Acc, Config); -clean(<<71, Rest/binary>>, Acc, Config) -> clean(Rest, [71] ++ Acc, Config); -clean(<<72, Rest/binary>>, Acc, Config) -> clean(Rest, [72] ++ Acc, Config); -clean(<<73, Rest/binary>>, Acc, Config) -> clean(Rest, [73] ++ Acc, Config); -clean(<<74, Rest/binary>>, Acc, Config) -> clean(Rest, [74] ++ Acc, Config); -clean(<<75, Rest/binary>>, Acc, Config) -> clean(Rest, [75] ++ Acc, Config); -clean(<<76, Rest/binary>>, Acc, Config) -> clean(Rest, [76] ++ Acc, Config); -clean(<<77, Rest/binary>>, Acc, Config) -> clean(Rest, [77] ++ Acc, Config); -clean(<<78, Rest/binary>>, Acc, Config) -> clean(Rest, [78] ++ Acc, Config); -clean(<<79, Rest/binary>>, Acc, Config) -> clean(Rest, [79] ++ Acc, Config); -clean(<<80, Rest/binary>>, Acc, Config) -> clean(Rest, [80] ++ Acc, Config); -clean(<<81, Rest/binary>>, Acc, Config) -> clean(Rest, [81] ++ Acc, Config); -clean(<<82, Rest/binary>>, Acc, Config) -> clean(Rest, [82] ++ Acc, Config); -clean(<<83, Rest/binary>>, Acc, Config) -> clean(Rest, [83] ++ Acc, Config); -clean(<<84, Rest/binary>>, Acc, Config) -> clean(Rest, [84] ++ Acc, Config); -clean(<<85, Rest/binary>>, Acc, Config) -> clean(Rest, [85] ++ Acc, Config); -clean(<<86, Rest/binary>>, Acc, Config) -> clean(Rest, [86] ++ Acc, Config); -clean(<<87, Rest/binary>>, Acc, Config) -> clean(Rest, [87] ++ Acc, Config); -clean(<<88, Rest/binary>>, Acc, Config) -> clean(Rest, [88] ++ Acc, Config); -clean(<<89, Rest/binary>>, Acc, Config) -> clean(Rest, [89] ++ Acc, Config); -clean(<<90, Rest/binary>>, Acc, Config) -> clean(Rest, [90] ++ Acc, Config); -clean(<<91, Rest/binary>>, Acc, Config) -> clean(Rest, [91] ++ Acc, Config); -clean(<<92, Rest/binary>>, Acc, Config) -> maybe_replace(92, Rest, Acc, Config); -clean(<<93, Rest/binary>>, Acc, Config) -> clean(Rest, [93] ++ Acc, Config); -clean(<<94, Rest/binary>>, Acc, Config) -> clean(Rest, [94] ++ Acc, Config); -clean(<<95, Rest/binary>>, Acc, Config) -> clean(Rest, [95] ++ Acc, Config); -clean(<<96, Rest/binary>>, Acc, Config) -> clean(Rest, [96] ++ Acc, Config); -clean(<<97, Rest/binary>>, Acc, Config) -> clean(Rest, [97] ++ Acc, Config); -clean(<<98, Rest/binary>>, Acc, Config) -> clean(Rest, [98] ++ Acc, Config); -clean(<<99, Rest/binary>>, Acc, Config) -> clean(Rest, [99] ++ Acc, Config); -clean(<<100, Rest/binary>>, Acc, Config) -> clean(Rest, [100] ++ Acc, Config); -clean(<<101, Rest/binary>>, Acc, Config) -> clean(Rest, [101] ++ Acc, Config); -clean(<<102, Rest/binary>>, Acc, Config) -> clean(Rest, [102] ++ Acc, Config); -clean(<<103, Rest/binary>>, Acc, Config) -> clean(Rest, [103] ++ Acc, Config); -clean(<<104, Rest/binary>>, Acc, Config) -> clean(Rest, [104] ++ Acc, Config); -clean(<<105, Rest/binary>>, Acc, Config) -> clean(Rest, [105] ++ Acc, Config); -clean(<<106, Rest/binary>>, Acc, Config) -> clean(Rest, [106] ++ Acc, Config); -clean(<<107, Rest/binary>>, Acc, Config) -> clean(Rest, [107] ++ Acc, Config); -clean(<<108, Rest/binary>>, Acc, Config) -> clean(Rest, [108] ++ Acc, Config); -clean(<<109, Rest/binary>>, Acc, Config) -> clean(Rest, [109] ++ Acc, Config); -clean(<<110, Rest/binary>>, Acc, Config) -> clean(Rest, [110] ++ Acc, Config); -clean(<<111, Rest/binary>>, Acc, Config) -> clean(Rest, [111] ++ Acc, Config); -clean(<<112, Rest/binary>>, Acc, Config) -> clean(Rest, [112] ++ Acc, Config); -clean(<<113, Rest/binary>>, Acc, Config) -> clean(Rest, [113] ++ Acc, Config); -clean(<<114, Rest/binary>>, Acc, Config) -> clean(Rest, [114] ++ Acc, Config); -clean(<<115, Rest/binary>>, Acc, Config) -> clean(Rest, [115] ++ Acc, Config); -clean(<<116, Rest/binary>>, Acc, Config) -> clean(Rest, [116] ++ Acc, Config); -clean(<<117, Rest/binary>>, Acc, Config) -> clean(Rest, [117] ++ Acc, Config); -clean(<<118, Rest/binary>>, Acc, Config) -> clean(Rest, [118] ++ Acc, Config); -clean(<<119, Rest/binary>>, Acc, Config) -> clean(Rest, [119] ++ Acc, Config); -clean(<<120, Rest/binary>>, Acc, Config) -> clean(Rest, [120] ++ Acc, Config); -clean(<<121, Rest/binary>>, Acc, Config) -> clean(Rest, [121] ++ Acc, Config); -clean(<<122, Rest/binary>>, Acc, Config) -> clean(Rest, [122] ++ Acc, Config); -clean(<<123, Rest/binary>>, Acc, Config) -> clean(Rest, [123] ++ Acc, Config); -clean(<<124, Rest/binary>>, Acc, Config) -> clean(Rest, [124] ++ Acc, Config); -clean(<<125, Rest/binary>>, Acc, Config) -> clean(Rest, [125] ++ Acc, Config); -clean(<<126, Rest/binary>>, Acc, Config) -> clean(Rest, [126] ++ Acc, Config); -clean(<<127, Rest/binary>>, Acc, Config) -> clean(Rest, [127] ++ Acc, Config); -clean(<>, Acc, Config=#config{uescape=true}) -> +clean(<<>>, Acc, _) -> iolist_to_binary(Acc); +clean(<>, Acc, Config) when X < 16#20 -> + maybe_replace(X, Rest, Acc, Config); +clean(<<34, Rest/binary>>, Acc, Config) -> + maybe_replace(34, Rest, Acc, Config); +clean(<<47, Rest/binary>>, Acc, Config) -> + maybe_replace(47, Rest, Acc, Config); +clean(<<92, Rest/binary>>, Acc, Config) -> + maybe_replace(92, Rest, Acc, Config); +clean(<>, Acc, Config=#config{uescape=true}) when X >= 16#80 -> maybe_replace(X, Rest, Acc, Config); clean(<>, Acc, Config) when X == 16#2028; X == 16#2029 -> maybe_replace(X, Rest, Acc, Config); -clean(<>, Acc, Config) -> - clean(Rest, [X] ++ Acc, Config); +clean(<<_/utf8, _/binary>> = Bin, Acc, Config) -> + Size = count(Bin, 0, Config), + <> = Bin, + clean(Rest, [Acc, Clean], Config); %% surrogates clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 -> maybe_replace(surrogate, Rest, Acc, Config); @@ -373,6 +250,239 @@ clean(<<_, Rest/binary>>, Acc, Config) -> maybe_replace(badutf, Rest, Acc, Config). +count(<<>>, N, _) -> N; +count(<<0, _/binary>>, N, _) -> N; +count(<<1, _/binary>>, N, _) -> N; +count(<<2, _/binary>>, N, _) -> N; +count(<<3, _/binary>>, N, _) -> N; +count(<<4, _/binary>>, N, _) -> N; +count(<<5, _/binary>>, N, _) -> N; +count(<<6, _/binary>>, N, _) -> N; +count(<<7, _/binary>>, N, _) -> N; +count(<<8, _/binary>>, N, _) -> N; +count(<<9, _/binary>>, N, _) -> N; +count(<<10, _/binary>>, N, _) -> N; +count(<<11, _/binary>>, N, _) -> N; +count(<<12, _/binary>>, N, _) -> N; +count(<<13, _/binary>>, N, _) -> N; +count(<<14, _/binary>>, N, _) -> N; +count(<<15, _/binary>>, N, _) -> N; +count(<<16, _/binary>>, N, _) -> N; +count(<<17, _/binary>>, N, _) -> N; +count(<<18, _/binary>>, N, _) -> N; +count(<<19, _/binary>>, N, _) -> N; +count(<<20, _/binary>>, N, _) -> N; +count(<<21, _/binary>>, N, _) -> N; +count(<<22, _/binary>>, N, _) -> N; +count(<<23, _/binary>>, N, _) -> N; +count(<<24, _/binary>>, N, _) -> N; +count(<<25, _/binary>>, N, _) -> N; +count(<<26, _/binary>>, N, _) -> N; +count(<<27, _/binary>>, N, _) -> N; +count(<<28, _/binary>>, N, _) -> N; +count(<<29, _/binary>>, N, _) -> N; +count(<<30, _/binary>>, N, _) -> N; +count(<<31, _/binary>>, N, _) -> N; +count(<<32, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<33, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<34, _/binary>>, N, _) -> N; +count(<<35, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<36, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<37, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<38, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<39, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<40, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<41, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<42, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<43, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<44, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<45, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<46, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<47, _/binary>>, N, _) -> N; +count(<<48, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<49, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<50, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<51, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<52, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<53, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<54, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<55, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<56, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<57, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<58, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<59, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<60, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<61, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<62, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<63, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<64, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<65, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<66, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<67, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<68, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<69, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<70, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<71, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<72, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<73, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<74, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<75, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<76, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<77, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<78, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<79, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<80, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<81, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<82, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<83, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<84, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<85, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<86, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<87, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<88, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<89, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<90, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<91, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<92, _/binary>>, N, _) -> N; +count(<<93, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<94, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<95, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<96, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<97, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<98, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<99, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<100, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<101, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<102, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<103, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<104, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<105, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<106, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<107, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<108, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<109, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<110, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<111, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<112, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<113, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<114, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<115, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<116, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<117, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<118, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<119, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<120, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<121, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<122, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<123, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<124, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<125, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<126, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<127, Rest/binary>>, N, Config) -> + count(Rest, N + 1, Config); +count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N; +count(<>, N, _) when X == 16#2028; X == 16#2029 -> N; +count(<>, N, Config) when X < 16#800 -> + count(Rest, N + 2, Config); +count(<>, N, Config) when X < 16#10000 -> + count(Rest, N + 3, Config); +count(<<_/utf8, Rest/binary>>, N, Config) -> + count(Rest, N + 4, Config); +count(<<_, _/binary>>, N, _) -> N. + + strip_continuations(Bin, 0) -> Bin; strip_continuations(<>, N) when X >= 128, X =< 191 -> strip_continuations(Rest, N - 1); @@ -381,38 +491,42 @@ strip_continuations(Bin, _) -> Bin. maybe_replace($\b, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [$b, $\\] ++ Acc, Config); + clean(Rest, [Acc, $\\, $b], Config); maybe_replace($\t, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [$t, $\\] ++ Acc, Config); + clean(Rest, [Acc, $\\, $t], Config); maybe_replace($\n, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [$n, $\\] ++ Acc, Config); + clean(Rest, [Acc, $\\, $n], Config); maybe_replace($\f, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [$f, $\\] ++ Acc, Config); + clean(Rest, [Acc, $\\, $f], Config); maybe_replace($\r, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [$r, $\\] ++ Acc, Config); + clean(Rest, [Acc, $\\, $r], Config); maybe_replace($\", Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [$\", $\\] ++ Acc, Config); + clean(Rest, [Acc, $\\, $\"], Config); maybe_replace($/, Rest, Acc, Config=#config{escaped_strings=true}) -> case Config#config.escaped_forward_slashes of - true -> clean(Rest, [$/, $\\] ++ Acc, Config); - false -> clean(Rest, [$/] ++ Acc, Config) + true -> clean(Rest, [Acc, $\\, $/], Config); + false -> clean(Rest, [Acc, $/], Config) end; maybe_replace($\\, Rest, Acc, Config=#config{escaped_strings=true}) -> - clean(Rest, [$\\, $\\] ++ Acc, Config); + clean(Rest, [Acc, $\\, $\\], Config); maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X < 32 -> - clean(Rest, lists:reverse(json_escape_sequence(X)) ++ Acc, Config); -maybe_replace(X, Rest, Acc, Config=#config{uescape=true}) when is_integer(X) -> - clean(Rest, lists:reverse(json_escape_sequence(X)) ++ Acc, Config); + clean(Rest, [Acc, json_escape_sequence(X)], Config); +%% escaped even if no other escaping was requested! +maybe_replace(X, Rest, Acc, Config=#config{uescape=true}) when X >= 16#80 -> + clean(Rest, [Acc, json_escape_sequence(X)], Config); maybe_replace(X, Rest, Acc, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 -> case Config#config.unescaped_jsonp of - true -> clean(Rest, [X] ++ Acc, Config); - false -> clean(Rest, lists:reverse(json_escape_sequence(X)) ++ Acc, Config) + true -> clean(Rest, [Acc, <>], Config); + false -> clean(Rest, [Acc, json_escape_sequence(X)], Config) end; -maybe_replace(Atom, _, _, #config{strict_utf8=true}) when is_atom(Atom) -> {error, badarg}; -maybe_replace(noncharacter, Rest, Acc, Config) -> clean(Rest, [16#fffd] ++ Acc, Config); -maybe_replace(surrogate, Rest, Acc, Config) -> clean(Rest, [16#fffd] ++ Acc, Config); -maybe_replace(badutf, Rest, Acc, Config) -> clean(Rest, [16#fffd] ++ Acc, Config); -maybe_replace(X, Rest, Acc, Config) -> clean(Rest, [X] ++ Acc, Config). +maybe_replace(Atom, _, _, #config{strict_utf8=true}) when is_atom(Atom) -> + erlang:error(badarg); +maybe_replace(surrogate, Rest, Acc, Config) -> + clean(Rest, [Acc, <<16#fffd/utf8>>], Config); +maybe_replace(badutf, Rest, Acc, Config) -> + clean(Rest, [Acc, <<16#fffd/utf8>>], Config); +maybe_replace(X, Rest, Acc, Config) -> + clean(Rest, [Acc, <>], Config). %% convert a codepoint to it's \uXXXX equiv. @@ -943,7 +1057,7 @@ json_escape_sequence_test_() -> {"json escape sequence test - 16#def", ?_assertEqual(json_escape_sequence(16#def), "\\u0def")} ]. -uescaped_test_() -> +uescape_test_() -> [ {"\"\\u0080\"", ?_assertEqual( <<"\\u0080">>,