From e5bfa8a39e426eb9f797404144ca1c01c1751cff Mon Sep 17 00:00:00 2001 From: alisdair Date: Sat, 23 Dec 2017 19:08:28 -0800 Subject: [PATCH] add `control_codes` mode to strict parsing that rejects strings with ascii control codes --- README.md | 6 +++- src/jsx_config.erl | 17 ++++++++--- src/jsx_config.hrl | 1 + src/jsx_decoder.erl | 72 ++++----------------------------------------- 4 files changed, 25 insertions(+), 71 deletions(-) diff --git a/README.md b/README.md index 9f72709..ffd0bbd 100644 --- a/README.md +++ b/README.md @@ -439,8 +439,12 @@ additional options beyond these. see escape sequences not adhering to the json spec result in a `badarg` error + * `control_codes` + + control codes in strings result in `badarg` errors + any combination of these can be passed to **jsx** by using `{strict, [strict_option()]}`. - `strict` is equivalent to `{strict, [comments, trailing_commas, utf8, single_quotes, escapes]}` + `strict` is equivalent to `{strict, [comments, trailing_commas, utf8, single_quotes, escapes, control_codes]}` - `return_tail` diff --git a/src/jsx_config.erl b/src/jsx_config.erl index 92a958b..47cbcf7 100644 --- a/src/jsx_config.erl +++ b/src/jsx_config.erl @@ -78,7 +78,8 @@ parse_config([strict|Rest], Config) -> strict_commas=true, strict_utf8=true, strict_single_quotes=true, - strict_escapes=true + strict_escapes=true, + strict_control_codes=true }); parse_config([{strict, Strict}|Rest], Config) -> parse_strict(Strict, Rest, Config); @@ -108,6 +109,8 @@ parse_strict([single_quotes|Strict], Rest, Config) -> parse_strict(Strict, Rest, Config#config{strict_single_quotes=true}); parse_strict([escapes|Strict], Rest, Config) -> parse_strict(Strict, Rest, Config#config{strict_escapes=true}); +parse_strict([control_codes|Strict], Rest, Config) -> + parse_strict(Strict, Rest, Config#config{strict_control_codes=true}); parse_strict(_Strict, _Rest, _Config) -> erlang:error(badarg). @@ -133,7 +136,7 @@ reduce_config(Input) -> reduce_config(Input, [], []). reduce_config([], Output, Strict) -> case length(Strict) of 0 -> lists:reverse(Output); - 4 -> lists:reverse(Output) ++ [strict]; + 5 -> lists:reverse(Output) ++ [strict]; _ -> lists:reverse(Output) ++ [{strict, lists:reverse(Strict)}] end; reduce_config([strict_comments|Input], Output, Strict) -> @@ -144,6 +147,8 @@ reduce_config([strict_single_quotes|Input], Output, Strict) -> reduce_config(Input, Output, [single_quotes] ++ Strict); reduce_config([strict_escapes|Input], Output, Strict) -> reduce_config(Input, Output, [escapes] ++ Strict); +reduce_config([strict_control_codes|Input], Output, Strict) -> + reduce_config(Input, Output, [control_codes] ++ Strict); reduce_config([Else|Input], Output, Strict) -> reduce_config(Input, [Else] ++ Output, Strict). @@ -205,6 +210,7 @@ config_test_() -> strict_utf8 = true, strict_single_quotes = true, strict_escapes = true, + strict_control_codes = true, stream = true, uescape = true }, @@ -227,7 +233,8 @@ config_test_() -> strict_commas = true, strict_utf8 = true, strict_single_quotes = true, - strict_escapes = true + strict_escapes = true, + strict_control_codes = true }, parse_config([strict]) ) @@ -300,6 +307,7 @@ config_to_list_test_() -> strict_utf8 = true, strict_single_quotes = true, strict_escapes = true, + strict_control_codes = true, stream = true, uescape = true } @@ -318,7 +326,8 @@ config_to_list_test_() -> config_to_list(#config{strict_comments = true, strict_utf8 = true, strict_single_quotes = true, - strict_escapes = true}) + strict_escapes = true, + strict_control_codes = true}) )}, {"error handler", ?_assertEqual( [{error_handler, fun ?MODULE:fake_error_handler/3}], diff --git a/src/jsx_config.hrl b/src/jsx_config.hrl index be619c0..c89963c 100644 --- a/src/jsx_config.hrl +++ b/src/jsx_config.hrl @@ -8,6 +8,7 @@ strict_utf8 = false :: boolean(), strict_single_quotes = false :: boolean(), strict_escapes = false :: boolean(), + strict_control_codes = false :: boolean(), stream = false :: boolean(), return_tail = false :: boolean(), uescape = false :: boolean(), diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 4600956..1a834d9 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -364,7 +364,9 @@ string(<<226, 128, 168, Rest/binary>>, Handler, Acc, Stack, Config) -> %% u+2029 string(<<226, 128, 169, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, maybe_replace(16#2029, Config)], Stack, Config); -string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) -> +string(<> = Bin, Handler, Acc, Stack, Config=#config{strict_control_codes=true}) when X > 16#1f -> + count(Bin, Handler, Acc, Stack, Config); +string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config=#config{strict_control_codes=false}) -> count(Bin, Handler, Acc, Stack, Config); %% necessary for bytes that are badly formed utf8 that won't match in `count` string(<>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) -> @@ -376,7 +378,6 @@ string(<<239, 191, 191, Rest/binary>>, Handler, Acc, Stack, Config) -> string(Rest, Handler, [Acc, <<16#ffff/utf8>>], Stack, Config); string(<<>>, Handler, Acc, Stack, Config) -> incomplete(string, <<>>, Handler, Acc, Stack, Config); -%% partial utf8 codepoints string(<>, Handler, Acc, Stack, Config) when X >= 2#11000000 -> incomplete(string, <>, Handler, Acc, Stack, Config); string(<>, Handler, Acc, Stack, Config) when X >= 2#11100000, Y >= 2#10000000 -> @@ -414,70 +415,6 @@ count(Bin, Handler, Acc, Stack, Config) -> %% explicitly whitelist ascii set for faster parsing. really? really. someone should %% submit a patch that unrolls simple guards -count(<<0, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<1, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<2, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<3, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<4, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<5, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<6, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<7, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<8, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<9, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<10, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<11, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<12, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<13, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<14, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<15, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<16, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<17, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<18, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<19, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<20, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<21, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<22, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<23, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<24, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<25, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<26, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<27, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<28, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<29, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<30, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); -count(<<31, Rest/binary>>, N, Config) -> - count(Rest, N + 1, Config); count(<<32, Rest/binary>>, N, Config) -> count(Rest, N + 1, Config); count(<<33, Rest/binary>>, N, Config) -> @@ -669,6 +606,9 @@ count(<<127, Rest/binary>>, N, Config) -> count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) -> count(Rest, N + 1, Config); count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N; +count(<>, N, Config=#config{strict_control_codes=false}) when X < 32 -> + count(Rest, N + 1, Config); +count(<>, N, #config{strict_control_codes=true}) when X < 32 -> N; count(<>, N, Config) -> case X of X when X < 16#800 -> count(Rest, N + 2, Config);