add control_codes mode to strict parsing that rejects strings with ascii control codes
This commit is contained in:
alisdair 2017-12-23 19:08:28 -08:00
parent 6a01f3a43b
commit e5bfa8a39e
4 changed files with 25 additions and 71 deletions

View file

@ -439,8 +439,12 @@ additional options beyond these. see
escape sequences not adhering to the json spec result in a `badarg` error
* `control_codes`
ascii control codes (code points below 32, i.e. U+0000 through U+001F) appearing in strings result in `badarg` errors
any combination of these can be passed to **jsx** by using `{strict, [strict_option()]}`.
`strict` is equivalent to `{strict, [comments, trailing_commas, utf8, single_quotes, escapes]}`
`strict` is equivalent to `{strict, [comments, trailing_commas, utf8, single_quotes, escapes, control_codes]}`
- `return_tail`

View file

@ -78,7 +78,8 @@ parse_config([strict|Rest], Config) ->
strict_commas=true,
strict_utf8=true,
strict_single_quotes=true,
strict_escapes=true
strict_escapes=true,
strict_control_codes=true
});
parse_config([{strict, Strict}|Rest], Config) ->
parse_strict(Strict, Rest, Config);
@ -108,6 +109,8 @@ parse_strict([single_quotes|Strict], Rest, Config) ->
parse_strict(Strict, Rest, Config#config{strict_single_quotes=true});
parse_strict([escapes|Strict], Rest, Config) ->
parse_strict(Strict, Rest, Config#config{strict_escapes=true});
parse_strict([control_codes|Strict], Rest, Config) ->
parse_strict(Strict, Rest, Config#config{strict_control_codes=true});
parse_strict(_Strict, _Rest, _Config) ->
erlang:error(badarg).
@ -133,7 +136,7 @@ reduce_config(Input) -> reduce_config(Input, [], []).
reduce_config([], Output, Strict) ->
case length(Strict) of
0 -> lists:reverse(Output);
4 -> lists:reverse(Output) ++ [strict];
5 -> lists:reverse(Output) ++ [strict];
_ -> lists:reverse(Output) ++ [{strict, lists:reverse(Strict)}]
end;
reduce_config([strict_comments|Input], Output, Strict) ->
@ -144,6 +147,8 @@ reduce_config([strict_single_quotes|Input], Output, Strict) ->
reduce_config(Input, Output, [single_quotes] ++ Strict);
reduce_config([strict_escapes|Input], Output, Strict) ->
reduce_config(Input, Output, [escapes] ++ Strict);
reduce_config([strict_control_codes|Input], Output, Strict) ->
reduce_config(Input, Output, [control_codes] ++ Strict);
reduce_config([Else|Input], Output, Strict) ->
reduce_config(Input, [Else] ++ Output, Strict).
@ -205,6 +210,7 @@ config_test_() ->
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true,
strict_control_codes = true,
stream = true,
uescape = true
},
@ -227,7 +233,8 @@ config_test_() ->
strict_commas = true,
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true
strict_escapes = true,
strict_control_codes = true
},
parse_config([strict])
)
@ -300,6 +307,7 @@ config_to_list_test_() ->
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true,
strict_control_codes = true,
stream = true,
uescape = true
}
@ -318,7 +326,8 @@ config_to_list_test_() ->
config_to_list(#config{strict_comments = true,
strict_utf8 = true,
strict_single_quotes = true,
strict_escapes = true})
strict_escapes = true,
strict_control_codes = true})
)},
{"error handler", ?_assertEqual(
[{error_handler, fun ?MODULE:fake_error_handler/3}],

View file

@ -8,6 +8,7 @@
strict_utf8 = false :: boolean(),
strict_single_quotes = false :: boolean(),
strict_escapes = false :: boolean(),
strict_control_codes = false :: boolean(),
stream = false :: boolean(),
return_tail = false :: boolean(),
uescape = false :: boolean(),

View file

@ -364,7 +364,9 @@ string(<<226, 128, 168, Rest/binary>>, Handler, Acc, Stack, Config) ->
%% u+2029
string(<<226, 128, 169, Rest/binary>>, Handler, Acc, Stack, Config) ->
string(Rest, Handler, [Acc, maybe_replace(16#2029, Config)], Stack, Config);
string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) ->
string(<<X/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config=#config{strict_control_codes=true}) when X > 16#1f ->
count(Bin, Handler, Acc, Stack, Config);
string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config=#config{strict_control_codes=false}) ->
count(Bin, Handler, Acc, Stack, Config);
%% necessary for bytes that are badly formed utf8 that won't match in `count`
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
@ -376,7 +378,6 @@ string(<<239, 191, 191, Rest/binary>>, Handler, Acc, Stack, Config) ->
string(Rest, Handler, [Acc, <<16#ffff/utf8>>], Stack, Config);
string(<<>>, Handler, Acc, Stack, Config) ->
incomplete(string, <<>>, Handler, Acc, Stack, Config);
%% partial utf8 codepoints
string(<<X>>, Handler, Acc, Stack, Config) when X >= 2#11000000 ->
incomplete(string, <<X>>, Handler, Acc, Stack, Config);
string(<<X, Y>>, Handler, Acc, Stack, Config) when X >= 2#11100000, Y >= 2#10000000 ->
@ -414,70 +415,6 @@ count(Bin, Handler, Acc, Stack, Config) ->
%% explicitly whitelist ascii set for faster parsing. really? really. someone should
%% submit a patch that unrolls simple guards
count(<<0, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<1, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<2, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<3, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<4, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<5, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<6, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<7, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<8, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<9, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<10, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<11, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<12, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<13, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<14, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<15, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<16, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<17, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<18, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<19, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<20, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<21, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<22, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<23, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<24, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<25, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<26, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<27, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<28, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<29, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<30, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<31, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<32, Rest/binary>>, N, Config) ->
count(Rest, N + 1, Config);
count(<<33, Rest/binary>>, N, Config) ->
@ -669,6 +606,9 @@ count(<<127, Rest/binary>>, N, Config) ->
count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) ->
count(Rest, N + 1, Config);
count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N;
count(<<X/utf8, Rest/binary>>, N, Config=#config{strict_control_codes=false}) when X < 32 ->
count(Rest, N + 1, Config);
count(<<X/utf8, _/binary>>, N, #config{strict_control_codes=true}) when X < 32 -> N;
count(<<X/utf8, Rest/binary>>, N, Config) ->
case X of
X when X < 16#800 -> count(Rest, N + 2, Config);