From 7b31bef0f630a09df378de20dd8399b8d0db481b Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Sun, 24 Feb 2013 01:32:17 -0800 Subject: [PATCH] more comprehensive string checking --- src/jsx_decoder.erl | 35 ++++++++++++- src/jsx_utils.erl | 116 ++++++++++++++++++-------------------------- 2 files changed, 80 insertions(+), 71 deletions(-) diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index d508887..b8a3ff9 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -1023,7 +1023,12 @@ done(Bin, Handler, Stack, Config) -> ?error([Bin, Handler, Stack, Config]). -include_lib("eunit/include/eunit.hrl"). -decode(JSON, Config) -> start(JSON, {jsx, []}, [], jsx_utils:parse_config(Config)). +decode(JSON, Config) -> + try + start(JSON, {jsx, []}, [], jsx_utils:parse_config(Config)) + catch + error:badarg -> {error, badarg} + end. decode_test_() -> @@ -1330,9 +1335,37 @@ clean_string_test_() -> [{string, extended_codepoints()}, end_json], decode(<<34, (extended_codepoints())/binary, 34>>, []) )}, + {"error reserved space", ?_assertEqual( + lists:duplicate(length(reserved_space()), {error, badarg}), + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, reserved_space()) + )}, + {"error surrogates", ?_assertEqual( + lists:duplicate(length(surrogates()), {error, badarg}), + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, surrogates()) + )}, + {"error noncharacters", ?_assertEqual( + lists:duplicate(length(noncharacters()), {error, badarg}), + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, noncharacters()) + )}, + {"error extended noncharacters", ?_assertEqual( + lists:duplicate(length(extended_noncharacters()), {error, badarg}), + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, extended_noncharacters()) + )}, {"clean reserved space", ?_assertEqual( lists:duplicate(length(reserved_space()), [{string, <<16#fffd/utf8>>}, end_json]), lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, reserved_space()) + )}, + {"clean surrogates", ?_assertEqual( + lists:duplicate(length(surrogates()), [{string, <<16#fffd/utf8>>}, end_json]), + lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, surrogates()) + )}, + {"clean noncharacters", ?_assertEqual( + lists:duplicate(length(noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), + lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, noncharacters()) + )}, + {"clean extended noncharacters", ?_assertEqual( + lists:duplicate(length(extended_noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), + lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, extended_noncharacters()) )} ]. diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index 505a3b1..5556643 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -670,80 +670,56 @@ extended_noncharacters() -> ]. +decode(String, Config) -> + try + [{string, clean_string(String, jsx_utils:parse_config(Config))}, end_json] + catch + error:badarg -> {error, badarg} + end. + + clean_string_test_() -> [ - {"clean codepoints test", ?_assertEqual( - codepoints(), - clean_string(codepoints(), #config{}) + {"clean codepoints", ?_assertEqual( + [{string, codepoints()}, end_json], + decode(codepoints(), []) )}, - {"clean extended codepoints test", ?_assertEqual( - extended_codepoints(), - clean_string(extended_codepoints(), #config{}) + {"clean extended codepoints", ?_assertEqual( + [{string, extended_codepoints()}, end_json], + decode(extended_codepoints(), []) + )}, + {"error reserved space", ?_assertEqual( + lists:duplicate(length(reserved_space()), {error, badarg}), + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, reserved_space()) + )}, + {"error surrogates", ?_assertEqual( + lists:duplicate(length(surrogates()), {error, badarg}), + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, surrogates()) + )}, + {"error noncharacters", ?_assertEqual( + lists:duplicate(length(noncharacters()), {error, badarg}), + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, noncharacters()) + )}, + {"error extended noncharacters", ?_assertEqual( + lists:duplicate(length(extended_noncharacters()), {error, badarg}), + lists:map(fun(Codepoint) -> decode(Codepoint, []) end, extended_noncharacters()) + )}, + {"clean reserved space", ?_assertEqual( + lists:duplicate(length(reserved_space()), [{string, <<16#fffd/utf8>>}, end_json]), + lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, reserved_space()) + )}, + {"clean surrogates", ?_assertEqual( + lists:duplicate(length(surrogates()), [{string, <<16#fffd/utf8>>}, end_json]), + lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, surrogates()) + )}, + {"clean noncharacters", ?_assertEqual( + lists:duplicate(length(noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), + lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, noncharacters()) + )}, + {"clean extended noncharacters", ?_assertEqual( + lists:duplicate(length(extended_noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]), + lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, extended_noncharacters()) )} - ] ++ [ - { - "reserved character: " ++ lists:flatten(io_lib:format("~p", [Codepoint])), - ?_assertError( - badarg, - clean_string(Codepoint, #config{}) - ) - } || Codepoint <- reserved_space() - ] ++ [ - { - "reserved character: " ++ lists:flatten(io_lib:format("~p", [Codepoint])) ++ " (replaced)", - ?_assertEqual( - <<16#fffd/utf8>>, - clean_string(Codepoint, #config{replaced_bad_utf8=true}) - ) - } || Codepoint <- reserved_space() - ] ++ [ - { - "surrogate: " ++ lists:flatten(io_lib:format("~p", [Codepoint])), - ?_assertError( - badarg, - clean_string(Codepoint, #config{}) - ) - } || Codepoint <- surrogates() - ] ++ [ - { - "surrogate: " ++ lists:flatten(io_lib:format("~p", [Codepoint])) ++ " (replaced)", - ?_assertEqual( - <<16#fffd/utf8>>, - clean_string(Codepoint, #config{replaced_bad_utf8=true}) - ) - } || Codepoint <- surrogates() - ] ++ [ - { - "noncharacter: " ++ lists:flatten(io_lib:format("~p", [Codepoint])), - ?_assertError( - badarg, - clean_string(Codepoint, #config{}) - ) - } || Codepoint <- noncharacters() - ] ++ [ - { - "noncharacter: " ++ lists:flatten(io_lib:format("~p", [Codepoint])) ++ " (replaced)", - ?_assertEqual( - <<16#fffd/utf8>>, - clean_string(Codepoint, #config{replaced_bad_utf8=true}) - ) - } || Codepoint <- noncharacters() - ] ++ [ - { - "extended noncharacter: " ++ lists:flatten(io_lib:format("~p", [Codepoint])), - ?_assertError( - badarg, - clean_string(Codepoint, #config{}) - ) - } || Codepoint <- extended_noncharacters() - ] ++ [ - { - "extended noncharacter: " ++ lists:flatten(io_lib:format("~p", [Codepoint])) ++ " (replaced)", - ?_assertEqual( - <<16#fffd/utf8>>, - clean_string(Codepoint, #config{replaced_bad_utf8=true}) - ) - } || Codepoint <- extended_noncharacters() ].