From 65449753791a9dd259a32ac289cb6f191b7f63e1 Mon Sep 17 00:00:00 2001
From: alisdair sullivan
Date: Wed, 28 Mar 2012 23:46:06 -0700
Subject: [PATCH] add more comprehensive checking to json_escape

---
 src/jsx_utils.erl | 136 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 127 insertions(+), 9 deletions(-)

diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl
index aea689c..7c688f9 100644
--- a/src/jsx_utils.erl
+++ b/src/jsx_utils.erl
@@ -592,15 +592,6 @@ binary_escape_test_() ->
                 <<"\\/Date(1303502009425)\\/">>
             )
         },
-        {"bad utf8",
-            ?_assertError(badarg, json_escape(<<32, 64, 128, 255>>, #opts{}))
-        },
-        {"bad utf8 ok",
-            ?_assertEqual(
-                json_escape(<<32, 64, 128, 255>>, #opts{loose_unicode=true}),
-                <<32, 64, 16#fffd/utf8, 16#fffd/utf8>>
-            )
-        },
         {"bad surrogate", ?_assertError(badarg, json_escape(<<237, 160, 127>>, #opts{}))},
         {"bad surrogate ok",
             ?_assertEqual(
@@ -616,4 +607,131 @@ binary_escape_test_() ->
         }
     ].
 
+%% every codepoint in surrogates() must be rejected by default and replaced under loose_unicode
+surrogates_test_() ->
+    [
+        {"surrogates - badjson",
+            ?_assertEqual(check_bad(surrogates()), [])
+        },
+        {"surrogates - replaced",
+            ?_assertEqual(check_replaced(surrogates()), [])
+        }
+    ].
+
+%% feeds the codepoints from good() and good_extended() through check_good/1, expecting no leftovers
+good_characters_test_() ->
+    [
+        {"acceptable codepoints",
+            ?_assertEqual(check_good(good()), [])
+        },
+        {"acceptable extended",
+            ?_assertEqual(check_good(good_extended()), [])
+        }
+    ].
+
+%% every codepoint in reserved_space() must be rejected by default and replaced under loose_unicode
+reserved_test_() ->
+    [
+        {"reserved noncharacters - badjson",
+            ?_assertEqual(check_bad(reserved_space()), [])
+        },
+        {"reserved noncharacters - replaced",
+            ?_assertEqual(check_replaced(reserved_space()), [])
+        }
+    ].
+
+%% every codepoint in noncharacters() must be rejected by default and replaced under loose_unicode
+noncharacters_test_() ->
+    [
+        {"noncharacters - badjson",
+            ?_assertEqual(check_bad(noncharacters()), [])
+        },
+        {"noncharacters - replaced",
+            ?_assertEqual(check_replaced(noncharacters()), [])
+        }
+    ].
+
+%% every codepoint in extended_noncharacters() must be rejected by default and replaced when loose
+extended_noncharacters_test_() ->
+    [
+        {"extended noncharacters - badjson",
+            ?_assertEqual(check_bad(extended_noncharacters()), [])
+        },
+        {"extended noncharacters - replaced",
+            ?_assertEqual(check_replaced(extended_noncharacters()), [])
+        }
+    ].
+
+%% returns [] when every codepoint yields {error, badjson} under default opts, else the unmatched tail
+check_bad(List) ->
+    lists:dropwhile(fun({_, {error, badjson}}) -> true ; (_) -> false end,
+        check(List, #opts{}, [])
+    ).
+
+%% returns [] when every codepoint escapes to u+fffd under loose_unicode, else the unmatched tail
+check_replaced(List) ->
+    lists:dropwhile(fun({_, <<16#fffd/utf8>>}) -> true
+            ; (_) -> false
+        end,
+        check(List, #opts{loose_unicode=true}, [])
+    ).
+
+%% returns [] when no codepoint is rejected; the old fun({_, _}) -> true matched errors too
+check_good(List) ->
+    lists:dropwhile(fun({_, {error, badjson}}) -> false ; (_) -> true end,
+        check(List, #opts{}, [])
+    ).
+
+%% escapes each hand-encoded codepoint and pairs it with the result, in reverse input order
+check([], _Opts, Acc) -> Acc;
+check([H|T], Opts, Acc) ->
+    R = escape(to_fake_utf(H, utf8), Opts),
+    check(T, Opts, [{H, R} | Acc]).
+
+%% turns the badarg raised by json_escape into {error, badjson} so results can be pattern matched
+escape(JSON, Opts) ->
+    try json_escape(JSON, Opts)
+        catch error:badarg -> {error, badjson}
+    end.
+
+%% the two codepoints u+fffe and u+ffff
+noncharacters() -> lists:seq(16#fffe, 16#ffff).
+
+%% the codepoints ending in fffe and ffff for each leading value 16#1 through 16#10
+extended_noncharacters() ->
+    [16#1fffe, 16#1ffff, 16#2fffe, 16#2ffff]
+        ++ [16#3fffe, 16#3ffff, 16#4fffe, 16#4ffff]
+        ++ [16#5fffe, 16#5ffff, 16#6fffe, 16#6ffff]
+        ++ [16#7fffe, 16#7ffff, 16#8fffe, 16#8ffff]
+        ++ [16#9fffe, 16#9ffff, 16#afffe, 16#affff]
+        ++ [16#bfffe, 16#bffff, 16#cfffe, 16#cffff]
+        ++ [16#dfffe, 16#dffff, 16#efffe, 16#effff]
+        ++ [16#ffffe, 16#fffff, 16#10fffe, 16#10ffff].
+
+%% the codepoints u+d800 through u+dfff
+surrogates() -> lists:seq(16#d800, 16#dfff).
+
+%% the codepoints u+fdd0 through u+fdef
+reserved_space() -> lists:seq(16#fdd0, 16#fdef).
+
+%% everything up to u+fffd except the surrogates() and reserved_space() ranges
+good() -> lists:seq(16#0000, 16#d7ff) ++ lists:seq(16#e000, 16#fdcf) ++ lists:seq(16#fdf0, 16#fffd).
+
+%% covers only u+100000..u+10fffd; NOTE(review): u+10000..u+ffffd are never exercised - intended?
+good_extended() -> lists:seq(16#100000, 16#10fffd).
+
+%% hand-builds the 1 to 4 byte utf-8 style encoding of codepoint N, chosen by the range N falls in
+%% erlang refuses to encode certain codepoints, so fake them all
+to_fake_utf(N, utf8) when N < 16#0080 -> <<N:8>>;
+to_fake_utf(N, utf8) when N < 16#0800 ->
+    <<0:5, Y:5, X:6>> = <<N:16>>,
+    <<2#110:3, Y:5, 2#10:2, X:6>>;
+to_fake_utf(N, utf8) when N < 16#10000 ->
+    <<Z:4, Y:6, X:6>> = <<N:16>>,
+    <<2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6>>;
+to_fake_utf(N, utf8) ->
+    <<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>,
+    <<2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6>>.
+
+
 -endif.
\ No newline at end of file