fix the tenses and forms of option flags

This commit is contained in:
alisdair sullivan 2012-03-31 21:58:18 -07:00
parent fdea98ebbf
commit 3bc2c68ea3
8 changed files with 221 additions and 211 deletions

View file

@ -1,4 +1,4 @@
{name, "bad_low_surrogate_replaced"}. {name, "bad_low_surrogate_replaced"}.
{jsx, [start_array,{string, <<16#fffd/utf8, 16#fffd/utf8>>},end_array,end_json]}. {jsx, [start_array,{string, <<16#fffd/utf8, 16#fffd/utf8>>},end_array,end_json]}.
{json, "bad_low_surrogate_replaced.json"}. {json, "bad_low_surrogate_replaced.json"}.
{jsx_flags, [loose_unicode]}. {jsx_flags, [replaced_bad_utf8]}.

View file

@ -1,4 +1,4 @@
{name, "unpaired surrogate replaced"}. {name, "unpaired surrogate replaced"}.
{jsx, [start_array,{string,<<65533/utf8,$b,$l,$a,$h>>},end_array,end_json]}. {jsx, [start_array,{string,<<65533/utf8,$b,$l,$a,$h>>},end_array,end_json]}.
{json, "unpaired_surrogate_replaced.json"}. {json, "unpaired_surrogate_replaced.json"}.
{jsx_flags, [loose_unicode]}. {jsx_flags, [replaced_bad_utf8]}.

View file

@ -130,55 +130,45 @@ encoder_decoder_equiv_test_() ->
<<"[\"a\", 17, 3.14, true, {\"k\":false}, []]">> <<"[\"a\", 17, 3.14, true, {\"k\":false}, []]">>
) =:= (jsx_encoder:encoder(?MODULE, [], []))([<<"a">>, 17, 3.14, true, [{<<"k">>, false}], []]) ) =:= (jsx_encoder:encoder(?MODULE, [], []))([<<"a">>, 17, 3.14, true, [{<<"k">>, false}], []])
) )
},
{"string escape equivalency",
?_assertEqual(
(jsx_decoder:decoder(?MODULE, [], [json_escape, loose_unicode]))(
<<"\"\\u0000\\b\\t\\n\\f\\r\\\\\\\"'/", 16#2028/utf8, 16#2029/utf8, 239, 191, 191, "\"">>
),
(jsx_encoder:encoder(?MODULE, [], [json_escape, loose_unicode]))(
<<0, $\b, $\t, $\n, $\f, $\r, $\\, $\", $', $/, 16#2028/utf8, 16#2029/utf8, 239, 191, 191>>
)
)
} }
]. ].
single_quotes_test_() -> single_quoted_strings_test_() ->
[ [
{"single quoted keys", {"single quoted keys",
?_assertEqual( ?_assertEqual(
to_term(<<"{'key':true}">>, [single_quotes]), to_term(<<"{'key':true}">>, [single_quoted_strings]),
[{<<"key">>, true}] [{<<"key">>, true}]
) )
}, },
{"multiple single quoted keys", {"multiple single quoted keys",
?_assertEqual( ?_assertEqual(
to_term(<<"{'key':true, 'another key':true}">>, [single_quotes]), to_term(<<"{'key':true, 'another key':true}">>, [single_quoted_strings]),
[{<<"key">>, true}, {<<"another key">>, true}] [{<<"key">>, true}, {<<"another key">>, true}]
) )
}, },
{"nested single quoted keys", {"nested single quoted keys",
?_assertEqual( ?_assertEqual(
to_term(<<"{'key': {'key':true, 'another key':true}}">>, [single_quotes]), to_term(<<"{'key': {'key':true, 'another key':true}}">>, [single_quoted_strings]),
[{<<"key">>, [{<<"key">>, true}, {<<"another key">>, true}]}] [{<<"key">>, [{<<"key">>, true}, {<<"another key">>, true}]}]
) )
}, },
{"single quoted string", {"single quoted string",
?_assertEqual( ?_assertEqual(
to_term(<<"['string']">>, [single_quotes]), to_term(<<"['string']">>, [single_quoted_strings]),
[<<"string">>] [<<"string">>]
) )
}, },
{"single quote in double quoted string", {"single quote in double quoted string",
?_assertEqual( ?_assertEqual(
to_term(<<"[\"a single quote: '\"]">>, [single_quotes]), to_term(<<"[\"a single quote: '\"]">>, [single_quoted_strings]),
[<<"a single quote: '">>] [<<"a single quote: '">>]
) )
}, },
{"escaped single quote in single quoted string", {"escaped single quote in single quoted string",
?_assertEqual( ?_assertEqual(
to_term(<<"['a single quote: \\'']">>, [single_quotes]), to_term(<<"['a single quote: \\'']">>, [single_quoted_strings]),
[<<"a single quote: '">>] [<<"a single quote: '">>]
) )
}, },
@ -191,7 +181,7 @@ single_quotes_test_() ->
{"mismatched quotes", {"mismatched quotes",
?_assertError( ?_assertError(
badarg, badarg,
to_term(<<"['mismatched\"]">>, [single_quotes]) to_term(<<"['mismatched\"]">>, [single_quoted_strings])
) )
} }
]. ].

View file

@ -130,7 +130,7 @@ decoder(Handler, State, Opts) ->
value(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) -> value(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) ->
string(Rest, Handler, [?new_seq()|Stack], Opts); string(Rest, Handler, [?new_seq()|Stack], Opts);
value(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quotes=true}) -> value(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) ->
string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts);
value(<<$t, Rest/binary>>, Handler, Stack, Opts) -> value(<<$t, Rest/binary>>, Handler, Stack, Opts) ->
tr(Rest, Handler, Stack, Opts); tr(Rest, Handler, Stack, Opts);
@ -161,7 +161,7 @@ value(Bin, Handler, Stack, Opts) ->
object(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) -> object(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) ->
string(Rest, Handler, [?new_seq()|Stack], Opts); string(Rest, Handler, [?new_seq()|Stack], Opts);
object(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quotes=true}) -> object(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) ->
string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts);
object(<<?end_object, Rest/binary>>, {Handler, State}, [key|Stack], Opts) -> object(<<?end_object, Rest/binary>>, {Handler, State}, [key|Stack], Opts) ->
maybe_done(Rest, {Handler, Handler:handle_event(end_object, State)}, Stack, Opts); maybe_done(Rest, {Handler, Handler:handle_event(end_object, State)}, Stack, Opts);
@ -178,7 +178,7 @@ object(Bin, Handler, Stack, Opts) ->
array(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) -> array(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) ->
string(Rest, Handler, [?new_seq()|Stack], Opts); string(Rest, Handler, [?new_seq()|Stack], Opts);
array(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quotes=true}) -> array(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) ->
string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts);
array(<<$t, Rest/binary>>, Handler, Stack, Opts) -> array(<<$t, Rest/binary>>, Handler, Stack, Opts) ->
tr(Rest, Handler, Stack, Opts); tr(Rest, Handler, Stack, Opts);
@ -224,7 +224,7 @@ colon(Bin, Handler, Stack, Opts) ->
key(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) -> key(<<?doublequote, Rest/binary>>, Handler, Stack, Opts) ->
string(Rest, Handler, [?new_seq()|Stack], Opts); string(Rest, Handler, [?new_seq()|Stack], Opts);
key(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quotes=true}) -> key(<<?singlequote, Rest/binary>>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) ->
string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts); string(Rest, Handler, [?new_seq(), single_quote|Stack], Opts);
key(<<S, Rest/binary>>, Handler, Stack, Opts) when ?is_whitespace(S) -> key(<<S, Rest/binary>>, Handler, Stack, Opts) when ?is_whitespace(S) ->
key(Rest, Handler, Stack, Opts); key(Rest, Handler, Stack, Opts);
@ -278,7 +278,7 @@ string(<<37, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
string(<<38, Rest/binary>>, Handler, [Acc|Stack], Opts) -> string(<<38, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
string(Rest, Handler, [?acc_seq(Acc, 38)|Stack], Opts); string(Rest, Handler, [?acc_seq(Acc, 38)|Stack], Opts);
string(<<?singlequote, Rest/binary>>, {Handler, State}, [Acc|Stack], Opts) -> string(<<?singlequote, Rest/binary>>, {Handler, State}, [Acc|Stack], Opts) ->
case Opts#opts.single_quotes of case Opts#opts.single_quoted_strings of
true -> true ->
case Stack of case Stack of
[single_quote, key|S] -> [single_quote, key|S] ->
@ -511,7 +511,7 @@ string(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
; S when S >= 16#100000, S < 16#10fffe -> ; S when S >= 16#100000, S < 16#10fffe ->
string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts) string(Rest, Handler, [?acc_seq(Acc, S)|Stack], Opts)
; _ -> ; _ ->
case Opts#opts.loose_unicode of case Opts#opts.replaced_bad_utf8 of
true -> noncharacter(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts) true -> noncharacter(<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts)
; false -> ?error([<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts]) ; false -> ?error([<<S/utf8, Rest/binary>>, Handler, [Acc|Stack], Opts])
end end
@ -520,7 +520,7 @@ string(Bin, Handler, Stack, Opts) ->
case partial_utf(Bin) of case partial_utf(Bin) of
true -> ?incomplete(string, Bin, Handler, Stack, Opts) true -> ?incomplete(string, Bin, Handler, Stack, Opts)
; false -> ; false ->
case Opts#opts.loose_unicode of case Opts#opts.replaced_bad_utf8 of
true -> noncharacter(Bin, Handler, Stack, Opts) true -> noncharacter(Bin, Handler, Stack, Opts)
; false -> ?error([Bin, Handler, Stack, Opts]) ; false -> ?error([Bin, Handler, Stack, Opts])
end end
@ -580,17 +580,17 @@ escape(<<$t, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\t, Opts))|Stack], Opts); string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\t, Opts))|Stack], Opts);
escape(<<?rsolidus, Rest/binary>>, Handler, [Acc|Stack], Opts) -> escape(<<?rsolidus, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\\, Opts))|Stack], Opts); string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\\, Opts))|Stack], Opts);
escape(<<?solidus, Rest/binary>>, Handler, [Acc|Stack], Opts=#opts{escape_forward_slash=true}) -> escape(<<?solidus, Rest/binary>>, Handler, [Acc|Stack], Opts=#opts{escaped_forward_slashes=true}) ->
string(Rest, Handler, [?acc_seq(Acc, maybe_replace($/, Opts))|Stack], Opts); string(Rest, Handler, [?acc_seq(Acc, maybe_replace($/, Opts))|Stack], Opts);
escape(<<?doublequote, Rest/binary>>, Handler, [Acc|Stack], Opts) -> escape(<<?doublequote, Rest/binary>>, Handler, [Acc|Stack], Opts) ->
string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\", Opts))|Stack], Opts); string(Rest, Handler, [?acc_seq(Acc, maybe_replace($\", Opts))|Stack], Opts);
escape(<<?singlequote, Rest/binary>>, Handler, [Acc|Stack], Opts = #opts{single_quotes=true}) -> escape(<<?singlequote, Rest/binary>>, Handler, [Acc|Stack], Opts = #opts{single_quoted_strings=true}) ->
string(Rest, Handler, [?acc_seq(Acc, maybe_replace(?singlequote, Opts))|Stack], Opts); string(Rest, Handler, [?acc_seq(Acc, maybe_replace(?singlequote, Opts))|Stack], Opts);
escape(<<$u, Rest/binary>>, Handler, Stack, Opts) -> escape(<<$u, Rest/binary>>, Handler, Stack, Opts) ->
escaped_unicode(Rest, Handler, Stack, Opts); escaped_unicode(Rest, Handler, Stack, Opts);
escape(<<>>, Handler, Stack, Opts) -> escape(<<>>, Handler, Stack, Opts) ->
?incomplete(escape, <<>>, Handler, Stack, Opts); ?incomplete(escape, <<>>, Handler, Stack, Opts);
escape(Bin, Handler, [Acc|Stack], Opts=#opts{ignore_bad_escapes=true}) -> escape(Bin, Handler, [Acc|Stack], Opts=#opts{ignored_bad_escapes=true}) ->
string(Bin, Handler, [?acc_seq(Acc, ?rsolidus)|Stack], Opts); string(Bin, Handler, [?acc_seq(Acc, ?rsolidus)|Stack], Opts);
escape(Bin, Handler, Stack, Opts) -> escape(Bin, Handler, Stack, Opts) ->
?error([Bin, Handler, Stack, Opts]). ?error([Bin, Handler, Stack, Opts]).
@ -606,7 +606,7 @@ escaped_unicode(<<A, B, C, D, Rest/binary>>, Handler, [Acc|Stack], Opts)
low_surrogate(Rest, Handler, [X, Acc|Stack], Opts) low_surrogate(Rest, Handler, [X, Acc|Stack], Opts)
%% low surrogate, illegal in this position %% low surrogate, illegal in this position
; X when X >= 16#dc00, X =< 16#dfff -> ; X when X >= 16#dc00, X =< 16#dfff ->
case Opts#opts.loose_unicode of case Opts#opts.replaced_bad_utf8 of
true -> string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts) true -> string(Rest, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts)
; false -> ?error([<<A, B, C, D, Rest/binary>>, Handler, [Acc|Stack], Opts]) ; false -> ?error([<<A, B, C, D, Rest/binary>>, Handler, [Acc|Stack], Opts])
end end
@ -635,7 +635,7 @@ low_surrogate(<<?rsolidus, $u, A, B, C, D, Rest/binary>>, Handler, [High, Acc|St
case (Y =< 16#d800 orelse Y >= 16#e000) of case (Y =< 16#d800 orelse Y >= 16#e000) of
true -> string(Rest, Handler, [?acc_seq(Acc, Y)|Stack], Opts) true -> string(Rest, Handler, [?acc_seq(Acc, Y)|Stack], Opts)
; false -> ; false ->
case Opts#opts.loose_unicode of case Opts#opts.replaced_bad_utf8 of
true -> true ->
string(Rest, Handler, [?acc_seq(Acc, 16#fffd, 16#fffd)|Stack], Opts) string(Rest, Handler, [?acc_seq(Acc, 16#fffd, 16#fffd)|Stack], Opts)
; false -> ; false ->
@ -643,7 +643,7 @@ low_surrogate(<<?rsolidus, $u, A, B, C, D, Rest/binary>>, Handler, [High, Acc|St
end end
end end
; _ -> ; _ ->
case Opts#opts.loose_unicode of case Opts#opts.replaced_bad_utf8 of
true -> string(Rest, Handler, [?acc_seq(Acc, 16#fffd, 16#fffd)|Stack], Opts) true -> string(Rest, Handler, [?acc_seq(Acc, 16#fffd, 16#fffd)|Stack], Opts)
; false -> ?error([<<?rsolidus, $u, A, B, C, D, Rest/binary>>, Handler, [High, Acc|Stack], Opts]) ; false -> ?error([<<?rsolidus, $u, A, B, C, D, Rest/binary>>, Handler, [High, Acc|Stack], Opts])
end end
@ -652,7 +652,7 @@ low_surrogate(Bin, Handler, [High, Acc|Stack], Opts) ->
case is_partial_low(Bin) of case is_partial_low(Bin) of
true -> ?incomplete(low_surrogate, Bin, Handler, [High, Acc|Stack], Opts) true -> ?incomplete(low_surrogate, Bin, Handler, [High, Acc|Stack], Opts)
; false -> ; false ->
case Opts#opts.loose_unicode of case Opts#opts.replaced_bad_utf8 of
true -> string(Bin, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts) true -> string(Bin, Handler, [?acc_seq(Acc, 16#fffd)|Stack], Opts)
; false -> ?error([Bin, Handler, [High, Acc|Stack], Opts]) ; false -> ?error([Bin, Handler, [High, Acc|Stack], Opts])
end end
@ -674,29 +674,29 @@ surrogate_to_codepoint(High, Low) ->
maybe_replace(X, #opts{dirty_strings=true}) when is_integer(X) -> [X]; maybe_replace(X, #opts{dirty_strings=true}) when is_integer(X) -> [X];
maybe_replace($\b, #opts{json_escape=true}) -> [$\\, $b]; maybe_replace($\b, #opts{escaped_strings=true}) -> [$\\, $b];
maybe_replace($\t, #opts{json_escape=true}) -> [$\\, $t]; maybe_replace($\t, #opts{escaped_strings=true}) -> [$\\, $t];
maybe_replace($\n, #opts{json_escape=true}) -> [$\\, $n]; maybe_replace($\n, #opts{escaped_strings=true}) -> [$\\, $n];
maybe_replace($\f, #opts{json_escape=true}) -> [$\\, $f]; maybe_replace($\f, #opts{escaped_strings=true}) -> [$\\, $f];
maybe_replace($\r, #opts{json_escape=true}) -> [$\\, $r]; maybe_replace($\r, #opts{escaped_strings=true}) -> [$\\, $r];
maybe_replace($\", #opts{json_escape=true}) -> [$\\, $\"]; maybe_replace($\", #opts{escaped_strings=true}) -> [$\\, $\"];
maybe_replace($', Opts=#opts{json_escape=true}) -> maybe_replace($', Opts=#opts{escaped_strings=true}) ->
case Opts#opts.single_quotes of case Opts#opts.single_quoted_strings of
true -> [$\\, $'] true -> [$\\, $']
; false -> [$'] ; false -> [$']
end; end;
maybe_replace($/, Opts=#opts{json_escape=true}) -> maybe_replace($/, Opts=#opts{escaped_strings=true}) ->
case Opts#opts.escape_forward_slash of case Opts#opts.escaped_forward_slashes of
true -> [$\\, $/] true -> [$\\, $/]
; false -> [$/] ; false -> [$/]
end; end;
maybe_replace($\\, #opts{json_escape=true}) -> [$\\, $\\]; maybe_replace($\\, #opts{escaped_strings=true}) -> [$\\, $\\];
maybe_replace(X, Opts=#opts{json_escape=true}) when X == 16#2028; X == 16#2029 -> maybe_replace(X, Opts=#opts{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
case Opts#opts.no_jsonp_escapes of case Opts#opts.unescaped_jsonp of
true -> [X] true -> [X]
; false -> jsx_utils:json_escape_sequence(X) ; false -> jsx_utils:json_escape_sequence(X)
end; end;
maybe_replace(X, #opts{json_escape=true}) when X < 32 -> maybe_replace(X, #opts{escaped_strings=true}) when X < 32 ->
jsx_utils:json_escape_sequence(X); jsx_utils:json_escape_sequence(X);
maybe_replace(X, _Opts) -> [X]. maybe_replace(X, _Opts) -> [X].
@ -1073,20 +1073,20 @@ bad_utf8_test_() ->
?_assert(is_bad(xcode(<<16#0080>>))) ?_assert(is_bad(xcode(<<16#0080>>)))
}, },
{"orphan continuation byte u+0080 replaced", {"orphan continuation byte u+0080 replaced",
?_assertEqual(xcode(<<16#0080>>, [loose_unicode]), <<16#fffd/utf8>>) ?_assertEqual(xcode(<<16#0080>>, [replaced_bad_utf8]), <<16#fffd/utf8>>)
}, },
{"orphan continuation byte u+00bf", {"orphan continuation byte u+00bf",
?_assert(is_bad(xcode(<<16#00bf>>))) ?_assert(is_bad(xcode(<<16#00bf>>)))
}, },
{"orphan continuation byte u+00bf replaced", {"orphan continuation byte u+00bf replaced",
?_assertEqual(xcode(<<16#00bf>>, [loose_unicode]), <<16#fffd/utf8>>) ?_assertEqual(xcode(<<16#00bf>>, [replaced_bad_utf8]), <<16#fffd/utf8>>)
}, },
{"2 continuation bytes", {"2 continuation bytes",
?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>))) ?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>)))
}, },
{"2 continuation bytes replaced", {"2 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 2) binary:copy(<<16#fffd/utf8>>, 2)
) )
}, },
@ -1095,7 +1095,7 @@ bad_utf8_test_() ->
}, },
{"3 continuation bytes replaced", {"3 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 3) binary:copy(<<16#fffd/utf8>>, 3)
) )
}, },
@ -1104,7 +1104,7 @@ bad_utf8_test_() ->
}, },
{"4 continuation bytes replaced", {"4 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 4) binary:copy(<<16#fffd/utf8>>, 4)
) )
}, },
@ -1113,7 +1113,7 @@ bad_utf8_test_() ->
}, },
{"5 continuation bytes replaced", {"5 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 5) binary:copy(<<16#fffd/utf8>>, 5)
) )
}, },
@ -1122,7 +1122,7 @@ bad_utf8_test_() ->
}, },
{"6 continuation bytes replaced", {"6 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 6) binary:copy(<<16#fffd/utf8>>, 6)
) )
}, },
@ -1131,7 +1131,7 @@ bad_utf8_test_() ->
}, },
{"all continuation bytes replaced", {"all continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [loose_unicode]), xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))) binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf)))
) )
}, },
@ -1140,7 +1140,7 @@ bad_utf8_test_() ->
}, },
{"lonely start byte replaced", {"lonely start byte replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#00c0>>, [loose_unicode]), xcode(<<16#00c0>>, [replaced_bad_utf8]),
<<16#fffd/utf8>> <<16#fffd/utf8>>
) )
}, },
@ -1149,7 +1149,7 @@ bad_utf8_test_() ->
}, },
{"lonely start bytes (2 byte) replaced", {"lonely start bytes (2 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#00c0, 32, 16#00df>>, [loose_unicode]), xcode(<<16#00c0, 32, 16#00df>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32, 16#fffd/utf8>> <<16#fffd/utf8, 32, 16#fffd/utf8>>
) )
}, },
@ -1158,7 +1158,7 @@ bad_utf8_test_() ->
}, },
{"lonely start bytes (3 byte) replaced", {"lonely start bytes (3 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#00e0, 32, 16#00ef>>, [loose_unicode]), xcode(<<16#00e0, 32, 16#00ef>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32, 16#fffd/utf8>> <<16#fffd/utf8, 32, 16#fffd/utf8>>
) )
}, },
@ -1167,7 +1167,7 @@ bad_utf8_test_() ->
}, },
{"lonely start bytes (4 byte) replaced", {"lonely start bytes (4 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#00f0, 32, 16#00f7>>, [loose_unicode]), xcode(<<16#00f0, 32, 16#00f7>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32, 16#fffd/utf8>> <<16#fffd/utf8, 32, 16#fffd/utf8>>
) )
}, },
@ -1176,7 +1176,7 @@ bad_utf8_test_() ->
}, },
{"missing continuation byte (3 byte) replaced", {"missing continuation byte (3 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<224, 160, 32>>, [loose_unicode]), xcode(<<224, 160, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -1185,7 +1185,7 @@ bad_utf8_test_() ->
}, },
{"missing continuation byte2 (4 byte missing one) replaced", {"missing continuation byte2 (4 byte missing one) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<240, 144, 128, 32>>, [loose_unicode]), xcode(<<240, 144, 128, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -1194,7 +1194,7 @@ bad_utf8_test_() ->
}, },
{"missing continuation byte2 (4 byte missing two) replaced", {"missing continuation byte2 (4 byte missing two) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<240, 144, 32>>, [loose_unicode]), xcode(<<240, 144, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -1203,7 +1203,7 @@ bad_utf8_test_() ->
}, },
{"overlong encoding of u+002f (2 byte) replaced", {"overlong encoding of u+002f (2 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#c0, 16#af, 32>>, [loose_unicode]), xcode(<<16#c0, 16#af, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -1212,7 +1212,7 @@ bad_utf8_test_() ->
}, },
{"overlong encoding of u+002f (3 byte) replaced", {"overlong encoding of u+002f (3 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#e0, 16#80, 16#af, 32>>, [loose_unicode]), xcode(<<16#e0, 16#80, 16#af, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -1221,7 +1221,7 @@ bad_utf8_test_() ->
}, },
{"overlong encoding of u+002f (4 byte) replaced", {"overlong encoding of u+002f (4 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [loose_unicode]), xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -1230,7 +1230,7 @@ bad_utf8_test_() ->
}, },
{"highest overlong 2 byte sequence replaced", {"highest overlong 2 byte sequence replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#c1, 16#bf, 32>>, [loose_unicode]), xcode(<<16#c1, 16#bf, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -1239,7 +1239,7 @@ bad_utf8_test_() ->
}, },
{"highest overlong 3 byte sequence replaced", {"highest overlong 3 byte sequence replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#e0, 16#9f, 16#bf, 32>>, [loose_unicode]), xcode(<<16#e0, 16#9f, 16#bf, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -1248,7 +1248,7 @@ bad_utf8_test_() ->
}, },
{"highest overlong 4 byte sequence replaced", {"highest overlong 4 byte sequence replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [loose_unicode]), xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
} }
@ -1263,10 +1263,10 @@ decode(JSON, Opts) ->
end. end.
ignore_bad_escapes_test_() -> ignored_bad_escapes_test_() ->
[ [
{"ignore unrecognized escape sequence", ?_assertEqual( {"ignore unrecognized escape sequence", ?_assertEqual(
decode(<<"[\"\\x25\"]">>, [ignore_bad_escapes]), decode(<<"[\"\\x25\"]">>, [ignored_bad_escapes]),
[start_array, {string, <<"\\x25">>}, end_array, end_json] [start_array, {string, <<"\\x25">>}, end_array, end_json]
)} )}
]. ].
@ -1457,10 +1457,10 @@ comments_test_() ->
]. ].
escape_forward_slash_test_() -> escaped_forward_slashes_test_() ->
[ [
{"escape forward slash test", ?_assertEqual( {"escape forward slash test", ?_assertEqual(
decode(<<"[ \" \/ \" ]">>, [escape_forward_slash]), decode(<<"[ \" \/ \" ]">>, [escaped_forward_slashes]),
[start_array, {string, <<" / ">>}, end_array, end_json] [start_array, {string, <<" / ">>}, end_array, end_json]
)} )}
]. ].
@ -1468,29 +1468,29 @@ escape_forward_slash_test_() ->
escapes_test_() -> escapes_test_() ->
[ [
{"backspace escape", ?_assertEqual(decode(<<"\"\\b\"">>, [json_escape]), [{string, <<"\\b">>}, end_json])}, {"backspace escape", ?_assertEqual(decode(<<"\"\\b\"">>, [escaped_strings]), [{string, <<"\\b">>}, end_json])},
{"formfeed escape", ?_assertEqual(decode(<<"\"\\f\"">>, [json_escape]), [{string, <<"\\f">>}, end_json])}, {"formfeed escape", ?_assertEqual(decode(<<"\"\\f\"">>, [escaped_strings]), [{string, <<"\\f">>}, end_json])},
{"newline escape", ?_assertEqual(decode(<<"\"\\n\"">>, [json_escape]), [{string, <<"\\n">>}, end_json])}, {"newline escape", ?_assertEqual(decode(<<"\"\\n\"">>, [escaped_strings]), [{string, <<"\\n">>}, end_json])},
{"carriage return escape", ?_assertEqual(decode(<<"\"\\r\"">>, [json_escape]), [{string, <<"\\r">>}, end_json])}, {"carriage return escape", ?_assertEqual(decode(<<"\"\\r\"">>, [escaped_strings]), [{string, <<"\\r">>}, end_json])},
{"tab escape", ?_assertEqual(decode(<<"\"\\t\"">>, [json_escape]), [{string, <<"\\t">>}, end_json])}, {"tab escape", ?_assertEqual(decode(<<"\"\\t\"">>, [escaped_strings]), [{string, <<"\\t">>}, end_json])},
{"quote escape", ?_assertEqual(decode(<<"\"\\\"\"">>, [json_escape]), [{string, <<"\\\"">>}, end_json])}, {"quote escape", ?_assertEqual(decode(<<"\"\\\"\"">>, [escaped_strings]), [{string, <<"\\\"">>}, end_json])},
{"single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, {"single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [escaped_strings, single_quoted_strings]), [{string, <<"\\'">>}, end_json])},
{"naked single quote escape", ?_assertEqual(decode(<<"'\\''">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, {"naked single quote escape", ?_assertEqual(decode(<<"'\\''">>, [escaped_strings, single_quoted_strings]), [{string, <<"\\'">>}, end_json])},
{"no single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [json_escape]), [{string, <<"'">>}, end_json])}, {"no single quote escape", ?_assertEqual(decode(<<"\"'\"">>, [escaped_strings]), [{string, <<"'">>}, end_json])},
{"forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [json_escape, escape_forward_slash]), [{string, <<"\\/">>}, end_json])}, {"forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [escaped_strings, escaped_forward_slashes]), [{string, <<"\\/">>}, end_json])},
{"no forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [json_escape]), [{string, <<"/">>}, end_json])}, {"no forward slash escape", ?_assertEqual(decode(<<"\"/\"">>, [escaped_strings]), [{string, <<"/">>}, end_json])},
{"back slash escape", ?_assertEqual(decode(<<"\"\\\\\"">>, [json_escape]), [{string, <<"\\\\">>}, end_json])}, {"back slash escape", ?_assertEqual(decode(<<"\"\\\\\"">>, [escaped_strings]), [{string, <<"\\\\">>}, end_json])},
{"jsonp escape", ?_assertEqual( {"jsonp escape", ?_assertEqual(
decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [json_escape]), decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [escaped_strings]),
[{string, <<"\\u2028\\u2029">>}, end_json] [{string, <<"\\u2028\\u2029">>}, end_json]
)}, )},
{"no jsonp escape", ?_assertEqual( {"no jsonp escape", ?_assertEqual(
decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [json_escape, no_jsonp_escapes]), decode(<<$\", 16#2028/utf8, 16#2029/utf8, $\">>, [escaped_strings, unescaped_jsonp]),
[{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json] [{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json]
)}, )},
{"control escape", ?_assertEqual(decode(<<$\", "\\u0000"/utf8, $\">>, [json_escape]), [{string, <<"\\u0000">>}, end_json])}, {"control escape", ?_assertEqual(decode(<<$\", "\\u0000"/utf8, $\">>, [escaped_strings]), [{string, <<"\\u0000">>}, end_json])},
{"dirty strings", ?_assertEqual(decode(<<"\"\\n\"">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])}, {"dirty strings", ?_assertEqual(decode(<<"\"\\n\"">>, [escaped_strings, dirty_strings]), [{string, <<"\n">>}, end_json])},
{"ignore bad escapes", ?_assertEqual(decode(<<"\"\\x25\"">>, [json_escape, ignore_bad_escapes]), [{string, <<"\\x25">>}, end_json])} {"ignore bad escapes", ?_assertEqual(decode(<<"\"\\x25\"">>, [escaped_strings, ignored_bad_escapes]), [{string, <<"\\x25">>}, end_json])}
]. ].
@ -1551,14 +1551,14 @@ good_characters_test_() ->
{"acceptable codepoints", {"acceptable codepoints",
?_assert(check_good(good())) ?_assert(check_good(good()))
}, },
{"acceptable codepoints - json_escape", {"acceptable codepoints - escaped_strings",
?_assert(check_good(good(), [json_escape])) ?_assert(check_good(good(), [escaped_strings]))
}, },
{"acceptable codepoints - loose_unicode", {"acceptable codepoints - replaced_bad_utf8",
?_assert(check_good(good(), [json_escape])) ?_assert(check_good(good(), [escaped_strings]))
}, },
{"acceptable codepoints - json_escape + loose_unicode", {"acceptable codepoints - escaped_strings + replaced_bad_utf8",
?_assert(check_good(good(), [json_escape, loose_unicode])) ?_assert(check_good(good(), [escaped_strings, replaced_bad_utf8]))
}, },
{"acceptable extended", {"acceptable extended",
?_assert(check_good(good_extended())) ?_assert(check_good(good_extended()))
@ -1575,7 +1575,7 @@ check_bad(List) ->
check_replaced(List) -> check_replaced(List) ->
[] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true ; (_) -> false [] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true ; (_) -> false
end, end,
check(List, [loose_unicode], []) check(List, [replaced_bad_utf8], [])
). ).

View file

@ -104,7 +104,7 @@ fix_key(Key) when is_binary(Key) -> Key.
clean_string(Bin, Opts) -> clean_string(Bin, Opts) ->
case Opts#opts.loose_unicode orelse Opts#opts.json_escape of case Opts#opts.replaced_bad_utf8 orelse Opts#opts.escaped_strings of
true -> clean(Bin, [], Opts) true -> clean(Bin, [], Opts)
; false -> ensure_clean(Bin), Bin ; false -> ensure_clean(Bin), Bin
end. end.
@ -465,33 +465,33 @@ strip_continuations(Bin, _) -> Bin.
maybe_replace(X, #opts{dirty_strings=true}) when is_integer(X) -> [X]; maybe_replace(X, #opts{dirty_strings=true}) when is_integer(X) -> [X];
maybe_replace($\b, #opts{json_escape=true}) -> [$b, $\\]; maybe_replace($\b, #opts{escaped_strings=true}) -> [$b, $\\];
maybe_replace($\t, #opts{json_escape=true}) -> [$t, $\\]; maybe_replace($\t, #opts{escaped_strings=true}) -> [$t, $\\];
maybe_replace($\n, #opts{json_escape=true}) -> [$n, $\\]; maybe_replace($\n, #opts{escaped_strings=true}) -> [$n, $\\];
maybe_replace($\f, #opts{json_escape=true}) -> [$f, $\\]; maybe_replace($\f, #opts{escaped_strings=true}) -> [$f, $\\];
maybe_replace($\r, #opts{json_escape=true}) -> [$r, $\\]; maybe_replace($\r, #opts{escaped_strings=true}) -> [$r, $\\];
maybe_replace($\", #opts{json_escape=true}) -> [$\", $\\]; maybe_replace($\", #opts{escaped_strings=true}) -> [$\", $\\];
maybe_replace($', Opts=#opts{json_escape=true}) -> maybe_replace($', Opts=#opts{escaped_strings=true}) ->
case Opts#opts.single_quotes of case Opts#opts.single_quoted_strings of
true -> [$', $\\] true -> [$', $\\]
; false -> [$'] ; false -> [$']
end; end;
maybe_replace($/, Opts=#opts{json_escape=true}) -> maybe_replace($/, Opts=#opts{escaped_strings=true}) ->
case Opts#opts.escape_forward_slash of case Opts#opts.escaped_forward_slashes of
true -> [$/, $\\] true -> [$/, $\\]
; false -> [$/] ; false -> [$/]
end; end;
maybe_replace($\\, #opts{json_escape=true}) -> [$\\, $\\]; maybe_replace($\\, #opts{escaped_strings=true}) -> [$\\, $\\];
maybe_replace(X, Opts=#opts{json_escape=true}) when X == 16#2028; X == 16#2029 -> maybe_replace(X, Opts=#opts{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
case Opts#opts.no_jsonp_escapes of case Opts#opts.unescaped_jsonp of
true -> [X] true -> [X]
; false -> lists:reverse(jsx_utils:json_escape_sequence(X)) ; false -> lists:reverse(jsx_utils:json_escape_sequence(X))
end; end;
maybe_replace(X, #opts{json_escape=true}) when X < 32 -> maybe_replace(X, #opts{escaped_strings=true}) when X < 32 ->
lists:reverse(jsx_utils:json_escape_sequence(X)); lists:reverse(jsx_utils:json_escape_sequence(X));
maybe_replace(noncharacter, #opts{loose_unicode=true}) -> [16#fffd]; maybe_replace(noncharacter, #opts{replaced_bad_utf8=true}) -> [16#fffd];
maybe_replace(surrogate, #opts{loose_unicode=true}) -> [16#fffd]; maybe_replace(surrogate, #opts{replaced_bad_utf8=true}) -> [16#fffd];
maybe_replace(badutf, #opts{loose_unicode=true}) -> [16#fffd]. maybe_replace(badutf, #opts{replaced_bad_utf8=true}) -> [16#fffd].
-ifdef(TEST). -ifdef(TEST).
@ -500,7 +500,7 @@ maybe_replace(badutf, #opts{loose_unicode=true}) -> [16#fffd].
xcode(Bin) -> xcode(Bin, #opts{}). xcode(Bin) -> xcode(Bin, #opts{}).
xcode(Bin, [loose_unicode]) -> xcode(Bin, #opts{loose_unicode=true}); xcode(Bin, [replaced_bad_utf8]) -> xcode(Bin, #opts{replaced_bad_utf8=true});
xcode(Bin, Opts) -> xcode(Bin, Opts) ->
try clean_string(Bin, Opts) try clean_string(Bin, Opts)
catch error:badarg -> {error, badarg} catch error:badarg -> {error, badarg}
@ -517,20 +517,20 @@ bad_utf8_test_() ->
?_assert(is_bad(xcode(<<16#0080>>))) ?_assert(is_bad(xcode(<<16#0080>>)))
}, },
{"orphan continuation byte u+0080 replaced", {"orphan continuation byte u+0080 replaced",
?_assertEqual(xcode(<<16#0080>>, [loose_unicode]), <<16#fffd/utf8>>) ?_assertEqual(xcode(<<16#0080>>, [replaced_bad_utf8]), <<16#fffd/utf8>>)
}, },
{"orphan continuation byte u+00bf", {"orphan continuation byte u+00bf",
?_assert(is_bad(xcode(<<16#00bf>>))) ?_assert(is_bad(xcode(<<16#00bf>>)))
}, },
{"orphan continuation byte u+00bf replaced", {"orphan continuation byte u+00bf replaced",
?_assertEqual(xcode(<<16#00bf>>, [loose_unicode]), <<16#fffd/utf8>>) ?_assertEqual(xcode(<<16#00bf>>, [replaced_bad_utf8]), <<16#fffd/utf8>>)
}, },
{"2 continuation bytes", {"2 continuation bytes",
?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>))) ?_assert(is_bad(xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>)))
}, },
{"2 continuation bytes replaced", {"2 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 2))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 2) binary:copy(<<16#fffd/utf8>>, 2)
) )
}, },
@ -539,7 +539,7 @@ bad_utf8_test_() ->
}, },
{"3 continuation bytes replaced", {"3 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 3))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 3) binary:copy(<<16#fffd/utf8>>, 3)
) )
}, },
@ -548,7 +548,7 @@ bad_utf8_test_() ->
}, },
{"4 continuation bytes replaced", {"4 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 4))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 4) binary:copy(<<16#fffd/utf8>>, 4)
) )
}, },
@ -557,7 +557,7 @@ bad_utf8_test_() ->
}, },
{"5 continuation bytes replaced", {"5 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 5))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 5) binary:copy(<<16#fffd/utf8>>, 5)
) )
}, },
@ -566,7 +566,7 @@ bad_utf8_test_() ->
}, },
{"6 continuation bytes replaced", {"6 continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [loose_unicode]), xcode(<<(binary:copy(<<16#0080>>, 6))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, 6) binary:copy(<<16#fffd/utf8>>, 6)
) )
}, },
@ -575,7 +575,7 @@ bad_utf8_test_() ->
}, },
{"all continuation bytes replaced", {"all continuation bytes replaced",
?_assertEqual( ?_assertEqual(
xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [loose_unicode]), xcode(<<(list_to_binary(lists:seq(16#0080, 16#00bf)))/binary>>, [replaced_bad_utf8]),
binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf))) binary:copy(<<16#fffd/utf8>>, length(lists:seq(16#0080, 16#00bf)))
) )
}, },
@ -584,7 +584,7 @@ bad_utf8_test_() ->
}, },
{"lonely start byte replaced", {"lonely start byte replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#00c0>>, [loose_unicode]), xcode(<<16#00c0>>, [replaced_bad_utf8]),
<<16#fffd/utf8>> <<16#fffd/utf8>>
) )
}, },
@ -593,7 +593,7 @@ bad_utf8_test_() ->
}, },
{"lonely start bytes (2 byte) replaced", {"lonely start bytes (2 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#00c0, 32, 16#00df>>, [loose_unicode]), xcode(<<16#00c0, 32, 16#00df>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32, 16#fffd/utf8>> <<16#fffd/utf8, 32, 16#fffd/utf8>>
) )
}, },
@ -602,7 +602,7 @@ bad_utf8_test_() ->
}, },
{"lonely start bytes (3 byte) replaced", {"lonely start bytes (3 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#00e0, 32, 16#00ef>>, [loose_unicode]), xcode(<<16#00e0, 32, 16#00ef>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32, 16#fffd/utf8>> <<16#fffd/utf8, 32, 16#fffd/utf8>>
) )
}, },
@ -611,7 +611,7 @@ bad_utf8_test_() ->
}, },
{"lonely start bytes (4 byte) replaced", {"lonely start bytes (4 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#00f0, 32, 16#00f7>>, [loose_unicode]), xcode(<<16#00f0, 32, 16#00f7>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32, 16#fffd/utf8>> <<16#fffd/utf8, 32, 16#fffd/utf8>>
) )
}, },
@ -620,7 +620,7 @@ bad_utf8_test_() ->
}, },
{"missing continuation byte (3 byte) replaced", {"missing continuation byte (3 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<224, 160, 32>>, [loose_unicode]), xcode(<<224, 160, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -629,7 +629,7 @@ bad_utf8_test_() ->
}, },
{"missing continuation byte (4 byte missing one) replaced", {"missing continuation byte (4 byte missing one) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<240, 144, 128, 32>>, [loose_unicode]), xcode(<<240, 144, 128, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -638,7 +638,7 @@ bad_utf8_test_() ->
}, },
{"missing continuation byte (4 byte missing two) replaced", {"missing continuation byte (4 byte missing two) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<240, 144, 32>>, [loose_unicode]), xcode(<<240, 144, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -647,7 +647,7 @@ bad_utf8_test_() ->
}, },
{"overlong encoding of u+002f (2 byte) replaced", {"overlong encoding of u+002f (2 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#c0, 16#af, 32>>, [loose_unicode]), xcode(<<16#c0, 16#af, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -656,7 +656,7 @@ bad_utf8_test_() ->
}, },
{"overlong encoding of u+002f (3 byte) replaced", {"overlong encoding of u+002f (3 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#e0, 16#80, 16#af, 32>>, [loose_unicode]), xcode(<<16#e0, 16#80, 16#af, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -665,7 +665,7 @@ bad_utf8_test_() ->
}, },
{"overlong encoding of u+002f (4 byte) replaced", {"overlong encoding of u+002f (4 byte) replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [loose_unicode]), xcode(<<16#f0, 16#80, 16#80, 16#af, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -674,7 +674,7 @@ bad_utf8_test_() ->
}, },
{"highest overlong 2 byte sequence replaced", {"highest overlong 2 byte sequence replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#c1, 16#bf, 32>>, [loose_unicode]), xcode(<<16#c1, 16#bf, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -683,7 +683,7 @@ bad_utf8_test_() ->
}, },
{"highest overlong 3 byte sequence replaced", {"highest overlong 3 byte sequence replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#e0, 16#9f, 16#bf, 32>>, [loose_unicode]), xcode(<<16#e0, 16#9f, 16#bf, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
}, },
@ -692,7 +692,7 @@ bad_utf8_test_() ->
}, },
{"highest overlong 4 byte sequence replaced", {"highest overlong 4 byte sequence replaced",
?_assertEqual( ?_assertEqual(
xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [loose_unicode]), xcode(<<16#f0, 16#8f, 16#bf, 16#bf, 32>>, [replaced_bad_utf8]),
<<16#fffd/utf8, 32>> <<16#fffd/utf8, 32>>
) )
} }
@ -710,7 +710,7 @@ encode(Term, Opts) ->
encode_test_() -> encode_test_() ->
[ [
{"naked string", ?_assertEqual(encode(<<"a string\n">>), [{string, <<"a string\n">>}, end_json])}, {"naked string", ?_assertEqual(encode(<<"a string\n">>), [{string, <<"a string\n">>}, end_json])},
{"escaped naked string", ?_assertEqual(encode(<<"a string\n">>, [json_escape]), [{string, <<"a string\\n">>}, end_json])}, {"escaped naked string", ?_assertEqual(encode(<<"a string\n">>, [escaped_strings]), [{string, <<"a string\\n">>}, end_json])},
{"naked integer", ?_assertEqual(encode(123), [{integer, 123}, end_json])}, {"naked integer", ?_assertEqual(encode(123), [{integer, 123}, end_json])},
{"naked float", ?_assertEqual(encode(1.23), [{float, 1.23}, end_json])}, {"naked float", ?_assertEqual(encode(1.23), [{float, 1.23}, end_json])},
{"naked literal", ?_assertEqual(encode(null), [{literal, null}, end_json])}, {"naked literal", ?_assertEqual(encode(null), [{literal, null}, end_json])},
@ -782,28 +782,28 @@ encode_test_() ->
escapes_test_() -> escapes_test_() ->
[ [
{"backspace escape", ?_assertEqual(encode(<<"\b">>, [json_escape]), [{string, <<"\\b">>}, end_json])}, {"backspace escape", ?_assertEqual(encode(<<"\b">>, [escaped_strings]), [{string, <<"\\b">>}, end_json])},
{"formfeed escape", ?_assertEqual(encode(<<"\f">>, [json_escape]), [{string, <<"\\f">>}, end_json])}, {"formfeed escape", ?_assertEqual(encode(<<"\f">>, [escaped_strings]), [{string, <<"\\f">>}, end_json])},
{"newline escape", ?_assertEqual(encode(<<"\n">>, [json_escape]), [{string, <<"\\n">>}, end_json])}, {"newline escape", ?_assertEqual(encode(<<"\n">>, [escaped_strings]), [{string, <<"\\n">>}, end_json])},
{"carriage return escape", ?_assertEqual(encode(<<"\r">>, [json_escape]), [{string, <<"\\r">>}, end_json])}, {"carriage return escape", ?_assertEqual(encode(<<"\r">>, [escaped_strings]), [{string, <<"\\r">>}, end_json])},
{"tab escape", ?_assertEqual(encode(<<"\t">>, [json_escape]), [{string, <<"\\t">>}, end_json])}, {"tab escape", ?_assertEqual(encode(<<"\t">>, [escaped_strings]), [{string, <<"\\t">>}, end_json])},
{"quote escape", ?_assertEqual(encode(<<"\"">>, [json_escape]), [{string, <<"\\\"">>}, end_json])}, {"quote escape", ?_assertEqual(encode(<<"\"">>, [escaped_strings]), [{string, <<"\\\"">>}, end_json])},
{"single quote escape", ?_assertEqual(encode(<<"'">>, [json_escape, single_quotes]), [{string, <<"\\'">>}, end_json])}, {"single quote escape", ?_assertEqual(encode(<<"'">>, [escaped_strings, single_quoted_strings]), [{string, <<"\\'">>}, end_json])},
{"no single quote escape", ?_assertEqual(encode(<<"'">>, [json_escape]), [{string, <<"'">>}, end_json])}, {"no single quote escape", ?_assertEqual(encode(<<"'">>, [escaped_strings]), [{string, <<"'">>}, end_json])},
{"forward slash escape", ?_assertEqual(encode(<<"/">>, [json_escape, escape_forward_slash]), [{string, <<"\\/">>}, end_json])}, {"forward slash escape", ?_assertEqual(encode(<<"/">>, [escaped_strings, escaped_forward_slashes]), [{string, <<"\\/">>}, end_json])},
{"no forward slash escape", ?_assertEqual(encode(<<"/">>, [json_escape]), [{string, <<"/">>}, end_json])}, {"no forward slash escape", ?_assertEqual(encode(<<"/">>, [escaped_strings]), [{string, <<"/">>}, end_json])},
{"back slash escape", ?_assertEqual(encode(<<"\\">>, [json_escape]), [{string, <<"\\\\">>}, end_json])}, {"back slash escape", ?_assertEqual(encode(<<"\\">>, [escaped_strings]), [{string, <<"\\\\">>}, end_json])},
{"jsonp escape", ?_assertEqual( {"jsonp escape", ?_assertEqual(
encode(<<16#2028/utf8, 16#2029/utf8>>, [json_escape]), encode(<<16#2028/utf8, 16#2029/utf8>>, [escaped_strings]),
[{string, <<"\\u2028\\u2029">>}, end_json] [{string, <<"\\u2028\\u2029">>}, end_json]
)}, )},
{"no jsonp escape", ?_assertEqual( {"no jsonp escape", ?_assertEqual(
encode(<<16#2028/utf8, 16#2029/utf8>>, [json_escape, no_jsonp_escapes]), encode(<<16#2028/utf8, 16#2029/utf8>>, [escaped_strings, unescaped_jsonp]),
[{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json] [{string, <<16#2028/utf8, 16#2029/utf8>>}, end_json]
)}, )},
{"control escape", ?_assertEqual(encode(<<0>>, [json_escape]), [{string, <<"\\u0000">>}, end_json])}, {"control escape", ?_assertEqual(encode(<<0>>, [escaped_strings]), [{string, <<"\\u0000">>}, end_json])},
{"dirty strings", ?_assertEqual(encode(<<"\n">>, [json_escape, dirty_strings]), [{string, <<"\n">>}, end_json])}, {"dirty strings", ?_assertEqual(encode(<<"\n">>, [escaped_strings, dirty_strings]), [{string, <<"\n">>}, end_json])},
{"ignore bad escapes", ?_assertEqual(encode(<<"\\x25">>, [json_escape, ignore_bad_escapes]), [{string, <<"\\\\x25">>}, end_json])} {"ignore bad escapes", ?_assertEqual(encode(<<"\\x25">>, [escaped_strings, ignored_bad_escapes]), [{string, <<"\\\\x25">>}, end_json])}
]. ].
@ -823,23 +823,23 @@ good_characters_test_() ->
{"acceptable codepoints", {"acceptable codepoints",
?_assert(check_good(good())) ?_assert(check_good(good()))
}, },
{"acceptable codepoints - json_escape", {"acceptable codepoints - escaped_strings",
?_assert(check_good(good(), [json_escape])) ?_assert(check_good(good(), [escaped_strings]))
}, },
{"acceptable codepoints - loose_unicode", {"acceptable codepoints - replaced_bad_utf8",
?_assert(check_good(good(), [json_escape])) ?_assert(check_good(good(), [escaped_strings]))
}, },
{"acceptable codepoints - json_escape + loose_unicode", {"acceptable codepoints - escaped_strings + replaced_bad_utf8",
?_assert(check_good(good(), [json_escape, loose_unicode])) ?_assert(check_good(good(), [escaped_strings, replaced_bad_utf8]))
}, },
{"acceptable extended", {"acceptable extended",
?_assert(check_good(good_extended())) ?_assert(check_good(good_extended()))
}, },
{"acceptable extended - json_escape", {"acceptable extended - escaped_strings",
?_assert(check_good(good_extended(), [json_escape])) ?_assert(check_good(good_extended(), [escaped_strings]))
}, },
{"acceptable extended - json_escape", {"acceptable extended - escaped_strings",
?_assert(check_good(good_extended(), [loose_unicode])) ?_assert(check_good(good_extended(), [replaced_bad_utf8]))
} }
]. ].
@ -886,7 +886,7 @@ check_bad(List) ->
check_replaced(List) -> check_replaced(List) ->
[] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true ; (_) -> false [] == lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) -> true ; (_) -> false
end, end,
check(List, [loose_unicode], []) check(List, [replaced_bad_utf8], [])
). ).

View file

@ -1,11 +1,11 @@
-record(opts, { -record(opts, {
loose_unicode = false, replaced_bad_utf8 = false,
escape_forward_slash = false, escaped_forward_slashes = false,
explicit_end = false, single_quoted_strings = false,
single_quotes = false, unescaped_jsonp = false,
no_jsonp_escapes = false,
comments = false, comments = false,
json_escape = false, escaped_strings = false,
dirty_strings = false, dirty_strings = false,
ignore_bad_escapes = false ignored_bad_escapes = false,
explicit_end = false
}). }).

View file

@ -39,13 +39,13 @@
-spec to_json(Source::any(), Opts::opts()) -> binary(). -spec to_json(Source::any(), Opts::opts()) -> binary().
to_json(Source, Opts) when is_list(Opts) -> to_json(Source, Opts) when is_list(Opts) ->
(jsx:encoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [json_escape])))(Source). (jsx:encoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [escaped_strings])))(Source).
-spec format(Source::binary(), Opts::opts()) -> binary(). -spec format(Source::binary(), Opts::opts()) -> binary().
format(Source, Opts) when is_binary(Source) andalso is_list(Opts) -> format(Source, Opts) when is_binary(Source) andalso is_list(Opts) ->
(jsx:decoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [json_escape])))(Source). (jsx:decoder(?MODULE, Opts, jsx_utils:extract_opts(Opts ++ [escaped_strings])))(Source).
parse_opts(Opts) -> parse_opts(Opts, #opts{}). parse_opts(Opts) -> parse_opts(Opts, #opts{}).

View file

@ -36,49 +36,69 @@ parse_opts(Opts) ->
parse_opts([], Opts) -> parse_opts([], Opts) ->
Opts; Opts;
parse_opts([loose_unicode|Rest], Opts) -> parse_opts([replaced_bad_utf8|Rest], Opts) ->
parse_opts(Rest, Opts#opts{loose_unicode=true}); parse_opts(Rest, Opts#opts{replaced_bad_utf8=true});
parse_opts([escape_forward_slash|Rest], Opts) -> parse_opts([escaped_forward_slashes|Rest], Opts) ->
parse_opts(Rest, Opts#opts{escape_forward_slash=true}); parse_opts(Rest, Opts#opts{escaped_forward_slashes=true});
parse_opts([explicit_end|Rest], Opts) -> parse_opts([explicit_end|Rest], Opts) ->
parse_opts(Rest, Opts#opts{explicit_end=true}); parse_opts(Rest, Opts#opts{explicit_end=true});
parse_opts([single_quotes|Rest], Opts) -> parse_opts([single_quoted_strings|Rest], Opts) ->
parse_opts(Rest, Opts#opts{single_quotes=true}); parse_opts(Rest, Opts#opts{single_quoted_strings=true});
parse_opts([no_jsonp_escapes|Rest], Opts) -> parse_opts([unescaped_jsonp|Rest], Opts) ->
parse_opts(Rest, Opts#opts{no_jsonp_escapes=true}); parse_opts(Rest, Opts#opts{unescaped_jsonp=true});
parse_opts([comments|Rest], Opts) -> parse_opts([comments|Rest], Opts) ->
parse_opts(Rest, Opts#opts{comments=true}); parse_opts(Rest, Opts#opts{comments=true});
parse_opts([json_escape|Rest], Opts) -> parse_opts([escaped_strings|Rest], Opts) ->
parse_opts(Rest, Opts#opts{json_escape=true}); parse_opts(Rest, Opts#opts{escaped_strings=true});
parse_opts([dirty_strings|Rest], Opts) -> parse_opts([dirty_strings|Rest], Opts) ->
parse_opts(Rest, Opts#opts{dirty_strings=true}); parse_opts(Rest, Opts#opts{dirty_strings=true});
parse_opts([ignore_bad_escapes|Rest], Opts) -> parse_opts([ignored_bad_escapes|Rest], Opts) ->
parse_opts(Rest, Opts#opts{ignore_bad_escapes=true}); parse_opts(Rest, Opts#opts{ignored_bad_escapes=true});
parse_opts([relax|Rest], Opts) -> parse_opts([relax|Rest], Opts) ->
parse_opts(Rest, Opts#opts{ parse_opts(Rest, Opts#opts{
loose_unicode = true, replaced_bad_utf8 = true,
single_quotes = true, single_quoted_strings = true,
comments = true, comments = true,
ignore_bad_escapes = true ignored_bad_escapes = true
}); });
%% deprecated flags
parse_opts([loose_unicode|Rest], Opts) ->
parse_opts(Rest, Opts#opts{replaced_bad_utf8=true});
parse_opts([escape_forward_slash|Rest], Opts) ->
parse_opts(Rest, Opts#opts{escaped_forward_slashes=true});
parse_opts([single_quotes|Rest], Opts) ->
parse_opts(Rest, Opts#opts{single_quoted_strings=true});
parse_opts([no_jsonp_escapes|Rest], Opts) ->
parse_opts(Rest, Opts#opts{unescaped_jsonp=true});
parse_opts([json_escape|Rest], Opts) ->
parse_opts(Rest, Opts#opts{escaped_strings=true});
parse_opts([ignore_bad_escapes|Rest], Opts) ->
parse_opts(Rest, Opts#opts{ignored_bad_escapes=true});
parse_opts(_, _) -> parse_opts(_, _) ->
{error, badarg}. {error, badarg}.
valid_flags() -> valid_flags() ->
[ [
loose_unicode, replaced_bad_utf8,
escape_forward_slash, escaped_forward_slashes,
explicit_end, single_quoted_strings,
single_quotes, unescaped_jsonp,
no_jsonp_escapes,
comments, comments,
json_escape, escaped_strings,
dirty_strings, dirty_strings,
ignore_bad_escapes, ignored_bad_escapes,
relax explicit_end,
relax,
%% deprecated flags
loose_unicode, %% replaced_bad_utf8
escape_forward_slash, %% escaped_forward_slashes
single_quotes, %% single_quoted_strings
no_jsonp_escapes, %% unescaped_jsonp
json_escape, %% escaped_strings
ignore_bad_escapes %% ignored_bad_escapes
]. ].
extract_opts(Opts) -> extract_opts(Opts) ->
extract_parser_opts(Opts, []). extract_parser_opts(Opts, []).
@ -128,24 +148,24 @@ opts_test_() ->
{"all flags", {"all flags",
?_assertEqual( ?_assertEqual(
parse_opts([ parse_opts([
loose_unicode, replaced_bad_utf8,
escape_forward_slash, escaped_forward_slashes,
explicit_end, explicit_end,
single_quotes, single_quoted_strings,
no_jsonp_escapes, unescaped_jsonp,
comments, comments,
dirty_strings, dirty_strings,
ignore_bad_escapes ignored_bad_escapes
]), ]),
#opts{ #opts{
loose_unicode=true, replaced_bad_utf8=true,
escape_forward_slash=true, escaped_forward_slashes=true,
explicit_end=true, explicit_end=true,
single_quotes=true, single_quoted_strings=true,
no_jsonp_escapes=true, unescaped_jsonp=true,
comments=true, comments=true,
dirty_strings=true, dirty_strings=true,
ignore_bad_escapes=true ignored_bad_escapes=true
} }
) )
}, },
@ -153,10 +173,10 @@ opts_test_() ->
?_assertEqual( ?_assertEqual(
parse_opts([relax]), parse_opts([relax]),
#opts{ #opts{
loose_unicode=true, replaced_bad_utf8=true,
single_quotes=true, single_quoted_strings=true,
comments=true, comments=true,
ignore_bad_escapes=true ignored_bad_escapes=true
} }
) )
} }