strings returned as binaries

This commit is contained in:
alisdair sullivan 2011-10-27 23:11:26 -07:00
parent d5a91c0a06
commit 0ea61233c5
22 changed files with 86 additions and 98 deletions

View file

@ -120,12 +120,12 @@ decoder(Opts) ->
).
-endif.
-define(new_seq(), []).
-define(new_seq(C), [C]).
-define(new_seq(), <<>>).
-define(new_seq(C), <<C/utf8>>).
-define(acc_seq(Seq, C), [C] ++ Seq).
-define(acc_seq(Seq, C), <<Seq/binary, C/utf8>>).
-define(end_seq(Seq), lists:reverse(Seq)).
-define(end_seq(Seq), Seq).
start(<<?start_object, Rest/binary>>, Out, Stack, Opts) ->
@ -141,11 +141,11 @@ start(<<$f, Rest/binary>>, Out, Stack, Opts) ->
start(<<$n, Rest/binary>>, Out, Stack, Opts) ->
nu(Rest, Out, Stack, Opts);
start(<<?negative, Rest/binary>>, Out, Stack, Opts) ->
negative(Rest, Out, [?new_seq($-)|Stack], Opts);
negative(Rest, Out, [[$-]|Stack], Opts);
start(<<?zero, Rest/binary>>, Out, Stack, Opts) ->
zero(Rest, Out, [?new_seq($0)|Stack], Opts);
zero(Rest, Out, [[$0]|Stack], Opts);
start(<<S/utf8, Rest/binary>>, Out, Stack, Opts) when ?is_nonzero(S) ->
integer(Rest, Out, [?new_seq(S)|Stack], Opts);
integer(Rest, Out, [[S]|Stack], Opts);
start(<<S, Rest/binary>>, Out, Stack, Opts) when ?is_whitespace(S) ->
start(Rest, Out, Stack, Opts);
start(<<>>, Out, Stack, Opts) ->
@ -175,11 +175,11 @@ array(<<$f, Rest/binary>>, Out, Stack, Opts) ->
array(<<$n, Rest/binary>>, Out, Stack, Opts) ->
nu(Rest, Out, Stack, Opts);
array(<<?negative, Rest/binary>>, Out, Stack, Opts) ->
negative(Rest, Out, [?new_seq($-)|Stack], Opts);
negative(Rest, Out, [[$-]|Stack], Opts);
array(<<?zero, Rest/binary>>, Out, Stack, Opts) ->
zero(Rest, Out, [?new_seq($0)|Stack], Opts);
zero(Rest, Out, [[$0]|Stack], Opts);
array(<<S, Rest/binary>>, Out, Stack, Opts) when ?is_nonzero(S) ->
integer(Rest, Out, [?new_seq(S)|Stack], Opts);
integer(Rest, Out, [[S]|Stack], Opts);
array(<<?start_object, Rest/binary>>, Out, Stack, Opts) ->
?event([start_object], object, Rest, Out, [key|Stack], Opts);
array(<<?start_array, Rest/binary>>, Out, Stack, Opts) ->
@ -203,11 +203,11 @@ value(<<$f, Rest/binary>>, Out, Stack, Opts) ->
value(<<$n, Rest/binary>>, Out, Stack, Opts) ->
nu(Rest, Out, Stack, Opts);
value(<<?negative, Rest/binary>>, Out, Stack, Opts) ->
negative(Rest, Out, [?new_seq($-)|Stack], Opts);
negative(Rest, Out, [[$-]|Stack], Opts);
value(<<?zero, Rest/binary>>, Out, Stack, Opts) ->
zero(Rest, Out, [?new_seq($0)|Stack], Opts);
zero(Rest, Out, [[$0]|Stack], Opts);
value(<<S, Rest/binary>>, Out, Stack, Opts) when ?is_nonzero(S) ->
integer(Rest, Out, [?new_seq(S)|Stack], Opts);
integer(Rest, Out, [[S]|Stack], Opts);
value(<<?start_object, Rest/binary>>, Out, Stack, Opts) ->
?event([start_object], object, Rest, Out, [key|Stack], Opts);
value(<<?start_array, Rest/binary>>, Out, Stack, Opts) ->
@ -335,7 +335,7 @@ escape(<<$r, Rest/binary>>, Out, [Acc|Stack], Opts) ->
escape(<<$t, Rest/binary>>, Out, [Acc|Stack], Opts) ->
string(Rest, Out, [?acc_seq(Acc, $\t)|Stack], Opts);
escape(<<$u, Rest/binary>>, Out, Stack, Opts) ->
escaped_unicode(Rest, Out, [?new_seq()|Stack], Opts);
escaped_unicode(Rest, Out, [[]|Stack], Opts);
escape(<<S, Rest/binary>>, Out, [Acc|Stack], Opts)
when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus ->
string(Rest, Out, [?acc_seq(Acc, S)|Stack], Opts);
@ -376,7 +376,7 @@ escaped_unicode(<<D, Rest/binary>>, Out, [[C,B,A], Acc|Stack], Opts)
end;
escaped_unicode(<<S, Rest/binary>>, Out, [Acc|Stack], Opts)
when ?is_hex(S) ->
escaped_unicode(Rest, Out, [?acc_seq(Acc, S)|Stack], Opts);
escaped_unicode(Rest, Out, [[S] ++ Acc|Stack], Opts);
escaped_unicode(<<>>, Out, Stack, Opts) ->
?incomplete(escaped_unicode, <<>>, Out, Stack, Opts);
escaped_unicode(Bin, Out, Stack, Opts) ->
@ -401,7 +401,7 @@ low_surrogate(Bin, Out, Stack, Opts) ->
low_surrogate_u(<<$u, Rest/binary>>, Out, Stack, Opts) ->
low_surrogate_v(Rest, Out, [?new_seq()|Stack], Opts);
low_surrogate_v(Rest, Out, [[]|Stack], Opts);
low_surrogate_u(<<>>, Out, Stack, Opts) ->
?incomplete(low_surrogate_u, <<>>, Out, Stack, Opts);
%% not a low surrogate, dispatch back to string to handle, including the
@ -441,7 +441,7 @@ low_surrogate_v(<<D, Rest/binary>>, Out, [[C,B,A], High, String|Stack], Opts)
end;
low_surrogate_v(<<S, Rest/binary>>, Out, [Acc|Stack], Opts)
when ?is_hex(S) ->
low_surrogate_v(Rest, Out, [?acc_seq(Acc, S)|Stack], Opts);
low_surrogate_v(Rest, Out, [[S] ++ Acc|Stack], Opts);
low_surrogate_v(<<>>, Out, Stack, Opts) ->
?incomplete(low_surrogate_v, <<>>, Out, Stack, Opts);
low_surrogate_v(Bin, Out, Stack, Opts) ->
@ -783,7 +783,7 @@ check_bad(List) ->
).
check_replaced(List) ->
lists:dropwhile(fun({_, [{string, [16#fffd]}|_]}) ->
lists:dropwhile(fun({_, [{string, <<16#fffd/utf8>>}|_]}) ->
true
; (_) ->
false

View file

@ -61,26 +61,15 @@ encoder(OptsList) ->
start([{string, String}], [], [], Opts) when is_binary(String); is_list(String) ->
{ok,
[{string,
unicode:characters_to_list(jsx_utils:json_escape(String, Opts))},
end_json
]
};
start([{string, String}], [], [], Opts) when is_binary(String) ->
{ok, [{string, jsx_utils:json_escape(String, Opts)}, end_json]};
start([{float, Float}], [], [], _Opts) when is_float(Float) ->
{ok,
[{float, Float}, end_json]
};
{ok, [{float, Float}, end_json]};
start([{integer, Int}], [], [], _Opts) when is_integer(Int) ->
{ok,
[{integer, Int}, end_json]
};
{ok, [{integer, Int}, end_json]};
start([{literal, Atom}], [], [], _Opts)
when Atom == true; Atom == false; Atom == null ->
{ok,
[{literal, Atom}, end_json]
};
{ok, [{literal, Atom}, end_json]};
%% third parameter is a stack to match end_foos to start_foos
start(Forms, [], [], Opts) when is_list(Forms) ->
list_or_object(Forms, [], [], Opts);
@ -96,9 +85,9 @@ list_or_object([], T, Stack, Opts) ->
list_or_object(Forms, T, Stack, Opts) -> ?error([Forms, T, Stack, Opts]).
key([{key, Key}|Forms], T, Stack, Opts) when is_binary(Key); is_list(Key) ->
key([{key, Key}|Forms], T, Stack, Opts) when is_binary(Key) ->
?event([{key,
unicode:characters_to_list(jsx_utils:json_escape(Key, Opts))
unicode:characters_to_binary(jsx_utils:json_escape(Key, Opts))
}],
value, Forms, T, Stack, Opts
);
@ -108,8 +97,8 @@ key([], T, Stack, Opts) -> ?incomplete(key, T, Stack, Opts);
key(Forms, T, Stack, Opts) -> ?error([Forms, T, Stack, Opts]).
value([{string, S}|Forms], T, Stack, Opts) when is_binary(S); is_list(S) ->
?event([{string, unicode:characters_to_list(jsx_utils:json_escape(S, Opts))}],
value([{string, S}|Forms], T, Stack, Opts) when is_binary(S) ->
?event([{string, jsx_utils:json_escape(S, Opts)}],
maybe_done, Forms, T, Stack, Opts
);
value([{float, F}|Forms], T, Stack, Opts) when is_float(F) ->
@ -141,8 +130,7 @@ maybe_done([], T, Stack, Opts) -> ?incomplete(maybe_done, T, Stack, Opts);
maybe_done(Forms, T, Stack, Opts) -> ?error([Forms, T, Stack, Opts]).
done([], T, [], _Opts) ->
{ok, lists:reverse(T)};
done([], T, [], _Opts) -> {ok, lists:reverse(T)};
done(Forms, T, Stack, Opts) -> ?error([Forms, T, Stack, Opts]).
@ -164,9 +152,9 @@ encode_test_() ->
{"empty object", ?_assert(encode([start_object, end_object, end_json]))},
{"empty array", ?_assert(encode([start_array, end_array, end_json]))},
{"nested empty objects", ?_assert(encode([start_object,
{key, "empty object"},
{key, <<"empty object">>},
start_object,
{key, "empty object"},
{key, <<"empty object">>},
start_object,
end_object,
end_object,
@ -182,19 +170,19 @@ encode_test_() ->
end_json
]))},
{"simple object", ?_assert(encode([start_object,
{key, "a"},
{string, "hello"},
{key, "b"},
{key, <<"a">>},
{string, <<"hello">>},
{key, <<"b">>},
{integer, 1},
{key, "c"},
{key, <<"c">>},
{float, 1.0},
{key, "d"},
{key, <<"d">>},
{literal, true},
end_object,
end_json
]))},
{"simple array", ?_assert(encode([start_array,
{string, "hello"},
{string, <<"hello">>},
{integer, 1},
{float, 1.0},
{literal, true},
@ -207,8 +195,8 @@ encode_test_() ->
end_array,
end_json
]))},
{"naked string", ?_assert((jsx:scanner())([{string, "hello"}])
=:= {ok, [{string, "hello"}, end_json]}
{"naked string", ?_assert((jsx:scanner())([{string, <<"hello">>}])
=:= {ok, [{string, <<"hello">>}, end_json]}
)},
{"naked literal", ?_assert((jsx:scanner())([{literal, true}])
=:= {ok, [{literal, true}, end_json]}

View file

@ -1,8 +1,8 @@
{name, "array"}.
{jsx, [start_array,
{string,"foo"},
{string,"bar"},
{string,"baz"},
{string,<<"foo">>},
{string,<<"bar">>},
{string,<<"baz">>},
start_array,
{literal,true},
end_array,start_array,
@ -15,14 +15,14 @@
{literal,null},
{float,0.7},
start_object,
{key,"key"},
{string,"value"},
{key,<<"key">>},
{string,<<"value">>},
end_object,start_array,start_object,end_object,
{literal,null},
{literal,null},
{literal,null},
start_array,end_array,end_array,
{string,"\n\r\\"},
{string,<<"\n\r\\">>},
start_array,
{integer,-1},
end_array,end_array,end_json]}.

View file

@ -1,4 +1,4 @@
{name, "bad_low_surrogate_replaced"}.
{jsx, [start_array,{string, [16#fffd, 16#fffd]},end_array,end_json]}.
{jsx, [start_array,{string, <<16#fffd/utf8, 16#fffd/utf8>>},end_array,end_json]}.
{json, "bad_low_surrogate_replaced.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -1,3 +1,3 @@
{name, "encoded_surrogates"}.
{jsx, [start_array,{string,[66560]},end_array,end_json]}.
{jsx, [start_array,{string,<<66560/utf8>>},end_array,end_json]}.
{json, "encoded_surrogates.json"}.

View file

@ -1,4 +1,4 @@
{name, "escaped noncharacter (extended)"}.
{jsx, [{string, [16#fffd]}, end_json]}.
{jsx, [{string, <<16#fffd/utf8>>}, end_json]}.
{json, "escaped_noncharacter_ext.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -1,4 +1,4 @@
{name, "escaped noncharacter replacement"}.
{jsx, [{string,[16#fffd]},end_json]}.
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
{json, "escaped_noncharacter_replaced.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -1,4 +1,4 @@
{name, "escaped nullbyte replaced"}.
{jsx, [{string,[16#fffd]},end_json]}.
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
{json, "escaped_nullbyte_replaced.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -4,9 +4,9 @@
end_array,
{float,2.0e7},
start_object,
{key,"key"},
{key,<<"key">>},
{float,2.0e7},
{key,"another key"},
{key,<<"another key">>},
{float,2.0e7},
end_object,
{float,4.2e70},

View file

@ -4,14 +4,14 @@
end_array,
{float,2.0},
start_object,
{key,"key"},
{key,<<"key">>},
{float,2.0e7},
{key,"another key"},
{key,<<"another key">>},
{float,2.0e7},
end_object,start_object,
{key,"key"},
{key,<<"key">>},
{float,2.0},
{key,"another key"},
{key,<<"another key">>},
{float,2.0},
end_object,
{float,4.321},

View file

@ -4,9 +4,9 @@
end_array,
{integer,20},
start_object,
{key,"key"},
{key,<<"key">>},
{integer,20},
{key,"another key"},
{key,<<"another key">>},
{integer,20},
end_object,
{integer,42},

View file

@ -1,3 +1,3 @@
{name, "multibyte_utf"}.
{jsx, [start_array,{string,[32,119070,32]},end_array,end_json]}.
{jsx, [start_array,{string,<<32,119070/utf8,32>>},end_array,end_json]}.
{json, "multibyte_utf.json"}.

View file

@ -1,3 +1,3 @@
{name, "naked_string"}.
{jsx, [{string,"this is a naked string"},end_json]}.
{jsx, [{string,<<"this is a naked string">>},end_json]}.
{json, "naked_string.json"}.

View file

@ -4,9 +4,9 @@
end_array,
{integer,0},
start_object,
{key,"key"},
{key,<<"key">>},
{integer,0},
{key,"another key"},
{key,<<"another key">>},
{integer,0},
end_object,
{integer,0},

View file

@ -1,4 +1,4 @@
{name, "noncharacter replaced"}.
{jsx, [{string,[16#fffd]},end_json]}.
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
{json, "noncharacter_replaced.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -1,4 +1,4 @@
{name, "nullbyte replaced"}.
{jsx, [{string,[16#fffd]},end_json]}.
{jsx, [{string,<<16#fffd/utf8>>},end_json]}.
{json, "nullbyte_replaced.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -1,22 +1,22 @@
{name, "object"}.
{jsx, [start_object,
{key,"foo"},
{string,"bar"},
{key,"baz"},
{key,<<"foo">>},
{string,<<"bar">>},
{key,<<"baz">>},
{literal,true},
{key,"false"},
{key,<<"false">>},
{literal,null},
{key,"object"},
{key,<<"object">>},
start_object,
{key,"key"},
{string,"value"},
{key,<<"key">>},
{string,<<"value">>},
end_object,
{key,"list"},
{key,<<"list">>},
start_array,
{literal,null},
{literal,null},
{literal,null},
start_array,end_array,
{string,"\n\r\\"},
{string,<<"\n\r\\">>},
end_array,end_object,end_json]}.
{json, "object.json"}.

View file

@ -1,5 +1,5 @@
{name, "string"}.
{jsx, [start_array,
{string,"this is a random string with \n embedded escapes in it"},
{string,<<"this is a random string with \n embedded escapes in it">>},
end_array,end_json]}.
{json, "string.json"}.

View file

@ -1,12 +1,12 @@
{name, "string_escapes"}.
{jsx, [start_array,
{string,"\""},
{string,"\\"},
{string,"/"},
{string,"\b"},
{string,"\f"},
{string,"\n"},
{string,"\r"},
{string,"\t"},
{string,<<"\"">>},
{string,<<"\\">>},
{string,<<"/">>},
{string,<<"\b">>},
{string,<<"\f">>},
{string,<<"\n">>},
{string,<<"\r">>},
{string,<<"\t">>},
end_array,end_json]}.
{json, "string_escapes.json"}.

View file

@ -1,6 +1,6 @@
{name, "unicode_to_codepoint"}.
{jsx, [start_array,
{string,"arabic letter alef: "},
{string,[1575]},
{string,<<"arabic letter alef: ">>},
{string,<<1575/utf8>>},
end_array,end_json]}.
{json, "unicode_to_codepoint.json"}.

View file

@ -1,4 +1,4 @@
{name, "unpaired surrogate replaced"}.
{jsx, [start_array,{string,[65533,$b,$l,$a,$h]},end_array,end_json]}.
{jsx, [start_array,{string,<<65533/utf8,$b,$l,$a,$h>>},end_array,end_json]}.
{json, "unpaired_surrogate_replaced.json"}.
{jsx_flags, [loose_unicode]}.

View file

@ -4,9 +4,9 @@
end_array,
{integer,0},
start_object,
{key,"key"},
{key,<<"key">>},
{integer,0},
{key,"another key"},
{key,<<"another key">>},
{integer,0},
end_object,
{integer,0},