diff --git a/extras/jsx_ex.erl b/extras/jsx_ex.erl index 8a317a3..3247487 100644 --- a/extras/jsx_ex.erl +++ b/extras/jsx_ex.erl @@ -28,7 +28,7 @@ simple_decode(JSON) when is_binary(JSON) -> P = jsx:decoder(), decode(P(JSON), []). -decode({event, end_json, _Next}, Acc) -> +decode({jsx, end_json, _Next}, Acc) -> lists:reverse(Acc); -decode({event, Event, Next}, Acc) -> +decode({jsx, Event, Next}, Acc) -> decode(Next(), [Event] ++ Acc). \ No newline at end of file diff --git a/src/jsx.app.src b/src/jsx.app.src index 9f69ff4..41a170b 100644 --- a/src/jsx.app.src +++ b/src/jsx.app.src @@ -1,7 +1,7 @@ {application, jsx, [ {description, "a streaming, evented json parsing toolkit"}, - {vsn, "0.9.0"}, + {vsn, "0.10.0"}, {modules, [ jsx, jsx_encoder, @@ -10,7 +10,7 @@ jsx_utf16le, jsx_utf32, jsx_utf32le, - jsx_eep0018, + jsx_terms, jsx_format, jsx_verify, jsx_utils diff --git a/src/jsx.erl b/src/jsx.erl index 708a5d9..79c831c 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -82,7 +82,7 @@ encoder() -> encoder([]). encoder(Opts) -> jsx_encoder:encoder(Opts). --spec json_to_term(JSON::binary()) -> eep0018(). +-spec json_to_term(JSON::binary()) -> jsx_term(). json_to_term(JSON) -> try json_to_term(JSON, []) @@ -91,13 +91,13 @@ json_to_term(JSON) -> end. --spec json_to_term(JSON::binary(), Opts::decoder_opts()) -> eep0018(). +-spec json_to_term(JSON::binary(), Opts::decoder_opts()) -> jsx_term(). json_to_term(JSON, Opts) -> - jsx_eep0018:json_to_term(JSON, Opts). + jsx_terms:json_to_term(JSON, Opts). --spec term_to_json(JSON::eep0018()) -> binary(). +-spec term_to_json(JSON::jsx_term()) -> binary(). term_to_json(JSON) -> try term_to_json(JSON, []) @@ -106,10 +106,10 @@ term_to_json(JSON) -> end. --spec term_to_json(JSON::eep0018(), Opts::encoder_opts()) -> binary(). +-spec term_to_json(JSON::jsx_term(), Opts::encoder_opts()) -> binary(). term_to_json(JSON, Opts) -> - try jsx_eep0018:term_to_json(JSON, Opts) + try jsx_terms:term_to_json(JSON, Opts) %% rethrow exception so internals aren't confusingly exposed to users catch error:badarg -> erlang:error(badarg, [JSON, Opts]) end. @@ -227,11 +227,11 @@ decode(JSON, Flags) -> P = jsx:decoder(Flags), decode_loop(P(JSON), []). -decode_loop({event, end_json, _Next}, Acc) -> +decode_loop({jsx, end_json, _Next}, Acc) -> lists:reverse([end_json] ++ Acc); -decode_loop({incomplete, More}, Acc) -> +decode_loop({jsx, incomplete, More}, Acc) -> decode_loop(More(end_stream), Acc); -decode_loop({event, E, Next}, Acc) -> +decode_loop({jsx, E, Next}, Acc) -> decode_loop(Next(), [E] ++ Acc). @@ -239,13 +239,13 @@ incremental_decode(<>, Flags) -> P = jsx:decoder(Flags), incremental_decode_loop(P(C), Rest, []). -incremental_decode_loop({incomplete, Next}, <<>>, Acc) -> +incremental_decode_loop({jsx, incomplete, Next}, <<>>, Acc) -> incremental_decode_loop(Next(end_stream), <<>>, Acc); -incremental_decode_loop({incomplete, Next}, <>, Acc) -> +incremental_decode_loop({jsx, incomplete, Next}, <>, Acc) -> incremental_decode_loop(Next(C), Rest, Acc); -incremental_decode_loop({event, end_json, _Next}, _Rest, Acc) -> +incremental_decode_loop({jsx, end_json, _Next}, _Rest, Acc) -> lists:reverse([end_json] ++ Acc); -incremental_decode_loop({event, Event, Next}, Rest, Acc) -> +incremental_decode_loop({jsx, Event, Next}, Rest, Acc) -> incremental_decode_loop(Next(), Rest, [Event] ++ Acc). @@ -261,11 +261,11 @@ multi_decode(JSON, Flags) -> P = jsx:decoder(Flags ++ [{multi_term, true}]), multi_decode_loop(P(JSON), [[]]). -multi_decode_loop({incomplete, _Next}, [[]|Acc]) -> +multi_decode_loop({jsx, incomplete, _Next}, [[]|Acc]) -> lists:reverse(Acc); -multi_decode_loop({event, end_json, Next}, [S|Acc]) -> +multi_decode_loop({jsx, end_json, Next}, [S|Acc]) -> multi_decode_loop(Next(), [[]|[lists:reverse(S)] ++ Acc]); -multi_decode_loop({event, E, Next}, [S|Acc]) -> +multi_decode_loop({jsx, E, Next}, [S|Acc]) -> multi_decode_loop(Next(), [[E] ++ S] ++ Acc). @@ -283,10 +283,10 @@ multi_test_result() -> [{literal, false}], [{literal, null}], [start_object, end_object], - [start_object, {key, "key"}, {string, "value"}, end_object], + [start_object, {key, <<"key">>}, {string, <<"value">>}, end_object], [start_array, end_array], [start_array, {integer, 1}, {integer, 2}, {integer, 3}, end_array], - [{string, "hope this works"}] + [{string, <<"hope this works">>}] ]. diff --git a/src/jsx_common.hrl b/src/jsx_common.hrl index 09c758c..242f94d 100644 --- a/src/jsx_common.hrl +++ b/src/jsx_common.hrl @@ -33,8 +33,7 @@ -type jsx_opts() :: [jsx_opt()]. --type jsx_opt() :: {escaped_unicode, ascii | codepoint | replace | none} - | {multi_term, true | false} +-type jsx_opt() :: {multi_term, true | false} | {encoding, auto | utf8 | utf16 @@ -44,19 +43,15 @@ }. -%% events emitted by the parser and component types --type unicode_codepoint() :: 0..16#10ffff. --type unicode_string() :: [unicode_codepoint()]. - -type jsx_event() :: start_object | end_object | start_array | end_array | end_json - | {key, unicode_string()} - | {string, unicode_string()} - | {integer, unicode_string()} - | {float, unicode_string()} + | {key, binary()} + | {string, binary()} + | {integer, integer()} + | {float, float()} | {literal, true} | {literal, false} | {literal, null}. @@ -76,16 +71,12 @@ -type jsx_encoder() :: fun((jsx_encodeable()) -> jsx_iterator_result()). -type jsx_iterator_result() :: - {event, jsx_event(), fun(() -> jsx_iterator_result())} - | {incomplete, jsx_iterator()} + {jsx, jsx_event(), fun(() -> jsx_iterator_result())} + | {jsx, incomplete, jsx_iterator()} | {error, {badjson, any()}}. - - - - -type supported_utf() :: utf8 | utf16 | {utf16, little} @@ -93,23 +84,22 @@ | {utf32, little}. -%% eep0018 json specification --type eep0018() :: eep0018_object() | eep0018_array(). --type eep0018_array() :: [eep0018_term()]. --type eep0018_object() :: [{eep0018_key(), eep0018_term()}]. +%% json specification +-type jsx_array() :: [jsx_term()] | []. +-type jsx_object() :: [{jsx_key(), jsx_term()}] | [{}]. --type eep0018_key() :: binary() | atom(). +-type jsx_key() :: binary(). --type eep0018_term() :: eep0018_array() - | eep0018_object() - | eep0018_string() - | eep0018_number() +-type jsx_term() :: jsx_array() + | jsx_object() + | jsx_string() + | jsx_number() | true | false | null. --type eep0018_string() :: binary(). +-type jsx_string() :: binary(). --type eep0018_number() :: float() | integer(). +-type jsx_number() :: float() | integer(). -type encoder_opts() :: [encoder_opt()]. @@ -123,10 +113,7 @@ -type decoder_opts() :: [decoder_opt()]. -type decoder_opt() :: {strict, true | false} - | {encoding, supported_utf()} - | {label, atom | binary | existing_atom} - | {float, true | false}. - + | {encoding, supported_utf()}. -type verify_opts() :: [verify_opt()]. -type verify_opt() :: {encoding, auto | supported_utf()} diff --git a/src/jsx_decoder.hrl b/src/jsx_decoder.hrl index 19983df..0a893fa 100644 --- a/src/jsx_decoder.hrl +++ b/src/jsx_decoder.hrl @@ -30,7 +30,6 @@ %% opts record for decoder -record(opts, { - escaped_unicode = codepoint, multi_term = false, encoding = auto }). @@ -134,9 +133,6 @@ parse_opts(Opts) -> parse_opts([], Opts) -> Opts; -parse_opts([{escaped_unicode, Value}|Rest], Opts) -> - true = lists:member(Value, [ascii, codepoint, replace, none]), - parse_opts(Rest, Opts#opts{escaped_unicode=Value}); parse_opts([{multi_term, Value}|Rest], Opts) -> true = lists:member(Value, [true, false]), parse_opts(Rest, Opts#opts{multi_term=Value}); @@ -152,11 +148,11 @@ parse_opts(_, _) -> start(<>, Stack, Opts) when ?is_whitespace(S) -> start(Rest, Stack, Opts); start(<>, Stack, Opts) -> - {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; + {jsx, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; start(<>, Stack, Opts) -> - {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; + {jsx, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; start(<>, Stack, Opts) -> - string(Rest, Stack, Opts, []); + string(Rest, Stack, Opts); start(<<$t/?utfx, Rest/binary>>, Stack, Opts) -> tr(Rest, Stack, Opts); start(<<$f/?utfx, Rest/binary>>, Stack, Opts) -> @@ -172,7 +168,7 @@ start(<>, Stack, Opts) when ?is_nonzero(S) -> start(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> start(<>, Stack, Opts) @@ -184,21 +180,21 @@ start(Bin, Stack, Opts) -> maybe_done(<>, Stack, Opts) when ?is_whitespace(S) -> maybe_done(Rest, Stack, Opts); maybe_done(<>, [object|Stack], Opts) -> - {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}; + {jsx, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}; maybe_done(<>, [array|Stack], Opts) -> - {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}; + {jsx, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}; maybe_done(<>, [object|Stack], Opts) -> key(Rest, [key|Stack], Opts); maybe_done(<>, [array|_] = Stack, Opts) -> value(Rest, Stack, Opts); maybe_done(Rest, [], #opts{multi_term=true}=Opts) -> - {event, end_json, fun() -> start(Rest, [], Opts) end}; + {jsx, end_json, fun() -> start(Rest, [], Opts) end}; maybe_done(Rest, [], Opts) -> done(Rest, Opts); maybe_done(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> maybe_done(<>, Stack, Opts) @@ -210,8 +206,8 @@ maybe_done(Bin, Stack, Opts) -> done(<>, Opts) when ?is_whitespace(S) -> done(Rest, Opts); done(<<>>, Opts) -> - {event, end_json, fun() -> - {incomplete, fun(end_stream) -> + {jsx, end_json, fun() -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, <<>>}} ; (Stream) -> done(Stream, Opts) @@ -220,7 +216,7 @@ done(<<>>, Opts) -> done(Bin, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> done(<>, Opts) @@ -232,13 +228,13 @@ done(Bin, Opts) -> object(<>, Stack, Opts) when ?is_whitespace(S) -> object(Rest, Stack, Opts); object(<>, Stack, Opts) -> - string(Rest, Stack, Opts, []); + string(Rest, Stack, Opts); object(<>, [key|Stack], Opts) -> - {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}; + {jsx, end_object, fun() -> maybe_done(Rest, Stack, Opts) end}; object(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> object(<>, Stack, Opts) @@ -250,7 +246,7 @@ object(Bin, Stack, Opts) -> array(<>, Stack, Opts) when ?is_whitespace(S) -> array(Rest, Stack, Opts); array(<>, Stack, Opts) -> - string(Rest, Stack, Opts, []); + string(Rest, Stack, Opts); array(<<$t/?utfx, Rest/binary>>, Stack, Opts) -> tr(Rest, Stack, Opts); array(<<$f/?utfx, Rest/binary>>, Stack, Opts) -> @@ -264,15 +260,15 @@ array(<>, Stack, Opts) -> array(<>, Stack, Opts) when ?is_nonzero(S) -> integer(Rest, Stack, Opts, [S]); array(<>, Stack, Opts) -> - {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; + {jsx, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; array(<>, Stack, Opts) -> - {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; + {jsx, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; array(<>, [array|Stack], Opts) -> - {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}; + {jsx, end_array, fun() -> maybe_done(Rest, Stack, Opts) end}; array(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> array(<>, Stack, Opts) @@ -284,7 +280,7 @@ array(Bin, Stack, Opts) -> value(<>, Stack, Opts) when ?is_whitespace(S) -> value(Rest, Stack, Opts); value(<>, Stack, Opts) -> - string(Rest, Stack, Opts, []); + string(Rest, Stack, Opts); value(<<$t/?utfx, Rest/binary>>, Stack, Opts) -> tr(Rest, Stack, Opts); value(<<$f/?utfx, Rest/binary>>, Stack, Opts) -> @@ -298,13 +294,13 @@ value(<>, Stack, Opts) -> value(<>, Stack, Opts) when ?is_nonzero(S) -> integer(Rest, Stack, Opts, [S]); value(<>, Stack, Opts) -> - {event, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; + {jsx, start_object, fun() -> object(Rest, [key|Stack], Opts) end}; value(<>, Stack, Opts) -> - {event, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; + {jsx, start_array, fun() -> array(Rest, [array|Stack], Opts) end}; value(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> value(<>, Stack, Opts) @@ -320,7 +316,7 @@ colon(<>, [key|Stack], Opts) -> colon(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> colon(<>, Stack, Opts) @@ -332,11 +328,11 @@ colon(Bin, Stack, Opts) -> key(<>, Stack, Opts) when ?is_whitespace(S) -> key(Rest, Stack, Opts); key(<>, Stack, Opts) -> - string(Rest, Stack, Opts, []); + string(Rest, Stack, Opts); key(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> key(<>, Stack, Opts) @@ -352,20 +348,23 @@ key(Bin, Stack, Opts) -> %% string uses partial_utf/1 to cease parsing when invalid encodings are %% encountered rather than just checking remaining binary size like other %% states +string(Bin, Stack, Opts) -> string(Bin, Stack, Opts, <<>>). + + string(<>, [key|_] = Stack, Opts, Acc) -> - {event, {key, lists:reverse(Acc)}, fun() -> colon(Rest, Stack, Opts) end}; + {jsx, {key, Acc}, fun() -> colon(Rest, Stack, Opts) end}; string(<>, Stack, Opts, Acc) -> - {event, {string, lists:reverse(Acc)}, fun() -> + {jsx, {string, Acc}, fun() -> maybe_done(Rest, Stack, Opts) end}; string(<>, Stack, Opts, Acc) -> escape(Rest, Stack, Opts, Acc); string(<>, Stack, Opts, Acc) when ?is_noncontrol(S) -> - string(Rest, Stack, Opts, [S] ++ Acc); + string(Rest, Stack, Opts, <>); string(Bin, Stack, Opts, Acc) -> case partial_utf(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> string(<>, Stack, Opts, Acc) @@ -431,24 +430,24 @@ partial_utf(_) -> true. %% escaped_unicode used to hold the codepoint sequence. unescessary, but nicer %% than using the string accumulator escape(<<$b/?utfx, Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\b" ++ Acc); + string(Rest, Stack, Opts, <>); escape(<<$f/?utfx, Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\f" ++ Acc); + string(Rest, Stack, Opts, <>); escape(<<$n/?utfx, Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\n" ++ Acc); + string(Rest, Stack, Opts, <>); escape(<<$r/?utfx, Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\r" ++ Acc); + string(Rest, Stack, Opts, <>); escape(<<$t/?utfx, Rest/binary>>, Stack, Opts, Acc) -> - string(Rest, Stack, Opts, "\t" ++ Acc); + string(Rest, Stack, Opts, <>); escape(<<$u/?utfx, Rest/binary>>, Stack, Opts, Acc) -> escaped_unicode(Rest, Stack, Opts, Acc, []); escape(<>, Stack, Opts, Acc) when S =:= ?quote; S =:= ?solidus; S =:= ?rsolidus -> - string(Rest, Stack, Opts, [S] ++ Acc); + string(Rest, Stack, Opts, <>); escape(Bin, Stack, Opts, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> escape(<>, Stack, Opts, Acc) @@ -459,92 +458,27 @@ escape(Bin, Stack, Opts, Acc) -> %% this code is ugly and unfortunate, but so is json's handling of escaped %% unicode codepoint sequences. -%% if the ascii option is present, the sequence is converted to a codepoint -%% and inserted into the string if it represents an ascii value. -%% if the codepoint option is present the sequence is converted and inserted -%% as long as it represents a valid unicode codepoint. this means -%% non-characters representable in 16 bits are not converted (the utf16 -%% surrogates and the two special non-characters). -%% if the replace option is present sequences are converted as in codepoint -%% with the exception that the non-characters are replaced with u+fffd, the -%% unicode replacement character -%% any other option and no conversion is done -escaped_unicode(<>, - Stack, - #opts{escaped_unicode=ascii}=Opts, - String, - [C, B, A]) - when ?is_hex(D) -> - case erlang:list_to_integer([A, B, C, D], 16) of - X when X < 128 -> - string(Rest, Stack, Opts, [X] ++ String) - ; _ -> - string(Rest, Stack, Opts, [D, C, B, A, $u, ?rsolidus] ++ String) - end; -escaped_unicode(<>, - Stack, - #opts{escaped_unicode=codepoint}=Opts, - String, - [C, B, A]) - when ?is_hex(D) -> - case erlang:list_to_integer([A, B, C, D], 16) of - X when X >= 16#dc00, X =< 16#dfff -> - case check_acc_for_surrogate(String) of - false -> - string(Rest, - Stack, - Opts, - [D, C, B, A, $u, ?rsolidus] ++ String - ) - ; {Y, NewString} -> - string(Rest, - Stack, - Opts, - [surrogate_to_codepoint(Y, X)] ++ NewString - ) - end - ; X when X < 16#d800; X > 16#dfff, X < 16#fffe -> - string(Rest, Stack, Opts, [X] ++ String) - ; _ -> - string(Rest, Stack, Opts, [D, C, B, A, $u, ?rsolidus] ++ String) - end; -escaped_unicode(<>, - Stack, - #opts{escaped_unicode=replace}=Opts, - String, - [C, B, A]) - when ?is_hex(D) -> - case erlang:list_to_integer([A, B, C, D], 16) of - X when X >= 16#dc00, X =< 16#dfff -> - case check_acc_for_surrogate(String) of - false -> - string(Rest, - Stack, - Opts, - [16#fffd] ++ String - ) - ; {Y, NewString} -> - string(Rest, - Stack, - Opts, - [surrogate_to_codepoint(Y, X)] ++ NewString - ) - end - ; X when X < 16#d800; X > 16#dfff, X < 16#fffe -> - string(Rest, Stack, Opts, [X] ++ String) - ; _ -> - string(Rest, Stack, Opts, [16#fffd] ++ String) - end; +%% fuck json escaping. new rule: if it's not a valid codepoint, it's an error escaped_unicode(<>, Stack, Opts, String, [C, B, A]) - when ?is_hex(D) -> - string(Rest, Stack, Opts, [D, C, B, A, $u, ?rsolidus] ++ String); + when ?is_hex(D) -> + case erlang:list_to_integer([A, B, C, D], 16) of + %% high surrogate, we need a low surrogate next + X when X >= 16#d800, X =< 16#dbff -> + low_surrogate(Rest, Stack, Opts, String, X) + %% non-characters, you're not allowed to exchange these + ; X when X == 16#fffe; X == 16#ffff -> + {error, {badjson, <>}} + %% anything else + ; X -> + string(Rest, Stack, Opts, <>) + end; escaped_unicode(<>, Stack, Opts, String, Acc) - when ?is_hex(S) -> + when ?is_hex(S) -> escaped_unicode(Rest, Stack, Opts, String, [S] ++ Acc); escaped_unicode(Bin, Stack, Opts, String, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> escaped_unicode(<>, @@ -558,22 +492,83 @@ escaped_unicode(Bin, Stack, Opts, String, Acc) -> end. -%% upon encountering a low pair json/hex encoded value, check to see if there's -%% a high value already in the accumulator -check_acc_for_surrogate([D, C, B, A, $u, ?rsolidus|Rest]) - when ?is_hex(D), ?is_hex(C), ?is_hex(B), ?is_hex(A) -> +low_surrogate(<>, Stack, Opts, String, High) -> + low_surrogate_u(Rest, Stack, Opts, String, High); +low_surrogate(Bin, Stack, Opts, String, High) -> + case ?partial_codepoint(Bin) of + true -> + {jsx, incomplete, fun(end_stream) -> + {error, {badjson, Bin}} + ; (Stream) -> + low_surrogate(<>, + Stack, + Opts, + String, + High + ) + end} + ; false -> {error, {badjson, Bin}} + end. + + + +low_surrogate_u(<<$u/?utfx, Rest/binary>>, Stack, Opts, String, High) -> + low_surrogate(Rest, Stack, Opts, String, [], High); +low_surrogate_u(Bin, Stack, Opts, String, High) -> + case ?partial_codepoint(Bin) of + true -> + {jsx, incomplete, fun(end_stream) -> + {error, {badjson, Bin}} + ; (Stream) -> + low_surrogate_u(<>, + Stack, + Opts, + String, + High + ) + end} + ; false -> {error, {badjson, Bin}} + end. + + + +low_surrogate(<>, Stack, Opts, String, [C, B, A], High) + when ?is_hex(D) -> case erlang:list_to_integer([A, B, C, D], 16) of - X when X >=16#d800, X =< 16#dbff -> - {X, Rest}; - _ -> - false + X when X >= 16#dc00, X =< 16#dfff -> + string(Rest, + Stack, + Opts, + <> + ) + %% not a low surrogate, bad bad bad + ; X -> + {error, {badjson, <>}} end; -check_acc_for_surrogate(_) -> - false. +low_surrogate(<>, Stack, Opts, String, Acc, High) + when ?is_hex(S) -> + low_surrogate(Rest, Stack, Opts, String, [S] ++ Acc, High); +low_surrogate(Bin, Stack, Opts, String, Acc, High) -> + case ?partial_codepoint(Bin) of + true -> + {jsx, incomplete, fun(end_stream) -> + {error, {badjson, Bin}} + ; (Stream) -> + low_surrogate(<>, + Stack, + Opts, + String, + Acc, + High + ) + end} + ; false -> {error, {badjson, Bin}} + end. %% stole this from the unicode spec surrogate_to_codepoint(High, Low) -> + io:format("~p ~p~n", [High, Low]), (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000. @@ -586,7 +581,7 @@ negative(<>, Stack, Opts, Acc) when ?is_nonzero(S) -> negative(Bin, Stack, Opts, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> negative(<>, Stack, Opts, Acc) @@ -596,38 +591,38 @@ negative(Bin, Stack, Opts, Acc) -> zero(<>, [object|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> - {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} + {jsx, format_number(Acc), fun() -> + {jsx, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} end}; zero(<>, [array|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> - {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} + {jsx, format_number(Acc), fun() -> + {jsx, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} end}; zero(<>, [object|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> key(Rest, [key|Stack], Opts) end}; zero(<>, [array|_] = Stack, Opts, Acc) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> value(Rest, Stack, Opts) end}; zero(<>, Stack, Opts, Acc) -> initial_decimal(Rest, Stack, Opts, {Acc, []}); zero(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> maybe_done(Rest, Stack, Opts) end}; zero(<<>>, [], Opts, Acc) -> - {incomplete, fun(end_stream) -> - {event, format_number(Acc), fun() -> - {event, end_json, fun() -> zero(<<>>, [], Opts, Acc) end} + {jsx, incomplete, fun(end_stream) -> + {jsx, format_number(Acc), fun() -> + {jsx, end_json, fun() -> zero(<<>>, [], Opts, Acc) end} end} ; (Stream) -> zero(Stream, [], Opts, Acc) end}; zero(Bin, Stack, Opts, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> zero(<>, Stack, Opts, Acc) @@ -639,19 +634,19 @@ zero(Bin, Stack, Opts, Acc) -> integer(<>, Stack, Opts, Acc) when ?is_nonzero(S) -> integer(Rest, Stack, Opts, [S] ++ Acc); integer(<>, [object|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> - {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} + {jsx, format_number(Acc), fun() -> + {jsx, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} end}; integer(<>, [array|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> - {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} + {jsx, format_number(Acc), fun() -> + {jsx, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} end}; integer(<>, [object|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> key(Rest, [key|Stack], Opts) end}; integer(<>, [array|_] = Stack, Opts, Acc) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> value(Rest, Stack, Opts) end}; integer(<>, Stack, Opts, Acc) -> @@ -661,20 +656,20 @@ integer(<>, Stack, Opts, Acc) -> integer(<>, Stack, Opts, Acc) when S =:= $e; S =:= $E -> e(Rest, Stack, Opts, {lists:reverse(Acc), [], []}); integer(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> maybe_done(Rest, Stack, Opts) end}; integer(<<>>, [], Opts, Acc) -> - {incomplete, fun(end_stream) -> - {event, format_number(Acc), fun() -> - {event, end_json, fun() -> integer(<<>>, [], Opts, Acc) end} + {jsx, incomplete, fun(end_stream) -> + {jsx, format_number(Acc), fun() -> + {jsx, end_json, fun() -> integer(<<>>, [], Opts, Acc) end} end} ; (Stream) -> integer(Stream, [], Opts, Acc) end}; integer(Bin, Stack, Opts, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> integer(<>, Stack, Opts, Acc) @@ -689,7 +684,7 @@ initial_decimal(<>, Stack, Opts, {Int, Frac}) initial_decimal(Bin, Stack, Opts, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> initial_decimal(<>, @@ -706,39 +701,39 @@ decimal(<>, Stack, Opts, {Int, Frac}) when S=:= ?zero; ?is_nonzero(S) -> decimal(Rest, Stack, Opts, {Int, [S] ++ Frac}); decimal(<>, [object|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> - {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} + {jsx, format_number(Acc), fun() -> + {jsx, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} end}; decimal(<>, [array|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> - {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} + {jsx, format_number(Acc), fun() -> + {jsx, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} end}; decimal(<>, [object|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> key(Rest, [key|Stack], Opts) end}; decimal(<>, [array|_] = Stack, Opts, Acc) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> value(Rest, Stack, Opts) end}; decimal(<>, Stack, Opts, {Int, Frac}) when S =:= $e; S =:= $E -> e(Rest, Stack, Opts, {Int, Frac, []}); decimal(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> maybe_done(Rest, Stack, Opts) end}; decimal(<<>>, [], Opts, Acc) -> - {incomplete, fun(end_stream) -> - {event, format_number(Acc), fun() -> - {event, end_json, fun() -> decimal(<<>>, [], Opts, Acc) end} + {jsx, incomplete, fun(end_stream) -> + {jsx, format_number(Acc), fun() -> + {jsx, end_json, fun() -> decimal(<<>>, [], Opts, Acc) end} end} ; (Stream) -> decimal(Stream, [], Opts, Acc) end}; decimal(Bin, Stack, Opts, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> decimal(<>, Stack, Opts, Acc) @@ -756,7 +751,7 @@ e(<>, Stack, Opts, {Int, Frac, Exp}) e(Bin, Stack, Opts, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> e(<>, Stack, Opts, Acc) @@ -771,7 +766,7 @@ ex(<>, Stack, Opts, {Int, Frac, Exp}) ex(Bin, Stack, Opts, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> ex(<>, Stack, Opts, Acc) @@ -784,36 +779,36 @@ exp(<>, Stack, Opts, {Int, Frac, Exp}) when S =:= ?zero; ?is_nonzero(S) -> exp(Rest, Stack, Opts, {Int, Frac, [S] ++ Exp}); exp(<>, [object|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> - {event, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} + {jsx, format_number(Acc), fun() -> + {jsx, end_object, fun() -> maybe_done(Rest, Stack, Opts) end} end}; exp(<>, [array|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> - {event, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} + {jsx, format_number(Acc), fun() -> + {jsx, end_array, fun() -> maybe_done(Rest, Stack, Opts) end} end}; exp(<>, [object|Stack], Opts, Acc) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> key(Rest, [key|Stack], Opts) end}; exp(<>, [array|_] = Stack, Opts, Acc) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> value(Rest, Stack, Opts) end}; exp(<>, Stack, Opts, Acc) when ?is_whitespace(S) -> - {event, format_number(Acc), fun() -> + {jsx, format_number(Acc), fun() -> maybe_done(Rest, Stack, Opts) end}; exp(<<>>, [], Opts, Acc) -> - {incomplete, fun(end_stream) -> - {event, format_number(Acc), fun() -> - {event, end_json, fun() -> exp(<<>>, [], Opts, Acc) end} + {jsx, incomplete, fun(end_stream) -> + {jsx, format_number(Acc), fun() -> + {jsx, end_json, fun() -> exp(<<>>, [], Opts, Acc) end} end} ; (Stream) -> exp(Stream, [], Opts, Acc) end}; exp(Bin, Stack, Opts, Acc) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> exp(<>, Stack, Opts, Acc) @@ -839,7 +834,7 @@ tr(<<$r/?utfx, Rest/binary>>, Stack, Opts) -> tr(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> tr(<>, Stack, Opts) @@ -853,7 +848,7 @@ tru(<<$u/?utfx, Rest/binary>>, Stack, Opts) -> tru(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> tru(<>, Stack, Opts) @@ -863,11 +858,11 @@ tru(Bin, Stack, Opts) -> true(<<$e/?utfx, Rest/binary>>, Stack, Opts) -> - {event, {literal, true}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {jsx, {literal, true}, fun() -> maybe_done(Rest, Stack, Opts) end}; true(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> true(<>, Stack, Opts) @@ -881,7 +876,7 @@ fa(<<$a/?utfx, Rest/binary>>, Stack, Opts) -> fa(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> fa(<>, Stack, Opts) @@ -895,7 +890,7 @@ fal(<<$l/?utfx, Rest/binary>>, Stack, Opts) -> fal(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> fal(<>, Stack, Opts) @@ -909,7 +904,7 @@ fals(<<$s/?utfx, Rest/binary>>, Stack, Opts) -> fals(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> fals(<>, Stack, Opts) @@ -919,11 +914,11 @@ fals(Bin, Stack, Opts) -> false(<<$e/?utfx, Rest/binary>>, Stack, Opts) -> - {event, {literal, false}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {jsx, {literal, false}, fun() -> maybe_done(Rest, Stack, Opts) end}; false(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> false(<>, Stack, Opts) @@ -937,7 +932,7 @@ nu(<<$u/?utfx, Rest/binary>>, Stack, Opts) -> nu(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> nu(<>, Stack, Opts) @@ -951,7 +946,7 @@ nul(<<$l/?utfx, Rest/binary>>, Stack, Opts) -> nul(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> nul(<>, Stack, Opts) @@ -961,11 +956,11 @@ nul(Bin, Stack, Opts) -> null(<<$l/?utfx, Rest/binary>>, Stack, Opts) -> - {event, {literal, null}, fun() -> maybe_done(Rest, Stack, Opts) end}; + {jsx, {literal, null}, fun() -> maybe_done(Rest, Stack, Opts) end}; null(Bin, Stack, Opts) -> case ?partial_codepoint(Bin) of true -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> null(<>, Stack, Opts) diff --git a/src/jsx_encoder.erl b/src/jsx_encoder.erl index 9c494c9..3b3fae8 100644 --- a/src/jsx_encoder.erl +++ b/src/jsx_encoder.erl @@ -31,7 +31,6 @@ -record(opts, { - escaped_unicode = codepoint, multi_term = false, encoding = auto }). @@ -43,30 +42,30 @@ encoder(Opts) -> fun(Forms) -> start(Forms, Opts) end. -define(ENDJSON, - {event, end_json, fun() -> - {incomplete, fun(Forms) -> {error, {badjson, Forms}} end} + {jsx, end_json, fun() -> + {jsx, incomplete, fun(Forms) -> {error, {badjson, Forms}} end} end} ). -start({string, String}, _Opts) when is_list(String) -> - {event, {string, json_escape(String)}, fun() -> ?ENDJSON end}; -start({float, Float}, _Opts) when is_list(Float) -> - {event, {float, Float}, fun() -> ?ENDJSON end}; -start({integer, Int}, _Opts) when is_list(Int) -> - {event, {integer, Int}, fun() -> ?ENDJSON end}; +start({string, String}, _Opts) when is_binary(String) -> + {jsx, {string, json_escape(String)}, fun() -> ?ENDJSON end}; +start({float, Float}, _Opts) when is_float(Float) -> + {jsx, {float, Float}, fun() -> ?ENDJSON end}; +start({integer, Int}, _Opts) when is_integer(Int) -> + {jsx, {integer, Int}, fun() -> ?ENDJSON end}; start({literal, Atom}, _Opts) when Atom == true; Atom == false; Atom == null -> - {event, {literal, Atom}, fun() -> ?ENDJSON end}; + {jsx, {literal, Atom}, fun() -> ?ENDJSON end}; %% second parameter is a stack to match end_foos to start_foos start(Forms, Opts) -> list_or_object(Forms, [], Opts). list_or_object([start_object|Forms], Stack, Opts) -> - {event, start_object, fun() -> key(Forms, [object] ++ Stack, Opts) end}; + {jsx, start_object, fun() -> key(Forms, [object] ++ Stack, Opts) end}; list_or_object([start_array|Forms], Stack, Opts) -> - {event, start_array, fun() -> value(Forms, [array] ++ Stack, Opts) end}; + {jsx, start_array, fun() -> value(Forms, [array] ++ Stack, Opts) end}; list_or_object([], Stack, Opts) -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, []}} ; (Stream) -> list_or_object(Stream, Stack, Opts) @@ -74,12 +73,12 @@ list_or_object([], Stack, Opts) -> list_or_object(Forms, _, _) -> {error, {badjson, Forms}}. -key([{key, Key}|Forms], Stack, Opts) when is_list(Key) -> - {event, {key, json_escape(Key)}, fun() -> value(Forms, Stack, Opts) end}; +key([{key, Key}|Forms], Stack, Opts) when is_binary(Key) -> + {jsx, {key, json_escape(Key)}, fun() -> value(Forms, Stack, Opts) end}; key([end_object|Forms], [object|Stack], Opts) -> - {event, end_object, fun() -> maybe_done(Forms, Stack, Opts) end}; + {jsx, end_object, fun() -> maybe_done(Forms, Stack, Opts) end}; key([], Stack, Opts) -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, []}} ; (Stream) -> key(Stream, Stack, Opts) @@ -87,23 +86,23 @@ key([], Stack, Opts) -> key(Forms, _, _) -> {error, {badjson, Forms}}. -value([{string, S}|Forms], Stack, Opts) when is_list(S) -> - {event, {string, json_escape(S)}, fun() -> maybe_done(Forms, Stack, Opts) end}; -value([{float, F}|Forms], Stack, Opts) when is_list(F) -> - {event, {float, F}, fun() -> maybe_done(Forms, Stack, Opts) end}; -value([{integer, I}|Forms], Stack, Opts) when is_list(I) -> - {event, {integer, I}, fun() -> maybe_done(Forms, Stack, Opts) end}; +value([{string, S}|Forms], Stack, Opts) when is_binary(S) -> + {jsx, {string, json_escape(S)}, fun() -> maybe_done(Forms, Stack, Opts) end}; +value([{float, F}|Forms], Stack, Opts) when is_float(F) -> + {jsx, {float, F}, fun() -> maybe_done(Forms, Stack, Opts) end}; +value([{integer, I}|Forms], Stack, Opts) when is_integer(I) -> + {jsx, {integer, I}, fun() -> maybe_done(Forms, Stack, Opts) end}; value([{literal, L}|Forms], Stack, Opts) when L == true; L == false; L == null -> - {event, {literal, L}, fun() -> maybe_done(Forms, Stack, Opts) end}; + {jsx, {literal, L}, fun() -> maybe_done(Forms, Stack, Opts) end}; value([start_object|Forms], Stack, Opts) -> - {event, start_object, fun() -> key(Forms, [object] ++ Stack, Opts) end}; + {jsx, start_object, fun() -> key(Forms, [object] ++ Stack, Opts) end}; value([start_array|Forms], Stack, Opts) -> - {event, start_array, fun() -> value(Forms, [array] ++ Stack, Opts) end}; + {jsx, start_array, fun() -> value(Forms, [array] ++ Stack, Opts) end}; value([end_array|Forms], [array|Stack], Opts) -> - {event, end_array, fun() -> maybe_done(Forms, Stack, Opts) end}; + {jsx, end_array, fun() -> maybe_done(Forms, Stack, Opts) end}; value([], Stack, Opts) -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, []}} ; (Stream) -> value(Stream, Stack, Opts) @@ -114,15 +113,15 @@ value(Forms, _, _) -> {error, {badjson, Forms}}. maybe_done([], [], _) -> ?ENDJSON; maybe_done([end_json], [], _) -> ?ENDJSON; maybe_done([end_json|Forms], [], #opts{multi_term=true}=Opts) -> - {event, end_json, fun() -> start(Forms, Opts) end}; + {jsx, end_json, fun() -> start(Forms, Opts) end}; maybe_done([end_object|Forms], [object|Stack], Opts) -> - {event, end_object, fun() -> maybe_done(Forms, Stack, Opts) end}; + {jsx, end_object, fun() -> maybe_done(Forms, Stack, Opts) end}; maybe_done([end_array|Forms], [array|Stack], Opts) -> - {event, end_array, fun() -> maybe_done(Forms, Stack, Opts) end}; + {jsx, end_array, fun() -> maybe_done(Forms, Stack, Opts) end}; maybe_done(Forms, [object|_] = Stack, Opts) -> key(Forms, Stack, Opts); maybe_done(Forms, [array|_] = Stack, Opts) -> value(Forms, Stack, Opts); maybe_done([], Stack, Opts) -> - {incomplete, fun(end_stream) -> + {jsx, incomplete, fun(end_stream) -> {error, {badjson, []}} ; (Stream) -> maybe_done(Stream, Stack, Opts) @@ -130,41 +129,42 @@ maybe_done([], Stack, Opts) -> maybe_done(Forms, _, _) -> {error, {badjson, Forms}}. -%% json string escaping. escape the json control sequences to + +%% json string escaping, for utf8 binaries. escape the json control sequences to %% their json equivalent, escape other control characters to \uXXXX sequences, %% everything else should be a legal json string component json_escape(String) -> - json_escape(String, []). + json_escape(String, <<>>). %% double quote -json_escape([$\"|Rest], Acc) -> - json_escape(Rest, [$\", $\\] ++ Acc); +json_escape(<<$\", Rest/binary>>, Acc) -> + json_escape(Rest, <>); %% backslash \ reverse solidus -json_escape([$\\|Rest], Acc) -> - json_escape(Rest, [$\\, $\\] ++ Acc); +json_escape(<<$\\, Rest/binary>>, Acc) -> + json_escape(Rest, <>); %% backspace -json_escape([$\b|Rest], Acc) -> - json_escape(Rest, [$b, $\\] ++ Acc); +json_escape(<<$\b, Rest/binary>>, Acc) -> + json_escape(Rest, <>); %% form feed -json_escape([$\f|Rest], Acc) -> - json_escape(Rest, [$f, $\\] ++ Acc); +json_escape(<<$\f, Rest/binary>>, Acc) -> + json_escape(Rest, <>); %% newline -json_escape([$\n|Rest], Acc) -> - json_escape(Rest, [$n, $\\] ++ Acc); +json_escape(<<$\n, Rest/binary>>, Acc) -> + json_escape(Rest, <>); %% cr -json_escape([$\r|Rest], Acc) -> - json_escape(Rest, [$r, $\\] ++ Acc); +json_escape(<<$\r, Rest/binary>>, Acc) -> + json_escape(Rest, <>); %% tab -json_escape([$\t|Rest], Acc) -> - json_escape(Rest, [$t, $\\] ++ Acc); +json_escape(<<$\t, Rest/binary>>, Acc) -> + json_escape(Rest, <>); %% other control characters -json_escape([C|Rest], Acc) when C >= 0, C < $\s -> - json_escape(Rest, json_escape_sequence(C) ++ Acc); +json_escape(<>, Acc) when C >= 0, C < $\s -> + json_escape(Rest, <>); %% any other legal codepoint -json_escape([C|Rest], Acc) -> - json_escape(Rest, [C] ++ Acc); -json_escape([], Acc) -> - lists:reverse(Acc); +json_escape(<>, Acc) -> + json_escape(Rest, <>); +json_escape(<<>>, Acc) -> + Acc; json_escape(_, _) -> erlang:error(badarg). @@ -173,7 +173,7 @@ json_escape(_, _) -> %% codepoints this module might escape, ie, control characters json_escape_sequence(C) when C < 16#20 -> <<_:8, A:4, B:4>> = <>, % first two hex digits are always zero - [(to_hex(B)), (to_hex(A)), $0, $0, $u, $\\]. + <<$\\, $u, $0, $0, (to_hex(A)), (to_hex(B))>>. to_hex(15) -> $f; @@ -186,6 +186,7 @@ to_hex(X) -> X + $0. + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). @@ -216,7 +217,7 @@ encode_incremental([Term], F, Expected, Acc) -> end; encode_incremental([Term|Terms], F, Expected, Acc) -> case loop(F([Term]), []) of - {incomplete, Next, R} -> + {jsx, incomplete, Next, R} -> encode_incremental(Terms, Next, Expected, Acc ++ R) ; _ -> false @@ -224,12 +225,12 @@ encode_incremental([Term|Terms], F, Expected, Acc) -> loop({error, _}, _) -> error; -loop({incomplete, Next}, Acc) -> {incomplete, Next, lists:reverse(Acc)}; -loop({event, end_json, Next}, Acc) -> - {incomplete, F} = Next(), +loop({jsx, incomplete, Next}, Acc) -> {jsx, incomplete, Next, lists:reverse(Acc)}; +loop({jsx, end_json, Next}, Acc) -> + {jsx, incomplete, F} = Next(), {error, {badjson, []}} = F([]), {ok, lists:reverse(Acc)}; -loop({event, Event, Next}, Acc) -> loop(Next(), [Event] ++ Acc). +loop({jsx, Event, Next}, Acc) -> loop(Next(), [Event] ++ Acc). encode_test_() -> @@ -237,9 +238,9 @@ encode_test_() -> {"empty object", ?_assert(encode([start_object, end_object]))}, {"empty array", ?_assert(encode([start_array, end_array]) =:= true)}, {"nested empty objects", ?_assert(encode([start_object, - {key, "empty object"}, + {key, <<"empty object">>}, start_object, - {key, "empty object"}, + {key, <<"empty object">>}, start_object, end_object, end_object, @@ -253,20 +254,20 @@ encode_test_() -> end_array ]))}, {"simple object", ?_assert(encode([start_object, - {key, "a"}, - {string, "hello"}, - {key, "b"}, - {integer, "1"}, - {key, "c"}, - {float, "1.0"}, - {key, "d"}, + {key, <<"a">>}, + {string, <<"hello">>}, + {key, <<"b">>}, + {integer, 1}, + {key, <<"c">>}, + {float, 1.0}, + {key, <<"d">>}, {literal, true}, end_object ]))}, {"simple array", ?_assert(encode([start_array, - {string, "hello"}, - {integer, "1"}, - {float, "1.0"}, + {string, <<"hello">>}, + {integer, 1}, + {float, 1.0}, {literal, true}, end_array ]))}, @@ -274,10 +275,10 @@ encode_test_() -> end_array, end_array ]))}, - {"naked string", ?_assert(encode({string, "hello"}))}, + {"naked string", ?_assert(encode({string, <<"hello">>}))}, {"naked literal", ?_assert(encode({literal, true}))}, - {"naked integer", ?_assert(encode({integer, "1"}))}, - {"naked float", ?_assert(encode({float, "1.0"}))} + {"naked integer", ?_assert(encode({integer, 1}))}, + {"naked float", ?_assert(encode({float, 1.0}))} ]. @@ -285,14 +286,14 @@ escape_test_() -> [ {"json string escaping", ?_assert(json_escape( - "\"\\\b\f\n\r\t" - ) =:= "\\\"\\\\\\b\\f\\n\\r\\t" + <<"\"\\\b\f\n\r\t">> + ) =:= <<"\\\"\\\\\\b\\f\\n\\r\\t">> ) }, {"json string hex escape", ?_assert(json_escape( - [1, 2, 3, 11, 26, 30, 31] - ) =:= "\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f" + <<1, 2, 3, 11, 26, 30, 31>> + ) =:= <<"\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">> ) } ]. diff --git a/src/jsx_format.erl b/src/jsx_format.erl index ac23034..a00f845 100644 --- a/src/jsx_format.erl +++ b/src/jsx_format.erl @@ -43,14 +43,14 @@ format(JSON, OptsList) when is_binary(JSON) -> P = jsx:decoder(extract_parser_opts(OptsList)), format(fun() -> P(JSON) end, OptsList); -format(Terms, OptsList) when is_list(Terms) -> +format(Terms, OptsList) when is_list(Terms); is_tuple(Terms) -> P = jsx:encoder(), format(fun() -> P(Terms) end, OptsList); format(F, OptsList) when is_function(F) -> Opts = parse_opts(OptsList, #format_opts{}), {Continue, String} = format_something(F(), Opts, 0), case Continue() of - {event, end_json, _} -> encode(String, Opts) + {jsx, end_json, _} -> encode(String, Opts) ; _ -> {error, badarg} end. @@ -87,9 +87,9 @@ extract_parser_opts([K|Rest], Acc) -> end. -format_something({event, start_object, Next}, Opts, Level) -> +format_something({jsx, start_object, Next}, Opts, Level) -> case Next() of - {event, end_object, Continue} -> + {jsx, end_object, Continue} -> {Continue, [?start_object, ?end_object]} ; Event -> {Continue, Object} = format_object(Event, [], Opts, Level + 1), @@ -99,24 +99,24 @@ format_something({event, start_object, Next}, Opts, Level) -> ?end_object ]} end; -format_something({event, start_array, Next}, Opts, Level) -> +format_something({jsx, start_array, Next}, Opts, Level) -> case Next() of - {event, end_array, Continue} -> + {jsx, end_array, Continue} -> {Continue, [?start_array, ?end_array]} ; Event -> {Continue, Object} = format_array(Event, [], Opts, Level + 1), {Continue, [?start_array, Object, indent(Opts, Level), ?end_array]} end; -format_something({event, {Type, Value}, Next}, _Opts, _Level) -> +format_something({jsx, {Type, Value}, Next}, _Opts, _Level) -> {Next, [encode(Type, Value)]}. -format_object({event, end_object, Next}, Acc, _Opts, _Level) -> +format_object({jsx, end_object, Next}, Acc, _Opts, _Level) -> {Next, Acc}; -format_object({event, {key, Key}, Next}, Acc, Opts, Level) -> +format_object({jsx, {key, Key}, Next}, Acc, Opts, Level) -> {Continue, Value} = format_something(Next(), Opts, Level), case Continue() of - {event, end_object, NextNext} -> + {jsx, end_object, NextNext} -> {NextNext, [Acc, indent(Opts, Level), encode(string, Key), @@ -141,12 +141,12 @@ format_object({event, {key, Key}, Next}, Acc, Opts, Level) -> end. -format_array({event, end_array, Next}, Acc, _Opts, _Level) -> +format_array({jsx, end_array, Next}, Acc, _Opts, _Level) -> {Next, Acc}; format_array(Event, Acc, Opts, Level) -> {Continue, Value} = format_something(Event, Opts, Level), case Continue() of - {event, end_array, NextNext} -> + {jsx, end_array, NextNext} -> {NextNext, [Acc, indent(Opts, Level), Value]} ; Else -> format_array(Else, @@ -265,8 +265,8 @@ terms_test_() -> [ {"terms", ?_assert(format([start_object, - {key, "key"}, - {string, "value"}, + {key, <<"key">>}, + {string, <<"value">>}, end_object ], []) =:= <<"{\"key\":\"value\"}">> )} diff --git a/src/jsx_eep0018.erl b/src/jsx_terms.erl similarity index 68% rename from src/jsx_eep0018.erl rename to src/jsx_terms.erl index 25a603a..ecd947a 100644 --- a/src/jsx_eep0018.erl +++ b/src/jsx_terms.erl @@ -22,7 +22,7 @@ --module(jsx_eep0018). +-module(jsx_terms). -export([json_to_term/2, term_to_json/2]). @@ -35,8 +35,8 @@ -endif. - --spec json_to_term(JSON::binary(), Opts::decoder_opts()) -> eep0018(). +-spec json_to_term(JSON::binary(), Opts::decoder_opts()) -> + jsx_term() | {jsx, incomplete, fun()}. json_to_term(JSON, Opts) -> P = jsx:decoder(extract_parser_opts(Opts)), @@ -47,11 +47,10 @@ json_to_term(JSON, Opts) -> %% the jsx formatter (pretty printer) can do most of the heavy lifting in -%% converting erlang terms to json strings, but it expects a jsx event -%% iterator. luckily, the mapping from erlang terms to jsx events is -%% straightforward and the iterator can be faked with an anonymous function +%% converting erlang terms to json strings --spec term_to_json(JSON::eep0018(), Opts::encoder_opts()) -> binary(). +-spec term_to_json(JSON::jsx_term(), Opts::encoder_opts()) -> + binary() | {jsx, incomplete, fun()}. term_to_json(List, Opts) -> case proplists:get_value(strict, Opts, false) of @@ -60,21 +59,12 @@ term_to_json(List, Opts) -> ; false -> continue end, Encoding = proplists:get_value(encoding, Opts, utf8), - jsx:format(eventify(lists:reverse([end_json] ++ term_to_events(List))), - [{output_encoding, Encoding}] ++ Opts - ). - - -eventify([]) -> - fun() -> - {incomplete, fun(List) when is_list(List) -> - eventify(List) - ; (_) -> - erlang:error(badarg) - end} - end; -eventify([Next|Rest]) -> - fun() -> {event, Next, eventify(Rest)} end. + FOpts = [{output_encoding, Encoding}] ++ Opts, + case term_to_events(List) of + L when is_tuple(L) -> jsx:format(L, FOpts) + ; L when is_list(L) -> jsx:format(lists:reverse(L), FOpts) + end. + extract_parser_opts(Opts) -> @@ -85,113 +75,90 @@ extract_parser_opts([{K,V}|Rest], Acc) -> case lists:member(K, [encoding]) of true -> [{K,V}] ++ Acc ; false -> extract_parser_opts(Rest, Acc) - end; -extract_parser_opts([K|Rest], Acc) -> - case lists:member(K, [encoding]) of - true -> [K] ++ Acc - ; false -> extract_parser_opts(Rest, Acc) end. + + %% ensure the first jsx event we get is start_object or start_array when running %% in strict mode -collect_strict({event, Start, Next}, Acc, Opts) - when Start =:= start_object; Start =:= start_array -> +collect_strict({jsx, Start, Next}, Acc, Opts) + when Start =:= start_object; Start =:= start_array -> collect(Next(), [[]|Acc], Opts); -collect_strict({incomplete, More}, Acc, Opts) -> +collect_strict({jsx, incomplete, More}, Acc, Opts) -> case proplists:get_value(stream, Opts, false) of - true -> {incomplete, fun(JSON) -> collect(More(JSON), Acc, Opts) end} + true -> {jsx, incomplete, fun(JSON) -> + collect_strict(More(JSON), Acc, Opts) + end} ; false -> erlang:error(badarg) end; collect_strict(_, _, _) -> erlang:error(badarg). %% collect decoder events and convert to eep0018 format -collect({event, Start, Next}, Acc, Opts) - when Start =:= start_object; Start =:= start_array -> +collect({jsx, Start, Next}, Acc, Opts) + when Start =:= start_object; Start =:= start_array -> collect(Next(), [[]|Acc], Opts); %% special case for empty object -collect({event, end_object, Next}, [[], Parent|Rest], Opts) - when is_list(Parent) -> +collect({jsx, end_object, Next}, [[], Parent|Rest], Opts) + when is_list(Parent) -> collect(Next(), [[[{}]] ++ Parent] ++ Rest, Opts); %% reverse the array/object accumulator before prepending it to it's parent -collect({event, end_object, Next}, [Current, Parent|Rest], Opts) - when is_list(Parent) -> +collect({jsx, end_object, Next}, [Current, Parent|Rest], Opts) + when is_list(Parent) -> collect(Next(), [[lists:reverse(Current)] ++ Parent] ++ Rest, Opts); -collect({event, end_array, Next}, [Current, Parent|Rest], Opts) - when is_list(Parent) -> +collect({jsx, end_array, Next}, [Current, Parent|Rest], Opts) + when is_list(Parent) -> collect(Next(), [[lists:reverse(Current)] ++ Parent] ++ Rest, Opts); %% special case for empty object -collect({event, end_object, Next}, [[], Key, Parent|Rest], Opts) -> +collect({jsx, end_object, Next}, [[], Key, Parent|Rest], Opts) -> collect(Next(), [[{Key, [{}]}] ++ Parent] ++ Rest, Opts); -collect({event, End, Next}, [Current, Key, Parent|Rest], Opts) - when End =:= end_object; End =:= end_array -> +collect({jsx, End, Next}, [Current, Key, Parent|Rest], Opts) + when End =:= end_object; End =:= end_array -> collect(Next(), [[{Key, lists:reverse(Current)}] ++ Parent] ++ Rest, Opts); -collect({event, end_json, _Next}, [[Acc]], _Opts) -> +collect({jsx, end_json, _Next}, [[Acc]], _Opts) -> Acc; %% key can only be emitted inside of a json object, so just insert it directly %% into the head of the accumulator and deal with it when we receive it's %% paired value -collect({event, {key, _} = PreKey, Next}, [Current|_] = Acc, Opts) -> +collect({jsx, {key, _} = PreKey, Next}, Acc, Opts) -> Key = event(PreKey, Opts), - case decode_key_repeats(Key, Current) of - true -> erlang:error(badarg) - ; false -> collect(Next(), [Key] ++ Acc, Opts) - end; -%% check acc to see if we're inside an object or an array. because inside an -%% object context the events that fall this far are always preceded by a key -%% (which are binaries or atoms), if Current is a list, we're inside an array, -%% else, an object -collect({event, Event, Next}, [Current|Rest], Opts) when is_list(Current) -> - collect(Next(), [[event(Event, Opts)] ++ Current] ++ Rest, Opts); -collect({event, Event, Next}, [Key, Current|Rest], Opts) -> - collect(Next(), [[{Key, event(Event, Opts)}] ++ Current] ++ Rest, Opts); -%% if our returned event is {incomplete, ...} try to force end and return + collect(Next(), [Key] ++ Acc, Opts); +%% if our returned event is {jsx, incomplete, ...} try to force end and return %% the Event if one is returned -collect({incomplete, More}, Acc, Opts) -> +collect({jsx, incomplete, More}, Acc, Opts) -> case More(end_stream) of - {event, Event, _Next} -> event(Event, Opts) + {jsx, Event, _Next} -> event(Event, Opts) ; _ -> case proplists:get_value(stream, Opts, false) of true -> - {incomplete, + {jsx, incomplete, fun(JSON) -> collect(More(JSON), Acc, Opts) end } ; false -> erlang:error(badarg) end end; +%% check acc to see if we're inside an object or an array. because inside an +%% object context the events that fall this far are always preceded by a key +%% (which are binaries or atoms), if Current is a list, we're inside an array, +%% else, an object +collect({jsx, Event, Next}, [Current|Rest], Opts) when is_list(Current) -> + collect(Next(), [[event(Event, Opts)] ++ Current] ++ Rest, Opts); +collect({jsx, Event, Next}, [Key, Current|Rest], Opts) -> + collect(Next(), [[{Key, event(Event, Opts)}] ++ Current] ++ Rest, Opts); %% any other event is an error collect(_, _, _) -> erlang:error(badarg). -%% helper functions for converting jsx events to eep0018 formats -event({string, String}, _Opts) -> - unicode:characters_to_binary(String); -event({key, Key}, Opts) -> - case proplists:get_value(label, Opts, binary) of - binary -> unicode:characters_to_binary(Key) - ; atom -> - try list_to_atom(Key) - catch error:badarg -> unicode:characters_to_binary(Key) end - ; existing_atom -> - try list_to_existing_atom(Key) - catch error:badarg -> unicode:characters_to_binary(Key) end - end; -event({integer, Integer}, Opts) -> - case proplists:get_value(float, Opts, false) of - true -> erlang:float(Integer) - ; false -> Integer - end; +%% helper functions for converting jsx events to term format +event({string, String}, _Opts) -> String; +event({key, Key}, _Opts) -> Key; +event({integer, Integer}, _Opts) -> Integer; event({float, Float}, _Opts) -> Float; event({literal, Literal}, _Opts) -> Literal. - - -decode_key_repeats(Key, [{Key, _Value}|_Rest]) -> true; -decode_key_repeats(Key, [_|Rest]) -> decode_key_repeats(Key, Rest); -decode_key_repeats(_Key, []) -> false. -%% convert eep0018 representation to jsx events. note special casing for the +%% convert term format representation to jsx events. note special casing for the %% empty object term_to_events([{}]) -> [end_object, start_object]; @@ -200,16 +167,14 @@ term_to_events([First|_] = List) when is_tuple(First) -> term_to_events(List) when is_list(List) -> list_to_events(List, [start_array]); term_to_events(Term) -> - term_to_event(Term). + [Res] = term_to_event(Term), + Res. proplist_to_events([{Key, Term}|Rest], Acc) -> Event = term_to_event(Term), EncodedKey = key_to_event(Key), - case encode_key_repeats(EncodedKey, Acc) of - false -> proplist_to_events(Rest, Event ++ EncodedKey ++ Acc) - ; true -> erlang:error(badarg) - end; + proplist_to_events(Rest, Event ++ EncodedKey ++ Acc); proplist_to_events([], Acc) -> [end_object] ++ Acc; proplist_to_events(_, _) -> @@ -236,27 +201,10 @@ term_to_event(null) -> [{literal, null}]; term_to_event(_) -> erlang:error(badarg). -key_to_event(Key) when is_atom(Key) -> - [{key, json_escape(erlang:atom_to_binary(Key, utf8))}]; key_to_event(Key) when is_binary(Key) -> [{key, json_escape(Key)}]. -encode_key_repeats([Key], SoFar) -> encode_key_repeats(Key, SoFar, 0). - -encode_key_repeats(Key, [Key|_], 0) -> - true; -encode_key_repeats(Key, [end_object|Rest], Level) -> - encode_key_repeats(Key, Rest, Level + 1); -encode_key_repeats(_, [start_object|_], 0) -> - false; -encode_key_repeats(Key, [start_object|Rest], Level) -> - encode_key_repeats(Key, Rest, Level - 1); -encode_key_repeats(Key, [_|Rest], Level) -> - encode_key_repeats(Key, Rest, Level); -encode_key_repeats(_, [], 0) -> - false. - %% json string escaping, for utf8 binaries. escape the json control sequences to %% their json equivalent, escape other control characters to \uXXXX sequences, @@ -322,9 +270,8 @@ decode_test_() -> {"empty array", ?_assert(json_to_term(<<"[]">>, []) =:= [])}, {"simple object", ?_assert(json_to_term( - <<"{\"a\": true, \"b\": true, \"c\": true}">>, - [{label, atom}] - ) =:= [{a, true}, {b, true}, {c, true}] + <<"{\"a\": true, \"b\": true, \"c\": true}">>, [] + ) =:= [{<<"a">>, true}, {<<"b">>, true}, {<<"c">>, true}] ) }, {"simple array", @@ -335,9 +282,11 @@ decode_test_() -> }, {"nested structures", ?_assert(json_to_term( - <<"{\"x\":[{\"x\":[{}, {}],\"y\":{}}, []],\"y\":{}}">>, - [{label, atom}] - ) =:= [{x, [[{x, [[{}], [{}]]}, {y, [{}]}],[]]}, {y, [{}]}] + <<"{\"x\":[{\"x\":[{}, {}],\"y\":{}}, []],\"y\":{}}">>, [] + ) =:= [{<<"x">>, + [[{<<"x">>, [[{}], [{}]]}, {<<"y">>, [{}]}],[]]}, + {<<"y">>, [{}]} + ] ) }, {"numbers", @@ -347,13 +296,6 @@ decode_test_() -> ) =:= [-100000000.0, -1, 0.0, 0, 1, 100000000, 10000000.0] ) }, - {"numbers (all floats)", - ?_assert(json_to_term( - <<"[-100000000.0, -1, 0.0, 0, 1, 1000, 10000000.0]">>, - [{float, true}] - ) =:= [-100000000.0, -1.0, 0.0, 0.0, 1.0, 1000.0, 10000000.0] - ) - }, {"strings", ?_assert(json_to_term(<<"[\"a string\"]">>, [] @@ -389,9 +331,9 @@ encode_test_() -> {"empty object", ?_assert(term_to_json([{}], []) =:= <<"{}">>)}, {"empty array", ?_assert(term_to_json([], []) =:= <<"[]">>)}, {"simple object", - ?_assert(term_to_json([{a, true}, {b, true}, {c, true}], + ?_assert(term_to_json([{<<"a">>, true}, {<<"b">>, true}], [] - ) =:= <<"{\"a\":true,\"b\":true,\"c\":true}">> + ) =:= <<"{\"a\":true,\"b\":true}">> ) }, {"simple array", @@ -402,7 +344,9 @@ encode_test_() -> }, {"nested structures", ?_assert(term_to_json( - [{x, [[{x, [[{}], [{}]]}, {y, [{}]}],[]]}, {y, [{}]}], + [{<<"x">>, + [[{<<"x">>, [[{}], [{}]]}, {<<"y">>, [{}]}],[]]}, + {<<"y">>, [{}]}], [] ) =:= <<"{\"x\":[{\"x\":[{},{}],\"y\":{}},[]],\"y\":{}}">> ) @@ -444,20 +388,6 @@ encode_test_() -> ) )} ]. - -repeated_keys_test_() -> - [ - {"encode", - ?_assertError(badarg, term_to_json([{k, true}, {k, false}], [])) - }, - {"decode", - ?_assertError(badarg, json_to_term( - <<"{\"k\": true, \"k\": false}">>, - [] - ) - ) - } - ]. escape_test_() -> [ @@ -479,7 +409,7 @@ stream_test_() -> [ {"streaming mode", ?_assert(begin - {incomplete, F} = json_to_term(<<"{">>, [{stream, true}]), + {jsx, incomplete, F} = json_to_term(<<"{">>, [{stream, true}]), F(<<"}">>) end =:= [{}]) } diff --git a/src/jsx_utils.erl b/src/jsx_utils.erl index 7407a69..3a9ec1f 100644 --- a/src/jsx_utils.erl +++ b/src/jsx_utils.erl @@ -217,10 +217,10 @@ detect_encoding(<> = JSON, Opts) when X =/= 0, Y =/= 0 -> %% data to conclusively determine the encoding correctly. below is an attempt %% to solve the problem detect_encoding(<>, Opts) when X =/= 0 -> - {incomplete, + {jsx, incomplete, fun(end_stream) -> try - {incomplete, Next} = (jsx_utf8:decoder(Opts))(<>), + {jsx, incomplete, Next} = (jsx_utf8:decoder(Opts))(<>), Next(end_stream) catch error:function_clause -> {error, {badjson, <>}} @@ -230,10 +230,10 @@ detect_encoding(<>, Opts) when X =/= 0 -> end }; detect_encoding(<<0, X>>, Opts) when X =/= 0 -> - {incomplete, + {jsx, incomplete, fun(end_stream) -> try - {incomplete, Next} = (jsx_utf16:decoder(Opts))(<<0, X>>), + {jsx, incomplete, Next} = (jsx_utf16:decoder(Opts))(<<0, X>>), Next(end_stream) catch error:function_clause -> {error, {badjson, <<0, X>>}} @@ -243,10 +243,10 @@ detect_encoding(<<0, X>>, Opts) when X =/= 0 -> end }; detect_encoding(<>, Opts) when X =/= 0 -> - {incomplete, + {jsx, incomplete, fun(end_stream) -> try - {incomplete, Next} = (jsx_utf16le:decoder(Opts))(<>), + {jsx, incomplete, Next} = (jsx_utf16le:decoder(Opts))(<>), Next(end_stream) catch error:function_clause -> {error, {badjson, <>}} @@ -258,7 +258,7 @@ detect_encoding(<>, Opts) when X =/= 0 -> %% not enough input, request more detect_encoding(Bin, Opts) -> - {incomplete, + {jsx, incomplete, fun(end_stream) -> {error, {badjson, Bin}} ; (Stream) -> detect_encoding(<>, Opts) end diff --git a/src/jsx_verify.erl b/src/jsx_verify.erl index bff3636..0f463d2 100644 --- a/src/jsx_verify.erl +++ b/src/jsx_verify.erl @@ -49,8 +49,8 @@ is_json(F, OptsList) when is_function(F) -> true -> collect(F(), Opts, [[]]) ; false -> case F() of - {event, start_object, Next} -> collect(Next(), Opts, [[]]) - ; {event, start_array, Next} -> collect(Next(), Opts, [[]]) + {jsx, start_object, Next} -> collect(Next(), Opts, [[]]) + ; {jsx, start_array, Next} -> collect(Next(), Opts, [[]]) ; _ -> false end end. @@ -87,16 +87,16 @@ parse_opts([], Opts) -> -collect({event, end_json, _Next}, _Opts, _Keys) -> +collect({jsx, end_json, _Next}, _Opts, _Keys) -> true; %% allocate new key accumulator at start_object, discard it at end_object -collect({event, start_object, Next}, +collect({jsx, start_object, Next}, Opts = #verify_opts{repeated_keys = false}, Keys) -> collect(Next(), Opts, [[]|Keys]); -collect({event, end_object, Next}, +collect({jsx, end_object, Next}, Opts = #verify_opts{repeated_keys = false}, [_|Keys]) -> collect(Next(), Opts, [Keys]); @@ -104,7 +104,7 @@ collect({event, end_object, Next}, %% check to see if key has already been encountered, if not add it to the key %% accumulator and continue, else return false -collect({event, {key, Key}, Next}, +collect({jsx, {key, Key}, Next}, Opts = #verify_opts{repeated_keys = false}, [Current|Keys]) -> case lists:member(Key, Current) of @@ -112,16 +112,16 @@ collect({event, {key, Key}, Next}, ; false -> collect(Next(), Opts, [[Key] ++ Current] ++ Keys) end; - -collect({event, _, Next}, Opts, Keys) -> - collect(Next(), Opts, Keys); - %% needed to parse numbers that don't have trailing whitespace in less strict %% mode -collect({incomplete, More}, Opts, Keys) -> +collect({jsx, incomplete, More}, Opts, Keys) -> collect(More(end_stream), Opts, Keys); + +collect({jsx, _, Next}, Opts, Keys) -> + collect(Next(), Opts, Keys); + collect(_, _, _) -> false. @@ -232,8 +232,8 @@ terms_test_() -> [ {"terms", ?_assert(is_json([start_object, - {key, "key"}, - {string, "value"}, + {key, <<"key">>}, + {string, <<"value">>}, end_object ], []) =:= true )} diff --git a/test/cases/array.test b/test/cases/array.test index f1c235f..7150207 100644 --- a/test/cases/array.test +++ b/test/cases/array.test @@ -1,8 +1,8 @@ {name, "array"}. {jsx, [start_array, - {string,"foo"}, - {string,"bar"}, - {string,"baz"}, + {string,<<"foo">>}, + {string,<<"bar">>}, + {string,<<"baz">>}, start_array, {literal,true}, end_array,start_array, @@ -15,14 +15,14 @@ {literal,null}, {float,0.7}, start_object, - {key,"key"}, - {string,"value"}, + {key,<<"key">>}, + {string,<<"value">>}, end_object,start_array,start_object,end_object, {literal,null}, {literal,null}, {literal,null}, start_array,end_array,end_array, - {string,"\n\r\\"}, + {string,<<"\n\r\\">>}, start_array, {integer,-1}, end_array,end_array,end_json]}. diff --git a/test/cases/encoded_surrogates.test b/test/cases/encoded_surrogates.test index 0e938ed..aa5d317 100644 --- a/test/cases/encoded_surrogates.test +++ b/test/cases/encoded_surrogates.test @@ -1,4 +1,3 @@ {name, "encoded_surrogates"}. -{jsx, [start_array,{string,[66560]},end_array,end_json]}. +{jsx, [start_array,{string,<<66560/utf8>>},end_array,end_json]}. {json, "encoded_surrogates.json"}. -{jsx_flags, [{escaped_unicode,codepoint}]}. diff --git a/test/cases/exp.test b/test/cases/exp.test index 02d4416..c4b9bd4 100644 --- a/test/cases/exp.test +++ b/test/cases/exp.test @@ -4,9 +4,9 @@ end_array, {float,2.0e7}, start_object, - {key,"key"}, + {key,<<"key">>}, {float,2.0e7}, - {key,"another key"}, + {key,<<"another key">>}, {float,2.0e7}, end_object, {float,4.2e70}, diff --git a/test/cases/fraction.test b/test/cases/fraction.test index a0dbd2d..c36f2c9 100644 --- a/test/cases/fraction.test +++ b/test/cases/fraction.test @@ -4,14 +4,14 @@ end_array, {float,2.0}, start_object, - {key,"key"}, + {key,<<"key">>}, {float,2.0e7}, - {key,"another key"}, + {key,<<"another key">>}, {float,2.0e7}, end_object,start_object, - {key,"key"}, + {key,<<"key">>}, {float,2.0}, - {key,"another key"}, + {key,<<"another key">>}, {float,2.0}, end_object, {float,4.2}, diff --git a/test/cases/integer.test b/test/cases/integer.test index 2f2eee9..0883c33 100644 --- a/test/cases/integer.test +++ b/test/cases/integer.test @@ -4,9 +4,9 @@ end_array, {integer,20}, start_object, - {key,"key"}, + {key,<<"key">>}, {integer,20}, - {key,"another key"}, + {key,<<"another key">>}, {integer,20}, end_object, {integer,42}, diff --git a/test/cases/multibyte_utf.test b/test/cases/multibyte_utf.test index 6a9217e..cb8be9c 100644 --- a/test/cases/multibyte_utf.test +++ b/test/cases/multibyte_utf.test @@ -1,3 +1,3 @@ {name, "multibyte_utf"}. -{jsx, [start_array,{string,[32,119070,32]},end_array,end_json]}. +{jsx, [start_array,{string,<<32,119070/utf8,32>>},end_array,end_json]}. {json, "multibyte_utf.json"}. diff --git a/test/cases/naked_string.test b/test/cases/naked_string.test index 3ce5c36..68fe34b 100644 --- a/test/cases/naked_string.test +++ b/test/cases/naked_string.test @@ -1,3 +1,3 @@ {name, "naked_string"}. -{jsx, [{string,"this is a naked string"},end_json]}. +{jsx, [{string,<<"this is a naked string">>},end_json]}. {json, "naked_string.json"}. diff --git a/test/cases/negative_zero.test b/test/cases/negative_zero.test index 0ef4f1a..0bc734a 100644 --- a/test/cases/negative_zero.test +++ b/test/cases/negative_zero.test @@ -4,9 +4,9 @@ end_array, {integer,0}, start_object, - {key,"key"}, + {key,<<"key">>}, {integer,0}, - {key,"another key"}, + {key,<<"another key">>}, {integer,0}, end_object, {integer,0}, diff --git a/test/cases/object.test b/test/cases/object.test index b9b50d4..4877fc8 100644 --- a/test/cases/object.test +++ b/test/cases/object.test @@ -1,22 +1,22 @@ {name, "object"}. {jsx, [start_object, - {key,"foo"}, - {string,"bar"}, - {key,"baz"}, + {key,<<"foo">>}, + {string,<<"bar">>}, + {key,<<"baz">>}, {literal,true}, - {key,"false"}, + {key,<<"false">>}, {literal,null}, - {key,"object"}, + {key,<<"object">>}, start_object, - {key,"key"}, - {string,"value"}, + {key,<<"key">>}, + {string,<<"value">>}, end_object, - {key,"list"}, + {key,<<"list">>}, start_array, {literal,null}, {literal,null}, {literal,null}, start_array,end_array, - {string,"\n\r\\"}, + {string,<<"\n\r\\">>}, end_array,end_object,end_json]}. {json, "object.json"}. diff --git a/test/cases/string.test b/test/cases/string.test index 8df9aa5..e1334d3 100644 --- a/test/cases/string.test +++ b/test/cases/string.test @@ -1,5 +1,5 @@ {name, "string"}. {jsx, [start_array, - {string,"this is a random string with \n embedded escapes in it"}, + {string,<<"this is a random string with \n embedded escapes in it">>}, end_array,end_json]}. {json, "string.json"}. diff --git a/test/cases/string_escapes.test b/test/cases/string_escapes.test index e33a903..7cd460c 100644 --- a/test/cases/string_escapes.test +++ b/test/cases/string_escapes.test @@ -1,12 +1,12 @@ {name, "string_escapes"}. {jsx, [start_array, - {string,"\""}, - {string,"\\"}, - {string,"/"}, - {string,"\b"}, - {string,"\f"}, - {string,"\n"}, - {string,"\r"}, - {string,"\t"}, + {string,<<"\"">>}, + {string,<<"\\">>}, + {string,<<"/">>}, + {string,<<"\b">>}, + {string,<<"\f">>}, + {string,<<"\n">>}, + {string,<<"\r">>}, + {string,<<"\t">>}, end_array,end_json]}. {json, "string_escapes.json"}. diff --git a/test/cases/unicode_replaced.json b/test/cases/unicode_replaced.json deleted file mode 100644 index c8a71c9..0000000 --- a/test/cases/unicode_replaced.json +++ /dev/null @@ -1 +0,0 @@ -[ "non-character: ", "\uffff" ] \ No newline at end of file diff --git a/test/cases/unicode_replaced.test b/test/cases/unicode_replaced.test deleted file mode 100644 index 5cfe64b..0000000 --- a/test/cases/unicode_replaced.test +++ /dev/null @@ -1,7 +0,0 @@ -{name, "unicode_replaced"}. -{jsx, [start_array, - {string,"non-character: "}, - {string,[16#fffd]}, - end_array,end_json]}. -{json, "unicode_replaced.json"}. -{jsx_flags, [{escaped_unicode,replace}]}. \ No newline at end of file diff --git a/test/cases/unicode_to_codepoint.test b/test/cases/unicode_to_codepoint.test index 649ecd5..71f47fd 100644 --- a/test/cases/unicode_to_codepoint.test +++ b/test/cases/unicode_to_codepoint.test @@ -1,7 +1,6 @@ {name, "unicode_to_codepoint"}. {jsx, [start_array, - {string,"arabic letter alef: "}, - {string,[1575]}, + {string,<<"arabic letter alef: ">>}, + {string,<<1575/utf8>>}, end_array,end_json]}. {json, "unicode_to_codepoint.json"}. -{jsx_flags, [{escaped_unicode,codepoint}]}. diff --git a/test/cases/unicode_to_codepoint_noncharacter.json b/test/cases/unicode_to_codepoint_noncharacter.json deleted file mode 100644 index c8a71c9..0000000 --- a/test/cases/unicode_to_codepoint_noncharacter.json +++ /dev/null @@ -1 +0,0 @@ -[ "non-character: ", "\uffff" ] \ No newline at end of file diff --git a/test/cases/unicode_to_codepoint_noncharacter.test b/test/cases/unicode_to_codepoint_noncharacter.test deleted file mode 100644 index 63558db..0000000 --- a/test/cases/unicode_to_codepoint_noncharacter.test +++ /dev/null @@ -1,7 +0,0 @@ -{name, "unicode_to_codepoint_noncharacter"}. -{jsx, [start_array, - {string,"non-character: "}, - {string,"\\uffff"}, - end_array,end_json]}. -{json, "unicode_to_codepoint_noncharacter.json"}. -{jsx_flags, [{escaped_unicode,codepoint}]}. \ No newline at end of file diff --git a/test/cases/unicode_unconverted.json b/test/cases/unicode_unconverted.json deleted file mode 100644 index 7984511..0000000 --- a/test/cases/unicode_unconverted.json +++ /dev/null @@ -1 +0,0 @@ -[ "arabic letter alef: ", "\u0627" ] \ No newline at end of file diff --git a/test/cases/unicode_unconverted.test b/test/cases/unicode_unconverted.test deleted file mode 100644 index 9713a0f..0000000 --- a/test/cases/unicode_unconverted.test +++ /dev/null @@ -1,7 +0,0 @@ -{name, "unicode_unconverted"}. -{jsx, [start_array, - {string,"arabic letter alef: "}, - {string,"\\u0627"}, - end_array,end_json]}. -{json, "unicode_unconverted.json"}. -{jsx_flags, [{escaped_unicode,ascii}]}. diff --git a/test/cases/zero.test b/test/cases/zero.test index cd072a1..9515a85 100644 --- a/test/cases/zero.test +++ b/test/cases/zero.test @@ -4,9 +4,9 @@ end_array, {integer,0}, start_object, - {key,"key"}, + {key,<<"key">>}, {integer,0}, - {key,"another key"}, + {key,<<"another key">>}, {integer,0}, end_object, {integer,0},