diff --git a/src/jsx_terms.erl b/src/jsx_terms.erl new file mode 100644 index 0000000..ecd947a --- /dev/null +++ b/src/jsx_terms.erl @@ -0,0 +1,418 @@ +%% The MIT License + +%% Copyright (c) 2010 Alisdair Sullivan + +%% Permission is hereby granted, free of charge, to any person obtaining a copy +%% of this software and associated documentation files (the "Software"), to deal +%% in the Software without restriction, including without limitation the rights +%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +%% copies of the Software, and to permit persons to whom the Software is +%% furnished to do so, subject to the following conditions: + +%% The above copyright notice and this permission notice shall be included in +%% all copies or substantial portions of the Software. + +%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +%% THE SOFTWARE. + + + +-module(jsx_terms). + + +-export([json_to_term/2, term_to_json/2]). + + +-include("jsx_common.hrl"). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + + +-spec json_to_term(JSON::binary(), Opts::decoder_opts()) -> + jsx_term() | {jsx, incomplete, fun()}. + +json_to_term(JSON, Opts) -> + P = jsx:decoder(extract_parser_opts(Opts)), + case proplists:get_value(strict, Opts, false) of + true -> collect_strict(P(JSON), [[]], Opts) + ; false -> collect(P(JSON), [[]], Opts) + end. + + +%% the jsx formatter (pretty printer) can do most of the heavy lifting in +%% converting erlang terms to json strings + +-spec term_to_json(JSON::jsx_term(), Opts::encoder_opts()) -> + binary() | {jsx, incomplete, fun()}. + +term_to_json(List, Opts) -> + case proplists:get_value(strict, Opts, false) of + true when is_list(List) -> continue + ; true -> erlang:error(badarg) + ; false -> continue + end, + Encoding = proplists:get_value(encoding, Opts, utf8), + FOpts = [{output_encoding, Encoding}] ++ Opts, + case term_to_events(List) of + L when is_tuple(L) -> jsx:format(L, FOpts) + ; L when is_list(L) -> jsx:format(lists:reverse(L), FOpts) + end. + + + +extract_parser_opts(Opts) -> + extract_parser_opts(Opts, []). + +extract_parser_opts([], Acc) -> Acc; +extract_parser_opts([{K,V}|Rest], Acc) -> + case lists:member(K, [encoding]) of + true -> [{K,V}] ++ Acc + ; false -> extract_parser_opts(Rest, Acc) + end. + + + +%% ensure the first jsx event we get is start_object or start_array when running +%% in strict mode +collect_strict({jsx, Start, Next}, Acc, Opts) + when Start =:= start_object; Start =:= start_array -> + collect(Next(), [[]|Acc], Opts); +collect_strict({jsx, incomplete, More}, Acc, Opts) -> + case proplists:get_value(stream, Opts, false) of + true -> {jsx, incomplete, fun(JSON) -> + collect_strict(More(JSON), Acc, Opts) + end} + ; false -> erlang:error(badarg) + end; +collect_strict(_, _, _) -> erlang:error(badarg). + + +%% collect decoder events and convert to eep0018 format +collect({jsx, Start, Next}, Acc, Opts) + when Start =:= start_object; Start =:= start_array -> + collect(Next(), [[]|Acc], Opts); +%% special case for empty object +collect({jsx, end_object, Next}, [[], Parent|Rest], Opts) + when is_list(Parent) -> + collect(Next(), [[[{}]] ++ Parent] ++ Rest, Opts); +%% reverse the array/object accumulator before prepending it to it's parent +collect({jsx, end_object, Next}, [Current, Parent|Rest], Opts) + when is_list(Parent) -> + collect(Next(), [[lists:reverse(Current)] ++ Parent] ++ Rest, Opts); +collect({jsx, end_array, Next}, [Current, Parent|Rest], Opts) + when is_list(Parent) -> + collect(Next(), [[lists:reverse(Current)] ++ Parent] ++ Rest, Opts); +%% special case for empty object +collect({jsx, end_object, Next}, [[], Key, Parent|Rest], Opts) -> + collect(Next(), [[{Key, [{}]}] ++ Parent] ++ Rest, Opts); +collect({jsx, End, Next}, [Current, Key, Parent|Rest], Opts) + when End =:= end_object; End =:= end_array -> + collect(Next(), [[{Key, lists:reverse(Current)}] ++ Parent] ++ Rest, Opts); +collect({jsx, end_json, _Next}, [[Acc]], _Opts) -> + Acc; +%% key can only be emitted inside of a json object, so just insert it directly +%% into the head of the accumulator and deal with it when we receive it's +%% paired value +collect({jsx, {key, _} = PreKey, Next}, Acc, Opts) -> + Key = event(PreKey, Opts), + collect(Next(), [Key] ++ Acc, Opts); +%% if our returned event is {jsx, incomplete, ...} try to force end and return +%% the Event if one is returned +collect({jsx, incomplete, More}, Acc, Opts) -> + case More(end_stream) of + {jsx, Event, _Next} -> event(Event, Opts) + ; _ -> + case proplists:get_value(stream, Opts, false) of + true -> + {jsx, incomplete, + fun(JSON) -> collect(More(JSON), Acc, Opts) end + } + ; false -> erlang:error(badarg) + end + end; +%% check acc to see if we're inside an object or an array. because inside an +%% object context the events that fall this far are always preceded by a key +%% (which are binaries or atoms), if Current is a list, we're inside an array, +%% else, an object +collect({jsx, Event, Next}, [Current|Rest], Opts) when is_list(Current) -> + collect(Next(), [[event(Event, Opts)] ++ Current] ++ Rest, Opts); +collect({jsx, Event, Next}, [Key, Current|Rest], Opts) -> + collect(Next(), [[{Key, event(Event, Opts)}] ++ Current] ++ Rest, Opts); +%% any other event is an error +collect(_, _, _) -> erlang:error(badarg). + + +%% helper functions for converting jsx events to term format +event({string, String}, _Opts) -> String; +event({key, Key}, _Opts) -> Key; +event({integer, Integer}, _Opts) -> Integer; +event({float, Float}, _Opts) -> Float; +event({literal, Literal}, _Opts) -> Literal. + + + +%% convert term format representation to jsx events. note special casing for the +%% empty object +term_to_events([{}]) -> + [end_object, start_object]; +term_to_events([First|_] = List) when is_tuple(First) -> + proplist_to_events(List, [start_object]); +term_to_events(List) when is_list(List) -> + list_to_events(List, [start_array]); +term_to_events(Term) -> + [Res] = term_to_event(Term), + Res. + + +proplist_to_events([{Key, Term}|Rest], Acc) -> + Event = term_to_event(Term), + EncodedKey = key_to_event(Key), + proplist_to_events(Rest, Event ++ EncodedKey ++ Acc); +proplist_to_events([], Acc) -> + [end_object] ++ Acc; +proplist_to_events(_, _) -> + erlang:error(badarg). + + +list_to_events([Term|Rest], Acc) -> + list_to_events(Rest, term_to_event(Term) ++ Acc); +list_to_events([], Acc) -> + [end_array] ++ Acc. + + +term_to_event(List) when is_list(List) -> + term_to_events(List); +term_to_event(Float) when is_float(Float) -> + [{float, Float}]; +term_to_event(Integer) when is_integer(Integer) -> + [{integer, Integer}]; +term_to_event(String) when is_binary(String) -> + [{string, json_escape(String)}]; +term_to_event(true) -> [{literal, true}]; +term_to_event(false) -> [{literal, false}]; +term_to_event(null) -> [{literal, null}]; +term_to_event(_) -> erlang:error(badarg). + + +key_to_event(Key) when is_binary(Key) -> + [{key, json_escape(Key)}]. + + + +%% json string escaping, for utf8 binaries. escape the json control sequences to +%% their json equivalent, escape other control characters to \uXXXX sequences, +%% everything else should be a legal json string component +json_escape(String) -> + json_escape(String, <<>>). + +%% double quote +json_escape(<<$\", Rest/binary>>, Acc) -> + json_escape(Rest, <>); +%% backslash \ reverse solidus +json_escape(<<$\\, Rest/binary>>, Acc) -> + json_escape(Rest, <>); +%% backspace +json_escape(<<$\b, Rest/binary>>, Acc) -> + json_escape(Rest, <>); +%% form feed +json_escape(<<$\f, Rest/binary>>, Acc) -> + json_escape(Rest, <>); +%% newline +json_escape(<<$\n, Rest/binary>>, Acc) -> + json_escape(Rest, <>); +%% cr +json_escape(<<$\r, Rest/binary>>, Acc) -> + json_escape(Rest, <>); +%% tab +json_escape(<<$\t, Rest/binary>>, Acc) -> + json_escape(Rest, <>); +%% other control characters +json_escape(<>, Acc) when C >= 0, C < $\s -> + json_escape(Rest, <>); +%% any other legal codepoint +json_escape(<>, Acc) -> + json_escape(Rest, <>); +json_escape(<<>>, Acc) -> + Acc; +json_escape(_, _) -> + erlang:error(badarg). + + +%% convert a codepoint to it's \uXXXX equiv. for laziness, this only handles +%% codepoints this module might escape, ie, control characters +json_escape_sequence(C) when C < 16#20 -> + <<_:8, A:4, B:4>> = <>, % first two hex digits are always zero + <<$\\, $u, $0, $0, (to_hex(A)), (to_hex(B))>>. + + +to_hex(15) -> $f; +to_hex(14) -> $e; +to_hex(13) -> $d; +to_hex(12) -> $c; +to_hex(11) -> $b; +to_hex(10) -> $a; +to_hex(X) -> X + $0. + + +%% eunit tests +-ifdef(TEST). + +decode_test_() -> + [ + {"empty object", ?_assert(json_to_term(<<"{}">>, []) =:= [{}])}, + {"empty array", ?_assert(json_to_term(<<"[]">>, []) =:= [])}, + {"simple object", + ?_assert(json_to_term( + <<"{\"a\": true, \"b\": true, \"c\": true}">>, [] + ) =:= [{<<"a">>, true}, {<<"b">>, true}, {<<"c">>, true}] + ) + }, + {"simple array", + ?_assert(json_to_term(<<"[true,true,true]">>, + [] + ) =:= [true, true, true] + ) + }, + {"nested structures", + ?_assert(json_to_term( + <<"{\"x\":[{\"x\":[{}, {}],\"y\":{}}, []],\"y\":{}}">>, [] + ) =:= [{<<"x">>, + [[{<<"x">>, [[{}], [{}]]}, {<<"y">>, [{}]}],[]]}, + {<<"y">>, [{}]} + ] + ) + }, + {"numbers", + ?_assert(json_to_term( + <<"[-100000000.0, -1, 0.0, 0, 1, 100000000, 10000000.0]">>, + [] + ) =:= [-100000000.0, -1, 0.0, 0, 1, 100000000, 10000000.0] + ) + }, + {"strings", + ?_assert(json_to_term(<<"[\"a string\"]">>, + [] + ) =:= [<<"a string">>]) + }, + {"literals", + ?_assert(json_to_term(<<"[true,false,null]">>, + [] + ) =:= [true,false,null] + ) + }, + {"naked true", + ?_assert(json_to_term(<<"true">>, []) =:= true) + }, + {"naked short number", + ?_assert(json_to_term(<<"1">>, []) =:= 1) + }, + {"naked float", ?_assert(json_to_term(<<"1.0">>, []) =:= 1.0)}, + {"naked string", + ?_assert(json_to_term(<<"\"hello world\"">>, + [] + ) =:= <<"hello world">> + ) + }, + {"strict mode", ?_assertError(badarg, json_to_term(<<"1.0">>, + [{strict, true}] + ) + )} + ]. + +encode_test_() -> + [ + {"empty object", ?_assert(term_to_json([{}], []) =:= <<"{}">>)}, + {"empty array", ?_assert(term_to_json([], []) =:= <<"[]">>)}, + {"simple object", + ?_assert(term_to_json([{<<"a">>, true}, {<<"b">>, true}], + [] + ) =:= <<"{\"a\":true,\"b\":true}">> + ) + }, + {"simple array", + ?_assert(term_to_json([true, true, true], + [] + ) =:= <<"[true,true,true]">> + ) + }, + {"nested structures", + ?_assert(term_to_json( + [{<<"x">>, + [[{<<"x">>, [[{}], [{}]]}, {<<"y">>, [{}]}],[]]}, + {<<"y">>, [{}]}], + [] + ) =:= <<"{\"x\":[{\"x\":[{},{}],\"y\":{}},[]],\"y\":{}}">> + ) + }, + {"numbers", + ?_assert(term_to_json( + [-10000000000.0, -1, 0.0, 0, 1, 10000000, 1000000000.0], + [] + ) =:= <<"[-1.0e10,-1,0.0,0,1,10000000,1.0e9]">> + ) + }, + {"strings", + ?_assert(term_to_json([<<"a string">>], + [] + ) =:= <<"[\"a string\"]">> + ) + }, + {"literals", + ?_assert(term_to_json([true,false,null], + [] + ) =:= <<"[true,false,null]">> + ) + }, + {"naked true", + ?_assert(term_to_json(true, []) =:= <<"true">>) + }, + {"naked number", + ?_assert(term_to_json(1, []) =:= <<"1">>) + }, + {"float", ?_assert(term_to_json(1.0, []) =:= <<"1.0">>)}, + {"naked string", + ?_assert(term_to_json(<<"hello world">>, + [] + ) =:= <<"\"hello world\"">> + ) + }, + {"strict mode", ?_assertError(badarg, term_to_json(true, + [{strict, true}] + ) + )} + ]. + +escape_test_() -> + [ + {"json string escaping", + ?_assert(json_escape( + <<"\"\\\b\f\n\r\t">> + ) =:= <<"\\\"\\\\\\b\\f\\n\\r\\t">> + ) + }, + {"json string hex escape", + ?_assert(json_escape( + <<1, 2, 3, 11, 26, 30, 31>> + ) =:= <<"\\u0001\\u0002\\u0003\\u000b\\u001a\\u001e\\u001f">> + ) + } + ]. + +stream_test_() -> + [ + {"streaming mode", + ?_assert(begin + {jsx, incomplete, F} = json_to_term(<<"{">>, [{stream, true}]), + F(<<"}">>) + end =:= [{}]) + } + ]. + +-endif. \ No newline at end of file