diff --git a/src/jsx.erl b/src/jsx.erl index 7bb1ae1..4c78fb6 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -99,9 +99,6 @@ detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Stack, Callbacks, Opts) -> detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Stack, Callbacks, Opts) -> jsx_utf32:start(Rest, Stack, Callbacks, Opts); -%% utf8 null order detection -detect_encoding(<> = JSON, Stack, Callbacks, Opts) when X =/= 0, Y =/= 0 -> - jsx_utf8:start(JSON, Stack, Callbacks, Opts); %% utf32-little null order detection detect_encoding(<> = JSON, Stack, Callbacks, Opts) when X =/= 0 -> @@ -119,17 +116,38 @@ detect_encoding(<> = JSON, Stack, Callbacks, Opts) whe detect_encoding(<<0, 0, 0, X, _Rest/binary>> = JSON, Stack, Callbacks, Opts) when X =/= 0 -> jsx_utf32:start(JSON, Stack, Callbacks, Opts); -%% trying to parse a json string of a single character encoded in utf8 will fail -%% unless special cased -detect_encoding(<> = JSON, Stack, Callbacks, Opts) when X =/= 0 -> - try jsx_utf8:start(JSON, Stack, Callbacks, Opts) - catch error:function_clause -> - {incomplete, - fun(Stream) -> - detect_encoding(<>, Stack, Callbacks, Opts) - end - } - end; +%% utf8 null order detection +detect_encoding(<> = JSON, Stack, Callbacks, Opts) when X =/= 0, Y =/= 0 -> + jsx_utf8:start(JSON, Stack, Callbacks, Opts); + +%% a problem, to autodetect naked single digits' encoding, there is not enough data +%% to conclusively determine the encoding correctly. below is an attempt to solve +%% the problem + +detect_encoding(<>, Stack, Callbacks, Opts) when X =/= 0 -> + { + try {Result, _} = jsx_utf8:start(<>, [], Callbacks, Opts), Result + catch error:function_clause -> incomplete end, + fun(Stream) -> + detect_encoding(<>, Stack, Callbacks, Opts) + end + }; +detect_encoding(<<0, X>>, Stack, Callbacks, Opts) when X =/= 0 -> + { + try {Result, _} = jsx_utf16:start(<<0, X>>, [], Callbacks, Opts), Result + catch error:function_clause -> incomplete end, + fun(Stream) -> + detect_encoding(<<0, X, Stream/binary>>, Stack, Callbacks, Opts) + end + }; +detect_encoding(<>, Stack, Callbacks, Opts) when X =/= 0 -> + { + try {Result, _} = jsx_utf16le:start(<>, [], Callbacks, Opts), Result + catch error:function_clause -> incomplete end, + fun(Stream) -> + detect_encoding(<>, Stack, Callbacks, Opts) + end + }; %% not enough input, request more detect_encoding(Bin, Stack, Callbacks, Opts) -> diff --git a/src/jsx_utf16.erl b/src/jsx_utf16.erl index 56a62ff..48b6fe7 100644 --- a/src/jsx_utf16.erl +++ b/src/jsx_utf16.erl @@ -371,7 +371,7 @@ initial_decimal(<>, Stack, Callbacks, Opts, Acc) when initial_decimal(<>, Stack, Callbacks, Opts, Acc) -> decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc); initial_decimal(Bin, Stack, Callbacks, Opts, Acc) when byte_size(Bin) < 2 -> - {incomplete, fun(Stream) -> initial_decimal(Stream, Stack, Callbacks, Opts, Acc) end}. + {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end}. decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> diff --git a/src/jsx_utf16le.erl b/src/jsx_utf16le.erl index 832c4e8..213ac40 100644 --- a/src/jsx_utf16le.erl +++ b/src/jsx_utf16le.erl @@ -371,7 +371,7 @@ initial_decimal(<>, Stack, Callbacks, Opts, Acc) when initial_decimal(<>, Stack, Callbacks, Opts, Acc) -> decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc); initial_decimal(Bin, Stack, Callbacks, Opts, Acc) when byte_size(Bin) < 2 -> - {incomplete, fun(Stream) -> initial_decimal(Stream, Stack, Callbacks, Opts, Acc) end}. + {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end}. decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> diff --git a/src/jsx_utf32.erl b/src/jsx_utf32.erl index 85d1062..69d016f 100644 --- a/src/jsx_utf32.erl +++ b/src/jsx_utf32.erl @@ -358,7 +358,7 @@ initial_decimal(<>, Stack, Callbacks, Opts, Acc) when initial_decimal(<>, Stack, Callbacks, Opts, Acc) -> decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc); initial_decimal(Bin, Stack, Callbacks, Opts, Acc) when byte_size(Bin) < 4 -> - {incomplete, fun(Stream) -> initial_decimal(Stream, Stack, Callbacks, Opts, Acc) end}. + {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end}. decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> diff --git a/src/jsx_utf32le.erl b/src/jsx_utf32le.erl index 5069686..84a66a6 100644 --- a/src/jsx_utf32le.erl +++ b/src/jsx_utf32le.erl @@ -358,7 +358,7 @@ initial_decimal(<>, Stack, Callbacks, Opts, Acc) when initial_decimal(<>, Stack, Callbacks, Opts, Acc) -> decimal(Rest, Stack, Callbacks, Opts, [?zero] ++ Acc); initial_decimal(Bin, Stack, Callbacks, Opts, Acc) when byte_size(Bin) < 4 -> - {incomplete, fun(Stream) -> initial_decimal(Stream, Stack, Callbacks, Opts, Acc) end}. + {incomplete, fun(Stream) -> initial_decimal(<>, Stack, Callbacks, Opts, Acc) end}. decimal(<>, Stack, Callbacks, Opts, Acc) when ?is_nonzero(S) -> diff --git a/test/jsx_test.erl b/test/jsx_test.erl index 7f8c574..894686c 100644 --- a/test/jsx_test.erl +++ b/test/jsx_test.erl @@ -24,14 +24,18 @@ -module(jsx_test). -author("alisdairsullivan@yahoo.ca"). --export([test/1, test_event/2, incremental_decode/2, decode/2]). - --include_lib("eunit/include/eunit.hrl"). +-export([test/0, test/1, test_event/2]). +test() -> + F = decoder([]), + incremental_decode(F, unicode:characters_to_binary(<<"0">>, utf8, utf16)). test(Dir) -> - Tests = gen_tests(Dir), - eunit:test(Tests, [verbose]). + ValidJSONTests = load_tests(Dir), + + etap:plan(length(ValidJSONTests) * 10), + run_tests(ValidJSONTests), + etap:end_tests(). decoder(Flags) -> @@ -43,33 +47,40 @@ test_event(Event, Acc) -> [Event] ++ Acc. - -gen_tests(Dir) -> +load_tests(Dir) -> TestSpecs = filelib:wildcard("*.test", Dir), - gen_tests(TestSpecs, Dir, []). + load_tests(TestSpecs, Dir, []). -gen_tests([], _, Acc) -> - lists:reverse(Acc); - -gen_tests([Test|Rest], Dir, Acc) -> - gen_tests(Rest, Dir, test_body(Test, Dir) ++ Acc). - -test_body(TestSpec, Dir) -> +load_tests([], _Dir, Acc) -> + lists:reverse(Acc); +load_tests([Test|Rest], Dir, Acc) -> try - TestName = filename:basename(TestSpec, ".test"), + TestName = filename:basename(Test, ".test"), {ok, JSON} = file:read_file(Dir ++ "/" ++ TestName ++ ".json"), - case file:consult(Dir ++ "/" ++ TestSpec) of + case file:consult(Dir ++ "/" ++ Test) of {ok, [Events]} -> - Decoder = jsx:decoder(), - [{TestName ++ "_incremental", ?_assertEqual(incremental_decode(Decoder, JSON), Events)}] ++ - [{TestName, ?_assertEqual(decode(Decoder, JSON), Events)}] + load_tests(Rest, Dir, [{TestName, JSON, Events, []}] ++ Acc) ; {ok, [Events, Flags]} -> - Decoder = jsx:decoder(Flags), - [{TestName ++ "_incremental", ?_assertEqual(incremental_decode(Decoder, JSON), Events)}] ++ - [{TestName, ?_assertEqual(decode(Decoder, JSON), Events)}] + load_tests(Rest, Dir, [{TestName, JSON, Events, Flags}] ++ Acc) end - catch _:_ -> [] - end. + catch _:_ -> load_tests(Rest, Dir, Acc) end. + +run_tests([]) -> + ok; +run_tests([{TestName, JSON, Events, Flags}|Rest]) -> + F = decoder(Flags), + etap:is(decode(F, JSON), Events, TestName ++ ": utf8"), + etap:is(incremental_decode(F, JSON), Events, TestName ++ ": incremental utf8"), + etap:is(decode(F, to_utf16(JSON)), Events, TestName ++ ": utf16"), + etap:is(incremental_decode(F, to_utf16(JSON)), Events, TestName ++ ": incremental utf16"), + etap:is(decode(F, to_utf16le(JSON)), Events, TestName ++ ": utf16le"), + etap:is(incremental_decode(F, to_utf16le(JSON)), Events, TestName ++ ": incremental utf16le"), + etap:is(decode(F, to_utf32(JSON)), Events, TestName ++ ": utf32"), + etap:is(incremental_decode(F, to_utf32(JSON)), Events, TestName ++ ": incremental utf32"), + etap:is(decode(F, to_utf32le(JSON)), Events, TestName ++ ": utf32le"), + etap:is(incremental_decode(F, to_utf32le(JSON)), Events, TestName ++ ": incremental utf32le"), + run_tests(Rest). + incremental_decode(F, <<>>) -> case F(<<>>) of @@ -87,4 +98,10 @@ decode(F, JSON) -> ; {Result, _} -> Result end. + +to_utf16(Bin) -> unicode:characters_to_binary(Bin, utf8, utf16). +to_utf16le(Bin) -> unicode:characters_to_binary(Bin, utf8, {utf16,little}). +to_utf32(Bin) -> unicode:characters_to_binary(Bin, utf8, utf32). +to_utf32le(Bin) -> unicode:characters_to_binary(Bin, utf8, {utf32,little}). + \ No newline at end of file