From e69ac5f3714c9284e9b162b4560942a29d7c8107 Mon Sep 17 00:00:00 2001 From: alisdair sullivan Date: Wed, 4 Apr 2012 20:46:41 -0700 Subject: [PATCH] detect utf8 bom and ignore if present --- priv/test_cases/bom.json | 1 + priv/test_cases/bom.test | 3 +++ src/jsx_decoder.erl | 26 +++++++++++++++++++++++++- 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 priv/test_cases/bom.json create mode 100644 priv/test_cases/bom.test diff --git a/priv/test_cases/bom.json b/priv/test_cases/bom.json new file mode 100644 index 0000000..ad47dbb --- /dev/null +++ b/priv/test_cases/bom.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/priv/test_cases/bom.test b/priv/test_cases/bom.test new file mode 100644 index 0000000..75b9d3e --- /dev/null +++ b/priv/test_cases/bom.test @@ -0,0 +1,3 @@ +{name, "byte order mark"}. +{jsx, [start_array, end_array, end_json]}. +{json, "bom.json"}. diff --git a/src/jsx_decoder.erl b/src/jsx_decoder.erl index 4e64bec..a362638 100644 --- a/src/jsx_decoder.erl +++ b/src/jsx_decoder.erl @@ -29,7 +29,7 @@ -spec decoder(Handler::module(), State::any(), Opts::jsx:opts()) -> jsx:decoder(). decoder(Handler, State, Opts) -> - fun(JSON) -> value(JSON, {Handler, Handler:init(State)}, [], jsx_utils:parse_opts(Opts)) end. + fun(JSON) -> start(JSON, {Handler, Handler:init(State)}, [], jsx_utils:parse_opts(Opts)) end. -include("jsx_opts.hrl"). @@ -128,6 +128,30 @@ decoder(Handler, State, Opts) -> -define(end_seq(Seq), unicode:characters_to_binary(lists:reverse(Seq))). +start(<<16#ef, Rest/binary>>, Handler, Stack, Opts) -> + maybe_bom(Rest, Handler, Stack, Opts); +start(<<>>, Handler, Stack, Opts) -> + ?incomplete(start, <<>>, Handler, Stack, Opts); +start(Bin, Handler, Stack, Opts) -> + value(Bin, Handler, Stack, Opts). + + +maybe_bom(<<16#bb, Rest/binary>>, Handler, Stack, Opts) -> + definitely_bom(Rest, Handler, Stack, Opts); +maybe_bom(<<>>, Handler, Stack, Opts) -> + ?incomplete(maybe_bom, <<>>, Handler, Stack, Opts); +maybe_bom(Bin, Handler, Stack, Opts) -> + ?error([Bin, Handler, Stack, Opts]). + + +definitely_bom(<<16#bf, Rest/binary>>, Handler, Stack, Opts) -> + value(Rest, Handler, Stack, Opts); +definitely_bom(<<>>, Handler, Stack, Opts) -> + ?incomplete(definitely_bom, <<>>, Handler, Stack, Opts); +definitely_bom(Bin, Handler, Stack, Opts) -> + ?error([Bin, Handler, Stack, Opts]). + + value(<>, Handler, Stack, Opts) -> string(Rest, Handler, [?new_seq()|Stack], Opts); value(<>, Handler, Stack, Opts = #opts{single_quoted_strings=true}) ->