diff --git a/priv/jsx_decoder.erl b/priv/jsx_decoder.erl index 757bd37..3222921 100644 --- a/priv/jsx_decoder.erl +++ b/priv/jsx_decoder.erl @@ -39,15 +39,6 @@ parse(JSON, Opts) -> start(JSON, [], Opts). -%% this code is mostly autogenerated and mostly ugly. apologies. for more insight on -%% Callbacks or Opts, see the comments accompanying decoder/2 (in jsx.erl). Stack -%% is a stack of flags used to track depth and to keep track of whether we are -%% returning from a value or a key inside objects. all pops, peeks and pushes are -%% inlined. the code that handles naked values and comments is not optimized by the -%% compiler for efficient matching, but you shouldn't be using naked values or comments -%% anyways, they are horrible and contrary to the spec - - start(<>, Stack, Opts) when ?is_whitespace(S) -> start(Rest, Stack, Opts); start(<>, Stack, Opts) -> @@ -254,10 +245,8 @@ key(Bin, Stack, Opts) -> %% representation of the string being parsed. using a list of integers representing %% unicode codepoints is faster than constructing binaries, many of which will be %% converted back to lists by the user anyways - %% string uses partial_utf/1 to cease parsing when invalid encodings are encountered %% rather than just checking remaining binary size like other states - string(<>, [key|_] = Stack, Opts, Acc) -> {event, {key, lists:reverse(Acc)}, fun() -> colon(Rest, Stack, Opts) end}; string(<>, Stack, Opts, Acc) -> @@ -329,7 +318,6 @@ partial_utf(_) -> true. %% only thing to note here is the additional accumulator passed to escaped_unicode used %% to hold the codepoint sequence. unescessary, but nicer than using the string %% accumulator - escape(<<$b/?encoding, Rest/binary>>, Stack, Opts, Acc) -> string(Rest, Stack, Opts, "\b" ++ Acc); escape(<<$f/?encoding, Rest/binary>>, Stack, Opts, Acc) -> @@ -364,7 +352,6 @@ escape(Bin, Stack, Opts, Acc) -> %% as it represents a valid unicode codepoint. this means non-characters %% representable in 16 bits are not converted (the utf16 surrogates and the two %% special non-characters). any other option and no conversion is done - escaped_unicode(<>, Stack, ?escaped_unicode_to_ascii(Opts), @@ -413,7 +400,6 @@ escaped_unicode(Bin, Stack, Opts, String, Acc) -> %% upon encountering a low pair json/hex encoded value, check to see if there's a high %% value already in the accumulator - check_acc_for_surrogate([D, C, B, A, $u, ?rsolidus|Rest]) when ?is_hex(D), ?is_hex(C), ?is_hex(B), ?is_hex(A) -> case erlang:list_to_integer([A, B, C, D], 16) of @@ -426,14 +412,12 @@ check_acc_for_surrogate(_) -> false. %% stole this from the unicode spec - surrogate_to_codepoint(High, Low) -> (High - 16#d800) * 16#400 + (Low - 16#dc00) + 16#10000. %% like strings, numbers are collected in an intermediate accumulator before %% being emitted to the callback handler - negative(<<$0/?encoding, Rest/binary>>, Stack, Opts, Acc) -> zero(Rest, Stack, Opts, "0" ++ Acc); negative(<>, Stack, Opts, Acc) when ?is_nonzero(S) -> @@ -804,8 +788,7 @@ null(Bin, Stack, Opts) -> %% any unicode character is valid in a comment except the */ sequence which ends %% the comment. they're implemented as a closure called when the comment ends that %% returns execution to the point where the comment began. comments are not -%% recorded in any way, simply parsed. - +%% reported in any way, simply parsed. maybe_comment(<>, Resume) -> comment(Rest, Resume); maybe_comment(Bin, Resume) -> diff --git a/src/jsx.erl b/src/jsx.erl index 8aa7411..0ba19ea 100644 --- a/src/jsx.erl +++ b/src/jsx.erl @@ -82,7 +82,6 @@ parse_opts([{encoding, _}|Rest], Opts) -> %% encoding detection - %% first check to see if there's a bom, if not, use the rfc4627 method for determining %% encoding. this function makes some assumptions about the validity of the stream %% which may delay failure later than if an encoding is explicitly provided