Merge branch 'develop' into error_handler
This commit is contained in:
commit
d0ae8a8fd9
3 changed files with 44 additions and 10 deletions
|
@ -1,3 +1,8 @@
|
|||
v1.4
|
||||
|
||||
* radically refactored decoder
|
||||
* `dirty_strings` now behaves intuitively in decoding. bad codepoints, bad utf8, illegal characters and escapes (except `"` and `'` if `single_quoted_strings` is enabled) are ignored completely
|
||||
|
||||
v1.3.3
|
||||
|
||||
* `pre_encode` now orders input in the order you'd expect
|
||||
|
|
|
@ -263,7 +263,7 @@ jsx functions all take a common set of options. not all flags have meaning in al
|
|||
|
||||
- `escaped_forward_slashes`
|
||||
|
||||
json strings are escaped according to the json spec. this means forward slashes (solidus) are only escaped when this flag is present. otherwise they are left unescaped. this option is only relevant for encoding; you may want to use this if you are embedding json directly into a html or xml document
|
||||
json strings are escaped according to the json spec. this means forward slashes (solidus) are only escaped when this flag is present. otherwise they are left unescaped. you may want to use this if you are embedding json directly into an html or xml document
|
||||
|
||||
- `single_quoted_strings`
|
||||
|
||||
|
@ -285,14 +285,14 @@ jsx functions all take a common set of options. not all flags have meaning in al
|
|||
|
||||
by default, both the encoder and decoder return strings as utf8 binaries appropriate for use in erlang. escape sequences that were present in decoded terms are converted into the appropriate codepoint while encoded terms are unaltered. this flag escapes strings as if for output in json, removing control codes and problematic codepoints and replacing them with the appropriate escapes
|
||||
|
||||
- `dirty_strings`
|
||||
|
||||
json escaping is lossy; it mutates the json string and repeated application can result in unwanted behaviour. if your strings are already escaped (or you'd like to force invalid strings into "json") use this flag to bypass escaping
|
||||
|
||||
- `ignored_bad_escapes`
|
||||
|
||||
during decoding, ignore unrecognized escape sequences and leave them as is in the stream. note that combining this option with `escaped_strings` will result in the escape character itself being escaped
|
||||
|
||||
- `dirty_strings`
|
||||
|
||||
json escaping is lossy; it mutates the json string and repeated application can result in unwanted behaviour. if your strings are already escaped (or you'd like to force invalid strings into "json") use this flag to bypass escaping. this can also be used to read in *really* invalid json strings. everything except escaped quotes is passed as is to the resulting string term. note that this overrides `ignored_bad_escapes`, `unescaped_jsonp` and `escaped_strings`
|
||||
|
||||
- `explicit_end`
|
||||
|
||||
this option treats all exhausted inputs as incomplete. the parser will not attempt to return a final state until the function is called with the value `end_stream`
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
-module(jsx_decoder).
|
||||
|
||||
%% inline sequence accumulation, handle_event and format_number
|
||||
-compile({inline, [new_seq/0, new_seq/1, acc_seq/2, end_seq/1]}).
|
||||
-compile({inline, [new_seq/0, new_seq/1, acc_seq/2, end_seq/1, end_seq/2]}).
|
||||
-compile({inline, [handle_event/3]}).
|
||||
-compile({inline, [format_number/1]}).
|
||||
|
||||
|
@ -149,6 +149,9 @@ acc_seq(Seq, C) -> [C] ++ Seq.
|
|||
|
||||
%% finish a sequence: the accumulator is built newest-element-first, so put it
%% back in input order and then encode the codepoints as a utf8 binary
end_seq(Seq) ->
    InOrder = lists:reverse(Seq),
    unicode:characters_to_binary(InOrder).
|
||||
|
||||
%% finish a sequence, honouring the `dirty_strings` flag of the config record.
%%
%% with `dirty_strings` enabled the accumulator is converted with
%% `list_to_binary/1` instead of `unicode:characters_to_binary/1`, so the
%% accumulated bytes are emitted untouched rather than being utf8 encoded.
%% `list_to_binary/1` accepts iolists, so nested lists in the accumulator are
%% flattened in place; `lists:reverse/1` only reverses the top level, which
%% keeps the elements of such a nested list in their original order.
%%
%% any other config falls through to `end_seq/1`.
%%
%% the record is matched without binding it to a variable: the clause never
%% uses the config itself, and an unused binding is a compiler warning.
end_seq(Seq, #config{dirty_strings=true}) ->
    list_to_binary(lists:reverse(Seq));
end_seq(Seq, _) ->
    end_seq(Seq).
|
||||
|
||||
|
||||
handle_event([], Handler, _Config) -> Handler;
|
||||
handle_event([Event|Rest], Handler, Config) ->
|
||||
|
@ -274,11 +277,11 @@ string(<<33, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|||
string(<<?doublequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
case Stack of
|
||||
[key|_] ->
|
||||
colon(Rest, handle_event({key, end_seq(Acc)}, Handler, Config), Stack, Config);
|
||||
colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), Stack, Config);
|
||||
[single_quote|_] ->
|
||||
string(Rest, Handler,acc_seq(Acc, maybe_replace(?doublequote, Config)), Stack, Config);
|
||||
_ ->
|
||||
maybe_done(Rest, handle_event({string, end_seq(Acc)}, Handler, Config), Stack, Config)
|
||||
maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), Stack, Config)
|
||||
end;
|
||||
string(<<35, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 35), Stack, Config);
|
||||
|
@ -291,9 +294,9 @@ string(<<38, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|||
string(<<?singlequote, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
case Stack of
|
||||
[single_quote, key|S] ->
|
||||
colon(Rest, handle_event({key, end_seq(Acc)}, Handler, Config), [key|S], Config)
|
||||
colon(Rest, handle_event({key, end_seq(Acc, Config)}, Handler, Config), [key|S], Config)
|
||||
; [single_quote|S] ->
|
||||
maybe_done(Rest, handle_event({string, end_seq(Acc)}, Handler, Config), S, Config)
|
||||
maybe_done(Rest, handle_event({string, end_seq(Acc, Config)}, Handler, Config), S, Config)
|
||||
; _ ->
|
||||
string(Rest, Handler, acc_seq(Acc, ?singlequote), Stack, Config)
|
||||
end;
|
||||
|
@ -473,6 +476,8 @@ string(<<126, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
|||
string(Rest, Handler, acc_seq(Acc, 126), Stack, Config);
|
||||
string(<<127, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, 127), Stack, Config);
|
||||
string(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||
string(Rest, Handler, acc_seq(Acc, C), Stack, Config);
|
||||
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X >= 16#20, X < 16#2028 ->
|
||||
string(Rest, Handler, acc_seq(Acc, X), Stack, Config);
|
||||
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config) when X == 16#2028; X == 16#2029 ->
|
||||
|
@ -586,6 +591,8 @@ strip_continuations(Rest, Handler, Acc, Stack, Config, _) ->
|
|||
string(Rest, Handler, acc_seq(Acc, 16#fffd), Stack, Config).
|
||||
|
||||
|
||||
unescape(<<C, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||
string(Rest, Handler, acc_seq(Acc, [?rsolidus, C]), Stack, Config);
|
||||
unescape(<<$b, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
string(Rest, Handler, acc_seq(Acc, maybe_replace($\b, Config)), Stack, Config);
|
||||
unescape(<<$f, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||
|
@ -1326,6 +1333,28 @@ clean_string_test_() ->
|
|||
{"clean extended noncharacters", ?_assertEqual(
|
||||
lists:duplicate(length(extended_noncharacters()), [{string, <<16#fffd/utf8>>}, end_json]),
|
||||
lists:map(fun(Codepoint) -> decode(Codepoint, [replaced_bad_utf8]) end, extended_noncharacters())
|
||||
)},
|
||||
{"dirty strings", ?_assertEqual(
|
||||
lists:map(
|
||||
fun(Result) -> [{string, Result}, end_json] end,
|
||||
[
|
||||
<<"\\uwxyz">>,
|
||||
<<"\\x23">>,
|
||||
<<0>>,
|
||||
<<237, 160, 128>>,
|
||||
<<244, 143, 191, 191>>
|
||||
]
|
||||
),
|
||||
lists:map(
|
||||
fun(JSON) ->decode(JSON, [dirty_strings]) end,
|
||||
[
|
||||
<<34, "\\uwxyz", 34>>,
|
||||
<<34, "\\x23", 34>>,
|
||||
<<34, 0, 34>>,
|
||||
<<34, 237, 160, 128, 34>>,
|
||||
<<34, 244, 143, 191, 191, 34>>
|
||||
]
|
||||
)
|
||||
)}
|
||||
].
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue