rework decoder and encoder slightly to be a little more sane
This commit is contained in:
parent
3625aaeb66
commit
8592742ded
2 changed files with 118 additions and 34 deletions
|
@ -338,11 +338,8 @@ key(Bin, Handler, Stack, Config) ->
|
||||||
?error(key, Bin, Handler, Stack, Config).
|
?error(key, Bin, Handler, Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
%% explicitly whitelist ascii set for faster parsing. really? really. someone should
|
|
||||||
%% submit a patch that unrolls simple guards
|
|
||||||
%% note that if you encounter an error from string and you can't find the clause that
|
%% note that if you encounter an error from string and you can't find the clause that
|
||||||
%% caused it here, it might be in unescape below
|
%% caused it here, it might be in unescape below
|
||||||
|
|
||||||
string(Bin, Handler, Stack, Config) ->
|
string(Bin, Handler, Stack, Config) ->
|
||||||
string(Bin, Handler, [], Stack, Config).
|
string(Bin, Handler, [], Stack, Config).
|
||||||
|
|
||||||
|
@ -355,9 +352,10 @@ string(<<?solidus, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, [Acc, maybe_replace(?solidus, Config)], Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace(?solidus, Config)], Stack, Config);
|
||||||
string(<<?rsolidus/utf8, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<?rsolidus/utf8, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
unescape(Rest, Handler, Acc, Stack, Config);
|
unescape(Rest, Handler, Acc, Stack, Config);
|
||||||
string(<<X/utf8, Rest/binary>>, Handler, Acc, Stack, Config=#config{uescape=true}) ->
|
%% TODO this is pretty gross and i don't like it
|
||||||
|
string(<<X/utf8, Rest/binary>> = Bin, Handler, Acc, Stack, Config=#config{uescape=true}) ->
|
||||||
case X of
|
case X of
|
||||||
X when X < 16#80 -> string(Rest, Handler, [Acc, X], Stack, Config);
|
X when X < 16#80 -> count(Bin, Handler, Acc, Stack, Config);
|
||||||
X -> string(Rest, Handler, [Acc, json_escape_sequence(X)], Stack, Config)
|
X -> string(Rest, Handler, [Acc, json_escape_sequence(X)], Stack, Config)
|
||||||
end;
|
end;
|
||||||
%% u+2028
|
%% u+2028
|
||||||
|
@ -367,14 +365,11 @@ string(<<226, 128, 168, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(<<226, 128, 169, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<226, 128, 169, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, [Acc, maybe_replace(16#2029, Config)], Stack, Config);
|
string(Rest, Handler, [Acc, maybe_replace(16#2029, Config)], Stack, Config);
|
||||||
string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) ->
|
string(<<_/utf8, _/binary>> = Bin, Handler, Acc, Stack, Config) ->
|
||||||
Size = count(Bin, 0, Config),
|
count(Bin, Handler, Acc, Stack, Config);
|
||||||
<<Clean:Size/binary, Rest/binary>> = Bin,
|
%% necessary for bytes that are badly formed utf8 that won't match in `count`
|
||||||
string(Rest, Handler, [Acc, Clean], Stack, Config);
|
|
||||||
%% really, really dirty strings. if there's no valid utf8 we never reach `count`
|
|
||||||
%% and things get replaced instead of ignored
|
|
||||||
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
string(<<X, Rest/binary>>, Handler, Acc, Stack, Config=#config{dirty_strings=true}) ->
|
||||||
string(Rest, Handler, [Acc, X], Stack, Config);
|
string(Rest, Handler, [Acc, X], Stack, Config);
|
||||||
%% u+fffe and u+ffff for R14BXX (subsequent runtimes will happily match preceding clauses)
|
%% u+fffe and u+ffff for R14BXX (subsequent runtimes will happily match with /utf8)
|
||||||
string(<<239, 191, 190, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<239, 191, 190, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
string(Rest, Handler, [Acc, <<16#fffe/utf8>>], Stack, Config);
|
string(Rest, Handler, [Acc, <<16#fffe/utf8>>], Stack, Config);
|
||||||
string(<<239, 191, 191, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
string(<<239, 191, 191, Rest/binary>>, Handler, Acc, Stack, Config) ->
|
||||||
|
@ -411,6 +406,14 @@ string(<<_, Rest/binary>>, Handler, Acc, Stack, Config=#config{strict_utf8=false
|
||||||
string(Bin, Handler, Acc, Stack, Config) -> ?error(string, Bin, Handler, Acc, Stack, Config).
|
string(Bin, Handler, Acc, Stack, Config) -> ?error(string, Bin, Handler, Acc, Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
|
count(Bin, Handler, Acc, Stack, Config) ->
|
||||||
|
Size = count(Bin, 0, Config),
|
||||||
|
<<Clean:Size/binary, Rest/binary>> = Bin,
|
||||||
|
string(Rest, Handler, [Acc, Clean], Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
|
%% explicitly whitelist ascii set for faster parsing. really? really. someone should
|
||||||
|
%% submit a patch that unrolls simple guards
|
||||||
count(<<0, Rest/binary>>, N, Config) ->
|
count(<<0, Rest/binary>>, N, Config) ->
|
||||||
count(Rest, N + 1, Config);
|
count(Rest, N + 1, Config);
|
||||||
count(<<1, Rest/binary>>, N, Config) ->
|
count(<<1, Rest/binary>>, N, Config) ->
|
||||||
|
@ -666,13 +669,12 @@ count(<<127, Rest/binary>>, N, Config) ->
|
||||||
count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) ->
|
count(<<_, Rest/binary>>, N, Config=#config{dirty_strings=true}) ->
|
||||||
count(Rest, N + 1, Config);
|
count(Rest, N + 1, Config);
|
||||||
count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N;
|
count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N;
|
||||||
%% u+2028
|
|
||||||
count(<<226, 128, 168, _/binary>>, N, _) -> N;
|
|
||||||
%% u+2029
|
|
||||||
count(<<226, 128, 169, _/binary>>, N, _) -> N;
|
|
||||||
count(<<X/utf8, Rest/binary>>, N, Config) ->
|
count(<<X/utf8, Rest/binary>>, N, Config) ->
|
||||||
case X of
|
case X of
|
||||||
X when X < 16#800 -> count(Rest, N + 2, Config);
|
X when X < 16#800 -> count(Rest, N + 2, Config);
|
||||||
|
%% jsonp escaping
|
||||||
|
16#2028 -> N;
|
||||||
|
16#2029 -> N;
|
||||||
X when X < 16#10000 -> count(Rest, N + 3, Config);
|
X when X < 16#10000 -> count(Rest, N + 3, Config);
|
||||||
_ -> count(Rest, N + 4, Config)
|
_ -> count(Rest, N + 4, Config)
|
||||||
end;
|
end;
|
||||||
|
@ -1007,7 +1009,6 @@ finish_number(Rest, Handler, Acc, Stack, Config) ->
|
||||||
format_number({integer, Acc}) -> {integer, binary_to_integer(Acc)};
|
format_number({integer, Acc}) -> {integer, binary_to_integer(Acc)};
|
||||||
format_number({float, Acc}) -> {float, binary_to_float(Acc)}.
|
format_number({float, Acc}) -> {float, binary_to_float(Acc)}.
|
||||||
-endif.
|
-endif.
|
||||||
|
|
||||||
-ifdef(no_binary_to_whatever).
|
-ifdef(no_binary_to_whatever).
|
||||||
format_number({integer, Acc}) -> {integer, list_to_integer(unicode:characters_to_list(Acc))};
|
format_number({integer, Acc}) -> {integer, list_to_integer(unicode:characters_to_list(Acc))};
|
||||||
format_number({float, Acc}) -> {float, list_to_float(unicode:characters_to_list(Acc))}.
|
format_number({float, Acc}) -> {float, list_to_float(unicode:characters_to_list(Acc))}.
|
||||||
|
@ -1471,6 +1472,7 @@ codepoints() ->
|
||||||
[16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000] ++
|
[16#60000, 16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000] ++
|
||||||
[16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000].
|
[16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000].
|
||||||
|
|
||||||
|
|
||||||
surrogates() -> lists:seq(16#d800, 16#dfff).
|
surrogates() -> lists:seq(16#d800, 16#dfff).
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -130,6 +130,7 @@ value(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
|
||||||
value(Token, Handler, Stack, Config) ->
|
value(Token, Handler, Stack, Config) ->
|
||||||
value([Token], Handler, Stack, Config).
|
value([Token], Handler, Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
object([end_object|Tokens], Handler, [object|Stack], Config) ->
|
object([end_object|Tokens], Handler, [object|Stack], Config) ->
|
||||||
maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config);
|
maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config);
|
||||||
object([{key, Key}|Tokens], Handler, Stack, Config)
|
object([{key, Key}|Tokens], Handler, Stack, Config)
|
||||||
|
@ -153,6 +154,7 @@ object([], Handler, Stack, Config) ->
|
||||||
object(Token, Handler, Stack, Config) ->
|
object(Token, Handler, Stack, Config) ->
|
||||||
object([Token], Handler, Stack, Config).
|
object([Token], Handler, Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
array([end_array|Tokens], Handler, [array|Stack], Config) ->
|
array([end_array|Tokens], Handler, [array|Stack], Config) ->
|
||||||
maybe_done(Tokens, handle_event(end_array, Handler, Config), Stack, Config);
|
maybe_done(Tokens, handle_event(end_array, Handler, Config), Stack, Config);
|
||||||
array([], Handler, Stack, Config) ->
|
array([], Handler, Stack, Config) ->
|
||||||
|
@ -162,6 +164,7 @@ array(Tokens, Handler, Stack, Config) when is_list(Tokens) ->
|
||||||
array(Token, Handler, Stack, Config) ->
|
array(Token, Handler, Stack, Config) ->
|
||||||
array([Token], Handler, Stack, Config).
|
array([Token], Handler, Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
maybe_done([end_json], Handler, [], Config) ->
|
maybe_done([end_json], Handler, [], Config) ->
|
||||||
done([end_json], Handler, [], Config);
|
done([end_json], Handler, [], Config);
|
||||||
maybe_done(Tokens, Handler, [object|_] = Stack, Config) when is_list(Tokens) ->
|
maybe_done(Tokens, Handler, [object|_] = Stack, Config) when is_list(Tokens) ->
|
||||||
|
@ -175,6 +178,7 @@ maybe_done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
|
||||||
maybe_done(Token, Handler, Stack, Config) ->
|
maybe_done(Token, Handler, Stack, Config) ->
|
||||||
maybe_done([Token], Handler, Stack, Config).
|
maybe_done([Token], Handler, Stack, Config).
|
||||||
|
|
||||||
|
|
||||||
done([], Handler, [], Config=#config{stream=true}) ->
|
done([], Handler, [], Config=#config{stream=true}) ->
|
||||||
incomplete(done, Handler, [], Config);
|
incomplete(done, Handler, [], Config);
|
||||||
done(Tokens, Handler, [], Config) when Tokens == [end_json]; Tokens == [] ->
|
done(Tokens, Handler, [], Config) when Tokens == [end_json]; Tokens == [] ->
|
||||||
|
@ -195,23 +199,89 @@ clean_string(Bin, #config{dirty_strings=true}) -> Bin;
|
||||||
clean_string(Bin, Config) -> clean(Bin, [], Config).
|
clean_string(Bin, Config) -> clean(Bin, [], Config).
|
||||||
|
|
||||||
|
|
||||||
clean(<<>>, Acc, _) -> iolist_to_binary(Acc);
|
%% unroll the control characters
|
||||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X < 16#20 ->
|
clean(<<0, Rest/binary>>, Acc, Config) ->
|
||||||
clean(Rest, [Acc, maybe_replace(X, Config)], Config);
|
clean(Rest, [Acc, maybe_replace(0, Config)], Config);
|
||||||
|
clean(<<1, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(1, Config)], Config);
|
||||||
|
clean(<<2, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(2, Config)], Config);
|
||||||
|
clean(<<3, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(3, Config)], Config);
|
||||||
|
clean(<<4, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(4, Config)], Config);
|
||||||
|
clean(<<5, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(5, Config)], Config);
|
||||||
|
clean(<<6, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(6, Config)], Config);
|
||||||
|
clean(<<7, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(7, Config)], Config);
|
||||||
|
clean(<<8, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(8, Config)], Config);
|
||||||
|
clean(<<9, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(9, Config)], Config);
|
||||||
|
clean(<<10, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(10, Config)], Config);
|
||||||
|
clean(<<11, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(11, Config)], Config);
|
||||||
|
clean(<<12, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(12, Config)], Config);
|
||||||
|
clean(<<13, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(13, Config)], Config);
|
||||||
|
clean(<<14, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(14, Config)], Config);
|
||||||
|
clean(<<15, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(15, Config)], Config);
|
||||||
|
clean(<<16, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(16, Config)], Config);
|
||||||
|
clean(<<17, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(17, Config)], Config);
|
||||||
|
clean(<<18, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(18, Config)], Config);
|
||||||
|
clean(<<19, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(19, Config)], Config);
|
||||||
|
clean(<<20, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(20, Config)], Config);
|
||||||
|
clean(<<21, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(21, Config)], Config);
|
||||||
|
clean(<<22, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(22, Config)], Config);
|
||||||
|
clean(<<23, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(23, Config)], Config);
|
||||||
|
clean(<<24, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(24, Config)], Config);
|
||||||
|
clean(<<25, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(25, Config)], Config);
|
||||||
|
clean(<<26, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(26, Config)], Config);
|
||||||
|
clean(<<27, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(27, Config)], Config);
|
||||||
|
clean(<<28, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(28, Config)], Config);
|
||||||
|
clean(<<29, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(29, Config)], Config);
|
||||||
|
clean(<<30, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(30, Config)], Config);
|
||||||
|
clean(<<31, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(31, Config)], Config);
|
||||||
clean(<<34, Rest/binary>>, Acc, Config) ->
|
clean(<<34, Rest/binary>>, Acc, Config) ->
|
||||||
clean(Rest, [Acc, maybe_replace(34, Config)], Config);
|
clean(Rest, [Acc, maybe_replace(34, Config)], Config);
|
||||||
clean(<<47, Rest/binary>>, Acc, Config) ->
|
clean(<<47, Rest/binary>>, Acc, Config) ->
|
||||||
clean(Rest, [Acc, maybe_replace(47, Config)], Config);
|
clean(Rest, [Acc, maybe_replace(47, Config)], Config);
|
||||||
clean(<<92, Rest/binary>>, Acc, Config) ->
|
clean(<<92, Rest/binary>>, Acc, Config) ->
|
||||||
clean(Rest, [Acc, maybe_replace(92, Config)], Config);
|
clean(Rest, [Acc, maybe_replace(92, Config)], Config);
|
||||||
clean(<<X/utf8, Rest/binary>>, Acc, Config=#config{uescape=true}) when X >= 16#80 ->
|
clean(<<X/utf8, Rest/binary>> = Bin, Acc, Config=#config{uescape=true}) ->
|
||||||
clean(Rest, [Acc, json_escape_sequence(X)], Config);
|
case X of
|
||||||
clean(<<X/utf8, Rest/binary>>, Acc, Config) when X == 16#2028; X == 16#2029 ->
|
X when X < 16#80 -> start_count(Bin, Acc, Config);
|
||||||
clean(Rest, [Acc, maybe_replace(X, Config)], Config);
|
_ -> clean(Rest, [Acc, json_escape_sequence(X)], Config)
|
||||||
clean(<<_/utf8, _/binary>> = Bin, Acc, Config) ->
|
end;
|
||||||
Size = count(Bin, 0, Config),
|
%% u+2028
|
||||||
<<Clean:Size/binary, Rest/binary>> = Bin,
|
clean(<<226, 128, 168, Rest/binary>>, Acc, Config) ->
|
||||||
clean(Rest, [Acc, Clean], Config);
|
clean(Rest, [Acc, maybe_replace(16#2028, Config)], Config);
|
||||||
|
%% u+2029
|
||||||
|
clean(<<226, 128, 169, Rest/binary>>, Acc, Config) ->
|
||||||
|
clean(Rest, [Acc, maybe_replace(16#2029, Config)], Config);
|
||||||
|
clean(<<_/utf8, _/binary>> = Bin, Acc, Config) -> start_count(Bin, Acc, Config);
|
||||||
%% surrogates
|
%% surrogates
|
||||||
clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
|
clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
|
||||||
clean(Rest, [Acc, maybe_replace(surrogate, Config)], Config);
|
clean(Rest, [Acc, maybe_replace(surrogate, Config)], Config);
|
||||||
|
@ -225,10 +295,17 @@ clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
|
||||||
clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
|
clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
|
||||||
clean(strip_continuations(Rest, 3), [Acc, maybe_replace(badutf, Config)], Config);
|
clean(strip_continuations(Rest, 3), [Acc, maybe_replace(badutf, Config)], Config);
|
||||||
clean(<<_, Rest/binary>>, Acc, Config) ->
|
clean(<<_, Rest/binary>>, Acc, Config) ->
|
||||||
clean(Rest, [Acc, maybe_replace(badutf, Config)], Config).
|
clean(Rest, [Acc, maybe_replace(badutf, Config)], Config);
|
||||||
|
clean(<<>>, Acc, _) -> iolist_to_binary(Acc).
|
||||||
|
|
||||||
|
|
||||||
count(<<>>, N, _) -> N;
|
start_count(Bin, Acc, Config) ->
|
||||||
|
Size = count(Bin, 0, Config),
|
||||||
|
<<Clean:Size/binary, Rest/binary>> = Bin,
|
||||||
|
clean(Rest, [Acc, Clean], Config).
|
||||||
|
|
||||||
|
|
||||||
|
%% again, unrolling ascii makes a huge difference. sadly
|
||||||
count(<<0, _/binary>>, N, _) -> N;
|
count(<<0, _/binary>>, N, _) -> N;
|
||||||
count(<<1, _/binary>>, N, _) -> N;
|
count(<<1, _/binary>>, N, _) -> N;
|
||||||
count(<<2, _/binary>>, N, _) -> N;
|
count(<<2, _/binary>>, N, _) -> N;
|
||||||
|
@ -451,17 +528,16 @@ count(<<126, Rest/binary>>, N, Config) ->
|
||||||
count(<<127, Rest/binary>>, N, Config) ->
|
count(<<127, Rest/binary>>, N, Config) ->
|
||||||
count(Rest, N + 1, Config);
|
count(Rest, N + 1, Config);
|
||||||
count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N;
|
count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N;
|
||||||
%% u+2028
|
|
||||||
count(<<226, 128, 168, _/binary>>, N, _) -> N;
|
|
||||||
%% u+2029
|
|
||||||
count(<<226, 128, 169, _/binary>>, N, _) -> N;
|
|
||||||
count(<<X/utf8, Rest/binary>>, N, Config) ->
|
count(<<X/utf8, Rest/binary>>, N, Config) ->
|
||||||
case X of
|
case X of
|
||||||
X when X < 16#800 -> count(Rest, N + 2, Config);
|
X when X < 16#800 -> count(Rest, N + 2, Config);
|
||||||
|
16#2028 -> N;
|
||||||
|
16#2029 -> N;
|
||||||
X when X < 16#10000 -> count(Rest, N + 3, Config);
|
X when X < 16#10000 -> count(Rest, N + 3, Config);
|
||||||
_ -> count(Rest, N + 4, Config)
|
_ -> count(Rest, N + 4, Config)
|
||||||
end;
|
end;
|
||||||
count(<<_, _/binary>>, N, _) -> N.
|
count(<<_, _/binary>>, N, _) -> N;
|
||||||
|
count(<<>>, N, _) -> N.
|
||||||
|
|
||||||
|
|
||||||
strip_continuations(Bin, 0) -> Bin;
|
strip_continuations(Bin, 0) -> Bin;
|
||||||
|
@ -524,6 +600,7 @@ to_hex(X) -> X + 48. %% ascii "1" is [49], "2" is [50], etc...
|
||||||
|
|
||||||
init([]) -> [].
|
init([]) -> [].
|
||||||
|
|
||||||
|
|
||||||
-spec handle_event(Event::any(), Acc::list()) -> list().
|
-spec handle_event(Event::any(), Acc::list()) -> list().
|
||||||
|
|
||||||
handle_event(end_json, State) -> lists:reverse(State);
|
handle_event(end_json, State) -> lists:reverse(State);
|
||||||
|
@ -631,6 +708,7 @@ codepoints() ->
|
||||||
++ lists:seq(16#e000, 16#ffff)
|
++ lists:seq(16#e000, 16#ffff)
|
||||||
).
|
).
|
||||||
|
|
||||||
|
|
||||||
extended_codepoints() ->
|
extended_codepoints() ->
|
||||||
unicode:characters_to_binary(
|
unicode:characters_to_binary(
|
||||||
lists:seq(16#10000, 16#1ffff) ++ [
|
lists:seq(16#10000, 16#1ffff) ++ [
|
||||||
|
@ -640,13 +718,16 @@ extended_codepoints() ->
|
||||||
]
|
]
|
||||||
).
|
).
|
||||||
|
|
||||||
|
|
||||||
surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ].
|
surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ].
|
||||||
|
|
||||||
|
|
||||||
clean_string_helper(String) ->
|
clean_string_helper(String) ->
|
||||||
try clean_string(String, #config{strict_utf8=true}) of Clean -> Clean
|
try clean_string(String, #config{strict_utf8=true}) of Clean -> Clean
|
||||||
catch error:badarg -> {error, badarg}
|
catch error:badarg -> {error, badarg}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
clean_string_test_() ->
|
clean_string_test_() ->
|
||||||
[
|
[
|
||||||
{"clean codepoints", ?_assertEqual(
|
{"clean codepoints", ?_assertEqual(
|
||||||
|
@ -1069,4 +1150,5 @@ datetime_test_() ->
|
||||||
)}
|
)}
|
||||||
].
|
].
|
||||||
|
|
||||||
|
|
||||||
-endif.
|
-endif.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue