310 lines
No EOL
11 KiB
Erlang
310 lines
No EOL
11 KiB
Erlang
%% The MIT License
|
|
|
|
%% Copyright (c) 2010 Alisdair Sullivan <alisdairsullivan@yahoo.ca>
|
|
|
|
%% Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
%% of this software and associated documentation files (the "Software"), to deal
|
|
%% in the Software without restriction, including without limitation the rights
|
|
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
%% copies of the Software, and to permit persons to whom the Software is
|
|
%% furnished to do so, subject to the following conditions:
|
|
|
|
%% The above copyright notice and this permission notice shall be included in
|
|
%% all copies or substantial portions of the Software.
|
|
|
|
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
%% THE SOFTWARE.
|
|
|
|
|
|
-module(jsx_utils).
|
|
|
|
-export([nice_decimal/1, detect_encoding/1, detect_encoding/2]).
|
|
|
|
|
|
-ifdef(TEST).
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
-endif.
|
|
|
|
|
|
|
|
%% conversion of floats to 'nice' decimal output. erlang's float implementation
|
|
%% is almost but not quite ieee 754. it converts negative zero to plain zero
|
|
%% silently, and throws exceptions for any operations that would produce NaN
|
|
%% or infinity. as far as I can tell that is. trying to match against NaN or
|
|
%% infinity binary patterns produces nomatch exceptions, and arithmetic
|
|
%% operations produce badarg exceptions. with that in mind, this function
|
|
%% makes no attempt to handle special values (except for zero)
|
|
|
|
%% algorithm from "Printing Floating-Point Numbers Quickly and Accurately" by
|
|
%% Burger & Dybvig
|
|
|
|
|
|
-spec nice_decimal(Float::float()) -> string().
|
|
|
|
nice_decimal(0.0) -> "0.0";
|
|
nice_decimal(Num) ->
|
|
{F, E} = extract(<<Num:64/float>>),
|
|
{R, S, MP, MM} = initial_vals(F, E),
|
|
K = ceiling((math:log(abs(Num)) / math:log(10)) - 1.0e-10),
|
|
Round = F band 1 =:= 0,
|
|
{Dpoint, Digits} = scale(R, S, MP, MM, K, 10, Round),
|
|
if Num >= 0 -> digits_to_list(Dpoint, Digits)
|
|
; Num < 0 -> "-" ++ digits_to_list(Dpoint, Digits)
|
|
end.
|
|
|
|
|
|
%% internal functions
|
|
|
|
extract(<<_:1, 0:11, Frac:52>>) -> {Frac, -1074};
|
|
extract(<<_:1, Exp:11, Frac:52>>) -> {Frac + (1 bsl 52), Exp - 1075}.
|
|
|
|
|
|
initial_vals(F, E) when E >= 0, F /= 1 bsl 52 ->
|
|
BE = 1 bsl E,
|
|
{F * BE * 2, 2, BE, BE};
|
|
initial_vals(F, E) when E >= 0 ->
|
|
BE = 1 bsl E,
|
|
{F * BE * 4, 4, BE * 2, BE};
|
|
initial_vals(F, E) when E == -1074; F /= 1 bsl 52 ->
|
|
{F * 2, 1 bsl (-E + 1), 1, 1};
|
|
initial_vals(F, E) ->
|
|
{F * 4, 1 bsl (-E + 2), 2, 1}.
|
|
|
|
|
|
ceiling(X) ->
|
|
Y = erlang:trunc(X),
|
|
case X - Y of
|
|
Z when Z > 0 -> Y + 1
|
|
; _ -> Y
|
|
end.
|
|
|
|
|
|
scale(R, S, MP, MM, K, B, Round) ->
|
|
case K >= 0 of
|
|
true -> fixup(R, S * pow(B, K), MP, MM, K, B, Round)
|
|
; false ->
|
|
Scale = pow(B, -1 * K),
|
|
fixup(R * Scale, S, MP * Scale, MM * Scale, K, B, Round)
|
|
end.
|
|
|
|
|
|
fixup(R, S, MP, MM, K, B, true) ->
|
|
case (R + MP >= S) of
|
|
true -> {K + 1, generate(R, S, MP, MM, B, true)}
|
|
; false -> {K, generate(R * B, S, MP * B, MM * B, B, true)}
|
|
end;
|
|
fixup(R, S, MP, MM, K, B, false) ->
|
|
case (R + MP > S) of
|
|
true -> {K + 1, generate(R, S, MP, MM, B, true)}
|
|
; false -> {K, generate(R * B, S, MP * B, MM * B, B, true)}
|
|
end.
|
|
|
|
|
|
generate(RT, S, MP, MM, B, Round) ->
|
|
D = RT div S,
|
|
R = RT rem S,
|
|
TC1 = case Round of true -> (R =< MM); false -> (R < MM) end,
|
|
TC2 = case Round of true -> (R + MP >= S); false -> (R + MP > S) end,
|
|
case TC1 of
|
|
false -> case TC2 of
|
|
false -> [D | generate(R * B, S, MP * B, MM * B, B, Round)]
|
|
; true -> [D + 1]
|
|
end
|
|
; true -> case TC2 of
|
|
false -> [D]
|
|
; true -> case R * 2 < S of
|
|
true -> [D]
|
|
; false -> [D + 1]
|
|
end
|
|
end
|
|
end.
|
|
|
|
|
|
%% this is not efficient at all and should be replaced with a lookup table
|
|
%% probably
|
|
pow(_B, 0) -> 1;
|
|
pow(B, E) when E > 0 -> pow(B, E, 1).
|
|
|
|
pow(B, E, Acc) when E < 2 -> B * Acc;
|
|
pow(B, E, Acc) when E band 1 == 1 -> pow(B * B, E bsr 1, B * Acc);
|
|
pow(B, E, Acc) -> pow(B * B, E bsr 1, Acc).
|
|
|
|
|
|
digits_to_list(0, Digits) ->
|
|
digits_to_list(Digits, ignore, ".0");
|
|
digits_to_list(Dpoint, Digits) when Dpoint =< length(Digits), Dpoint > 0 ->
|
|
digits_to_list(Digits, Dpoint, []);
|
|
digits_to_list(Dpoint, Digits) when Dpoint > 0 ->
|
|
Pad = Dpoint - length(Digits),
|
|
case Pad of
|
|
X when X > 6 ->
|
|
digits_to_list(Digits, 1, []) ++ "e" ++ integer_to_list(Dpoint - 1)
|
|
; _ ->
|
|
digits_to_list(Digits ++ [ 0 || _ <- lists:seq(1, Pad)], Dpoint, [])
|
|
end;
|
|
digits_to_list(Dpoint, Digits) when Dpoint < 0 ->
|
|
digits_to_list(Digits, 1, []) ++ "e" ++ integer_to_list(Dpoint - 1).
|
|
|
|
digits_to_list([], 0, Acc) ->
|
|
lists:reverse("0." ++ Acc);
|
|
digits_to_list([], ignore, Acc) ->
|
|
lists:reverse(Acc);
|
|
digits_to_list(Digits, 0, Acc) ->
|
|
digits_to_list(Digits, ignore, "." ++ Acc);
|
|
digits_to_list([Digit|Digits], Dpoint, Acc) ->
|
|
digits_to_list(Digits,
|
|
case Dpoint of ignore -> ignore; X -> X - 1 end, to_ascii(Digit) ++ Acc
|
|
).
|
|
|
|
|
|
to_ascii(10) -> "a";
|
|
to_ascii(11) -> "b";
|
|
to_ascii(12) -> "c";
|
|
to_ascii(13) -> "d";
|
|
to_ascii(14) -> "e";
|
|
to_ascii(15) -> "f";
|
|
to_ascii(X) -> [X + 48]. %% ascii "1" is [49], "2" is [50], etc...
|
|
|
|
|
|
%% encoding detection
|
|
%% first check to see if there's a bom, if not, use the rfc4627 method for
|
|
%% determining encoding. this function makes some assumptions about the
|
|
%% validity of the stream which may delay failure later than if an encoding is
|
|
%% explicitly provided
|
|
|
|
detect_encoding(OptsList) ->
|
|
fun(Stream) -> detect_encoding(Stream, OptsList) end.
|
|
|
|
%% utf8 bom detection
|
|
detect_encoding(<<16#ef, 16#bb, 16#bf, Rest/binary>>, Opts) ->
|
|
(jsx_utf8:decoder(Opts))(Rest);
|
|
%% utf32-little bom detection (this has to come before utf16-little or it'll
|
|
%% match that)
|
|
detect_encoding(<<16#ff, 16#fe, 0, 0, Rest/binary>>, Opts) ->
|
|
(jsx_utf32le:decoder(Opts))(Rest);
|
|
%% utf16-big bom detection
|
|
detect_encoding(<<16#fe, 16#ff, Rest/binary>>, Opts) ->
|
|
(jsx_utf16:decoder(Opts))(Rest);
|
|
%% utf16-little bom detection
|
|
detect_encoding(<<16#ff, 16#fe, Rest/binary>>, Opts) ->
|
|
(jsx_utf16le:decoder(Opts))(Rest);
|
|
%% utf32-big bom detection
|
|
detect_encoding(<<0, 0, 16#fe, 16#ff, Rest/binary>>, Opts) ->
|
|
(jsx_utf32:decoder(Opts))(Rest);
|
|
|
|
%% utf32-little null order detection
|
|
detect_encoding(<<X, 0, 0, 0, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
|
(jsx_utf32le:decoder(Opts))(JSON);
|
|
%% utf32-big null order detection
|
|
detect_encoding(<<0, 0, 0, X, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
|
(jsx_utf32:decoder(Opts))(JSON);
|
|
%% utf16-little null order detection
|
|
detect_encoding(<<X, 0, _, _, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
|
(jsx_utf16le:decoder(Opts))(JSON);
|
|
%% utf16-big null order detection
|
|
detect_encoding(<<0, X, _, _, _Rest/binary>> = JSON, Opts) when X =/= 0 ->
|
|
(jsx_utf16:decoder(Opts))(JSON);
|
|
%% utf8 null order detection
|
|
detect_encoding(<<X, Y, _Rest/binary>> = JSON, Opts) when X =/= 0, Y =/= 0 ->
|
|
(jsx_utf8:decoder(Opts))(JSON);
|
|
|
|
%% a problem, to autodetect naked single digits' encoding, there is not enough
|
|
%% data to conclusively determine the encoding correctly. below is an attempt
|
|
%% to solve the problem
|
|
detect_encoding(<<X>>, Opts) when X =/= 0 ->
|
|
{jsx, incomplete,
|
|
fun(end_stream) ->
|
|
try
|
|
{jsx, incomplete, Next} = (jsx_utf8:decoder(Opts))(<<X>>),
|
|
Next(end_stream)
|
|
catch
|
|
error:function_clause -> {error, {badjson, <<X>>}}
|
|
; error:{badmatch, _} -> {error, {badjson, <<X>>}}
|
|
end
|
|
; (Stream) -> detect_encoding(<<X, Stream/binary>>, Opts)
|
|
end
|
|
};
|
|
detect_encoding(<<0, X>>, Opts) when X =/= 0 ->
|
|
{jsx, incomplete,
|
|
fun(end_stream) ->
|
|
try
|
|
{jsx, incomplete, Next}
|
|
= (jsx_utf16:decoder(Opts))(<<0, X>>),
|
|
Next(end_stream)
|
|
catch
|
|
error:function_clause -> {error, {badjson, <<0, X>>}}
|
|
; error:{badmatch, _} -> {error, {badjson, <<X>>}}
|
|
end
|
|
; (Stream) -> detect_encoding(<<0, X, Stream/binary>>, Opts)
|
|
end
|
|
};
|
|
detect_encoding(<<X, 0>>, Opts) when X =/= 0 ->
|
|
{jsx, incomplete,
|
|
fun(end_stream) ->
|
|
try
|
|
{jsx, incomplete, Next}
|
|
= (jsx_utf16le:decoder(Opts))(<<X, 0>>),
|
|
Next(end_stream)
|
|
catch
|
|
error:function_clause -> {error, {badjson, <<X, 0>>}}
|
|
; error:{badmatch, _} -> {error, {badjson, <<X>>}}
|
|
end
|
|
; (Stream) -> detect_encoding(<<X, 0, Stream/binary>>, Opts)
|
|
end
|
|
};
|
|
|
|
%% not enough input, request more
|
|
detect_encoding(Bin, Opts) ->
|
|
{jsx, incomplete,
|
|
fun(end_stream) -> {error, {badjson, Bin}}
|
|
; (Stream) -> detect_encoding(<<Bin/binary, Stream/binary>>, Opts)
|
|
end
|
|
}.
|
|
|
|
|
|
%% eunit tests
|
|
-ifdef(TEST).
|
|
|
|
nice_decimal_test_() ->
|
|
[
|
|
{"0.0", ?_assert(nice_decimal(0.0) =:= "0.0")},
|
|
{"1.0", ?_assert(nice_decimal(1.0) =:= "1.0")},
|
|
{"-1.0", ?_assert(nice_decimal(-1.0) =:= "-1.0")},
|
|
{"3.1234567890987654321",
|
|
?_assert(
|
|
nice_decimal(3.1234567890987654321) =:= "3.1234567890987655")
|
|
},
|
|
{"1.0e23", ?_assert(nice_decimal(1.0e23) =:= "1.0e23")},
|
|
{"0.3", ?_assert(nice_decimal(3.0/10.0) =:= "0.3")},
|
|
{"0.0001", ?_assert(nice_decimal(0.0001) =:= "1.0e-4")},
|
|
{"0.00000001", ?_assert(nice_decimal(0.00000001) =:= "1.0e-8")},
|
|
{"1.0e-323", ?_assert(nice_decimal(1.0e-323) =:= "1.0e-323")},
|
|
{"1.0e308", ?_assert(nice_decimal(1.0e308) =:= "1.0e308")},
|
|
{"min normalized float",
|
|
?_assert(
|
|
nice_decimal(math:pow(2, -1022)) =:= "2.2250738585072014e-308"
|
|
)
|
|
},
|
|
{"max normalized float",
|
|
?_assert(
|
|
nice_decimal((2 - math:pow(2, -52)) * math:pow(2, 1023))
|
|
=:= "1.7976931348623157e308"
|
|
)
|
|
},
|
|
{"min denormalized float",
|
|
?_assert(nice_decimal(math:pow(2, -1074)) =:= "5.0e-324")
|
|
},
|
|
{"max denormalized float",
|
|
?_assert(
|
|
nice_decimal((1 - math:pow(2, -52)) * math:pow(2, -1022))
|
|
=:= "2.225073858507201e-308"
|
|
)
|
|
}
|
|
].
|
|
|
|
-endif. |