leveled/src/leveled_util.erl
Martin Sumner b96518c32a
Use backwards compatible term_to_binary (#408)
* Use backwards compatible term_to_binary

So that where we have hashed term_to_binary output in OTP25 or earlier, that hash will be matched in OTP 26.

* Test reliability

If all keys are put in order, the max_slots may not be used, as the driver at L0 is penciller cache size, and merge to new files (managed by the parameter) only occurs when there are overlapping files in the level below
2023-10-05 10:33:20 +01:00

136 lines
4.3 KiB
Erlang

%% -------- Utility Functions ---------
%%
%% Generally helpful functions within leveled
%%
-module(leveled_util).
-include("include/leveled.hrl").
-export([generate_uuid/0,
integer_now/0,
integer_time/1,
magic_hash/1,
t2b/1,
safe_rename/4]).
-define(WRITE_OPS, [binary, raw, read, write]).
-spec generate_uuid() -> list().
%% @doc
%% Build a version-4 style UUID and return it as a lower-case hex string of
%% the form "xxxxxxxx-xxxx-4xxx-xxxx-xxxxxxxxxxxx".
%% Sixteen bytes obtained from leveled_rand:rand_bytes/1 are split into the
%% five UUID fields. The third field is cut down to 12 bits (the literal "4"
%% in the format string supplies the leading digit), and the fourth field has
%% its top two bits forced to 2#10.
%% Credit to
%% https://github.com/afiskon/erlang-uuid-v4/blob/master/src/uuid.erl
generate_uuid() ->
    <<TimeLow:32, TimeMid:16, TimeHi:16, ClockSeq:16, Node:48>> =
        leveled_rand:rand_bytes(16),
    Version = TimeHi band 16#0fff,
    Variant = ClockSeq band 16#3fff bor 16#8000,
    Chars =
        io_lib:format(
            "~8.16.0b-~4.16.0b-4~3.16.0b-~4.16.0b-~12.16.0b",
            [TimeLow, TimeMid, Version, Variant, Node]),
    binary_to_list(list_to_binary(Chars)).
-spec integer_now() -> non_neg_integer().
%% @doc
%% The current OS time (os:timestamp/0) expressed in gregorian seconds.
integer_now() ->
    Now = os:timestamp(),
    integer_time(Now).
-spec integer_time (erlang:timestamp()) -> non_neg_integer().
%% @doc
%% Convert a {MegaSecs, Secs, MicroSecs} timestamp into gregorian seconds,
%% going via the universal (UTC) datetime for that timestamp. The microsecond
%% element does not contribute to the result.
integer_time(TS) ->
    calendar:datetime_to_gregorian_seconds(
        calendar:now_to_universal_time(TS)).
-spec magic_hash(any()) -> integer().
%% @doc
%% Use DJ Bernstein magic hash function. Note, this is more expensive than
%% phash2 but provides a much more balanced result.
%%
%% Input is either {binary, Bin}, in which case the bytes of Bin are hashed
%% directly, or any other term, which is first serialised with t2b/1 and then
%% hashed as a binary. The result is always in the range 0..16#FFFFFFFF.
%%
%% Hash function contains mysterious constants, some explanation here as to
%% what they are -
%% http://stackoverflow.com/questions/10696223/reason-for-5381-number-in-djb-hash-function
magic_hash({binary, BinaryKey}) ->
    %% The seed is below 2^32 and hash1/2 masks on every step, so the value
    %% returned here is already confined to 32 bits.
    hash1(5381, BinaryKey);
magic_hash(AnyKey) ->
    magic_hash({binary, t2b(AnyKey)}).

%% Fold the hash over the binary one byte at a time: multiply by 33, xor in
%% the byte, keep the low 32 bits.
%% Masking on every step (rather than once at the end) gives the same final
%% value, because both the multiplication and the xor with a byte commute with
%% reduction modulo 2^32. It stops the accumulator growing by ~5 bits per byte
%% into an ever larger bignum, which made long keys progressively slower.
hash1(H, <<>>) ->
    H;
hash1(H, <<B:8/integer, Rest/bytes>>) ->
    hash1(((H * 33) bxor B) band 16#FFFFFFFF, Rest).

-spec t2b(term()) -> binary().
%% @doc
%% term_to_binary with options necessary to ensure backwards compatibility
%% in the handling of atoms (within OTP 26).
%% See https://github.com/martinsumner/leveled/issues/407
%% If the binary() which is outputted is to be hashed for comparison, then
%% this must be used.
t2b(Term) ->
    term_to_binary(Term, [{minor_version, 1}]).
-spec safe_rename(string(), string(), binary(), boolean()) -> ok.
%% @doc
%% Write a file, sync it and rename it (and for super-safe mode read it back)
%% An attempt to prevent crashes leaving files with empty or partially written
%% values
%%
%% TempFN    - path the data is first written to
%% RealFN    - final path; TempFN is renamed to this once synced and closed
%% BinData   - the content to be written
%% ReadCheck - when true, RealFN is read back after the rename and must be
%%             equal to BinData
%%
%% Returns ok. Any failing step (open, truncate, write, sync, close, rename or
%% the read-back comparison) raises a badmatch error rather than returning an
%% error tuple.
safe_rename(TempFN, RealFN, BinData, ReadCheck) ->
    {ok, TempFH} = file:open(TempFN, ?WRITE_OPS),
    %% The open modes combine read with write, and in that combination an
    %% existing file is not truncated on open. Truncate explicitly (the
    %% position is still at the start) so that a longer temp file left behind
    %% by an earlier crash cannot leave stale bytes after BinData.
    ok = file:truncate(TempFH),
    ok = file:write(TempFH, BinData),
    ok = file:sync(TempFH),
    ok = file:close(TempFH),
    ok = file:rename(TempFN, RealFN),
    case ReadCheck of
        true ->
            {ok, ReadBack} = file:read_file(RealFN),
            true = (ReadBack == BinData),
            ok;
        false ->
            ok
    end.
%%%============================================================================
%%% Test
%%%============================================================================
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-define(TEST_AREA, "test/test_area/util/").
%% Hash 1000 generated {Key, Index} terms with magic_hash/1, then with
%% erlang:phash2/1, then with magic_hash/1 again, printing the elapsed time of
%% each run. The second magic_hash run is matched against the output of the
%% first, so the test fails if the hash is not repeatable.
magichashperf_test() ->
    KL =
        [{{o, "Bucket", "Key" ++ integer_to_list(X), null}, X}
            || X <- lists:seq(1, 1000)],
    MagicArgs = [fun(K) -> magic_hash(K) end, KL],
    {TimeMH, Hashes} = timer:tc(lists, map, MagicArgs),
    io:format(user, "1000 keys magic hashed in ~w microseconds~n", [TimeMH]),
    {TimePH, _PHashes} =
        timer:tc(lists, map, [fun(K) -> erlang:phash2(K) end, KL]),
    io:format(user, "1000 keys phash2 hashed in ~w microseconds~n", [TimePH]),
    %% Hashes is already bound, so this match asserts an identical rerun
    {TimeMH2, Hashes} = timer:tc(lists, map, MagicArgs),
    io:format(user, "1000 keys magic hashed in ~w microseconds~n", [TimeMH2]).
%% Exercise safe_rename/4 once without and once with the read-back check,
%% confirming in both cases that the renamed file holds the data written.
safe_rename_test() ->
    ok = filelib:ensure_dir(?TEST_AREA),
    PendingFN0 = filename:join(?TEST_AREA, "test_manifest0.pnd"),
    ManifestFN0 = filename:join(?TEST_AREA, "test_manifest0.man"),
    PendingFN1 = filename:join(?TEST_AREA, "test_manifest1.pnd"),
    ManifestFN1 = filename:join(?TEST_AREA, "test_manifest1.man"),
    %% No read-back check requested
    ok = safe_rename(PendingFN0, ManifestFN0, <<1:128/integer>>, false),
    ?assertMatch({ok, <<1:128/integer>>}, file:read_file(ManifestFN0)),
    %% Read-back check requested
    ok = safe_rename(PendingFN1, ManifestFN1, <<2:128/integer>>, true),
    ?assertMatch({ok, <<2:128/integer>>}, file:read_file(ManifestFN1)).
-endif.