Mas i361 d31 (#365)
* Resolve hash 0 of confusion (#362) * Resolve hash 0 of confusion Ensure that a hash of 0 is not confused with an empty index entry (where both hash and position are 0). For a genuine entry position must always be non-zero. * Use little endian format directly ... instead of extracting in big_endian format then flipping (or flipping before writing) * OTP 24 dialyzer fix in test
This commit is contained in:
parent
43ec9a4eab
commit
27cf6bef55
1 changed files with 103 additions and 51 deletions
|
@ -1003,9 +1003,9 @@ set_writeops(SyncStrategy) ->
|
||||||
%% the file handle
|
%% the file handle
|
||||||
open_active_file(FileName) when is_list(FileName) ->
|
open_active_file(FileName) when is_list(FileName) ->
|
||||||
{ok, Handle} = file:open(FileName, ?WRITE_OPS),
|
{ok, Handle} = file:open(FileName, ?WRITE_OPS),
|
||||||
{ok, Position} = file:position(Handle, {bof, 256*?DWORD_SIZE}),
|
{ok, Position} = file:position(Handle, {bof, 256 * ?DWORD_SIZE}),
|
||||||
{LastPosition, {HashTree, LastKey}} = startup_scan_over_file(Handle,
|
{LastPosition, {HashTree, LastKey}} =
|
||||||
Position),
|
startup_scan_over_file(Handle, Position),
|
||||||
case file:position(Handle, eof) of
|
case file:position(Handle, eof) of
|
||||||
{ok, LastPosition} ->
|
{ok, LastPosition} ->
|
||||||
ok = file:close(Handle);
|
ok = file:close(Handle);
|
||||||
|
@ -1239,8 +1239,7 @@ load_index(Handle) ->
|
||||||
LoadIndexFun =
|
LoadIndexFun =
|
||||||
fun(X) ->
|
fun(X) ->
|
||||||
file:position(Handle, {bof, ?DWORD_SIZE * X}),
|
file:position(Handle, {bof, ?DWORD_SIZE * X}),
|
||||||
{HashTablePos, Count} = read_next_2_integers(Handle),
|
read_next_2_integers(Handle)
|
||||||
{HashTablePos, Count}
|
|
||||||
end,
|
end,
|
||||||
list_to_tuple(lists:map(LoadIndexFun, Index)).
|
list_to_tuple(lists:map(LoadIndexFun, Index)).
|
||||||
|
|
||||||
|
@ -1279,8 +1278,8 @@ scan_index_returnpositions(Handle, Position, Count, PosList0) ->
|
||||||
{ok, _} = file:position(Handle, Position),
|
{ok, _} = file:position(Handle, Position),
|
||||||
AddPosFun =
|
AddPosFun =
|
||||||
fun({Hash, HPosition}, PosList) ->
|
fun({Hash, HPosition}, PosList) ->
|
||||||
case Hash of
|
case {Hash, HPosition} of
|
||||||
0 ->
|
{0, 0} ->
|
||||||
PosList;
|
PosList;
|
||||||
_ ->
|
_ ->
|
||||||
[HPosition|PosList]
|
[HPosition|PosList]
|
||||||
|
@ -1572,13 +1571,11 @@ extract_valueandsize(ValueAsBin) ->
|
||||||
{ValueAsBin, byte_size(ValueAsBin)}.
|
{ValueAsBin, byte_size(ValueAsBin)}.
|
||||||
|
|
||||||
|
|
||||||
%% Used for reading lengths
|
%% Used for reading lengths with CDB
|
||||||
%% Note that the endian_flip is required to make the file format compatible
|
|
||||||
%% with CDB
|
|
||||||
read_next_2_integers(Handle) ->
|
read_next_2_integers(Handle) ->
|
||||||
case file:read(Handle, ?DWORD_SIZE) of
|
case file:read(Handle, ?DWORD_SIZE) of
|
||||||
{ok, <<Int1:32,Int2:32>>} ->
|
{ok, <<Int1:32/little-integer, Int2:32/little-integer>>} ->
|
||||||
{endian_flip(Int1), endian_flip(Int2)};
|
{Int1, Int2};
|
||||||
ReadError ->
|
ReadError ->
|
||||||
ReadError
|
ReadError
|
||||||
end.
|
end.
|
||||||
|
@ -1589,10 +1586,9 @@ read_next_n_integerpairs(Handle, NumberOfPairs) ->
|
||||||
|
|
||||||
read_integerpairs(<<>>, Pairs) ->
|
read_integerpairs(<<>>, Pairs) ->
|
||||||
Pairs;
|
Pairs;
|
||||||
read_integerpairs(<<Int1:32, Int2:32, Rest/binary>>, Pairs) ->
|
read_integerpairs(<<Int1:32/little-integer, Int2:32/little-integer,
|
||||||
read_integerpairs(<<Rest/binary>>,
|
Rest/binary>>, Pairs) ->
|
||||||
Pairs ++ [{endian_flip(Int1),
|
read_integerpairs(<<Rest/binary>>, Pairs ++ [{Int1, Int2}]).
|
||||||
endian_flip(Int2)}]).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1626,10 +1622,11 @@ search_hash_table(Handle,
|
||||||
((Slot + CycleCount - 1) rem TotalSlots) * ?DWORD_SIZE
|
((Slot + CycleCount - 1) rem TotalSlots) * ?DWORD_SIZE
|
||||||
+ FirstHashPosition,
|
+ FirstHashPosition,
|
||||||
{ok, _} = file:position(Handle, Offset),
|
{ok, _} = file:position(Handle, Offset),
|
||||||
{StoredHash, DataLoc} = read_next_2_integers(Handle),
|
|
||||||
|
|
||||||
case StoredHash of
|
case read_next_2_integers(Handle) of
|
||||||
Hash ->
|
{0, 0} ->
|
||||||
|
{Timings, missing};
|
||||||
|
{Hash, DataLoc} ->
|
||||||
KV =
|
KV =
|
||||||
case QuickCheck of
|
case QuickCheck of
|
||||||
loose_presence ->
|
loose_presence ->
|
||||||
|
@ -1652,8 +1649,6 @@ search_hash_table(Handle,
|
||||||
UpdTimings = update_fetchtimings(Timings, CycleCount),
|
UpdTimings = update_fetchtimings(Timings, CycleCount),
|
||||||
{UpdTimings, KV}
|
{UpdTimings, KV}
|
||||||
end;
|
end;
|
||||||
0 ->
|
|
||||||
{Timings, missing};
|
|
||||||
_ ->
|
_ ->
|
||||||
search_hash_table(Handle,
|
search_hash_table(Handle,
|
||||||
{FirstHashPosition,
|
{FirstHashPosition,
|
||||||
|
@ -1773,18 +1768,20 @@ perform_write_hash_tables(Handle, HashTreeBin, StartPos) ->
|
||||||
%% in the hash table
|
%% in the hash table
|
||||||
%% The List passed in should be made up of {Index, Position, Count} tuples
|
%% The List passed in should be made up of {Index, Position, Count} tuples
|
||||||
write_top_index_table(Handle, BasePos, IndexList) ->
|
write_top_index_table(Handle, BasePos, IndexList) ->
|
||||||
FnWriteIndex = fun({_Index, Pos, Count}, {AccBin, CurrPos}) ->
|
FnWriteIndex =
|
||||||
case Count == 0 of
|
fun({_Index, Pos, Count}, {AccBin, CurrPos}) ->
|
||||||
true ->
|
{Position, NextPos} =
|
||||||
PosLE = endian_flip(CurrPos),
|
case Count == 0 of
|
||||||
NextPos = CurrPos;
|
true ->
|
||||||
false ->
|
{CurrPos, CurrPos};
|
||||||
PosLE = endian_flip(Pos),
|
false ->
|
||||||
NextPos = Pos + (Count * ?DWORD_SIZE)
|
{Pos, Pos + (Count * ?DWORD_SIZE)}
|
||||||
end,
|
end,
|
||||||
CountLE = endian_flip(Count),
|
{<<AccBin/binary,
|
||||||
{<<AccBin/binary, PosLE:32, CountLE:32>>, NextPos}
|
Position:32/little-integer,
|
||||||
end,
|
Count:32/little-integer>>,
|
||||||
|
NextPos}
|
||||||
|
end,
|
||||||
|
|
||||||
{IndexBin, _Pos} = lists:foldl(FnWriteIndex,
|
{IndexBin, _Pos} = lists:foldl(FnWriteIndex,
|
||||||
{<<>>, BasePos},
|
{<<>>, BasePos},
|
||||||
|
@ -1794,12 +1791,6 @@ write_top_index_table(Handle, BasePos, IndexList) ->
|
||||||
ok = file:advise(Handle, 0, ?DWORD_SIZE * 256, will_need),
|
ok = file:advise(Handle, 0, ?DWORD_SIZE * 256, will_need),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
%% To make this compatible with original Bernstein format this endian flip
|
|
||||||
%% and also the use of the standard hash function required.
|
|
||||||
|
|
||||||
endian_flip(Int) ->
|
|
||||||
<<X:32/unsigned-little-integer>> = <<Int:32>>,
|
|
||||||
X.
|
|
||||||
|
|
||||||
hash(Key) ->
|
hash(Key) ->
|
||||||
leveled_util:magic_hash(Key).
|
leveled_util:magic_hash(Key).
|
||||||
|
@ -1823,10 +1814,9 @@ key_value_to_record({Key, Value}, BinaryMode) ->
|
||||||
end,
|
end,
|
||||||
KS = byte_size(BK),
|
KS = byte_size(BK),
|
||||||
VS = byte_size(BV),
|
VS = byte_size(BV),
|
||||||
KS_FL = endian_flip(KS),
|
|
||||||
VS_FL = endian_flip(VS + 4),
|
|
||||||
CRC = calc_crc(BK, BV),
|
CRC = calc_crc(BK, BV),
|
||||||
<<KS_FL:32, VS_FL:32, BK:KS/binary, CRC:32/integer, BV:VS/binary>>.
|
<<KS:32/little-integer, (VS + 4):32/little-integer,
|
||||||
|
BK:KS/binary, CRC:32/integer, BV:VS/binary>>.
|
||||||
|
|
||||||
|
|
||||||
multi_key_value_to_record(KVList, BinaryMode, LastPosition) ->
|
multi_key_value_to_record(KVList, BinaryMode, LastPosition) ->
|
||||||
|
@ -1879,9 +1869,7 @@ to_slotmap(HashTree, Index) ->
|
||||||
IndexLength = length(HPList) * 2,
|
IndexLength = length(HPList) * 2,
|
||||||
ConvertObjFun =
|
ConvertObjFun =
|
||||||
fun({Hash, Position}) ->
|
fun({Hash, Position}) ->
|
||||||
HashLE = endian_flip(Hash),
|
NewBin = <<Hash:32/little-integer, Position:32/little-integer>>,
|
||||||
PosLE = endian_flip(Position),
|
|
||||||
NewBin = <<HashLE:32, PosLE:32>>,
|
|
||||||
{hash_to_slot(Hash, IndexLength), NewBin}
|
{hash_to_slot(Hash, IndexLength), NewBin}
|
||||||
end,
|
end,
|
||||||
lists:map(ConvertObjFun, HPList).
|
lists:map(ConvertObjFun, HPList).
|
||||||
|
@ -1982,12 +1970,11 @@ write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos,
|
||||||
%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%
|
||||||
-ifdef(TEST).
|
-ifdef(TEST).
|
||||||
|
|
||||||
%%
|
%% To make this compatible with original Bernstein format this endian flip
|
||||||
%% dump(FileName) -> List
|
%% and also the use of the standard hash function required.
|
||||||
%% Given a file name, this function returns a list
|
endian_flip(Int) ->
|
||||||
%% of {key,value} tuples from the CDB.
|
<<X:32/unsigned-little-integer>> = <<Int:32>>,
|
||||||
%%
|
X.
|
||||||
|
|
||||||
|
|
||||||
%% from_dict(FileName,ListOfKeyValueTuples)
|
%% from_dict(FileName,ListOfKeyValueTuples)
|
||||||
%% Given a filename and a dictionary, create a cdb
|
%% Given a filename and a dictionary, create a cdb
|
||||||
|
@ -2117,6 +2104,71 @@ find_firstzero_test() ->
|
||||||
?assertMatch([<<71:64/integer>>, <<72:64/integer>>], RHS).
|
?assertMatch([<<71:64/integer>>, <<72:64/integer>>], RHS).
|
||||||
|
|
||||||
|
|
||||||
|
magickey_test() ->
|
||||||
|
{C, L1, L2} = {247, 10, 100},
|
||||||
|
% Magic constants - will lead to first hash slot being empty
|
||||||
|
% prompts potential issue when first hash slot is empty but
|
||||||
|
% hash is 0
|
||||||
|
|
||||||
|
MagicKey =
|
||||||
|
{315781,
|
||||||
|
stnd,
|
||||||
|
{o_rkv,
|
||||||
|
<<100,111,109,97,105,110,68,111,99,117,109,101,110,116>>,
|
||||||
|
<<48,48,48,49,52,54,56,54,51,48,48,48,51,50,49,54,51,51>>,
|
||||||
|
null}},
|
||||||
|
?assertEqual(0, hash(MagicKey)),
|
||||||
|
NotMagicKVGen =
|
||||||
|
fun(I) ->
|
||||||
|
{{I + C, stnd, {o_rkv, <<"B">>, integer_to_binary(I + C), null}},
|
||||||
|
<<"V">>}
|
||||||
|
end,
|
||||||
|
Set1 = lists:map(NotMagicKVGen, lists:seq(1, L1)),
|
||||||
|
Set2 = lists:map(NotMagicKVGen, lists:seq(L1 + 1, L2)),
|
||||||
|
{ok, P1} =
|
||||||
|
cdb_open_writer("test/test_area/magic_hash.pnd",
|
||||||
|
#cdb_options{binary_mode=true}),
|
||||||
|
|
||||||
|
ok = cdb_put(P1, MagicKey, <<"MagicV0">>),
|
||||||
|
lists:foreach(fun({K, V}) -> cdb_put(P1, K, V) end, Set1),
|
||||||
|
ok = cdb_put(P1, MagicKey, <<"MagicV1">>),
|
||||||
|
lists:foreach(fun({K, V}) -> cdb_put(P1, K, V) end, Set2),
|
||||||
|
{ok, F2} = cdb_complete(P1),
|
||||||
|
{ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=true}),
|
||||||
|
{GetK, GetV} = cdb_get(P2, MagicKey),
|
||||||
|
?assertEqual(<<"MagicV1">>, GetV),
|
||||||
|
|
||||||
|
AllKeys = cdb_directfetch(P2, cdb_getpositions(P2, all), key_only),
|
||||||
|
?assertMatch(true, lists:member(MagicKey, AllKeys)),
|
||||||
|
ok = cdb_close(P2),
|
||||||
|
|
||||||
|
ok = file:delete("test/test_area/magic_hash.cdb"),
|
||||||
|
|
||||||
|
{ok, P3} =
|
||||||
|
cdb_open_writer("test/test_area/magic_hash.pnd",
|
||||||
|
#cdb_options{binary_mode=true}),
|
||||||
|
KVL = Set1 ++ [{MagicKey, <<"MagicV1">>}] ++ Set2,
|
||||||
|
ok = cdb_mput(P3, KVL),
|
||||||
|
{ok, F2} = cdb_complete(P3),
|
||||||
|
{ok, P4} = cdb_open_reader(F2, #cdb_options{binary_mode=true}),
|
||||||
|
|
||||||
|
{GetK, GetV} = cdb_get(P4, MagicKey),
|
||||||
|
?assertEqual(<<"MagicV1">>, GetV),
|
||||||
|
ok = cdb_close(P4),
|
||||||
|
ok = file:delete("test/test_area/magic_hash.cdb"),
|
||||||
|
|
||||||
|
{ok, P5} =
|
||||||
|
cdb_open_writer("test/test_area/magic_hash.pnd",
|
||||||
|
#cdb_options{binary_mode=true}),
|
||||||
|
KVL5 = Set1 ++ Set2,
|
||||||
|
ok = cdb_mput(P5, KVL5),
|
||||||
|
{ok, F2} = cdb_complete(P5),
|
||||||
|
{ok, P6} = cdb_open_reader(F2, #cdb_options{binary_mode=true}),
|
||||||
|
missing = cdb_get(P6, MagicKey),
|
||||||
|
ok = cdb_close(P6),
|
||||||
|
ok = file:delete("test/test_area/magic_hash.cdb").
|
||||||
|
|
||||||
|
|
||||||
cyclecount_test() ->
|
cyclecount_test() ->
|
||||||
io:format("~n~nStarting cycle count test~n"),
|
io:format("~n~nStarting cycle count test~n"),
|
||||||
KVL1 = generate_sequentialkeys(5000, []),
|
KVL1 = generate_sequentialkeys(5000, []),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue