Stop using list

Producing the list of all slots to try appeared to be expensive: in volume tests it was taking 150 - 250 microseconds per GET.  This is perhaps not surprising, as the list could be long (>1000 entries) and was built with a split and an append.

Instead loop and count.
This commit is contained in:
Martin Sumner 2017-11-20 20:01:21 +00:00
parent 06f6604ac4
commit 52c7a023a1
2 changed files with 42 additions and 43 deletions

View file

@ -921,18 +921,15 @@ get(Handle, Key, Cache, QuickCheck, BinaryMode, Timings)
{Timings, missing};
_ ->
% Get starting slot in hashtable
{ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}),
{ok, FirstHashPosition} =
file:position(Handle, {bof, HashTable}),
Slot = hash_to_slot(Hash, Count),
{ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}),
LastHashPosition = HashTable + ((Count-1) * ?DWORD_SIZE),
LocList = lists:seq(FirstHashPosition,
LastHashPosition,
?DWORD_SIZE),
% Split list around starting slot.
{L1, L2} = lists:split(Slot, LocList),
UpdTimings = update_indextimings(Timings, SW),
search_hash_table(Handle,
lists:append(L2, L1),
{FirstHashPosition,
Slot,
1,
Count},
Hash,
Key,
QuickCheck,
@ -1314,14 +1311,10 @@ read_integerpairs(<<Int1:32, Int2:32, Rest/binary>>, Pairs) ->
endian_flip(Int2)}]).
%% Start a hash table search with the cycle count initialised to 0.
search_hash_table(Handle, Entries, Hash, Key,
                    QuickCheck, BinaryMode, Timings) ->
    InitialCycleCount = 0,
    search_hash_table(Handle, Entries, Hash, Key,
                        QuickCheck, BinaryMode, Timings,
                        InitialCycleCount).
-spec search_hash_table(file:io_device(), list(), integer(), any(),
-spec search_hash_table(file:io_device(), tuple(), integer(), any(),
loose_presence|boolean(), boolean(),
cdb_timings(), integer()) ->
cdb_timings()) ->
{cdb_timings(), missing|probably|tuple()}.
%% @doc
%%
@ -1332,13 +1325,23 @@ search_hash_table(Handle, Entries, Hash, Key,
%% true - check the CRC before returning key & value
%% false - don't check the CRC before returning key & value
%% loose_presence - confirm that the hash of the key is present
search_hash_table(_Handle, [], Hash, _Key,
_QuickCheck, _BinaryMode, _Timings, CycleCount) ->
log_cyclecount(CycleCount, Hash, missing),
{no_timing, missing};
search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key,
QuickCheck, BinaryMode, Timings, CycleCount) ->
{ok, _} = file:position(Handle, Entry),
search_hash_table(_Handle,
{_, _, _TotalSlots, _TotalSlots},
_Hash, _Key,
_QuickCheck, _BinaryMode, Timings) ->
% We have done the full loop - value must not be present
{Timings, missing};
search_hash_table(Handle,
{FirstHashPosition, Slot, CycleCount, TotalSlots},
Hash, Key,
QuickCheck, BinaryMode, Timings) ->
% Read the next 2 integers at current position, see if it matches the hash
% we're after
Offset =
((Slot + CycleCount - 1) rem TotalSlots) * ?DWORD_SIZE
+ FirstHashPosition,
{ok, _} = file:position(Handle, Offset),
{StoredHash, DataLoc} = read_next_2_integers(Handle),
case StoredHash of
@ -1352,33 +1355,30 @@ search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key,
end,
case KV of
missing ->
leveled_log:log("CDB15", [Hash]),
search_hash_table(Handle,
RestOfEntries,
Hash,
Key,
QuickCheck,
BinaryMode,
Timings,
CycleCount + 1);
{FirstHashPosition,
Slot,
CycleCount + 1,
TotalSlots},
Hash, Key,
QuickCheck, BinaryMode,
Timings);
_ ->
UpdTimings = update_fetchtimings(Timings, CycleCount),
log_cyclecount(CycleCount, Hash, found),
{UpdTimings, KV}
end;
_ ->
search_hash_table(Handle, RestOfEntries, Hash, Key,
QuickCheck, BinaryMode,
Timings,
CycleCount + 1)
search_hash_table(Handle,
{FirstHashPosition,
Slot,
CycleCount + 1,
TotalSlots},
Hash, Key,
QuickCheck, BinaryMode,
Timings)
end.
%% Log (under reference "CDB15") the cycle count, hash and result when the
%% cycle count is greater than 8; otherwise do nothing and return ok.
log_cyclecount(CycleCount, Hash, Result) when CycleCount > 8 ->
    leveled_log:log("CDB15", [CycleCount, Hash, Result]);
log_cyclecount(_CycleCount, _Hash, _Result) ->
    ok.
-spec update_fetchtimings(no_timing|cdb_timings(), integer()) ->
no_timing|cdb_timings().

View file

@ -319,8 +319,7 @@
{info, "Microsecond timings for hashtree build of "
++ "to_list ~w sort ~w build ~w"}},
{"CDB15",
{info, "Cycle count of ~w in hashtable search higher than expected"
++ " in search for hash ~w with result ~w"}},
{info, "Collision in search for hash ~w"}},
{"CDB16",
{info, "CDB scan from start ~w in file with end ~w and last_key ~w"}},
{"CDB17",