Initial files proving concepts

WIP - nothing currently workable.

parent 85a6998ca0
commit e2099d0c14
19 changed files with 1491 additions and 0 deletions
BIN src/activefile_test.cdb: new binary file (content not shown)
src/eleveleddb.app.src: new file, 17 lines (@@ -0,0 +1,17 @@)

{application, eleveleddb,
 [
  {description, ""},
  {vsn, "0.0.1"},
  {modules, []},
  {registered, []},
  {applications, [
                  kernel,
                  stdlib
                  ]},
  {mod, {eleveleddb_app, []}},
  {env, [
         %% Default max file size (in bytes)
         {max_file_size, 32#80000000} % 4GB default
        ]}
 ]}.
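Not part of this commit, but for orientation: at runtime the setting above would normally be read through the standard OTP application environment API, for example:

    {ok, MaxFileSize} = application:get_env(eleveleddb, max_file_size).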
BIN src/from_dict_test.cdb: new binary file (content not shown)
BIN src/full.cdb: new binary file (content not shown)
BIN src/hashtable1_test.cdb: new binary file (content not shown)
BIN src/leveled_bst.beam: new binary file (content not shown)
src/leveled_bst.erl: new file, 156 lines (@@ -0,0 +1,156 @@)
%%
|
||||
%% This module provides functions for managing bst files - a modified version
|
||||
%% of sst files, to be used in leveleddb.
|
||||
%% bst files are broken into the following sections:
|
||||
%% - Header (fixed width 32 bytes - containing pointers and metadata)
|
||||
%% - Blocks (variable length)
|
||||
%% - Slots (variable length)
|
||||
%% - Footer (variable length - contains slot index and helper metadata)
|
||||
%%
|
||||
%% The 32-byte header is made up of
|
||||
%% - 1 byte version (major 5 bits, minor 3 bits) - default 0.1
|
||||
%% - 1 byte state bits (1 bit to indicate mutability, 1 for use of compression)
|
||||
%% - 4 bytes footer position
|
||||
%% - 4 bytes slot list length
|
||||
%% - 4 bytes helper length
|
||||
%% - 14 bytes spare for future options
|
||||
%% - 4 bytes CRC (header)
|
||||
%%
|
||||
%% The Blocks section is a series of blocks, each comprising:
|
||||
%% - 4 byte block length
|
||||
%% - variable-length compressed list of 32 keys & values
|
||||
%% - 4 byte CRC for block
|
||||
%% There will be up to 4000 blocks in a single bst file
|
||||
%%
|
||||
%% The Slots section is a series of references, each comprising:
|
||||
%% - 4 byte bloom-filter length
|
||||
%% - 4 byte key-helper length
|
||||
%% - a variable-length compressed bloom filter for all keys in slot (approx 1KB)
|
||||
%% - 32 ordered variable-length key helpers pointing to first key in each
|
||||
%% block (in slot) of the form Key Length, Key, Block Position
|
||||
%% - 4 byte CRC for the slot
|
||||
%%
|
||||
%% The slot index in the footer is made up of 128 keys and pointers at the
|
||||
%% start of each slot
|
||||
%% - 128 Key Length (4 byte), Key, Position (4 byte) indexes
|
||||
%% - 4 bytes CRC for the index
|
||||
%%
|
||||
%% The format of the file is intended to support quick lookups, whilst
|
||||
%% allowing for a new file to be written incrementally (so that all keys and
|
||||
%% values need not be retained in memory) - perhaps n blocks at a time
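%% As a quick cross-check of the header layout described above, the field
%% widths sum to the stated 32 bytes: 1 (version) + 1 (state bits) +
%% 4 (footer position) + 4 (slot list length) + 4 (helper length) +
%% 14 (spare) + 4 (CRC) = 32.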
|
||||
|
||||
|
||||
-module(leveled_bst).
|
||||
|
||||
-export([start_file/1, convert_header/1]).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
-define(WORD_SIZE, 4).
|
||||
-define(CURRENT_VERSION, {0,1}).
|
||||
-define(SLOT_COUNT, 128).
|
||||
-define(BLOCK_SIZE, 32).
|
||||
-define(SLOT_SIZE, 32).
|
||||
|
||||
-record(metadata, {version = ?CURRENT_VERSION :: tuple(),
|
||||
mutable = false :: true | false,
|
||||
compressed = true :: true | false,
|
||||
slot_list :: list(),
|
||||
cache :: tuple(),
|
||||
smallest_key :: tuple(),
|
||||
largest_key :: tuple(),
|
||||
smallest_sqn :: integer(),
|
||||
largest_sqn :: integer()
|
||||
}).
|
||||
|
||||
%% Start a bare file with an initial header and no further details
|
||||
%% Return the {Handle, metadata record}
|
||||
start_file(FileName) when is_list(FileName) ->
|
||||
{ok, Handle} = file:open(FileName, [binary, raw, read, write]),
|
||||
start_file(Handle);
|
||||
start_file(Handle) ->
|
||||
Header = create_header(initial),
|
||||
{ok, _} = file:position(Handle, bof),
|
||||
file:write(Handle, Header),
|
||||
{Version, {M, C}, _, _} = convert_header(Header),
|
||||
FileMD = #metadata{version=Version, mutable=M, compressed=C},
|
||||
SlotArray = array:new(?SLOT_COUNT),
|
||||
{Handle, FileMD, SlotArray}.
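Illustrative usage of start_file/1 as defined above (the file name is hypothetical, and per the commit message nothing here is expected to be fully workable yet):

    {Handle, FileMD, SlotArray} = leveled_bst:start_file("example.bst"),
    %% FileMD is a #metadata{} record populated from the freshly written header,
    %% and SlotArray is an empty array of ?SLOT_COUNT (128) slots.
    {0, 1} = FileMD#metadata.version.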
|
||||
|
||||
|
||||
create_header(initial) ->
|
||||
{Major, Minor} = ?CURRENT_VERSION,
|
||||
Version = <<Major:5, Minor:3>>,
|
||||
State = <<0:6, 1:1, 1:1>>, % Mutable and compressed
|
||||
Lengths = <<0:32, 0:32, 0:32>>,
|
||||
Options = <<0:112>>,
|
||||
H1 = <<Version/binary, State/binary, Lengths/binary, Options/binary>>,
|
||||
CRC32 = erlang:crc32(H1),
|
||||
<<H1/binary, CRC32:32/integer>>.
|
||||
|
||||
|
||||
convert_header(Header) ->
|
||||
<<H1:28/binary, CRC32:32/integer>> = Header,
|
||||
case erlang:crc32(H1) of
|
||||
CRC32 ->
|
||||
<<Major:5/integer, Minor:3/integer, _/binary>> = H1,
|
||||
case {Major, Minor} of
|
||||
{0, 1} ->
|
||||
convert_header_v01(H1);
|
||||
_ ->
|
||||
unknown_version
|
||||
end;
|
||||
_ ->
|
||||
crc_mismatch
|
||||
end.
|
||||
|
||||
convert_header_v01(Header) ->
|
||||
<<_:8, 0:6, Mutable:1, Comp:1,
|
||||
FooterP:32/integer, SlotLng:32/integer, HlpLng:32/integer,
|
||||
_/binary>> = Header,
|
||||
case Mutable of
|
||||
1 -> M = true;
|
||||
0 -> M = false
|
||||
end,
|
||||
case Comp of
|
||||
1 -> C = true;
|
||||
0 -> C = false
|
||||
end,
|
||||
{{0, 1}, {M, C}, {FooterP, SlotLng, HlpLng}, none}.
|
||||
|
||||
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%
|
||||
% T E S T
|
||||
%%%%%%%%%%%%%%%
|
||||
|
||||
empty_header_test() ->
|
||||
Header = create_header(initial),
|
||||
?assertMatch(32, byte_size(Header)),
|
||||
<<Major:5, Minor:3, _/binary>> = Header,
|
||||
?assertMatch({0, 1}, {Major, Minor}),
|
||||
{Version, State, Lengths, Options} = convert_header(Header),
|
||||
?assertMatch({0, 1}, Version),
|
||||
?assertMatch({true, true}, State),
|
||||
?assertMatch({0, 0, 0}, Lengths),
|
||||
?assertMatch(none, Options).
|
||||
|
||||
bad_header_test() ->
|
||||
Header = create_header(initial),
|
||||
<<_:1/binary, Rest/binary >> = Header,
|
||||
HdrDetails1 = convert_header(<<0:5/integer, 2:3/integer, Rest/binary>>),
|
||||
?assertMatch(crc_mismatch, HdrDetails1),
|
||||
<<_:1/binary, RestToCRC:27/binary, _:32/integer>> = Header,
|
||||
NewHdr1 = <<0:5/integer, 2:3/integer, RestToCRC/binary>>,
|
||||
CRC32 = erlang:crc32(NewHdr1),
|
||||
NewHdr2 = <<NewHdr1/binary, CRC32:32/integer>>,
|
||||
?assertMatch(unknown_version, convert_header(NewHdr2)).
|
||||
|
||||
record_onstartfile_test() ->
|
||||
{_, FileMD, _} = start_file("onstartfile.bst"),
|
||||
?assertMatch({0, 1}, FileMD#metadata.version).
|
||||
|
||||
|
||||
|
||||
|
BIN src/leveled_cdb.beam: new binary file (content not shown)
src/leveled_cdb.erl: new file, 804 lines (@@ -0,0 +1,804 @@)
%%
|
||||
%% This is a modified version of the cdb module provided by Tom Whitcomb.
|
||||
%%
|
||||
%% - https://github.com/thomaswhitcomb/erlang-cdb
|
||||
%%
|
||||
%% The primary differences are:
|
||||
%% - Support for incrementally writing a CDB file while keeping the hash table
|
||||
%% in memory
|
||||
%% - Support for merging of multiple CDB files with a key-checking function to
|
||||
%% allow for compaction
|
||||
%% - Automatic adding of a helper object that will keep a small proportion of
|
||||
%% keys to be used when checking to see if the cdb file is a candidate for
|
||||
%% compaction
|
||||
%% - The ability to scan a database and accumulate all the Key, Values to
|
||||
%% rebuild in-memory tables on startup
|
||||
%%
|
||||
%% This is to be used in eleveleddb, and in this context:
|
||||
%% - Keys will be a Sequence Number
|
||||
%% - Values will be a Checksum; Pointers (length * 3); Key; [Metadata]; [Value]
|
||||
%% where the pointers can be used to extract just part of the value
|
||||
%% (i.e. metadata only)
|
||||
%%
|
||||
%% This module provides functions to create and query a CDB (constant database).
|
||||
%% A CDB implements a two-level hashtable which provides fast {key,value}
|
||||
%% lookups that remain fairly constant in speed regardless of the CDB's size.
|
||||
%%
|
||||
%% The first level in the CDB occupies the first 256 doublewords in the file.
|
||||
%% Each doubleword slot contains two values. The first is a file pointer to
|
||||
%% the primary hashtable (at the end of the file) and the second value is the
|
||||
%% number of entries in the hashtable. The first level table of 256 entries
|
||||
%% is indexed with the lower eight bits of the hash of the input key.
|
||||
%%
|
||||
%% Following the 256 doublewords are the {key,value} tuples. The tuples are
|
||||
%% packed in the file without regard to word boundaries. Each {key,value}
|
||||
%% tuple is represented with a four byte key length, a four byte value length,
|
||||
%% the actual key value followed by the actual value.
|
||||
%%
|
||||
%% Following the {key,value} tuples are the primary hash tables. There are
|
||||
%% at most 256 hash tables. Each hash table is referenced by one of the 256
|
||||
%% doubleword entries at the top of the file. For efficiency reasons, each
|
||||
%% hash table is allocated twice the number of entries that it will need.
|
||||
%% Each entry in the hash table is a doubleword.
|
||||
%% The first word is the corresponding hash value and the second word is a
|
||||
%% file pointer to the actual {key,value} tuple higher in the file.
|
||||
%%
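For orientation, a minimal usage sketch of the exported API below (file name hypothetical): create/2 writes a complete CDB, and get/2 performs a CRC-checked lookup, returning missing for absent keys.

    ok = leveled_cdb:create("example.cdb", [{"key1", "value1"}, {"key2", "value2"}]),
    {"key1", "value1"} = leveled_cdb:get("example.cdb", "key1"),
    missing = leveled_cdb:get("example.cdb", "unknownkey").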
|
||||
|
||||
-module(leveled_cdb).
|
||||
|
||||
-export([from_dict/2,
|
||||
create/2,
|
||||
dump/1,
|
||||
get/2,
|
||||
get_mem/3,
|
||||
put/4,
|
||||
open_active_file/1,
|
||||
get_nextkey/1,
|
||||
get_nextkey/2]).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
-define(DWORD_SIZE, 8).
|
||||
-define(WORD_SIZE, 4).
|
||||
-define(CRC_CHECK, true).
|
||||
|
||||
%%
|
||||
%% from_dict(FileName,ListOfKeyValueTuples)
|
||||
%% Given a filename and a dictionary, create a cdb
|
||||
%% using the key value pairs from the dict.
|
||||
%%
|
||||
%% @spec from_dict(filename(),dictionary()) -> ok
|
||||
%% where
|
||||
%% filename() = string(),
|
||||
%% dictionary() = dict()
|
||||
%%
|
||||
from_dict(FileName,Dict) ->
|
||||
KeyValueList = dict:to_list(Dict),
|
||||
create(FileName, KeyValueList).
|
||||
|
||||
%%
|
||||
%% create(FileName,ListOfKeyValueTuples) -> ok
|
||||
%% Given a filename and a list of {key,value} tuples,
|
||||
%% this function creates a CDB
|
||||
%%
|
||||
create(FileName,KeyValueList) ->
|
||||
{ok, Handle} = file:open(FileName, [write]),
|
||||
{ok, _} = file:position(Handle, {bof, 2048}),
|
||||
{BasePos, HashTree} = write_key_value_pairs(Handle, KeyValueList),
|
||||
io:format("KVs has been written to base position ~w~n", [BasePos]),
|
||||
L2 = write_hash_tables(Handle, HashTree),
|
||||
io:format("Index list output of ~w~n", [L2]),
|
||||
write_top_index_table(Handle, BasePos, L2),
|
||||
file:close(Handle).
|
||||
|
||||
%%
|
||||
%% dump(FileName) -> List
|
||||
%% Given a file name, this function returns a list
|
||||
%% of {key,value} tuples from the CDB.
|
||||
%%
|
||||
%%
|
||||
%% @spec dump(filename()) -> key_value_list()
|
||||
%% where
|
||||
%% filename() = string(),
|
||||
%% key_value_list() = [{key,value}]
|
||||
dump(FileName) ->
|
||||
dump(FileName, ?CRC_CHECK).
|
||||
|
||||
dump(FileName, CRCCheck) ->
|
||||
{ok, Handle} = file:open(FileName, [binary,raw]),
|
||||
Fn = fun(Index, Acc) ->
|
||||
{ok, _} = file:position(Handle, ?DWORD_SIZE * Index),
|
||||
{_, Count} = read_next_2_integers(Handle),
|
||||
Acc + Count
|
||||
end,
|
||||
NumberOfPairs = lists:foldl(Fn, 0, lists:seq(0,255)) bsr 1,
|
||||
io:format("Count of keys in db is ~w~n", [NumberOfPairs]),
|
||||
|
||||
{ok, _} = file:position(Handle, {bof, 2048}),
|
||||
Fn1 = fun(_I,Acc) ->
|
||||
{KL,VL} = read_next_2_integers(Handle),
|
||||
Key = read_next_string(Handle, KL),
|
||||
case read_next_string(Handle, VL, crc, CRCCheck) of
|
||||
{false, _} ->
|
||||
{ok, CurrLoc} = file:position(Handle, cur),
|
||||
Return = {crc_wonky, get(Handle, Key)};
|
||||
{_, Value} ->
|
||||
{ok, CurrLoc} = file:position(Handle, cur),
|
||||
Return = case get(Handle, Key) of
|
||||
{Key,Value} -> {Key ,Value};
|
||||
X -> {wonky, X}
|
||||
end
|
||||
end,
|
||||
{ok, _} = file:position(Handle, CurrLoc),
|
||||
[Return | Acc]
|
||||
end,
|
||||
lists:foldr(Fn1,[],lists:seq(0,NumberOfPairs-1)).
|
||||
|
||||
%% Open an active file - one for which it is assumed the hash tables have not
|
||||
%% yet been written
|
||||
%%
|
||||
%% Needs to scan over file to incrementally produce the hash list, starting at
|
||||
%% the end of the top index table.
|
||||
%%
|
||||
%% Should return a dictionary keyed by index containing a list of {Hash, Pos}
|
||||
%% tuples as the write_key_value_pairs function, and the current position, and
|
||||
%% the file handle
|
||||
open_active_file(FileName) when is_list(FileName) ->
|
||||
{ok, Handle} = file:open(FileName, [binary, raw, read, write]),
|
||||
{ok, Position} = file:position(Handle, {bof, 256*?DWORD_SIZE}),
|
||||
{LastPosition, HashTree} = scan_over_file(Handle, Position),
|
||||
case file:position(Handle, eof) of
|
||||
{ok, LastPosition} ->
|
||||
ok = file:close(Handle);
|
||||
{ok, _} ->
|
||||
LogDetails = [LastPosition, file:position(Handle, eof)],
|
||||
io:format("File to be truncated at last position of"
|
||||
"~w with end of file at ~w~n", LogDetails),
|
||||
{ok, LastPosition} = file:position(Handle, LastPosition),
|
||||
ok = file:truncate(Handle),
|
||||
ok = file:close(Handle)
|
||||
end,
|
||||
{LastPosition, HashTree}.
|
||||
|
||||
%% put(Handle, Key, Value, {LastPosition, HashDict}) -> {NewPosition, KeyDict}
|
||||
%% Append to an active file a new key/value pair returning an updated
|
||||
%% dictionary of Keys and positions. Returns an updated Position
|
||||
%%
|
||||
put(FileName, Key, Value, {LastPosition, HashTree}) when is_list(FileName) ->
|
||||
{ok, Handle} = file:open(FileName,
|
||||
[binary, raw, read, write, delayed_write]),
|
||||
put(Handle, Key, Value, {LastPosition, HashTree});
|
||||
put(Handle, Key, Value, {LastPosition, HashTree}) ->
|
||||
Bin = key_value_to_record({Key, Value}), % create binary for Key and Value
|
||||
ok = file:pwrite(Handle, LastPosition, Bin),
|
||||
{LastPosition + byte_size(Bin), put_hashtree(Key, LastPosition, HashTree)}.
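Illustrative only - how the active-file functions chain together for an append, mirroring activewrite_singlewrite_test near the end of this module (file name hypothetical):

    ok = leveled_cdb:from_dict("active.cdb", dict:store("K1", "V1", dict:new())),
    {LastPos, HashTree} = leveled_cdb:open_active_file("active.cdb"),
    {_NewPos, HashTree1} = leveled_cdb:put("active.cdb", "K2", "V2", {LastPos, HashTree}),
    {"K2", "V2"} = leveled_cdb:get_mem("K2", "active.cdb", HashTree1).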
|
||||
|
||||
|
||||
%%
|
||||
%% get(FileName,Key) -> {key,value}
|
||||
%% Given a filename and a key, returns a key and value tuple.
|
||||
%%
|
||||
get(FileNameOrHandle, Key) ->
|
||||
get(FileNameOrHandle, Key, ?CRC_CHECK).
|
||||
|
||||
get(FileName, Key, CRCCheck) when is_list(FileName), is_list(Key) ->
|
||||
{ok,Handle} = file:open(FileName,[binary,raw]),
|
||||
get(Handle,Key, CRCCheck);
|
||||
|
||||
get(Handle, Key, CRCCheck) when is_tuple(Handle), is_list(Key) ->
|
||||
Hash = hash(Key),
|
||||
Index = hash_to_index(Hash),
|
||||
{ok,_} = file:position(Handle, {bof, ?DWORD_SIZE * Index}),
|
||||
% Get location of hashtable and number of entries in the hash
|
||||
{HashTable, Count} = read_next_2_integers(Handle),
|
||||
% If the count is 0 for that index - key must be missing
|
||||
case Count of
|
||||
0 ->
|
||||
missing;
|
||||
_ ->
|
||||
% Get starting slot in hashtable
|
||||
{ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}),
|
||||
Slot = hash_to_slot(Hash, Count),
|
||||
{ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}),
|
||||
LastHashPosition = HashTable + ((Count-1) * ?DWORD_SIZE),
|
||||
LocList = lists:seq(FirstHashPosition, LastHashPosition, ?DWORD_SIZE),
|
||||
% Split list around starting slot.
|
||||
{L1, L2} = lists:split(Slot, LocList),
|
||||
search_hash_table(Handle, lists:append(L2, L1), Hash, Key, CRCCheck)
|
||||
end.
|
||||
|
||||
%% Get a Key/Value pair from an active CDB file (with no hash table written)
|
||||
%% This requires a key dictionary to be passed in (mapping keys to positions)
|
||||
%% Will return {Key, Value} or missing
|
||||
get_mem(Key, Filename, HashTree) when is_list(Filename) ->
|
||||
{ok, Handle} = file:open(Filename, [binary, raw, read]),
|
||||
get_mem(Key, Handle, HashTree);
|
||||
get_mem(Key, Handle, HashTree) ->
|
||||
extract_kvpair(Handle, get_hashtree(Key, HashTree), Key).
|
||||
|
||||
%% Get the next key at a position in the file (or the first key if no position
|
||||
%% is passed). Will return both a key and the next position
|
||||
get_nextkey(Filename) when is_list(Filename) ->
|
||||
{ok, Handle} = file:open(Filename, [binary, raw, read]),
|
||||
get_nextkey(Handle);
|
||||
get_nextkey(Handle) ->
|
||||
{ok, _} = file:position(Handle, bof),
|
||||
{FirstHashPosition, _} = read_next_2_integers(Handle),
|
||||
get_nextkey(Handle, {256 * ?DWORD_SIZE, FirstHashPosition}).
|
||||
|
||||
get_nextkey(Handle, {Position, FirstHashPosition}) ->
|
||||
{ok, Position} = file:position(Handle, Position),
|
||||
case read_next_2_integers(Handle) of
|
||||
{KeyLength, ValueLength} ->
|
||||
NextKey = read_next_string(Handle, KeyLength),
|
||||
NextPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE,
|
||||
case NextPosition of
|
||||
FirstHashPosition ->
|
||||
{NextKey, nomorekeys};
|
||||
_ ->
|
||||
{NextKey, Handle, {NextPosition, FirstHashPosition}}
|
||||
end;
|
||||
eof ->
|
||||
nomorekeys
|
||||
end.
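Illustrative only - walking every key in a completed CDB file with get_nextkey, as exercised by getnextkey_test below (file name hypothetical):

    {FirstKey, Handle, Pos1} = leveled_cdb:get_nextkey("example.cdb"),
    {SecondKey, Handle, Pos2} = leveled_cdb:get_nextkey(Handle, Pos1),
    %% ... keep passing the returned position back in until the result is
    %% {LastKey, nomorekeys} (or just nomorekeys for an empty file).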
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%
|
||||
%% Internal functions
|
||||
%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
%% Fetch a list of positions by passing a key to the HashTree
|
||||
get_hashtree(Key, HashTree) ->
|
||||
Hash = hash(Key),
|
||||
Index = hash_to_index(Hash),
|
||||
Tree = array:get(Index, HashTree),
|
||||
case gb_trees:lookup(Hash, Tree) of
|
||||
{value, List} ->
|
||||
List;
|
||||
_ ->
|
||||
[]
|
||||
end.
|
||||
|
||||
%% Add to hash tree - this is an array of 256 gb_trees that contains the Hash
|
||||
%% and position of objects which have been added to an open CDB file
|
||||
put_hashtree(Key, Position, HashTree) ->
|
||||
Hash = hash(Key),
|
||||
Index = hash_to_index(Hash),
|
||||
Tree = array:get(Index, HashTree),
|
||||
case gb_trees:lookup(Hash, Tree) of
|
||||
none ->
|
||||
array:set(Index, gb_trees:insert(Hash, [Position], Tree), HashTree);
|
||||
{value, L} ->
|
||||
array:set(Index, gb_trees:update(Hash, [Position|L], Tree), HashTree)
|
||||
end.
|
||||
|
||||
%% Function to extract a Key-Value pair given a file handle and a position
|
||||
%% Will confirm that the key matches and do a CRC check when requested
|
||||
extract_kvpair(Handle, Positions, Key) ->
|
||||
extract_kvpair(Handle, Positions, Key, ?CRC_CHECK).
|
||||
|
||||
extract_kvpair(_, [], _, _) ->
|
||||
missing;
|
||||
extract_kvpair(Handle, [Position|Rest], Key, Check) ->
|
||||
{ok, _} = file:position(Handle, Position),
|
||||
{KeyLength, ValueLength} = read_next_2_integers(Handle),
|
||||
case read_next_string(Handle, KeyLength) of
|
||||
Key -> % If same key as passed in, then found!
|
||||
case read_next_string(Handle, ValueLength, crc, Check) of
|
||||
{false, _} ->
|
||||
crc_wonky;
|
||||
{_, Value} ->
|
||||
{Key,Value}
|
||||
end;
|
||||
_ ->
|
||||
extract_kvpair(Handle, Rest, Key, Check)
|
||||
end.
|
||||
|
||||
%% Scan through the file until there is a failure to crc check an input, and
|
||||
%% at that point return the position and the key dictionary scanned so far
|
||||
scan_over_file(Handle, Position) ->
|
||||
HashTree = array:new(256, {default, gb_trees:empty()}),
|
||||
scan_over_file(Handle, Position, HashTree).
|
||||
|
||||
scan_over_file(Handle, Position, HashTree) ->
|
||||
case read_next_2_integers(Handle) of
|
||||
{KeyLength, ValueLength} ->
|
||||
Key = read_next_string(Handle, KeyLength),
|
||||
{ok, ValueAsBin} = file:read(Handle, ValueLength),
|
||||
case crccheck_value(ValueAsBin) of
|
||||
true ->
|
||||
NewPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE,
|
||||
scan_over_file(Handle, NewPosition,
|
||||
put_hashtree(Key, Position, HashTree));
|
||||
false ->
|
||||
io:format("CRC check returned false on key of ~w ~n", [Key]),
|
||||
{Position, HashTree}
|
||||
end;
|
||||
eof ->
|
||||
{Position, HashTree}
|
||||
end.
|
||||
|
||||
%% The first four bytes of the value are the crc check
|
||||
crccheck_value(Value) when byte_size(Value) >4 ->
|
||||
<< Hash:32/integer, Tail/bitstring>> = Value,
|
||||
case calc_crc(Tail) of
|
||||
Hash ->
|
||||
true;
|
||||
_ ->
|
||||
io:format("CRC check failed due to mismatch ~n"),
|
||||
false
|
||||
end;
|
||||
crccheck_value(_) ->
|
||||
io:format("CRC check failed due to size ~n"),
|
||||
false.
|
||||
|
||||
%% Run a crc check, padding out any values which don't fit on a byte boundary
|
||||
calc_crc(Value) ->
|
||||
case bit_size(Value) rem 8 of
|
||||
0 ->
|
||||
erlang:crc32(Value);
|
||||
N ->
|
||||
M = 8 - N,
|
||||
erlang:crc32(<<Value/bitstring,0:M>>)
|
||||
end.
|
||||
|
||||
%%
|
||||
%% to_dict(FileName)
|
||||
%% Given a filename returns a dict containing
|
||||
%% the key value pairs from the dict.
|
||||
%%
|
||||
%% @spec to_dict(filename()) -> dictionary()
|
||||
%% where
|
||||
%% filename() = string(),
|
||||
%% dictionary() = dict()
|
||||
%%
|
||||
to_dict(FileName) ->
|
||||
KeyValueList = dump(FileName),
|
||||
dict:from_list(KeyValueList).
|
||||
|
||||
read_next_string(Handle, Length) ->
|
||||
{ok, Bin} = file:read(Handle, Length),
|
||||
binary_to_list(Bin).
|
||||
|
||||
%% Read next string where the string has a CRC prepended - stripping the crc
|
||||
%% and checking if requested
|
||||
read_next_string(Handle, Length, crc, Check) ->
|
||||
case Check of
|
||||
true ->
|
||||
{ok, <<CRC:32/integer, Bin/binary>>} = file:read(Handle, Length),
|
||||
case calc_crc(Bin) of
|
||||
CRC ->
|
||||
{true, binary_to_list(Bin)};
|
||||
_ ->
|
||||
{false, binary_to_list(Bin)}
|
||||
end;
|
||||
_ ->
|
||||
{ok, _} = file:position(Handle, {cur, 4}),
|
||||
{ok, Bin} = file:read(Handle, Length - 4),
|
||||
{unchecked, binary_to_list(Bin)}
|
||||
end.
|
||||
|
||||
|
||||
%% Used for reading lengths
|
||||
%% Note that the endian_flip is required to make the file format compatible
|
||||
%% with CDB
|
||||
read_next_2_integers(Handle) ->
|
||||
case file:read(Handle,?DWORD_SIZE) of
|
||||
{ok, <<Int1:32,Int2:32>>} ->
|
||||
{endian_flip(Int1), endian_flip(Int2)};
|
||||
MatchError
|
||||
->
|
||||
MatchError
|
||||
end.
|
||||
|
||||
%% Search the hash table for the matching hash and key. Be prepared for
|
||||
%% multiple keys to have the same hash value.
|
||||
search_hash_table(_Handle, [], _Hash, _Key, _CRCCHeck) ->
|
||||
missing;
|
||||
search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key, CRCCheck) ->
|
||||
{ok, _} = file:position(Handle, Entry),
|
||||
{StoredHash, DataLoc} = read_next_2_integers(Handle),
|
||||
io:format("looking in data location ~w~n", [DataLoc]),
|
||||
case StoredHash of
|
||||
Hash ->
|
||||
KV = extract_kvpair(Handle, [DataLoc], Key, CRCCheck),
|
||||
case KV of
|
||||
missing ->
|
||||
search_hash_table(Handle, RestOfEntries, Hash, Key, CRCCheck);
|
||||
_ ->
|
||||
KV
|
||||
end;
|
||||
0 ->
|
||||
% Hash is 0 so key must be missing as 0 found before Hash matched
|
||||
missing;
|
||||
_ ->
|
||||
search_hash_table(Handle, RestOfEntries, Hash, Key, CRCCheck)
|
||||
end.
|
||||
|
||||
% Write Key and Value tuples into the CDB. Each tuple consists of a
|
||||
% 4 byte key length, a 4 byte value length, the actual key followed
|
||||
% by the value.
|
||||
%
|
||||
% Returns a dictionary that is keyed by
|
||||
% the least significant 8 bits of each hash with the
|
||||
% values being a list of the hash and the position of the
|
||||
% key/value binary in the file.
|
||||
write_key_value_pairs(Handle, KeyValueList) ->
|
||||
{ok, Position} = file:position(Handle, cur),
|
||||
HashTree = array:new(256, {default, gb_trees:empty()}),
|
||||
write_key_value_pairs(Handle, KeyValueList, {Position, HashTree}).
|
||||
|
||||
write_key_value_pairs(_, [], Acc) ->
|
||||
Acc;
|
||||
write_key_value_pairs(Handle, [HeadPair|TailList], Acc) ->
|
||||
{Key, Value} = HeadPair,
|
||||
{NewPosition, HashTree} = put(Handle, Key, Value, Acc),
|
||||
write_key_value_pairs(Handle, TailList, {NewPosition, HashTree}).
|
||||
|
||||
%% Write the actual hashtables at the bottom of the file. Each hash table
|
||||
%% entry is a doubleword in length. The first word is the hash value
|
||||
%% corresponding to a key and the second word is a file pointer to the
|
||||
%% corresponding {key,value} tuple.
|
||||
write_hash_tables(Handle, HashTree) ->
|
||||
Seq = lists:seq(0, 255),
|
||||
{ok, StartPos} = file:position(Handle, cur),
|
||||
write_hash_tables(Seq, Handle, HashTree, StartPos, []).
|
||||
|
||||
write_hash_tables([], Handle, _, StartPos, IndexList) ->
|
||||
{ok, EndPos} = file:position(Handle, cur),
|
||||
ok = file:advise(Handle, StartPos, EndPos - StartPos, will_need),
|
||||
IndexList;
|
||||
write_hash_tables([Index|Rest], Handle, HashTree, StartPos, IndexList) ->
|
||||
Tree = array:get(Index, HashTree),
|
||||
case gb_trees:keys(Tree) of
|
||||
[] ->
|
||||
write_hash_tables(Rest, Handle, HashTree, StartPos, IndexList);
|
||||
_ ->
|
||||
HashList = gb_trees:to_list(Tree),
|
||||
BinList = build_binaryhashlist(HashList, []),
|
||||
IndexLength = length(BinList) * 2,
|
||||
SlotList = lists:duplicate(IndexLength, <<0:32, 0:32>>),
|
||||
|
||||
Fn = fun({Hash, Binary}, AccSlotList) ->
|
||||
Slot1 = find_open_slot(AccSlotList, Hash),
|
||||
{L1, [<<0:32, 0:32>>|L2]} = lists:split(Slot1, AccSlotList),
|
||||
lists:append(L1, [Binary|L2])
|
||||
end,
|
||||
NewSlotList = lists:foldl(Fn, SlotList, BinList),
|
||||
|
||||
{ok, CurrPos} = file:position(Handle, cur),
|
||||
file:write(Handle, NewSlotList),
|
||||
write_hash_tables(Rest, Handle, HashTree, StartPos,
|
||||
[{Index, CurrPos, IndexLength}|IndexList])
|
||||
end.
|
||||
|
||||
%% The list created from the original HashTree may have duplicate positions
|
||||
%% e.g. {Key, [Value1, Value2]}. Before any writing is done it is necessary
|
||||
%% to know the actual number of hashes - or the Slot may not be sized correctly
|
||||
%%
|
||||
%% This function creates {Hash, Binary} pairs on a list where there is a unique
|
||||
%% entry for every Key/Value
|
||||
build_binaryhashlist([], BinList) ->
|
||||
BinList;
|
||||
build_binaryhashlist([{Hash, [Position|TailP]}|TailKV], BinList) ->
|
||||
HashLE = endian_flip(Hash),
|
||||
PosLE = endian_flip(Position),
|
||||
NewBin = <<HashLE:32, PosLE:32>>,
|
||||
case TailP of
|
||||
[] ->
|
||||
build_binaryhashlist(TailKV, [{Hash, NewBin}|BinList]);
|
||||
_ ->
|
||||
build_binaryhashlist([{Hash, TailP}|TailKV], [{Hash, NewBin}|BinList])
|
||||
end.
|
||||
|
||||
%% Slot is zero based because it comes from a REM
|
||||
find_open_slot(List, Hash) ->
|
||||
Len = length(List),
|
||||
Slot = hash_to_slot(Hash, Len),
|
||||
Seq = lists:seq(1, Len),
|
||||
{CL1, CL2} = lists:split(Slot, Seq),
|
||||
{L1, L2} = lists:split(Slot, List),
|
||||
find_open_slot1(lists:append(CL2, CL1), lists:append(L2, L1)).
|
||||
|
||||
find_open_slot1([Slot|_RestOfSlots], [<<0:32,0:32>>|_RestOfEntries]) ->
|
||||
Slot - 1;
|
||||
find_open_slot1([_|RestOfSlots], [_|RestOfEntries]) ->
|
||||
find_open_slot1(RestOfSlots, RestOfEntries).
|
||||
|
||||
|
||||
%% Write the top-most 256 doubleword entries. The first word is the
|
||||
%% file pointer to a hashtable and the second word is the number of entries
|
||||
%% in the hash table
|
||||
%% The List passed in should be made up of {Index, Position, Count} tuples
|
||||
write_top_index_table(Handle, BasePos, List) ->
|
||||
% fold function to find any missing index tuples, and add a replacement
|
||||
% in this case with a count of 0. Also orders the list by index
|
||||
FnMakeIndex = fun(I, Acc) ->
|
||||
case lists:keysearch(I, 1, List) of
|
||||
{value, Tuple} ->
|
||||
[Tuple|Acc];
|
||||
false ->
|
||||
[{I, BasePos, 0}|Acc]
|
||||
end
|
||||
end,
|
||||
% Fold function to write the index entries
|
||||
FnWriteIndex = fun({Index, Pos, Count}, CurrPos) ->
|
||||
{ok, _} = file:position(Handle, ?DWORD_SIZE * Index),
|
||||
case Count == 0 of
|
||||
true ->
|
||||
PosLE = endian_flip(CurrPos),
|
||||
NextPos = CurrPos;
|
||||
false ->
|
||||
PosLE = endian_flip(Pos),
|
||||
NextPos = Pos + (Count * ?DWORD_SIZE)
|
||||
end,
|
||||
CountLE = endian_flip(Count),
|
||||
Bin = <<PosLE:32, CountLE:32>>,
|
||||
file:write(Handle, Bin),
|
||||
NextPos
|
||||
end,
|
||||
|
||||
Seq = lists:seq(0, 255),
|
||||
CompleteList = lists:keysort(1, lists:foldl(FnMakeIndex, [], Seq)),
|
||||
lists:foldl(FnWriteIndex, BasePos, CompleteList),
|
||||
ok = file:advise(Handle, 0, ?DWORD_SIZE * 256, will_need).
|
||||
|
||||
|
||||
endian_flip(Int) ->
|
||||
<<X:32/unsigned-little-integer>> = <<Int:32>>,
|
||||
X.
|
||||
|
||||
hash(Key) ->
|
||||
H = 5381,
|
||||
hash1(H,Key) band 16#FFFFFFFF.
|
||||
|
||||
hash1(H,[]) ->H;
|
||||
hash1(H,[B|Rest]) ->
|
||||
H1 = H * 33,
|
||||
H2 = H1 bxor B,
|
||||
hash1(H2,Rest).
|
||||
|
||||
% Get the least significant 8 bits from the hash.
|
||||
hash_to_index(Hash) ->
|
||||
Hash band 255.
|
||||
|
||||
hash_to_slot(Hash,L) ->
|
||||
(Hash bsr 8) rem L.
|
||||
|
||||
%% Create a binary of the key length, value length, key and value, adding a CRC check
|
||||
%% at the front of the value
|
||||
key_value_to_record({Key,Value}) ->
|
||||
L1 = endian_flip(length(Key)),
|
||||
L2 = endian_flip(length(Value) + 4),
|
||||
LB1 = list_to_binary(Key),
|
||||
LB2 = list_to_binary(Value),
|
||||
CRC = calc_crc(LB2),
|
||||
<<L1:32,L2:32,LB1/binary,CRC:32/integer,LB2/binary>>.
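As a worked example of the on-disk record produced above, key_value_to_record({"key1", "value1"}) yields: a 4-byte little-endian key length (4), a 4-byte little-endian value length (10, i.e. 6 bytes of value plus the 4-byte CRC), the key bytes, the big-endian CRC32 of the value bytes, then the value bytes - 22 bytes in total (which is why the second record in write_key_value_pairs_1_test below lands at position 22).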
|
||||
|
||||
%%%%%%%%%%%%%%%%
|
||||
% T E S T
|
||||
%%%%%%%%%%%%%%%
|
||||
|
||||
hash_1_test() ->
|
||||
Hash = hash("key1"),
|
||||
?assertMatch(Hash,2088047427).
|
||||
|
||||
hash_to_index_1_test() ->
|
||||
Hash = hash("key1"),
|
||||
Index = hash_to_index(Hash),
|
||||
?assertMatch(Index,67).
|
||||
|
||||
hash_to_index_2_test() ->
|
||||
Hash = 256,
|
||||
I = hash_to_index(Hash),
|
||||
?assertMatch(I,0).
|
||||
|
||||
hash_to_index_3_test() ->
|
||||
Hash = 268,
|
||||
I = hash_to_index(Hash),
|
||||
?assertMatch(I,12).
|
||||
|
||||
hash_to_index_4_test() ->
|
||||
Hash = hash("key2"),
|
||||
Index = hash_to_index(Hash),
|
||||
?assertMatch(Index,64).
|
||||
|
||||
write_key_value_pairs_1_test() ->
|
||||
{ok,Handle} = file:open("test.cdb",write),
|
||||
{_, HashTree} = write_key_value_pairs(Handle,[{"key1","value1"},{"key2","value2"}]),
|
||||
Hash1 = hash("key1"),
|
||||
Index1 = hash_to_index(Hash1),
|
||||
Hash2 = hash("key2"),
|
||||
Index2 = hash_to_index(Hash2),
|
||||
R0 = array:new(256, {default, gb_trees:empty()}),
|
||||
R1 = array:set(Index1, gb_trees:insert(Hash1, [0], array:get(Index1, R0)), R0),
|
||||
R2 = array:set(Index2, gb_trees:insert(Hash2, [22], array:get(Index2, R1)), R1),
|
||||
?assertMatch(R2, HashTree).
|
||||
|
||||
|
||||
write_hash_tables_1_test() ->
|
||||
{ok, Handle} = file:open("test.cdb",write),
|
||||
R0 = array:new(256, {default, gb_trees:empty()}),
|
||||
R1 = array:set(64, gb_trees:insert(6383014720, [18], array:get(64, R0)), R0),
|
||||
R2 = array:set(67, gb_trees:insert(6383014723, [0], array:get(67, R1)), R1),
|
||||
Result = write_hash_tables(Handle, R2),
|
||||
io:format("write hash tables result of ~w ~n", [Result]),
|
||||
?assertMatch(Result,[{67,16,2},{64,0,2}]).
|
||||
|
||||
find_open_slot_1_test() ->
|
||||
List = [<<1:32,1:32>>,<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>],
|
||||
Slot = find_open_slot(List,0),
|
||||
?assertMatch(Slot,1).
|
||||
|
||||
find_open_slot_2_test() ->
|
||||
List = [<<0:32,0:32>>,<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>],
|
||||
Slot = find_open_slot(List,0),
|
||||
?assertMatch(Slot,0).
|
||||
|
||||
find_open_slot_3_test() ->
|
||||
List = [<<1:32,1:32>>,<<1:32,1:32>>,<<1:32,1:32>>,<<0:32,0:32>>],
|
||||
Slot = find_open_slot(List,2),
|
||||
?assertMatch(Slot,3).
|
||||
|
||||
find_open_slot_4_test() ->
|
||||
List = [<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>,<<1:32,1:32>>],
|
||||
Slot = find_open_slot(List,1),
|
||||
?assertMatch(Slot,0).
|
||||
|
||||
find_open_slot_5_test() ->
|
||||
List = [<<1:32,1:32>>,<<1:32,1:32>>,<<0:32,0:32>>,<<1:32,1:32>>],
|
||||
Slot = find_open_slot(List,3),
|
||||
?assertMatch(Slot,2).
|
||||
|
||||
full_1_test() ->
|
||||
List1 = lists:sort([{"key1","value1"},{"key2","value2"}]),
|
||||
create("simple.cdb",lists:sort([{"key1","value1"},{"key2","value2"}])),
|
||||
List2 = lists:sort(dump("simple.cdb")),
|
||||
?assertMatch(List1,List2).
|
||||
|
||||
full_2_test() ->
|
||||
List1 = lists:sort([{lists:flatten(io_lib:format("~s~p",[Prefix,Plug])),
|
||||
lists:flatten(io_lib:format("value~p",[Plug]))}
|
||||
|| Plug <- lists:seq(1,2000),
|
||||
Prefix <- ["dsd","so39ds","oe9%#*(","020dkslsldclsldowlslf%$#",
|
||||
"tiep4||","qweq"]]),
|
||||
create("full.cdb",List1),
|
||||
List2 = lists:sort(dump("full.cdb")),
|
||||
?assertMatch(List1,List2).
|
||||
|
||||
from_dict_test() ->
|
||||
D = dict:new(),
|
||||
D1 = dict:store("a","b",D),
|
||||
D2 = dict:store("c","d",D1),
|
||||
ok = from_dict("from_dict_test.cdb",D2),
|
||||
io:format("Store created ~n", []),
|
||||
KVP = lists:sort(dump("from_dict_test.cdb")),
|
||||
D3 = lists:sort(dict:to_list(D2)),
|
||||
io:format("KVP is ~w~n", [KVP]),
|
||||
io:format("D3 is ~w~n", [D3]),
|
||||
?assertMatch(KVP,D3).
|
||||
|
||||
to_dict_test() ->
|
||||
D = dict:new(),
|
||||
D1 = dict:store("a","b",D),
|
||||
D2 = dict:store("c","d",D1),
|
||||
ok = from_dict("from_dict_test.cdb",D2),
|
||||
Dict = to_dict("from_dict_test.cdb"),
|
||||
D3 = lists:sort(dict:to_list(D2)),
|
||||
D4 = lists:sort(dict:to_list(Dict)),
|
||||
?assertMatch(D4,D3).
|
||||
|
||||
crccheck_emptyvalue_test() ->
|
||||
?assertMatch(false, crccheck_value(<<>>)).
|
||||
|
||||
crccheck_shortvalue_test() ->
|
||||
Value = <<128,128,32>>,
|
||||
?assertMatch(false, crccheck_value(Value)).
|
||||
|
||||
crccheck_justshortvalue_test() ->
|
||||
Value = <<128,128,32,64>>,
|
||||
?assertMatch(false, crccheck_value(Value)).
|
||||
|
||||
crccheck_correctvalue_test() ->
|
||||
Value = term_to_binary("some text as value"),
|
||||
Hash = erlang:crc32(Value),
|
||||
ValueOnDisk = <<Hash:32/integer, Value/binary>>,
|
||||
?assertMatch(true, crccheck_value(ValueOnDisk)).
|
||||
|
||||
crccheck_wronghash_test() ->
|
||||
Value = term_to_binary("some text as value"),
|
||||
Hash = erlang:crc32(Value) + 1,
|
||||
ValueOnDisk = <<Hash:32/integer, Value/binary>>,
|
||||
?assertMatch(false, crccheck_value(ValueOnDisk)).
|
||||
|
||||
crccheck_truncatedvalue_test() ->
|
||||
Value = term_to_binary("some text as value"),
|
||||
Hash = erlang:crc32(Value),
|
||||
ValueOnDisk = <<Hash:32/integer, Value/binary>>,
|
||||
Size = bit_size(ValueOnDisk) - 1,
|
||||
<<TruncatedValue:Size/bitstring, _/bitstring>> = ValueOnDisk,
|
||||
?assertMatch(false, crccheck_value(TruncatedValue)).
|
||||
|
||||
activewrite_singlewrite_test() ->
|
||||
Key = "0002",
|
||||
Value = "some text as new value",
|
||||
InitialD = dict:new(),
|
||||
InitialD1 = dict:store("0001", "Initial value", InitialD),
|
||||
ok = from_dict("test_mem.cdb", InitialD1),
|
||||
io:format("New db file created ~n", []),
|
||||
{LastPosition, KeyDict} = open_active_file("test_mem.cdb"),
|
||||
io:format("File opened as new active file "
|
||||
"with LastPosition=~w ~n", [LastPosition]),
|
||||
{_, UpdKeyDict} = put("test_mem.cdb", Key, Value, {LastPosition, KeyDict}),
|
||||
io:format("New key and value added to active file ~n", []),
|
||||
?assertMatch({Key, Value}, get_mem(Key, "test_mem.cdb", UpdKeyDict)).
|
||||
|
||||
search_hash_table_findinslot_test() ->
|
||||
Key1 = "key1", % this is in slot 3 if count is 8
|
||||
D = dict:from_list([{Key1, "value1"}, {"K2", "V2"}, {"K3", "V3"},
|
||||
{"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"},
|
||||
{"K8", "V8"}]),
|
||||
ok = from_dict("hashtable1_test.cdb",D),
|
||||
{ok, Handle} = file:open("hashtable1_test.cdb", [binary, raw, read, write]),
|
||||
Hash = hash(Key1),
|
||||
Index = hash_to_index(Hash),
|
||||
{ok, _} = file:position(Handle, {bof, ?DWORD_SIZE*Index}),
|
||||
{HashTable, Count} = read_next_2_integers(Handle),
|
||||
io:format("Count of ~w~n", [Count]),
|
||||
{ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}),
|
||||
Slot = hash_to_slot(Hash, Count),
|
||||
io:format("Slot of ~w~n", [Slot]),
|
||||
{ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}),
|
||||
{ReadH3, ReadP3} = read_next_2_integers(Handle),
|
||||
{ReadH4, ReadP4} = read_next_2_integers(Handle),
|
||||
io:format("Slot 1 has Hash ~w Position ~w~n", [ReadH3, ReadP3]),
|
||||
io:format("Slot 2 has Hash ~w Position ~w~n", [ReadH4, ReadP4]),
|
||||
?assertMatch(0, ReadH4),
|
||||
?assertMatch({"key1", "value1"}, get(Handle, Key1)),
|
||||
{ok, _} = file:position(Handle, FirstHashPosition),
|
||||
FlipH3 = endian_flip(ReadH3),
|
||||
FlipP3 = endian_flip(ReadP3),
|
||||
RBin = <<FlipH3:32/integer, FlipP3:32/integer, 0:32/integer, 0:32/integer>>,
|
||||
io:format("Replacement binary of ~w~n", [RBin]),
|
||||
{ok, OldBin} = file:pread(Handle,
|
||||
FirstHashPosition + (Slot -1) * ?DWORD_SIZE, 16),
|
||||
io:format("Bin to be replaced is ~w ~n", [OldBin]),
|
||||
ok = file:pwrite(Handle, FirstHashPosition + (Slot -1) * ?DWORD_SIZE, RBin),
|
||||
ok = file:close(Handle),
|
||||
io:format("Find key following change to hash table~n"),
|
||||
?assertMatch(missing, get("hashtable1_test.cdb", Key1)).
|
||||
|
||||
getnextkey_test() ->
|
||||
L = [{"K9", "V9"}, {"K2", "V2"}, {"K3", "V3"},
|
||||
{"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"},
|
||||
{"K8", "V8"}, {"K1", "V1"}],
|
||||
ok = create("hashtable1_test.cdb", L),
|
||||
{FirstKey, Handle, P1} = get_nextkey("hashtable1_test.cdb"),
|
||||
io:format("Next position details of ~w~n", [P1]),
|
||||
?assertMatch("K9", FirstKey),
|
||||
{SecondKey, Handle, P2} = get_nextkey(Handle, P1),
|
||||
?assertMatch("K2", SecondKey),
|
||||
{_, Handle, P3} = get_nextkey(Handle, P2),
|
||||
{_, Handle, P4} = get_nextkey(Handle, P3),
|
||||
{_, Handle, P5} = get_nextkey(Handle, P4),
|
||||
{_, Handle, P6} = get_nextkey(Handle, P5),
|
||||
{_, Handle, P7} = get_nextkey(Handle, P6),
|
||||
{_, Handle, P8} = get_nextkey(Handle, P7),
|
||||
{LastKey, Info} = get_nextkey(Handle, P8),
|
||||
?assertMatch(nomorekeys, Info),
|
||||
?assertMatch("K1", LastKey).
|
||||
|
||||
newactivefile_test() ->
|
||||
{LastPosition, _} = open_active_file("activefile_test.cdb"),
|
||||
?assertMatch(256 * ?DWORD_SIZE, LastPosition),
|
||||
Response = get_nextkey("activefile_test.cdb"),
|
||||
?assertMatch(nomorekeys, Response).
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
BIN src/leveled_internal.beam: new binary file (content not shown)
src/leveled_internal.erl: new file, 118 lines (@@ -0,0 +1,118 @@)
-module(leveled_internal).
|
||||
-export([termiterator/6]).
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
|
||||
%% We will have a sorted list of terms
|
||||
%% Some terms will be dummy terms which are pointers to more terms which can be found
|
||||
%% If a pointer is hit need to replenish the term list before proceeding
|
||||
%%
|
||||
%% HelperFuns should be a tuple of three functions - {FolderFun, CompareFun, PointerCheck}
|
||||
%% FolderFun - function which takes the next item and the accumulator and returns an updated accumulator
|
||||
%% CompareFun - function which should be able to compare two keys (which are not pointers)
|
||||
%% PointerCheck - function for differentiating between keys and pointers
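For orientation, the shape of a call using the index-key helpers defined at the bottom of this module (mirroring the unit test there; StartKey/EndKey/SortedKeyList are placeholders):

    HelperFuns = {fun folder_indexkey/2, fun compare_indexkey/2, fun pointercheck_indexkey/1},
    Keys = termiterator(null, SortedKeyList, [], HelperFuns, StartKey, EndKey).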
|
||||
|
||||
termiterator(HeadItem, [], Acc, HelperFuns, _StartKey, _EndKey) ->
|
||||
io:format("Reached empty list with head item of ~w~n", [HeadItem]),
|
||||
case HeadItem of
|
||||
null ->
|
||||
Acc;
|
||||
_ ->
|
||||
{FolderFun, _, _} = HelperFuns,
|
||||
FolderFun(Acc, HeadItem)
|
||||
end;
|
||||
termiterator(null, [NextItem|TailList], Acc, HelperFuns, StartKey, EndKey) ->
|
||||
%% Check that the NextItem is not a pointer before promoting to HeadItem
|
||||
%% Cannot now promote a HeadItem which is a pointer
|
||||
{_, _, PointerCheck} = HelperFuns,
|
||||
case PointerCheck(NextItem) of
|
||||
{true, Pointer} ->
|
||||
NewSlice = getnextslice(Pointer, EndKey),
|
||||
ExtendedList = lists:merge(NewSlice, TailList),
|
||||
termiterator(null, ExtendedList, Acc, HelperFuns, StartKey, EndKey);
|
||||
false ->
|
||||
termiterator(NextItem, TailList, Acc, HelperFuns, StartKey, EndKey)
|
||||
end;
|
||||
termiterator(HeadItem, [NextItem|TailList], Acc, HelperFuns, StartKey, EndKey) ->
|
||||
io:format("Checking head item of ~w~n", [HeadItem]),
|
||||
{FolderFun, CompareFun, PointerCheck} = HelperFuns,
|
||||
%% HeadItem cannot be pointer, but NextItem might be, so check before comparison
|
||||
case PointerCheck(NextItem) of
|
||||
{true, Pointer} ->
|
||||
NewSlice = getnextslice(Pointer, EndKey),
|
||||
ExtendedList = lists:merge(NewSlice, [NextItem|TailList]),
|
||||
termiterator(null, ExtendedList, Acc, HelperFuns, StartKey, EndKey);
|
||||
false ->
|
||||
%% Compare to see if Head and Next match, or if Head is a winner to be added
|
||||
%% to accumulator
|
||||
case CompareFun(HeadItem, NextItem) of
|
||||
{match, StrongItem, _WeakItem} ->
|
||||
%% Discard WeakItem
|
||||
termiterator(StrongItem, TailList, Acc, HelperFuns, StartKey, EndKey);
|
||||
{winner, HeadItem} ->
|
||||
%% Add the head item to the accumulator, and proceed with the next item as head
|
||||
AccPlus = FolderFun(Acc, HeadItem),
|
||||
termiterator(NextItem, TailList, AccPlus, HelperFuns, HeadItem, EndKey)
|
||||
end
|
||||
end.
|
||||
|
||||
|
||||
|
||||
pointercheck_indexkey(IndexKey) ->
|
||||
case IndexKey of
|
||||
{i, _Bucket, _Index, _Term, _Key, _Sequence, {zpointer, Pointer}} ->
|
||||
{true, Pointer};
|
||||
_ ->
|
||||
false
|
||||
end.
|
||||
|
||||
folder_indexkey(Acc, IndexKey) ->
|
||||
io:format("Folding index key of - ~w~n", [IndexKey]),
|
||||
case IndexKey of
|
||||
{i, _Bucket, _Index, _Term, _Key, _Sequence, tombstone} ->
|
||||
Acc;
|
||||
{i, _Bucket, _Index, _Term, Key, _Sequence, null} ->
|
||||
io:format("Adding key ~s~n", [Key]),
|
||||
lists:append(Acc, [Key])
|
||||
end.
|
||||
|
||||
compare_indexkey(IndexKey1, IndexKey2) ->
|
||||
{i, Bucket1, Index1, Term1, Key1, Sequence1, _Value1} = IndexKey1,
|
||||
{i, Bucket2, Index2, Term2, Key2, Sequence2, _Value2} = IndexKey2,
|
||||
case {Bucket1, Index1, Term1, Key1} of
|
||||
{Bucket2, Index2, Term2, Key2} when Sequence1 >= Sequence2 ->
|
||||
{match, IndexKey1, IndexKey2};
|
||||
{Bucket2, Index2, Term2, Key2} ->
|
||||
{match, IndexKey2, IndexKey1};
|
||||
_ when IndexKey2 >= IndexKey1 ->
|
||||
{winner, IndexKey1};
|
||||
_ ->
|
||||
{winner, IndexKey2}
|
||||
end.
|
||||
|
||||
|
||||
getnextslice(Pointer, _EndKey) ->
|
||||
case Pointer of
|
||||
{test, NewList} ->
|
||||
NewList;
|
||||
_ ->
|
||||
[]
|
||||
end.
|
||||
|
||||
|
||||
%% Unit tests
|
||||
|
||||
|
||||
iterateoverindexkeyswithnopointer_test_() ->
|
||||
Key1 = {i, "pdsRecord", "familyName_bin", "1972SMITH", "10001", 1, null},
|
||||
Key2 = {i, "pdsRecord", "familyName_bin", "1972SMITH", "10001", 2, tombstone},
|
||||
Key3 = {i, "pdsRecord", "familyName_bin", "1971SMITH", "10002", 2, null},
|
||||
Key4 = {i, "pdsRecord", "familyName_bin", "1972JONES", "10003", 2, null},
|
||||
KeyList = lists:sort([Key1, Key2, Key3, Key4]),
|
||||
HelperFuns = {fun folder_indexkey/2, fun compare_indexkey/2, fun pointercheck_indexkey/1},
|
||||
ResultList = ["10002", "10003"],
|
||||
?_assertEqual(ResultList, termiterator(null, KeyList, [], HelperFuns, "1971", "1973")).
|
||||
|
||||
|
||||
|
||||
|
BIN src/onstartfile.bst: new binary file (content not shown)
src/rice.erl: new file, 155 lines (@@ -0,0 +1,155 @@)
-module(rice).
|
||||
-export([encode/1,
|
||||
encode/2,
|
||||
checkforhash/2,
|
||||
converttohash/1]).
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
%% Factor is the power of 2 representing the expected normal gap size between
|
||||
%% members of the hash, and therefore the size of the bitstring to represent the
|
||||
%% remainder for the gap
|
||||
%%
|
||||
%% The encoded output should contain a single byte which is the Factor, followed
|
||||
%% by a series of exponents and remainders.
|
||||
%%
|
||||
%% The exponent is n 1's followed by a 0, where n * (2 ^ Factor) + remainder
|
||||
%% represents the gap to the next hash
|
||||
%%
|
||||
%% The size passed in should be the maximum possible value of the hash.
|
||||
%% If this isn't provided - assumes 2^32 - the default for phash2
|
||||
|
||||
encode(HashList) ->
|
||||
encode(HashList, 4 * 1024 * 1024 * 1024).
|
||||
|
||||
encode(HashList, Size) ->
|
||||
SortedHashList = lists:usort(HashList),
|
||||
ExpectedGapSize = Size div length(SortedHashList),
|
||||
Factor = findpowerundergap(ExpectedGapSize),
|
||||
riceencode(SortedHashList, Factor).
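A worked example of the encoding, matching encode_test_ and check_test_ at the end of this module: encode([24,924], 1024) gives an expected gap of 1024 div 2 = 512, so Factor = 9 and Divisor = 512. The first gap (24) encodes as exponent 0 (a single 0 bit) plus 24 as a 9-bit remainder; the second gap (900) encodes as exponent 1 (bits 10) plus remainder 388 in 9 bits. Prefixed with the factor byte, the output is <<9, 6, 44, 4:5>>, and checkforhash(924, <<9, 6, 44, 4:5>>) walks the same bits back to true.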
|
||||
|
||||
%% Outcome may be suboptimal if lists have not been de-duplicated
|
||||
%% Will fail on an unsorted list
|
||||
|
||||
riceencode(HashList, Factor) when Factor<256 ->
|
||||
Divisor = powtwo(Factor),
|
||||
riceencode(HashList, Factor, Divisor, <<>>, 0).
|
||||
|
||||
riceencode([], Factor, _, BitStrAcc, _) ->
|
||||
Prefix = binary:encode_unsigned(Factor),
|
||||
<<Prefix/bytes, BitStrAcc/bitstring>>;
|
||||
riceencode([HeadHash|TailList], Factor, Divisor, BitStrAcc, LastHash) ->
|
||||
HashGap = HeadHash - LastHash,
|
||||
case HashGap of
|
||||
0 ->
|
||||
riceencode(TailList, Factor, Divisor, BitStrAcc, HeadHash);
|
||||
N when N > 0 ->
|
||||
Exponent = buildexponent(HashGap div Divisor),
|
||||
Remainder = HashGap rem Divisor,
|
||||
ExpandedBitStrAcc = <<BitStrAcc/bitstring, Exponent/bitstring, Remainder:Factor>>,
|
||||
riceencode(TailList, Factor, Divisor, ExpandedBitStrAcc, HeadHash)
|
||||
end.
|
||||
|
||||
|
||||
%% Checking for a hash needs to roll through the compressed bloom, decoding until
|
||||
%% the member is found (match!), passed (not matched) or the end of the encoded
|
||||
%% bitstring has been reached (not matched)
|
||||
|
||||
checkforhash(HashToCheck, BitStr) ->
|
||||
<<Factor:8/integer, RiceEncodedBitStr/bitstring>> = BitStr,
|
||||
Divisor = powtwo(Factor),
|
||||
checkforhash(HashToCheck, RiceEncodedBitStr, Factor, Divisor, 0).
|
||||
|
||||
checkforhash(_, <<>>, _, _, _) ->
|
||||
false;
|
||||
checkforhash(HashToCheck, BitStr, Factor, Divisor, Acc) ->
|
||||
[Exponent, BitStrTail] = findexponent(BitStr),
|
||||
[Remainder, BitStrTail2] = findremainder(BitStrTail, Factor),
|
||||
NextHash = Acc + Divisor * Exponent + Remainder,
|
||||
case NextHash of
|
||||
HashToCheck -> true;
|
||||
N when N>HashToCheck -> false;
|
||||
_ -> checkforhash(HashToCheck, BitStrTail2, Factor, Divisor, NextHash)
|
||||
end.
|
||||
|
||||
|
||||
%% Exported functions - currently used only in testing
|
||||
|
||||
converttohash(ItemList) ->
|
||||
converttohash(ItemList, []).
|
||||
|
||||
converttohash([], HashList) ->
|
||||
HashList;
|
||||
converttohash([H|T], HashList) ->
|
||||
converttohash(T, [erlang:phash2(H)|HashList]).
|
||||
|
||||
|
||||
|
||||
%% Helper functions
|
||||
|
||||
buildexponent(Exponent) ->
|
||||
buildexponent(Exponent, <<0:1>>).
|
||||
|
||||
buildexponent(0, OutputBits) ->
|
||||
OutputBits;
|
||||
buildexponent(Exponent, OutputBits) ->
|
||||
buildexponent(Exponent - 1, <<1:1, OutputBits/bitstring>>).
|
||||
|
||||
|
||||
findexponent(BitStr) ->
|
||||
findexponent(BitStr, 0).
|
||||
|
||||
findexponent(BitStr, Acc) ->
|
||||
<<H:1/bitstring, T/bitstring>> = BitStr,
|
||||
case H of
|
||||
<<1:1>> -> findexponent(T, Acc + 1);
|
||||
<<0:1>> -> [Acc, T]
|
||||
end.
|
||||
|
||||
|
||||
findremainder(BitStr, Factor) ->
|
||||
<<Remainder:Factor/integer, BitStrTail/bitstring>> = BitStr,
|
||||
[Remainder, BitStrTail].
|
||||
|
||||
|
||||
powtwo(N) -> powtwo(N, 1).
|
||||
|
||||
powtwo(0, Acc) ->
|
||||
Acc;
|
||||
powtwo(N, Acc) ->
|
||||
powtwo(N-1, Acc * 2).
|
||||
|
||||
%% Helper function for finding the power of two which provides the most
|
||||
%% efficient compression given an average gap size
|
||||
|
||||
findpowerundergap(GapSize) -> findpowerundergap(GapSize, 1, 0).
|
||||
|
||||
findpowerundergap(GapSize, Acc, Counter) ->
|
||||
case Acc of
|
||||
N when N > GapSize -> Counter - 1;
|
||||
_ -> findpowerundergap(GapSize, Acc * 2, Counter + 1)
|
||||
end.
|
||||
|
||||
|
||||
%% Unit tests
|
||||
|
||||
findpowerundergap_test_() ->
|
||||
[
|
||||
?_assertEqual(9, findpowerundergap(700)),
|
||||
?_assertEqual(9, findpowerundergap(512)),
|
||||
?_assertEqual(8, findpowerundergap(511))].
|
||||
|
||||
encode_test_() ->
|
||||
[
|
||||
?_assertEqual(<<9, 6, 44, 4:5>>, encode([24,924], 1024)),
|
||||
?_assertEqual(<<9, 6, 44, 4:5>>, encode([24,24,924], 1024)),
|
||||
?_assertEqual(<<9, 6, 44, 4:5>>, encode([24,924,924], 1024))
|
||||
].
|
||||
|
||||
check_test_() ->
|
||||
[
|
||||
?_assertEqual(true, checkforhash(924, <<9, 6, 44, 4:5>>)),
|
||||
?_assertEqual(true, checkforhash(24, <<9, 6, 44, 4:5>>)),
|
||||
?_assertEqual(false, checkforhash(23, <<9, 6, 44, 4:5>>)),
|
||||
?_assertEqual(false, checkforhash(923, <<9, 6, 44, 4:5>>)),
|
||||
?_assertEqual(false, checkforhash(925, <<9, 6, 44, 4:5>>))
|
||||
].
|
BIN src/simple.cdb: new binary file (content not shown)
BIN src/test.cdb: new binary file (content not shown)
src/test_inconsole.cdb: new empty file (0 lines)
BIN src/test_mem.cdb: new binary file (content not shown)
BIN test/lookup_test.beam: new binary file (content not shown)
test/lookup_test.erl: new file, 241 lines (@@ -0,0 +1,241 @@)
-module(lookup_test).
|
||||
|
||||
-export([go_dict/1, go_ets/1, go_gbtree/1,
|
||||
go_arrayofdict/1, go_arrayofgbtree/1, go_arrayofdict_withcache/1]).
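These micro-benchmarks are intended to be run from the shell and timed externally (e.g. with timer:tc/3); each go_* function returns the VM memory and garbage-collection statistics on completion. Illustrative call:

    {Memory, GCStats} = lookup_test:go_gbtree(1000000).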
|
||||
|
||||
-define(CACHE_SIZE, 512).
|
||||
|
||||
hash(Key) ->
|
||||
H = 5381,
|
||||
hash1(H,Key) band 16#FFFFFFFF.
|
||||
|
||||
hash1(H,[]) ->H;
|
||||
hash1(H,[B|Rest]) ->
|
||||
H1 = H * 33,
|
||||
H2 = H1 bxor B,
|
||||
hash1(H2,Rest).
|
||||
|
||||
% Get the least significant 8 bits from the hash.
|
||||
hash_to_index(Hash) ->
|
||||
Hash band 255.
|
||||
|
||||
|
||||
%%
|
||||
%% Timings (microseconds):
|
||||
%%
|
||||
%% go_dict(200000) : 1569894
|
||||
%% go_dict(1000000) : 17191365
|
||||
%% go_dict(5000000) : forever
|
||||
|
||||
go_dict(N) ->
|
||||
go_dict(dict:new(), N, N).
|
||||
|
||||
go_dict(_, 0, _) ->
|
||||
{erlang:memory(), statistics(garbage_collection)};
|
||||
go_dict(D, N, M) ->
|
||||
% Lookup a random key - which may not be present
|
||||
LookupKey = lists:concat(["key-", random:uniform(M)]),
|
||||
LookupHash = hash(LookupKey),
|
||||
dict:find(LookupHash, D),
|
||||
|
||||
% Add a new key - which may be present so value to be appended
|
||||
Key = lists:concat(["key-", N]),
|
||||
Hash = hash(Key),
|
||||
case dict:find(Hash, D) of
|
||||
error ->
|
||||
go_dict(dict:store(Hash, [N], D), N-1, M);
|
||||
{ok, List} ->
|
||||
go_dict(dict:store(Hash, [N|List], D), N-1, M)
|
||||
end.
|
||||
|
||||
|
||||
|
||||
%%
|
||||
%% Timings (microseconds):
|
||||
%%
|
||||
%% go_ets(200000) : 609119
|
||||
%% go_ets(1000000) : 3520757
|
||||
%% go_ets(5000000) : 19974562
|
||||
|
||||
go_ets(N) ->
|
||||
go_ets(ets:new(ets_test, [private, bag]), N, N).
|
||||
|
||||
go_ets(_, 0, _) ->
|
||||
{erlang:memory(), statistics(garbage_collection)};
|
||||
go_ets(Ets, N, M) ->
|
||||
% Lookup a random key - which may not be present
|
||||
LookupKey = lists:concat(["key-", random:uniform(M)]),
|
||||
LookupHash = hash(LookupKey),
|
||||
ets:lookup(Ets, LookupHash),
|
||||
|
||||
% Add a new key - which may be present so value to be appended
|
||||
Key = lists:concat(["key-", N]),
|
||||
Hash = hash(Key),
|
||||
ets:insert(Ets, {Hash, N}),
|
||||
go_ets(Ets, N - 1, M).
|
||||
|
||||
%%
|
||||
%% Timings (microseconds):
|
||||
%%
|
||||
%% go_gbtree(200000) : 1393936
|
||||
%% go_gbtree(1000000) : 8430997
|
||||
%% go_gbtree(5000000) : 45630810
|
||||
|
||||
go_gbtree(N) ->
|
||||
go_gbtree(gb_trees:empty(), N, N).
|
||||
|
||||
go_gbtree(_, 0, _) ->
|
||||
{erlang:memory(), statistics(garbage_collection)};
|
||||
go_gbtree(Tree, N, M) ->
|
||||
% Lookup a random key - which may not be present
|
||||
LookupKey = lists:concat(["key-", random:uniform(M)]),
|
||||
LookupHash = hash(LookupKey),
|
||||
gb_trees:lookup(LookupHash, Tree),
|
||||
|
||||
% Add a new key - which may be present so value to be appended
|
||||
Key = lists:concat(["key-", N]),
|
||||
Hash = hash(Key),
|
||||
case gb_trees:lookup(Hash, Tree) of
|
||||
none ->
|
||||
go_gbtree(gb_trees:insert(Hash, [N], Tree), N - 1, M);
|
||||
{value, List} ->
|
||||
go_gbtree(gb_trees:update(Hash, [N|List], Tree), N - 1, M)
|
||||
end.
|
||||
|
||||
|
||||
%%
|
||||
%% Timings (microseconds):
|
||||
%%
|
||||
%% go_arrayofdict(200000) : 1266931
|
||||
%% go_arrayofdict(1000000) : 7387219
|
||||
%% go_arrayofdict(5000000) : 49511484
|
||||
|
||||
go_arrayofdict(N) ->
|
||||
go_arrayofdict(array:new(256, {default, dict:new()}), N, N).
|
||||
|
||||
go_arrayofdict(_, 0, _) ->
|
||||
% dict:to_list(array:get(0, Array)),
|
||||
% dict:to_list(array:get(1, Array)),
|
||||
% dict:to_list(array:get(2, Array)),
|
||||
% dict:to_list(array:get(3, Array)),
|
||||
% dict:to_list(array:get(4, Array)),
|
||||
% dict:to_list(array:get(5, Array)),
|
||||
% dict:to_list(array:get(6, Array)),
|
||||
% dict:to_list(array:get(7, Array)),
|
||||
% dict:to_list(array:get(8, Array)),
|
||||
% dict:to_list(array:get(9, Array)),
|
||||
{erlang:memory(), statistics(garbage_collection)};
|
||||
go_arrayofdict(Array, N, M) ->
|
||||
% Lookup a random key - which may not be present
|
||||
LookupKey = lists:concat(["key-", random:uniform(M)]),
|
||||
LookupHash = hash(LookupKey),
|
||||
LookupIndex = hash_to_index(LookupHash),
|
||||
dict:find(LookupHash, array:get(LookupIndex, Array)),
|
||||
|
||||
% Add a new key - which may be present so value to be appended
|
||||
Key = lists:concat(["key-", N]),
|
||||
Hash = hash(Key),
|
||||
Index = hash_to_index(Hash),
|
||||
D = array:get(Index, Array),
|
||||
case dict:find(Hash, D) of
|
||||
error ->
|
||||
go_arrayofdict(array:set(Index,
|
||||
dict:store(Hash, [N], D), Array), N-1, M);
|
||||
{ok, List} ->
|
||||
go_arrayofdict(array:set(Index,
|
||||
dict:store(Hash, [N|List], D), Array), N-1, M)
|
||||
end.
|
||||
|
||||
%%
|
||||
%% Timings (microseconds):
|
||||
%%
|
||||
%% go_arrayofgbtree(200000) : 1176224
|
||||
%% go_arrayofgbtree(1000000) : 7480653
|
||||
%% go_arrayofgbtree(5000000) : 41266701
|
||||
|
||||
go_arrayofgbtree(N) ->
|
||||
go_arrayofgbtree(array:new(256, {default, gb_trees:empty()}), N, N).
|
||||
|
||||
go_arrayofgbtree(_, 0, _) ->
|
||||
% gb_trees:to_list(array:get(0, Array)),
|
||||
% gb_trees:to_list(array:get(1, Array)),
|
||||
% gb_trees:to_list(array:get(2, Array)),
|
||||
% gb_trees:to_list(array:get(3, Array)),
|
||||
% gb_trees:to_list(array:get(4, Array)),
|
||||
% gb_trees:to_list(array:get(5, Array)),
|
||||
% gb_trees:to_list(array:get(6, Array)),
|
||||
% gb_trees:to_list(array:get(7, Array)),
|
||||
% gb_trees:to_list(array:get(8, Array)),
|
||||
% gb_trees:to_list(array:get(9, Array)),
|
||||
{erlang:memory(), statistics(garbage_collection)};
|
||||
go_arrayofgbtree(Array, N, M) ->
|
||||
% Lookup a random key - which may not be present
|
||||
LookupKey = lists:concat(["key-", random:uniform(M)]),
|
||||
LookupHash = hash(LookupKey),
|
||||
LookupIndex = hash_to_index(LookupHash),
|
||||
gb_trees:lookup(LookupHash, array:get(LookupIndex, Array)),
|
||||
|
||||
% Add a new key - which may be present so value to be appended
|
||||
Key = lists:concat(["key-", N]),
|
||||
Hash = hash(Key),
|
||||
Index = hash_to_index(Hash),
|
||||
Tree = array:get(Index, Array),
|
||||
case gb_trees:lookup(Hash, Tree) of
|
||||
none ->
|
||||
go_arrayofgbtree(array:set(Index,
|
||||
gb_trees:insert(Hash, [N], Tree), Array), N - 1, M);
|
||||
{value, List} ->
|
||||
go_arrayofgbtree(array:set(Index,
|
||||
gb_trees:update(Hash, [N|List], Tree), Array), N - 1, M)
|
||||
end.
|
||||
|
||||
|
||||
%%
|
||||
%% Timings (microseconds):
|
||||
%%
|
||||
%% go_arrayofdict_withcache(200000) : 1432951
|
||||
%% go_arrayofdict_withcache(1000000) : 9140169
|
||||
%% go_arrayofdict_withcache(5000000) : 59435511
|
||||
|
||||
go_arrayofdict_withcache(N) ->
|
||||
go_arrayofdict_withcache({array:new(256, {default, dict:new()}),
|
||||
array:new(256, {default, dict:new()})}, N, N).
|
||||
|
||||
go_arrayofdict_withcache(_, 0, _) ->
|
||||
{erlang:memory(), statistics(garbage_collection)};
|
||||
go_arrayofdict_withcache({MArray, CArray}, N, M) ->
|
||||
% Lookup a random key - which may not be present
|
||||
LookupKey = lists:concat(["key-", random:uniform(M)]),
|
||||
LookupHash = hash(LookupKey),
|
||||
LookupIndex = hash_to_index(LookupHash),
|
||||
dict:find(LookupHash, array:get(LookupIndex, CArray)),
|
||||
dict:find(LookupHash, array:get(LookupIndex, MArray)),
|
||||
|
||||
% Add a new key - which may be present so value to be appended
|
||||
Key = lists:concat(["key-", N]),
|
||||
Hash = hash(Key),
|
||||
Index = hash_to_index(Hash),
|
||||
Cache = array:get(Index, CArray),
|
||||
case dict:find(Hash, Cache) of
|
||||
error ->
|
||||
UpdCache = dict:store(Hash, [N], Cache);
|
||||
{ok, _} ->
|
||||
UpdCache = dict:append(Hash, N, Cache)
|
||||
end,
|
||||
case dict:size(UpdCache) of
|
||||
?CACHE_SIZE ->
|
||||
UpdCArray = array:set(Index, dict:new(), CArray),
|
||||
UpdMArray = array:set(Index, dict:merge(fun merge_values/3, UpdCache, array:get(Index, MArray)), MArray),
|
||||
go_arrayofdict_withcache({UpdMArray, UpdCArray}, N - 1, M);
|
||||
_ ->
|
||||
UpdCArray = array:set(Index, UpdCache, CArray),
|
||||
go_arrayofdict_withcache({MArray, UpdCArray}, N - 1, M)
|
||||
end.
|
||||
|
||||
|
||||
|
||||
merge_values(_, Value1, Value2) ->
|
||||
lists:append(Value1, Value2).
|
||||
|
||||
|
||||
|