%%
%% This is a modified version of the cdb module provided by Tom Whitcomb.
%%
%% - https://github.com/thomaswhitcomb/erlang-cdb
%%
%% The primary differences are:
%% - Support for incrementally writing a CDB file while keeping the hash table
%% in memory
%% - The ability to scan a database and accumulate all the Key, Values to
%% rebuild in-memory tables on startup
%% - The ability to scan a database in blocks of sequence numbers
%%
%% This is to be used in eleveledb, and in this context:
%% - Keys will be a combination of the PrimaryKey and the Sequence Number
%% - Values will be a serialised version of the whole object, and the
%% IndexChanges associated with the transaction
%% Where the IndexChanges are all the Key changes required to be added to the
%% ledger to complete the changes (the addition of postings and tombstones).
%%
%% This module provides functions to create and query a CDB (constant database).
%% A CDB implements a two-level hashtable which provides fast {key,value}
%% lookups that remain fairly constant in speed regardless of the CDBs size.
%%
%% The first level in the CDB occupies the first 256 doublewords in the file.
%% Each doubleword slot contains two values.  The first is a file pointer to
%% the primary hashtable (at the end of the file) and the second value is the
%% number of entries in the hashtable.  The first level table of 256 entries
%% is indexed with the lower eight bits of the hash of the input key.
%%
%% Following the 256 doublewords are the {key,value} tuples.  The tuples are
%% packed in the file without regard to word boundaries.  Each {key,value}
%% tuple is represented with a four byte key length, a four byte value length,
%% the actual key value followed by the actual value.
%%
%% Following the {key,value} tuples are the primary hash tables.  There are
%% at most 256 hash tables.  Each hash table is referenced by one of the 256
%% doubleword entries at the top of the file.
%% For efficiency reasons, each
%% hash table is allocated twice the number of entries that it will need.
%% Each entry in the hash table is a doubleword.
%% The first word is the corresponding hash value and the second word is a
%% file pointer to the actual {key,value} tuple higher in the file.
%%

-module(leveled_cdb).

-behaviour(gen_server).

-include("include/leveled.hrl").

-export([init/1,
            handle_call/3,
            handle_cast/2,
            handle_info/2,
            terminate/2,
            code_change/3,
            cdb_open_writer/1,
            cdb_open_writer/2,
            cdb_open_reader/1,
            cdb_get/2,
            cdb_put/3,
            cdb_put/4,
            cdb_getpositions/2,
            cdb_directfetch/3,
            cdb_lastkey/1,
            cdb_firstkey/1,
            cdb_filename/1,
            cdb_keycheck/2,
            cdb_scan/4,
            cdb_close/1,
            cdb_complete/1,
            cdb_roll/1,
            cdb_returnhashtable/3,
            cdb_destroy/1,
            cdb_deletepending/1,
            hashtable_calc/2]).

-include_lib("eunit/include/eunit.hrl").

-define(DWORD_SIZE, 8).
-define(WORD_SIZE, 4).
-define(CRC_CHECK, true).
-define(MAX_FILE_SIZE, 3221225472).
-define(BINARY_MODE, false).
-define(BASE_POSITION, 2048).
-define(WRITE_OPS, [binary, raw, read, write]).
-define(PENDING_ROLL_WAIT, 30).

-record(state, {hashtree,
                last_position :: integer(),
                last_key = empty,
                hash_index = [] :: list(),
                filename :: string(),
                handle :: file:fd(),
                writer :: boolean(),
                max_size :: integer(),
                pending_roll = false :: boolean(),
                pending_delete = false :: boolean(),
                binary_mode = false :: boolean()}).


%%%============================================================================
%%% API
%%%============================================================================

%% Start a CDB process and open Filename for writing, using default options.
cdb_open_writer(Filename) ->
    cdb_open_writer(Filename, #cdb_options{}).

%% Start a CDB process with the given options and ask it to open Filename
%% for writing.  Returns {ok, Pid} when the process replies ok, otherwise
%% whatever the process replied.
cdb_open_writer(Filename, Opts) ->
    {ok, Pid} = gen_server:start(?MODULE, [Opts], []),
    Reply = gen_server:call(Pid, {open_writer, Filename}, infinity),
    case Reply of
        ok ->
            {ok, Pid};
        Error ->
            Error
    end.
%% Start a CDB process (default options) and ask it to open Filename in
%% read-only mode.  Returns {ok, Pid} when the process replies ok, otherwise
%% whatever the process replied.
cdb_open_reader(Filename) ->
    {ok, Pid} = gen_server:start(?MODULE, [#cdb_options{}], []),
    Reply = gen_server:call(Pid, {open_reader, Filename}, infinity),
    case Reply of
        ok ->
            {ok, Pid};
        Error ->
            Error
    end.

%% Fetch the value stored for Key
cdb_get(Pid, Key) ->
    gen_server:call(Pid, {get_kv, Key}, infinity).

%% Store Key/Value, updating the in-memory hashtree
cdb_put(Pid, Key, Value) ->
    cdb_put(Pid, Key, Value, hash).

%% Store Key/Value.  HashOpt is passed through to the process as part of
%% the put_kv request (cdb_put/3 passes the atom hash).
cdb_put(Pid, Key, Value, HashOpt) ->
    gen_server:call(Pid, {put_kv, Key, Value, HashOpt}, infinity).

%% SampleSize can be an integer or the atom all
cdb_getpositions(Pid, SampleSize) ->
    gen_server:call(Pid, {get_positions, SampleSize}, infinity).

%% Info can be key_only, key_size (size being the size of the value) or
%% key_value_check (with the check part indicating if the CRC is correct for
%% the value)
cdb_directfetch(Pid, PositionList, Info) ->
    gen_server:call(Pid, {direct_fetch, PositionList, Info}, infinity).

%% Close the file.  Whilst the process answers pending_roll the request is
%% retried (after a 1ms sleep) up to ?PENDING_ROLL_WAIT times, after which
%% the process is asked to stop via cdb_kill.
cdb_close(Pid) ->
    cdb_close(Pid, ?PENDING_ROLL_WAIT).

cdb_close(Pid, WaitsLeft) when WaitsLeft > 0 ->
    case gen_server:call(Pid, cdb_close, infinity) of
        pending_roll ->
            timer:sleep(1),
            cdb_close(Pid, WaitsLeft - 1);
        R ->
            R
    end;
cdb_close(Pid, _WaitsLeft) ->
    gen_server:call(Pid, cdb_kill, infinity).

cdb_complete(Pid) ->
    gen_server:call(Pid, cdb_complete, infinity).

cdb_roll(Pid) ->
    gen_server:cast(Pid, cdb_roll).

cdb_returnhashtable(Pid, IndexList, HashTreeBin) ->
    gen_server:call(Pid, {return_hashtable, IndexList, HashTreeBin}, infinity).

cdb_destroy(Pid) ->
    gen_server:cast(Pid, destroy).

cdb_deletepending(Pid) ->
    gen_server:cast(Pid, delete_pending).

%% cdb_scan returns {LastPosition, Acc}.  Use LastPosition as StartPosition to
%% continue from that point (the calling function has to protect against
%% double counting).
%%
%% LastPosition could be the atom eof when the last key processed was at
%% the end of the file.  last_key must be defined in LoopState.
cdb_scan(Pid, FilterFun, InitAcc, StartPosition) ->
    gen_server:call(Pid,
                    {cdb_scan, FilterFun, InitAcc, StartPosition},
                    infinity).
%% Get the last key to be added to the file (which will have the highest
%% sequence number)
cdb_lastkey(Pid) ->
    gen_server:call(Pid, cdb_lastkey, infinity).

%% Get the key stored at the first key/value position in the file
cdb_firstkey(Pid) ->
    gen_server:call(Pid, cdb_firstkey, infinity).

%% Get the filename of the database
cdb_filename(Pid) ->
    gen_server:call(Pid, cdb_filename, infinity).

%% Check to see if the key is probably present, will return either
%% probably or missing.  Does not do a definitive check
cdb_keycheck(Pid, Key) ->
    gen_server:call(Pid, {key_check, Key}, infinity).

%%%============================================================================
%%% gen_server callbacks
%%%============================================================================

%% The initial state only records the options; no file is opened until an
%% open_writer or open_reader call is received.
init([Opts]) ->
    MaxSize = case Opts#cdb_options.max_size of
                    undefined ->
                        ?MAX_FILE_SIZE;
                    M ->
                        M
                end,
    {ok,
        #state{max_size=MaxSize, binary_mode=Opts#cdb_options.binary_mode}}.

handle_call({open_writer, Filename}, _From, State) ->
    io:format("Opening file for writing with filename ~s~n", [Filename]),
    {LastPosition, HashTree, LastKey} = open_active_file(Filename),
    {ok, Handle} = file:open(Filename, [sync | ?WRITE_OPS]),
    {reply, ok, State#state{handle=Handle,
                                last_position=LastPosition,
                                last_key=LastKey,
                                filename=Filename,
                                hashtree=HashTree,
                                writer=true}};
handle_call({open_reader, Filename}, _From, State) ->
    io:format("Opening file for reading with filename ~s~n", [Filename]),
    {Handle, Index, LastKey} = open_for_readonly(Filename),
    {reply, ok, State#state{handle=Handle,
                                last_key=LastKey,
                                filename=Filename,
                                writer=false,
                                hash_index=Index}};
handle_call({get_kv, Key}, _From, State) ->
    %% A writer looks the key up through the in-memory hashtree, a reader
    %% through the hash tables in the file (using the cached top index)
    case State#state.writer of
        true ->
            {reply,
                get_mem(Key, State#state.handle, State#state.hashtree),
                State};
        false ->
            {reply,
                get_withcache(State#state.handle,
                                Key,
                                State#state.hash_index),
                State}
    end;
handle_call({key_check, Key}, _From, State) ->
    case State#state.writer of
        true ->
            {reply,
                get_mem(Key,
                        State#state.handle,
                        State#state.hashtree,
                        loose_presence),
                State};
        false ->
            {reply,
                get(State#state.handle,
                    Key,
                    loose_presence,
                    State#state.hash_index),
                State}
    end;
handle_call({put_kv, Key, Value, HashOpt}, _From, State) ->
    case {State#state.writer, State#state.pending_roll} of
        {true, false} ->
            Result = put(State#state.handle,
                            Key,
                            Value,
                            {State#state.last_position, State#state.hashtree},
                            State#state.binary_mode,
                            State#state.max_size),
            case {Result, HashOpt} of
                {roll, _} ->
                    %% Key and value could not be written
                    {reply, roll, State};
                {{UpdHandle, NewPosition, HashTree}, hash} ->
                    {reply, ok, State#state{handle=UpdHandle,
                                                last_position=NewPosition,
                                                last_key=Key,
                                                hashtree=HashTree}};
                {{UpdHandle, NewPosition, _HashTree}, no_hash} ->
                    %% Don't update the hashtree
                    {reply, ok, State#state{handle=UpdHandle,
                                                last_position=NewPosition,
                                                last_key=Key}}
            end;
        _ ->
            {reply,
                {error, read_only},
                State}
    end;
handle_call(cdb_lastkey, _From, State) ->
    {reply, State#state.last_key, State};
handle_call(cdb_firstkey, _From, State) ->
    {reply, extract_key(State#state.handle, ?BASE_POSITION), State};
handle_call(cdb_filename, _From, State) ->
    {reply, State#state.filename, State};
handle_call({get_positions, SampleSize}, _From, State) ->
    case SampleSize of
        all ->
            {reply,
                scan_index(State#state.handle,
                            State#state.hash_index,
                            {fun scan_index_returnpositions/4, []}),
                State};
        _ ->
            %% Shuffle the index by sorting on a random seed, so the sample
            %% is not always drawn from the same hash tables
            SeededL = lists:map(fun(X) -> {random:uniform(), X} end,
                                State#state.hash_index),
            SortedL = lists:keysort(1, SeededL),
            RandomisedHashIndex = lists:map(fun({_R, X}) -> X end, SortedL),
            {reply,
                scan_index_forsample(State#state.handle,
                                        RandomisedHashIndex,
                                        fun scan_index_returnpositions/4,
                                        [],
                                        SampleSize),
                State}
    end;
handle_call({direct_fetch, PositionList, Info}, _From, State) ->
    H = State#state.handle,
    case Info of
        key_only ->
            KeyList = lists:map(fun(P) ->
                                        extract_key(H, P) end,
                                    PositionList),
            {reply, KeyList, State};
        key_size ->
            KeySizeList = lists:map(fun(P) ->
                                            extract_key_size(H, P) end,
                                        PositionList),
            {reply, KeySizeList, State};
        key_value_check ->
            KVCList = lists:map(fun(P) ->
                                        extract_key_value_check(H, P) end,
                                    PositionList),
            {reply, KVCList, State}
    end;
handle_call({cdb_scan, FilterFun, Acc, StartPos}, _From, State) ->
    Handle = State#state.handle,
    ScanFrom = case StartPos of
                    undefined ->
                        ?BASE_POSITION;
                    _ ->
                        StartPos
                end,
    %% The scan reads sequentially from the handle's current position, so the
    %% handle must be moved to the start position in every case - not only
    %% when defaulting to the base position.
    {ok, StartPos0} = file:position(Handle, ScanFrom),
    case check_last_key(State#state.last_key) of
        ok ->
            {LastPosition, Acc2} = scan_over_file(Handle,
                                                    StartPos0,
                                                    FilterFun,
                                                    Acc,
                                                    State#state.last_key),
            {reply, {LastPosition, Acc2}, State};
        empty ->
            {reply, {eof, Acc}, State}
    end;
handle_call(cdb_close, _From, State=#state{pending_roll=RollPending})
                                        when RollPending == true ->
    %% Refuse to close whilst the hashtable for a roll is outstanding
    {reply, pending_roll, State};
handle_call(cdb_close, _From, State) ->
    ok = file:close(State#state.handle),
    %% Clear the handle so terminate/2 does not try to close it again
    {stop, normal, ok, State#state{handle=undefined}};
handle_call(cdb_kill, _From, State) ->
    {stop, killed, ok, State};
handle_call(cdb_complete, _From, State=#state{writer=Writer})
                                                when Writer == true ->
    NewName = determine_new_filename(State#state.filename),
    ok = close_file(State#state.handle,
                        State#state.hashtree,
                        State#state.last_position),
    ok = rename_for_read(State#state.filename, NewName),
    {stop, normal, {ok, NewName}, State};
handle_call(cdb_complete, _From, State) ->
    ok = file:close(State#state.handle),
    {stop, normal, {ok, State#state.filename}, State};
handle_call({return_hashtable, IndexList, HashTreeBin},
                _From,
                State=#state{pending_roll=RollPending})
                                                when RollPending == true ->
    %% Complete a roll: write the pre-computed hash tables and top index,
    %% rename the file and re-open it read-only
    Handle = State#state.handle,
    {ok, BasePos} = file:position(Handle, State#state.last_position),
    NewName = determine_new_filename(State#state.filename),
    ok = perform_write_hash_tables(Handle, HashTreeBin, BasePos),
    ok = write_top_index_table(Handle, BasePos, IndexList),
    file:close(Handle),
    ok = rename_for_read(State#state.filename, NewName),
    io:format("Opening file for reading with filename ~s~n", [NewName]),
    {NewHandle, Index, LastKey} = open_for_readonly(NewName),
    {reply, ok, State#state{handle=NewHandle,
                                last_key=LastKey,
                                filename=NewName,
                                writer=false,
                                pending_roll=false,
                                hash_index=Index}}.


handle_cast(destroy, State) ->
    ok = file:close(State#state.handle),
    ok = file:delete(State#state.filename),
    {noreply, State};
handle_cast(delete_pending, State) ->
    %% The file is deleted by terminate/2 once the process stops
    {noreply, State#state{pending_delete=true}};
handle_cast(cdb_roll, State=#state{writer=Writer}) when Writer == true ->
    %% Hand the hashtable calculation to the clerk, passing self() along
    %% with the hashtree; puts are refused while pending_roll is set
    ok = leveled_iclerk:clerk_hashtablecalc(State#state.hashtree,
                                            State#state.last_position,
                                            self()),
    {noreply, State#state{pending_roll=true}}.

handle_info(_Info, State) ->
    {noreply, State}.

terminate(_Reason, State) ->
    case {State#state.handle, State#state.pending_delete} of
        {undefined, _} ->
            ok;
        {Handle, false} ->
            file:close(Handle);
        {Handle, true} ->
            file:close(Handle),
            file:delete(State#state.filename)
    end.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%============================================================================
%%% Internal functions
%%%============================================================================


%% from_dict(FileName,ListOfKeyValueTuples)
%% Given a filename and a dictionary, create a cdb
%% using the key value pairs from the dict.
from_dict(FileName,Dict) ->
    KeyValueList = dict:to_list(Dict),
    create(FileName, KeyValueList).

%%
%% create(FileName,ListOfKeyValueTuples) -> ok
%% Given a filename and a list of {key,value} tuples,
%% this function creates a CDB
%%
create(FileName,KeyValueList) ->
    {ok, Handle} = file:open(FileName, ?WRITE_OPS),
    {ok, _} = file:position(Handle, {bof, ?BASE_POSITION}),
    {BasePos, HashTree} = write_key_value_pairs(Handle, KeyValueList),
    close_file(Handle, HashTree, BasePos).

%%
%% dump(FileName) -> List
%% Given a file name, this function returns a list
%% of {key,value} tuples from the CDB.
%%
dump(FileName) ->
    dump(FileName, ?CRC_CHECK).
%% dump(FileName, CRCCheck) -> List
%% Reads every key/value pair in the file, and cross-checks each one by
%% looking the key up again through the hash tables.  Each element of the
%% result is one of:
%% - {Key, Value} when the lookup agrees with the value read in sequence
%% - {wonky, LookupResult} when the lookup disagrees
%% - {crc_wonky, LookupResult} when the value read in sequence fails its CRC
%% The file handle is closed before returning.
dump(FileName, CRCCheck) ->
    {ok, Handle} = file:open(FileName, [binary, raw, read]),
    %% Sum the slot counts in the top index table.  Each hash table is sized
    %% at two slots per key, so halve the total to get the number of keys.
    Fn = fun(Index, Acc) ->
        {ok, _} = file:position(Handle, ?DWORD_SIZE * Index),
        {_, Count} = read_next_2_integers(Handle),
        Acc + Count
    end,
    NumberOfPairs = lists:foldl(Fn, 0, lists:seq(0,255)) bsr 1,
    io:format("Count of keys in db is ~w~n", [NumberOfPairs]),
    {ok, _} = file:position(Handle, {bof, ?BASE_POSITION}),
    Fn1 = fun(_I, Acc) ->
        {KL, VL} = read_next_2_integers(Handle),
        Key = read_next_term(Handle, KL),
        ValueRead = read_next_term(Handle, VL, crc, CRCCheck),
        %% Remember where the sequential read has reached, as the lookup
        %% via get/2 moves the file position
        {ok, CurrLoc} = file:position(Handle, cur),
        Return =
            case ValueRead of
                {false, _} ->
                    {crc_wonky, get(Handle, Key)};
                {_, Value} ->
                    case get(Handle, Key) of
                        {Key, Value} ->
                            {Key, Value};
                        X ->
                            {wonky, X}
                    end
            end,
        {ok, _} = file:position(Handle, CurrLoc),
        [Return | Acc]
    end,
    KVList = lists:foldr(Fn1, [], lists:seq(0, NumberOfPairs-1)),
    ok = file:close(Handle),
    KVList.

%% Open an active file - one for which it is assumed the hash tables have not
%% yet been written
%%
%% Needs to scan over file to incrementally produce the hash list, starting at
%% the end of the top index table.
%%
%% Returns {LastPosition, HashTree, LastKey}: the position reached by the
%% startup scan, the hashtree built from the keys scanned, and the last key
%% scanned.  If the file extends beyond LastPosition it is truncated there.
%% The handle used for the scan is closed before returning.
open_active_file(FileName) when is_list(FileName) ->
    {ok, Handle} = file:open(FileName, ?WRITE_OPS),
    {ok, Position} = file:position(Handle, {bof, 256*?DWORD_SIZE}),
    {LastPosition, {HashTree, LastKey}} = startup_scan_over_file(Handle,
                                                                    Position),
    case file:position(Handle, eof) of
        {ok, LastPosition} ->
            ok = file:close(Handle);
        {ok, EndPosition} ->
            LogDetails = [LastPosition, EndPosition],
            io:format("File to be truncated at last position of ~w "
                        "with end of file at ~w~n", LogDetails),
            {ok, _LastPosition} = file:position(Handle, LastPosition),
            ok = file:truncate(Handle),
            ok = file:close(Handle)
    end,
    {LastPosition, HashTree, LastKey}.
%% put(Handle, Key, Value, {LastPosition, HashTree}, BinaryMode, MaxSize)
%%      -> roll | {Handle, NewPosition, UpdatedHashTree}
%% Append a new key/value pair to an active file at LastPosition.  When the
%% record would take the file beyond MaxSize nothing is written and the atom
%% roll is returned.  A filename may be passed in place of a handle, in which
%% case the file is opened first and the new handle returned.
%%
put(FileName, Key, Value, {LastPosition, HashTree}, BinaryMode, MaxSize)
                                                    when is_list(FileName) ->
    {ok, Handle} = file:open(FileName, ?WRITE_OPS),
    put(Handle, Key, Value, {LastPosition, HashTree}, BinaryMode, MaxSize);
put(Handle, Key, Value, {LastPosition, HashTree}, BinaryMode, MaxSize) ->
    Record = key_value_to_record({Key, Value}, BinaryMode),
    EndPosition = LastPosition + byte_size(Record),
    case EndPosition > MaxSize of
        true ->
            roll;
        false ->
            ok = file:pwrite(Handle, LastPosition, Record),
            UpdHashTree = put_hashtree(Key, LastPosition, HashTree),
            {Handle, EndPosition, UpdHashTree}
    end.

%% Should not be used for non-test PUTs by the inker - as the Max File Size
%% should be taken from the startup options not the default
put(FileName, Key, Value, {LastPosition, HashTree}) ->
    put(FileName,
        Key,
        Value,
        {LastPosition, HashTree},
        ?BINARY_MODE,
        ?MAX_FILE_SIZE).

%%
%% get(FileName,Key) -> {key,value}
%% Given a filename and a key, returns a key and value tuple.
%%
get_withcache(Handle, Key, Cache) ->
    get(Handle, Key, ?CRC_CHECK, Cache).

get(FileNameOrHandle, Key) ->
    get(FileNameOrHandle, Key, ?CRC_CHECK).

get(FileNameOrHandle, Key, CRCCheck) ->
    get(FileNameOrHandle, Key, CRCCheck, no_cache).
%% get(FileNameOrHandle, Key, CRCCheck, Cache)
%% Look a key up through the hash tables written to the file.  Cache is
%% either the atom no_cache (the top index entry is read from the file) or a
%% list of {Index, {Pointer, Count}} tuples.  When a filename is passed the
%% file is opened for the duration of the lookup and closed afterwards.
get(FileName, Key, CRCCheck, Cache) when is_list(FileName) ->
    {ok, Handle} = file:open(FileName,[binary, raw, read]),
    try
        get(Handle, Key, CRCCheck, Cache)
    after
        %% The handle was opened here and is not returned to the caller
        file:close(Handle)
    end;
get(Handle, Key, CRCCheck, Cache) when is_tuple(Handle) ->
    Hash = hash(Key),
    Index = hash_to_index(Hash),
    {HashTable, Count} = get_index(Handle, Index, Cache),
    % If the count is 0 for that index - key must be missing
    case Count of
        0 ->
            missing;
        _ ->
            % Get starting slot in hashtable
            {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}),
            Slot = hash_to_slot(Hash, Count),
            {ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}),
            LastHashPosition = HashTable + ((Count-1) * ?DWORD_SIZE),
            LocList = lists:seq(FirstHashPosition,
                                LastHashPosition,
                                ?DWORD_SIZE),
            % Split list around starting slot.
            {L1, L2} = lists:split(Slot, LocList),
            search_hash_table(Handle,
                                lists:append(L2, L1),
                                Hash,
                                Key,
                                CRCCheck)
    end.

%% Return {HashTablePosition, Count} for a top-level index entry, either by
%% reading it from the file (no_cache) or from the cached index list
get_index(Handle, Index, no_cache) ->
    {ok,_} = file:position(Handle, {bof, ?DWORD_SIZE * Index}),
    % Get location of hashtable and number of entries in the hash
    read_next_2_integers(Handle);
get_index(_Handle, Index, Cache) ->
    {Index, {Pointer, Count}} = lists:keyfind(Index, 1, Cache),
    {Pointer, Count}.

%% Get a Key/Value pair from an active CDB file (with no hash table written)
%% This requires the in-memory hashtree to be passed in (mapping key hashes
%% to positions)
%% Will return {Key, Value} or missing (or crc_wonky if the CRC check fails).
%% With a CRCCheck of loose_presence only the hashtree is consulted, and the
%% result is probably or missing.
get_mem(Key, FNOrHandle, HashTree) ->
    get_mem(Key, FNOrHandle, HashTree, ?CRC_CHECK).

get_mem(Key, Filename, HashTree, CRCCheck) when is_list(Filename) ->
    {ok, Handle} = file:open(Filename, [binary, raw, read]),
    try
        get_mem(Key, Handle, HashTree, CRCCheck)
    after
        %% The handle was opened here and is not returned to the caller
        file:close(Handle)
    end;
get_mem(Key, Handle, HashTree, CRCCheck) ->
    ListToCheck = get_hashtree(Key, HashTree),
    case {CRCCheck, ListToCheck} of
        {loose_presence, []} ->
            missing;
        {loose_presence, _L} ->
            probably;
        _ ->
            extract_kvpair(Handle, ListToCheck, Key, CRCCheck)
    end.

%% Get the next key at a position in the file (or the first key if no position
%% is passed).
%% Will return both a key and the next position
get_nextkey(Filename) when is_list(Filename) ->
    {ok, Handle} = file:open(Filename, [binary, raw, read]),
    get_nextkey(Handle);
get_nextkey(Handle) ->
    %% The first entry of the top index table is used as the position at
    %% which the key/value records end
    {ok, _} = file:position(Handle, bof),
    {FirstHashPosition, _} = read_next_2_integers(Handle),
    StartPosition = 256 * ?DWORD_SIZE,
    get_nextkey(Handle, {StartPosition, FirstHashPosition}).

%% Returns nomorekeys when nothing can be read at Position,
%% {Key, nomorekeys} when the key read is the last one before the hash
%% tables, or otherwise {Key, Handle, {NextPosition, FirstHashPosition}}
get_nextkey(Handle, {Position, FirstHashPosition}) ->
    {ok, Position} = file:position(Handle, Position),
    case read_next_2_integers(Handle) of
        {KeyLength, ValueLength} ->
            NextKey = read_next_term(Handle, KeyLength),
            NextPosition = Position
                            + KeyLength + ValueLength
                            + ?DWORD_SIZE,
            case NextPosition =:= FirstHashPosition of
                true ->
                    {NextKey, nomorekeys};
                false ->
                    {NextKey, Handle, {NextPosition, FirstHashPosition}}
            end;
        eof ->
            nomorekeys
    end.

%% Fold over all of the objects in the file, applying FoldFun to each object
%% where FoldFun(K, V, Acc0) -> Acc , or FoldFun(K, Acc0) -> Acc if KeyOnly is
%% set to true
fold(FileName, FoldFun, Acc0) when is_list(FileName) ->
    {ok, Handle} = file:open(FileName, [binary, raw, read]),
    fold(Handle, FoldFun, Acc0);
fold(Handle, FoldFun, Acc0) ->
    {ok, _} = file:position(Handle, bof),
    {FirstHashPosition, _} = read_next_2_integers(Handle),
    StartPosition = 256 * ?DWORD_SIZE,
    fold(Handle, FoldFun, Acc0, {StartPosition, FirstHashPosition}, false).
%% Fold from Position until FirstHashPosition is reached (or nothing more
%% can be read).  When KeyOnly is false, values failing the CRC check are
%% logged and skipped.
fold(Handle, FoldFun, Acc0, {Position, FirstHashPosition}, KeyOnly) ->
    {ok, Position} = file:position(Handle, Position),
    case Position of
        FirstHashPosition ->
            Acc0;
        _ ->
            case read_next_2_integers(Handle) of
                {KeyLength, ValueLength} ->
                    NextKey = read_next_term(Handle, KeyLength),
                    Next = {Position
                                + KeyLength + ValueLength
                                + ?DWORD_SIZE,
                            FirstHashPosition},
                    Acc1 =
                        case KeyOnly of
                            true ->
                                FoldFun(NextKey, Acc0);
                            false ->
                                case read_next_term(Handle,
                                                    ValueLength,
                                                    crc,
                                                    ?CRC_CHECK) of
                                    {false, _} ->
                                        io:format("Skipping value for Key ~w as CRC check failed~n",
                                                    [NextKey]),
                                        Acc0;
                                    {_, Value} ->
                                        FoldFun(NextKey, Value, Acc0)
                                end
                        end,
                    fold(Handle, FoldFun, Acc1, Next, KeyOnly);
                eof ->
                    Acc0
            end
    end.


%% As fold/3, but FoldFun(K, Acc0) is applied to keys only
fold_keys(FileName, FoldFun, Acc0) when is_list(FileName) ->
    {ok, Handle} = file:open(FileName, [binary, raw, read]),
    fold_keys(Handle, FoldFun, Acc0);
fold_keys(Handle, FoldFun, Acc0) ->
    {ok, _} = file:position(Handle, bof),
    {FirstHashPosition, _} = read_next_2_integers(Handle),
    StartPosition = 256 * ?DWORD_SIZE,
    fold(Handle, FoldFun, Acc0, {StartPosition, FirstHashPosition}, true).

%% Compute the hash tables for a hashtree as a single binary, assuming they
%% will be written from StartPos.  Returns {IndexList, HashTreeBin} and logs
%% the time taken.
hashtable_calc(HashTree, StartPos) ->
    Started = os:timestamp(),
    Result = write_hash_tables(lists:seq(0, 255),
                                HashTree,
                                StartPos,
                                [],
                                <<>>),
    {IndexList, HashTreeBin} = Result,
    io:format("HashTree computed in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), Started)]),
    {IndexList, HashTreeBin}.

%%%%%%%%%%%%%%%%%%%%
%% Internal functions
%%%%%%%%%%%%%%%%%%%%

%% Swap a .pnd extension (if present) for .cdb
determine_new_filename(Filename) ->
    Root = filename:rootname(Filename, ".pnd"),
    Root ++ ".cdb".

rename_for_read(Filename, NewName) ->
    %% Rename file
    AlreadyExists = filelib:is_file(NewName),
    io:format("Renaming file from ~s to ~s " ++
                "for which existence is ~w~n",
                [Filename, NewName, AlreadyExists]),
    file:rename(Filename, NewName).
%% Open a completed file for reading.  Returns {Handle, Index, LastKey},
%% where Index is the cached top index table.
open_for_readonly(Filename) ->
    {ok, Handle} = file:open(Filename, [binary, raw, read]),
    Index = load_index(Handle),
    LastKey = find_lastkey(Handle, Index),
    {Handle, Index, LastKey}.

%% Read the 256 entries of the top index table into a list of
%% {Index, {HashTablePosition, Count}} tuples
load_index(Handle) ->
    lists:map(fun(X) ->
                    {ok, _} = file:position(Handle, {bof, ?DWORD_SIZE * X}),
                    {HashTablePos, Count} = read_next_2_integers(Handle),
                    {X, {HashTablePos, Count}}
                end,
                lists:seq(0, 255)).

%% Function to find the LastKey in the file
%% The last key is taken to be the one stored at the highest position
%% referenced from any of the hash tables
find_lastkey(Handle, IndexCache) ->
    LastPosition = scan_index(Handle,
                                IndexCache,
                                {fun scan_index_findlast/4, 0}),
    {ok, _} = file:position(Handle, LastPosition),
    {KeyLength, _ValueLength} = read_next_2_integers(Handle),
    read_next_term(Handle, KeyLength).

%% Fold ScanFun(Handle, Position, Count, Acc) over every hash table listed
%% in the index cache
scan_index(Handle, IndexCache, {ScanFun, InitAcc}) ->
    lists:foldl(fun({_X, {Pos, Count}}, Acc) ->
                        ScanFun(Handle, Pos, Count, Acc)
                    end,
                InitAcc,
                IndexCache).

%% As scan_index, but stops scanning further hash tables once the accumulator
%% holds at least SampleSize elements; at most SampleSize are returned
scan_index_forsample(_Handle, [], _ScanFun, Acc, SampleSize) ->
    lists:sublist(Acc, SampleSize);
scan_index_forsample(Handle, [CacheEntry|Tail], ScanFun, Acc, SampleSize) ->
    case length(Acc) of
        L when L >= SampleSize ->
            lists:sublist(Acc, SampleSize);
        _ ->
            {_X, {Pos, Count}} = CacheEntry,
            scan_index_forsample(Handle,
                                    Tail,
                                    ScanFun,
                                    ScanFun(Handle, Pos, Count, Acc),
                                    SampleSize)
    end.

%% Return the highest data position found in one hash table (or LastPosition
%% if that is higher)
scan_index_findlast(Handle, Position, Count, LastPosition) ->
    {ok, _} = file:position(Handle, Position),
    lists:foldl(fun({_Hash, HPos}, MaxPos) -> max(HPos, MaxPos) end,
                    LastPosition,
                    read_next_n_integerpairs(Handle, Count)).

%% Append to PosList0 the data position of every slot in one hash table
%% whose stored hash is not 0, in slot order
scan_index_returnpositions(Handle, Position, Count, PosList0) ->
    {ok, _} = file:position(Handle, Position),
    Pairs = read_next_n_integerpairs(Handle, Count),
    %% Single append, rather than appending to the list once per slot
    PosList0 ++ [HPosition || {Hash, HPosition} <- Pairs, Hash =/= 0].


%% Take an active file and write the hash details necessary to close that
%% file and roll a new active file if requested.
%%
%% Base Pos should be at the end of the KV pairs written (the position for)
%% the hash tables
close_file(Handle, HashTree, BasePos) ->
    {ok, BasePos} = file:position(Handle, BasePos),
    IndexList = write_hash_tables(Handle, HashTree),
    ok = write_top_index_table(Handle, BasePos, IndexList),
    file:close(Handle).


%% Fetch a list of positions by passing a key to the HashTree
%% Returns [] when no position has been recorded against the hash of the key
get_hashtree(Key, HashTree) ->
    Hash = hash(Key),
    Tree = array:get(hash_to_index(Hash), HashTree),
    case gb_trees:lookup(Hash, Tree) of
        {value, Positions} ->
            Positions;
        _ ->
            []
    end.

%% Add to hash tree - this is an array of 256 gb_trees that contains the Hash
%% and position of objects which have been added to an open CDB file
%% A new position is prepended to any positions already held for the hash
put_hashtree(Key, Position, HashTree) ->
    Hash = hash(Key),
    Index = hash_to_index(Hash),
    Tree = array:get(Index, HashTree),
    Positions =
        case gb_trees:lookup(Hash, Tree) of
            none ->
                [Position];
            {value, L} ->
                [Position|L]
        end,
    array:set(Index, gb_trees:enter(Hash, Positions, Tree), HashTree).

%% Function to extract a Key-Value pair given a file handle and a list of
%% candidate positions.  Each position is tried in turn until the key stored
%% there matches; the CRC check is done when requested.
%% Returns {Key, Value}, crc_wonky or missing
extract_kvpair(_, [], _, _) ->
    missing;
extract_kvpair(Handle, [Position|Rest], Key, Check) ->
    {ok, _} = file:position(Handle, Position),
    {KeyLength, ValueLength} = read_next_2_integers(Handle),
    StoredKey = read_next_term(Handle, KeyLength),
    case StoredKey =:= Key of
        true ->
            % Same key as passed in, so found - now read the value
            case read_next_term(Handle, ValueLength, crc, Check) of
                {false, _} ->
                    crc_wonky;
                {_, Value} ->
                    {Key, Value}
            end;
        false ->
            extract_kvpair(Handle, Rest, Key, Check)
    end.

%% Read the key stored at Position
extract_key(Handle, Position) ->
    {ok, _} = file:position(Handle, Position),
    {KeyLength, _ValueLength} = read_next_2_integers(Handle),
    read_next_term(Handle, KeyLength).

%% Read the key stored at Position, returning it with the value length
extract_key_size(Handle, Position) ->
    {ok, _} = file:position(Handle, Position),
    {KeyLength, ValueLength} = read_next_2_integers(Handle),
    Key = read_next_term(Handle, KeyLength),
    {Key, ValueLength}.
%% Read the key and value stored at Position, returning {Key, Value, Check}
%% where Check is the outcome of the CRC check on the value
extract_key_value_check(Handle, Position) ->
    {ok, _} = file:position(Handle, Position),
    {KeyLength, ValueLength} = read_next_2_integers(Handle),
    Key = read_next_term(Handle, KeyLength),
    {Check, Value} = read_next_term(Handle, ValueLength, crc, true),
    {Key, Value, Check}.

%% Scan through the file until there is a failure to crc check an input, and
%% at that point return the position and the key dictionary scanned so far
startup_scan_over_file(Handle, Position) ->
    EmptyHashTree = array:new(256, {default, gb_trees:empty()}),
    InitAcc = {EmptyHashTree, empty},
    scan_over_file(Handle,
                    Position,
                    fun startup_filter/5,
                    InitAcc,
                    empty).

%% Specific filter to be used at startup to build a hashtree for an incomplete
%% cdb file, and returns at the end the hashtree and the final Key seen in the
%% journal
startup_filter(Key, ValueAsBin, Position, {Hashtree, LastKey}, _ExtractFun) ->
    CRCPassed = crccheck_value(ValueAsBin),
    case CRCPassed of
        true ->
            UpdHashtree = put_hashtree(Key, Position, Hashtree),
            {loop, {UpdHashtree, Key}};
        false ->
            {stop, {Hashtree, LastKey}}
    end.


%% Scan for key changes - scan over file returning applying FilterFun
%% The FilterFun should accept as input:
%% - Key, ValueBin, Position, Accumulator, Fun (to extract values from Binary)
%% -> outputting a new Accumulator and a loop|stop instruction as a tuple
%% i.e. {loop, Acc} or {stop, Acc}
%%
%% Records are read sequentially from the current position of the handle;
%% Position is the offset of the record about to be read.  Returns
%% {NextPosition, Acc}, where NextPosition is the atom eof once the record
%% for LastKey has been read.
scan_over_file(Handle, Position, FilterFun, Output, LastKey) ->
    case saferead_keyvalue(Handle) of
        false ->
            io:format("Failure to read Key/Value at Position ~w"
                            ++ " in scan~n", [Position]),
            {Position, Output};
        {Key, ValueAsBin, KeyLength, ValueLength} ->
            NewPosition =
                if
                    Key =:= LastKey ->
                        eof;
                    true ->
                        Position
                            + KeyLength + ValueLength
                            + ?DWORD_SIZE
                end,
            case FilterFun(Key,
                            ValueAsBin,
                            Position,
                            Output,
                            fun extract_valueandsize/1) of
                {stop, UpdOutput} ->
                    {NewPosition, UpdOutput};
                {loop, UpdOutput} when NewPosition =:= eof ->
                    {eof, UpdOutput};
                {loop, UpdOutput} ->
                    scan_over_file(Handle,
                                    NewPosition,
                                    FilterFun,
                                    UpdOutput,
                                    LastKey)
            end
    end.
%% Confirm that the last key has been defined and set to a non-default value
%% Returns empty when it is still the default (the atom empty), otherwise ok
check_last_key(LastKey) ->
    case LastKey of
        empty -> empty;
        _ -> ok
    end.

%% Read the Key/Value at this point, returning {Key, Value, KeyL, ValueL}
%% where Value is the raw binary read for the value (not decoded or checked).
%% Catch the expected outcomes associated with file corruption (or the end of
%% the file) and return false
saferead_keyvalue(Handle) ->
    case read_next_2_integers(Handle) of
        {error, einval} ->
            false;
        eof ->
            false;
        {KeyL, ValueL} ->
            case safe_read_next_term(Handle, KeyL) of
                {error, einval} ->
                    false;
                eof ->
                    false;
                false ->
                    false;
                Key ->
                    case file:read(Handle, ValueL) of
                        {error, einval} ->
                            false;
                        eof ->
                            false;
                        {ok, Value} ->
                            {Key, Value, KeyL, ValueL}
                    end
            end
    end.


%% As read_next_term/2, but returns false if the bytes read cannot be
%% decoded as a term
safe_read_next_term(Handle, Length) ->
    try read_next_term(Handle, Length) of
        Term ->
            Term
    catch
        error:badarg ->
            false
    end.

%% The first four bytes of the value are the crc check
crccheck_value(Value) when byte_size(Value) >4 ->
    << Hash:32/integer, Tail/bitstring>> = Value,
    case calc_crc(Tail) of
        Hash ->
            true;
        _ ->
            io:format("CRC check failed due to mismatch ~n"),
            false
    end;
crccheck_value(_) ->
    io:format("CRC check failed due to size ~n"),
    false.

%% Run a crc check filling out any values which don't fit on byte boundary
calc_crc(Value) ->
    case bit_size(Value) rem 8 of
        0 ->
            erlang:crc32(Value);
        N ->
            %% Pad with zero bits up to the next byte boundary, as
            %% erlang:crc32/1 requires whole bytes
            M = 8 - N,
            erlang:crc32(<<Value/bitstring, 0:M>>)
    end.

%%
%% to_dict(FileName)
%% Given a filename returns a dict containing
%% the key value pairs from the dict.
%%
%% @spec to_dict(filename()) -> dictionary()
%% where
%%  filename() = string(),
%%  dictionary() = dict()
%%
to_dict(FileName) ->
    KeyValueList = dump(FileName),
    dict:from_list(KeyValueList).

%% Read Length bytes from the current position and decode them as a term.
%% If the read does not return {ok, Bin} the read result is returned as is.
read_next_term(Handle, Length) ->
    case file:read(Handle, Length) of
        {ok, Bin} ->
            binary_to_term(Bin);
        ReadError ->
            ReadError
    end.
%% Read next string where the string has a CRC prepended - stripping the crc
%% and checking if requested
%% Length includes the four bytes of the CRC.  Returns {true, Term} or
%% {false, Term} when the CRC is checked, and {unchecked, Term} when not.
read_next_term(Handle, Length, crc, Check) ->
    case Check of
        true ->
            {ok, <<CRC:32/integer, Bin/binary>>} = file:read(Handle, Length),
            case calc_crc(Bin) of
                CRC ->
                    {true, binary_to_term(Bin)};
                _ ->
                    {false, binary_to_term(Bin)}
            end;
        false ->
            %% Skip over the CRC without reading it
            {ok, _} = file:position(Handle, {cur, 4}),
            {ok, Bin} = file:read(Handle, Length - 4),
            {unchecked, binary_to_term(Bin)}
    end.

%% Extract value and size from binary containing CRC
%% The size returned is that of the encoded value without the CRC
extract_valueandsize(ValueAsBin) ->
    <<_CRC:32/integer, Bin/binary>> = ValueAsBin,
    {binary_to_term(Bin), byte_size(Bin)}.


%% Used for reading lengths
%% Note that the endian_flip is required to make the file format compatible
%% with CDB
%% If the read does not return a full doubleword the read result is returned
%% as is.
read_next_2_integers(Handle) ->
    case file:read(Handle,?DWORD_SIZE) of
        {ok, <<Int1:32, Int2:32>>} ->
            {endian_flip(Int1), endian_flip(Int2)};
        ReadError ->
            ReadError
    end.

%% Read NumberOfPairs doublewords in a single read, returning them as a list
%% of {Int1, Int2} tuples in file order
read_next_n_integerpairs(Handle, NumberOfPairs) ->
    {ok, Block} = file:read(Handle, ?DWORD_SIZE * NumberOfPairs),
    read_integerpairs(Block, []).

read_integerpairs(<<>>, Pairs) ->
    Pairs;
read_integerpairs(<<Int1:32, Int2:32, Rest/binary>>, Pairs) ->
    read_integerpairs(Rest,
                        Pairs ++ [{endian_flip(Int1),
                                    endian_flip(Int2)}]).

%% Search the hash table for the matching hash and key.  Be prepared for
%% multiple keys to have the same hash value.
%%
%% There are three possible values of CRCCheck:
%% true - check the CRC before returning key & value
%% false - don't check the CRC before returning key & value
%% loose_presence - confirm that the hash of the key is present
search_hash_table(_Handle, [], _Hash, _Key, _CRCCheck) ->
    missing;
search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key, CRCCheck) ->
    {ok, _} = file:position(Handle, Entry),
    {StoredHash, DataLoc} = read_next_2_integers(Handle),
    case StoredHash of
        Hash when CRCCheck =:= loose_presence ->
            probably;
        Hash ->
            % Hash matches, but the key at that location may still differ
            case extract_kvpair(Handle, [DataLoc], Key, CRCCheck) of
                missing ->
                    search_hash_table(Handle,
                                        RestOfEntries,
                                        Hash,
                                        Key,
                                        CRCCheck);
                KV ->
                    KV
            end;
        0 ->
            % Hash is 0 so key must be missing as 0 found before Hash matched
            missing;
        _ ->
            search_hash_table(Handle, RestOfEntries, Hash, Key, CRCCheck)
    end.

% Write Key and Value tuples into the CDB.  Each tuple consists of a
% 4 byte key length, a 4 byte value length, the actual key followed
% by the value.
%
% Returns {Position, HashTree}: the position following the last pair
% written, and the hashtree of the hashes and positions of the keys written.
write_key_value_pairs(Handle, KeyValueList) ->
    {ok, Position} = file:position(Handle, cur),
    EmptyHashTree = array:new(256, {default, gb_trees:empty()}),
    write_key_value_pairs(Handle, KeyValueList, {Position, EmptyHashTree}).

write_key_value_pairs(_, [], Acc) ->
    Acc;
write_key_value_pairs(Handle, [{Key, Value}|TailList], Acc) ->
    {Handle, NewPosition, HashTree} = put(Handle, Key, Value, Acc),
    write_key_value_pairs(Handle, TailList, {NewPosition, HashTree}).

%% Write the actual hashtables at the bottom of the file.  Each hash table
%% entry is a doubleword in length.  The first word is the hash value
%% corresponding to a key and the second word is a file pointer to the
%% corresponding {key,value} tuple.
%% Calculate the hash tables for HashTree as they would sit at the current
%% file position, write them there, and return the index list of
%% {Index, Position, Count} tuples produced by hashtable_calc/2.
write_hash_tables(Handle, HashTree) ->
    {ok, StartPos} = file:position(Handle, cur),
    {IndexList, HashTreeBin} = hashtable_calc(HashTree, StartPos),
    ok = perform_write_hash_tables(Handle, HashTreeBin, StartPos),
    IndexList.

%% Write the pre-built hash table binary at the current position, advise the
%% OS that the written range will be needed, and print the elapsed time.
perform_write_hash_tables(Handle, HashTreeBin, StartPos) ->
    SWW = os:timestamp(),
    ok = file:write(Handle, HashTreeBin),
    {ok, EndPos} = file:position(Handle, cur),
    ok = file:advise(Handle, StartPos, EndPos - StartPos, will_need),
    io:format("HashTree written in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWW)]),
    ok.

%% Build the hash table binary one index at a time.
%%
%% For every index with at least one hash, a slot list of twice the number of
%% entries is created (all slots empty), each {Hash, Binary} is dropped into
%% the first open slot found by find_open_slot/2, and the slots are appended
%% to the binary built so far.  Indexes with no hashes add nothing.
%%
%% Returns {IndexList, HashTreeBin}, where IndexList holds an
%% {Index, StartPosition, SlotCount} tuple for each non-empty index.
write_hash_tables([], _HashTree, _CurrPos, IndexList, HashTreeBin) ->
    {IndexList, HashTreeBin};
write_hash_tables([Index|Rest], HashTree, CurrPos, IndexList, HashTreeBin) ->
    Tree = array:get(Index, HashTree),
    case gb_trees:keys(Tree) of
        [] ->
            write_hash_tables(Rest, HashTree, CurrPos, IndexList, HashTreeBin);
        _ ->
            HashList = gb_trees:to_list(Tree),
            BinList = build_binaryhashlist(HashList, []),
            IndexLength = length(BinList) * 2,
            SlotList = lists:duplicate(IndexLength, <<0:32, 0:32>>),
            Fn = fun({Hash, Binary}, AccSlotList) ->
                        Slot1 = find_open_slot(AccSlotList, Hash),
                        % The slot found must still be empty
                        {L1, [<<0:32, 0:32>>|L2]} = lists:split(Slot1,
                                                                AccSlotList),
                        lists:append(L1, [Binary|L2])
                    end,
            NewSlotList = lists:foldl(Fn, SlotList, BinList),
            % Append every slot to the binary accumulated for earlier indexes
            NewSlotBin = lists:foldl(fun(X, Acc) ->
                                            <<Acc/binary, X/binary>>
                                        end,
                                        HashTreeBin,
                                        NewSlotList),
            write_hash_tables(Rest,
                                HashTree,
                                CurrPos + length(NewSlotList) * ?DWORD_SIZE,
                                [{Index, CurrPos, IndexLength}|IndexList],
                                NewSlotBin)
    end.

%% The list created from the original HashTree may have duplicate positions
%% e.g. {Key, [Value1, Value2]}.
%% Before any writing is done it is necessary
%% to know the actual number of hashes - or the Slot may not be sized correctly
%%
%% This function creates {Hash, Binary} pairs on a list where there is a unique
%% entry for every Key/Value.  Each Binary is the endian-flipped hash followed
%% by the endian-flipped position, one 32-bit word each.
build_binaryhashlist([], BinList) ->
    BinList;
build_binaryhashlist([{Hash, [Position|TailP]}|TailKV], BinList) ->
    HashLE = endian_flip(Hash),
    PosLE = endian_flip(Position),
    NewBin = <<HashLE:32, PosLE:32>>,
    case TailP of
        [] ->
            build_binaryhashlist(TailKV,
                                    [{Hash, NewBin}|BinList]);
        _ ->
            % More positions under the same hash - emit one entry for each
            build_binaryhashlist([{Hash, TailP}|TailKV],
                                    [{Hash, NewBin}|BinList])
    end.

%% Slot is zero based because it comes from a REM
%%
%% Starting from the slot the hash maps to, and wrapping round the end of the
%% list, return the zero-based index of the first empty (all zero) slot.
find_open_slot(List, Hash) ->
    Len = length(List),
    Slot = hash_to_slot(Hash, Len),
    Seq = lists:seq(1, Len),
    % Rotate both the slot numbers and the slots so the search begins at Slot
    {CL1, CL2} = lists:split(Slot, Seq),
    {L1, L2} = lists:split(Slot, List),
    find_open_slot1(lists:append(CL2, CL1), lists:append(L2, L1)).

find_open_slot1([Slot|_RestOfSlots], [<<0:32,0:32>>|_RestOfEntries]) ->
    Slot - 1;
find_open_slot1([_|RestOfSlots], [_|RestOfEntries]) ->
    find_open_slot1(RestOfSlots, RestOfEntries).


%% Write the top most 256 doubleword entries.  First word is the
%% file pointer to a hashtable and the second word is the number of entries
%% in the hash table
%% The List passed in should be made up of {Index, Position, Count} tuples
write_top_index_table(Handle, BasePos, List) ->
    % Find any missing index tuples, and add a replacement - in this case
    % with a count of 0
    FnMakeIndex =
        fun(I) ->
            case lists:keysearch(I, 1, List) of
                {value, Tuple} ->
                    Tuple;
                false ->
                    {I, BasePos, 0}
            end
        end,
    % Fold function to build the index entries.  An empty index points at
    % the position reached so far; a populated one points at its own table
    % and moves the running position to the end of that table.
    FnWriteIndex =
        fun({_Index, Pos, Count}, {AccBin, CurrPos}) ->
            {PosLE, NextPos} =
                case Count == 0 of
                    true ->
                        {endian_flip(CurrPos), CurrPos};
                    false ->
                        {endian_flip(Pos), Pos + (Count * ?DWORD_SIZE)}
                end,
            CountLE = endian_flip(Count),
            {<<AccBin/binary, PosLE:32, CountLE:32>>, NextPos}
        end,

    Seq = lists:seq(0, 255),
    % keysort orders the completed list by index
    CompleteList = lists:keysort(1, lists:map(FnMakeIndex, Seq)),
    {IndexBin, _Pos} = lists:foldl(FnWriteIndex,
                                    {<<>>, BasePos},
                                    CompleteList),
    {ok, _} = file:position(Handle, 0),
    ok = file:write(Handle, IndexBin),
    ok = file:advise(Handle, 0, ?DWORD_SIZE * 256, will_need),
    ok.

%% To make this compatible with original Bernstein format this endian flip
%% and also the use of the standard hash function required.
%%
%% Hash function contains mysterious constants, some explanation here as to
%% what they are -
%% http://stackoverflow.com/ ++
%% questions/10696223/reason-for-5381-number-in-djb-hash-function

%% Reinterpret a 32-bit big-endian integer as unsigned little-endian.
endian_flip(Int) ->
    <<X:32/unsigned-little-integer>> = <<Int:32>>,
    X.

%% Hash the external term format of Key, keeping the low 32 bits.
hash(Key) ->
    BK = term_to_binary(Key),
    H = 5381,
    hash1(H, BK) band 16#FFFFFFFF.

%% One step per byte: multiply the running hash by 33, then xor in the byte.
hash1(H, <<>>) ->
    H;
hash1(H, <<B:8/integer, Rest/binary>>) ->
    H1 = H * 33,
    H2 = H1 bxor B,
    hash1(H2, Rest).

% Get the least significant 8 bits from the hash.
hash_to_index(Hash) ->
    Hash band 255.

% Map the hash, with its index bits shifted out, onto a table of L slots.
hash_to_slot(Hash, L) ->
    (Hash bsr 8) rem L.

%% Create a binary of the LengthKeyLengthValue, adding a CRC check
%% at the front of the value
%%
%% Layout: flipped key length, flipped value length (which counts the four
%% CRC bytes), key, CRC of the value, value.  In binary mode the value is
%% written as given, otherwise it is passed through term_to_binary/1.
key_value_to_record({Key, Value}, BinaryMode) ->
    BK = term_to_binary(Key),
    BV = case BinaryMode of
                true ->
                    Value;
                false ->
                    term_to_binary(Value)
            end,
    LK = byte_size(BK),
    LV = byte_size(BV),
    LK_FL = endian_flip(LK),
    LV_FL = endian_flip(LV + 4),
    CRC = calc_crc(BV),
    <<LK_FL:32, LV_FL:32, BK:LK/binary, CRC:32/integer, BV:LV/binary>>.


%%%%%%%%%%%%%%%%
% T E S T
%%%%%%%%%%%%%%%
-ifdef(TEST).
%% Writing two pairs must leave one Hash -> [Position] entry per key in the
%% hash tree, under the index given by hash_to_index/1.
write_key_value_pairs_1_test() ->
    {ok, Handle} = file:open("../test/test.cdb", [write]),
    {_, HashTree} = write_key_value_pairs(Handle,
                                            [{"key1", "value1"},
                                                {"key2", "value2"}]),
    % Release the handle before asserting or deleting the file
    ok = file:close(Handle),
    Hash1 = hash("key1"),
    Index1 = hash_to_index(Hash1),
    Hash2 = hash("key2"),
    Index2 = hash_to_index(Hash2),
    R0 = array:new(256, {default, gb_trees:empty()}),
    R1 = array:set(Index1,
                    gb_trees:insert(Hash1, [0], array:get(Index1, R0)),
                    R0),
    R2 = array:set(Index2,
                    gb_trees:insert(Hash2, [30], array:get(Index2, R1)),
                    R1),
    io:format("HashTree is ~w~n", [HashTree]),
    io:format("Expected HashTree is ~w~n", [R2]),
    ?assertMatch(R2, HashTree),
    ok = file:delete("../test/test.cdb").

%% Two single-entry indexes must each be given a two-slot table, laid out
%% back to back from the start of the (empty) file.
write_hash_tables_1_test() ->
    {ok, Handle} = file:open("../test/testx.cdb", [write]),
    R0 = array:new(256, {default, gb_trees:empty()}),
    R1 = array:set(64,
                    gb_trees:insert(6383014720, [18], array:get(64, R0)),
                    R0),
    R2 = array:set(67,
                    gb_trees:insert(6383014723, [0], array:get(67, R1)),
                    R1),
    Result = write_hash_tables(Handle, R2),
    % Release the handle before asserting or deleting the file
    ok = file:close(Handle),
    io:format("write hash tables result of ~w ~n", [Result]),
    ?assertMatch([{67,16,2},{64,0,2}], Result),
    ok = file:delete("../test/testx.cdb").

%% find_open_slot/2 returns the zero-based index of the first empty slot,
%% searching from the slot the hash maps to and wrapping round.
find_open_slot_1_test() ->
    List = [<<1:32,1:32>>,<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>],
    Slot = find_open_slot(List, 0),
    ?assertMatch(1, Slot).

find_open_slot_2_test() ->
    List = [<<0:32,0:32>>,<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>],
    Slot = find_open_slot(List, 0),
    ?assertMatch(0, Slot).

find_open_slot_3_test() ->
    List = [<<1:32,1:32>>,<<1:32,1:32>>,<<1:32,1:32>>,<<0:32,0:32>>],
    Slot = find_open_slot(List, 2),
    ?assertMatch(3, Slot).

find_open_slot_4_test() ->
    List = [<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>,<<1:32,1:32>>],
    Slot = find_open_slot(List, 1),
    ?assertMatch(0, Slot).

find_open_slot_5_test() ->
    List = [<<1:32,1:32>>,<<1:32,1:32>>,<<0:32,0:32>>,<<1:32,1:32>>],
    Slot = find_open_slot(List, 3),
    ?assertMatch(2, Slot).
%% A file built with create/2 must dump back to the same key/value list.
full_1_test() ->
    List1 = lists:sort([{"key1","value1"},{"key2","value2"}]),
    create("../test/simple.cdb",
            lists:sort([{"key1","value1"},{"key2","value2"}])),
    List2 = lists:sort(dump("../test/simple.cdb")),
    ?assertMatch(List1, List2),
    ok = file:delete("../test/simple.cdb").

%% As full_1_test, with 200 keys under each of six prefixes.
full_2_test() ->
    List1 = lists:sort([{lists:flatten(io_lib:format("~s~p",[Prefix,Plug])),
                lists:flatten(io_lib:format("value~p",[Plug]))}
                ||  Plug <- lists:seq(1,200),
                Prefix <- ["dsd","so39ds","oe9%#*(","020dkslsldclsldowlslf%$#",
                "tiep4||","qweq"]]),
    create("../test/full.cdb", List1),
    List2 = lists:sort(dump("../test/full.cdb")),
    ?assertMatch(List1, List2),
    ok = file:delete("../test/full.cdb").

%% A file built from a dict must dump back to the contents of the dict.
from_dict_test() ->
    D = dict:new(),
    D1 = dict:store("a","b",D),
    D2 = dict:store("c","d",D1),
    ok = from_dict("../test/from_dict_test.cdb",D2),
    io:format("Store created ~n", []),
    KVP = lists:sort(dump("../test/from_dict_test.cdb")),
    D3 = lists:sort(dict:to_list(D2)),
    io:format("KVP is ~w~n", [KVP]),
    io:format("D3 is ~w~n", [D3]),
    ?assertMatch(KVP, D3),
    ok = file:delete("../test/from_dict_test.cdb").

%% A dict written with from_dict/2 and read with to_dict/1 must hold the
%% same entries.
to_dict_test() ->
    D = dict:new(),
    D1 = dict:store("a","b",D),
    D2 = dict:store("c","d",D1),
    ok = from_dict("../test/from_dict_test1.cdb",D2),
    Dict = to_dict("../test/from_dict_test1.cdb"),
    D3 = lists:sort(dict:to_list(D2)),
    D4 = lists:sort(dict:to_list(Dict)),
    ?assertMatch(D4, D3),
    ok = file:delete("../test/from_dict_test1.cdb").

%% Values too short to hold a CRC and a payload must fail the check.
crccheck_emptyvalue_test() ->
    ?assertMatch(false, crccheck_value(<<>>)).

crccheck_shortvalue_test() ->
    Value = <<128,128,32>>,
    ?assertMatch(false, crccheck_value(Value)).

crccheck_justshortvalue_test() ->
    Value = <<128,128,32,64>>,
    ?assertMatch(false, crccheck_value(Value)).

%% A value prefixed with its own crc32 as a 32-bit integer must pass.
crccheck_correctvalue_test() ->
    Value = term_to_binary("some text as value"),
    Hash = erlang:crc32(Value),
    ValueOnDisk = <<Hash:32/integer, Value/binary>>,
    ?assertMatch(true, crccheck_value(ValueOnDisk)).
%% A value prefixed with a CRC that is off by one must fail the check.
crccheck_wronghash_test() ->
    Value = term_to_binary("some text as value"),
    Hash = erlang:crc32(Value) + 1,
    ValueOnDisk = <<Hash:32/integer, Value/binary>>,
    ?assertMatch(false, crccheck_value(ValueOnDisk)).

%% A correctly prefixed value that has lost its final bit must fail the
%% check.
crccheck_truncatedvalue_test() ->
    Value = term_to_binary("some text as value"),
    Hash = erlang:crc32(Value),
    ValueOnDisk = <<Hash:32/integer, Value/binary>>,
    Size = bit_size(ValueOnDisk) - 1,
    <<TruncatedValue:Size/bitstring, _/bitstring>> = ValueOnDisk,
    ?assertMatch(false, crccheck_value(TruncatedValue)).

%% Open an existing file as the active file, add one more key with put/4 and
%% confirm it can be found through the updated in-memory key dictionary -
%% both as a full fetch and as a loose presence check.
activewrite_singlewrite_test() ->
    Key = "0002",
    Value = "some text as new value",
    InitialD = dict:new(),
    InitialD1 = dict:store("0001", "Initial value", InitialD),
    ok = from_dict("../test/test_mem.cdb", InitialD1),
    io:format("New db file created ~n", []),
    {LastPosition, KeyDict, _} = open_active_file("../test/test_mem.cdb"),
    io:format("File opened as new active file "
                    "with LastPosition=~w ~n", [LastPosition]),
    {_, _, UpdKeyDict} = put("../test/test_mem.cdb",
                                Key,
                                Value,
                                {LastPosition, KeyDict}),
    io:format("New key and value added to active file ~n", []),
    ?assertMatch({Key, Value},
                    get_mem(Key, "../test/test_mem.cdb", UpdKeyDict)),
    ?assertMatch(probably,
                    get_mem(Key,
                            "../test/test_mem.cdb",
                            UpdKeyDict,
                            loose_presence)),
    ?assertMatch(missing,
                    get_mem("not_present",
                            "../test/test_mem.cdb",
                            UpdKeyDict,
                            loose_presence)),
    ok = file:delete("../test/test_mem.cdb").
%% Locate the hash table slot holding Key1, confirm the key is found, then
%% rewrite the table so that an empty (zero) entry sits ahead of the real one
%% and confirm the search now stops early and reports missing.
search_hash_table_findinslot_test() ->
    Key1 = "key1", % this is in slot 3 if count is 8
    D = dict:from_list([{Key1, "value1"}, {"K2", "V2"}, {"K3", "V3"},
                        {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"},
                        {"K7", "V7"}, {"K8", "V8"}]),
    ok = from_dict("../test/hashtable1_test.cdb",D),
    {ok, Handle} = file:open("../test/hashtable1_test.cdb",
                                [binary, raw, read, write]),
    Hash = hash(Key1),
    Index = hash_to_index(Hash),
    % Top index entry for this hash: table position and slot count
    {ok, _} = file:position(Handle, {bof, ?DWORD_SIZE*Index}),
    {HashTable, Count} = read_next_2_integers(Handle),
    io:format("Count of ~w~n", [Count]),
    {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}),
    Slot = hash_to_slot(Hash, Count),
    io:format("Slot of ~w~n", [Slot]),
    {ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}),
    {ReadH3, ReadP3} = read_next_2_integers(Handle),
    {ReadH4, ReadP4} = read_next_2_integers(Handle),
    io:format("Slot 1 has Hash ~w Position ~w~n", [ReadH3, ReadP3]),
    io:format("Slot 2 has Hash ~w Position ~w~n", [ReadH4, ReadP4]),
    ?assertMatch(0, ReadH4),
    ?assertMatch({"key1", "value1"}, get(Handle, Key1)),
    ?assertMatch(probably, get(Handle, Key1, loose_presence)),
    ?assertMatch(missing, get(Handle, "Key99", loose_presence)),
    {ok, _} = file:position(Handle, FirstHashPosition),
    FlipH3 = endian_flip(ReadH3),
    FlipP3 = endian_flip(ReadP3),
    % 16 bytes: the real entry followed by an empty entry
    RBin = <<FlipH3:32/integer,
                FlipP3:32/integer,
                0:32/integer,
                0:32/integer>>,
    io:format("Replacement binary of ~w~n", [RBin]),
    {ok, OldBin} = file:pread(Handle,
                                FirstHashPosition + (Slot -1)  * ?DWORD_SIZE,
                                16),
    io:format("Bin to be replaced is ~w ~n", [OldBin]),
    % Written one slot early, so the empty entry lands on the key's own slot
    ok = file:pwrite(Handle,
                        FirstHashPosition + (Slot -1) * ?DWORD_SIZE,
                        RBin),
    ok = file:close(Handle),
    io:format("Find key following change to hash table~n"),
    ?assertMatch(missing, get("../test/hashtable1_test.cdb", Key1)),
    ok = file:delete("../test/hashtable1_test.cdb").
%% Step through a nine-key file with get_nextkey, one key at a time, checking
%% that the key with an empty value ("K3") is stepped over like any other and
%% that the ninth call reports nomorekeys alongside the last key.
getnextkey_inclemptyvalue_test() ->
    File = "../test/hashtable2_test.cdb",
    L = [{"K9", "V9"}, {"K2", "V2"}, {"K3", ""},
            {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"},
            {"K8", "V8"}, {"K1", "V1"}],
    ok = create(File, L),
    {FirstKey, Handle, P1} = get_nextkey(File),
    io:format("Next position details of ~w~n", [P1]),
    ?assertMatch("K9", FirstKey),
    {SecondKey, Handle, P2} = get_nextkey(Handle, P1),
    ?assertMatch("K2", SecondKey),
    {ThirdKeyNoValue, Handle, P3} = get_nextkey(Handle, P2),
    ?assertMatch("K3", ThirdKeyNoValue),
    % Keys four to eight are skipped over, only the positions matter
    SkipOne =
        fun(_N, Pos) ->
            {_, Handle, NextPos} = get_nextkey(Handle, Pos),
            NextPos
        end,
    P8 = lists:foldl(SkipOne, P3, lists:seq(4, 8)),
    {LastKey, nomorekeys} = get_nextkey(Handle, P8),
    ?assertMatch("K1", LastKey),
    ok = file:delete(File).

%% Opening a file that does not yet exist as the active file leaves the last
%% position just past the top index, and there are no keys to step through.
newactivefile_test() ->
    File = "../test/activefile_test.cdb",
    {LastPosition, _, _} = open_active_file(File),
    ?assertMatch(256 * ?DWORD_SIZE, LastPosition),
    ?assertMatch(nomorekeys, get_nextkey(File)),
    ok = file:delete(File).

%% Empty values must survive the trip through from_dict/2 and dump/1.
emptyvalue_fromdict_test() ->
    File = "../test/from_dict_test_ev.cdb",
    D4 = dict:from_list([{"K1", "V1"}, {"K2", ""},
                            {"K3", "V3"}, {"K4", ""}]),
    ok = from_dict(File, D4),
    io:format("Store created ~n", []),
    KVP = lists:sort(dump(File)),
    D_Result = lists:sort(dict:to_list(D4)),
    io:format("KVP is ~w~n", [KVP]),
    io:format("D_Result is ~w~n", [D_Result]),
    ?assertMatch(KVP, D_Result),
    ok = file:delete(File).
%% Fold over five {Key, SeqNo} entries whose values are 2, 4, 8, 16 and 32,
%% summing the values with a sequence number above 2 (8 + 16 + 32 = 56).
fold_test() ->
    File = "../test/fold_test.cdb",
    KVL = [{{"Key1", N}, 1 bsl N} || N <- lists:seq(1, 5)],
    ok = from_dict(File, dict:from_list(KVL)),
    FromSN = 2,
    FoldFun =
        fun(K, V, Acc) ->
            {_Key, Seq} = K,
            case Seq > FromSN of
                true -> Acc + V;
                false -> Acc
            end
        end,
    ?assertMatch(56, fold(File, FoldFun, 0)),
    ok = file:delete(File).

%% Fold over the keys only, collecting the key names with a sequence number
%% above 2.
fold_keys_test() ->
    File = "../test/fold_keys_test.cdb",
    KVL = [{{"Key" ++ integer_to_list(N), N}, 1 bsl N}
            || N <- lists:seq(1, 5)],
    ok = from_dict(File, dict:from_list(KVL)),
    FromSN = 2,
    FoldFun =
        fun(K, Acc) ->
            {Key, Seq} = K,
            case Seq > FromSN of
                true -> Acc ++ [Key];
                false -> Acc
            end
        end,
    Result = fold_keys(File, FoldFun, []),
    ?assertMatch(["Key3", "Key4", "Key5"], lists:sort(Result)),
    ok = file:delete(File).

%% Fold into a dict that keeps, for each key name, the {SeqNo, Value} with
%% the highest sequence number seen.
fold2_test() ->
    File = "../test/fold2_test.cdb",
    KVL = [{{"Key1", N}, 1 bsl N} || N <- lists:seq(1, 5)]
            ++ [{{"Key2", 1}, 64}],
    ok = from_dict(File, dict:from_list(KVL)),
    FoldFun =
        fun(K, V, Acc) ->
            {Key, Seq} = K,
            Replace =
                case dict:find(Key, Acc) of
                    error -> true;
                    {ok, {LSN, _V}} when Seq > LSN -> true;
                    _ -> false
                end,
            case Replace of
                true -> dict:store(Key, {Seq, V}, Acc);
                false -> Acc
            end
        end,
    RD2 = dict:from_list([{"Key1", {5, 32}}, {"Key2", {1, 64}}]),
    Result = fold(File, FoldFun, dict:new()),
    ?assertMatch(RD2, Result),
    ok = file:delete(File).
%% The last key is the last one written (not the highest), the first key the
%% first one written; the last key must still be reported after the writer is
%% closed and reopened, after the file is completed, and when it is opened as
%% a reader.
find_lastkey_test() ->
    {ok, P1} = cdb_open_writer("../test/lastkey.pnd"),
    ok = cdb_put(P1, "Key1", "Value1"),
    ok = cdb_put(P1, "Key3", "Value3"),
    ok = cdb_put(P1, "Key2", "Value2"),
    ?assertMatch("Key2", cdb_lastkey(P1)),
    ?assertMatch("Key1", cdb_firstkey(P1)),
    probably = cdb_keycheck(P1, "Key2"),
    ok = cdb_close(P1),
    {ok, P2} = cdb_open_writer("../test/lastkey.pnd"),
    ?assertMatch("Key2", cdb_lastkey(P2)),
    probably = cdb_keycheck(P2, "Key2"),
    {ok, F2} = cdb_complete(P2),
    {ok, P3} = cdb_open_reader(F2),
    ?assertMatch("Key2", cdb_lastkey(P3)),
    {ok, _FN} = cdb_complete(P3),
    {ok, P4} = cdb_open_reader(F2),
    ?assertMatch("Key2", cdb_lastkey(P4)),
    ok = cdb_close(P4),
    ok = file:delete("../test/lastkey.cdb").

%% Fetch every position from a three-key file, then fetch by position in each
%% of the key_only, key_size and key_value_check modes.
get_keys_byposition_simple_test() ->
    {ok, P1} = cdb_open_writer("../test/poskey.pnd"),
    ok = cdb_put(P1, "Key1", "Value1"),
    ok = cdb_put(P1, "Key3", "Value3"),
    ok = cdb_put(P1, "Key2", "Value2"),
    KeyList = ["Key1", "Key2", "Key3"],
    {ok, F2} = cdb_complete(P1),
    {ok, P2} = cdb_open_reader(F2),
    PositionList = cdb_getpositions(P2, all),
    io:format("Position list of ~w~n", [PositionList]),
    ?assertMatch(3, length(PositionList)),
    R1 = cdb_directfetch(P2, PositionList, key_only),
    ?assertMatch(3, length(R1)),
    lists:foreach(fun(Key) ->
                        ?assertMatch(true, lists:member(Key, KeyList))
                    end,
                    R1),
    R2 = cdb_directfetch(P2, PositionList, key_size),
    ?assertMatch(3, length(R2)),
    lists:foreach(fun({Key, _Size}) ->
                        ?assertMatch(true, lists:member(Key, KeyList))
                    end,
                    R2),
    R3 = cdb_directfetch(P2, PositionList, key_value_check),
    ?assertMatch(3, length(R3)),
    lists:foreach(fun({Key, Value, Check}) ->
                        ?assertMatch(true, Check),
                        % Key and Value are bound, so this asserts equality
                        ?assertMatch({Key, Value}, cdb_get(P2, Key))
                    end,
                    R3),
    ok = cdb_close(P2),
    ok = file:delete(F2).

%% Append {"KeyN", "ValueN"} pairs to KVList for N counting down from Count
%% to 1.  Built in one pass rather than appending to the tail on every step.
generate_sequentialkeys(Count, KVList) ->
    KVList ++ [{"Key" ++ integer_to_list(N), "Value" ++ integer_to_list(N)}
                || N <- lists:seq(Count, 1, -1)].
%% Write 1024 keys, complete the file and check that cdb_getpositions/2
%% returns every position for all, exactly the sample size asked for, and no
%% more than the number of keys when the sample size exceeds it.
get_keys_byposition_manykeys_test() ->
    KeyCount = 1024,
    {ok, P1} = cdb_open_writer("../test/poskeymany.pnd"),
    KVList = generate_sequentialkeys(KeyCount, []),
    % Every put must succeed, or the counts asserted below mean nothing
    lists:foreach(fun({K, V}) -> ok = cdb_put(P1, K, V) end, KVList),

    SW1 = os:timestamp(),
    {ok, F2} = cdb_complete(P1),
    SW2 = os:timestamp(),
    io:format("CDB completed in ~w microseconds~n",
                [timer:now_diff(SW2, SW1)]),
    {ok, P2} = cdb_open_reader(F2),
    SW3 = os:timestamp(),
    io:format("CDB opened for read in ~w microseconds~n",
                [timer:now_diff(SW3, SW2)]),
    PositionList = cdb_getpositions(P2, all),
    io:format("Positions fetched in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SW3)]),
    ?assertMatch(KeyCount, length(PositionList)),

    SampleList1 = cdb_getpositions(P2, 10),
    ?assertMatch(10, length(SampleList1)),
    SampleList2 = cdb_getpositions(P2, KeyCount),
    ?assertMatch(KeyCount, length(SampleList2)),
    SampleList3 = cdb_getpositions(P2, KeyCount + 1),
    ?assertMatch(KeyCount, length(SampleList3)),

    ok = cdb_close(P2),
    ok = file:delete(F2).

-endif.