diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index da25d1c..46004e0 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -9,7 +9,7 @@ %% - Support for merging of multiple CDB files with a key-checking function to %% allow for compaction %% - Automatic adding of a helper object that will keep a small proportion of -%% keys to be used when checking to see if the cdb file is a candidate for +%% keys to be used when checking to see if the cdb file is a candidate for %% compaction %% - The ability to scan a database and accumulate all the Key, Values to %% rebuild in-memory tables on startup @@ -46,17 +46,27 @@ -module(leveled_cdb). --export([from_dict/2, - create/2, - dump/1, - get/2, - get_mem/3, - put/4, - open_active_file/1, - get_nextkey/1, - get_nextkey/2, - fold/3, - fold_keys/3]). +-behaviour(gen_server). + +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3, + cdb_open_writer/1, + cdb_open_reader/1, + from_dict/2, + create/2, + dump/1, + get/2, + get_mem/3, + put/4, + open_active_file/1, + get_nextkey/1, + get_nextkey/2, + fold/3, + fold_keys/3]). -include_lib("eunit/include/eunit.hrl"). @@ -66,13 +76,94 @@ -define(MAX_FILE_SIZE, 3221225472). -define(BASE_POSITION, 2048). -%% +-record(state, {hashtree, + last_position :: integer(), + smallest_sqn :: integer(), + highest_sqn :: integer(), + filename :: string(), + handle :: file:fd(), + writer :: boolean}). + + +%%%============================================================================ +%%% API +%%%============================================================================ + +cdb_open_writer(Filename) -> + {ok, Pid} = gen_server:start(?MODULE, [], []), + case gen_server:call(Pid, {cdb_open_writer, Filename}, infinity) of + ok -> + {ok, Pid}; + Error -> + Error + end. 
+ +cdb_open_reader(Filename) -> + {ok, Pid} = gen_server:start(?MODULE, [], []), + case gen_server:call(Pid, {cdb_open_reader, Filename}, infinity) of + ok -> + {ok, Pid}; + Error -> + Error + end. + +%cdb_get(Pid, Key) -> +% gen_server:call(Pid, {cdb_get, Key}, infinity). +% +%cdb_put(Pid, Key, Value) -> +% gen_server:call(Pid, {cdb_put, Key, Value}, infinity). +% +%cdb_close(Pid) -> +% gen_server:call(Pid, cdb_close, infinity). + + +%%%============================================================================ +%%% gen_server callbacks +%%%============================================================================ + +init([]) -> + {ok, #state{}}. + +handle_call({cdb_open_writer, Filename}, _From, State) -> + io:format("Opening file for writing with filename ~s~n", [Filename]), + {LastPosition, HashTree} = open_active_file(Filename), + {ok, Handle} = file:open(Filename, [binary, raw, read, + write, delayed_write]), + {reply, ok, State#state{handle=Handle, + last_position=LastPosition, + filename=Filename, + hashtree=HashTree, + writer=true}}; +handle_call({cdb_open_reader, Filename}, _From, State) -> + io:format("Opening file for reading with filename ~s~n", [Filename]), + {ok, Handle} = file:open(Filename, [binary, raw, read]), + {reply, ok, State#state{handle=Handle, + filename=Filename, + writer=false}}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%============================================================================ +%%% Internal functions +%%%============================================================================ + + %% from_dict(FileName,ListOfKeyValueTuples) %% Given a filename and a dictionary, create a cdb %% using the key value pairs from the dict. from_dict(FileName,Dict) -> - KeyValueList = dict:to_list(Dict), - create(FileName, KeyValueList). 
+ KeyValueList = dict:to_list(Dict), + create(FileName, KeyValueList). %% %% create(FileName,ListOfKeyValueTuples) -> ok @@ -80,10 +171,10 @@ from_dict(FileName,Dict) -> %% this function creates a CDB %% create(FileName,KeyValueList) -> - {ok, Handle} = file:open(FileName, [binary, raw, read, write]), - {ok, _} = file:position(Handle, {bof, ?BASE_POSITION}), - {BasePos, HashTree} = write_key_value_pairs(Handle, KeyValueList), - close_file(Handle, HashTree, BasePos). + {ok, Handle} = file:open(FileName, [binary, raw, read, write]), + {ok, _} = file:position(Handle, {bof, ?BASE_POSITION}), + {BasePos, HashTree} = write_key_value_pairs(Handle, KeyValueList), + close_file(Handle, HashTree, BasePos). %% %% dump(FileName) -> List @@ -91,38 +182,38 @@ create(FileName,KeyValueList) -> %% of {key,value} tuples from the CDB. %% dump(FileName) -> - dump(FileName, ?CRC_CHECK). + dump(FileName, ?CRC_CHECK). dump(FileName, CRCCheck) -> - {ok, Handle} = file:open(FileName, [binary, raw, read]), - Fn = fun(Index, Acc) -> - {ok, _} = file:position(Handle, ?DWORD_SIZE * Index), - {_, Count} = read_next_2_integers(Handle), - Acc + Count - end, - NumberOfPairs = lists:foldl(Fn, 0, lists:seq(0,255)) bsr 1, - io:format("Count of keys in db is ~w~n", [NumberOfPairs]), - - {ok, _} = file:position(Handle, {bof, 2048}), - Fn1 = fun(_I,Acc) -> - {KL,VL} = read_next_2_integers(Handle), - Key = read_next_term(Handle, KL), - io:format("Key read of ~w~n", [Key]), - case read_next_term(Handle, VL, crc, CRCCheck) of - {false, _} -> - {ok, CurrLoc} = file:position(Handle, cur), - Return = {crc_wonky, get(Handle, Key)}; - {_, Value} -> - {ok, CurrLoc} = file:position(Handle, cur), - Return = case get(Handle, Key) of - {Key,Value} -> {Key ,Value}; - X -> {wonky, X} - end + {ok, Handle} = file:open(FileName, [binary, raw, read]), + Fn = fun(Index, Acc) -> + {ok, _} = file:position(Handle, ?DWORD_SIZE * Index), + {_, Count} = read_next_2_integers(Handle), + Acc + Count end, - {ok, _} = 
file:position(Handle, CurrLoc), - [Return | Acc] - end, - lists:foldr(Fn1,[],lists:seq(0,NumberOfPairs-1)). + NumberOfPairs = lists:foldl(Fn, 0, lists:seq(0,255)) bsr 1, + io:format("Count of keys in db is ~w~n", [NumberOfPairs]), + {ok, _} = file:position(Handle, {bof, 2048}), + Fn1 = fun(_I,Acc) -> + {KL,VL} = read_next_2_integers(Handle), + Key = read_next_term(Handle, KL), + io:format("Key read of ~w~n", [Key]), + case read_next_term(Handle, VL, crc, CRCCheck) of + {false, _} -> + {ok, CurrLoc} = file:position(Handle, cur), + Return = {crc_wonky, get(Handle, Key)}; + {_, Value} -> + {ok, CurrLoc} = file:position(Handle, cur), + Return = + case get(Handle, Key) of + {Key,Value} -> {Key ,Value}; + X -> {wonky, X} + end + end, + {ok, _} = file:position(Handle, CurrLoc), + [Return | Acc] + end, + lists:foldr(Fn1,[],lists:seq(0,NumberOfPairs-1)). %% Open an active file - one for which it is assumed the hash tables have not %% yet been written @@ -134,21 +225,21 @@ dump(FileName, CRCCheck) -> %% tuples as the write_key_value_pairs function, and the current position, and %% the file handle open_active_file(FileName) when is_list(FileName) -> - {ok, Handle} = file:open(FileName, [binary, raw, read, write]), - {ok, Position} = file:position(Handle, {bof, 256*?DWORD_SIZE}), - {LastPosition, HashTree} = scan_over_file(Handle, Position), - case file:position(Handle, eof) of - {ok, LastPosition} -> - ok = file:close(Handle); - {ok, _} -> - LogDetails = [LastPosition, file:position(Handle, eof)], - io:format("File to be truncated at last position of" - "~w with end of file at ~w~n", LogDetails), - {ok, LastPosition} = file:position(Handle, LastPosition), - ok = file:truncate(Handle), - ok = file:close(Handle) - end, - {LastPosition, HashTree}. 
+ {ok, Handle} = file:open(FileName, [binary, raw, read, write]), + {ok, Position} = file:position(Handle, {bof, 256*?DWORD_SIZE}), + {LastPosition, HashTree} = scan_over_file(Handle, Position), + case file:position(Handle, eof) of + {ok, LastPosition} -> + ok = file:close(Handle); + {ok, _} -> + LogDetails = [LastPosition, file:position(Handle, eof)], + io:format("File to be truncated at last position of" + "~w with end of file at ~w~n", LogDetails), + {ok, LastPosition} = file:position(Handle, LastPosition), + ok = file:truncate(Handle), + ok = file:close(Handle) + end, + {LastPosition, HashTree}. %% put(Handle, Key, Value, {LastPosition, HashDict}) -> {NewPosition, KeyDict} %% Append to an active file a new key/value pair returning an updated @@ -175,68 +266,67 @@ put(Handle, Key, Value, {LastPosition, HashTree}) -> %% Given a filename and a key, returns a key and value tuple. %% get(FileNameOrHandle, Key) -> - get(FileNameOrHandle, Key, ?CRC_CHECK). + get(FileNameOrHandle, Key, ?CRC_CHECK). 
get(FileName, Key, CRCCheck) when is_list(FileName), is_list(Key) -> - {ok,Handle} = file:open(FileName,[binary, raw, read]), - get(Handle,Key, CRCCheck); - + {ok,Handle} = file:open(FileName,[binary, raw, read]), + get(Handle,Key, CRCCheck); get(Handle, Key, CRCCheck) when is_tuple(Handle), is_list(Key) -> - Hash = hash(Key), - Index = hash_to_index(Hash), - {ok,_} = file:position(Handle, {bof, ?DWORD_SIZE * Index}), - % Get location of hashtable and number of entries in the hash - {HashTable, Count} = read_next_2_integers(Handle), - % If the count is 0 for that index - key must be missing - case Count of - 0 -> - missing; - _ -> - % Get starting slot in hashtable - {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}), - Slot = hash_to_slot(Hash, Count), - {ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}), - LastHashPosition = HashTable + ((Count-1) * ?DWORD_SIZE), - LocList = lists:seq(FirstHashPosition, LastHashPosition, ?DWORD_SIZE), - % Split list around starting slot. - {L1, L2} = lists:split(Slot, LocList), - search_hash_table(Handle, lists:append(L2, L1), Hash, Key, CRCCheck) - end. + Hash = hash(Key), + Index = hash_to_index(Hash), + {ok,_} = file:position(Handle, {bof, ?DWORD_SIZE * Index}), + % Get location of hashtable and number of entries in the hash + {HashTable, Count} = read_next_2_integers(Handle), + % If the count is 0 for that index - key must be missing + case Count of + 0 -> + missing; + _ -> + % Get starting slot in hashtable + {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}), + Slot = hash_to_slot(Hash, Count), + {ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}), + LastHashPosition = HashTable + ((Count-1) * ?DWORD_SIZE), + LocList = lists:seq(FirstHashPosition, LastHashPosition, ?DWORD_SIZE), + % Split list around starting slot. + {L1, L2} = lists:split(Slot, LocList), + search_hash_table(Handle, lists:append(L2, L1), Hash, Key, CRCCheck) + end. 
%% Get a Key/Value pair from an active CDB file (with no hash table written) %% This requires a key dictionary to be passed in (mapping keys to positions) %% Will return {Key, Value} or missing get_mem(Key, Filename, HashTree) when is_list(Filename) -> - {ok, Handle} = file:open(Filename, [binary, raw, read]), - get_mem(Key, Handle, HashTree); + {ok, Handle} = file:open(Filename, [binary, raw, read]), + get_mem(Key, Handle, HashTree); get_mem(Key, Handle, HashTree) -> - extract_kvpair(Handle, get_hashtree(Key, HashTree), Key). + extract_kvpair(Handle, get_hashtree(Key, HashTree), Key). %% Get the next key at a position in the file (or the first key if no position %% is passed). Will return both a key and the next position get_nextkey(Filename) when is_list(Filename) -> - {ok, Handle} = file:open(Filename, [binary, raw, read]), - get_nextkey(Handle); + {ok, Handle} = file:open(Filename, [binary, raw, read]), + get_nextkey(Handle); get_nextkey(Handle) -> - {ok, _} = file:position(Handle, bof), - {FirstHashPosition, _} = read_next_2_integers(Handle), - get_nextkey(Handle, {256 * ?DWORD_SIZE, FirstHashPosition}). + {ok, _} = file:position(Handle, bof), + {FirstHashPosition, _} = read_next_2_integers(Handle), + get_nextkey(Handle, {256 * ?DWORD_SIZE, FirstHashPosition}). get_nextkey(Handle, {Position, FirstHashPosition}) -> - {ok, Position} = file:position(Handle, Position), - case read_next_2_integers(Handle) of - {KeyLength, ValueLength} -> - NextKey = read_next_term(Handle, KeyLength), - NextPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE, - case NextPosition of - FirstHashPosition -> - {NextKey, nomorekeys}; - _ -> - {NextKey, Handle, {NextPosition, FirstHashPosition}} - end; - eof -> - nomorekeys - end. 
+ {ok, Position} = file:position(Handle, Position), + case read_next_2_integers(Handle) of + {KeyLength, ValueLength} -> + NextKey = read_next_term(Handle, KeyLength), + NextPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE, + case NextPosition of + FirstHashPosition -> + {NextKey, nomorekeys}; + _ -> + {NextKey, Handle, {NextPosition, FirstHashPosition}} + end; + eof -> + nomorekeys +end. %% Fold over all of the objects in the file, applying FoldFun to each object @@ -244,52 +334,66 @@ get_nextkey(Handle, {Position, FirstHashPosition}) -> %% set to true fold(FileName, FoldFun, Acc0) when is_list(FileName) -> - {ok, Handle} = file:open(FileName, [binary, raw, read]), - fold(Handle, FoldFun, Acc0); + {ok, Handle} = file:open(FileName, [binary, raw, read]), + fold(Handle, FoldFun, Acc0); fold(Handle, FoldFun, Acc0) -> - {ok, _} = file:position(Handle, bof), - {FirstHashPosition, _} = read_next_2_integers(Handle), - fold(Handle, FoldFun, Acc0, {256 * ?DWORD_SIZE, FirstHashPosition}, false). + {ok, _} = file:position(Handle, bof), + {FirstHashPosition, _} = read_next_2_integers(Handle), + fold(Handle, FoldFun, Acc0, {256 * ?DWORD_SIZE, FirstHashPosition}, false). 
fold(Handle, FoldFun, Acc0, {Position, FirstHashPosition}, KeyOnly) -> - {ok, Position} = file:position(Handle, Position), - case Position of - FirstHashPosition -> - Acc0; - _ -> - case read_next_2_integers(Handle) of - {KeyLength, ValueLength} -> - NextKey = read_next_term(Handle, KeyLength), - NextPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE, - case KeyOnly of - true -> - fold(Handle, FoldFun, FoldFun(NextKey, Acc0), - {NextPosition, FirstHashPosition}, KeyOnly); - false -> - case read_next_term(Handle, ValueLength, crc, ?CRC_CHECK) of - {false, _} -> - io:format("Skipping value for Key ~w as CRC check failed~n", - [NextKey]), - fold(Handle, FoldFun, Acc0, - {NextPosition, FirstHashPosition}, KeyOnly); - {_, Value} -> - fold(Handle, FoldFun, FoldFun(NextKey, Value, Acc0), - {NextPosition, FirstHashPosition}, KeyOnly) - end - end; - eof -> - Acc0 - end - end. + {ok, Position} = file:position(Handle, Position), + case Position of + FirstHashPosition -> + Acc0; + _ -> + case read_next_2_integers(Handle) of + {KeyLength, ValueLength} -> + NextKey = read_next_term(Handle, KeyLength), + NextPosition = Position + + KeyLength + ValueLength + + ?DWORD_SIZE, + case KeyOnly of + true -> + fold(Handle, + FoldFun, + FoldFun(NextKey, Acc0), + {NextPosition, FirstHashPosition}, + KeyOnly); + false -> + case read_next_term(Handle, + ValueLength, + crc, + ?CRC_CHECK) of + {false, _} -> + io:format("Skipping value for Key ~w as CRC + check failed~n", [NextKey]), + fold(Handle, + FoldFun, + Acc0, + {NextPosition, FirstHashPosition}, + KeyOnly); + {_, Value} -> + fold(Handle, + FoldFun, + FoldFun(NextKey, Value, Acc0), + {NextPosition, FirstHashPosition}, + KeyOnly) + end + end; + eof -> + Acc0 + end + end. 
fold_keys(FileName, FoldFun, Acc0) when is_list(FileName) -> - {ok, Handle} = file:open(FileName, [binary, raw, read]), - fold_keys(Handle, FoldFun, Acc0); + {ok, Handle} = file:open(FileName, [binary, raw, read]), + fold_keys(Handle, FoldFun, Acc0); fold_keys(Handle, FoldFun, Acc0) -> - {ok, _} = file:position(Handle, bof), - {FirstHashPosition, _} = read_next_2_integers(Handle), - fold(Handle, FoldFun, Acc0, {256 * ?DWORD_SIZE, FirstHashPosition}, true). + {ok, _} = file:position(Handle, bof), + {FirstHashPosition, _} = read_next_2_integers(Handle), + fold(Handle, FoldFun, Acc0, {256 * ?DWORD_SIZE, FirstHashPosition}, true). %%%%%%%%%%%%%%%%%%%% @@ -302,24 +406,24 @@ fold_keys(Handle, FoldFun, Acc0) -> %% Base Pos should be at the end of the KV pairs written (the position for) %% the hash tables close_file(Handle, HashTree, BasePos) -> - {ok, BasePos} = file:position(Handle, BasePos), - L2 = write_hash_tables(Handle, HashTree), - write_top_index_table(Handle, BasePos, L2), - file:close(Handle). + {ok, BasePos} = file:position(Handle, BasePos), + L2 = write_hash_tables(Handle, HashTree), + write_top_index_table(Handle, BasePos, L2), + file:close(Handle). %% Fetch a list of positions by passing a key to the HashTree get_hashtree(Key, HashTree) -> - Hash = hash(Key), - Index = hash_to_index(Hash), - Tree = array:get(Index, HashTree), - case gb_trees:lookup(Hash, Tree) of - {value, List} -> - List; - _ -> - [] - end. + Hash = hash(Key), + Index = hash_to_index(Hash), + Tree = array:get(Index, HashTree), + case gb_trees:lookup(Hash, Tree) of + {value, List} -> + List; + _ -> + [] + end. 
%% Add to hash tree - this is an array of 256 gb_trees that contains the Hash %% and position of objects which have been added to an open CDB file @@ -337,100 +441,117 @@ put_hashtree(Key, Position, HashTree) -> %% Function to extract a Key-Value pair given a file handle and a position %% Will confirm that the key matches and do a CRC check when requested extract_kvpair(Handle, Positions, Key) -> - extract_kvpair(Handle, Positions, Key, ?CRC_CHECK). + extract_kvpair(Handle, Positions, Key, ?CRC_CHECK). extract_kvpair(_, [], _, _) -> - missing; + missing; extract_kvpair(Handle, [Position|Rest], Key, Check) -> - {ok, _} = file:position(Handle, Position), - {KeyLength, ValueLength} = read_next_2_integers(Handle), - case read_next_term(Handle, KeyLength) of - Key -> % If same key as passed in, then found! - case read_next_term(Handle, ValueLength, crc, Check) of - {false, _} -> - crc_wonky; - {_, Value} -> - {Key,Value} - end; - _ -> - extract_kvpair(Handle, Rest, Key, Check) - end. + {ok, _} = file:position(Handle, Position), + {KeyLength, ValueLength} = read_next_2_integers(Handle), + case read_next_term(Handle, KeyLength) of + Key -> % If same key as passed in, then found! + case read_next_term(Handle, ValueLength, crc, Check) of + {false, _} -> + crc_wonky; + {_, Value} -> + {Key,Value} + end; + _ -> + extract_kvpair(Handle, Rest, Key, Check) + end. %% Scan through the file until there is a failure to crc check an input, and %% at that point return the position and the key dictionary scanned so far scan_over_file(Handle, Position) -> - HashTree = array:new(256, {default, gb_trees:empty()}), - scan_over_file(Handle, Position, HashTree). + HashTree = array:new(256, {default, gb_trees:empty()}), + scan_over_file(Handle, Position, HashTree). 
scan_over_file(Handle, Position, HashTree) -> - case saferead_keyvalue(Handle) of - false -> - {Position, HashTree}; - {Key, ValueAsBin, KeyLength, ValueLength} -> - case crccheck_value(ValueAsBin) of - true -> - NewPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE, - scan_over_file(Handle, NewPosition, - put_hashtree(Key, Position, HashTree)); + case saferead_keyvalue(Handle) of false -> - io:format("CRC check returned false on key of ~w ~n", [Key]), - {Position, HashTree} - end; - eof -> - {Position, HashTree} - end. + {Position, HashTree}; + {Key, ValueAsBin, KeyLength, ValueLength} -> + case crccheck_value(ValueAsBin) of + true -> + NewPosition = Position + KeyLength + ValueLength + + ?DWORD_SIZE, + scan_over_file(Handle, + NewPosition, + put_hashtree(Key, Position, HashTree)); + false -> + io:format("CRC check returned false on key of ~w ~n", + [Key]), + {Position, HashTree} + end; + eof -> + {Position, HashTree} + end. %% Read the Key/Value at this point, returning {ok, Key, Value} %% catch expected exceptiosn associated with file corruption (or end) and %% return eof saferead_keyvalue(Handle) -> - case read_next_2_integers(Handle) of - {error, einval} -> - false; - eof -> - false; - {KeyL, ValueL} -> - case read_next_term(Handle, KeyL) of + case read_next_2_integers(Handle) of {error, einval} -> - false; + false; eof -> - false; - Key -> - case file:read(Handle, ValueL) of - {error, einval} -> - false; - eof -> - false; - {ok, Value} -> - {Key, Value, KeyL, ValueL} - end - end - end. + false; + {KeyL, ValueL} -> + io:format("KeyL ~w ValueL ~w~n", [KeyL, ValueL]), + case safe_read_next_term(Handle, KeyL) of + {error, einval} -> + false; + eof -> + false; + false -> + false; + Key -> + io:format("Found Key of ~s~n", [Key]), + case file:read(Handle, ValueL) of + {error, einval} -> + false; + eof -> + false; + {ok, Value} -> + {Key, Value, KeyL, ValueL} + end + end + end. 
+ + +safe_read_next_term(Handle, Length) -> + try read_next_term(Handle, Length) of + Term -> + Term + catch + error:badarg -> + false + end. %% The first four bytes of the value are the crc check crccheck_value(Value) when byte_size(Value) >4 -> - << Hash:32/integer, Tail/bitstring>> = Value, - case calc_crc(Tail) of - Hash -> - true; - _ -> - io:format("CRC check failed due to mismatch ~n"), - false - end; + << Hash:32/integer, Tail/bitstring>> = Value, + case calc_crc(Tail) of + Hash -> + true; + _ -> + io:format("CRC check failed due to mismatch ~n"), + false + end; crccheck_value(_) -> - io:format("CRC check failed due to size ~n"), - false. + io:format("CRC check failed due to size ~n"), + false. %% Run a crc check filling out any values which don't fit on byte boundary calc_crc(Value) -> - case bit_size(Value) rem 8 of - 0 -> - erlang:crc32(Value); - N -> - M = 8 - N, - erlang:crc32(<>) - end. + case bit_size(Value) rem 8 of + 0 -> + erlang:crc32(Value); + N -> + M = 8 - N, + erlang:crc32(<>) + end. %% %% to_dict(FileName) @@ -443,70 +564,69 @@ calc_crc(Value) -> %% dictionary() = dict() %% to_dict(FileName) -> - KeyValueList = dump(FileName), - dict:from_list(KeyValueList). + KeyValueList = dump(FileName), + dict:from_list(KeyValueList). read_next_term(Handle, Length) -> - case file:read(Handle, Length) of - {ok, Bin} -> - binary_to_term(Bin); - ReadError -> - ReadError - end. + case file:read(Handle, Length) of + {ok, Bin} -> + binary_to_term(Bin); + ReadError -> + ReadError + end. 
%% Read next string where the string has a CRC prepended - stripping the crc %% and checking if requested read_next_term(Handle, Length, crc, Check) -> - case Check of - true -> - {ok, <>} = file:read(Handle, Length), - case calc_crc(Bin) of - CRC -> - {true, binary_to_term(Bin)}; + case Check of + true -> + {ok, <>} = file:read(Handle, Length), + case calc_crc(Bin) of + CRC -> + {true, binary_to_term(Bin)}; + _ -> + {false, binary_to_term(Bin)} + end; _ -> - {false, binary_to_term(Bin)} - end; - _ -> - {ok, _} = file:position(Handle, {cur, 4}), - {ok, Bin} = file:read(Handle, Length - 4), - {unchecked, binary_to_term(Bin)} - end. + {ok, _} = file:position(Handle, {cur, 4}), + {ok, Bin} = file:read(Handle, Length - 4), + {unchecked, binary_to_term(Bin)} + end. %% Used for reading lengths %% Note that the endian_flip is required to make the file format compatible %% with CDB read_next_2_integers(Handle) -> - case file:read(Handle,?DWORD_SIZE) of - {ok, <>} -> - {endian_flip(Int1), endian_flip(Int2)}; - ReadError - -> - ReadError - end. + case file:read(Handle,?DWORD_SIZE) of + {ok, <>} -> + {endian_flip(Int1), endian_flip(Int2)}; + ReadError -> + ReadError + end. %% Seach the hash table for the matching hash and key. Be prepared for %% multiple keys to have the same hash value. 
search_hash_table(_Handle, [], _Hash, _Key, _CRCCHeck) -> - missing; + missing; search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key, CRCCheck) -> - {ok, _} = file:position(Handle, Entry), - {StoredHash, DataLoc} = read_next_2_integers(Handle), - case StoredHash of - Hash -> - KV = extract_kvpair(Handle, [DataLoc], Key, CRCCheck), - case KV of - missing -> - search_hash_table(Handle, RestOfEntries, Hash, Key, CRCCheck); + {ok, _} = file:position(Handle, Entry), + {StoredHash, DataLoc} = read_next_2_integers(Handle), + case StoredHash of + Hash -> + KV = extract_kvpair(Handle, [DataLoc], Key, CRCCheck), + case KV of + missing -> + search_hash_table(Handle, RestOfEntries, Hash, Key, CRCCheck); + _ -> + KV + end; + 0 -> + % Hash is 0 so key must be missing as 0 found before Hash matched + missing; _ -> - KV - end; - 0 -> - % Hash is 0 so key must be missing as 0 found before Hash matched - missing; - _ -> - search_hash_table(Handle, RestOfEntries, Hash, Key, CRCCheck) - end. + search_hash_table(Handle, RestOfEntries, Hash, Key, CRCCheck) + end. % Write Key and Value tuples into the CDB. Each tuple consists of a % 4 byte key length, a 4 byte value length, the actual key followed @@ -517,53 +637,53 @@ search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key, CRCCheck) -> % values being a list of the hash and the position of the % key/value binary in the file. write_key_value_pairs(Handle, KeyValueList) -> - {ok, Position} = file:position(Handle, cur), - HashTree = array:new(256, {default, gb_trees:empty()}), - write_key_value_pairs(Handle, KeyValueList, {Position, HashTree}). + {ok, Position} = file:position(Handle, cur), + HashTree = array:new(256, {default, gb_trees:empty()}), + write_key_value_pairs(Handle, KeyValueList, {Position, HashTree}). 
write_key_value_pairs(_, [], Acc) -> - Acc; + Acc; write_key_value_pairs(Handle, [HeadPair|TailList], Acc) -> - {Key, Value} = HeadPair, - {Handle, NewPosition, HashTree} = put(Handle, Key, Value, Acc), - write_key_value_pairs(Handle, TailList, {NewPosition, HashTree}). + {Key, Value} = HeadPair, + {Handle, NewPosition, HashTree} = put(Handle, Key, Value, Acc), + write_key_value_pairs(Handle, TailList, {NewPosition, HashTree}). %% Write the actual hashtables at the bottom of the file. Each hash table %% entry is a doubleword in length. The first word is the hash value %% corresponding to a key and the second word is a file pointer to the %% corresponding {key,value} tuple. write_hash_tables(Handle, HashTree) -> - Seq = lists:seq(0, 255), - {ok, StartPos} = file:position(Handle, cur), - write_hash_tables(Seq, Handle, HashTree, StartPos, []). + Seq = lists:seq(0, 255), + {ok, StartPos} = file:position(Handle, cur), + write_hash_tables(Seq, Handle, HashTree, StartPos, []). write_hash_tables([], Handle, _, StartPos, IndexList) -> - {ok, EndPos} = file:position(Handle, cur), - ok = file:advise(Handle, StartPos, EndPos - StartPos, will_need), - IndexList; + {ok, EndPos} = file:position(Handle, cur), + ok = file:advise(Handle, StartPos, EndPos - StartPos, will_need), + IndexList; write_hash_tables([Index|Rest], Handle, HashTree, StartPos, IndexList) -> - Tree = array:get(Index, HashTree), - case gb_trees:keys(Tree) of - [] -> - write_hash_tables(Rest, Handle, HashTree, StartPos, IndexList); - _ -> - HashList = gb_trees:to_list(Tree), - BinList = build_binaryhashlist(HashList, []), - IndexLength = length(BinList) * 2, - SlotList = lists:duplicate(IndexLength, <<0:32, 0:32>>), - - Fn = fun({Hash, Binary}, AccSlotList) -> - Slot1 = find_open_slot(AccSlotList, Hash), - {L1, [<<0:32, 0:32>>|L2]} = lists:split(Slot1, AccSlotList), - lists:append(L1, [Binary|L2]) - end, - NewSlotList = lists:foldl(Fn, SlotList, BinList), - - {ok, CurrPos} = file:position(Handle, cur), - 
file:write(Handle, NewSlotList), - write_hash_tables(Rest, Handle, HashTree, StartPos, - [{Index, CurrPos, IndexLength}|IndexList]) - end. + Tree = array:get(Index, HashTree), + case gb_trees:keys(Tree) of + [] -> + write_hash_tables(Rest, Handle, HashTree, StartPos, IndexList); + _ -> + HashList = gb_trees:to_list(Tree), + BinList = build_binaryhashlist(HashList, []), + IndexLength = length(BinList) * 2, + SlotList = lists:duplicate(IndexLength, <<0:32, 0:32>>), + + Fn = fun({Hash, Binary}, AccSlotList) -> + Slot1 = find_open_slot(AccSlotList, Hash), + {L1, [<<0:32, 0:32>>|L2]} = lists:split(Slot1, AccSlotList), + lists:append(L1, [Binary|L2]) + end, + + NewSlotList = lists:foldl(Fn, SlotList, BinList), + {ok, CurrPos} = file:position(Handle, cur), + file:write(Handle, NewSlotList), + write_hash_tables(Rest, Handle, HashTree, StartPos, + [{Index, CurrPos, IndexLength}|IndexList]) + end. %% The list created from the original HashTree may have duplicate positions %% e.g. {Key, [Value1, Value2]}. Before any writing is done it is necessary @@ -572,31 +692,31 @@ write_hash_tables([Index|Rest], Handle, HashTree, StartPos, IndexList) -> %% This function creates {Hash, Binary} pairs on a list where there is a unique %% entry for eveyr Key/Value build_binaryhashlist([], BinList) -> - BinList; + BinList; build_binaryhashlist([{Hash, [Position|TailP]}|TailKV], BinList) -> - HashLE = endian_flip(Hash), - PosLE = endian_flip(Position), - NewBin = <>, - case TailP of - [] -> - build_binaryhashlist(TailKV, [{Hash, NewBin}|BinList]); - _ -> - build_binaryhashlist([{Hash, TailP}|TailKV], [{Hash, NewBin}|BinList]) - end. + HashLE = endian_flip(Hash), + PosLE = endian_flip(Position), + NewBin = <>, + case TailP of + [] -> + build_binaryhashlist(TailKV, [{Hash, NewBin}|BinList]); + _ -> + build_binaryhashlist([{Hash, TailP}|TailKV], [{Hash, NewBin}|BinList]) + end. 
%% Slot is zero based because it comes from a REM find_open_slot(List, Hash) -> - Len = length(List), - Slot = hash_to_slot(Hash, Len), - Seq = lists:seq(1, Len), - {CL1, CL2} = lists:split(Slot, Seq), - {L1, L2} = lists:split(Slot, List), - find_open_slot1(lists:append(CL2, CL1), lists:append(L2, L1)). + Len = length(List), + Slot = hash_to_slot(Hash, Len), + Seq = lists:seq(1, Len), + {CL1, CL2} = lists:split(Slot, Seq), + {L1, L2} = lists:split(Slot, List), + find_open_slot1(lists:append(CL2, CL1), lists:append(L2, L1)). find_open_slot1([Slot|_RestOfSlots], [<<0:32,0:32>>|_RestOfEntries]) -> - Slot - 1; + Slot - 1; find_open_slot1([_|RestOfSlots], [_|RestOfEntries]) -> - find_open_slot1(RestOfSlots, RestOfEntries). + find_open_slot1(RestOfSlots, RestOfEntries). %% Write the top most 255 doubleword entries. First word is the @@ -606,71 +726,71 @@ find_open_slot1([_|RestOfSlots], [_|RestOfEntries]) -> write_top_index_table(Handle, BasePos, List) -> % fold function to find any missing index tuples, and add one a replacement % in this case with a count of 0. Also orders the list by index - FnMakeIndex = fun(I, Acc) -> - case lists:keysearch(I, 1, List) of - {value, Tuple} -> - [Tuple|Acc]; - false -> - [{I, BasePos, 0}|Acc] - end - end, - % Fold function to write the index entries - FnWriteIndex = fun({Index, Pos, Count}, CurrPos) -> - {ok, _} = file:position(Handle, ?DWORD_SIZE * Index), - case Count == 0 of - true -> - PosLE = endian_flip(CurrPos), - NextPos = CurrPos; - false -> - PosLE = endian_flip(Pos), - NextPos = Pos + (Count * ?DWORD_SIZE) - end, - CountLE = endian_flip(Count), - Bin = <>, - file:write(Handle, Bin), - NextPos - end, - - Seq = lists:seq(0, 255), - CompleteList = lists:keysort(1, lists:foldl(FnMakeIndex, [], Seq)), - lists:foldl(FnWriteIndex, BasePos, CompleteList), - ok = file:advise(Handle, 0, ?DWORD_SIZE * 256, will_need). 
+ FnMakeIndex = fun(I, Acc) -> + case lists:keysearch(I, 1, List) of + {value, Tuple} -> + [Tuple|Acc]; + false -> + [{I, BasePos, 0}|Acc] + end + end, + % Fold function to write the index entries + FnWriteIndex = fun({Index, Pos, Count}, CurrPos) -> + {ok, _} = file:position(Handle, ?DWORD_SIZE * Index), + case Count == 0 of + true -> + PosLE = endian_flip(CurrPos), + NextPos = CurrPos; + false -> + PosLE = endian_flip(Pos), + NextPos = Pos + (Count * ?DWORD_SIZE) + end, + CountLE = endian_flip(Count), + Bin = <>, + file:write(Handle, Bin), + NextPos + end, + + Seq = lists:seq(0, 255), + CompleteList = lists:keysort(1, lists:foldl(FnMakeIndex, [], Seq)), + lists:foldl(FnWriteIndex, BasePos, CompleteList), + ok = file:advise(Handle, 0, ?DWORD_SIZE * 256, will_need). endian_flip(Int) -> - <> = <>, - X. + <> = <>, + X. hash(Key) -> - BK = term_to_binary(Key), - H = 5381, - hash1(H, BK) band 16#FFFFFFFF. + BK = term_to_binary(Key), + H = 5381, + hash1(H, BK) band 16#FFFFFFFF. hash1(H, <<>>) -> - H; + H; hash1(H, <>) -> - H1 = H * 33, - H2 = H1 bxor B, - hash1(H2, Rest). + H1 = H * 33, + H2 = H1 bxor B, + hash1(H2, Rest). % Get the least significant 8 bits from the hash. hash_to_index(Hash) -> - Hash band 255. + Hash band 255. hash_to_slot(Hash,L) -> - (Hash bsr 8) rem L. + (Hash bsr 8) rem L. %% Create a binary of the LengthKeyLengthValue, adding a CRC check %% at the front of the value key_value_to_record({Key, Value}) -> - BK = term_to_binary(Key), - BV = term_to_binary(Value), - LK = byte_size(BK), - LV = byte_size(BV), - LK_FL = endian_flip(LK), - LV_FL = endian_flip(LV + 4), - CRC = calc_crc(BV), - <>. + BK = term_to_binary(Key), + BV = term_to_binary(Value), + LK = byte_size(BK), + LV = byte_size(BV), + LK_FL = endian_flip(LK), + LV_FL = endian_flip(LV + 4), + CRC = calc_crc(BV), + <>. %%%%%%%%%%%%%%%% @@ -679,307 +799,307 @@ key_value_to_record({Key, Value}) -> -ifdef(TEST). 
write_key_value_pairs_1_test() -> - {ok,Handle} = file:open("test.cdb",write), - {_, HashTree} = write_key_value_pairs(Handle,[{"key1","value1"},{"key2","value2"}]), - Hash1 = hash("key1"), - Index1 = hash_to_index(Hash1), - Hash2 = hash("key2"), - Index2 = hash_to_index(Hash2), - R0 = array:new(256, {default, gb_trees:empty()}), - R1 = array:set(Index1, gb_trees:insert(Hash1, [0], array:get(Index1, R0)), R0), - R2 = array:set(Index2, gb_trees:insert(Hash2, [30], array:get(Index2, R1)), R1), - io:format("HashTree is ~w~n", [HashTree]), - io:format("Expected HashTree is ~w~n", [R2]), - ?assertMatch(R2, HashTree), - ok = file:delete("test.cdb"). + {ok,Handle} = file:open("../test/test.cdb",write), + {_, HashTree} = write_key_value_pairs(Handle,[{"key1","value1"},{"key2","value2"}]), + Hash1 = hash("key1"), + Index1 = hash_to_index(Hash1), + Hash2 = hash("key2"), + Index2 = hash_to_index(Hash2), + R0 = array:new(256, {default, gb_trees:empty()}), + R1 = array:set(Index1, gb_trees:insert(Hash1, [0], array:get(Index1, R0)), R0), + R2 = array:set(Index2, gb_trees:insert(Hash2, [30], array:get(Index2, R1)), R1), + io:format("HashTree is ~w~n", [HashTree]), + io:format("Expected HashTree is ~w~n", [R2]), + ?assertMatch(R2, HashTree), + ok = file:delete("../test/test.cdb"). write_hash_tables_1_test() -> - {ok, Handle} = file:open("test.cdb",write), - R0 = array:new(256, {default, gb_trees:empty()}), - R1 = array:set(64, gb_trees:insert(6383014720, [18], array:get(64, R0)), R0), - R2 = array:set(67, gb_trees:insert(6383014723, [0], array:get(67, R1)), R1), - Result = write_hash_tables(Handle, R2), - io:format("write hash tables result of ~w ~n", [Result]), - ?assertMatch(Result,[{67,16,2},{64,0,2}]), - ok = file:delete("test.cdb"). 
+ {ok, Handle} = file:open("../test/testx.cdb",write), + R0 = array:new(256, {default, gb_trees:empty()}), + R1 = array:set(64, gb_trees:insert(6383014720, [18], array:get(64, R0)), R0), + R2 = array:set(67, gb_trees:insert(6383014723, [0], array:get(67, R1)), R1), + Result = write_hash_tables(Handle, R2), + io:format("write hash tables result of ~w ~n", [Result]), + ?assertMatch(Result,[{67,16,2},{64,0,2}]), + ok = file:delete("../test/testx.cdb"). find_open_slot_1_test() -> - List = [<<1:32,1:32>>,<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>], - Slot = find_open_slot(List,0), - ?assertMatch(Slot,1). + List = [<<1:32,1:32>>,<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>], + Slot = find_open_slot(List,0), + ?assertMatch(Slot,1). find_open_slot_2_test() -> - List = [<<0:32,0:32>>,<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>], - Slot = find_open_slot(List,0), - ?assertMatch(Slot,0). + List = [<<0:32,0:32>>,<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>], + Slot = find_open_slot(List,0), + ?assertMatch(Slot,0). find_open_slot_3_test() -> - List = [<<1:32,1:32>>,<<1:32,1:32>>,<<1:32,1:32>>,<<0:32,0:32>>], - Slot = find_open_slot(List,2), - ?assertMatch(Slot,3). + List = [<<1:32,1:32>>,<<1:32,1:32>>,<<1:32,1:32>>,<<0:32,0:32>>], + Slot = find_open_slot(List,2), + ?assertMatch(Slot,3). find_open_slot_4_test() -> - List = [<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>,<<1:32,1:32>>], - Slot = find_open_slot(List,1), - ?assertMatch(Slot,0). + List = [<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>,<<1:32,1:32>>], + Slot = find_open_slot(List,1), + ?assertMatch(Slot,0). find_open_slot_5_test() -> - List = [<<1:32,1:32>>,<<1:32,1:32>>,<<0:32,0:32>>,<<1:32,1:32>>], - Slot = find_open_slot(List,3), - ?assertMatch(Slot,2). + List = [<<1:32,1:32>>,<<1:32,1:32>>,<<0:32,0:32>>,<<1:32,1:32>>], + Slot = find_open_slot(List,3), + ?assertMatch(Slot,2). 
full_1_test() -> - List1 = lists:sort([{"key1","value1"},{"key2","value2"}]), - create("simple.cdb",lists:sort([{"key1","value1"},{"key2","value2"}])), - List2 = lists:sort(dump("simple.cdb")), - ?assertMatch(List1,List2), - ok = file:delete("simple.cdb"). + List1 = lists:sort([{"key1","value1"},{"key2","value2"}]), + create("../test/simple.cdb",lists:sort([{"key1","value1"},{"key2","value2"}])), + List2 = lists:sort(dump("../test/simple.cdb")), + ?assertMatch(List1,List2), + ok = file:delete("../test/simple.cdb"). full_2_test() -> - List1 = lists:sort([{lists:flatten(io_lib:format("~s~p",[Prefix,Plug])), - lists:flatten(io_lib:format("value~p",[Plug]))} - || Plug <- lists:seq(1,2000), - Prefix <- ["dsd","so39ds","oe9%#*(","020dkslsldclsldowlslf%$#", - "tiep4||","qweq"]]), - create("full.cdb",List1), - List2 = lists:sort(dump("full.cdb")), - ?assertMatch(List1,List2), - ok = file:delete("full.cdb"). + List1 = lists:sort([{lists:flatten(io_lib:format("~s~p",[Prefix,Plug])), + lists:flatten(io_lib:format("value~p",[Plug]))} + || Plug <- lists:seq(1,2000), + Prefix <- ["dsd","so39ds","oe9%#*(","020dkslsldclsldowlslf%$#", + "tiep4||","qweq"]]), + create("../test/full.cdb",List1), + List2 = lists:sort(dump("../test/full.cdb")), + ?assertMatch(List1,List2), + ok = file:delete("../test/full.cdb"). from_dict_test() -> - D = dict:new(), - D1 = dict:store("a","b",D), - D2 = dict:store("c","d",D1), - ok = from_dict("from_dict_test.cdb",D2), - io:format("Store created ~n", []), - KVP = lists:sort(dump("from_dict_test.cdb")), - D3 = lists:sort(dict:to_list(D2)), - io:format("KVP is ~w~n", [KVP]), - io:format("D3 is ~w~n", [D3]), - ?assertMatch(KVP, D3), - ok = file:delete("from_dict_test.cdb"). 
+ D = dict:new(), + D1 = dict:store("a","b",D), + D2 = dict:store("c","d",D1), + ok = from_dict("../test/from_dict_test.cdb",D2), + io:format("Store created ~n", []), + KVP = lists:sort(dump("../test/from_dict_test.cdb")), + D3 = lists:sort(dict:to_list(D2)), + io:format("KVP is ~w~n", [KVP]), + io:format("D3 is ~w~n", [D3]), + ?assertMatch(KVP, D3), + ok = file:delete("../test/from_dict_test.cdb"). to_dict_test() -> - D = dict:new(), - D1 = dict:store("a","b",D), - D2 = dict:store("c","d",D1), - ok = from_dict("from_dict_test.cdb",D2), - Dict = to_dict("from_dict_test.cdb"), - D3 = lists:sort(dict:to_list(D2)), - D4 = lists:sort(dict:to_list(Dict)), - ?assertMatch(D4,D3), - ok = file:delete("from_dict_test.cdb"). + D = dict:new(), + D1 = dict:store("a","b",D), + D2 = dict:store("c","d",D1), + ok = from_dict("../test/from_dict_test1.cdb",D2), + Dict = to_dict("../test/from_dict_test1.cdb"), + D3 = lists:sort(dict:to_list(D2)), + D4 = lists:sort(dict:to_list(Dict)), + ?assertMatch(D4,D3), + ok = file:delete("../test/from_dict_test1.cdb"). crccheck_emptyvalue_test() -> - ?assertMatch(false, crccheck_value(<<>>)). + ?assertMatch(false, crccheck_value(<<>>)). crccheck_shortvalue_test() -> - Value = <<128,128,32>>, - ?assertMatch(false, crccheck_value(Value)). + Value = <<128,128,32>>, + ?assertMatch(false, crccheck_value(Value)). crccheck_justshortvalue_test() -> - Value = <<128,128,32,64>>, - ?assertMatch(false, crccheck_value(Value)). + Value = <<128,128,32,64>>, + ?assertMatch(false, crccheck_value(Value)). crccheck_correctvalue_test() -> - Value = term_to_binary("some text as value"), - Hash = erlang:crc32(Value), - ValueOnDisk = <>, - ?assertMatch(true, crccheck_value(ValueOnDisk)). + Value = term_to_binary("some text as value"), + Hash = erlang:crc32(Value), + ValueOnDisk = <>, + ?assertMatch(true, crccheck_value(ValueOnDisk)). 
crccheck_wronghash_test() -> - Value = term_to_binary("some text as value"), - Hash = erlang:crc32(Value) + 1, - ValueOnDisk = <>, - ?assertMatch(false, crccheck_value(ValueOnDisk)). + Value = term_to_binary("some text as value"), + Hash = erlang:crc32(Value) + 1, + ValueOnDisk = <>, + ?assertMatch(false, crccheck_value(ValueOnDisk)). crccheck_truncatedvalue_test() -> - Value = term_to_binary("some text as value"), - Hash = erlang:crc32(Value), - ValueOnDisk = <>, - Size = bit_size(ValueOnDisk) - 1, - <> = ValueOnDisk, - ?assertMatch(false, crccheck_value(TruncatedValue)). + Value = term_to_binary("some text as value"), + Hash = erlang:crc32(Value), + ValueOnDisk = <>, + Size = bit_size(ValueOnDisk) - 1, + <> = ValueOnDisk, + ?assertMatch(false, crccheck_value(TruncatedValue)). activewrite_singlewrite_test() -> - Key = "0002", - Value = "some text as new value", - InitialD = dict:new(), - InitialD1 = dict:store("0001", "Initial value", InitialD), - ok = from_dict("test_mem.cdb", InitialD1), - io:format("New db file created ~n", []), - {LastPosition, KeyDict} = open_active_file("test_mem.cdb"), - io:format("File opened as new active file " - "with LastPosition=~w ~n", [LastPosition]), - {_, _, UpdKeyDict} = put("test_mem.cdb", Key, Value, {LastPosition, KeyDict}), - io:format("New key and value added to active file ~n", []), - ?assertMatch({Key, Value}, get_mem(Key, "test_mem.cdb", UpdKeyDict)), - ok = file:delete("test_mem.cdb"). 
+ Key = "0002", + Value = "some text as new value", + InitialD = dict:new(), + InitialD1 = dict:store("0001", "Initial value", InitialD), + ok = from_dict("../test/test_mem.cdb", InitialD1), + io:format("New db file created ~n", []), + {LastPosition, KeyDict} = open_active_file("../test/test_mem.cdb"), + io:format("File opened as new active file " + "with LastPosition=~w ~n", [LastPosition]), + {_, _, UpdKeyDict} = put("../test/test_mem.cdb", Key, Value, {LastPosition, KeyDict}), + io:format("New key and value added to active file ~n", []), + ?assertMatch({Key, Value}, get_mem(Key, "../test/test_mem.cdb", UpdKeyDict)), + ok = file:delete("../test/test_mem.cdb"). search_hash_table_findinslot_test() -> - Key1 = "key1", % this is in slot 3 if count is 8 - D = dict:from_list([{Key1, "value1"}, {"K2", "V2"}, {"K3", "V3"}, - {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"}, - {"K8", "V8"}]), - ok = from_dict("hashtable1_test.cdb",D), - {ok, Handle} = file:open("hashtable1_test.cdb", [binary, raw, read, write]), - Hash = hash(Key1), - Index = hash_to_index(Hash), - {ok, _} = file:position(Handle, {bof, ?DWORD_SIZE*Index}), - {HashTable, Count} = read_next_2_integers(Handle), - io:format("Count of ~w~n", [Count]), - {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}), - Slot = hash_to_slot(Hash, Count), - io:format("Slot of ~w~n", [Slot]), - {ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}), - {ReadH3, ReadP3} = read_next_2_integers(Handle), - {ReadH4, ReadP4} = read_next_2_integers(Handle), - io:format("Slot 1 has Hash ~w Position ~w~n", [ReadH3, ReadP3]), - io:format("Slot 2 has Hash ~w Position ~w~n", [ReadH4, ReadP4]), - ?assertMatch(0, ReadH4), - ?assertMatch({"key1", "value1"}, get(Handle, Key1)), - {ok, _} = file:position(Handle, FirstHashPosition), - FlipH3 = endian_flip(ReadH3), - FlipP3 = endian_flip(ReadP3), - RBin = <>, - io:format("Replacement binary of ~w~n", [RBin]), - {ok, OldBin} = file:pread(Handle, - FirstHashPosition + (Slot 
-1) * ?DWORD_SIZE, 16), - io:format("Bin to be replaced is ~w ~n", [OldBin]), - ok = file:pwrite(Handle, FirstHashPosition + (Slot -1) * ?DWORD_SIZE, RBin), - ok = file:close(Handle), - io:format("Find key following change to hash table~n"), - ?assertMatch(missing, get("hashtable1_test.cdb", Key1)), - ok = file:delete("hashtable1_test.cdb"). + Key1 = "key1", % this is in slot 3 if count is 8 + D = dict:from_list([{Key1, "value1"}, {"K2", "V2"}, {"K3", "V3"}, + {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"}, + {"K8", "V8"}]), + ok = from_dict("../test/hashtable1_test.cdb",D), + {ok, Handle} = file:open("../test/hashtable1_test.cdb", [binary, raw, read, write]), + Hash = hash(Key1), + Index = hash_to_index(Hash), + {ok, _} = file:position(Handle, {bof, ?DWORD_SIZE*Index}), + {HashTable, Count} = read_next_2_integers(Handle), + io:format("Count of ~w~n", [Count]), + {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}), + Slot = hash_to_slot(Hash, Count), + io:format("Slot of ~w~n", [Slot]), + {ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}), + {ReadH3, ReadP3} = read_next_2_integers(Handle), + {ReadH4, ReadP4} = read_next_2_integers(Handle), + io:format("Slot 1 has Hash ~w Position ~w~n", [ReadH3, ReadP3]), + io:format("Slot 2 has Hash ~w Position ~w~n", [ReadH4, ReadP4]), + ?assertMatch(0, ReadH4), + ?assertMatch({"key1", "value1"}, get(Handle, Key1)), + {ok, _} = file:position(Handle, FirstHashPosition), + FlipH3 = endian_flip(ReadH3), + FlipP3 = endian_flip(ReadP3), + RBin = <>, + io:format("Replacement binary of ~w~n", [RBin]), + {ok, OldBin} = file:pread(Handle, + FirstHashPosition + (Slot -1) * ?DWORD_SIZE, 16), + io:format("Bin to be replaced is ~w ~n", [OldBin]), + ok = file:pwrite(Handle, FirstHashPosition + (Slot -1) * ?DWORD_SIZE, RBin), + ok = file:close(Handle), + io:format("Find key following change to hash table~n"), + ?assertMatch(missing, get("../test/hashtable1_test.cdb", Key1)), + ok = 
file:delete("../test/hashtable1_test.cdb"). getnextkey_inclemptyvalue_test() -> - L = [{"K9", "V9"}, {"K2", "V2"}, {"K3", ""}, - {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"}, - {"K8", "V8"}, {"K1", "V1"}], - ok = create("hashtable1_test.cdb", L), - {FirstKey, Handle, P1} = get_nextkey("hashtable1_test.cdb"), - io:format("Next position details of ~w~n", [P1]), - ?assertMatch("K9", FirstKey), - {SecondKey, Handle, P2} = get_nextkey(Handle, P1), - ?assertMatch("K2", SecondKey), - {ThirdKeyNoValue, Handle, P3} = get_nextkey(Handle, P2), - ?assertMatch("K3", ThirdKeyNoValue), - {_, Handle, P4} = get_nextkey(Handle, P3), - {_, Handle, P5} = get_nextkey(Handle, P4), - {_, Handle, P6} = get_nextkey(Handle, P5), - {_, Handle, P7} = get_nextkey(Handle, P6), - {_, Handle, P8} = get_nextkey(Handle, P7), - {LastKey, Info} = get_nextkey(Handle, P8), - ?assertMatch(nomorekeys, Info), - ?assertMatch("K1", LastKey), - ok = file:delete("hashtable1_test.cdb"). + L = [{"K9", "V9"}, {"K2", "V2"}, {"K3", ""}, + {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"}, + {"K8", "V8"}, {"K1", "V1"}], + ok = create("../test/hashtable2_test.cdb", L), + {FirstKey, Handle, P1} = get_nextkey("../test/hashtable2_test.cdb"), + io:format("Next position details of ~w~n", [P1]), + ?assertMatch("K9", FirstKey), + {SecondKey, Handle, P2} = get_nextkey(Handle, P1), + ?assertMatch("K2", SecondKey), + {ThirdKeyNoValue, Handle, P3} = get_nextkey(Handle, P2), + ?assertMatch("K3", ThirdKeyNoValue), + {_, Handle, P4} = get_nextkey(Handle, P3), + {_, Handle, P5} = get_nextkey(Handle, P4), + {_, Handle, P6} = get_nextkey(Handle, P5), + {_, Handle, P7} = get_nextkey(Handle, P6), + {_, Handle, P8} = get_nextkey(Handle, P7), + {LastKey, Info} = get_nextkey(Handle, P8), + ?assertMatch(nomorekeys, Info), + ?assertMatch("K1", LastKey), + ok = file:delete("../test/hashtable2_test.cdb"). 
newactivefile_test() -> - {LastPosition, _} = open_active_file("activefile_test.cdb"), - ?assertMatch(256 * ?DWORD_SIZE, LastPosition), - Response = get_nextkey("activefile_test.cdb"), - ?assertMatch(nomorekeys, Response), - ok = file:delete("activefile_test.cdb"). + {LastPosition, _} = open_active_file("../test/activefile_test.cdb"), + ?assertMatch(256 * ?DWORD_SIZE, LastPosition), + Response = get_nextkey("../test/activefile_test.cdb"), + ?assertMatch(nomorekeys, Response), + ok = file:delete("../test/activefile_test.cdb"). emptyvalue_fromdict_test() -> - D = dict:new(), - D1 = dict:store("K1", "V1", D), - D2 = dict:store("K2", "", D1), - D3 = dict:store("K3", "V3", D2), - D4 = dict:store("K4", "", D3), - ok = from_dict("from_dict_test_ev.cdb",D4), - io:format("Store created ~n", []), - KVP = lists:sort(dump("from_dict_test_ev.cdb")), - D_Result = lists:sort(dict:to_list(D4)), - io:format("KVP is ~w~n", [KVP]), - io:format("D_Result is ~w~n", [D_Result]), - ?assertMatch(KVP, D_Result), - ok = file:delete("from_dict_test_ev.cdb"). + D = dict:new(), + D1 = dict:store("K1", "V1", D), + D2 = dict:store("K2", "", D1), + D3 = dict:store("K3", "V3", D2), + D4 = dict:store("K4", "", D3), + ok = from_dict("../test/from_dict_test_ev.cdb",D4), + io:format("Store created ~n", []), + KVP = lists:sort(dump("../test/from_dict_test_ev.cdb")), + D_Result = lists:sort(dict:to_list(D4)), + io:format("KVP is ~w~n", [KVP]), + io:format("D_Result is ~w~n", [D_Result]), + ?assertMatch(KVP, D_Result), + ok = file:delete("../test/from_dict_test_ev.cdb"). 
fold_test() -> - K1 = {"Key1", 1}, - V1 = 2, - K2 = {"Key1", 2}, - V2 = 4, - K3 = {"Key1", 3}, - V3 = 8, - K4 = {"Key1", 4}, - V4 = 16, - K5 = {"Key1", 5}, - V5 = 32, - D = dict:from_list([{K1, V1}, {K2, V2}, {K3, V3}, {K4, V4}, {K5, V5}]), - ok = from_dict("fold_test.cdb", D), - FromSN = 2, - FoldFun = fun(K, V, Acc) -> - {_Key, Seq} = K, - if Seq > FromSN -> - Acc + V; - true -> - Acc - end - end, - ?assertMatch(56, fold("fold_test.cdb", FoldFun, 0)), - ok = file:delete("fold_test.cdb"). + K1 = {"Key1", 1}, + V1 = 2, + K2 = {"Key1", 2}, + V2 = 4, + K3 = {"Key1", 3}, + V3 = 8, + K4 = {"Key1", 4}, + V4 = 16, + K5 = {"Key1", 5}, + V5 = 32, + D = dict:from_list([{K1, V1}, {K2, V2}, {K3, V3}, {K4, V4}, {K5, V5}]), + ok = from_dict("../test/fold_test.cdb", D), + FromSN = 2, + FoldFun = fun(K, V, Acc) -> + {_Key, Seq} = K, + if Seq > FromSN -> + Acc + V; + true -> + Acc + end + end, + ?assertMatch(56, fold("../test/fold_test.cdb", FoldFun, 0)), + ok = file:delete("../test/fold_test.cdb"). fold_keys_test() -> - K1 = {"Key1", 1}, - V1 = 2, - K2 = {"Key2", 2}, - V2 = 4, - K3 = {"Key3", 3}, - V3 = 8, - K4 = {"Key4", 4}, - V4 = 16, - K5 = {"Key5", 5}, - V5 = 32, - D = dict:from_list([{K1, V1}, {K2, V2}, {K3, V3}, {K4, V4}, {K5, V5}]), - ok = from_dict("fold_keys_test.cdb", D), - FromSN = 2, - FoldFun = fun(K, Acc) -> + K1 = {"Key1", 1}, + V1 = 2, + K2 = {"Key2", 2}, + V2 = 4, + K3 = {"Key3", 3}, + V3 = 8, + K4 = {"Key4", 4}, + V4 = 16, + K5 = {"Key5", 5}, + V5 = 32, + D = dict:from_list([{K1, V1}, {K2, V2}, {K3, V3}, {K4, V4}, {K5, V5}]), + ok = from_dict("../test/fold_keys_test.cdb", D), + FromSN = 2, + FoldFun = fun(K, Acc) -> {Key, Seq} = K, if Seq > FromSN -> - lists:append(Acc, [Key]); - true -> - Acc + lists:append(Acc, [Key]); + true -> + Acc end - end, - Result = fold_keys("fold_keys_test.cdb", FoldFun, []), - ?assertMatch(["Key3", "Key4", "Key5"], lists:sort(Result)), - ok = file:delete("fold_keys_test.cdb"). 
+ end, + Result = fold_keys("../test/fold_keys_test.cdb", FoldFun, []), + ?assertMatch(["Key3", "Key4", "Key5"], lists:sort(Result)), + ok = file:delete("../test/fold_keys_test.cdb"). fold2_test() -> - K1 = {"Key1", 1}, - V1 = 2, - K2 = {"Key1", 2}, - V2 = 4, - K3 = {"Key1", 3}, - V3 = 8, - K4 = {"Key1", 4}, - V4 = 16, - K5 = {"Key1", 5}, - V5 = 32, - K6 = {"Key2", 1}, - V6 = 64, - D = dict:from_list([{K1, V1}, {K2, V2}, {K3, V3}, + K1 = {"Key1", 1}, + V1 = 2, + K2 = {"Key1", 2}, + V2 = 4, + K3 = {"Key1", 3}, + V3 = 8, + K4 = {"Key1", 4}, + V4 = 16, + K5 = {"Key1", 5}, + V5 = 32, + K6 = {"Key2", 1}, + V6 = 64, + D = dict:from_list([{K1, V1}, {K2, V2}, {K3, V3}, {K4, V4}, {K5, V5}, {K6, V6}]), - ok = from_dict("fold2_test.cdb", D), - FoldFun = fun(K, V, Acc) -> - {Key, Seq} = K, - case dict:find(Key, Acc) of - error -> - dict:store(Key, {Seq, V}, Acc); - {ok, {LSN, _V}} when Seq > LSN -> - dict:store(Key, {Seq, V}, Acc); - _ -> - Acc - end - end, - RD = dict:new(), - RD1 = dict:store("Key1", {5, 32}, RD), - RD2 = dict:store("Key2", {1, 64}, RD1), - Result = fold("fold2_test.cdb", FoldFun, dict:new()), - ?assertMatch(RD2, Result), - ok = file:delete("fold2_test.cdb"). + ok = from_dict("../test/fold2_test.cdb", D), + FoldFun = fun(K, V, Acc) -> + {Key, Seq} = K, + case dict:find(Key, Acc) of + error -> + dict:store(Key, {Seq, V}, Acc); + {ok, {LSN, _V}} when Seq > LSN -> + dict:store(Key, {Seq, V}, Acc); + _ -> + Acc + end + end, + RD = dict:new(), + RD1 = dict:store("Key1", {5, 32}, RD), + RD2 = dict:store("Key2", {1, 64}, RD1), + Result = fold("../test/fold2_test.cdb", FoldFun, dict:new()), + ?assertMatch(RD2, Result), + ok = file:delete("../test/fold2_test.cdb"). -endif. 
diff --git a/src/leveled_concierge.erl b/src/leveled_concierge.erl index 4147a3a..8abb892 100644 --- a/src/leveled_concierge.erl +++ b/src/leveled_concierge.erl @@ -1,3 +1,71 @@ +%% -------- Overview --------- +%% +%% The eleveleddb is based on the LSM-tree similar to leveldb, except that: +%% - Values are kept separately to Keys & Metadata +%% - Different file formats are used for value store (based on constant +%% database), and key store (based on sst) +%% - It is not intended to be general purpose, but be specifically suited for +%% use as a Riak backend in specific circumstances (relatively large values, +%% and frequent use of iterators) +%% - The Value store is an extended nursery log in leveldb terms. It is keyed +%% on the sequence number of the write +%% - The Key Store is an LSM tree, where the key is the actual object key, and +%% the value is the metadata of the object including the sequence number +%% +%% -------- Concierge & Manifest --------- +%% +%% The concierge is responsible for opening up the store, and keeps a manifest +%% of where items can be found. The manifest keeps a mapping of: +%% - Sequence Number ranges and the PID of the Value Store file that contains +%% that range +%% - Key ranges to PID mappings for each level of the KeyStore +%% +%% -------- GET -------- +%% +%% A GET request for Key and Metadata requires a lookup in the KeyStore only. +%% - The concierge should consult the manifest for the lowest level to find +%% the PID which may contain the Key +%% - The concierge should ask the file owner if the Key is present; if not +%% present, lower levels should be consulted until the object is found +%% +%% If a value is required, when the Key/Metadata has been fetched from the +%% KeyStore, the sequence number should be taken, and matched in the ValueStore +%% manifest to find the right value.
+%% +%% For recent PUTs the Key/Metadata is added into memory, and there is an +%% in-memory hash table for the entries in the most recent ValueStore CDB. +%% +%% -------- PUT -------- +%% +%% A PUT request must be persisted to the open (and append-only) CDB file which +%% acts as a transaction log to persist the change. The Key & Metadata also +%% need to be placed in memory. +%% +%% Once the CDB file is full, the managing process should be requested to +%% complete the lookup hash, and a new CDB file be started. +%% +%% Once the in-memory +%% +%% -------- Snapshots (Key Only) -------- +%% +%% If there is an iterator/snapshot request, the concierge will simply hand off a +%% copy of the manifest, and register the interest of the iterator at the +%% manifest sequence number at the time of the request. Iterators should +%% de-register themselves from the manager on completion. Iterators should be +%% automatically released after a timeout period. A file can be deleted if +%% there are no registered iterators from before the point the file was +%% removed from the manifest. +%% +%% -------- Snapshots (Key & Value) -------- +%% +%% +%% +%% -------- Special Ops -------- +%% +%% e.g. Get all for SegmentID/Partition +%% +%% -------- KeyStore --------- +%% %% The concierge is responsible for controlling access to the store and %% maintaining both an in-memory view and a persisted state of all the sft %% files in use across the store. @@ -34,14 +102,7 @@ %% will call the manifets manager on a timeout to confirm that they are no %% longer in use (by any iterators). %% -%% If there is a iterator/snapshot request, the concierge will simply handoff a -%% copy of the manifest, and register the interest of the iterator at the -%% manifest sequence number at the time of the request. Iterators should -%% de-register themselves from the manager on completion. Iterators should be -%% automatically release after a timeout period.
A file can be deleted if -%% there are no registered iterators from before the point the file was -%% removed from the manifest. -%% +