2016-11-07 17:08:50 +00:00
|
|
|
%% -------- CDB File Clerk ---------
|
2015-05-25 22:45:45 +01:00
|
|
|
%%
|
|
|
|
%% This is a modified version of the cdb module provided by Tom Whitcomb.
|
|
|
|
%%
|
|
|
|
%% - https://github.com/thomaswhitcomb/erlang-cdb
|
|
|
|
%%
|
2016-11-07 17:08:50 +00:00
|
|
|
%% The CDB module is an implementation of the constant database format
|
|
|
|
%% described by DJ Bernstein
|
|
|
|
%%
|
|
|
|
%% - https://cr.yp.to/cdb.html
|
|
|
|
%%
|
2015-05-25 22:45:45 +01:00
|
|
|
%% The primary differences are:
|
|
|
|
%% - Support for incrementally writing a CDB file while keeping the hash table
|
|
|
|
%% in memory
|
2016-09-20 18:24:05 +01:00
|
|
|
%% - The ability to scan a database in blocks of sequence numbers
|
2016-11-07 17:08:50 +00:00
|
|
|
%% - The application of a CRC check by default to all values
|
2015-05-25 22:45:45 +01:00
|
|
|
%%
|
|
|
|
%% This module provides functions to create and query a CDB (constant database).
|
|
|
|
%% A CDB implements a two-level hashtable which provides fast {key,value}
|
|
|
|
%% lookups that remain fairly constant in speed regardless of the CDBs size.
|
|
|
|
%%
|
|
|
|
%% The first level in the CDB occupies the first 256 doublewords in the file.
%% Each doubleword slot contains two values. The first is a file pointer to
%% the primary hashtable (at the end of the file) and the second value is the
%% number of entries in the hashtable. The first level table of 256 entries
%% is indexed with the lower eight bits of the hash of the input key.
|
|
|
|
%%
|
|
|
|
%% Following the 255 doublewords are the {key,value} tuples. The tuples are
|
|
|
|
%% packed in the file without regard to word boundaries. Each {key,value}
|
|
|
|
%% tuple is represented with a four byte key length, a four byte value length,
|
|
|
|
%% the actual key value followed by the actual value.
|
|
|
|
%%
|
|
|
|
%% Following the {key,value} tuples are the primary hash tables. There are
|
|
|
|
%% at most 256 hash tables. Each hash table is referenced by one of the 256
|
|
|
|
%% doubleword entries at the top of the file. For efficiency reasons, each
|
|
|
|
%% hash table is allocated twice the number of entries that it will need.
|
|
|
|
%% Each entry in the hash table is a doubleword.
|
|
|
|
%% The first word is the corresponding hash value and the second word is a
|
|
|
|
%% file pointer to the actual {key,value} tuple higher in the file.
|
|
|
|
%%
|
2016-10-26 20:39:16 +01:00
|
|
|
%%
|
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
-module(leveled_cdb).
|
|
|
|
|
2016-10-26 20:39:16 +01:00
|
|
|
-behaviour(gen_fsm).
|
2016-10-18 01:59:03 +01:00
|
|
|
-include("include/leveled.hrl").
|
2016-07-29 17:19:30 +01:00
|
|
|
|
|
|
|
-export([init/1,
|
2016-10-26 20:39:16 +01:00
|
|
|
handle_sync_event/4,
|
|
|
|
handle_event/3,
|
|
|
|
handle_info/3,
|
|
|
|
terminate/3,
|
|
|
|
code_change/4,
|
|
|
|
starting/3,
|
|
|
|
writer/3,
|
|
|
|
writer/2,
|
2016-10-29 00:52:49 +01:00
|
|
|
rolling/2,
|
2016-10-26 20:39:16 +01:00
|
|
|
rolling/3,
|
|
|
|
reader/3,
|
|
|
|
reader/2,
|
|
|
|
delete_pending/3,
|
|
|
|
delete_pending/2]).
|
|
|
|
|
|
|
|
-export([cdb_open_writer/1,
|
|
|
|
cdb_open_writer/2,
|
|
|
|
cdb_open_reader/1,
|
|
|
|
cdb_get/2,
|
|
|
|
cdb_put/3,
|
|
|
|
cdb_mput/2,
|
|
|
|
cdb_getpositions/2,
|
|
|
|
cdb_directfetch/3,
|
|
|
|
cdb_lastkey/1,
|
|
|
|
cdb_firstkey/1,
|
|
|
|
cdb_filename/1,
|
|
|
|
cdb_keycheck/2,
|
|
|
|
cdb_scan/4,
|
|
|
|
cdb_close/1,
|
|
|
|
cdb_complete/1,
|
|
|
|
cdb_roll/1,
|
|
|
|
cdb_returnhashtable/3,
|
2016-11-08 22:43:22 +00:00
|
|
|
cdb_checkhashtable/1,
|
2016-10-26 20:39:16 +01:00
|
|
|
cdb_destroy/1,
|
|
|
|
cdb_deletepending/1,
|
|
|
|
cdb_deletepending/3,
|
|
|
|
hashtable_calc/2]).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
|
|
|
-define(DWORD_SIZE, 8).
|
|
|
|
-define(WORD_SIZE, 4).
|
2015-06-04 21:15:31 +01:00
|
|
|
-define(MAX_FILE_SIZE, 3221225472).
|
2016-10-08 22:15:48 +01:00
|
|
|
-define(BINARY_MODE, false).
|
2015-06-04 21:15:31 +01:00
|
|
|
-define(BASE_POSITION, 2048).
|
2016-09-06 17:17:31 +01:00
|
|
|
-define(WRITE_OPS, [binary, raw, read, write]).
|
2016-10-14 13:36:12 +01:00
|
|
|
-define(PENDING_ROLL_WAIT, 30).
|
2016-10-26 20:39:16 +01:00
|
|
|
-define(DELETE_TIMEOUT, 10000).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-07-29 17:19:30 +01:00
|
|
|
-record(state, {hashtree,
|
|
|
|
last_position :: integer(),
|
2016-09-05 15:01:23 +01:00
|
|
|
last_key = empty,
|
|
|
|
hash_index = [] :: list(),
|
2016-07-29 17:19:30 +01:00
|
|
|
filename :: string(),
|
|
|
|
handle :: file:fd(),
|
2016-10-03 23:34:28 +01:00
|
|
|
max_size :: integer(),
|
2016-10-26 20:39:16 +01:00
|
|
|
binary_mode = false :: boolean(),
|
|
|
|
delete_point = 0 :: integer(),
|
2016-10-29 00:52:49 +01:00
|
|
|
inker :: pid(),
|
2016-11-14 11:17:14 +00:00
|
|
|
deferred_delete = false :: boolean(),
|
2016-11-25 17:41:08 +00:00
|
|
|
waste_path :: string(),
|
2016-12-20 23:11:50 +00:00
|
|
|
sync_strategy = none,
|
|
|
|
put_timing = {0, {0, 0}, {0, 0}} :: tuple()}).
|
2016-07-29 17:19:30 +01:00
|
|
|
|
|
|
|
|
|
|
|
%%%============================================================================
|
|
|
|
%%% API
|
|
|
|
%%%============================================================================
|
|
|
|
|
|
|
|
%% Open a CDB file for writing using the default options (binary mode on).
cdb_open_writer(Filename) ->
    DefaultOpts = #cdb_options{binary_mode=true},
    cdb_open_writer(Filename, DefaultOpts).
|
2016-09-07 17:58:12 +01:00
|
|
|
|
|
|
|
%% Open a CDB file for writing with explicit options.  Starts the FSM and
%% blocks until the file is open (recovering from any partial write first).
cdb_open_writer(Filename, Opts) ->
    {ok, Pid} = gen_fsm:start(?MODULE, [Opts], []),
    OpenReply = gen_fsm:sync_send_event(Pid, {open_writer, Filename}, infinity),
    ok = OpenReply,
    {ok, Pid}.
|
2016-07-29 17:19:30 +01:00
|
|
|
|
|
|
|
%% Open a completed CDB file read-only using the default options (binary
%% mode on).
cdb_open_reader(Filename) ->
    DefaultOpts = #cdb_options{binary_mode=true},
    cdb_open_reader(Filename, DefaultOpts).
|
|
|
|
|
|
|
|
%% Open a completed CDB file read-only with explicit options.  The top
%% index is cached in the FSM loop state on open.
cdb_open_reader(Filename, Opts) ->
    {ok, Pid} = gen_fsm:start(?MODULE, [Opts], []),
    OpenReply = gen_fsm:sync_send_event(Pid, {open_reader, Filename}, infinity),
    ok = OpenReply,
    {ok, Pid}.
|
2016-07-29 17:19:30 +01:00
|
|
|
|
2016-07-29 17:48:11 +01:00
|
|
|
%% Fetch a Key from the file.
cdb_get(Pid, Key) ->
    Event = {get_kv, Key},
    gen_fsm:sync_send_event(Pid, Event, infinity).
|
2016-07-29 17:48:11 +01:00
|
|
|
|
|
|
|
%% Append a Key/Value pair to the file; the reply is roll when the file
%% has no capacity for the write.
cdb_put(Pid, Key, Value) ->
    Event = {put_kv, Key, Value},
    gen_fsm:sync_send_event(Pid, Event, infinity).
|
2016-10-25 01:57:12 +01:00
|
|
|
|
2016-10-26 11:39:27 +01:00
|
|
|
%% Append a batch of Key/Value pairs in a single write; the reply is roll
%% when the file has no capacity for the batch.
cdb_mput(Pid, KVList) ->
    Event = {mput_kv, KVList},
    gen_fsm:sync_send_event(Pid, Event, infinity).
|
2016-09-20 16:13:36 +01:00
|
|
|
|
2016-09-20 18:24:05 +01:00
|
|
|
%% Return a list of file positions of stored values.
%% SampleSize can be an integer or the atom all.
cdb_getpositions(Pid, SampleSize) ->
    Event = {get_positions, SampleSize},
    gen_fsm:sync_send_event(Pid, Event, infinity).
|
2016-09-20 16:13:36 +01:00
|
|
|
|
2016-09-20 18:24:05 +01:00
|
|
|
%% Fetch directly from a list of positions (as returned by
%% cdb_getpositions/2).  Info can be key_only, key_size (size being the
%% size of the value) or key_value_check (with the check part indicating
%% if the CRC is correct for the value).
cdb_directfetch(Pid, PositionList, Info) ->
    Event = {direct_fetch, PositionList, Info},
    gen_fsm:sync_send_event(Pid, Event, infinity).
|
2016-08-02 13:44:48 +01:00
|
|
|
|
|
|
|
%% Close the file handle and stop the FSM, whatever state it is in.
cdb_close(Pid) ->
    gen_fsm:sync_send_all_state_event(Pid, cdb_close, infinity).
|
2016-07-29 17:19:30 +01:00
|
|
|
|
2016-09-05 15:01:23 +01:00
|
|
|
%% Complete the file: for a writer this writes out the hash tables and
%% renames the file for reading; replies {ok, NewFilename} and stops.
cdb_complete(Pid) ->
    gen_fsm:sync_send_event(Pid, cdb_complete, infinity).
|
2016-09-05 15:01:23 +01:00
|
|
|
|
2016-10-07 10:04:48 +01:00
|
|
|
%% Asynchronously move a writer into the rolling state, with hashtable
%% calculation delegated to a clerk process.
cdb_roll(Pid) ->
    gen_fsm:send_event(Pid, cdb_roll).
|
2016-10-07 10:04:48 +01:00
|
|
|
|
2016-10-14 13:36:12 +01:00
|
|
|
%% Used by the clerk to hand the calculated hashtable binary back to a
%% rolling file, which can then write it out and re-open for reading.
cdb_returnhashtable(Pid, IndexList, HashTreeBin) ->
    Event = {return_hashtable, IndexList, HashTreeBin},
    gen_fsm:sync_send_event(Pid, Event, infinity).
|
2016-10-14 13:36:12 +01:00
|
|
|
|
2016-11-08 22:43:22 +00:00
|
|
|
%% Check whether the hashtable has been written, i.e. whether the file has
%% finished rolling and become a reader.  Returns a boolean.
%%
%% An explicit infinity timeout is used for consistency with every other
%% synchronous call in this API; the rolling and reader states reply to
%% check_hashtable immediately, but without it the default 5s gen_fsm
%% timeout could crash a caller while the FSM is briefly busy.
cdb_checkhashtable(Pid) ->
    gen_fsm:sync_send_event(Pid, check_hashtable, infinity).
|
|
|
|
|
2016-09-27 14:58:26 +01:00
|
|
|
%% Asynchronously stop a delete_pending file without waiting for the
%% delete to be confirmed.
cdb_destroy(Pid) ->
    gen_fsm:send_event(Pid, destroy).
|
2016-09-27 14:58:26 +01:00
|
|
|
|
2016-10-03 23:34:28 +01:00
|
|
|
%% Mark the file as pending delete with no inker to poll for confirmation.
%% Only used in unit tests.
cdb_deletepending(Pid) ->
    cdb_deletepending(Pid, 0, no_poll).
|
|
|
|
|
|
|
|
%% Mark the file as pending delete.  The FSM will periodically poll the
%% Inker against ManSQN to learn when the file can actually be removed.
cdb_deletepending(Pid, ManSQN, Inker) ->
    Event = {delete_pending, ManSQN, Inker},
    gen_fsm:send_event(Pid, Event).
|
2016-10-03 23:34:28 +01:00
|
|
|
|
2016-09-15 10:53:24 +01:00
|
|
|
%% cdb_scan returns {LastPosition, Acc}. Use LastPosition as StartPosition to
%% continue from that point (the calling function has to protect against
%% double counting).
%%
%% LastPosition may be the atom eof when the last key processed was at the
%% end of the file. last_key must be defined in LoopState.
|
|
|
|
|
|
|
|
%% Fold FilterFun over the file's entries from StartPosition (or from the
%% start of the data section when undefined), returning {LastPosition, Acc}.
%% LastPosition may be the atom eof when the scan reached the end of the
%% file.
cdb_scan(Pid, FilterFun, InitAcc, StartPosition) ->
    Event = {cdb_scan, FilterFun, InitAcc, StartPosition},
    gen_fsm:sync_send_all_state_event(Pid, Event, infinity).
|
2016-09-09 15:58:19 +01:00
|
|
|
|
2016-09-05 15:01:23 +01:00
|
|
|
%% Get the last key to be added to the file (which will have the highest
%% sequence number), or the atom empty for a file with no entries.
cdb_lastkey(Pid) ->
    gen_fsm:sync_send_all_state_event(Pid, cdb_lastkey, infinity).
|
2016-09-05 15:01:23 +01:00
|
|
|
|
2016-09-27 14:58:26 +01:00
|
|
|
%% Get the first key in the file, or the atom empty if there is none.
cdb_firstkey(Pid) ->
    gen_fsm:sync_send_all_state_event(Pid, cdb_firstkey, infinity).
|
2016-09-27 14:58:26 +01:00
|
|
|
|
2016-09-05 15:01:23 +01:00
|
|
|
%% Get the filename of the database.
cdb_filename(Pid) ->
    gen_fsm:sync_send_all_state_event(Pid, cdb_filename, infinity).
|
2016-09-05 15:01:23 +01:00
|
|
|
|
|
|
|
%% Check to see if the key is probably present - will return either
%% probably or missing.  Does not do a definitive check.
cdb_keycheck(Pid, Key) ->
    Event = {key_check, Key},
    gen_fsm:sync_send_event(Pid, Event, infinity).
|
2016-07-29 17:19:30 +01:00
|
|
|
|
|
|
|
%%%============================================================================
|
|
|
|
%%% gen_server callbacks
|
|
|
|
%%%============================================================================
|
|
|
|
|
2016-09-07 17:58:12 +01:00
|
|
|
%% gen_fsm init callback.  Reads the supplied cdb_options and waits in the
%% starting state for an open_writer/open_reader instruction.
init([Opts]) ->
    MaxSize =
        case Opts#cdb_options.max_size of
            undefined ->
                ?MAX_FILE_SIZE;
            MS ->
                MS
        end,
    InitState = #state{max_size=MaxSize,
                        binary_mode=Opts#cdb_options.binary_mode,
                        waste_path=Opts#cdb_options.waste_path,
                        sync_strategy=Opts#cdb_options.sync_strategy},
    {ok, starting, InitState}.
|
2016-07-29 17:19:30 +01:00
|
|
|
|
2016-10-26 20:39:16 +01:00
|
|
|
%% The first event must open a file: either as a writer (scanning the file
%% to rebuild the in-memory hashtree and recover from any partial write),
%% or as a reader (loading the cached top-level hash index).
starting({open_writer, Filename}, _From, State) ->
    leveled_log:log("CDB01", [Filename]),
    {LastPosition, HashTree, LastKey} = open_active_file(Filename),
    WriteOps = set_writeops(State#state.sync_strategy),
    leveled_log:log("CDB13", [WriteOps]),
    {ok, Handle} = file:open(Filename, WriteOps),
    UpdState = State#state{handle=Handle,
                            last_position=LastPosition,
                            last_key=LastKey,
                            filename=Filename,
                            hashtree=HashTree},
    {reply, ok, writer, UpdState};
starting({open_reader, Filename}, _From, State) ->
    leveled_log:log("CDB02", [Filename]),
    {Handle, Index, LastKey} = open_for_readonly(Filename),
    UpdState = State#state{handle=Handle,
                            last_key=LastKey,
                            filename=Filename,
                            hash_index=Index},
    {reply, ok, reader, UpdState}.
|
|
|
|
|
|
|
|
%% Writer state: appends are accepted, with gets served from the in-memory
%% hashtree rather than any on-disk index.
writer({get_kv, Key}, _From, State) ->
    Reply = get_mem(Key, State#state.handle, State#state.hashtree),
    {reply, Reply, writer, State};
writer({key_check, Key}, _From, State) ->
    Reply = get_mem(Key,
                    State#state.handle,
                    State#state.hashtree,
                    loose_presence),
    {reply, Reply, writer, State};
writer({put_kv, Key, Value}, _From, State) ->
    SW = os:timestamp(),
    Result = put(State#state.handle,
                    Key,
                    Value,
                    {State#state.last_position, State#state.hashtree},
                    State#state.binary_mode,
                    State#state.max_size),
    T0 = timer:now_diff(os:timestamp(), SW),
    case Result of
        roll ->
            %% Key and value could not be written - the file is full
            {reply, roll, writer, State};
        {UpdHandle, NewPosition, HashTree} ->
            %% riak_sync cannot use a sync flag on open, so datasync is
            %% called explicitly after each write
            ok =
                case State#state.sync_strategy of
                    riak_sync ->
                        file:datasync(UpdHandle);
                    _ ->
                        ok
                end,
            T1 = timer:now_diff(os:timestamp(), SW) - T0,
            Timings = leveled_log:put_timings(journal,
                                                State#state.put_timing,
                                                T0, T1),
            {reply, ok, writer, State#state{handle=UpdHandle,
                                            last_position=NewPosition,
                                            last_key=Key,
                                            hashtree=HashTree,
                                            put_timing=Timings}}
    end;
writer({mput_kv, []}, _From, State) ->
    %% An empty batch is trivially successful
    {reply, ok, writer, State};
writer({mput_kv, KVList}, _From, State) ->
    Result = mput(State#state.handle,
                    KVList,
                    {State#state.last_position, State#state.hashtree},
                    State#state.binary_mode,
                    State#state.max_size),
    case Result of
        roll ->
            %% Keys and values could not be written - the file is full
            {reply, roll, writer, State};
        {UpdHandle, NewPosition, HashTree, LastKey} ->
            {reply, ok, writer, State#state{handle=UpdHandle,
                                            last_position=NewPosition,
                                            last_key=LastKey,
                                            hashtree=HashTree}}
    end;
writer(cdb_complete, _From, State) ->
    %% Write out the hash tables, rename for reading, and stop
    NewName = determine_new_filename(State#state.filename),
    ok = close_file(State#state.handle,
                    State#state.hashtree,
                    State#state.last_position),
    ok = rename_for_read(State#state.filename, NewName),
    {stop, normal, {ok, NewName}, State}.
|
|
|
|
|
|
|
|
%% Async events in the writer state: cdb_roll hands the hashtree off to
%% the clerk for hashtable calculation and moves to the rolling state.
writer(cdb_roll, State) ->
    ok = leveled_iclerk:clerk_hashtablecalc(State#state.hashtree,
                                            State#state.last_position,
                                            self()),
    {next_state, rolling, State}.
|
|
|
|
|
|
|
|
|
|
|
|
%% Rolling state: the file is full and awaiting its hashtable from the
%% clerk.  Reads are still served from the in-memory hashtree; position
%% requests are refused (empty list) until the file becomes a reader.
rolling({get_kv, Key}, _From, State) ->
    Reply = get_mem(Key, State#state.handle, State#state.hashtree),
    {reply, Reply, rolling, State};
rolling({key_check, Key}, _From, State) ->
    Reply = get_mem(Key,
                    State#state.handle,
                    State#state.hashtree,
                    loose_presence),
    {reply, Reply, rolling, State};
rolling({get_positions, _SampleSize}, _From, State) ->
    {reply, [], rolling, State};
rolling({return_hashtable, IndexList, HashTreeBin}, _From, State) ->
    %% The clerk has delivered the hashtable - write it out, rename the
    %% file, and re-open it read-only
    Handle = State#state.handle,
    {ok, BasePos} = file:position(Handle, State#state.last_position),
    NewName = determine_new_filename(State#state.filename),
    ok = perform_write_hash_tables(Handle, HashTreeBin, BasePos),
    ok = write_top_index_table(Handle, BasePos, IndexList),
    file:close(Handle),
    ok = rename_for_read(State#state.filename, NewName),
    leveled_log:log("CDB03", [NewName]),
    {NewHandle, Index, LastKey} = open_for_readonly(NewName),
    UpdState = State#state{handle=NewHandle,
                            last_key=LastKey,
                            filename=NewName,
                            hash_index=Index},
    %% A delete request received mid-roll takes the file straight to
    %% delete_pending instead of reader
    case State#state.deferred_delete of
        true ->
            {reply, ok, delete_pending, UpdState};
        false ->
            {reply, ok, reader, UpdState}
    end;
rolling(check_hashtable, _From, State) ->
    {reply, false, rolling, State}.
|
2016-10-29 00:52:49 +01:00
|
|
|
|
|
|
|
%% A delete request received while rolling is deferred until the roll has
%% completed (see the return_hashtable clause of rolling/3).
rolling({delete_pending, ManSQN, Inker}, State) ->
    UpdState = State#state{delete_point=ManSQN,
                            inker=Inker,
                            deferred_delete=true},
    {next_state, rolling, UpdState}.
|
|
|
|
|
2016-10-26 20:39:16 +01:00
|
|
|
%% Reader state: the file is immutable, with lookups served through the
%% cached top-level hash index.
reader({get_kv, Key}, _From, State) ->
    Reply = get_withcache(State#state.handle, Key, State#state.hash_index),
    {reply, Reply, reader, State};
reader({key_check, Key}, _From, State) ->
    Reply = get_withcache(State#state.handle,
                            Key,
                            State#state.hash_index,
                            loose_presence),
    {reply, Reply, reader, State};
reader({get_positions, SampleSize}, _From, State) ->
    case SampleSize of
        all ->
            %% Walk the whole index returning every value position
            Positions = scan_index(State#state.handle,
                                    State#state.hash_index,
                                    {fun scan_index_returnpositions/4, []}),
            {reply, Positions, reader, State};
        _ ->
            %% Shuffle the index so the sample is drawn evenly across the
            %% hash ranges
            SeededL = lists:map(fun(X) -> {random:uniform(), X} end,
                                State#state.hash_index),
            SortedL = lists:keysort(1, SeededL),
            RandomisedHashIndex = lists:map(fun({_R, X}) -> X end, SortedL),
            Positions = scan_index_forsample(State#state.handle,
                                                RandomisedHashIndex,
                                                fun scan_index_returnpositions/4,
                                                [],
                                                SampleSize),
            {reply, Positions, reader, State}
    end;
reader({direct_fetch, PositionList, Info}, _From, State) ->
    H = State#state.handle,
    %% The extract helpers put false in the first element when a position
    %% cannot be read back - such results are filtered out
    FilterFalseKey =
        fun(Tpl) ->
            case element(1, Tpl) of
                false ->
                    false;
                _Key ->
                    {true, Tpl}
            end
        end,
    Reply =
        case Info of
            key_only ->
                FM = lists:filtermap(
                        fun(P) ->
                            FilterFalseKey(extract_key(H, P)) end,
                        PositionList),
                lists:map(fun(T) -> element(1, T) end, FM);
            key_size ->
                lists:filtermap(
                    fun(P) ->
                        FilterFalseKey(extract_key_size(H, P)) end,
                    PositionList);
            key_value_check ->
                lists:filtermap(
                    fun(P) ->
                        FilterFalseKey(extract_key_value_check(H, P)) end,
                    PositionList)
        end,
    {reply, Reply, reader, State};
reader(cdb_complete, _From, State) ->
    ok = file:close(State#state.handle),
    {stop, normal, {ok, State#state.filename}, State#state{handle=undefined}};
reader(check_hashtable, _From, State) ->
    {reply, true, reader, State}.
|
2016-10-26 20:39:16 +01:00
|
|
|
|
|
|
|
|
|
|
|
%% Async delete requests in the reader state.  With no_poll the file waits
%% indefinitely for an explicit destroy; otherwise a timeout is set so the
%% Inker can be polled for confirmation.
reader({delete_pending, 0, no_poll}, State) ->
    UpdState = State#state{delete_point=0},
    {next_state, delete_pending, UpdState};
reader({delete_pending, ManSQN, Inker}, State) ->
    UpdState = State#state{delete_point=ManSQN, inker=Inker},
    {next_state, delete_pending, UpdState, ?DELETE_TIMEOUT}.
|
|
|
|
|
|
|
|
|
|
|
|
%% Reads are still supported while the file awaits deletion, with each
%% reply re-arming the confirmation-poll timeout.
delete_pending({get_kv, Key}, _From, State) ->
    Reply = get_withcache(State#state.handle, Key, State#state.hash_index),
    {reply, Reply, delete_pending, State, ?DELETE_TIMEOUT};
delete_pending({key_check, Key}, _From, State) ->
    Reply = get_withcache(State#state.handle,
                            Key,
                            State#state.hash_index,
                            loose_presence),
    {reply, Reply, delete_pending, State, ?DELETE_TIMEOUT}.
|
|
|
|
|
2016-11-08 00:46:01 +00:00
|
|
|
%% On timeout poll the Inker - if it confirms the file is no longer needed
%% at the recorded manifest SQN (or the Inker itself is dead), stop; the
%% actual removal of the file happens in terminate/3.
delete_pending(timeout, State=#state{delete_point=ManSQN}) when ManSQN > 0 ->
    case is_process_alive(State#state.inker) of
        true ->
            case leveled_inker:ink_confirmdelete(State#state.inker, ManSQN) of
                true ->
                    leveled_log:log("CDB04", [State#state.filename, ManSQN]),
                    {stop, normal, State};
                false ->
                    %% Not yet clear to delete - poll again later
                    {next_state, delete_pending, State, ?DELETE_TIMEOUT}
            end;
        false ->
            %% The Inker is gone, so nothing can still reference the file
            {stop, normal, State}
    end;
delete_pending(destroy, State) ->
    {stop, normal, State}.
|
|
|
|
|
|
|
|
|
|
|
|
%% All-state sync events: scans and metadata queries that behave the same
%% in every state, plus close.
handle_sync_event({cdb_scan, FilterFun, Acc, StartPos},
                    _From,
                    StateName,
                    State) ->
    {ok, EndPos0} = file:position(State#state.handle, eof),
    {ok, StartPos0} =
        case StartPos of
            undefined ->
                %% Default to the first byte after the top index table
                file:position(State#state.handle, ?BASE_POSITION);
            StartPos ->
                {ok, StartPos}
        end,
    file:position(State#state.handle, StartPos0),
    %% Nothing to scan when the file has no keys, or when the start
    %% position leaves no room for a further entry
    MaybeEnd = (check_last_key(State#state.last_key) == empty) or
                    (StartPos0 >= (EndPos0 - ?DWORD_SIZE)),
    case MaybeEnd of
        true ->
            {reply, {eof, Acc}, StateName, State};
        false ->
            {LastPosition, Acc2} = scan_over_file(State#state.handle,
                                                    StartPos0,
                                                    FilterFun,
                                                    Acc,
                                                    State#state.last_key),
            {reply, {LastPosition, Acc2}, StateName, State}
    end;
handle_sync_event(cdb_lastkey, _From, StateName, State) ->
    {reply, State#state.last_key, StateName, State};
handle_sync_event(cdb_firstkey, _From, StateName, State) ->
    {ok, EOFPos} = file:position(State#state.handle, eof),
    %% A scan stopping at the very first key found
    FilterFun = fun(Key, _V, _P, _O, _Fun) -> {stop, Key} end,
    FirstKey =
        case EOFPos of
            ?BASE_POSITION ->
                %% Only the index table is present - there are no keys
                empty;
            _ ->
                file:position(State#state.handle, ?BASE_POSITION),
                {_Pos, FirstScanKey} = scan_over_file(State#state.handle,
                                                        ?BASE_POSITION,
                                                        FilterFun,
                                                        empty,
                                                        State#state.last_key),
                FirstScanKey
        end,
    {reply, FirstKey, StateName, State};
handle_sync_event(cdb_filename, _From, StateName, State) ->
    {reply, State#state.filename, StateName, State};
handle_sync_event(cdb_close, _From, _StateName, State) ->
    {stop, normal, ok, State}.
|
2016-10-07 18:07:03 +01:00
|
|
|
|
2016-10-26 20:39:16 +01:00
|
|
|
%% No async all-state events are expected; ignore any that arrive.
handle_event(_Msg, StateName, State) ->
    {next_state, StateName, State}.
|
2016-07-29 17:19:30 +01:00
|
|
|
|
2016-10-26 20:39:16 +01:00
|
|
|
%% No out-of-band messages are expected; ignore any that arrive.
handle_info(_Msg, StateName, State) ->
    {next_state, StateName, State}.
|
2016-07-29 17:19:30 +01:00
|
|
|
|
2016-10-26 20:39:16 +01:00
|
|
|
%% On termination close any open handle.  A delete_pending file is either
%% removed outright or, when a waste path is configured, moved there.
terminate(Reason, StateName, State) ->
    leveled_log:log("CDB05", [State#state.filename, Reason]),
    case {State#state.handle, StateName, State#state.waste_path} of
        {undefined, _, _} ->
            ok;
        {Handle, delete_pending, undefined} ->
            ok = file:close(Handle),
            ok = file:delete(State#state.filename);
        {Handle, delete_pending, WasteFP} ->
            file:close(Handle),
            %% NOTE(review): the base filename is appended directly, so
            %% this assumes WasteFP ends with a path separator - confirm
            Components = filename:split(State#state.filename),
            NewName = WasteFP ++ lists:last(Components),
            file:rename(State#state.filename, NewName);
        {Handle, _, _} ->
            file:close(Handle)
    end.
|
2016-07-29 17:19:30 +01:00
|
|
|
|
2016-10-26 20:39:16 +01:00
|
|
|
%% No state transformation is required on a code change.
code_change(_OldVsn, StateName, State, _Extra) ->
    {ok, StateName, State}.
|
2016-07-29 17:19:30 +01:00
|
|
|
|
|
|
|
%%%============================================================================
|
|
|
|
%%% Internal functions
|
|
|
|
%%%============================================================================
|
|
|
|
|
2016-11-25 17:41:08 +00:00
|
|
|
%% Assumption is that sync should be used - it is a transaction log.
%%
%% However the sync flag is not supported in OTP 16.  Bitcask appears to
%% pass an o_sync flag, but this isn't supported either (maybe it works
%% with the bitcask nif fileops).
%%
%% To get round this, riak_sync opens without sync and instead datasyncs
%% on each PUT (see the put_kv clause of writer/3).
set_writeops(sync) ->
    [sync | ?WRITE_OPS];
set_writeops(riak_sync) ->
    ?WRITE_OPS;
set_writeops(none) ->
    ?WRITE_OPS.
|
|
|
|
|
2016-07-29 17:19:30 +01:00
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%% from_dict(FileName, Dict)
%% Given a filename and a dictionary, create a cdb using the key value
%% pairs from the dict.
from_dict(FileName, Dict) ->
    create(FileName, dict:to_list(Dict)).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%%
%% create(FileName, ListOfKeyValueTuples) -> ok
%% Given a filename and a list of {key,value} tuples, this function
%% creates a complete CDB (data, hash tables and top index).
%%
create(FileName, KeyValueList) ->
    {ok, Handle} = file:open(FileName, ?WRITE_OPS),
    %% Leave room for the top index table ahead of the data section
    {ok, _} = file:position(Handle, {bof, ?BASE_POSITION}),
    {BasePos, HashTree} = write_key_value_pairs(Handle, KeyValueList),
    close_file(Handle, HashTree, BasePos).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
|
|
|
|
%% Open an active file - one for which it is assumed the hash tables have
%% not yet been written.
%%
%% Scans over the file from the end of the top index table, incrementally
%% rebuilding the in-memory hashtree.  Any trailing bytes beyond the last
%% complete entry (e.g. left by a crash mid-write) are truncated away.
%%
%% Returns the position after the last complete entry, the hashtree, and
%% the last key written.
open_active_file(FileName) when is_list(FileName) ->
    {ok, Handle} = file:open(FileName, ?WRITE_OPS),
    {ok, Position} = file:position(Handle, {bof, 256*?DWORD_SIZE}),
    {LastPosition, {HashTree, LastKey}} = startup_scan_over_file(Handle,
                                                                    Position),
    case file:position(Handle, eof) of
        {ok, LastPosition} ->
            %% The file ends exactly at the last complete entry
            ok = file:close(Handle);
        {ok, EndPosition} ->
            %% Partial write detected - truncate back to the last good entry
            leveled_log:log("CDB06", [LastPosition, EndPosition]),
            {ok, _LastPosition} = file:position(Handle, LastPosition),
            ok = file:truncate(Handle),
            ok = file:close(Handle)
    end,
    {LastPosition, HashTree, LastKey}.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% put(Handle, Key, Value, {LastPosition, HashTree}, BinaryMode, MaxSize)
%% Append a new key/value pair to an active file.  Returns the updated
%% {Handle, NewPosition, HashTree}, or the atom roll when the write would
%% take the file beyond MaxSize (in which case nothing is written).
%%
put(FileName,
        Key,
        Value,
        {LastPosition, HashTree},
        BinaryMode,
        MaxSize) when is_list(FileName) ->
    {ok, Handle} = file:open(FileName, ?WRITE_OPS),
    put(Handle, Key, Value, {LastPosition, HashTree}, BinaryMode, MaxSize);
put(Handle, Key, Value, {LastPosition, HashTree}, BinaryMode, MaxSize) ->
    Bin = key_value_to_record({Key, Value}, BinaryMode),
    PotentialNewSize = LastPosition + byte_size(Bin),
    case PotentialNewSize > MaxSize of
        true ->
            roll;
        false ->
            ok = file:pwrite(Handle, LastPosition, Bin),
            {Handle,
                PotentialNewSize,
                put_hashtree(Key, LastPosition, HashTree)}
    end.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-10-26 11:39:27 +01:00
|
|
|
%% As put/6 but for a batch of key/value pairs written with a single
%% pwrite.  Returns {Handle, NewPosition, HashTree, LastKey}, or roll when
%% the batch would take the file beyond MaxSize (nothing is then written).
mput(Handle, KVList, {LastPosition, HashTree0}, BinaryMode, MaxSize) ->
    {KPList, Bin, LastKey} = multi_key_value_to_record(KVList,
                                                        BinaryMode,
                                                        LastPosition),
    PotentialNewSize = LastPosition + byte_size(Bin),
    case PotentialNewSize > MaxSize of
        true ->
            roll;
        false ->
            ok = file:pwrite(Handle, LastPosition, Bin),
            AddToTree = fun({K, P), Acc) -> put_hashtree(K, P, Acc) end,
            HashTree1 = lists:foldl(AddToTree, HashTree0, KPList),
            {Handle, PotentialNewSize, HashTree1, LastKey}
    end.
|
|
|
|
|
2016-09-07 17:58:12 +01:00
|
|
|
%% Should not be used for non-test PUTs by the inker - as the Max File
%% Size should be taken from the startup options, not the default.
put(FileName, Key, Value, {LastPosition, HashTree}) ->
    put(FileName, Key, Value, {LastPosition, HashTree},
        ?BINARY_MODE, ?MAX_FILE_SIZE).
|
2016-09-07 17:58:12 +01:00
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%%
%% get(FileName, Key) -> {key, value} | missing
%% Given a filename and a key, returns a key and value tuple.
%%
%% With a cached copy of the top index, the disk read of the index table
%% can be skipped (see get_index/3).
get_withcache(Handle, Key, Cache) ->
    get(Handle, Key, Cache, true).
|
2016-09-05 15:01:23 +01:00
|
|
|
|
2016-10-29 00:52:49 +01:00
|
|
|
%% As get_withcache/3, with QuickCheck (true | loose_presence) passed
%% through to the hash table search.
get_withcache(Handle, Key, Cache, QuickCheck) ->
    get(Handle, Key, Cache, QuickCheck).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-10-29 00:52:49 +01:00
|
|
|
%% Lookup with no cached index; QuickCheck defaults to true.
get(FileNameOrHandle, Key) ->
    get(FileNameOrHandle, Key, no_cache, true).
|
2016-09-05 15:01:23 +01:00
|
|
|
|
2016-10-29 00:52:49 +01:00
|
|
|
%% Lookup a Key in a completed CDB file.  The key's hash selects a
%% top-level index entry, and the hash table that entry points at is then
%% probed from the key's preferred slot, wrapping around the table.
get(FileName, Key, Cache, QuickCheck) when is_list(FileName) ->
    {ok, Handle} = file:open(FileName, [binary, raw, read]),
    get(Handle, Key, Cache, QuickCheck);
get(Handle, Key, Cache, QuickCheck) when is_tuple(Handle) ->
    Hash = hash(Key),
    Index = hash_to_index(Hash),
    {HashTable, Count} = get_index(Handle, Index, Cache),
    % If the count is 0 for that index - key must be missing
    case Count of
        0 ->
            missing;
        _ ->
            % Get starting slot in hashtable
            {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}),
            Slot = hash_to_slot(Hash, Count),
            {ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}),
            LastHashPosition = HashTable + ((Count - 1) * ?DWORD_SIZE),
            LocList = lists:seq(FirstHashPosition,
                                LastHashPosition,
                                ?DWORD_SIZE),
            % Split the slot positions around the starting slot, so the
            % search begins at the preferred slot and wraps around
            {L1, L2} = lists:split(Slot, LocList),
            search_hash_table(Handle,
                                lists:append(L2, L1),
                                Hash,
                                Key,
                                QuickCheck)
    end.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-09-05 15:01:23 +01:00
|
|
|
%% Return the {HashTablePosition, EntryCount} pair for one of the 256
%% top-level index slots.  With no_cache the pair is read from the header on
%% disk; otherwise it is looked up in the preloaded cache list of
%% {Index, {Pointer, Count}} tuples.
get_index(Handle, Index, no_cache) ->
    % Seek to this index's doubleword within the 256-entry header table
    {ok,_} = file:position(Handle, {bof, ?DWORD_SIZE * Index}),
    read_next_2_integers(Handle);
get_index(_Handle, Index, Cache) ->
    % A missing cache entry is a bug - crash via badmatch rather than guess
    {Index, Entry} = lists:keyfind(Index, 1, Cache),
    Entry.
|
2016-09-05 15:01:23 +01:00
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%% Get a Key/Value pair from an active CDB file (with no hash table written)
|
|
|
|
%% This requires a key dictionary to be passed in (mapping keys to positions)
|
|
|
|
%% Will return {Key, Value} or missing
|
2016-09-05 15:01:23 +01:00
|
|
|
%% Fetch a Key/Value pair from an active CDB file (one without its hash
%% tables written yet), using the in-memory HashTree to find candidate
%% positions.  Returns {Key, Value} | missing | probably | crc_wonky.
get_mem(Key, FNOrHandle, HashTree) ->
    get_mem(Key, FNOrHandle, HashTree, true).

get_mem(Key, Filename, HashTree, QuickCheck) when is_list(Filename) ->
    % A filename was passed - open read-only and retry with the handle
    {ok, Handle} = file:open(Filename, [binary, raw, read]),
    get_mem(Key, Handle, HashTree, QuickCheck);
get_mem(Key, Handle, HashTree, QuickCheck) ->
    PositionList = get_hashtree(Key, HashTree),
    case QuickCheck of
        loose_presence ->
            % Only confirm whether this key's hash is present at all
            case PositionList of
                [] ->
                    missing;
                _ ->
                    probably
            end;
        _ ->
            extract_kvpair(Handle, PositionList, Key)
    end.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Get the next key at a position in the file (or the first key if no position
|
|
|
|
%% is passed). Will return both a key and the next position
|
|
|
|
%% Return the first key in the file, together with a continuation for
%% get_nextkey/2 (or nomorekeys).  The KV region starts directly after the
%% 256-doubleword header and ends at the first hash table.
get_nextkey(Filename) when is_list(Filename) ->
    {ok, Handle} = file:open(Filename, [binary, raw, read]),
    get_nextkey(Handle);
get_nextkey(Handle) ->
    {ok, _} = file:position(Handle, bof),
    % The first index entry's pointer marks where the KV region ends
    {FirstHashPosition, _} = read_next_2_integers(Handle),
    get_nextkey(Handle, {256 * ?DWORD_SIZE, FirstHashPosition}).

%% Return {Key, nomorekeys} when the following tuple would start at the hash
%% tables, {Key, Handle, Continuation} otherwise, or nomorekeys at eof.
get_nextkey(Handle, {Position, FirstHashPosition}) ->
    {ok, Position} = file:position(Handle, Position),
    case read_next_2_integers(Handle) of
        {KeyLength, ValueLength} ->
            NextKey = read_next_term(Handle, KeyLength),
            % Skip over this tuple: lengths doubleword + key + value
            NextPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE,
            case NextPosition of
                FirstHashPosition ->
                    % Reached the start of the hash tables - last key
                    {NextKey, nomorekeys};
                _ ->
                    {NextKey, Handle, {NextPosition, FirstHashPosition}}
            end;
        eof ->
            nomorekeys
    end.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-10-14 13:36:12 +01:00
|
|
|
%% Build the binary for all 256 hash tables from the in-memory HashTree,
%% assuming they will be written starting at StartPos, and return the
%% {IndexList, HashTreeBin} pair.  The calculation is timed for logging.
hashtable_calc(HashTree, StartPos) ->
    SWC = os:timestamp(),
    AllIndexes = lists:seq(0, 255),
    {IndexList, HashTreeBin} =
        write_hash_tables(AllIndexes, HashTree, StartPos),
    leveled_log:log_timer("CDB07", [], SWC),
    {IndexList, HashTreeBin}.
|
2015-06-04 21:15:31 +01:00
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%%%%%%%%%%%%%%%%%%%%
|
|
|
|
%% Internal functions
|
|
|
|
%%%%%%%%%%%%%%%%%%%%
|
|
|
|
|
2016-10-07 10:04:48 +01:00
|
|
|
%% Swap the pending (.pnd) extension for the completed (.cdb) extension.
determine_new_filename(Filename) ->
    Rootname = filename:rootname(Filename, ".pnd"),
    Rootname ++ ".cdb".
|
|
|
|
|
|
|
|
%% Rename a completed file (normally .pnd -> .cdb) ready for read-only use,
%% logging whether the target name already exists before the rename.
rename_for_read(Filename, NewName) ->
    %% Rename file
    leveled_log:log("CDB08", [Filename, NewName, filelib:is_file(NewName)]),
    file:rename(Filename, NewName).
|
|
|
|
|
|
|
|
%% Open a completed CDB file read-only, preloading the top-level index as a
%% cache and finding the last key written.  Returns {Handle, Index, LastKey}.
open_for_readonly(Filename) ->
    {ok, Handle} = file:open(Filename, [binary, raw, read]),
    Index = load_index(Handle),
    LastKey = find_lastkey(Handle, Index),
    {Handle, Index, LastKey}.
|
|
|
|
|
2016-09-05 15:01:23 +01:00
|
|
|
%% Read all 256 top-level index entries from the file header into a cache
%% list of {Index, {HashTablePos, Count}}, avoiding repeated header reads.
load_index(Handle) ->
    [begin
        file:position(Handle, {bof, ?DWORD_SIZE * Slot}),
        {HashTablePos, Count} = read_next_2_integers(Handle),
        {Slot, {HashTablePos, Count}}
     end || Slot <- lists:seq(0, 255)].
|
|
|
|
|
2016-09-19 15:31:26 +01:00
|
|
|
%% Function to find the LastKey in the file
|
|
|
|
%% Function to find the LastKey in the file.  Scans every hash table to find
%% the highest key/value position referenced, then reads the key stored
%% there.  Returns the atom empty if no keys are present.
find_lastkey(Handle, IndexCache) ->
    {LastPosition, TotalKeys} = scan_index(Handle,
                                            IndexCache,
                                            {fun scan_index_findlast/4,
                                                {0, 0}}),
    case TotalKeys of
        0 ->
            empty;
        _ ->
            % The last-written tuple starts at the maximum referenced
            % position - read its key length then the key itself
            {ok, _} = file:position(Handle, LastPosition),
            {KeyLength, _ValueLength} = read_next_2_integers(Handle),
            read_next_term(Handle, KeyLength)
    end.
|
|
|
|
|
2016-09-19 15:31:26 +01:00
|
|
|
|
2016-09-20 16:13:36 +01:00
|
|
|
%% Fold ScanFun(Handle, Position, Count, Acc) over every entry in the
%% top-level index cache, threading the accumulator from InitAcc.
scan_index(Handle, IndexCache, {ScanFun, InitAcc}) ->
    FoldFun =
        fun({_Idx, {Pos, Count}}, Acc) ->
            ScanFun(Handle, Pos, Count, Acc)
        end,
    lists:foldl(FoldFun, InitAcc, IndexCache).
|
|
|
|
|
2016-09-20 18:24:05 +01:00
|
|
|
%% Apply ScanFun index-by-index until at least SampleSize results have been
%% accumulated (or the cache is exhausted), then trim to exactly SampleSize.
scan_index_forsample(_Handle, [], _ScanFun, Acc, SampleSize) ->
    lists:sublist(Acc, SampleSize);
scan_index_forsample(Handle, [{_Idx, {Pos, Count}}|Rest],
                                        ScanFun, Acc, SampleSize) ->
    case length(Acc) >= SampleSize of
        true ->
            % Enough samples already - stop early and trim
            lists:sublist(Acc, SampleSize);
        false ->
            scan_index_forsample(Handle,
                                    Rest,
                                    ScanFun,
                                    ScanFun(Handle, Pos, Count, Acc),
                                    SampleSize)
    end.
|
|
|
|
|
|
|
|
|
2016-10-25 23:13:14 +01:00
|
|
|
%% ScanFun for scan_index/3: read this hash table's Count entries and track
%% the maximum key/value position referenced so far, plus a running total of
%% table entries (a proxy for key count, though each key appears once per
%% table entry).
scan_index_findlast(Handle, Position, Count, {LastPosition, TotalKeys}) ->
    {ok, _} = file:position(Handle, Position),
    MaxPos = lists:foldl(fun({_Hash, HPos}, MaxPos) -> max(HPos, MaxPos) end,
                            LastPosition,
                            read_next_n_integerpairs(Handle, Count)),
    {MaxPos, TotalKeys + Count}.
|
2016-09-20 16:13:36 +01:00
|
|
|
|
|
|
|
%% ScanFun for scan_index/3: append to PosList0 the key/value positions of
%% every occupied slot (hash =/= 0) in the hash table at Position.
%%
%% A single comprehension appended once preserves the original slot order
%% while avoiding the O(n^2) cost of `Acc ++ [X]` per element inside a fold.
scan_index_returnpositions(Handle, Position, Count, PosList0) ->
    {ok, _} = file:position(Handle, Position),
    PosList0 ++ [HPosition ||
                    {Hash, HPosition}
                        <- read_next_n_integerpairs(Handle, Count),
                    Hash =/= 0].
|
2016-09-19 15:31:26 +01:00
|
|
|
|
2016-09-05 15:01:23 +01:00
|
|
|
|
2015-06-04 21:15:31 +01:00
|
|
|
%% Take an active file and write the hash details necessary to close that
|
|
|
|
%% file and roll a new active file if requested.
|
|
|
|
%%
|
|
|
|
%% Base Pos should be at the end of the KV pairs written (the position for)
|
|
|
|
%% the hash tables
|
|
|
|
%% Take an active file and write the hash details necessary to close that
%% file.  BasePos should be at the end of the KV pairs written - the position
%% at which the hash tables will start.
close_file(Handle, HashTree, BasePos) ->
    {ok, BasePos} = file:position(Handle, BasePos),
    IndexList = write_hash_tables(Handle, HashTree),
    % The header (top index) is rewritten last, pointing at the tables
    ok = write_top_index_table(Handle, BasePos, IndexList),
    file:close(Handle).
|
2015-06-04 21:15:31 +01:00
|
|
|
|
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%% Fetch a list of positions by passing a key to the HashTree
|
|
|
|
%% Fetch a list of candidate file positions for Key from the in-memory
%% HashTree (multiple positions may share the same hash).
get_hashtree(Key, HashTree) ->
    Hash = hash(Key),
    Index = hash_to_index(Hash),
    lookup_positions(HashTree, Index, Hash).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-12-10 10:55:35 +00:00
|
|
|
%% Add to hash tree - this is an array of 256 skiplists that contains the Hash
|
2015-05-25 22:45:45 +01:00
|
|
|
%% and position of objects which have been added to an open CDB file
|
|
|
|
%% Add to hash tree - an ETS table that contains the Hash and position of
%% objects which have been added to an open CDB file.  Returns the (updated)
%% HashTree.
put_hashtree(Key, Position, HashTree) ->
    Hash = hash(Key),
    Index = hash_to_index(Hash),
    add_position_tohashtree(HashTree, Index, Hash, Position).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Function to extract a Key-Value pair given a file handle and a position
|
2016-10-29 00:52:49 +01:00
|
|
|
%% Will confirm that the key matches and do a CRC check
|
|
|
|
%% Extract a Key-Value pair given a file handle and a list of candidate
%% positions.  Confirms that the stored key matches the requested one and
%% CRC-checks the value.  Returns {Key, Value} | missing | crc_wonky.
extract_kvpair(_, [], _) ->
    missing;
extract_kvpair(Handle, [Position|Rest], Key) ->
    {ok, _} = file:position(Handle, Position),
    {KeyLength, ValueLength} = read_next_2_integers(Handle),
    case safe_read_next_term(Handle, KeyLength) of
        Key ->  % If same key as passed in, then found!
            case read_next_term(Handle, ValueLength, crc) of
                {false, _} ->
                    % Value failed its CRC check
                    crc_wonky;
                {_, Value} ->
                    {Key,Value}
            end;
        _ ->
            % Hash collision or unreadable key - try the next candidate
            extract_kvpair(Handle, Rest, Key)
    end.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-09-20 16:13:36 +01:00
|
|
|
%% Read only the key stored at Position.  Note the result is deliberately
%% wrapped in a 1-tuple ({Key}), matching the other extract_* return shapes.
extract_key(Handle, Position) ->
    {ok, _} = file:position(Handle, Position),
    {KeyLength, _ValueLength} = read_next_2_integers(Handle),
    {safe_read_next_term(Handle, KeyLength)}.
|
2016-09-20 16:13:36 +01:00
|
|
|
|
2016-09-20 18:24:05 +01:00
|
|
|
%% Read the key stored at Position together with the length (in bytes,
%% including the CRC prefix) of its value - without reading the value.
extract_key_size(Handle, Position) ->
    {ok, _} = file:position(Handle, Position),
    {KeyLength, ValueLength} = read_next_2_integers(Handle),
    {safe_read_next_term(Handle, KeyLength), ValueLength}.
|
2016-09-20 18:24:05 +01:00
|
|
|
|
|
|
|
%% Read the key and value stored at Position, returning {Key, Value, Check}
%% where Check is the boolean outcome of the value's CRC check.
extract_key_value_check(Handle, Position) ->
    {ok, _} = file:position(Handle, Position),
    {KeyLength, ValueLength} = read_next_2_integers(Handle),
    K = safe_read_next_term(Handle, KeyLength),
    {Check, V} = read_next_term(Handle, ValueLength, crc),
    {K, V, Check}.
|
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%% Scan through the file until there is a failure to crc check an input, and
|
|
|
|
%% at that point return the position and the key dictionary scanned so far
|
2016-09-09 15:58:19 +01:00
|
|
|
%% Scan through the file from Position until there is a failure to crc-check
%% an input, and at that point return the position reached and the hash tree
%% (plus last key) built so far.  Used to rebuild state for an incomplete
%% (.pnd) file at startup.
startup_scan_over_file(Handle, Position) ->
    HashTree = new_hashtree(),
    {eof, Output} = scan_over_file(Handle,
                                    Position,
                                    fun startup_filter/5,
                                    {HashTree, empty},
                                    empty),
    % The handle is left positioned after the last good KV pair
    {ok, FinalPos} = file:position(Handle, cur),
    {FinalPos, Output}.
|
2016-09-09 15:58:19 +01:00
|
|
|
|
2016-09-19 15:31:26 +01:00
|
|
|
%% Specific filter to be used at startup to build a hashtree for an incomplete
|
|
|
|
%% cdb file, and returns at the end the hashtree and the final Key seen in the
|
|
|
|
%% journal
|
|
|
|
|
2016-11-09 16:35:13 +00:00
|
|
|
%% FilterFun used at startup to build a hashtree for an incomplete cdb file;
%% the accumulator carries the hashtree and the final Key seen in the
%% journal.  The single-clause case is deliberate: a false result should be
%% impossible here and would crash the scan.
startup_filter(Key, ValueAsBin, Position, {Hashtree, _LastKey}, _ExtractFun) ->
    case crccheck_value(ValueAsBin) of
        true ->
            % This function is preceded by a "safe read" of the key and value
            % and so the crccheck should always be true, as a failed check
            % should not reach this stage
            {loop, {put_hashtree(Key, Position, Hashtree), Key}}
    end.
|
|
|
|
|
|
|
|
|
2016-09-09 15:58:19 +01:00
|
|
|
%% Scan for key changes - scan over file returning applying FilterFun
|
|
|
|
%% The FilterFun should accept as input:
|
2016-09-15 18:38:23 +01:00
|
|
|
%% - Key, ValueBin, Position, Accumulator, Fun (to extract values from Binary)
|
|
|
|
%% -> outputting a new Accumulator and a loop|stop instruction as a tuple
|
|
|
|
%% i.e. {loop, Acc} or {stop, Acc}
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-09-15 10:53:24 +01:00
|
|
|
%% Scan over the file applying FilterFun to each safely-read Key/Value.
%% FilterFun(Key, ValueBin, Position, Acc, ExtractFun) must return
%% {loop, Acc} to continue or {stop, Acc} to halt at the current position.
%% Scanning also stops at an unreadable pair, or after LastKey is seen.
scan_over_file(Handle, Position, FilterFun, Output, LastKey) ->
    case saferead_keyvalue(Handle) of
        false ->
            % Unreadable (corrupt or truncated) pair - treat as end of file
            leveled_log:log("CDB09", [Position]),
            {eof, Output};
        {Key, ValueAsBin, KeyLength, ValueLength} ->
            NewPosition = case Key of
                                LastKey ->
                                    % The final journal key - stop after this
                                    eof;
                                _ ->
                                    Position + KeyLength + ValueLength
                                    + ?DWORD_SIZE
                            end,
            case FilterFun(Key,
                            ValueAsBin,
                            Position,
                            Output,
                            fun extract_valueandsize/1) of
                {stop, UpdOutput} ->
                    {Position, UpdOutput};
                {loop, UpdOutput} ->
                    case NewPosition of
                        eof ->
                            {eof, UpdOutput};
                        _ ->
                            scan_over_file(Handle,
                                            NewPosition,
                                            FilterFun,
                                            UpdOutput,
                                            LastKey)
                    end
            end
    end.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-09-15 10:53:24 +01:00
|
|
|
%% Confirm that the last key has been defined and set to a non-default value
|
|
|
|
|
|
|
|
%% Confirm that the last key has been defined and set to a non-default
%% value: returns ok for any real key, or the atom empty when unset.
check_last_key(empty) ->
    empty;
check_last_key(_LastKey) ->
    ok.
|
|
|
|
|
2015-06-04 21:15:31 +01:00
|
|
|
%% Read the Key/Value at this point, returning {ok, Key, Value}
|
2016-10-29 00:52:49 +01:00
|
|
|
%% catch expected exceptions associated with file corruption (or end) and
|
2015-06-04 21:15:31 +01:00
|
|
|
%% return eof
|
|
|
|
%% Read the Key/Value at the current position, returning
%% {Key, ValueBin, KeyLength, ValueLength} - or false on any of the expected
%% failure modes associated with file corruption or truncation (eof, a key
%% that will not decode, or a value that fails its CRC check).
saferead_keyvalue(Handle) ->
    case read_next_2_integers(Handle) of
        eof ->
            false;
        {KeyL, ValueL} ->
            case safe_read_next_term(Handle, KeyL) of
                {error, _} ->
                    false;
                eof ->
                    false;
                false ->
                    false;
                Key ->
                    case file:read(Handle, ValueL) of
                        eof ->
                            false;
                        {ok, Value} ->
                            % Value is kept as the raw binary (CRC + payload)
                            case crccheck_value(Value) of
                                true ->
                                    {Key, Value, KeyL, ValueL};
                                false ->
                                    false
                            end
                    end
            end
    end.
|
|
|
|
|
|
|
|
|
|
|
|
%% Read and decode the next term of Length bytes, mapping a decode failure
%% (error:badarg from binary_to_term on a corrupt read) to false rather
%% than crashing the scan.
safe_read_next_term(Handle, Length) ->
    try
        read_next_term(Handle, Length)
    catch
        error:badarg ->
            false
    end.
|
2015-06-04 21:15:31 +01:00
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%% The first four bytes of the value are the crc check
|
|
|
|
%% Validate the 32-bit CRC that prefixes every stored value.  Returns true
%% when the CRC matches the rest of the binary; logs and returns false on a
%% mismatch, or when the value is too short to carry both CRC and payload.
crccheck_value(<<CRC:32/integer, Payload/bitstring>>)
                                        when bit_size(Payload) > 0 ->
    case calc_crc(Payload) of
        CRC ->
            true;
        _ ->
            % Mismatch implies corruption of this value on disk
            leveled_log:log("CDB10", []),
            false
    end;
crccheck_value(_) ->
    % Too short to contain a CRC prefix and a payload
    leveled_log:log("CDB11", []),
    false.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Run a crc check filling out any values which don't fit on byte boundary
|
|
|
|
%% CRC32 over a bitstring, zero-padding on the right to a whole number of
%% bytes when the input does not end on a byte boundary.
calc_crc(Value) ->
    PadBits = (8 - (bit_size(Value) rem 8)) rem 8,
    erlang:crc32(<<Value/bitstring, 0:PadBits>>).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2015-06-04 21:15:31 +01:00
|
|
|
%% Read Length bytes and decode them as an Erlang term; a read failure
%% ({error, _} or eof) is passed straight back to the caller.
read_next_term(Handle, Length) ->
    case file:read(Handle, Length) of
        {ok, Bin} ->
            binary_to_term(Bin);
        ReadError ->
            ReadError
    end.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Read next string where the string has a CRC prepended - stripping the crc
|
|
|
|
%% and checking if requested
|
2016-10-29 00:52:49 +01:00
|
|
|
%% Read the next value where the binary has a CRC prepended - stripping the
%% crc and checking it.  Returns {true, Term} on a good check or
%% {false, crc_wonky} on a mismatch.
read_next_term(Handle, Length, crc) ->
    {ok, <<CRC:32/integer, Bin/binary>>} = file:read(Handle, Length),
    case calc_crc(Bin) of
        CRC ->
            {true, binary_to_term(Bin)};
        _ ->
            {false, crc_wonky}
    end.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-10-08 22:15:48 +01:00
|
|
|
%% Extract value and size from binary containing CRC
|
|
|
|
%% Strip the leading 32-bit CRC from a stored value binary and return the
%% decoded term together with the byte size of the encoded payload.
extract_valueandsize(<<_CRC:32/integer, PayloadBin/binary>>) ->
    {binary_to_term(PayloadBin), byte_size(PayloadBin)}.
|
2016-09-15 18:38:23 +01:00
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Used for reading lengths
|
|
|
|
%% Note that the endian_flip is required to make the file format compatible
|
|
|
|
%% with CDB
|
|
|
|
%% Read one doubleword as two 32-bit integers, endian-flipped to keep the
%% on-disk format compatible with DJB's little-endian CDB layout.  A read
%% failure (e.g. eof) is passed straight back to the caller.
read_next_2_integers(Handle) ->
    case file:read(Handle, ?DWORD_SIZE) of
        {ok, <<FirstWord:32, SecondWord:32>>} ->
            {endian_flip(FirstWord), endian_flip(SecondWord)};
        ReadError ->
            ReadError
    end.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-09-20 16:13:36 +01:00
|
|
|
%% Read NumberOfPairs doublewords in one block read and return them as a
%% list of {Int1, Int2} pairs (endian-flipped, in file order).
read_next_n_integerpairs(Handle, NumberOfPairs) ->
    {ok, Block} = file:read(Handle, ?DWORD_SIZE * NumberOfPairs),
    read_integerpairs(Block, []).
|
|
|
|
|
|
|
|
%% Split a binary of doublewords into a list of {Int1, Int2} pairs in file
%% order, appended after any pairs already in the Pairs accumulator.
%%
%% Two fixes over the original: the integers are read directly as
%% little-endian (equivalent to reading big-endian then endian-flipping),
%% and the pairs are collected via prepend-and-reverse rather than
%% `Pairs ++ [Pair]` per element, which was O(n^2).
read_integerpairs(<<>>, Pairs) ->
    Pairs;
read_integerpairs(Bin, Pairs) ->
    Pairs ++ read_integerpairs_acc(Bin, []).

%% Accumulate pairs in reverse, restoring order once at the end.
read_integerpairs_acc(<<>>, Acc) ->
    lists:reverse(Acc);
read_integerpairs_acc(<<Int1:32/little, Int2:32/little, Rest/binary>>, Acc) ->
    read_integerpairs_acc(Rest, [{Int1, Int2} | Acc]).
|
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%% Search the hash table for the matching hash and key. Be prepared for
|
|
|
|
%% multiple keys to have the same hash value.
|
2016-09-05 15:01:23 +01:00
|
|
|
%%
|
|
|
|
%% There are three possible values of CRCCheck:
|
|
|
|
%% true - check the CRC before returning key & value
|
|
|
|
%% false - don't check the CRC before returning key & value
|
|
|
|
%% loose_presence - confirm that the hash of the key is present
|
|
|
|
|
2016-12-13 14:06:19 +00:00
|
|
|
%% Search the listed hash-table entry positions for Hash/Key, starting the
%% probe-cycle counter (used only for logging long collision chains) at 0.
search_hash_table(Handle, Entries, Hash, Key, QuickCheck) ->
    search_hash_table(Handle, Entries, Hash, Key, QuickCheck, 0).
|
|
|
|
|
2016-12-13 17:02:45 +00:00
|
|
|
%% Walk the candidate hash-table entries in probe order.  At each entry the
%% stored hash is compared; on a match either the presence is confirmed
%% (loose_presence) or the full KV pair is extracted and the key compared.
%% Collisions continue the walk; CycleCount tracks probes for logging.
search_hash_table(_Handle, [], Hash, _Key, _QuickCheck, CycleCount) ->
    % Exhausted every candidate slot without a match
    log_cyclecount(CycleCount, Hash, missing),
    missing;
search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key,
                                                QuickCheck, CycleCount) ->
    {ok, _} = file:position(Handle, Entry),
    {StoredHash, DataLoc} = read_next_2_integers(Handle),
    case StoredHash of
        Hash ->
            KV = case QuickCheck of
                        loose_presence ->
                            % Hash match is enough for a loose check
                            probably;
                        _ ->
                            extract_kvpair(Handle, [DataLoc], Key)
                    end,
            case KV of
                missing ->
                    % Same hash but a different key - keep probing
                    search_hash_table(Handle,
                                        RestOfEntries,
                                        Hash,
                                        Key,
                                        QuickCheck,
                                        CycleCount + 1);
                _ ->
                    log_cyclecount(CycleCount, Hash, found),
                    KV
            end;
        %0 ->
        %    % Hash is 0 so key must be missing as 0 found before Hash matched
        %    missing;
        _ ->
            search_hash_table(Handle, RestOfEntries, Hash, Key,
                                QuickCheck, CycleCount + 1)
    end.
|
|
|
|
|
2016-12-13 17:02:45 +00:00
|
|
|
%% Log unusually long probe chains (more than 8 slots examined) when
%% searching a hash table; short searches are silently accepted.
log_cyclecount(CycleCount, Hash, Result) when CycleCount > 8 ->
    leveled_log:log("CDB15", [CycleCount, Hash, Result]);
log_cyclecount(_CycleCount, _Hash, _Result) ->
    ok.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
% Write Key and Value tuples into the CDB. Each tuple consists of a
|
|
|
|
% 4 byte key length, a 4 byte value length, the actual key followed
|
|
|
|
% by the value.
|
|
|
|
%
|
|
|
|
% Returns a dictionary that is keyed by
|
|
|
|
% the least significant 8 bits of each hash with the
|
|
|
|
% values being a list of the hash and the position of the
|
|
|
|
% key/value binary in the file.
|
|
|
|
%% Write a list of Key/Value tuples into the CDB at the current position,
%% starting with a fresh in-memory hash tree.  Returns the accumulator of
%% {Position, HashTree} from the final write.
write_key_value_pairs(Handle, KeyValueList) ->
    {ok, Position} = file:position(Handle, cur),
    HashTree = new_hashtree(),
    write_key_value_pairs(Handle, KeyValueList, {Position, HashTree}).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Write each pair in turn via put/4 (the Handle-based clause defined
%% elsewhere in this module, which returns {Handle, NewPosition, HashTree}),
%% threading the {Position, HashTree} accumulator.
write_key_value_pairs(_, [], Acc) ->
    Acc;
write_key_value_pairs(Handle, [HeadPair|TailList], Acc) ->
    {Key, Value} = HeadPair,
    {Handle, NewPosition, HashTree} = put(Handle, Key, Value, Acc),
    write_key_value_pairs(Handle, TailList, {NewPosition, HashTree}).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Write the actual hashtables at the bottom of the file. Each hash table
|
|
|
|
%% entry is a doubleword in length. The first word is the hash value
|
|
|
|
%% corresponding to a key and the second word is a file pointer to the
|
|
|
|
%% corresponding {key,value} tuple.
|
|
|
|
%% Write the actual hashtables at the bottom of the file.  Each hash table
%% entry is a doubleword: the hash of a key and a file pointer to the
%% corresponding {key,value} tuple.  Returns the index list describing the
%% written tables, for use by write_top_index_table/3.
write_hash_tables(Handle, HashTree) ->
    {ok, StartPos} = file:position(Handle, cur),
    {IndexList, HashTreeBin} = hashtable_calc(HashTree, StartPos),
    ok = perform_write_hash_tables(Handle, HashTreeBin, StartPos),
    IndexList.
|
|
|
|
|
|
|
|
%% Write the pre-built hash table binary at the current position, advise the
%% OS that the written region will be needed (for page-cache warmth), and
%% log how long the write took.
perform_write_hash_tables(Handle, HashTreeBin, StartPos) ->
    SWW = os:timestamp(),
    ok = file:write(Handle, HashTreeBin),
    {ok, EndPos} = file:position(Handle, cur),
    ok = file:advise(Handle, StartPos, EndPos - StartPos, will_need),
    leveled_log:log_timer("CDB12", [], SWW),
    ok.
|
|
|
|
|
2016-09-20 16:13:36 +01:00
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%% Write the top most 255 doubleword entries. First word is the
|
|
|
|
%% file pointer to a hashtable and the second word is the number of entries
|
|
|
|
%% in the hash table
|
|
|
|
%% The List passed in should be made up of {Index, Position, Count} tuples
|
2016-12-13 02:15:13 +00:00
|
|
|
%% Write the top-most 256 doubleword entries at the start of the file: each
%% is a file pointer to a hash table plus the number of entries in it.
%% IndexList is made up of {Index, Position, Count} tuples.  Empty tables
%% (Count == 0) are pointed at the running position so pointers stay
%% monotonic.  Note PosLE/NextPos are bound inside both case branches -
%% deliberate, if fragile, Erlang "exported variable" style.
write_top_index_table(Handle, BasePos, IndexList) ->
    FnWriteIndex = fun({_Index, Pos, Count}, {AccBin, CurrPos}) ->
                        case Count == 0 of
                            true ->
                                PosLE = endian_flip(CurrPos),
                                NextPos = CurrPos;
                            false ->
                                PosLE = endian_flip(Pos),
                                NextPos = Pos + (Count * ?DWORD_SIZE)
                        end,
                        CountLE = endian_flip(Count),
                        {<<AccBin/binary, PosLE:32, CountLE:32>>, NextPos}
                    end,

    {IndexBin, _Pos} = lists:foldl(FnWriteIndex,
                                    {<<>>, BasePos},
                                    IndexList),
    % Overwrite the header region at the front of the file
    {ok, _} = file:position(Handle, 0),
    ok = file:write(Handle, IndexBin),
    ok = file:advise(Handle, 0, ?DWORD_SIZE * 256, will_need),
    ok.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-09-05 15:01:23 +01:00
|
|
|
%% To make this compatible with original Bernstein format this endian flip
|
|
|
|
%% and also the use of the standard hash function required.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Re-read a 32-bit integer with the opposite endianness.  The on-disk CDB
%% format is little-endian while Erlang bit syntax defaults to big-endian,
%% so every length/pointer word is flipped on read and write.
endian_flip(Int) ->
    <<Flipped:32/unsigned-little-integer>> = <<Int:32/big>>,
    Flipped.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Hash a key using the project-wide magic hash (leveled_codec), replacing
%% the original DJB hash while keeping the rest of the CDB layout.
hash(Key) ->
    leveled_codec:magic_hash(Key).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
% Get the least significant 8 bits from the hash.
|
|
|
|
%% The bottom byte of the hash selects one of the 256 top-level index slots.
hash_to_index(Hash) ->
    Hash band 16#FF.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-09-05 20:22:16 +01:00
|
|
|
%% Choose the starting slot within a hash table of SlotCount entries, using
%% the hash bits above the bottom byte (which already chose the index).
hash_to_slot(Hash, SlotCount) ->
    UpperHash = Hash bsr 8,
    UpperHash rem SlotCount.
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Create a binary of the LengthKeyLengthValue, adding a CRC check
|
|
|
|
%% at the front of the value
|
2016-10-08 22:15:48 +01:00
|
|
|
%% Create a binary of the LengthKeyLengthValue form, adding a CRC check at
%% the front of the value.  In BinaryMode the value is assumed already to be
%% a binary; otherwise it is term_to_binary'd first.  The stored value
%% length includes the 4-byte CRC prefix.
key_value_to_record({Key, Value}, BinaryMode) ->
    BK = term_to_binary(Key),
    BV = case BinaryMode of
                true ->
                    Value;
                false ->
                    term_to_binary(Value)
            end,
    LK = byte_size(BK),
    LV = byte_size(BV),
    LK_FL = endian_flip(LK),
    % +4 so the stored length covers the CRC prefix as well
    LV_FL = endian_flip(LV + 4),
    CRC = calc_crc(BV),
    <<LK_FL:32, LV_FL:32, BK:LK/binary, CRC:32/integer, BV:LV/binary>>.
|
2015-06-04 21:15:31 +01:00
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-10-26 11:39:27 +01:00
|
|
|
%% Encode a list of KV pairs into a single binary of records, also
%% returning (reversed) the position each key will occupy when the binary is
%% written at LastPosition, and the last key encoded.
multi_key_value_to_record(KVList, BinaryMode, LastPosition) ->
    lists:foldl(fun({K, V}, {KPosL, Bin, _LK}) ->
                        Bin0 = key_value_to_record({K, V}, BinaryMode),
                        % Position of this record = bytes already
                        % accumulated offset by the write position
                        {[{K, byte_size(Bin) + LastPosition}|KPosL],
                            <<Bin/binary, Bin0/binary>>,
                            K} end,
                    {[], <<>>, empty},
                    KVList).
|
|
|
|
|
2016-12-10 13:03:38 +00:00
|
|
|
%%%============================================================================
|
|
|
|
%%% HashTree Implementation
|
|
|
|
%%%============================================================================
|
|
|
|
|
|
|
|
%% Return every recorded file position for Hash within Index, highest
%% position first (pairs are prepended while walking ascending key order).
lookup_positions(HashTree, Index, Hash) ->
    lookup_positions(HashTree, Index, Hash, -1, []).

%% Walk the ordered_set with ets:next from {Index, Hash, -1}, collecting
%% each key while it still matches {Index, Hash, _}.
lookup_positions(HashTree, Index, Hash, LastPosition, Acc) ->
    case ets:next(HashTree, {Index, Hash, LastPosition}) of
        {Index, Hash, FoundPosition} ->
            lookup_positions(HashTree, Index, Hash, FoundPosition,
                                [FoundPosition|Acc]);
        _ ->
            Acc
    end.
|
2016-12-10 13:03:38 +00:00
|
|
|
|
|
|
|
%% Record that an object with this Hash (within this Index) is stored at
%% Position.  The whole {Index, Hash, Position} triple is the ETS key, so
%% multiple positions per hash coexist naturally.  Returns the HashTree.
add_position_tohashtree(HashTree, Index, Hash, Position) ->
    _ = ets:insert(HashTree, {{Index, Hash, Position}}),
    HashTree.
|
2016-12-10 13:03:38 +00:00
|
|
|
|
|
|
|
%% Create the in-memory hash tree: an ordered_set ETS table whose keys are
%% {Index, Hash, Position} triples, enabling ordered traversal per index.
new_hashtree() ->
    ets:new(hashtree, [ordered_set]).
|
2016-12-10 13:03:38 +00:00
|
|
|
|
2016-12-13 12:35:30 +00:00
|
|
|
%% List every {Hash, Position} pair recorded under Index.  Pairs are
%% prepended while walking ascending ETS key order, so the result comes
%% back highest key first.
to_list(HashTree, Index) ->
    to_list(HashTree, Index, {0, -1}, []).

to_list(HashTree, Index, {PrevHash, PrevPos}, Acc) ->
    case ets:next(HashTree, {Index, PrevHash, PrevPos}) of
        {Index, Hash, Pos} ->
            to_list(HashTree, Index, {Hash, Pos}, [{Hash, Pos}|Acc]);
        _ ->
            % Walked past this index (or end of table)
            Acc
    end.
|
|
|
|
|
2016-12-13 02:15:13 +00:00
|
|
|
%% Convert the {Hash, Position} pairs under Index into a slot map: a list of
%% {Slot, <<HashLE:32, PosLE:32>>} entries for building the on-disk hash
%% table.  The table has twice as many slots as entries (load factor 0.5),
%% per the CDB design.
to_slotmap(HashTree, Index) ->
    HPList = to_list(HashTree, Index),
    IndexLength = length(HPList) * 2,
    ConvertObjFun =
        fun({Hash, Position}) ->
            HashLE = endian_flip(Hash),
            PosLE = endian_flip(Position),
            NewBin = <<HashLE:32, PosLE:32>>,
            {hash_to_slot(Hash, IndexLength), NewBin}
        end,
    lists:map(ConvertObjFun, HPList).
|
2016-12-13 02:15:13 +00:00
|
|
|
|
|
|
|
%% Build the list of 64-bit slot binaries for one hash table from a slot map
%% sorted by preferred slot.  Collisions probe forward; entries that run off
%% the end wrap round into the first free slot from the beginning.
build_hashtree_binary(SlotMap, IndexLength) ->
    build_hashtree_binary(SlotMap, IndexLength, 0, []).

%% Bin is accumulated in reverse slot order and reversed on completion.
build_hashtree_binary([], IdxLen, SlotPos, Bin) ->
    case SlotPos of
        IdxLen ->
            lists:reverse(Bin);
        N when N < IdxLen ->
            % Pad the remaining unfilled slots with zeros in one binary
            ZeroLen = (IdxLen - N) * 64,
            lists:reverse([<<0:ZeroLen>>|Bin])
    end;
build_hashtree_binary([{TopSlot, TopBin}|SlotMapTail], IdxLen, SlotPos, Bin) ->
    case TopSlot of
        N when N > SlotPos ->
            % Zero-fill the gap up to this entry's preferred slot
            D = N - SlotPos,
            Bridge = lists:duplicate(D, <<0:64>>) ++ Bin,
            UpdBin = [<<TopBin/binary>>|Bridge],
            build_hashtree_binary(SlotMapTail,
                                    IdxLen,
                                    SlotPos + D + 1,
                                    UpdBin);
        N when N =< SlotPos, SlotPos < IdxLen ->
            % Preferred slot already taken - probe into the next free slot
            UpdBin = [<<TopBin/binary>>|Bin],
            build_hashtree_binary(SlotMapTail,
                                    IdxLen,
                                    SlotPos + 1,
                                    UpdBin);
        N when N < SlotPos, SlotPos == IdxLen ->
            % Need to wrap round and put in the first empty slot from the
            % beginning
            Pos = find_firstzero(Bin, length(Bin)),
            {LHS, [<<0:64>>|RHS]} = lists:split(Pos - 1, Bin),
            UpdBin = lists:append(LHS, [TopBin|RHS]),
            build_hashtree_binary(SlotMapTail,
                                    IdxLen,
                                    SlotPos,
                                    UpdBin)
    end.
|
|
|
|
|
|
|
|
|
2016-12-13 03:22:40 +00:00
|
|
|
% Search from the tail of the list to find the first zero
|
|
|
|
%% Search backwards from (1-based) position Pos for the nearest empty
%% 64-bit slot in the list of slot binaries, returning its position.
%% Crashes (by design) if no empty slot exists at or before Pos.
find_firstzero(SlotList, Pos) ->
    EmptySlot = <<0:64>>,
    case lists:nth(Pos, SlotList) of
        EmptySlot ->
            Pos;
        _Occupied ->
            find_firstzero(SlotList, Pos - 1)
    end.
|
|
|
|
|
|
|
|
|
|
|
|
%% Build the hash-table binaries for the given Indexes from HashTree,
%% assuming they start at CurrPos; initialises the per-phase timer totals.
write_hash_tables(Indexes, HashTree, CurrPos) ->
    write_hash_tables(Indexes, HashTree, CurrPos, CurrPos, [], [], {0, 0, 0}).
|
2016-12-13 02:15:13 +00:00
|
|
|
|
2016-12-13 12:35:30 +00:00
|
|
|
%% Build each index's hash-table binary in turn, accumulating the index
%% entries ({Index, Position, Length}) and the table binaries, and timing
%% the three phases (slotmap build, sort, binary build) for logging.
%% Empty indexes are pointed at BasePos with a zero count.
write_hash_tables([], _HashTree, _CurrPos, _BasePos,
                                    IndexList, HT_BinList, {T1, T2, T3}) ->
    leveled_log:log("CDB14", [T1, T2, T3]),
    IL = lists:reverse(IndexList),
    {IL, list_to_binary(HT_BinList)};
write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos,
                                    IndexList, HT_BinList, Timers) ->
    SW1 = os:timestamp(),
    SlotMap = to_slotmap(HashTree, Index),
    T1 = timer:now_diff(os:timestamp(), SW1) + element(1, Timers),
    case SlotMap of
        [] ->
            % No entries for this index - record a zero-length table
            write_hash_tables(Rest,
                                HashTree,
                                CurrPos,
                                BasePos,
                                [{Index, BasePos, 0}|IndexList],
                                HT_BinList,
                                Timers);
        _ ->
            SW2 = os:timestamp(),
            % Table has twice as many slots as entries (load factor 0.5)
            IndexLength = length(SlotMap) * 2,
            SortedMap = lists:keysort(1, SlotMap),
            T2 = timer:now_diff(os:timestamp(), SW2) + element(2, Timers),
            SW3 = os:timestamp(),
            NewSlotBin = build_hashtree_binary(SortedMap, IndexLength),
            T3 = timer:now_diff(os:timestamp(), SW3) + element(3, Timers),
            write_hash_tables(Rest,
                                HashTree,
                                CurrPos + IndexLength * ?DWORD_SIZE,
                                BasePos,
                                [{Index, CurrPos, IndexLength}|IndexList],
                                HT_BinList ++ NewSlotBin,
                                {T1, T2, T3})
    end.
|
|
|
|
|
|
|
|
|
2016-10-26 11:39:27 +01:00
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%%%%%%%%%%%%%%%%
|
|
|
|
% T E S T
|
|
|
|
%%%%%%%%%%%%%%%
|
2015-06-04 21:15:31 +01:00
|
|
|
-ifdef(TEST).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2016-11-03 16:46:25 +00:00
|
|
|
%%
|
|
|
|
%% dump(FileName) -> List
|
|
|
|
%% Given a file name, this function returns a list
|
|
|
|
%% of {key,value} tuples from the CDB.
|
|
|
|
%%
|
|
|
|
|
|
|
|
dump(FileName) ->
    % Read-only scan of a completed CDB file
    {ok, Handle} = file:open(FileName, [binary, raw, read]),
    % Sum the slot counts from the 256 header dwords; slot counts are
    % double the number of entries (tables are half-empty), so halve
    % the total with bsr 1 to get the key count
    Fn = fun(Index, Acc) ->
        {ok, _} = file:position(Handle, ?DWORD_SIZE * Index),
        {_, Count} = read_next_2_integers(Handle),
        Acc + Count
    end,
    NumberOfPairs = lists:foldl(Fn, 0, lists:seq(0,255)) bsr 1,
    io:format("Count of keys in db is ~w~n", [NumberOfPairs]),
    % Key/value tuples start straight after the 2048-byte header
    {ok, _} = file:position(Handle, {bof, 2048}),
    Fn1 = fun(_I,Acc) ->
        {KL,VL} = read_next_2_integers(Handle),
        Key = read_next_term(Handle, KL),
        case read_next_term(Handle, VL, crc) of
            {_, Value} ->
                % Remember the sequential-scan position, then confirm
                % the key is also reachable via the hash-table lookup
                {ok, CurrLoc} = file:position(Handle, cur),
                {Key,Value} = get(Handle, Key)
        end,
        % get/2 moved the file position; restore the scan position
        {ok, _} = file:position(Handle, CurrLoc),
        [{Key,Value} | Acc]
    end,
    lists:foldr(Fn1, [], lists:seq(0, NumberOfPairs-1)).
|
|
|
|
|
|
|
|
%%
|
|
|
|
%% to_dict(FileName)
|
|
|
|
%% Given a filename returns a dict containing
|
|
|
|
%% the key value pairs from the dict.
|
|
|
|
%%
|
|
|
|
%% @spec to_dict(filename()) -> dictionary()
|
|
|
|
%% where
|
|
|
|
%% filename() = string(),
|
|
|
|
%% dictionary() = dict()
|
|
|
|
%%
|
|
|
|
%% Load all {Key, Value} pairs from the CDB file at FileName into a
%% dict, via dump/1.
to_dict(FileName) ->
    dict:from_list(dump(FileName)).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
|
2016-12-13 02:15:13 +00:00
|
|
|
%% Slots cluster towards the end of a 16-slot table, forcing entries to
%% wrap around to the start of the binary.
build_hashtree_bunchedatend_binary_test() ->
    SlotMap = [{1, <<10:32, 0:32>>},
                {4, <<11:32, 100:32>>},
                {8, <<12:32, 200:32>>},
                {8, <<13:32, 300:32>>},
                {14, <<14:32, 400:32>>},
                {14, <<15:32, 500:32>>},
                {15, <<16:32, 600:32>>},
                {15, <<17:32, 700:32>>}],
    TreeBin = list_to_binary(build_hashtree_binary(SlotMap, 16)),
    % Wrapped entries land in slots 0-2; empty slots are zero dwords
    Expected = <<16:32, 600:32, 10:32, 0:32, 17:32, 700:32, 0:64,
                    11:32, 100:32, 0:192, 12:32, 200:32, 13:32, 300:32,
                    0:256, 14:32, 400:32, 15:32, 500:32>>,
    ?assertMatch(Expected, TreeBin).
|
|
|
|
|
|
|
|
%% Slots cluster at the start of a 16-slot table; each collision simply
%% spills into the next slot, leaving the tail of the table empty.
build_hashtree_bunchedatstart_binary_test() ->
    SlotMap = [{1, <<10:32, 0:32>>},
                {2, <<11:32, 100:32>>},
                {3, <<12:32, 200:32>>},
                {4, <<13:32, 300:32>>},
                {5, <<14:32, 400:32>>},
                {6, <<15:32, 500:32>>},
                {7, <<16:32, 600:32>>},
                {8, <<17:32, 700:32>>}],
    TreeBin = list_to_binary(build_hashtree_binary(SlotMap, 16)),
    Expected = <<0:64, 10:32, 0:32, 11:32, 100:32, 12:32, 200:32,
                    13:32, 300:32, 14:32, 400:32, 15:32, 500:32,
                    16:32, 600:32, 17:32, 700:32, 0:448>>,
    % Check the size first so a mismatch is easier to diagnose
    ExpSize = byte_size(Expected),
    ?assertMatch(ExpSize, byte_size(TreeBin)),
    ?assertMatch(Expected, TreeBin).
|
|
|
|
|
2016-12-13 17:02:45 +00:00
|
|
|
|
|
|
|
%% Realistic hashes into a 12-slot table; the result is checked as the
%% raw list of slot binaries rather than a flattened binary.
build_hashtree_test() ->
    SlotMap = [{3, <<2424914688:32, 100:32>>},
                {3, <<2424917760:32, 200:32>>},
                {7, <<2424915712:32, 300:32>>},
                {9, <<2424903936:32, 400:32>>},
                {9, <<2424907008:32, 500:32>>},
                {10, <<2424913408:32, 600:32>>}],
    BinList = build_hashtree_binary(SlotMap, 12),
    % Colliding entries (slots 3 and 9) occupy the following slot
    ExpOut = [<<0:64>>, <<0:64>>, <<0:64>>,
                <<2424914688:32, 100:32>>, <<2424917760:32, 200:32>>,
                <<0:64>>, <<0:64>>,
                <<2424915712:32, 300:32>>, <<0:64>>,
                <<2424903936:32, 400:32>>, <<2424907008:32, 500:32>>,
                <<2424913408:32, 600:32>>],
    ?assertMatch(ExpOut, BinList).
|
|
|
|
|
|
|
|
|
2016-12-13 02:15:13 +00:00
|
|
|
%% find_firstzero/2 should return the LAST zero dword when scanning
%% from the tail, and lists:split at that point should partition the
%% list around it.
find_firstzero_test() ->
    Slots = [<<1:64/integer>>, <<0:64/integer>>,
                <<89:64/integer>>, <<89:64/integer>>,
                <<0:64/integer>>,
                <<71:64/integer>>, <<72:64/integer>>],
    ZeroPos = find_firstzero(Slots, length(Slots)),
    ?assertMatch(5, ZeroPos),
    {LHS, [<<0:64>>|RHS]} = lists:split(ZeroPos - 1, Slots),
    ?assertMatch([<<1:64/integer>>, <<0:64/integer>>,
                    <<89:64/integer>>, <<89:64/integer>>], LHS),
    ?assertMatch([<<71:64/integer>>, <<72:64/integer>>], RHS).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
|
2016-12-13 17:02:45 +00:00
|
|
|
%% Write only those keys that hash to index 0 and verify they can all
%% be read back after the file is completed and reopened as a reader.
cyclecount_test() ->
    io:format("~n~nStarting cycle count test~n"),
    KVL1 = generate_sequentialkeys(5000, []),
    % Filter down to keys whose hash lands in index 0
    KVL2 = lists:foldl(fun({K, V}, Acc) ->
                            H = hash(K),
                            I = hash_to_index(H),
                            case I of
                                0 ->
                                    [{K, V}|Acc];
                                _ ->
                                    Acc
                            end end,
                        [],
                        KVL1),
    {ok, P1} = cdb_open_writer("../test/cycle_count.pnd",
                                #cdb_options{binary_mode=false}),
    ok = cdb_mput(P1, KVL2),
    % Complete renames .pnd to .cdb and returns the new name
    {ok, F2} = cdb_complete(P1),
    {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({K, V}, cdb_get(P2, K)) end,
                    KVL2),
    ok = cdb_close(P2),
    ok = file:delete("../test/cycle_count.cdb").
|
|
|
|
|
|
|
|
|
2015-05-25 22:45:45 +01:00
|
|
|
%% Round-trip a two-key store: create, dump, compare sorted contents.
full_1_test() ->
    List1 = lists:sort([{"key1","value1"},{"key2","value2"}]),
    create("../test/simple.cdb", List1),
    List2 = lists:sort(dump("../test/simple.cdb")),
    ?assertMatch(List1, List2),
    ok = file:delete("../test/simple.cdb").
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Round-trip 1200 keys (200 plugs x 6 awkward prefixes, including
%% punctuation-heavy strings) through create/dump.
full_2_test() ->
    List1 = lists:sort([{lists:flatten(io_lib:format("~s~p",[Prefix,Plug])),
                        lists:flatten(io_lib:format("value~p",[Plug]))}
                        || Plug <- lists:seq(1,200),
                        Prefix <- ["dsd","so39ds","oe9%#*(","020dkslsldclsldowlslf%$#",
                                    "tiep4||","qweq"]]),
    create("../test/full.cdb",List1),
    List2 = lists:sort(dump("../test/full.cdb")),
    ?assertMatch(List1,List2),
    ok = file:delete("../test/full.cdb").
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Store a two-entry dict to file and confirm dump/1 returns the same
%% sorted key/value pairs.
from_dict_test() ->
    D = dict:new(),
    D1 = dict:store("a","b",D),
    D2 = dict:store("c","d",D1),
    ok = from_dict("../test/from_dict_test.cdb",D2),
    io:format("Store created ~n", []),
    KVP = lists:sort(dump("../test/from_dict_test.cdb")),
    D3 = lists:sort(dict:to_list(D2)),
    io:format("KVP is ~w~n", [KVP]),
    io:format("D3 is ~w~n", [D3]),
    ?assertMatch(KVP, D3),
    ok = file:delete("../test/from_dict_test.cdb").
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% from_dict/2 followed by to_dict/1 should reproduce the original
%% dict's contents.
to_dict_test() ->
    D = dict:new(),
    D1 = dict:store("a","b",D),
    D2 = dict:store("c","d",D1),
    ok = from_dict("../test/from_dict_test1.cdb",D2),
    Dict = to_dict("../test/from_dict_test1.cdb"),
    D3 = lists:sort(dict:to_list(D2)),
    D4 = lists:sort(dict:to_list(Dict)),
    ?assertMatch(D4,D3),
    ok = file:delete("../test/from_dict_test1.cdb").
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% An empty binary cannot contain a CRC header and must fail the check.
crccheck_emptyvalue_test() ->
    Empty = <<>>,
    ?assertMatch(false, crccheck_value(Empty)).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Three bytes is shorter than the 4-byte CRC header - must fail.
crccheck_shortvalue_test() ->
    ?assertMatch(false, crccheck_value(<<128,128,32>>)).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Exactly four bytes leaves a CRC header but no payload - must fail.
crccheck_justshortvalue_test() ->
    ?assertMatch(false, crccheck_value(<<128,128,32,64>>)).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% A payload prefixed with its own CRC32 must pass the check.
crccheck_correctvalue_test() ->
    Payload = term_to_binary("some text as value"),
    CRC = erlang:crc32(Payload),
    OnDisk = <<CRC:32/integer, Payload/binary>>,
    ?assertMatch(true, crccheck_value(OnDisk)).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% A CRC header off by one must fail the check.
crccheck_wronghash_test() ->
    Payload = term_to_binary("some text as value"),
    BadCRC = erlang:crc32(Payload) + 1,
    OnDisk = <<BadCRC:32/integer, Payload/binary>>,
    ?assertMatch(false, crccheck_value(OnDisk)).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Dropping even the final bit of an otherwise valid value must fail
%% the CRC check.
crccheck_truncatedvalue_test() ->
    Payload = term_to_binary("some text as value"),
    CRC = erlang:crc32(Payload),
    OnDisk = <<CRC:32/integer, Payload/binary>>,
    KeepBits = bit_size(OnDisk) - 1,
    <<Truncated:KeepBits/bitstring, _/bitstring>> = OnDisk,
    ?assertMatch(false, crccheck_value(Truncated)).
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% Open an existing file as an active (append) file, put one new key,
%% then read it back via the in-memory dict with both precise and
%% loose_presence lookups.
activewrite_singlewrite_test() ->
    Key = "0002",
    Value = "some text as new value",
    InitialD = dict:new(),
    InitialD1 = dict:store("0001", "Initial value", InitialD),
    ok = from_dict("../test/test_mem.cdb", InitialD1),
    io:format("New db file created ~n", []),
    {LastPosition, KeyDict, _} = open_active_file("../test/test_mem.cdb"),
    io:format("File opened as new active file "
                "with LastPosition=~w ~n", [LastPosition]),
    {_, _, UpdKeyDict} = put("../test/test_mem.cdb",
                                Key, Value,
                                {LastPosition, KeyDict}),
    io:format("New key and value added to active file ~n", []),
    % Precise lookup returns the pair; loose_presence returns probably
    % for present keys and missing for absent ones
    ?assertMatch({Key, Value},
                    get_mem(Key, "../test/test_mem.cdb",
                                UpdKeyDict)),
    ?assertMatch(probably,
                    get_mem(Key, "../test/test_mem.cdb",
                            UpdKeyDict,
                            loose_presence)),
    ?assertMatch(missing,
                    get_mem("not_present", "../test/test_mem.cdb",
                            UpdKeyDict,
                            loose_presence)),
    ok = file:delete("../test/test_mem.cdb").
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% White-box test of hash-table slot layout: locate key1's slot via the
%% header, check the following slot is empty, then overwrite the slot
%% pair so the key becomes unfindable.
search_hash_table_findinslot_test() ->
    Key1 = "key1", % this is in slot 3 if count is 8
    D = dict:from_list([{Key1, "value1"}, {"K2", "V2"}, {"K3", "V3"},
                        {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"},
                        {"K8", "V8"}]),
    ok = from_dict("../test/hashtable1_test.cdb",D),
    {ok, Handle} = file:open("../test/hashtable1_test.cdb",
                                [binary, raw, read, write]),
    Hash = hash(Key1),
    Index = hash_to_index(Hash),
    % Header dword at Index -> {table position, slot count}
    {ok, _} = file:position(Handle, {bof, ?DWORD_SIZE*Index}),
    {HashTable, Count} = read_next_2_integers(Handle),
    io:format("Count of ~w~n", [Count]),
    {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}),
    Slot = hash_to_slot(Hash, Count),
    io:format("Slot of ~w~n", [Slot]),
    {ok, _} = file:position(Handle, {cur, Slot * ?DWORD_SIZE}),
    {ReadH3, ReadP3} = read_next_2_integers(Handle),
    {ReadH4, ReadP4} = read_next_2_integers(Handle),
    io:format("Slot 1 has Hash ~w Position ~w~n", [ReadH3, ReadP3]),
    io:format("Slot 2 has Hash ~w Position ~w~n", [ReadH4, ReadP4]),
    % Slot after the key's slot should be empty (zero hash)
    ?assertMatch(0, ReadH4),
    ?assertMatch({"key1", "value1"}, get(Handle, Key1)),
    ?assertMatch(probably, get(Handle, Key1, no_cache, loose_presence)),
    ?assertMatch(missing, get(Handle, "Key99", no_cache, loose_presence)),
    {ok, _} = file:position(Handle, FirstHashPosition),
    FlipH3 = endian_flip(ReadH3),
    FlipP3 = endian_flip(ReadP3),
    % Replacement pair: keep the occupied slot, zero the following one
    RBin = <<FlipH3:32/integer,
                FlipP3:32/integer,
                0:32/integer,
                0:32/integer>>,
    io:format("Replacement binary of ~w~n", [RBin]),
    {ok, OldBin} = file:pread(Handle,
                                FirstHashPosition + (Slot -1) * ?DWORD_SIZE, 16),
    io:format("Bin to be replaced is ~w ~n", [OldBin]),
    ok = file:pwrite(Handle,
                        FirstHashPosition + (Slot -1) * ?DWORD_SIZE,
                        RBin),
    ok = file:close(Handle),
    io:format("Find key following change to hash table~n"),
    % The slot shuffle means the lookup no longer finds key1
    ?assertMatch(missing, get("../test/hashtable1_test.cdb", Key1)),
    ok = file:delete("../test/hashtable1_test.cdb").
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2015-06-04 21:15:31 +01:00
|
|
|
%% Walk all nine keys with get_nextkey/1,2, including one with an empty
%% value; the final call returns {Key, nomorekeys}.
getnextkey_inclemptyvalue_test() ->
    L = [{"K9", "V9"}, {"K2", "V2"}, {"K3", ""},
        {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"},
        {"K8", "V8"}, {"K1", "V1"}],
    ok = create("../test/hashtable2_test.cdb", L),
    {FirstKey, Handle, P1} = get_nextkey("../test/hashtable2_test.cdb"),
    io:format("Next position details of ~w~n", [P1]),
    % Keys come back in insertion (file) order
    ?assertMatch("K9", FirstKey),
    {SecondKey, Handle, P2} = get_nextkey(Handle, P1),
    ?assertMatch("K2", SecondKey),
    {ThirdKeyNoValue, Handle, P3} = get_nextkey(Handle, P2),
    ?assertMatch("K3", ThirdKeyNoValue),
    {_, Handle, P4} = get_nextkey(Handle, P3),
    {_, Handle, P5} = get_nextkey(Handle, P4),
    {_, Handle, P6} = get_nextkey(Handle, P5),
    {_, Handle, P7} = get_nextkey(Handle, P6),
    {_, Handle, P8} = get_nextkey(Handle, P7),
    {LastKey, nomorekeys} = get_nextkey(Handle, P8),
    ?assertMatch("K1", LastKey),
    ok = file:delete("../test/hashtable2_test.cdb").
|
2015-05-25 22:45:45 +01:00
|
|
|
|
|
|
|
%% A freshly-created active file's write position sits just past the
%% 256-dword header, and it contains no keys.
newactivefile_test() ->
    {LastPosition, _, _} = open_active_file("../test/activefile_test.cdb"),
    ?assertMatch(256 * ?DWORD_SIZE, LastPosition),
    Response = get_nextkey("../test/activefile_test.cdb"),
    ?assertMatch(nomorekeys, Response),
    ok = file:delete("../test/activefile_test.cdb").
|
2015-05-25 22:45:45 +01:00
|
|
|
|
2015-06-04 21:15:31 +01:00
|
|
|
%% Keys with empty-string values must survive the from_dict/dump
%% round-trip alongside normal values.
emptyvalue_fromdict_test() ->
    D = dict:new(),
    D1 = dict:store("K1", "V1", D),
    D2 = dict:store("K2", "", D1),
    D3 = dict:store("K3", "V3", D2),
    D4 = dict:store("K4", "", D3),
    ok = from_dict("../test/from_dict_test_ev.cdb",D4),
    io:format("Store created ~n", []),
    KVP = lists:sort(dump("../test/from_dict_test_ev.cdb")),
    D_Result = lists:sort(dict:to_list(D4)),
    io:format("KVP is ~w~n", [KVP]),
    io:format("D_Result is ~w~n", [D_Result]),
    ?assertMatch(KVP, D_Result),
    ok = file:delete("../test/from_dict_test_ev.cdb").
|
2015-06-04 21:15:31 +01:00
|
|
|
|
2016-09-19 15:31:26 +01:00
|
|
|
%% cdb_lastkey/1 reflects write order (last put wins), and survives
%% close/reopen as a writer, completion, and reopen as a reader.
find_lastkey_test() ->
    file:delete("../test/lastkey.pnd"),
    {ok, P1} = cdb_open_writer("../test/lastkey.pnd",
                                #cdb_options{binary_mode=false}),
    ok = cdb_put(P1, "Key1", "Value1"),
    ok = cdb_put(P1, "Key3", "Value3"),
    ok = cdb_put(P1, "Key2", "Value2"),
    % Last key is the most recently written, first is the earliest
    ?assertMatch("Key2", cdb_lastkey(P1)),
    ?assertMatch("Key1", cdb_firstkey(P1)),
    probably = cdb_keycheck(P1, "Key2"),
    ok = cdb_close(P1),
    % Reopening the pending file as a writer rebuilds the same state
    {ok, P2} = cdb_open_writer("../test/lastkey.pnd",
                                #cdb_options{binary_mode=false}),
    ?assertMatch("Key2", cdb_lastkey(P2)),
    probably = cdb_keycheck(P2, "Key2"),
    {ok, F2} = cdb_complete(P2),
    {ok, P3} = cdb_open_reader(F2),
    ?assertMatch("Key2", cdb_lastkey(P3)),
    % Completing a reader just returns the filename
    {ok, _FN} = cdb_complete(P3),
    {ok, P4} = cdb_open_reader(F2),
    ?assertMatch("Key2", cdb_lastkey(P4)),
    ok = cdb_close(P4),
    ok = file:delete("../test/lastkey.cdb").
|
|
|
|
|
2016-09-20 16:13:36 +01:00
|
|
|
%% Exercise cdb_getpositions/2 and the three cdb_directfetch/3 output
%% formats (key_only, key_size, key_value_check) on a three-key file.
get_keys_byposition_simple_test() ->
    {ok, P1} = cdb_open_writer("../test/poskey.pnd",
                                #cdb_options{binary_mode=false}),
    ok = cdb_put(P1, "Key1", "Value1"),
    ok = cdb_put(P1, "Key3", "Value3"),
    ok = cdb_put(P1, "Key2", "Value2"),
    KeyList = ["Key1", "Key2", "Key3"],
    {ok, F2} = cdb_complete(P1),
    {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}),
    PositionList = cdb_getpositions(P2, all),
    io:format("Position list of ~w~n", [PositionList]),
    ?assertMatch(3, length(PositionList)),
    % key_only -> bare keys
    R1 = cdb_directfetch(P2, PositionList, key_only),
    io:format("R1 ~w~n", [R1]),
    ?assertMatch(3, length(R1)),
    lists:foreach(fun(Key) ->
                        ?assertMatch(true, lists:member(Key, KeyList)) end,
                    R1),
    % key_size -> {Key, Size} pairs
    R2 = cdb_directfetch(P2, PositionList, key_size),
    ?assertMatch(3, length(R2)),
    lists:foreach(fun({Key, _Size}) ->
                        ?assertMatch(true, lists:member(Key, KeyList)) end,
                    R2),
    % key_value_check -> {Key, Value, CRCCheckPassed} triples
    R3 = cdb_directfetch(P2, PositionList, key_value_check),
    ?assertMatch(3, length(R3)),
    lists:foreach(fun({Key, Value, Check}) ->
                        ?assertMatch(true, Check),
                        {K, V} = cdb_get(P2, Key),
                        ?assertMatch(K, Key),
                        ?assertMatch(V, Value) end,
                    R3),
    ok = cdb_close(P2),
    ok = file:delete(F2).
|
|
|
|
|
|
|
|
%% Generate [{"Key1","Value1"}, ..., {"KeyN","ValueN"}] for N = Count.
%% Counts down and prepends, so the list emerges already ascending -
%% replacing the previous O(n^2) `Acc ++ [KV]` plus final reverse with
%% an O(n) build producing the identical result.
generate_sequentialkeys(0, KVList) ->
    KVList;
generate_sequentialkeys(Count, KVList) ->
    KV = {"Key" ++ integer_to_list(Count), "Value" ++ integer_to_list(Count)},
    generate_sequentialkeys(Count - 1, [KV|KVList]).
|
|
|
|
|
|
|
|
%% Position fetches across a roll: none available while rolling, then
%% available once the hash table is built; sample sizes are capped at
%% the key count.
get_keys_byposition_manykeys_test() ->
    KeyCount = 1024,
    {ok, P1} = cdb_open_writer("../test/poskeymany.pnd",
                                #cdb_options{binary_mode=false}),
    KVList = generate_sequentialkeys(KeyCount, []),
    lists:foreach(fun({K, V}) -> cdb_put(P1, K, V) end, KVList),
    ok = cdb_roll(P1),
    % Should not return positions when rolling
    ?assertMatch([], cdb_getpositions(P1, 10)),
    % Poll (with backoff) until the post-roll hash table is ready
    lists:foldl(fun(X, Complete) ->
                        case Complete of
                            true ->
                                true;
                            false ->
                                case cdb_checkhashtable(P1) of
                                    true ->
                                        true;
                                    false ->
                                        timer:sleep(X),
                                        false
                                end
                        end end,
                        false,
                        lists:seq(1, 20)),
    ?assertMatch(10, length(cdb_getpositions(P1, 10))),
    {ok, F2} = cdb_complete(P1),
    {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}),
    PositionList = cdb_getpositions(P2, all),
    L1 = length(PositionList),
    ?assertMatch(KeyCount, L1),
    SampleList1 = cdb_getpositions(P2, 10),
    ?assertMatch(10, length(SampleList1)),
    SampleList2 = cdb_getpositions(P2, KeyCount),
    ?assertMatch(KeyCount, length(SampleList2)),
    % Requesting more than exist returns only what exists
    SampleList3 = cdb_getpositions(P2, KeyCount + 1),
    ?assertMatch(KeyCount, length(SampleList3)),
    ok = cdb_close(P2),
    ok = file:delete(F2).
|
|
|
|
|
|
|
|
|
2016-10-26 11:39:27 +01:00
|
|
|
%% An empty file completed and reopened reports empty for both first
%% and last key.
nokeys_test() ->
    {ok, P1} = cdb_open_writer("../test/nohash_emptyfile.pnd",
                                #cdb_options{binary_mode=false}),
    {ok, F2} = cdb_complete(P1),
    {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}),
    io:format("FirstKey is ~s~n", [cdb_firstkey(P2)]),
    io:format("LastKey is ~s~n", [cdb_lastkey(P2)]),
    ?assertMatch(empty, cdb_firstkey(P2)),
    ?assertMatch(empty, cdb_lastkey(P2)),
    ok = cdb_close(P2),
    ok = file:delete(F2).
|
|
|
|
|
2016-10-26 11:39:27 +01:00
|
|
|
%% Bulk-put 1024 keys with cdb_mput/2 and verify gets, misses, and
%% first/last keys both before and after completion.
mput_test() ->
    KeyCount = 1024,
    {ok, P1} = cdb_open_writer("../test/nohash_keysinfile.pnd",
                                #cdb_options{binary_mode=false}),
    KVList = generate_sequentialkeys(KeyCount, []),
    ok = cdb_mput(P1, KVList),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")),
    ?assertMatch({"Key1024", "Value1024"}, cdb_get(P1, "Key1024")),
    ?assertMatch(missing, cdb_get(P1, "Key1025")),
    ?assertMatch(missing, cdb_get(P1, "Key1026")),
    {ok, F2} = cdb_complete(P1),
    {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}),
    ?assertMatch("Key1", cdb_firstkey(P2)),
    ?assertMatch("Key1024", cdb_lastkey(P2)),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P2, "Key1")),
    ?assertMatch({"Key1024", "Value1024"}, cdb_get(P2, "Key1024")),
    ?assertMatch(missing, cdb_get(P2, "Key1025")),
    ?assertMatch(missing, cdb_get(P2, "Key1026")),
    ok = cdb_close(P2),
    ok = file:delete(F2).
|
|
|
|
|
2016-10-29 00:52:49 +01:00
|
|
|
%% Gets and keychecks keep working across the writer's state
%% transitions: writing -> rolled -> delete_pending.
state_test() ->
    {ok, P1} = cdb_open_writer("../test/state_test.pnd",
                                #cdb_options{binary_mode=false}),
    KVList = generate_sequentialkeys(1000, []),
    ok = cdb_mput(P1, KVList),
    ?assertMatch(probably, cdb_keycheck(P1, "Key1")),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")),
    ok = cdb_roll(P1),
    ?assertMatch(probably, cdb_keycheck(P1, "Key1")),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")),
    ok = cdb_deletepending(P1),
    ?assertMatch(probably, cdb_keycheck(P1, "Key1")),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")),
    % Still readable after a pause in delete_pending
    timer:sleep(500),
    ?assertMatch(probably, cdb_keycheck(P1, "Key1")),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")),
    ok = cdb_close(P1).
|
|
|
|
|
2016-12-13 17:02:45 +00:00
|
|
|
|
2016-11-07 23:53:14 +00:00
|
|
|
%% Three keys share hash 22: two are stored, one is not. keycheck says
%% probably for all three (hash collision), but cdb_get resolves the
%% absent key to missing, in every lifecycle state.
hashclash_test() ->
    {ok, P1} = cdb_open_writer("../test/hashclash_test.pnd",
                                #cdb_options{binary_mode=false}),
    Key1 = "Key4184465780",
    Key99 = "Key4254669179",
    KeyNF = "Key9070567319",
    % All three keys were chosen to collide on hash 22
    ?assertMatch(22, hash(Key1)),
    ?assertMatch(22, hash(Key99)),
    ?assertMatch(22, hash(KeyNF)),
    ok = cdb_mput(P1, [{Key1, 1}, {Key99, 99}]),
    ?assertMatch(probably, cdb_keycheck(P1, Key1)),
    ?assertMatch(probably, cdb_keycheck(P1, Key99)),
    ?assertMatch(probably, cdb_keycheck(P1, KeyNF)),
    ?assertMatch({Key1, 1}, cdb_get(P1, Key1)),
    ?assertMatch({Key99, 99}, cdb_get(P1, Key99)),
    ?assertMatch(missing, cdb_get(P1, KeyNF)),
    {ok, FN} = cdb_complete(P1),
    {ok, P2} = cdb_open_reader(FN),
    % Same behaviour after completion as a reader
    ?assertMatch(probably, cdb_keycheck(P2, Key1)),
    ?assertMatch(probably, cdb_keycheck(P2, Key99)),
    ?assertMatch(probably, cdb_keycheck(P2, KeyNF)),
    ?assertMatch({Key1, 1}, cdb_get(P2, Key1)),
    ?assertMatch({Key99, 99}, cdb_get(P2, Key99)),
    ?assertMatch(missing, cdb_get(P2, KeyNF)),
    ok = cdb_deletepending(P2),
    % And while delete is pending
    ?assertMatch(probably, cdb_keycheck(P2, Key1)),
    ?assertMatch(probably, cdb_keycheck(P2, Key99)),
    ?assertMatch(probably, cdb_keycheck(P2, KeyNF)),
    ?assertMatch({Key1, 1}, cdb_get(P2, Key1)),
    ?assertMatch({Key99, 99}, cdb_get(P2, Key99)),
    ?assertMatch(missing, cdb_get(P2, KeyNF)),
    ok = cdb_close(P2).
|
|
|
|
|
2016-10-29 00:52:49 +01:00
|
|
|
%% Write 100 keys, then repeatedly truncate the tail by 1..40 bytes and
%% confirm the file reopens cleanly each time (see
%% corrupt_testfile_at_offset/1).
corruptfile_test() ->
    file:delete("../test/corrupt_test.pnd"),
    {ok, P1} = cdb_open_writer("../test/corrupt_test.pnd",
                                #cdb_options{binary_mode=false}),
    KVList = generate_sequentialkeys(100, []),
    ok = cdb_mput(P1, []), % Not relevant to this test, but needs testing
    lists:foreach(fun({K, V}) -> cdb_put(P1, K, V) end, KVList),
    ?assertMatch(probably, cdb_keycheck(P1, "Key1")),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")),
    ?assertMatch({"Key100", "Value100"}, cdb_get(P1, "Key100")),
    ok = cdb_close(P1),
    lists:foreach(fun(Offset) -> corrupt_testfile_at_offset(Offset) end,
                    lists:seq(1, 40)),
    ok = file:delete("../test/corrupt_test.pnd").
|
2016-11-08 23:07:03 +00:00
|
|
|
|
2016-10-29 00:52:49 +01:00
|
|
|
%% Truncate Offset bytes off the end of the shared corrupt_test file,
%% then reopen as a writer: the truncated last key ("Key100") should be
%% dropped but remain writable again, while earlier keys survive.
corrupt_testfile_at_offset(Offset) ->
    {ok, F1} = file:open("../test/corrupt_test.pnd", ?WRITE_OPS),
    {ok, EofPos} = file:position(F1, eof),
    file:position(F1, EofPos - Offset),
    ok = file:truncate(F1),
    ok = file:close(F1),
    {ok, P2} = cdb_open_writer("../test/corrupt_test.pnd",
                                #cdb_options{binary_mode=false}),
    ?assertMatch(probably, cdb_keycheck(P2, "Key1")),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P2, "Key1")),
    % The partially-truncated trailing entry is discarded on open
    ?assertMatch(missing, cdb_get(P2, "Key100")),
    ok = cdb_put(P2, "Key100", "Value100"),
    ?assertMatch({"Key100", "Value100"}, cdb_get(P2, "Key100")),
    ok = cdb_close(P2).
|
2016-10-26 11:39:27 +01:00
|
|
|
|
2016-11-08 23:07:03 +00:00
|
|
|
%% Flip a byte inside the last value so its CRC fails: reopening as a
%% writer should drop that entry (missing) while keeping earlier keys,
%% and allow the key to be rewritten.
crc_corrupt_writer_test() ->
    file:delete("../test/corruptwrt_test.pnd"),
    {ok, P1} = cdb_open_writer("../test/corruptwrt_test.pnd",
                                #cdb_options{binary_mode=false}),
    KVList = generate_sequentialkeys(100, []),
    ok = cdb_mput(P1, KVList),
    ?assertMatch(probably, cdb_keycheck(P1, "Key1")),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")),
    ?assertMatch({"Key100", "Value100"}, cdb_get(P1, "Key100")),
    ok = cdb_close(P1),
    {ok, Handle} = file:open("../test/corruptwrt_test.pnd", ?WRITE_OPS),
    {ok, EofPos} = file:position(Handle, eof),
    % zero the last byte of the last value
    ok = file:pwrite(Handle, EofPos - 5, <<0:8/integer>>),
    ok = file:close(Handle),
    {ok, P2} = cdb_open_writer("../test/corruptwrt_test.pnd",
                                #cdb_options{binary_mode=false}),
    ?assertMatch(probably, cdb_keycheck(P2, "Key1")),
    ?assertMatch({"Key1", "Value1"}, cdb_get(P2, "Key1")),
    % CRC failure means the corrupted entry is treated as absent
    ?assertMatch(missing, cdb_get(P2, "Key100")),
    ok = cdb_put(P2, "Key100", "Value100"),
    ?assertMatch({"Key100", "Value100"}, cdb_get(P2, "Key100")),
    ok = cdb_close(P2).
|
|
|
|
|
2016-11-08 01:03:09 +00:00
|
|
|
%% Coverage-only: unexpected all-state events, info messages, and
%% code_change calls must be tolerated by the gen_fsm callbacks.
nonsense_coverage_test() ->
    {ok, Pid} = gen_fsm:start(?MODULE, [#cdb_options{}], []),
    ok = gen_fsm:send_all_state_event(Pid, nonsense),
    ?assertMatch({next_state, reader, #state{}}, handle_info(nonsense,
                                                                reader,
                                                                #state{})),
    ?assertMatch({ok, reader, #state{}}, code_change(nonsense,
                                                        reader,
                                                        #state{},
                                                        nonsense)).
|
|
|
|
|
2015-06-04 21:15:31 +01:00
|
|
|
-endif.
|