Some refinements
Minor amendments to formatting and outputs
This commit is contained in:
parent
71a6538288
commit
3b954aea43
1 changed files with 53 additions and 54 deletions
|
@ -105,20 +105,16 @@
|
||||||
%% Summaries could be used for other summaries of table content in the future,
|
%% Summaries could be used for other summaries of table content in the future,
|
||||||
%% perhaps application-specific bloom filters
|
%% perhaps application-specific bloom filters
|
||||||
|
|
||||||
%% The 80-byte header is made up of
|
%% The 56-byte header is made up of
|
||||||
%% - 1 byte version (major 5 bits, minor 3 bits) - default 0.1
|
%% - 1 byte version (major 5 bits, minor 3 bits) - default 0.1
|
||||||
%% - 1 byte options (currently undefined)
|
%% - 1 byte options (currently undefined)
|
||||||
%% - 1 byte Block Size - the expected number of keys in each block
|
%% - 1 byte Block Size - the expected number of keys in each block
|
||||||
%% - 1 byte Block Count - the expected number of blocks in each slot
|
%% - 1 byte Block Count - the expected number of blocks in each slot
|
||||||
%% - 2 byte Slot Count - the maximum number of slots in the file
|
%% - 2 byte Slot Count - the maximum number of slots in the file
|
||||||
%% - 6 bytes - spare
|
%% - 6 bytes - spare
|
||||||
%% - 4 bytes - Blocks position
|
|
||||||
%% - 4 bytes - Blocks length
|
%% - 4 bytes - Blocks length
|
||||||
%% - 4 bytes - Slot Index position
|
|
||||||
%% - 4 bytes - Slot Index length
|
%% - 4 bytes - Slot Index length
|
||||||
%% - 4 bytes - Slot Filter position
|
|
||||||
%% - 4 bytes - Slot Filter length
|
%% - 4 bytes - Slot Filter length
|
||||||
%% - 4 bytes - Table Summary position
|
|
||||||
%% - 4 bytes - Table summary length
|
%% - 4 bytes - Table summary length
|
||||||
%% - 24 bytes - spare
|
%% - 24 bytes - spare
|
||||||
%% - 4 bytes - CRC32
|
%% - 4 bytes - CRC32
|
||||||
|
@ -168,7 +164,7 @@
|
||||||
-define(DIVISOR_BITS, 13).
|
-define(DIVISOR_BITS, 13).
|
||||||
-define(DIVISOR, 8092).
|
-define(DIVISOR, 8092).
|
||||||
-define(COMPRESSION_LEVEL, 1).
|
-define(COMPRESSION_LEVEL, 1).
|
||||||
-define(HEADER_LENGTH, 56).
|
-define(HEADER_LEN, 56).
|
||||||
|
|
||||||
|
|
||||||
-record(state, {version = ?CURRENT_VERSION :: tuple(),
|
-record(state, {version = ?CURRENT_VERSION :: tuple(),
|
||||||
|
@ -181,8 +177,8 @@
|
||||||
slots_pointer :: integer(),
|
slots_pointer :: integer(),
|
||||||
index_pointer :: integer(),
|
index_pointer :: integer(),
|
||||||
filter_pointer :: integer(),
|
filter_pointer :: integer(),
|
||||||
summary_pointer :: integer(),
|
summ_pointer :: integer(),
|
||||||
summary_length :: integer()}).
|
summ_length :: integer()}).
|
||||||
|
|
||||||
|
|
||||||
%% Start a bare file with an initial header and no further details
|
%% Start a bare file with an initial header and no further details
|
||||||
|
@ -199,20 +195,6 @@ create_file(Handle) ->
|
||||||
{Handle, FileMD}.
|
{Handle, FileMD}.
|
||||||
|
|
||||||
|
|
||||||
%% The 56-byte header is made up of
|
|
||||||
%% - 1 byte version (major 5 bits, minor 3 bits) - default 0.1
|
|
||||||
%% - 1 byte options (currently undefined)
|
|
||||||
%% - 1 byte Block Size - the expected number of keys in each block
|
|
||||||
%% - 1 byte Block Count - the expected number of blocks in each slot
|
|
||||||
%% - 2 byte Slot Count - the maximum number of slots in the file
|
|
||||||
%% - 6 bytes - spare
|
|
||||||
%% - 4 bytes - Blocks length
|
|
||||||
%% - 4 bytes - Slot Index length
|
|
||||||
%% - 4 bytes - Slot Filter length
|
|
||||||
%% - 4 bytes - Table summary length
|
|
||||||
%% - 24 bytes - spare
|
|
||||||
%% - 4 bytes - CRC32
|
|
||||||
|
|
||||||
create_header(initial) ->
|
create_header(initial) ->
|
||||||
{Major, Minor} = ?CURRENT_VERSION,
|
{Major, Minor} = ?CURRENT_VERSION,
|
||||||
Version = <<Major:5, Minor:3>>,
|
Version = <<Major:5, Minor:3>>,
|
||||||
|
@ -226,34 +208,47 @@ create_header(initial) ->
|
||||||
CRC32 = erlang:crc32(H1),
|
CRC32 = erlang:crc32(H1),
|
||||||
<<H1/binary, CRC32:32/integer>>.
|
<<H1/binary, CRC32:32/integer>>.
|
||||||
|
|
||||||
|
%% Open a file returning a handle and metadata which can be used in fetch and
|
||||||
|
%% iterator requests
|
||||||
|
|
||||||
|
open_file(Filename) ->
|
||||||
|
{ok, _Handle} = file:open(Filename, [binary, raw, read, write]).
|
||||||
|
%% Need to write other metadata somewhere
|
||||||
|
%% ... probably in summary
|
||||||
|
%% ... is there a need for two levels of summary?
|
||||||
|
|
||||||
%% Take a file handle with a previously created header and complete it based on
|
%% Take a file handle with a previously created header and complete it based on
|
||||||
%% the two key lists KL1 and KL2
|
%% the two key lists KL1 and KL2
|
||||||
|
|
||||||
complete_file(Handle, FileMD, KL1, KL2, Level) ->
|
complete_file(Handle, FileMD, KL1, KL2, Level) ->
|
||||||
{UpdHandle,
|
{{UpdHandle,
|
||||||
PointerList,
|
PointerList,
|
||||||
{LowSQN, HighSQN},
|
{LowSQN, HighSQN},
|
||||||
{LowKey, HighKey}} = write_group(Handle, KL1, KL2, [], <<>>, Level,
|
{LowKey, HighKey}},
|
||||||
|
KeyRemainders} = write_group(Handle, KL1, KL2, [], <<>>, Level,
|
||||||
fun sftwrite_function/2),
|
fun sftwrite_function/2),
|
||||||
{ok, HeaderLengths} = file:pread(UpdHandle, 12, 16),
|
{ok, HeaderLengths} = file:pread(UpdHandle, 12, 16),
|
||||||
<<Blnth:32/integer,
|
<<Blen:32/integer,
|
||||||
Ilnth:32/integer,
|
Ilen:32/integer,
|
||||||
Flnth:32/integer,
|
Flen:32/integer,
|
||||||
Slnth:32/integer>> = HeaderLengths,
|
Slen:32/integer>> = HeaderLengths,
|
||||||
{UpdHandle, FileMD#state{slot_index=PointerList,
|
{UpdHandle,
|
||||||
|
FileMD#state{slot_index=PointerList,
|
||||||
smallest_sqn=LowSQN,
|
smallest_sqn=LowSQN,
|
||||||
highest_sqn=HighSQN,
|
highest_sqn=HighSQN,
|
||||||
smallest_key=LowKey,
|
smallest_key=LowKey,
|
||||||
highest_key=HighKey,
|
highest_key=HighKey,
|
||||||
slots_pointer=?HEADER_LENGTH,
|
slots_pointer=?HEADER_LEN,
|
||||||
index_pointer=?HEADER_LENGTH + Blnth,
|
index_pointer=?HEADER_LEN + Blen,
|
||||||
filter_pointer=?HEADER_LENGTH + Blnth + Ilnth,
|
filter_pointer=?HEADER_LEN + Blen + Ilen,
|
||||||
summary_pointer=?HEADER_LENGTH + Blnth + Ilnth + Flnth,
|
summ_pointer=?HEADER_LEN + Blen + Ilen + Flen,
|
||||||
summary_length=Slnth}}.
|
summ_length=Slen},
|
||||||
|
KeyRemainders}.
|
||||||
|
|
||||||
%% Fetch a Key and Value from a file, returns
|
%% Fetch a Key and Value from a file, returns
|
||||||
%% {value, KV} or not_present
|
%% {value, KV} or not_present
|
||||||
|
%% The key must be pre-checked to ensure it is in the valid range for the file
|
||||||
|
%% Fetching a key that is out of range may fail
|
||||||
|
|
||||||
fetch_keyvalue(Handle, FileMD, Key) ->
|
fetch_keyvalue(Handle, FileMD, Key) ->
|
||||||
{_NearestKey, {FilterLen, PointerF},
|
{_NearestKey, {FilterLen, PointerF},
|
||||||
|
@ -296,14 +291,13 @@ fetch_keyvalue_fromblock([BlockNumber|T], Key, LengthList, Handle, StartOfSlot)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
get_nearestkey(KVList, Key) ->
|
get_nearestkey(KVList, Key) ->
|
||||||
get_nearestkey(KVList, Key, not_found).
|
get_nearestkey(KVList, Key, not_found).
|
||||||
|
|
||||||
get_nearestkey([], _KeyToFind, PrevV) ->
|
get_nearestkey([], _KeyToFind, PrevV) ->
|
||||||
PrevV;
|
PrevV;
|
||||||
get_nearestkey([{K, _FilterInfo, _SlotInfo}|_T], KeyToFind, PrevV) when K > KeyToFind ->
|
get_nearestkey([{K, _FilterInfo, _SlotInfo}|_T], KeyToFind, PrevV)
|
||||||
|
when K > KeyToFind ->
|
||||||
PrevV;
|
PrevV;
|
||||||
get_nearestkey([Result|T], KeyToFind, _) ->
|
get_nearestkey([Result|T], KeyToFind, _) ->
|
||||||
get_nearestkey(T, KeyToFind, Result).
|
get_nearestkey(T, KeyToFind, Result).
|
||||||
|
@ -328,10 +322,11 @@ write_group(Handle, KL1, KL2, {SlotCount, SlotTotal},
|
||||||
UpdHandle = WriteFun(slots , {Handle, SerialisedSlots}),
|
UpdHandle = WriteFun(slots , {Handle, SerialisedSlots}),
|
||||||
case maxslots_bylevel(SlotTotal, Level) of
|
case maxslots_bylevel(SlotTotal, Level) of
|
||||||
reached ->
|
reached ->
|
||||||
complete_write(UpdHandle,
|
{complete_write(UpdHandle,
|
||||||
SlotIndex,
|
SlotIndex,
|
||||||
{LSN, HSN}, {LowKey, LastKey},
|
{LSN, HSN}, {LowKey, LastKey},
|
||||||
WriteFun);
|
WriteFun),
|
||||||
|
{KL1, KL2}};
|
||||||
continue ->
|
continue ->
|
||||||
write_group(UpdHandle, KL1, KL2, {0, SlotTotal},
|
write_group(UpdHandle, KL1, KL2, {0, SlotTotal},
|
||||||
SlotIndex, <<>>,
|
SlotIndex, <<>>,
|
||||||
|
@ -353,10 +348,11 @@ write_group(Handle, KL1, KL2, {SlotCount, SlotTotal},
|
||||||
case Status of
|
case Status of
|
||||||
partial ->
|
partial ->
|
||||||
UpdHandle = WriteFun(slots , {Handle, UpdSlots}),
|
UpdHandle = WriteFun(slots , {Handle, UpdSlots}),
|
||||||
complete_write(UpdHandle,
|
{complete_write(UpdHandle,
|
||||||
UpdSlotIndex,
|
UpdSlotIndex,
|
||||||
SNExtremes, {FirstKey, FinalKey},
|
SNExtremes, {FirstKey, FinalKey},
|
||||||
WriteFun);
|
WriteFun),
|
||||||
|
{KL1rem, KL2rem}};
|
||||||
full ->
|
full ->
|
||||||
write_group(Handle, KL1rem, KL2rem, {SlotCount + 1, SlotTotal + 1},
|
write_group(Handle, KL1rem, KL2rem, {SlotCount + 1, SlotTotal + 1},
|
||||||
UpdSlotIndex, UpdSlots,
|
UpdSlotIndex, UpdSlots,
|
||||||
|
@ -364,7 +360,9 @@ write_group(Handle, KL1, KL2, {SlotCount, SlotTotal},
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
complete_write(Handle, SlotIndex, SNExtremes, {FirstKey, FinalKey}, WriteFun) ->
|
complete_write(Handle, SlotIndex,
|
||||||
|
SNExtremes, {FirstKey, FinalKey},
|
||||||
|
WriteFun) ->
|
||||||
ConvSlotIndex = convert_slotindex(SlotIndex),
|
ConvSlotIndex = convert_slotindex(SlotIndex),
|
||||||
FinHandle = WriteFun(finalise, {Handle,
|
FinHandle = WriteFun(finalise, {Handle,
|
||||||
ConvSlotIndex,
|
ConvSlotIndex,
|
||||||
|
@ -404,7 +402,7 @@ sftwrite_function(finalise,
|
||||||
_KeyExtremes}) ->
|
_KeyExtremes}) ->
|
||||||
{ok, Position} = file:position(Handle, cur),
|
{ok, Position} = file:position(Handle, cur),
|
||||||
|
|
||||||
SlotsLength = Position - ?HEADER_LENGTH,
|
SlotsLength = Position - ?HEADER_LEN,
|
||||||
SerialisedIndex = term_to_binary(PointerIndex),
|
SerialisedIndex = term_to_binary(PointerIndex),
|
||||||
IndexLength = byte_size(SerialisedIndex),
|
IndexLength = byte_size(SerialisedIndex),
|
||||||
|
|
||||||
|
@ -429,7 +427,6 @@ sftwrite_function(finalise,
|
||||||
KeyExtremes}).
|
KeyExtremes}).
|
||||||
|
|
||||||
maxslots_bylevel(SlotTotal, _Level) ->
|
maxslots_bylevel(SlotTotal, _Level) ->
|
||||||
io:format("Slot total of ~w~n", [SlotTotal]),
|
|
||||||
case SlotTotal of
|
case SlotTotal of
|
||||||
?SLOT_COUNT ->
|
?SLOT_COUNT ->
|
||||||
reached;
|
reached;
|
||||||
|
@ -551,7 +548,6 @@ create_slot(KL1, KL2, Level, BlockCount, SegLists, SerialisedSlot, LengthList,
|
||||||
Status}
|
Status}
|
||||||
end,
|
end,
|
||||||
SerialisedBlock = serialise_block(BlockKeyList),
|
SerialisedBlock = serialise_block(BlockKeyList),
|
||||||
% io:format("Serialised Block to be added ~w based on BlockKeyList ~w~n", [SerialisedBlock, BlockKeyList]),
|
|
||||||
BlockLength = byte_size(SerialisedBlock),
|
BlockLength = byte_size(SerialisedBlock),
|
||||||
SerialisedSlot2 = <<SerialisedSlot/binary, SerialisedBlock/binary>>,
|
SerialisedSlot2 = <<SerialisedSlot/binary, SerialisedBlock/binary>>,
|
||||||
create_slot(KL1b, KL2b, Level, BlockCount - 1, SegLists ++ [SegmentList],
|
create_slot(KL1b, KL2b, Level, BlockCount - 1, SegLists ++ [SegmentList],
|
||||||
|
@ -1159,8 +1155,9 @@ testwrite_function(finalise, {Handle, C_SlotIndex, SNExtremes, KeyExtremes}) ->
|
||||||
writegroup_stage1_test() ->
|
writegroup_stage1_test() ->
|
||||||
{KL1, KL2} = sample_keylist(),
|
{KL1, KL2} = sample_keylist(),
|
||||||
Output = write_group([], KL1, KL2, [], <<>>, 1, fun testwrite_function/2),
|
Output = write_group([], KL1, KL2, [], <<>>, 1, fun testwrite_function/2),
|
||||||
{{Handle, {_, PointerIndex}, SNExtremes, KeyExtremes},
|
{{{Handle, {_, PointerIndex}, SNExtremes, KeyExtremes},
|
||||||
PointerIndex, SNExtremes, KeyExtremes} = Output,
|
PointerIndex, SNExtremes, KeyExtremes},
|
||||||
|
{_KL1Rem, _KL2Rem}} = Output,
|
||||||
?assertMatch(SNExtremes, {1,3}),
|
?assertMatch(SNExtremes, {1,3}),
|
||||||
?assertMatch(KeyExtremes, {{o, "Bucket1", "Key1"},
|
?assertMatch(KeyExtremes, {{o, "Bucket1", "Key1"},
|
||||||
{o, "Bucket4", "Key1"}}),
|
{o, "Bucket4", "Key1"}}),
|
||||||
|
@ -1175,13 +1172,13 @@ writegroup_stage1_test() ->
|
||||||
|
|
||||||
initial_create_header_test() ->
|
initial_create_header_test() ->
|
||||||
Output = create_header(initial),
|
Output = create_header(initial),
|
||||||
?assertMatch(?HEADER_LENGTH, byte_size(Output)).
|
?assertMatch(?HEADER_LEN, byte_size(Output)).
|
||||||
|
|
||||||
initial_create_file_test() ->
|
initial_create_file_test() ->
|
||||||
Filename = "../test/test1.sft",
|
Filename = "../test/test1.sft",
|
||||||
{KL1, KL2} = sample_keylist(),
|
{KL1, KL2} = sample_keylist(),
|
||||||
{Handle, FileMD} = create_file(Filename),
|
{Handle, FileMD} = create_file(Filename),
|
||||||
{UpdHandle, UpdFileMD} = complete_file(Handle, FileMD, KL1, KL2, 1),
|
{UpdHandle, UpdFileMD, {[], []}} = complete_file(Handle, FileMD, KL1, KL2, 1),
|
||||||
Result1 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key8"}),
|
Result1 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key8"}),
|
||||||
io:format("Result is ~w~n", [Result1]),
|
io:format("Result is ~w~n", [Result1]),
|
||||||
?assertMatch(Result1, {{o, "Bucket1", "Key8"},
|
?assertMatch(Result1, {{o, "Bucket1", "Key8"},
|
||||||
|
@ -1197,7 +1194,9 @@ big_create_file_test() ->
|
||||||
{KL1, KL2} = {lists:sort(generate_randomkeys(50000)),
|
{KL1, KL2} = {lists:sort(generate_randomkeys(50000)),
|
||||||
lists:sort(generate_randomkeys(50000))},
|
lists:sort(generate_randomkeys(50000))},
|
||||||
{InitHandle, InitFileMD} = create_file(Filename),
|
{InitHandle, InitFileMD} = create_file(Filename),
|
||||||
{Handle, FileMD} = complete_file(InitHandle, InitFileMD, KL1, KL2, 1),
|
{Handle, FileMD, {_KL1Rem, _KL2Rem}} = complete_file(InitHandle,
|
||||||
|
InitFileMD,
|
||||||
|
KL1, KL2, 1),
|
||||||
[{K1, Sq1, St1, V1}|_] = KL1,
|
[{K1, Sq1, St1, V1}|_] = KL1,
|
||||||
[{K2, Sq2, St2, V2}|_] = KL2,
|
[{K2, Sq2, St2, V2}|_] = KL2,
|
||||||
Result1 = fetch_keyvalue(Handle, FileMD, K1),
|
Result1 = fetch_keyvalue(Handle, FileMD, K1),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue