Some refinements

Minor amendments to formatting and outputs
This commit is contained in:
martinsumner 2016-07-06 18:09:40 +01:00
parent 71a6538288
commit 3b954aea43

View file

@ -105,20 +105,16 @@
%% Summaries could be used for other summaries of table content in the future, %% Summaries could be used for other summaries of table content in the future,
%% perhaps application-specific bloom filters %% perhaps application-specific bloom filters
%% The 80-byte header is made up of %% The 56-byte header is made up of
%% - 1 byte version (major 5 bits, minor 3 bits) - default 0.1 %% - 1 byte version (major 5 bits, minor 3 bits) - default 0.1
%% - 1 byte options (currently undefined) %% - 1 byte options (currently undefined)
%% - 1 byte Block Size - the expected number of keys in each block %% - 1 byte Block Size - the expected number of keys in each block
%% - 1 byte Block Count - the expected number of blocks in each slot %% - 1 byte Block Count - the expected number of blocks in each slot
%% - 2 byte Slot Count - the maximum number of slots in the file %% - 2 byte Slot Count - the maximum number of slots in the file
%% - 6 bytes - spare %% - 6 bytes - spare
%% - 4 bytes - Blocks position
%% - 4 bytes - Blocks length %% - 4 bytes - Blocks length
%% - 4 bytes - Slot Index position
%% - 4 bytes - Slot Index length %% - 4 bytes - Slot Index length
%% - 4 bytes - Slot Filter position
%% - 4 bytes - Slot Filter length %% - 4 bytes - Slot Filter length
%% - 4 bytes - Table Summary position
%% - 4 bytes - Table summary length %% - 4 bytes - Table summary length
%% - 24 bytes - spare %% - 24 bytes - spare
%% - 4 bytes - CRC32 %% - 4 bytes - CRC32
@ -168,7 +164,7 @@
-define(DIVISOR_BITS, 13). -define(DIVISOR_BITS, 13).
-define(DIVISOR, 8092). -define(DIVISOR, 8092).
-define(COMPRESSION_LEVEL, 1). -define(COMPRESSION_LEVEL, 1).
-define(HEADER_LENGTH, 56). -define(HEADER_LEN, 56).
-record(state, {version = ?CURRENT_VERSION :: tuple(), -record(state, {version = ?CURRENT_VERSION :: tuple(),
@ -181,8 +177,8 @@
slots_pointer :: integer(), slots_pointer :: integer(),
index_pointer :: integer(), index_pointer :: integer(),
filter_pointer :: integer(), filter_pointer :: integer(),
summary_pointer :: integer(), summ_pointer :: integer(),
summary_length :: integer()}). summ_length :: integer()}).
%% Start a bare file with an initial header and no further details %% Start a bare file with an initial header and no further details
@ -199,20 +195,6 @@ create_file(Handle) ->
{Handle, FileMD}. {Handle, FileMD}.
%% The 56-byte header is made up of
%% - 1 byte version (major 5 bits, minor 3 bits) - default 0.1
%% - 1 byte options (currently undefined)
%% - 1 byte Block Size - the expected number of keys in each block
%% - 1 byte Block Count - the expected number of blocks in each slot
%% - 2 byte Slot Count - the maximum number of slots in the file
%% - 6 bytes - spare
%% - 4 bytes - Blocks length
%% - 4 bytes - Slot Index length
%% - 4 bytes - Slot Filter length
%% - 4 bytes - Table summary length
%% - 24 bytes - spare
%% - 4 bytes - CRC32
create_header(initial) -> create_header(initial) ->
{Major, Minor} = ?CURRENT_VERSION, {Major, Minor} = ?CURRENT_VERSION,
Version = <<Major:5, Minor:3>>, Version = <<Major:5, Minor:3>>,
@ -226,34 +208,47 @@ create_header(initial) ->
CRC32 = erlang:crc32(H1), CRC32 = erlang:crc32(H1),
<<H1/binary, CRC32:32/integer>>. <<H1/binary, CRC32:32/integer>>.
%% Open an existing file by name, returning the {ok, Handle} result of
%% file:open/2. The file is opened in binary, raw mode for both reading and
%% writing. Any other result from file:open/2 fails the match and raises
%% a badmatch error.
%% NOTE(review): only the handle is returned at present - no file metadata is
%% built here, although fetch_keyvalue/3 also takes a FileMD argument
open_file(Filename) ->
{ok, _Handle} = file:open(Filename, [binary, raw, read, write]).
%% Need to write other metadata somewhere
%% ... probably in summary
%% ... is there a need for two levels of summary?
%% Take a file handle with a previously created header and complete it based on %% Take a file handle with a previously created header and complete it based on
%% the two key lists KL1 and KL2 %% the two key lists KL1 and KL2
complete_file(Handle, FileMD, KL1, KL2, Level) -> complete_file(Handle, FileMD, KL1, KL2, Level) ->
{UpdHandle, {{UpdHandle,
PointerList, PointerList,
{LowSQN, HighSQN}, {LowSQN, HighSQN},
{LowKey, HighKey}} = write_group(Handle, KL1, KL2, [], <<>>, Level, {LowKey, HighKey}},
KeyRemainders} = write_group(Handle, KL1, KL2, [], <<>>, Level,
fun sftwrite_function/2), fun sftwrite_function/2),
{ok, HeaderLengths} = file:pread(UpdHandle, 12, 16), {ok, HeaderLengths} = file:pread(UpdHandle, 12, 16),
<<Blnth:32/integer, <<Blen:32/integer,
Ilnth:32/integer, Ilen:32/integer,
Flnth:32/integer, Flen:32/integer,
Slnth:32/integer>> = HeaderLengths, Slen:32/integer>> = HeaderLengths,
{UpdHandle, FileMD#state{slot_index=PointerList, {UpdHandle,
smallest_sqn=LowSQN, FileMD#state{slot_index=PointerList,
highest_sqn=HighSQN, smallest_sqn=LowSQN,
smallest_key=LowKey, highest_sqn=HighSQN,
highest_key=HighKey, smallest_key=LowKey,
slots_pointer=?HEADER_LENGTH, highest_key=HighKey,
index_pointer=?HEADER_LENGTH + Blnth, slots_pointer=?HEADER_LEN,
filter_pointer=?HEADER_LENGTH + Blnth + Ilnth, index_pointer=?HEADER_LEN + Blen,
summary_pointer=?HEADER_LENGTH + Blnth + Ilnth + Flnth, filter_pointer=?HEADER_LEN + Blen + Ilen,
summary_length=Slnth}}. summ_pointer=?HEADER_LEN + Blen + Ilen + Flen,
summ_length=Slen},
KeyRemainders}.
%% Fetch a Key and Value from a file, returns %% Fetch a Key and Value from a file, returns
%% {value, KV} or not_present %% {value, KV} or not_present
%% The key must be pre-checked to ensure it is in the valid range for the file
%% A key out of range may fail
fetch_keyvalue(Handle, FileMD, Key) -> fetch_keyvalue(Handle, FileMD, Key) ->
{_NearestKey, {FilterLen, PointerF}, {_NearestKey, {FilterLen, PointerF},
@ -296,14 +291,13 @@ fetch_keyvalue_fromblock([BlockNumber|T], Key, LengthList, Handle, StartOfSlot)
end. end.
get_nearestkey(KVList, Key) -> get_nearestkey(KVList, Key) ->
get_nearestkey(KVList, Key, not_found). get_nearestkey(KVList, Key, not_found).
get_nearestkey([], _KeyToFind, PrevV) -> get_nearestkey([], _KeyToFind, PrevV) ->
PrevV; PrevV;
get_nearestkey([{K, _FilterInfo, _SlotInfo}|_T], KeyToFind, PrevV) when K > KeyToFind -> get_nearestkey([{K, _FilterInfo, _SlotInfo}|_T], KeyToFind, PrevV)
when K > KeyToFind ->
PrevV; PrevV;
get_nearestkey([Result|T], KeyToFind, _) -> get_nearestkey([Result|T], KeyToFind, _) ->
get_nearestkey(T, KeyToFind, Result). get_nearestkey(T, KeyToFind, Result).
@ -328,10 +322,11 @@ write_group(Handle, KL1, KL2, {SlotCount, SlotTotal},
UpdHandle = WriteFun(slots , {Handle, SerialisedSlots}), UpdHandle = WriteFun(slots , {Handle, SerialisedSlots}),
case maxslots_bylevel(SlotTotal, Level) of case maxslots_bylevel(SlotTotal, Level) of
reached -> reached ->
complete_write(UpdHandle, {complete_write(UpdHandle,
SlotIndex, SlotIndex,
{LSN, HSN}, {LowKey, LastKey}, {LSN, HSN}, {LowKey, LastKey},
WriteFun); WriteFun),
{KL1, KL2}};
continue -> continue ->
write_group(UpdHandle, KL1, KL2, {0, SlotTotal}, write_group(UpdHandle, KL1, KL2, {0, SlotTotal},
SlotIndex, <<>>, SlotIndex, <<>>,
@ -353,10 +348,11 @@ write_group(Handle, KL1, KL2, {SlotCount, SlotTotal},
case Status of case Status of
partial -> partial ->
UpdHandle = WriteFun(slots , {Handle, UpdSlots}), UpdHandle = WriteFun(slots , {Handle, UpdSlots}),
complete_write(UpdHandle, {complete_write(UpdHandle,
UpdSlotIndex, UpdSlotIndex,
SNExtremes, {FirstKey, FinalKey}, SNExtremes, {FirstKey, FinalKey},
WriteFun); WriteFun),
{KL1rem, KL2rem}};
full -> full ->
write_group(Handle, KL1rem, KL2rem, {SlotCount + 1, SlotTotal + 1}, write_group(Handle, KL1rem, KL2rem, {SlotCount + 1, SlotTotal + 1},
UpdSlotIndex, UpdSlots, UpdSlotIndex, UpdSlots,
@ -364,7 +360,9 @@ write_group(Handle, KL1, KL2, {SlotCount, SlotTotal},
end. end.
complete_write(Handle, SlotIndex, SNExtremes, {FirstKey, FinalKey}, WriteFun) -> complete_write(Handle, SlotIndex,
SNExtremes, {FirstKey, FinalKey},
WriteFun) ->
ConvSlotIndex = convert_slotindex(SlotIndex), ConvSlotIndex = convert_slotindex(SlotIndex),
FinHandle = WriteFun(finalise, {Handle, FinHandle = WriteFun(finalise, {Handle,
ConvSlotIndex, ConvSlotIndex,
@ -404,7 +402,7 @@ sftwrite_function(finalise,
_KeyExtremes}) -> _KeyExtremes}) ->
{ok, Position} = file:position(Handle, cur), {ok, Position} = file:position(Handle, cur),
SlotsLength = Position - ?HEADER_LENGTH, SlotsLength = Position - ?HEADER_LEN,
SerialisedIndex = term_to_binary(PointerIndex), SerialisedIndex = term_to_binary(PointerIndex),
IndexLength = byte_size(SerialisedIndex), IndexLength = byte_size(SerialisedIndex),
@ -429,7 +427,6 @@ sftwrite_function(finalise,
KeyExtremes}). KeyExtremes}).
maxslots_bylevel(SlotTotal, _Level) -> maxslots_bylevel(SlotTotal, _Level) ->
io:format("Slot total of ~w~n", [SlotTotal]),
case SlotTotal of case SlotTotal of
?SLOT_COUNT -> ?SLOT_COUNT ->
reached; reached;
@ -551,7 +548,6 @@ create_slot(KL1, KL2, Level, BlockCount, SegLists, SerialisedSlot, LengthList,
Status} Status}
end, end,
SerialisedBlock = serialise_block(BlockKeyList), SerialisedBlock = serialise_block(BlockKeyList),
% io:format("Serialised Block to be added ~w based on BlockKeyList ~w~n", [SerialisedBlock, BlockKeyList]),
BlockLength = byte_size(SerialisedBlock), BlockLength = byte_size(SerialisedBlock),
SerialisedSlot2 = <<SerialisedSlot/binary, SerialisedBlock/binary>>, SerialisedSlot2 = <<SerialisedSlot/binary, SerialisedBlock/binary>>,
create_slot(KL1b, KL2b, Level, BlockCount - 1, SegLists ++ [SegmentList], create_slot(KL1b, KL2b, Level, BlockCount - 1, SegLists ++ [SegmentList],
@ -1159,8 +1155,9 @@ testwrite_function(finalise, {Handle, C_SlotIndex, SNExtremes, KeyExtremes}) ->
writegroup_stage1_test() -> writegroup_stage1_test() ->
{KL1, KL2} = sample_keylist(), {KL1, KL2} = sample_keylist(),
Output = write_group([], KL1, KL2, [], <<>>, 1, fun testwrite_function/2), Output = write_group([], KL1, KL2, [], <<>>, 1, fun testwrite_function/2),
{{Handle, {_, PointerIndex}, SNExtremes, KeyExtremes}, {{{Handle, {_, PointerIndex}, SNExtremes, KeyExtremes},
PointerIndex, SNExtremes, KeyExtremes} = Output, PointerIndex, SNExtremes, KeyExtremes},
{_KL1Rem, _KL2Rem}} = Output,
?assertMatch(SNExtremes, {1,3}), ?assertMatch(SNExtremes, {1,3}),
?assertMatch(KeyExtremes, {{o, "Bucket1", "Key1"}, ?assertMatch(KeyExtremes, {{o, "Bucket1", "Key1"},
{o, "Bucket4", "Key1"}}), {o, "Bucket4", "Key1"}}),
@ -1175,13 +1172,13 @@ writegroup_stage1_test() ->
initial_create_header_test() -> initial_create_header_test() ->
Output = create_header(initial), Output = create_header(initial),
?assertMatch(?HEADER_LENGTH, byte_size(Output)). ?assertMatch(?HEADER_LEN, byte_size(Output)).
initial_create_file_test() -> initial_create_file_test() ->
Filename = "../test/test1.sft", Filename = "../test/test1.sft",
{KL1, KL2} = sample_keylist(), {KL1, KL2} = sample_keylist(),
{Handle, FileMD} = create_file(Filename), {Handle, FileMD} = create_file(Filename),
{UpdHandle, UpdFileMD} = complete_file(Handle, FileMD, KL1, KL2, 1), {UpdHandle, UpdFileMD, {[], []}} = complete_file(Handle, FileMD, KL1, KL2, 1),
Result1 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key8"}), Result1 = fetch_keyvalue(UpdHandle, UpdFileMD, {o, "Bucket1", "Key8"}),
io:format("Result is ~w~n", [Result1]), io:format("Result is ~w~n", [Result1]),
?assertMatch(Result1, {{o, "Bucket1", "Key8"}, ?assertMatch(Result1, {{o, "Bucket1", "Key8"},
@ -1197,7 +1194,9 @@ big_create_file_test() ->
{KL1, KL2} = {lists:sort(generate_randomkeys(50000)), {KL1, KL2} = {lists:sort(generate_randomkeys(50000)),
lists:sort(generate_randomkeys(50000))}, lists:sort(generate_randomkeys(50000))},
{InitHandle, InitFileMD} = create_file(Filename), {InitHandle, InitFileMD} = create_file(Filename),
{Handle, FileMD} = complete_file(InitHandle, InitFileMD, KL1, KL2, 1), {Handle, FileMD, {_KL1Rem, _KL2Rem}} = complete_file(InitHandle,
InitFileMD,
KL1, KL2, 1),
[{K1, Sq1, St1, V1}|_] = KL1, [{K1, Sq1, St1, V1}|_] = KL1,
[{K2, Sq2, St2, V2}|_] = KL2, [{K2, Sq2, St2, V2}|_] = KL2,
Result1 = fetch_keyvalue(Handle, FileMD, K1), Result1 = fetch_keyvalue(Handle, FileMD, K1),