Bring compression back to SFT

It is expensive on the CPU, but it leads to a 4x increase in cache
coverage.

Try to make some small micro-gains in list handling in create_block.
This commit is contained in:
martinsumner 2016-12-11 15:02:33 +00:00
parent 44cee5a6e8
commit 1b63845050
2 changed files with 24 additions and 12 deletions

View file

@ -246,7 +246,7 @@ check_forinkertype(_LedgerKey, _Object) ->
create_value_for_journal(Value) -> create_value_for_journal(Value) ->
case Value of case Value of
{Object, KeyChanges} -> {Object, KeyChanges} ->
term_to_binary({Object, KeyChanges}); term_to_binary({Object, KeyChanges}, [compressed]);
Value when is_binary(Value) -> Value when is_binary(Value) ->
Value Value
end. end.

View file

@ -179,7 +179,7 @@
-define(DWORD_SIZE, 8). -define(DWORD_SIZE, 8).
-define(CURRENT_VERSION, {0,1}). -define(CURRENT_VERSION, {0,1}).
-define(SLOT_COUNT, 256). -define(SLOT_COUNT, 256).
-define(SLOT_GROUPWRITE_COUNT, 128). -define(SLOT_GROUPWRITE_COUNT, 64).
-define(BLOCK_SIZE, 32). -define(BLOCK_SIZE, 32).
-define(BLOCK_COUNT, 4). -define(BLOCK_COUNT, 4).
-define(FOOTERPOS_HEADERPOS, 2). -define(FOOTERPOS_HEADERPOS, 2).
@ -970,16 +970,25 @@ create_block(KeyList1, KeyList2,
when length(BlockKeyList)==?BLOCK_SIZE -> when length(BlockKeyList)==?BLOCK_SIZE ->
case {KeyList1, KeyList2} of case {KeyList1, KeyList2} of
{[], []} -> {[], []} ->
{BlockKeyList, complete, {LSN, HSN}, SegmentList, {lists:reverse(BlockKeyList),
complete,
{LSN, HSN},
lists:reverse(SegmentList),
Bloom, Bloom,
[], []}; [], []};
_ -> _ ->
{BlockKeyList, full, {LSN, HSN}, SegmentList, {lists:reverse(BlockKeyList),
full,
{LSN, HSN},
lists:reverse(SegmentList),
Bloom, Bloom,
KeyList1, KeyList2} KeyList1, KeyList2}
end; end;
create_block([], [], BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom) -> create_block([], [], BlockKeyList, {LSN, HSN}, SegmentList, _LevelR, Bloom) ->
{BlockKeyList, partial, {LSN, HSN}, SegmentList, {lists:reverse(BlockKeyList),
partial,
{LSN, HSN},
lists:reverse(SegmentList),
Bloom, Bloom,
[], []}; [], []};
create_block(KeyList1, KeyList2, create_block(KeyList1, KeyList2,
@ -992,10 +1001,8 @@ create_block(KeyList1, KeyList2,
{SQN, _St, MH, _MD} = leveled_codec:striphead_to_details(V), {SQN, _St, MH, _MD} = leveled_codec:striphead_to_details(V),
{UpdLSN, UpdHSN} = update_sequencenumbers(SQN, LSN, HSN), {UpdLSN, UpdHSN} = update_sequencenumbers(SQN, LSN, HSN),
UpdBloom = leveled_tinybloom:enter({hash, MH}, Bloom), UpdBloom = leveled_tinybloom:enter({hash, MH}, Bloom),
NewBlockKeyList = lists:append(BlockKeyList, NewBlockKeyList = [TopKey|BlockKeyList],
[TopKey]), NewSegmentList = [hash_for_segmentid(TopKey)|SegmentList],
NewSegmentList = lists:append(SegmentList,
[hash_for_segmentid(TopKey)]),
create_block(Rem1, Rem2, create_block(Rem1, Rem2,
NewBlockKeyList, {UpdLSN, UpdHSN}, NewBlockKeyList, {UpdLSN, UpdHSN},
NewSegmentList, LevelR, UpdBloom); NewSegmentList, LevelR, UpdBloom);
@ -1061,13 +1068,13 @@ create_slot(KL1, KL2, LevelR, BlockCount, Bloom,
{null, LSN, HSN, LastKey, Status}; {null, LSN, HSN, LastKey, Status};
{null, _} -> {null, _} ->
[NewLowKeyV|_] = BlockKeyList, [NewLowKeyV|_] = BlockKeyList,
NewLastKey = lists:last([{keyonly, LastKey}|BlockKeyList]), NewLastKey = last_key(BlockKeyList, {keyonly, LastKey}),
{leveled_codec:strip_to_keyonly(NewLowKeyV), {leveled_codec:strip_to_keyonly(NewLowKeyV),
min(LSN, LSNb), max(HSN, HSNb), min(LSN, LSNb), max(HSN, HSNb),
leveled_codec:strip_to_keyonly(NewLastKey), leveled_codec:strip_to_keyonly(NewLastKey),
Status}; Status};
{_, _} -> {_, _} ->
NewLastKey = lists:last([{keyonly, LastKey}|BlockKeyList]), NewLastKey = last_key(BlockKeyList, {keyonly, LastKey}),
{LowKey, {LowKey,
min(LSN, LSNb), max(HSN, HSNb), min(LSN, LSNb), max(HSN, HSNb),
leveled_codec:strip_to_keyonly(NewLastKey), leveled_codec:strip_to_keyonly(NewLastKey),
@ -1081,8 +1088,13 @@ create_slot(KL1, KL2, LevelR, BlockCount, Bloom,
SegList2, SerialisedSlot2, LengthList ++ [BlockLength], SegList2, SerialisedSlot2, LengthList ++ [BlockLength],
TrackingMetadata). TrackingMetadata).
%% Return the final element of BlockKeyList, or the supplied LastKey
%% unchanged when BlockKeyList is the empty list.
last_key(BlockKeyList, LastKey) ->
    case BlockKeyList of
        [] ->
            LastKey;
        _ ->
            lists:last(BlockKeyList)
    end.
serialise_block(BlockKeyList) -> serialise_block(BlockKeyList) ->
term_to_binary(BlockKeyList). term_to_binary(BlockKeyList, [compressed]).
%% Compare the keys at the head of the list, and either skip that "best" key or %% Compare the keys at the head of the list, and either skip that "best" key or