Re-introduce tinybloom to SST

This had been removed due to the CPU cost of adding to the bloom. However, at
that time the tinybloom was implemented by directly manipulating bits through
binary comprehensions, rather than by applying bor, band, bsl and bsr operations.

With these bitwise operations, the cost of producing and checking the bloom is
<10% by comparison with the earlier implementation.
This commit is contained in:
martinsumner 2017-01-24 21:51:12 +00:00
parent f8f2e02d92
commit d57b74d967
2 changed files with 67 additions and 79 deletions

View file

@ -451,7 +451,7 @@ sst_timing({N, SSTTimerD}, SW, TimerType) ->
end. end.
sst_keylist() -> sst_keylist() ->
[slot_bloom, slot_fetch]. [tiny_bloom, slot_bloom, slot_fetch].
get_timing(undefined, SW, TimerType) -> get_timing(undefined, SW, TimerType) ->

View file

@ -114,7 +114,8 @@
-record(slot_index_value, {slot_id :: integer(), -record(slot_index_value, {slot_id :: integer(),
start_position :: integer(), start_position :: integer(),
length :: integer()}). length :: integer(),
bloom :: binary()}).
-record(summary, {first_key :: tuple(), -record(summary, {first_key :: tuple(),
last_key :: tuple(), last_key :: tuple(),
@ -398,6 +399,11 @@ fetch(LedgerKey, Hash, State) ->
Summary = State#state.summary, Summary = State#state.summary,
Slot = lookup_slot(LedgerKey, Summary#summary.index), Slot = lookup_slot(LedgerKey, Summary#summary.index),
SlotID = Slot#slot_index_value.slot_id, SlotID = Slot#slot_index_value.slot_id,
Bloom = Slot#slot_index_value.bloom,
case leveled_tinybloom:check_hash(Hash, Bloom) of
false ->
{not_present, tiny_bloom, SlotID, State};
true ->
CachedBlockIdx = array:get(SlotID - 1, CachedBlockIdx = array:get(SlotID - 1,
State#state.blockindex_cache), State#state.blockindex_cache),
case CachedBlockIdx of case CachedBlockIdx of
@ -430,15 +436,14 @@ fetch(LedgerKey, Hash, State) ->
{true, PosList}), {true, PosList}),
{element(1, Result), slot_fetch, SlotID, State} {element(1, Result), slot_fetch, SlotID, State}
end end
end
end. end.
fetch_range(StartKey, EndKey, ScanWidth, State) -> fetch_range(StartKey, EndKey, ScanWidth, State) ->
Summary = State#state.summary, Summary = State#state.summary,
Handle = State#state.handle, Handle = State#state.handle,
{Slots, LTrim, RTrim} = lookup_slots(StartKey, {Slots, RTrim} = lookup_slots(StartKey, EndKey, Summary#summary.index),
EndKey,
Summary#summary.index),
Self = self(), Self = self(),
SL = length(Slots), SL = length(Slots),
ExpandedSlots = ExpandedSlots =
@ -447,15 +452,11 @@ fetch_range(StartKey, EndKey, ScanWidth, State) ->
[]; [];
1 -> 1 ->
[Slot] = Slots, [Slot] = Slots,
case {LTrim, RTrim} of case RTrim of
{true, true} -> true ->
[{pointer, Self, Slot, StartKey, EndKey}]; [{pointer, Self, Slot, StartKey, EndKey}];
{true, false} -> false ->
[{pointer, Self, Slot, StartKey, all}]; [{pointer, Self, Slot, StartKey, all}]
{false, true} ->
[{pointer, Self, Slot, all, EndKey}];
{false, false} ->
[{pointer, Self, Slot, all, all}]
end; end;
N -> N ->
{LSlot, MidSlots, RSlot} = {LSlot, MidSlots, RSlot} =
@ -472,21 +473,13 @@ fetch_range(StartKey, EndKey, ScanWidth, State) ->
{pointer, Self, S, all, all} {pointer, Self, S, all, all}
end, end,
MidSlots), MidSlots),
case {LTrim, RTrim} of case RTrim of
{true, true} -> true ->
[{pointer, Self, LSlot, StartKey, all}] ++ [{pointer, Self, LSlot, StartKey, all}] ++
MidSlotPointers ++ MidSlotPointers ++
[{pointer, Self, RSlot, all, EndKey}]; [{pointer, Self, RSlot, all, EndKey}];
{true, false} -> false ->
[{pointer, Self, LSlot, StartKey, all}] ++ [{pointer, Self, LSlot, StartKey, all}] ++
MidSlotPointers ++
[{pointer, Self, RSlot, all, all}];
{false, true} ->
[{pointer, Self, LSlot, all, all}] ++
MidSlotPointers ++
[{pointer, Self, RSlot, all, EndKey}];
{false, false} ->
[{pointer, Self, LSlot, all, all}] ++
MidSlotPointers ++ MidSlotPointers ++
[{pointer, Self, RSlot, all, all}] [{pointer, Self, RSlot, all, all}]
end end
@ -603,11 +596,13 @@ build_all_slots(KVL, SC, Pos, SlotID, SlotIdx, BlockIdxA, SlotsBin) ->
lists:split(?SLOT_SIZE, KVL) lists:split(?SLOT_SIZE, KVL)
end, end,
{LastKey, _V} = lists:last(SlotList), {LastKey, _V} = lists:last(SlotList),
{BlockIndex, SlotBin} = generate_binary_slot(SlotList), {BlockIndex, SlotBin, HashList} = generate_binary_slot(SlotList),
Length = byte_size(SlotBin), Length = byte_size(SlotBin),
Bloom = leveled_tinybloom:create_bloom(HashList),
SlotIndexV = #slot_index_value{slot_id = SlotID, SlotIndexV = #slot_index_value{slot_id = SlotID,
start_position = Pos, start_position = Pos,
length = Length}, length = Length,
bloom = Bloom},
build_all_slots(KVRem, build_all_slots(KVRem,
SC - 1, SC - 1,
Pos + Length, Pos + Length,
@ -706,9 +701,9 @@ lookup_slots(StartKey, EndKey, Tree) ->
{EK, _EndSlot} = lists:last(SlotList), {EK, _EndSlot} = lists:last(SlotList),
case EK of case EK of
EndKey -> EndKey ->
{lists:map(MapFun, SlotList), true, false}; {lists:map(MapFun, SlotList), false};
_ -> _ ->
{lists:map(MapFun, SlotList), true, true} {lists:map(MapFun, SlotList), true}
end. end.
@ -739,7 +734,7 @@ lookup_slots(StartKey, EndKey, Tree) ->
generate_binary_slot(KVL) -> generate_binary_slot(KVL) ->
HashFoldFun = HashFoldFun =
fun({K, V}, {PosBinAcc, NoHashCount}) -> fun({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->
{_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}), {_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}),
case is_integer(H1) of case is_integer(H1) of
@ -750,7 +745,8 @@ generate_binary_slot(KVL) ->
{<<1:1/integer, {<<1:1/integer,
PosH1:15/integer, PosH1:15/integer,
PosBinAcc/binary>>, PosBinAcc/binary>>,
0}; 0,
[H1|HashAcc]};
N -> N ->
% The No Hash Count is an integer between 0 and 127 % The No Hash Count is an integer between 0 and 127
% and so at read time should count NHC + 1 % and so at read time should count NHC + 1
@ -760,15 +756,16 @@ generate_binary_slot(KVL) ->
0:1/integer, 0:1/integer,
NHC:7/integer, NHC:7/integer,
PosBinAcc/binary>>, PosBinAcc/binary>>,
0} 0,
HashAcc}
end; end;
false -> false ->
{PosBinAcc, NoHashCount + 1} {PosBinAcc, NoHashCount + 1, HashAcc}
end end
end, end,
{PosBinIndex0, NHC} = lists:foldr(HashFoldFun, {<<>>, 0}, KVL), {PosBinIndex0, NHC, HashL} = lists:foldr(HashFoldFun, {<<>>, 0, []}, KVL),
PosBinIndex1 = PosBinIndex1 =
case NHC of case NHC of
0 -> 0 ->
@ -825,7 +822,7 @@ generate_binary_slot(KVL) ->
CRC32 = erlang:crc32(SlotBin), CRC32 = erlang:crc32(SlotBin),
FullBin = <<CRC32:32/integer, SlotBin/binary>>, FullBin = <<CRC32:32/integer, SlotBin/binary>>,
{PosBinIndex1, FullBin}. {PosBinIndex1, FullBin, HashL}.
binaryslot_get(FullBin, Key, Hash, CachedPosLookup) -> binaryslot_get(FullBin, Key, Hash, CachedPosLookup) ->
@ -1212,18 +1209,9 @@ indexed_list_test() ->
KVL0 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 4)), KVL0 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 4)),
KVL1 = lists:sublist(KVL0, 128), KVL1 = lists:sublist(KVL0, 128),
% BloomAddFun =
% fun({H, K}, {Bloom, Total, Max}) ->
% SW = os:timestamp(),
% Bloom0 = leveled_tinybloom:tiny_enter(H, K, Bloom),
% T0 = timer:now_diff(os:timestamp(), SW),
% {Bloom0, Total + T0, max(T0, Max)}
% end,
SW0 = os:timestamp(), SW0 = os:timestamp(),
{_PosBinIndex1, FullBin} = generate_binary_slot(KVL1), {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(KVL1),
io:format(user, io:format(user,
"Indexed list created slot in ~w microseconds of size ~w~n", "Indexed list created slot in ~w microseconds of size ~w~n",
[timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]), [timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]),
@ -1251,7 +1239,7 @@ indexed_list_mixedkeys_test() ->
KVL1 = lists:sublist(KVL0, 33), KVL1 = lists:sublist(KVL0, 33),
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
{_PosBinIndex1, FullBin} = generate_binary_slot(Keys), {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
{TestK1, TestV1} = lists:nth(4, KVL1), {TestK1, TestV1} = lists:nth(4, KVL1),
MH1 = leveled_codec:magic_hash(TestK1), MH1 = leveled_codec:magic_hash(TestK1),
@ -1277,7 +1265,7 @@ indexed_list_mixedkeys2_test() ->
IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)), IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)),
% this isn't actually ordered correctly % this isn't actually ordered correctly
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2, Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
{_PosBinIndex1, FullBin} = generate_binary_slot(Keys), {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
lists:foreach(fun({K, V}) -> lists:foreach(fun({K, V}) ->
MH = leveled_codec:magic_hash(K), MH = leveled_codec:magic_hash(K),
test_binary_slot(FullBin, K, MH, {K, V}) test_binary_slot(FullBin, K, MH, {K, V})
@ -1286,7 +1274,7 @@ indexed_list_mixedkeys2_test() ->
indexed_list_allindexkeys_test() -> indexed_list_allindexkeys_test() ->
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128), Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
{PosBinIndex1, FullBin} = generate_binary_slot(Keys), {PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
?assertMatch(<<127:8/integer>>, PosBinIndex1), ?assertMatch(<<127:8/integer>>, PosBinIndex1),
% SW = os:timestamp(), % SW = os:timestamp(),
BinToList = binaryslot_tolist(FullBin), BinToList = binaryslot_tolist(FullBin),
@ -1299,7 +1287,7 @@ indexed_list_allindexkeys_test() ->
indexed_list_allindexkeys_trimmed_test() -> indexed_list_allindexkeys_trimmed_test() ->
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128), Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
{PosBinIndex1, FullBin} = generate_binary_slot(Keys), {PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
?assertMatch(<<127:8/integer>>, PosBinIndex1), ?assertMatch(<<127:8/integer>>, PosBinIndex1),
?assertMatch(Keys, binaryslot_trimmedlist(FullBin, ?assertMatch(Keys, binaryslot_trimmedlist(FullBin,
{i, {i,
@ -1337,7 +1325,7 @@ indexed_list_mixedkeys_bitflip_test() ->
KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)), KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)),
KVL1 = lists:sublist(KVL0, 33), KVL1 = lists:sublist(KVL0, 33),
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
{_PosBinIndex1, FullBin} = generate_binary_slot(Keys), {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
L = byte_size(FullBin), L = byte_size(FullBin),
Byte1 = random:uniform(L), Byte1 = random:uniform(L),
<<PreB1:Byte1/binary, A:8/integer, PostByte1/binary>> = FullBin, <<PreB1:Byte1/binary, A:8/integer, PostByte1/binary>> = FullBin,