Re-introduce tinybloom to SST
This had been removed due to the CPU cost of adding - however, at that time the tinybloom was implemented by directly manipulating bits through binary comprehension, rather than by applying bor, band, bsl and bsr operations. With these operations the cost of producing and checking the bloom is <10% by comparison.
This commit is contained in:
parent
f8f2e02d92
commit
d57b74d967
2 changed files with 67 additions and 79 deletions
|
@ -451,7 +451,7 @@ sst_timing({N, SSTTimerD}, SW, TimerType) ->
|
||||||
end.
|
end.
|
||||||
|
|
||||||
sst_keylist() ->
|
sst_keylist() ->
|
||||||
[slot_bloom, slot_fetch].
|
[tiny_bloom, slot_bloom, slot_fetch].
|
||||||
|
|
||||||
|
|
||||||
get_timing(undefined, SW, TimerType) ->
|
get_timing(undefined, SW, TimerType) ->
|
||||||
|
|
|
@ -114,7 +114,8 @@
|
||||||
|
|
||||||
-record(slot_index_value, {slot_id :: integer(),
|
-record(slot_index_value, {slot_id :: integer(),
|
||||||
start_position :: integer(),
|
start_position :: integer(),
|
||||||
length :: integer()}).
|
length :: integer(),
|
||||||
|
bloom :: binary()}).
|
||||||
|
|
||||||
-record(summary, {first_key :: tuple(),
|
-record(summary, {first_key :: tuple(),
|
||||||
last_key :: tuple(),
|
last_key :: tuple(),
|
||||||
|
@ -398,6 +399,11 @@ fetch(LedgerKey, Hash, State) ->
|
||||||
Summary = State#state.summary,
|
Summary = State#state.summary,
|
||||||
Slot = lookup_slot(LedgerKey, Summary#summary.index),
|
Slot = lookup_slot(LedgerKey, Summary#summary.index),
|
||||||
SlotID = Slot#slot_index_value.slot_id,
|
SlotID = Slot#slot_index_value.slot_id,
|
||||||
|
Bloom = Slot#slot_index_value.bloom,
|
||||||
|
case leveled_tinybloom:check_hash(Hash, Bloom) of
|
||||||
|
false ->
|
||||||
|
{not_present, tiny_bloom, SlotID, State};
|
||||||
|
true ->
|
||||||
CachedBlockIdx = array:get(SlotID - 1,
|
CachedBlockIdx = array:get(SlotID - 1,
|
||||||
State#state.blockindex_cache),
|
State#state.blockindex_cache),
|
||||||
case CachedBlockIdx of
|
case CachedBlockIdx of
|
||||||
|
@ -430,15 +436,14 @@ fetch(LedgerKey, Hash, State) ->
|
||||||
{true, PosList}),
|
{true, PosList}),
|
||||||
{element(1, Result), slot_fetch, SlotID, State}
|
{element(1, Result), slot_fetch, SlotID, State}
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
fetch_range(StartKey, EndKey, ScanWidth, State) ->
|
fetch_range(StartKey, EndKey, ScanWidth, State) ->
|
||||||
Summary = State#state.summary,
|
Summary = State#state.summary,
|
||||||
Handle = State#state.handle,
|
Handle = State#state.handle,
|
||||||
{Slots, LTrim, RTrim} = lookup_slots(StartKey,
|
{Slots, RTrim} = lookup_slots(StartKey, EndKey, Summary#summary.index),
|
||||||
EndKey,
|
|
||||||
Summary#summary.index),
|
|
||||||
Self = self(),
|
Self = self(),
|
||||||
SL = length(Slots),
|
SL = length(Slots),
|
||||||
ExpandedSlots =
|
ExpandedSlots =
|
||||||
|
@ -447,15 +452,11 @@ fetch_range(StartKey, EndKey, ScanWidth, State) ->
|
||||||
[];
|
[];
|
||||||
1 ->
|
1 ->
|
||||||
[Slot] = Slots,
|
[Slot] = Slots,
|
||||||
case {LTrim, RTrim} of
|
case RTrim of
|
||||||
{true, true} ->
|
true ->
|
||||||
[{pointer, Self, Slot, StartKey, EndKey}];
|
[{pointer, Self, Slot, StartKey, EndKey}];
|
||||||
{true, false} ->
|
false ->
|
||||||
[{pointer, Self, Slot, StartKey, all}];
|
[{pointer, Self, Slot, StartKey, all}]
|
||||||
{false, true} ->
|
|
||||||
[{pointer, Self, Slot, all, EndKey}];
|
|
||||||
{false, false} ->
|
|
||||||
[{pointer, Self, Slot, all, all}]
|
|
||||||
end;
|
end;
|
||||||
N ->
|
N ->
|
||||||
{LSlot, MidSlots, RSlot} =
|
{LSlot, MidSlots, RSlot} =
|
||||||
|
@ -472,21 +473,13 @@ fetch_range(StartKey, EndKey, ScanWidth, State) ->
|
||||||
{pointer, Self, S, all, all}
|
{pointer, Self, S, all, all}
|
||||||
end,
|
end,
|
||||||
MidSlots),
|
MidSlots),
|
||||||
case {LTrim, RTrim} of
|
case RTrim of
|
||||||
{true, true} ->
|
true ->
|
||||||
[{pointer, Self, LSlot, StartKey, all}] ++
|
[{pointer, Self, LSlot, StartKey, all}] ++
|
||||||
MidSlotPointers ++
|
MidSlotPointers ++
|
||||||
[{pointer, Self, RSlot, all, EndKey}];
|
[{pointer, Self, RSlot, all, EndKey}];
|
||||||
{true, false} ->
|
false ->
|
||||||
[{pointer, Self, LSlot, StartKey, all}] ++
|
[{pointer, Self, LSlot, StartKey, all}] ++
|
||||||
MidSlotPointers ++
|
|
||||||
[{pointer, Self, RSlot, all, all}];
|
|
||||||
{false, true} ->
|
|
||||||
[{pointer, Self, LSlot, all, all}] ++
|
|
||||||
MidSlotPointers ++
|
|
||||||
[{pointer, Self, RSlot, all, EndKey}];
|
|
||||||
{false, false} ->
|
|
||||||
[{pointer, Self, LSlot, all, all}] ++
|
|
||||||
MidSlotPointers ++
|
MidSlotPointers ++
|
||||||
[{pointer, Self, RSlot, all, all}]
|
[{pointer, Self, RSlot, all, all}]
|
||||||
end
|
end
|
||||||
|
@ -603,11 +596,13 @@ build_all_slots(KVL, SC, Pos, SlotID, SlotIdx, BlockIdxA, SlotsBin) ->
|
||||||
lists:split(?SLOT_SIZE, KVL)
|
lists:split(?SLOT_SIZE, KVL)
|
||||||
end,
|
end,
|
||||||
{LastKey, _V} = lists:last(SlotList),
|
{LastKey, _V} = lists:last(SlotList),
|
||||||
{BlockIndex, SlotBin} = generate_binary_slot(SlotList),
|
{BlockIndex, SlotBin, HashList} = generate_binary_slot(SlotList),
|
||||||
Length = byte_size(SlotBin),
|
Length = byte_size(SlotBin),
|
||||||
|
Bloom = leveled_tinybloom:create_bloom(HashList),
|
||||||
SlotIndexV = #slot_index_value{slot_id = SlotID,
|
SlotIndexV = #slot_index_value{slot_id = SlotID,
|
||||||
start_position = Pos,
|
start_position = Pos,
|
||||||
length = Length},
|
length = Length,
|
||||||
|
bloom = Bloom},
|
||||||
build_all_slots(KVRem,
|
build_all_slots(KVRem,
|
||||||
SC - 1,
|
SC - 1,
|
||||||
Pos + Length,
|
Pos + Length,
|
||||||
|
@ -706,9 +701,9 @@ lookup_slots(StartKey, EndKey, Tree) ->
|
||||||
{EK, _EndSlot} = lists:last(SlotList),
|
{EK, _EndSlot} = lists:last(SlotList),
|
||||||
case EK of
|
case EK of
|
||||||
EndKey ->
|
EndKey ->
|
||||||
{lists:map(MapFun, SlotList), true, false};
|
{lists:map(MapFun, SlotList), false};
|
||||||
_ ->
|
_ ->
|
||||||
{lists:map(MapFun, SlotList), true, true}
|
{lists:map(MapFun, SlotList), true}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
@ -739,7 +734,7 @@ lookup_slots(StartKey, EndKey, Tree) ->
|
||||||
generate_binary_slot(KVL) ->
|
generate_binary_slot(KVL) ->
|
||||||
|
|
||||||
HashFoldFun =
|
HashFoldFun =
|
||||||
fun({K, V}, {PosBinAcc, NoHashCount}) ->
|
fun({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->
|
||||||
|
|
||||||
{_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}),
|
{_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}),
|
||||||
case is_integer(H1) of
|
case is_integer(H1) of
|
||||||
|
@ -750,7 +745,8 @@ generate_binary_slot(KVL) ->
|
||||||
{<<1:1/integer,
|
{<<1:1/integer,
|
||||||
PosH1:15/integer,
|
PosH1:15/integer,
|
||||||
PosBinAcc/binary>>,
|
PosBinAcc/binary>>,
|
||||||
0};
|
0,
|
||||||
|
[H1|HashAcc]};
|
||||||
N ->
|
N ->
|
||||||
% The No Hash Count is an integer between 0 and 127
|
% The No Hash Count is an integer between 0 and 127
|
||||||
% and so at read time should count NHC + 1
|
% and so at read time should count NHC + 1
|
||||||
|
@ -760,15 +756,16 @@ generate_binary_slot(KVL) ->
|
||||||
0:1/integer,
|
0:1/integer,
|
||||||
NHC:7/integer,
|
NHC:7/integer,
|
||||||
PosBinAcc/binary>>,
|
PosBinAcc/binary>>,
|
||||||
0}
|
0,
|
||||||
|
HashAcc}
|
||||||
end;
|
end;
|
||||||
false ->
|
false ->
|
||||||
{PosBinAcc, NoHashCount + 1}
|
{PosBinAcc, NoHashCount + 1, HashAcc}
|
||||||
end
|
end
|
||||||
|
|
||||||
end,
|
end,
|
||||||
|
|
||||||
{PosBinIndex0, NHC} = lists:foldr(HashFoldFun, {<<>>, 0}, KVL),
|
{PosBinIndex0, NHC, HashL} = lists:foldr(HashFoldFun, {<<>>, 0, []}, KVL),
|
||||||
PosBinIndex1 =
|
PosBinIndex1 =
|
||||||
case NHC of
|
case NHC of
|
||||||
0 ->
|
0 ->
|
||||||
|
@ -825,7 +822,7 @@ generate_binary_slot(KVL) ->
|
||||||
CRC32 = erlang:crc32(SlotBin),
|
CRC32 = erlang:crc32(SlotBin),
|
||||||
FullBin = <<CRC32:32/integer, SlotBin/binary>>,
|
FullBin = <<CRC32:32/integer, SlotBin/binary>>,
|
||||||
|
|
||||||
{PosBinIndex1, FullBin}.
|
{PosBinIndex1, FullBin, HashL}.
|
||||||
|
|
||||||
|
|
||||||
binaryslot_get(FullBin, Key, Hash, CachedPosLookup) ->
|
binaryslot_get(FullBin, Key, Hash, CachedPosLookup) ->
|
||||||
|
@ -1212,18 +1209,9 @@ indexed_list_test() ->
|
||||||
KVL0 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 4)),
|
KVL0 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 4)),
|
||||||
KVL1 = lists:sublist(KVL0, 128),
|
KVL1 = lists:sublist(KVL0, 128),
|
||||||
|
|
||||||
% BloomAddFun =
|
|
||||||
% fun({H, K}, {Bloom, Total, Max}) ->
|
|
||||||
% SW = os:timestamp(),
|
|
||||||
% Bloom0 = leveled_tinybloom:tiny_enter(H, K, Bloom),
|
|
||||||
% T0 = timer:now_diff(os:timestamp(), SW),
|
|
||||||
% {Bloom0, Total + T0, max(T0, Max)}
|
|
||||||
|
|
||||||
% end,
|
|
||||||
|
|
||||||
SW0 = os:timestamp(),
|
SW0 = os:timestamp(),
|
||||||
|
|
||||||
{_PosBinIndex1, FullBin} = generate_binary_slot(KVL1),
|
{_PosBinIndex1, FullBin, _HL} = generate_binary_slot(KVL1),
|
||||||
io:format(user,
|
io:format(user,
|
||||||
"Indexed list created slot in ~w microseconds of size ~w~n",
|
"Indexed list created slot in ~w microseconds of size ~w~n",
|
||||||
[timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]),
|
[timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]),
|
||||||
|
@ -1251,7 +1239,7 @@ indexed_list_mixedkeys_test() ->
|
||||||
KVL1 = lists:sublist(KVL0, 33),
|
KVL1 = lists:sublist(KVL0, 33),
|
||||||
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
|
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
|
||||||
|
|
||||||
{_PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
{_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||||
|
|
||||||
{TestK1, TestV1} = lists:nth(4, KVL1),
|
{TestK1, TestV1} = lists:nth(4, KVL1),
|
||||||
MH1 = leveled_codec:magic_hash(TestK1),
|
MH1 = leveled_codec:magic_hash(TestK1),
|
||||||
|
@ -1277,7 +1265,7 @@ indexed_list_mixedkeys2_test() ->
|
||||||
IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)),
|
IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)),
|
||||||
% this isn't actually ordered correctly
|
% this isn't actually ordered correctly
|
||||||
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
|
Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
|
||||||
{_PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
{_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||||
lists:foreach(fun({K, V}) ->
|
lists:foreach(fun({K, V}) ->
|
||||||
MH = leveled_codec:magic_hash(K),
|
MH = leveled_codec:magic_hash(K),
|
||||||
test_binary_slot(FullBin, K, MH, {K, V})
|
test_binary_slot(FullBin, K, MH, {K, V})
|
||||||
|
@ -1286,7 +1274,7 @@ indexed_list_mixedkeys2_test() ->
|
||||||
|
|
||||||
indexed_list_allindexkeys_test() ->
|
indexed_list_allindexkeys_test() ->
|
||||||
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
|
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
|
||||||
{PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
{PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||||
?assertMatch(<<127:8/integer>>, PosBinIndex1),
|
?assertMatch(<<127:8/integer>>, PosBinIndex1),
|
||||||
% SW = os:timestamp(),
|
% SW = os:timestamp(),
|
||||||
BinToList = binaryslot_tolist(FullBin),
|
BinToList = binaryslot_tolist(FullBin),
|
||||||
|
@ -1299,7 +1287,7 @@ indexed_list_allindexkeys_test() ->
|
||||||
|
|
||||||
indexed_list_allindexkeys_trimmed_test() ->
|
indexed_list_allindexkeys_trimmed_test() ->
|
||||||
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
|
Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
|
||||||
{PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
{PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||||
?assertMatch(<<127:8/integer>>, PosBinIndex1),
|
?assertMatch(<<127:8/integer>>, PosBinIndex1),
|
||||||
?assertMatch(Keys, binaryslot_trimmedlist(FullBin,
|
?assertMatch(Keys, binaryslot_trimmedlist(FullBin,
|
||||||
{i,
|
{i,
|
||||||
|
@ -1337,7 +1325,7 @@ indexed_list_mixedkeys_bitflip_test() ->
|
||||||
KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)),
|
KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)),
|
||||||
KVL1 = lists:sublist(KVL0, 33),
|
KVL1 = lists:sublist(KVL0, 33),
|
||||||
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
|
Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1),
|
||||||
{_PosBinIndex1, FullBin} = generate_binary_slot(Keys),
|
{_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys),
|
||||||
L = byte_size(FullBin),
|
L = byte_size(FullBin),
|
||||||
Byte1 = random:uniform(L),
|
Byte1 = random:uniform(L),
|
||||||
<<PreB1:Byte1/binary, A:8/integer, PostByte1/binary>> = FullBin,
|
<<PreB1:Byte1/binary, A:8/integer, PostByte1/binary>> = FullBin,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue