Refactoring of skiplist ranges and support for sst ranges

The Skiplist range code was needlessly complicated.  It may be faster
than the new code, but the complexity delta cannot be supported for such a
small change.

This was uncovered whilst troubleshooting the initial kv range test.
This commit is contained in:
martinsumner 2016-12-28 15:48:04 +00:00
parent 6e5f5d2d44
commit cbad375373
4 changed files with 270 additions and 103 deletions

View file

@ -19,7 +19,7 @@
-define(GET_LOGPOINT, 160000).
-define(SST_LOGPOINT, 200000).
-define(LOG_LEVEL, [info, warn, error, critical]).
-define(SAMPLE_RATE, 16#F).
-define(SAMPLE_RATE, 1).
-define(LOGBASE, dict:from_list([
@ -377,7 +377,7 @@ head_timing(undefined, SW, Level, R) ->
T0 = timer:now_diff(os:timestamp(), SW),
head_timing_int(undefined, T0, Level, R);
head_timing({N, HeadTimingD}, SW, Level, R) ->
case N band ?SAMPLE_RATE of
case N band (?SAMPLE_RATE - 1) of
0 ->
T0 = timer:now_diff(os:timestamp(), SW),
head_timing_int({N, HeadTimingD}, T0, Level, R);
@ -440,7 +440,7 @@ sst_timing(undefined, SW, TimerType) ->
?SST_LOGPOINT,
"SST01");
sst_timing({N, SSTTimerD}, SW, TimerType) ->
case N band ?SAMPLE_RATE of
case N band (?SAMPLE_RATE - 1) of
0 ->
T0 = timer:now_diff(os:timestamp(), SW),
gen_timing_int({N, SSTTimerD},
@ -468,7 +468,7 @@ get_timing(undefined, SW, TimerType) ->
?GET_LOGPOINT,
"B0014");
get_timing({N, GetTimerD}, SW, TimerType) ->
case N band ?SAMPLE_RATE of
case N band (?SAMPLE_RATE - 1) of
0 ->
T0 = timer:now_diff(os:timestamp(), SW),
gen_timing_int({N, GetTimerD},

View file

@ -266,78 +266,60 @@ to_list(SkipList, Level) ->
[],
SkipList).
to_range(SkipList, Start, End, 1) ->
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
case {PassedStart, PassedEnd} of
{true, true} ->
{true, true, Acc, null};
{false, false} ->
case Start > Mark of
true ->
{false, false, Acc, SL};
false ->
RHS = splitlist_start(Start, PrevList ++ SL),
case leveled_codec:endkey_passed(End, Mark) of
true ->
EL = splitlist_end(End, RHS),
{true, true, EL, null};
false ->
{true, false, RHS, null}
end
end;
{true, false} ->
case leveled_codec:endkey_passed(End, Mark) of
true ->
EL = splitlist_end(End, SL),
{true, true, Acc ++ EL, null};
false ->
{true, false, Acc ++ SL, null}
end
end end,
{false, false, [], []},
SkipList),
{_Bool1, _Bool2, SubList, _PrevList} = R,
SubList;
to_range(SkipList, Start, End, Level) ->
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
case {PassedStart, PassedEnd} of
{true, true} ->
{true, true, Acc, null};
{false, false} ->
case Start > Mark of
true ->
{false, false, Acc, SL};
false ->
SkipLRange = to_range(PrevList,
Start, End,
Level - 1) ++
to_range(SL,
Start, End,
Level - 1),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, SkipLRange, null};
false ->
{true, false, SkipLRange, null}
end
end;
{true, false} ->
SkipLRange = to_range(SL, Start, End, Level - 1),
case leveled_codec:endkey_passed(End, Mark) of
true ->
{true, true, Acc ++ SkipLRange, null};
false ->
{true, false, Acc ++ SkipLRange, null}
end
end end,
{false, false, [], []},
SkipList),
{_Bool1, _Bool2, SubList, _PrevList} = R,
SubList.
%% Gather the {Key, Value} pairs of a skiplist of height ListHeight, starting
%% at StartKey (inclusive) and stopping at the first key for which
%% leveled_codec:endkey_passed(EndKey, Key) is true.
to_range(SkipList, StartKey, EndKey, ListHeight) ->
    to_range(SkipList, StartKey, EndKey, ListHeight, [], true).

%% Worker for to_range/4.  Each pass asks sublist_above/4 for the next run of
%% pairs and appends it to Acc.  StartIncl is true only on the first pass;
%% later passes restart from the last key already collected, so that key must
%% be excluded to avoid returning it twice.
to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) ->
    case sublist_above(SkipList, StartKey, ListHeight, StartIncl) of
        [] ->
            % Nothing left at or beyond the start point
            Acc;
        Chunk ->
            {ChunkLastKey, _ChunkLastValue} = lists:last(Chunk),
            case leveled_codec:endkey_passed(EndKey, ChunkLastKey) of
                true ->
                    % The range finishes inside this chunk - keep only the
                    % leading pairs that have not passed the end key
                    WithinRange =
                        fun({K, _V}) ->
                            not leveled_codec:endkey_passed(EndKey, K)
                        end,
                    Acc ++ lists:takewhile(WithinRange, Chunk);
                false ->
                    % Whole chunk is in range - carry on from its last key
                    to_range(SkipList,
                                ChunkLastKey,
                                EndKey,
                                ListHeight,
                                Acc ++ Chunk,
                                false)
            end
    end.
%% Descend a skiplist and return the tail of the first bottom-level list that
%% reaches the start point.  At level 0 the list holds {Key, Value} pairs; at
%% higher levels it holds {Mark, SubList} pairs and only the first entry whose
%% mark reaches the start point is followed.  With StartIncl = true entries
%% equal to StartKey are kept, with false they are dropped as well.
sublist_above(SkipList, StartKey, 0, StartIncl) ->
    lists:dropwhile(before_start_fun(StartKey, StartIncl), SkipList);
sublist_above(SkipList, StartKey, Level, StartIncl) ->
    case lists:dropwhile(before_start_fun(StartKey, StartIncl), SkipList) of
        [] ->
            [];
        [{_Mark, Lower}|_Others] ->
            sublist_above(Lower, StartKey, Level - 1, StartIncl)
    end.

%% Build the predicate that is true for every entry lying before the start
%% point.  The StartIncl test is made per element (not when the fun is built)
%% so an empty list is never affected by the flag.
before_start_fun(StartKey, StartIncl) ->
    fun({K, _Entry}) ->
        case StartIncl of
            true ->
                K < StartKey;
            false ->
                K =< StartKey
        end
    end.
key_above(SkipList, Key, 0) ->
FindFun = fun({Mark, V}, Found) ->
@ -419,17 +401,6 @@ get_sublist(Key, SkipList) ->
null,
SkipList).
splitlist_start(StartKey, SL) ->
{_LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < StartKey end, SL),
RHS.
splitlist_end(EndKey, SL) ->
{LHS, _RHS} = lists:splitwith(fun({K, _V}) ->
not leveled_codec:endkey_passed(EndKey, K)
end,
SL),
LHS.
%%%============================================================================
%%% Test
%%%============================================================================

View file

@ -79,6 +79,8 @@
sst_open/1,
sst_get/2,
sst_get/3,
sst_getkvrange/4,
sst_getslots/2,
sst_close/1]).
-export([generate_randomkeys/1]).
@ -152,9 +154,21 @@ sst_get(Pid, LedgerKey) ->
%% Synchronously send a {get_kv, LedgerKey, Hash} event to the SST process Pid
%% and return its reply.  The call waits indefinitely (timeout is infinity).
sst_get(Pid, LedgerKey, Hash) ->
gen_fsm:sync_send_event(Pid, {get_kv, LedgerKey, Hash}, infinity).
%% Synchronously ask the SST process Pid for the keys between StartKey and
%% EndKey (either may be the atom all).  In the reader state the reply is the
%% result of fetch_range/4: the contents of at most ScanWidth slots, followed
%% by {pointer, Pid, Slot, StartKey, EndKey} tuples for the slots that were not
%% read.  The call waits indefinitely (timeout is infinity).
sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) ->
gen_fsm:sync_send_event(Pid,
{get_kvrange, StartKey, EndKey, ScanWidth},
infinity).
%% Synchronously ask the SST process Pid to read the slots named in SlotList.
%% In the reader state each element is expected to be a
%% {pointer, Slot, StartKey, EndKey} tuple, and the reply is the concatenation
%% of the trimmed contents of those slots in list order.  The call waits
%% indefinitely (timeout is infinity).
sst_getslots(Pid, SlotList) ->
gen_fsm:sync_send_event(Pid, {get_slots, SlotList}, infinity).
%% Synchronously send the close event to the SST process Pid, waiting up to
%% 2000 ms for the reply.  In the reader state this closes the file handle and
%% stops the process normally with the reply ok.
sst_close(Pid) ->
gen_fsm:sync_send_event(Pid, close, 2000).
%% Used in unit tests to force the printing of timings.
%% Sends the print_timings event to Pid and waits up to 1000 ms for the reply.
%% In the reader state the timings held in the process state are written with
%% io:format/3 to the user device, the timings are reset to undefined and ok
%% is returned.
sst_printtimings(Pid) ->
gen_fsm:sync_send_event(Pid, print_timings, 1000).
%%%============================================================================
%%% gen_server callbacks
@ -199,6 +213,23 @@ reader({get_kv, LedgerKey, Hash}, _From, State) ->
_ ->
{reply, Result, reader, State#state{sst_timings = UpdTimings}}
end;
reader({get_kvrange, StartKey, EndKey, ScanWidth}, _From, State) ->
{reply,
fetch_range(StartKey, EndKey, ScanWidth, State),
reader,
State};
reader({get_slots, SlotList}, _From, State) ->
Handle = State#state.handle,
FetchFun =
fun({pointer, S, SK, EK}, Acc) ->
Acc ++ trim_slot({pointer, Handle, S}, SK, EK) end,
{reply,
lists:foldl(FetchFun, [], SlotList),
reader,
State};
reader(print_timings, _From, State) ->
io:format(user, "Timings of ~w~n", [State#state.sst_timings]),
{reply, ok, reader, State#state{sst_timings = undefined}};
reader(close, _From, State) ->
ok = file:close(State#state.handle),
{stop, normal, ok, State}.
@ -263,6 +294,76 @@ fetch(LedgerKey, Hash, State) ->
end
end.
%% Answer a range query against this file.
%%
%% The summary index is asked which slots overlap StartKey..EndKey and every
%% such slot is described by a {pointer, Pid, Slot, SlotStartKey, SlotEndKey}
%% tuple, where a key of all means no trimming is needed at that end.  The
%% first ScanWidth pointers are resolved immediately into key/value pairs; any
%% remaining pointers are returned unresolved after those pairs so the caller
%% can fetch them later.
fetch_range(StartKey, EndKey, ScanWidth, State) ->
    Handle = State#state.handle,
    Index = (State#state.summary)#summary.index,
    {Slots, LTrim, RTrim} = lookup_slots(StartKey, EndKey, Index),
    SlotCount = length(Slots),
    Pointers = range_slot_pointers(Slots,
                                    self(),
                                    {LTrim, StartKey},
                                    {RTrim, EndKey}),
    {ToFetch, ToPoint} =
        case ScanWidth of
            Width when Width >= SlotCount ->
                {Pointers, []};
            _ ->
                lists:split(ScanWidth, Pointers)
        end,
    ReadFun =
        fun({pointer, _Owner, S, SK, EK}) ->
            trim_slot({pointer, Handle, S}, SK, EK)
        end,
    lists:append(lists:map(ReadFun, ToFetch)) ++ ToPoint.

%% Turn the ordered list of slots into pointer tuples.  Only the first slot
%% can need trimming on the left and only the last slot on the right; every
%% slot in between is taken whole.
range_slot_pointers([], _Owner, _Left, _Right) ->
    [];
range_slot_pointers([Only], Owner, Left, Right) ->
    [{pointer, Owner, Only, range_bound(Left), range_bound(Right)}];
range_slot_pointers([First|Others], Owner, Left, Right) ->
    {Middle, [Last]} = lists:split(length(Others) - 1, Others),
    MidPointers = [{pointer, Owner, S, all, all} || S <- Middle],
    [{pointer, Owner, First, range_bound(Left), all}]
        ++ MidPointers
        ++ [{pointer, Owner, Last, all, range_bound(Right)}].

%% The key to trim at when the trim flag is set, otherwise all.
range_bound({true, Key}) ->
    Key;
range_bound({false, _Key}) ->
    all.
write_file(Filename, SummaryBin, SlotsBin) ->
SummaryLength = byte_size(SummaryBin),
@ -409,6 +510,34 @@ is_check_slot_required(_Hash, none) ->
is_check_slot_required(Hash, Bloom) ->
leveled_tinybloom:tiny_check(Hash, Bloom).
%% Find the slots in the summary index that cover StartKey..EndKey.  Returns
%% {Slots, LTrim, RTrim}: the slot values in index order, and two booleans
%% saying whether the first slot needs trimming on the left and whether the
%% last slot needs trimming on the right.
lookup_slots(StartKey, EndKey, SkipList) ->
    {KeyedSlots, LTrim, RTrim} = lookup_slots_int(StartKey, EndKey, SkipList),
    % The index keys are only needed to select the slots; drop them here
    {_Keys, Slots} = lists:unzip(KeyedSlots),
    {Slots, LTrim, RTrim}.
%% Select the {Mark, Slot} index entries whose slots may hold keys in the
%% range StartKey..EndKey, and report whether the first / last selected slot
%% needs trimming.  Returns {Entries, LTrim, RTrim}.
%%
%% NOTE(review): this assumes each index mark is the highest key stored in its
%% slot (lookup_slot/2 chooses a slot with key_above) - confirm against the
%% code that builds the summary.
lookup_slots_int(all, all, SkipList) ->
    {leveled_skiplist:to_list(SkipList), false, false};
lookup_slots_int(StartKey, all, SkipList) ->
    L = leveled_skiplist:to_list(SkipList),
    % Slots whose mark is below StartKey are skipped.  The first slot kept has
    % a mark >= StartKey, so it can still hold keys below StartKey and must be
    % trimmed on the left.  (Testing the first kept mark against StartKey can
    % never be true after the drop, and matching on a non-empty list crashed
    % when StartKey was beyond every mark.)
    {lists:dropwhile(fun({K, _V}) -> K < StartKey end, L), true, false};
lookup_slots_int(StartKey, EndKey, SkipList) ->
    L0 = leveled_skiplist:to_range(SkipList, StartKey, EndKey),
    % L0 is empty when no mark lies within the range, e.g. when both keys fall
    % inside a single slot - so the last mark can only be examined when there
    % is one.
    EndsOnEndKey =
        L0 =/= [] andalso element(1, lists:last(L0)) =:= EndKey,
    case EndsOnEndKey of
        true ->
            % The last slot selected finishes exactly at EndKey
            {L0, true, false};
        false ->
            % The range runs on into the next slot, if there is one
            % NOTE(review): assumes key_above returns false when no mark is
            % at or above the key - confirm in leveled_skiplist
            case leveled_skiplist:key_above(SkipList, EndKey) of
                false ->
                    {L0, true, false};
                {_Mark, _Slot} = LTail ->
                    {L0 ++ [LTail], true, true}
            end
    end.
%% Return the slot for Key: the value part of the {Mark, Slot} entry that
%% leveled_skiplist:key_above/2 finds for Key in the summary index.  A result
%% from key_above that is not a two-element tuple causes a badmatch.
lookup_slot(Key, SkipList) ->
{_Mark, Slot} = leveled_skiplist:key_above(SkipList, Key),
Slot.
@ -425,12 +554,6 @@ lookup_in_slot(Key, SlotBin) ->
Tree = binary_to_term(SlotBin),
gb_trees:lookup(Key, Tree).
all_from_slot({pointer, Handle, Slot}) ->
all_from_slot(read_slot(Handle, Slot));
all_from_slot(SlotBin) ->
SkipList = binary_to_term(SlotBin),
gb_trees:to_list(SkipList).
read_slot(Handle, Slot) ->
{ok, SlotBin} = file:pread(Handle,
Slot#slot_index_value.start_position,
@ -443,6 +566,48 @@ read_slot(Handle, Slot) ->
crc_wonky
end.
%% Return the {Key, Value} pairs of a slot, restricted to StartKey..EndKey.
%%
%% The slot is given either as {pointer, Handle, Slot}, in which case it is
%% first read from the file (a slot that read_slot/2 reports as crc_wonky
%% yields an empty list), or directly as the slot binary.
%%
%% StartKey = all keeps everything from the start of the slot, otherwise pairs
%% with a key below StartKey are dropped.  EndKey = all keeps everything to
%% the end of the slot, otherwise pairs are kept up to the first key for which
%% leveled_codec:endkey_passed(EndKey, Key) is true.
trim_slot({pointer, Handle, Slot}, StartKey, EndKey) ->
    case read_slot(Handle, Slot) of
        crc_wonky ->
            [];
        SlotBin ->
            trim_slot(SlotBin, StartKey, EndKey)
    end;
trim_slot(SlotBinary, StartKey, EndKey) ->
    Tree = binary_to_term(SlotBinary),
    L = gb_trees:to_list(Tree),
    LTrimL =
        case StartKey of
            all ->
                L;
            _ ->
                lists:dropwhile(fun({K, _V}) -> K < StartKey end, L)
        end,
    case EndKey of
        all ->
            LTrimL;
        _ ->
            % Trim the right-hand side of the already left-trimmed list, so
            % that both bounds apply when both are given
            RKeepFun =
                fun({K, _V}) ->
                    not leveled_codec:endkey_passed(EndKey, K)
                end,
            lists:takewhile(RKeepFun, LTrimL)
    end.
generate_filenames(RootFilename) ->
Ext = filename:extension(RootFilename),
@ -490,7 +655,7 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BRand = random:uniform(BRange),
string:right(integer_to_list(BucketLow + BRand), 4, $0)
end,
KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
KNumber = string:right(integer_to_list(random:uniform(1000)), 6, $0),
LedgerKey = leveled_codec:to_ledgerkey("Bucket" ++ BNumber,
"Key" ++ KNumber,
o),
@ -532,7 +697,7 @@ simple_slotbin_test() ->
io:format(user, "Slot checked for all keys in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SW1)]),
SW2 = os:timestamp(),
?assertMatch(KVList1, all_from_slot(SlotBin0)),
?assertMatch(KVList1, trim_slot(SlotBin0, all, all)),
io:format(user, "Slot flattened in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SW2)]).
@ -572,7 +737,7 @@ simple_slotbinsummary_test() ->
simple_persisted_test() ->
Filename = "../test/simple_test",
KVList0 = generate_randomkeys(1, ?SLOT_SIZE * 8 + 100, 1, 4),
KVList0 = generate_randomkeys(1, ?SLOT_SIZE * 16, 1, 20),
KVList1 = lists:ukeysort(1, KVList0),
[{FirstKey, _FV}|_Rest] = KVList1,
{LastKey, _LV} = lists:last(KVList1),
@ -587,7 +752,8 @@ simple_persisted_test() ->
"Checking for ~w keys (twice) in file with cache hit took ~w "
++ "microseconds~n",
[length(KVList1), timer:now_diff(os:timestamp(), SW1)]),
KVList2 = generate_randomkeys(1, ?SLOT_SIZE * 20 + 100, 1, 4),
ok = sst_printtimings(Pid),
KVList2 = generate_randomkeys(1, ?SLOT_SIZE * 16, 1, 20),
MapFun =
fun({K, V}, Acc) ->
In = lists:keymember(K, 1, KVList1),
@ -607,6 +773,37 @@ simple_persisted_test() ->
io:format(user,
"Checking for ~w missing keys took ~w microseconds~n",
[length(KVList3), timer:now_diff(os:timestamp(), SW2)]),
ok = sst_printtimings(Pid),
FetchList1 = sst_getkvrange(Pid, all, all, 2),
FoldFun = fun(X, Acc) ->
case X of
{pointer, P, S, SK, EK} ->
Acc ++ sst_getslots(P, [{pointer, S, SK, EK}]);
_ ->
Acc ++ [X]
end end,
FetchedList1 = lists:foldl(FoldFun, [], FetchList1),
?assertMatch(KVList1, FetchedList1),
{TenthKey, _v10} = lists:nth(10, KVList1),
{Three000Key, _v300} = lists:nth(300, KVList1),
io:format("Looking for 291 elements between ~s ~s and ~s ~s~n",
[element(2, TenthKey),
element(3, TenthKey),
element(2, Three000Key),
element(3, Three000Key)]),
SubKVList1 = lists:sublist(KVList1, 10, 291),
SubKVList1L = length(SubKVList1),
FetchList2 = sst_getkvrange(Pid, TenthKey, Three000Key, 2),
FetchedList2 = lists:foldl(FoldFun, [], FetchList2),
io:format("Found elements between ~s ~s and ~s ~s~n",
[element(2, element(1, lists:nth(1, FetchedList2))),
element(3, element(1, lists:nth(1, FetchedList2))),
element(2, element(1, lists:last(FetchedList2))),
element(3, element(1, lists:last(FetchedList2)))]),
?assertMatch(SubKVList1L, length(FetchedList2)),
?assertMatch(SubKVList1, FetchedList2),
ok = sst_close(Pid),
ok = file:delete(Filename ++ ".sst").

View file

@ -78,13 +78,13 @@ tiny_empty() ->
tiny_enter({hash, no_lookup}, Bloom) ->
Bloom;
tiny_enter({hash, Hash}, Bloom) ->
{_Q, Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash),
{Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash),
AddFun = fun(Bit, Arr0) -> add_to_array(Bit, Arr0, 1024) end,
lists:foldl(AddFun, Bloom, [Bit0, Bit1, Bit2]).
tiny_check({hash, Hash}, Bloom) ->
{_Q, Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash),
{Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash),
case getbit(Bit0, Bloom, 1024) of
<<0:1>> ->
false;
@ -115,11 +115,10 @@ split_hash(Hash) ->
split_hash_for_tinybloom(Hash) ->
% Tiny bloom can make k=3 from one hash
Q = Hash band 3,
H0 = (Hash bsr 2) band 1023,
H1 = (Hash bsr 12) band 1023,
H0 = Hash band 1023,
H1 = (Hash bsr 11) band 1023,
H2 = (Hash bsr 22) band 1023,
{Q, H0, H1, H2}.
{H0, H1, H2}.
add_to_array(Bit, BitArray, ArrayLength) ->
RestLen = ArrayLength - Bit - 1,