Refactoring of skiplist ranges and support for sst ranges

The skiplist range code was needlessly complicated.  It may be faster
than the new code, but the complexity delta cannot be supported for such a
small change.

This was uncovered whilst troubleshooting the initial kv range test.
This commit is contained in:
martinsumner 2016-12-28 15:48:04 +00:00
parent 6e5f5d2d44
commit cbad375373
4 changed files with 270 additions and 103 deletions

View file

@ -19,7 +19,7 @@
-define(GET_LOGPOINT, 160000). -define(GET_LOGPOINT, 160000).
-define(SST_LOGPOINT, 200000). -define(SST_LOGPOINT, 200000).
-define(LOG_LEVEL, [info, warn, error, critical]). -define(LOG_LEVEL, [info, warn, error, critical]).
-define(SAMPLE_RATE, 16#F). -define(SAMPLE_RATE, 1).
-define(LOGBASE, dict:from_list([ -define(LOGBASE, dict:from_list([
@ -377,7 +377,7 @@ head_timing(undefined, SW, Level, R) ->
T0 = timer:now_diff(os:timestamp(), SW), T0 = timer:now_diff(os:timestamp(), SW),
head_timing_int(undefined, T0, Level, R); head_timing_int(undefined, T0, Level, R);
head_timing({N, HeadTimingD}, SW, Level, R) -> head_timing({N, HeadTimingD}, SW, Level, R) ->
case N band ?SAMPLE_RATE of case N band (?SAMPLE_RATE - 1) of
0 -> 0 ->
T0 = timer:now_diff(os:timestamp(), SW), T0 = timer:now_diff(os:timestamp(), SW),
head_timing_int({N, HeadTimingD}, T0, Level, R); head_timing_int({N, HeadTimingD}, T0, Level, R);
@ -440,7 +440,7 @@ sst_timing(undefined, SW, TimerType) ->
?SST_LOGPOINT, ?SST_LOGPOINT,
"SST01"); "SST01");
sst_timing({N, SSTTimerD}, SW, TimerType) -> sst_timing({N, SSTTimerD}, SW, TimerType) ->
case N band ?SAMPLE_RATE of case N band (?SAMPLE_RATE - 1) of
0 -> 0 ->
T0 = timer:now_diff(os:timestamp(), SW), T0 = timer:now_diff(os:timestamp(), SW),
gen_timing_int({N, SSTTimerD}, gen_timing_int({N, SSTTimerD},
@ -468,7 +468,7 @@ get_timing(undefined, SW, TimerType) ->
?GET_LOGPOINT, ?GET_LOGPOINT,
"B0014"); "B0014");
get_timing({N, GetTimerD}, SW, TimerType) -> get_timing({N, GetTimerD}, SW, TimerType) ->
case N band ?SAMPLE_RATE of case N band (?SAMPLE_RATE - 1) of
0 -> 0 ->
T0 = timer:now_diff(os:timestamp(), SW), T0 = timer:now_diff(os:timestamp(), SW),
gen_timing_int({N, GetTimerD}, gen_timing_int({N, GetTimerD},

View file

@ -266,78 +266,60 @@ to_list(SkipList, Level) ->
[], [],
SkipList). SkipList).
to_range(SkipList, Start, End, 1) ->
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
case {PassedStart, PassedEnd} of to_range(SkipList, StartKey, EndKey, ListHeight) ->
{true, true} -> to_range(SkipList, StartKey, EndKey, ListHeight, [], true).
{true, true, Acc, null};
{false, false} ->
case Start > Mark of
true ->
{false, false, Acc, SL};
false ->
RHS = splitlist_start(Start, PrevList ++ SL),
case leveled_codec:endkey_passed(End, Mark) of
true ->
EL = splitlist_end(End, RHS),
{true, true, EL, null};
false ->
{true, false, RHS, null}
end
end;
{true, false} ->
case leveled_codec:endkey_passed(End, Mark) of
true ->
EL = splitlist_end(End, SL),
{true, true, Acc ++ EL, null};
false ->
{true, false, Acc ++ SL, null}
end
end end,
{false, false, [], []}, to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) ->
SkipList), SL = sublist_above(SkipList, StartKey, ListHeight, StartIncl),
{_Bool1, _Bool2, SubList, _PrevList} = R, case SL of
SubList; [] ->
to_range(SkipList, Start, End, Level) -> Acc;
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) -> _ ->
{LK, _LV} = lists:last(SL),
case leveled_codec:endkey_passed(EndKey, LK) of
false ->
to_range(SkipList,
LK,
EndKey,
ListHeight,
Acc ++ SL,
false);
true ->
SplitFun =
fun({K, _V}) ->
not leveled_codec:endkey_passed(EndKey, K) end,
LHS = lists:takewhile(SplitFun, SL),
Acc ++ LHS
end
end.
case {PassedStart, PassedEnd} of sublist_above(SkipList, StartKey, 0, StartIncl) ->
{true, true} -> TestFun =
{true, true, Acc, null}; fun({K, _V}) ->
{false, false} -> case StartIncl of
case Start > Mark of true ->
true -> K < StartKey;
{false, false, Acc, SL}; false ->
false -> K =< StartKey
SkipLRange = to_range(PrevList, end end,
Start, End, lists:dropwhile(TestFun, SkipList);
Level - 1) ++ sublist_above(SkipList, StartKey, Level, StartIncl) ->
to_range(SL, TestFun =
Start, End, fun({K, _SL}) ->
Level - 1), case StartIncl of
case leveled_codec:endkey_passed(End, Mark) of true ->
true -> K < StartKey;
{true, true, SkipLRange, null}; false ->
false -> K =< StartKey
{true, false, SkipLRange, null} end end,
end RHS = lists:dropwhile(TestFun, SkipList),
end; case RHS of
{true, false} -> [] ->
SkipLRange = to_range(SL, Start, End, Level - 1), [];
case leveled_codec:endkey_passed(End, Mark) of [{_K, SL}|_Rest] ->
true -> sublist_above(SL, StartKey, Level - 1, StartIncl)
{true, true, Acc ++ SkipLRange, null}; end.
false ->
{true, false, Acc ++ SkipLRange, null}
end
end end,
{false, false, [], []},
SkipList),
{_Bool1, _Bool2, SubList, _PrevList} = R,
SubList.
key_above(SkipList, Key, 0) -> key_above(SkipList, Key, 0) ->
FindFun = fun({Mark, V}, Found) -> FindFun = fun({Mark, V}, Found) ->
@ -419,17 +401,6 @@ get_sublist(Key, SkipList) ->
null, null,
SkipList). SkipList).
splitlist_start(StartKey, SL) ->
{_LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < StartKey end, SL),
RHS.
splitlist_end(EndKey, SL) ->
{LHS, _RHS} = lists:splitwith(fun({K, _V}) ->
not leveled_codec:endkey_passed(EndKey, K)
end,
SL),
LHS.
%%%============================================================================ %%%============================================================================
%%% Test %%% Test
%%%============================================================================ %%%============================================================================

View file

@ -79,6 +79,8 @@
sst_open/1, sst_open/1,
sst_get/2, sst_get/2,
sst_get/3, sst_get/3,
sst_getkvrange/4,
sst_getslots/2,
sst_close/1]). sst_close/1]).
-export([generate_randomkeys/1]). -export([generate_randomkeys/1]).
@ -152,9 +154,21 @@ sst_get(Pid, LedgerKey) ->
sst_get(Pid, LedgerKey, Hash) -> sst_get(Pid, LedgerKey, Hash) ->
gen_fsm:sync_send_event(Pid, {get_kv, LedgerKey, Hash}, infinity). gen_fsm:sync_send_event(Pid, {get_kv, LedgerKey, Hash}, infinity).
%% Synchronously ask the SST file process Pid for the range between StartKey
%% and EndKey, passing ScanWidth through in the request.  Waits indefinitely
%% for the reply.
sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) ->
    Request = {get_kvrange, StartKey, EndKey, ScanWidth},
    gen_fsm:sync_send_event(Pid, Request, infinity).
%% Synchronously ask the SST file process Pid to resolve the entries in
%% SlotList.  Waits indefinitely for the reply.
sst_getslots(Pid, SlotList) ->
    Request = {get_slots, SlotList},
    gen_fsm:sync_send_event(Pid, Request, infinity).
sst_close(Pid) -> sst_close(Pid) ->
gen_fsm:sync_send_event(Pid, close, 2000). gen_fsm:sync_send_event(Pid, close, 2000).
%% Used in unit tests to force the printing of timings.  Sends the
%% print_timings event synchronously with a 1000ms timeout.
sst_printtimings(Pid) ->
    TimeoutMs = 1000,
    gen_fsm:sync_send_event(Pid, print_timings, TimeoutMs).
%%%============================================================================ %%%============================================================================
%%% gen_server callbacks %%% gen_server callbacks
@ -199,6 +213,23 @@ reader({get_kv, LedgerKey, Hash}, _From, State) ->
_ -> _ ->
{reply, Result, reader, State#state{sst_timings = UpdTimings}} {reply, Result, reader, State#state{sst_timings = UpdTimings}}
end; end;
reader({get_kvrange, StartKey, EndKey, ScanWidth}, _From, State) ->
{reply,
fetch_range(StartKey, EndKey, ScanWidth, State),
reader,
State};
reader({get_slots, SlotList}, _From, State) ->
Handle = State#state.handle,
FetchFun =
fun({pointer, S, SK, EK}, Acc) ->
Acc ++ trim_slot({pointer, Handle, S}, SK, EK) end,
{reply,
lists:foldl(FetchFun, [], SlotList),
reader,
State};
reader(print_timings, _From, State) ->
io:format(user, "Timings of ~w~n", [State#state.sst_timings]),
{reply, ok, reader, State#state{sst_timings = undefined}};
reader(close, _From, State) -> reader(close, _From, State) ->
ok = file:close(State#state.handle), ok = file:close(State#state.handle),
{stop, normal, ok, State}. {stop, normal, ok, State}.
@ -263,6 +294,76 @@ fetch(LedgerKey, Hash, State) ->
end end
end. end.
%% Fetch the key/value pairs between StartKey and EndKey from this file.
%%
%% The summary index is consulted (via lookup_slots/3) for the slots that
%% cover the range, together with two flags saying whether the first and the
%% last slot need trimming.  Each slot is described by a tuple
%% {pointer, self(), Slot, SlotStartKey, SlotEndKey}, where the bounds are
%% `all` unless that end of the slot has to be trimmed.
%%
%% The first ScanWidth slots are read and trimmed immediately; any remaining
%% slots are returned as the pointer tuples themselves, appended after the
%% fetched key/value pairs.
fetch_range(StartKey, EndKey, ScanWidth, State) ->
    Summary = State#state.summary,
    Handle = State#state.handle,
    {Slots, LTrim, RTrim} =
        lookup_slots(StartKey, EndKey, Summary#summary.index),
    Self = self(),
    %% Only the first slot can need a left trim, only the last a right trim.
    LeftBound =
        case LTrim of
            true -> StartKey;
            false -> all
        end,
    RightBound =
        case RTrim of
            true -> EndKey;
            false -> all
        end,
    ExpandedSlots =
        case Slots of
            [] ->
                [];
            [Slot] ->
                %% A single slot carries both bounds.
                [{pointer, Self, Slot, LeftBound, RightBound}];
            [LSlot | Rest] ->
                {MidSlots, [RSlot]} = lists:split(length(Rest) - 1, Rest),
                [{pointer, Self, LSlot, LeftBound, all}]
                    ++ [{pointer, Self, S, all, all} || S <- MidSlots]
                    ++ [{pointer, Self, RSlot, all, RightBound}]
        end,
    SL = length(Slots),
    {SlotsToFetch, SlotsToPoint} =
        case ScanWidth >= SL of
            true ->
                {ExpandedSlots, []};
            false ->
                lists:split(ScanWidth, ExpandedSlots)
        end,
    FetchFun =
        fun({pointer, _Self, S, SK, EK}) ->
            trim_slot({pointer, Handle, S}, SK, EK)
        end,
    %% flatmap concatenates the per-slot results in one pass, avoiding the
    %% repeated copying of an `Acc ++ ...` fold.
    lists:flatmap(FetchFun, SlotsToFetch) ++ SlotsToPoint.
write_file(Filename, SummaryBin, SlotsBin) -> write_file(Filename, SummaryBin, SlotsBin) ->
SummaryLength = byte_size(SummaryBin), SummaryLength = byte_size(SummaryBin),
@ -409,6 +510,34 @@ is_check_slot_required(_Hash, none) ->
is_check_slot_required(Hash, Bloom) -> is_check_slot_required(Hash, Bloom) ->
leveled_tinybloom:tiny_check(Hash, Bloom). leveled_tinybloom:tiny_check(Hash, Bloom).
%% Returns a section from the summary index and two booleans to indicate if
%% the first slot needs trimming, or the last slot.  The index keys are
%% stripped, leaving only the slot values.
lookup_slots(StartKey, EndKey, SkipList) ->
    SlotsOnlyFun = fun({_K, V}) -> V end,
    {KSL, LTrim, RTrim} = lookup_slots_int(StartKey, EndKey, SkipList),
    {lists:map(SlotsOnlyFun, KSL), LTrim, RTrim}.

%% Select the {Key, Slot} index entries covering StartKey..EndKey and report
%% as {Entries, LTrim, RTrim} whether the first / last entry needs trimming.
%% NOTE(review): this assumes each index key is the highest key held in its
%% slot (lookup_slot/2 locates a slot with key_above/2) - confirm.
lookup_slots_int(all, all, SkipList) ->
    {leveled_skiplist:to_list(SkipList), false, false};
lookup_slots_int(StartKey, all, SkipList) ->
    L = leveled_skiplist:to_list(SkipList),
    %% Drop every slot whose index key is below StartKey.  The first slot
    %% kept may still hold keys below StartKey, so it must be trimmed.  The
    %% previous test `FirstKey < StartKey` could never be true after this
    %% drop, and its head match crashed when no slot remained.
    RKeep = lists:dropwhile(fun({K, _V}) -> K < StartKey end, L),
    {RKeep, true, false};
lookup_slots_int(StartKey, EndKey, SkipList) ->
    L0 = leveled_skiplist:to_range(SkipList, StartKey, EndKey),
    EndsOnEndKey =
        case L0 of
            [] ->
                %% No index key falls inside the range; previously this
                %% crashed in lists:last/1.
                false;
            _ ->
                {LastKey, _LastVal} = lists:last(L0),
                LastKey =:= EndKey
        end,
    case EndsOnEndKey of
        true ->
            {L0, true, false};
        false ->
            %% The range runs on into the next slot, which needs a right
            %% trim.  NOTE(review): the result of key_above/2 when no key
            %% lies above EndKey is not visible here - confirm.
            LTail = leveled_skiplist:key_above(SkipList, EndKey),
            {L0 ++ [LTail], true, true}
    end.
lookup_slot(Key, SkipList) -> lookup_slot(Key, SkipList) ->
{_Mark, Slot} = leveled_skiplist:key_above(SkipList, Key), {_Mark, Slot} = leveled_skiplist:key_above(SkipList, Key),
Slot. Slot.
@ -425,12 +554,6 @@ lookup_in_slot(Key, SlotBin) ->
Tree = binary_to_term(SlotBin), Tree = binary_to_term(SlotBin),
gb_trees:lookup(Key, Tree). gb_trees:lookup(Key, Tree).
all_from_slot({pointer, Handle, Slot}) ->
all_from_slot(read_slot(Handle, Slot));
all_from_slot(SlotBin) ->
SkipList = binary_to_term(SlotBin),
gb_trees:to_list(SkipList).
read_slot(Handle, Slot) -> read_slot(Handle, Slot) ->
{ok, SlotBin} = file:pread(Handle, {ok, SlotBin} = file:pread(Handle,
Slot#slot_index_value.start_position, Slot#slot_index_value.start_position,
@ -443,6 +566,48 @@ read_slot(Handle, Slot) ->
crc_wonky crc_wonky
end. end.
%% Return the key/value pairs of a slot as a list in key order, restricted
%% to the range StartKey..EndKey.
%%
%% The slot is given either as {pointer, Handle, Slot}, in which case it is
%% first read with read_slot/2, or as the slot binary itself.  A slot for
%% which read_slot/2 returns crc_wonky yields [].
%%
%% StartKey and EndKey may each be the atom `all`, meaning no trimming at
%% that end.  Otherwise keys below StartKey are dropped, and keys are kept
%% only until leveled_codec:endkey_passed(EndKey, Key) becomes true.
trim_slot({pointer, Handle, Slot}, StartKey, EndKey) ->
    case read_slot(Handle, Slot) of
        crc_wonky ->
            [];
        SlotBin ->
            trim_slot(SlotBin, StartKey, EndKey)
    end;
trim_slot(SlotBinary, all, all) ->
    gb_trees:to_list(binary_to_term(SlotBinary));
trim_slot(SlotBinary, StartKey, EndKey) ->
    L = gb_trees:to_list(binary_to_term(SlotBinary)),
    LTrimL =
        case StartKey of
            all ->
                L;
            _ ->
                lists:dropwhile(fun({K, _V}) -> K < StartKey end, L)
        end,
    case EndKey of
        all ->
            LTrimL;
        _ ->
            %% Trim the right-hand end of the already left-trimmed list.
            %% Trimming the untrimmed list here would discard the StartKey
            %% trim whenever both bounds are given.
            RTrimFun =
                fun({K, _V}) ->
                    not leveled_codec:endkey_passed(EndKey, K)
                end,
            lists:takewhile(RTrimFun, LTrimL)
    end.
generate_filenames(RootFilename) -> generate_filenames(RootFilename) ->
Ext = filename:extension(RootFilename), Ext = filename:extension(RootFilename),
@ -490,7 +655,7 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BRand = random:uniform(BRange), BRand = random:uniform(BRange),
string:right(integer_to_list(BucketLow + BRand), 4, $0) string:right(integer_to_list(BucketLow + BRand), 4, $0)
end, end,
KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0), KNumber = string:right(integer_to_list(random:uniform(1000)), 6, $0),
LedgerKey = leveled_codec:to_ledgerkey("Bucket" ++ BNumber, LedgerKey = leveled_codec:to_ledgerkey("Bucket" ++ BNumber,
"Key" ++ KNumber, "Key" ++ KNumber,
o), o),
@ -532,7 +697,7 @@ simple_slotbin_test() ->
io:format(user, "Slot checked for all keys in ~w microseconds~n", io:format(user, "Slot checked for all keys in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SW1)]), [timer:now_diff(os:timestamp(), SW1)]),
SW2 = os:timestamp(), SW2 = os:timestamp(),
?assertMatch(KVList1, all_from_slot(SlotBin0)), ?assertMatch(KVList1, trim_slot(SlotBin0, all, all)),
io:format(user, "Slot flattened in ~w microseconds~n", io:format(user, "Slot flattened in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SW2)]). [timer:now_diff(os:timestamp(), SW2)]).
@ -572,7 +737,7 @@ simple_slotbinsummary_test() ->
simple_persisted_test() -> simple_persisted_test() ->
Filename = "../test/simple_test", Filename = "../test/simple_test",
KVList0 = generate_randomkeys(1, ?SLOT_SIZE * 8 + 100, 1, 4), KVList0 = generate_randomkeys(1, ?SLOT_SIZE * 16, 1, 20),
KVList1 = lists:ukeysort(1, KVList0), KVList1 = lists:ukeysort(1, KVList0),
[{FirstKey, _FV}|_Rest] = KVList1, [{FirstKey, _FV}|_Rest] = KVList1,
{LastKey, _LV} = lists:last(KVList1), {LastKey, _LV} = lists:last(KVList1),
@ -587,7 +752,8 @@ simple_persisted_test() ->
"Checking for ~w keys (twice) in file with cache hit took ~w " "Checking for ~w keys (twice) in file with cache hit took ~w "
++ "microseconds~n", ++ "microseconds~n",
[length(KVList1), timer:now_diff(os:timestamp(), SW1)]), [length(KVList1), timer:now_diff(os:timestamp(), SW1)]),
KVList2 = generate_randomkeys(1, ?SLOT_SIZE * 20 + 100, 1, 4), ok = sst_printtimings(Pid),
KVList2 = generate_randomkeys(1, ?SLOT_SIZE * 16, 1, 20),
MapFun = MapFun =
fun({K, V}, Acc) -> fun({K, V}, Acc) ->
In = lists:keymember(K, 1, KVList1), In = lists:keymember(K, 1, KVList1),
@ -607,6 +773,37 @@ simple_persisted_test() ->
io:format(user, io:format(user,
"Checking for ~w missing keys took ~w microseconds~n", "Checking for ~w missing keys took ~w microseconds~n",
[length(KVList3), timer:now_diff(os:timestamp(), SW2)]), [length(KVList3), timer:now_diff(os:timestamp(), SW2)]),
ok = sst_printtimings(Pid),
FetchList1 = sst_getkvrange(Pid, all, all, 2),
FoldFun = fun(X, Acc) ->
case X of
{pointer, P, S, SK, EK} ->
Acc ++ sst_getslots(P, [{pointer, S, SK, EK}]);
_ ->
Acc ++ [X]
end end,
FetchedList1 = lists:foldl(FoldFun, [], FetchList1),
?assertMatch(KVList1, FetchedList1),
{TenthKey, _v10} = lists:nth(10, KVList1),
{Three000Key, _v300} = lists:nth(300, KVList1),
io:format("Looking for 291 elements between ~s ~s and ~s ~s~n",
[element(2, TenthKey),
element(3, TenthKey),
element(2, Three000Key),
element(3, Three000Key)]),
SubKVList1 = lists:sublist(KVList1, 10, 291),
SubKVList1L = length(SubKVList1),
FetchList2 = sst_getkvrange(Pid, TenthKey, Three000Key, 2),
FetchedList2 = lists:foldl(FoldFun, [], FetchList2),
io:format("Found elements between ~s ~s and ~s ~s~n",
[element(2, element(1, lists:nth(1, FetchedList2))),
element(3, element(1, lists:nth(1, FetchedList2))),
element(2, element(1, lists:last(FetchedList2))),
element(3, element(1, lists:last(FetchedList2)))]),
?assertMatch(SubKVList1L, length(FetchedList2)),
?assertMatch(SubKVList1, FetchedList2),
ok = sst_close(Pid), ok = sst_close(Pid),
ok = file:delete(Filename ++ ".sst"). ok = file:delete(Filename ++ ".sst").

View file

@ -78,13 +78,13 @@ tiny_empty() ->
tiny_enter({hash, no_lookup}, Bloom) -> tiny_enter({hash, no_lookup}, Bloom) ->
Bloom; Bloom;
tiny_enter({hash, Hash}, Bloom) -> tiny_enter({hash, Hash}, Bloom) ->
{_Q, Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash), {Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash),
AddFun = fun(Bit, Arr0) -> add_to_array(Bit, Arr0, 1024) end, AddFun = fun(Bit, Arr0) -> add_to_array(Bit, Arr0, 1024) end,
lists:foldl(AddFun, Bloom, [Bit0, Bit1, Bit2]). lists:foldl(AddFun, Bloom, [Bit0, Bit1, Bit2]).
tiny_check({hash, Hash}, Bloom) -> tiny_check({hash, Hash}, Bloom) ->
{_Q, Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash), {Bit0, Bit1, Bit2} = split_hash_for_tinybloom(Hash),
case getbit(Bit0, Bloom, 1024) of case getbit(Bit0, Bloom, 1024) of
<<0:1>> -> <<0:1>> ->
false; false;
@ -115,11 +115,10 @@ split_hash(Hash) ->
split_hash_for_tinybloom(Hash) -> split_hash_for_tinybloom(Hash) ->
% Tiny bloom can make k=3 from one hash % Tiny bloom can make k=3 from one hash
Q = Hash band 3, H0 = Hash band 1023,
H0 = (Hash bsr 2) band 1023, H1 = (Hash bsr 11) band 1023,
H1 = (Hash bsr 12) band 1023,
H2 = (Hash bsr 22) band 1023, H2 = (Hash bsr 22) band 1023,
{Q, H0, H1, H2}. {H0, H1, H2}.
add_to_array(Bit, BitArray, ArrayLength) -> add_to_array(Bit, BitArray, ArrayLength) ->
RestLen = ArrayLength - Bit - 1, RestLen = ArrayLength - Bit - 1,