From f287895db0e48b720d6cac2ad2773116a375bd08 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 17 Mar 2017 10:43:34 +0000 Subject: [PATCH 1/5] Pass out slots as a binary If we convetr firts to a list, then the list has to be copied - passing out as binaries means the bulk can be passed as references --- src/leveled_sst.erl | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 489715e..498e043 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -201,7 +201,12 @@ sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) -> infinity). sst_getslots(Pid, SlotList) -> - gen_fsm:sync_send_event(Pid, {get_slots, SlotList}, infinity). + SlotBins = gen_fsm:sync_send_event(Pid, {get_slots, SlotList}, infinity), + FetchFun = + fun({SlotBin, SK, EK}, Acc) -> + Acc ++ binaryslot_trimmedlist(SlotBin, SK, EK) + end, + lists:foldl(FetchFun, [], SlotBins). sst_getmaxsequencenumber(Pid) -> gen_fsm:sync_send_event(Pid, get_maxsequencenumber, infinity). @@ -310,11 +315,7 @@ reader({get_kvrange, StartKey, EndKey, ScanWidth}, _From, State) -> State}; reader({get_slots, SlotList}, _From, State) -> SlotBins = read_slots(State#state.handle, SlotList), - FetchFun = - fun({SlotBin, SK, EK}, Acc) -> - Acc ++ binaryslot_trimmedlist(SlotBin, SK, EK) - end, - {reply, lists:foldl(FetchFun, [], SlotBins), reader, State}; + {reply, SlotBins, reader, State}; reader(get_maxsequencenumber, _From, State) -> Summary = State#state.summary, {reply, Summary#summary.max_sqn, reader, State}; @@ -353,15 +354,7 @@ delete_pending({get_kvrange, StartKey, EndKey, ScanWidth}, _From, State) -> ?DELETE_TIMEOUT}; delete_pending({get_slots, SlotList}, _From, State) -> SlotBins = read_slots(State#state.handle, SlotList), - FetchFun = - fun({SlotBin, SK, EK}, Acc) -> - Acc ++ binaryslot_trimmedlist(SlotBin, SK, EK) - end, - {reply, - lists:foldl(FetchFun, [], SlotBins), - delete_pending, - State, - ?DELETE_TIMEOUT}; + {reply, SlotBins, delete_pending, State, ?DELETE_TIMEOUT}; delete_pending(close, _From, State) -> leveled_log:log("SST07", [State#state.filename]), ok = file:close(State#state.handle), From c203e2ee065260a1f20227bab9f84330320c8b9e Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 17 Mar 2017 10:47:20 +0000 Subject: [PATCH 2/5] Range queries - pass out as binaries Avoid converting to erlang temr wihtin the FSM and then requiring a copy outside of the FSM - pass out as a binary --- src/leveled_sst.erl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 498e043..c662583 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -196,9 +196,15 @@ sst_get(Pid, LedgerKey, Hash) -> gen_fsm:sync_send_event(Pid, {get_kv, LedgerKey, Hash}, infinity). sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) -> - gen_fsm:sync_send_event(Pid, - {get_kvrange, StartKey, EndKey, ScanWidth}, - infinity). + Reply = gen_fsm:sync_send_event(Pid, + {get_kvrange, StartKey, EndKey, ScanWidth}, + infinity), + FetchFun = + fun({SlotBin, SK, EK}, Acc) -> + Acc ++ binaryslot_trimmedlist(SlotBin, SK, EK) + end, + {SlotsToFetchBinList, SlotsToPoint} = Reply, + lists:foldl(FetchFun, [], SlotsToFetchBinList) ++ SlotsToPoint. sst_getslots(Pid, SlotList) -> SlotBins = gen_fsm:sync_send_event(Pid, {get_slots, SlotList}, infinity), @@ -493,12 +499,7 @@ fetch_range(StartKey, EndKey, ScanWidth, State) -> end, SlotsToFetchBinList = read_slots(Handle, SlotsToFetch), - - FetchFun = - fun({SlotBin, SK, EK}, Acc) -> - Acc ++ binaryslot_trimmedlist(SlotBin, SK, EK) - end, - lists:foldl(FetchFun, [], SlotsToFetchBinList) ++ SlotsToPoint. + {SlotsToFetchBinList, SlotsToPoint}. write_file(RootPath, Filename, SummaryBin, SlotsBin) -> From f20aba9c8bf29fa8a1d8ffb32d5e39ffc3bf8b52 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 19 Mar 2017 21:47:22 +0000 Subject: [PATCH 3/5] Curtail trimmed slot crazyness There was complicated and confusing code that achieved nothing for effiency when trimming slots. the expensive part (binary_to_term) was still needed on every block, and it was hard to get code coverage and make sense of what it was really trying to achieve. This is now much simpler - and may set us up for potential further indexing help. --- src/leveled_sst.erl | 89 +++++++++++---------------------------------- 1 file changed, 22 insertions(+), 67 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index c662583..b1cbf62 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -909,41 +909,34 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey) -> LTrimFun = fun({K, _V}) -> K < StartKey end, RTrimFun = fun({K, _V}) -> not leveled_codec:endkey_passed(EndKey, K) end, BlockFetchFun = - fun(Length, {Acc, Bin}) -> - case Length of - 0 -> - {Acc, Bin}; - _ -> + fun(Length, {Acc, Bin, Continue}) -> + case {Length, Continue} of + {0, _} -> + {Acc, Bin, false}; + {_, true} -> <> = Bin, BlockList = binary_to_term(Block), - {FirstKey, _FV} = lists:nth(1, BlockList), {LastKey, _LV} = lists:last(BlockList), - TrimBools = trim_booleans(FirstKey, LastKey, - StartKey, EndKey), - case TrimBools of - {true, _, _, _} -> - {Acc, Rest}; - {false, true, _, _} -> - {Acc ++ BlockList, Rest}; - {false, false, true, false} -> + case StartKey > LastKey of + true -> + {Acc, Rest, true}; + false -> {_LDrop, RKeep} = lists:splitwith(LTrimFun, BlockList), - {Acc ++ RKeep, Rest}; - {false, false, false, true} -> - {LKeep, _RDrop} = lists:splitwith(RTrimFun, - BlockList), - {Acc ++ LKeep, Rest}; - {false, false, true, true} -> - {_LDrop, RKeep} = lists:splitwith(LTrimFun, - BlockList), - {LKeep, _RDrop} = lists:splitwith(RTrimFun, RKeep), - {Acc ++ LKeep, Rest} - end - + case leveled_codec:endkey_passed(EndKey, LastKey) of + true -> + {LKeep, _RDrop} = lists:splitwith(RTrimFun, RKeep), + {Acc ++ LKeep, Rest, false}; + false -> + {Acc ++ RKeep, Rest, true} + end + end; + {_ , false} -> + {Acc, Bin, false} end end, - {Out, _Rem} = + {Out, _Rem, _Continue} = case crc_check_slot(FullBin) of {BlockLengths, RestBin} -> < B3L:32/integer, B4L:32/integer>> = BlockLengths, <<_PosBinIndex:B1P/binary, Blocks/binary>> = RestBin, - lists:foldl(BlockFetchFun, {[], Blocks}, [B1L, B2L, B3L, B4L]); + lists:foldl(BlockFetchFun, {[], Blocks, true}, [B1L, B2L, B3L, B4L]); crc_wonky -> - {[], <<>>} + {[], <<>>, true} end, Out. -trim_booleans(FirstKey, _LastKey, StartKey, all) -> - FirstKeyPassedStart = FirstKey > StartKey, - case FirstKeyPassedStart of - true -> - {false, true, false, false}; - false -> - {false, false, true, false} - end; -trim_booleans(_FirstKey, LastKey, all, EndKey) -> - LastKeyPassedEnd = leveled_codec:endkey_passed(EndKey, LastKey), - case LastKeyPassedEnd of - true -> - {false, false, false, true}; - false -> - {false, true, false, false} - end; -trim_booleans(FirstKey, LastKey, StartKey, EndKey) -> - FirstKeyPassedStart = FirstKey > StartKey, - PreRange = LastKey < StartKey, - PostRange = leveled_codec:endkey_passed(EndKey, FirstKey), - OutOfRange = PreRange or PostRange, - LastKeyPassedEnd = leveled_codec:endkey_passed(EndKey, LastKey), - case OutOfRange of - true -> - {true, false, false, false}; - false -> - case {FirstKeyPassedStart, LastKeyPassedEnd} of - {true, false} -> - {false, true, false, false}; - {false, false} -> - {false, false, true, false}; - {true, true} -> - {false, false, false, true}; - {false, true} -> - {false, false, true, true} - end - end. - crc_check_slot(FullBin) -> <> = FullBin, From 431c2cee40ed8467cbf237d3a8d98ceccb2e80f5 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 19 Mar 2017 23:37:50 +0000 Subject: [PATCH 4/5] Remove unnecessary line Brnach cannot be reached as firts key is always discovered when it is a no_loolup --- src/leveled_sst.erl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index b1cbf62..4a80ecf 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1111,15 +1111,6 @@ form_slot(KVList1, KVList2, _LI, no_lookup, ?NOLOOK_SLOTSIZE, Slot, FK) -> {KVList1, KVList2, {no_lookup, lists:reverse(Slot)}, FK}; form_slot(KVList1, KVList2, {IsBasement, TS}, lookup, Size, Slot, FK) -> case {key_dominates(KVList1, KVList2, {IsBasement, TS}), FK} of - {{{next_key, TopKV}, Rem1, Rem2}, null} -> - {TopK, _TopV} = TopKV, - form_slot(Rem1, - Rem2, - {IsBasement, TS}, - lookup, - Size + 1, - [TopKV|Slot], - TopK); {{{next_key, TopKV}, Rem1, Rem2}, _} -> form_slot(Rem1, Rem2, From 5c662aeca17d96de5a977807ca7dc31f1ba9f643 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sun, 19 Mar 2017 23:42:24 +0000 Subject: [PATCH 5/5] Additional unit test Need to test scenario where the key list the SST file created from is an exact multiple of the slot size --- src/leveled_sst.erl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 4a80ecf..cb5d71c 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1656,7 +1656,24 @@ additional_range_test() -> % R8 = sst_getkvrange(P1, element(1, PastEKV), element(1, PastEKV), 2), % ?assertMatch([], R8). - + +simple_persisted_slotsize_test() -> + {RP, Filename} = {"../test/", "simple_slotsize_test"}, + KVList0 = generate_randomkeys(1, ?SLOT_SIZE * 2, 1, 20), + KVList1 = lists:sublist(lists:ukeysort(1, KVList0), ?SLOT_SIZE), + [{FirstKey, _FV}|_Rest] = KVList1, + {LastKey, _LV} = lists:last(KVList1), + {ok, Pid, {FirstKey, LastKey}} = sst_new(RP, + Filename, + 1, + KVList1, + length(KVList1)), + lists:foreach(fun({K, V}) -> + ?assertMatch({K, V}, sst_get(Pid, K)) + end, + KVList1), + ok = sst_close(Pid), + ok = file:delete(filename:join(RP, Filename ++ ".sst")). simple_persisted_test() -> {RP, Filename} = {"../test/", "simple_test"},