From 04cfb453c447514850adcb3879119bdee26fadb6 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Tue, 7 Mar 2017 20:19:11 +0000 Subject: [PATCH 01/23] Fetch specific block only Rely on CRC check in zlib. Still need to catch on failure --- src/leveled_sst.erl | 273 +++++++++++++++++++++++++------------------- 1 file changed, 158 insertions(+), 115 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 1d2b1c0..78fbce5 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -404,20 +404,22 @@ fetch(LedgerKey, Hash, State) -> State#state.blockindex_cache), case CachedBlockIdx of none -> + io:format("Looking for key without cache~n"), SlotBin = read_slot(State#state.handle, Slot), - {Result, BlockIdx} = binaryslot_get(SlotBin, - LedgerKey, - Hash, - none), + {Result, + BlockLengths, + BlockIdx} = binaryslot_get(SlotBin, LedgerKey, Hash), BlockIndexCache = array:set(SlotID - 1, - BlockIdx, + <>, State#state.blockindex_cache), {Result, slot_fetch, Slot#slot_index_value.slot_id, State#state{blockindex_cache = BlockIndexCache}}; - _ -> - PosList = find_pos(CachedBlockIdx, + <> -> + io:format("Looking for key with cache~n"), + PosList = find_pos(BlockIdx, double_hash(Hash, LedgerKey), [], 0), @@ -425,12 +427,12 @@ fetch(LedgerKey, Hash, State) -> [] -> {not_present, slot_bloom, SlotID, State}; _ -> - SlotBin = read_slot(State#state.handle, Slot), - Result = binaryslot_get(SlotBin, - LedgerKey, - Hash, - {true, PosList}), - {element(1, Result), slot_fetch, SlotID, State} + Result = check_blocks(PosList, + State#state.handle, + Slot, + BlockLengths, + LedgerKey), + {Result, slot_fetch, SlotID, State} end end end. @@ -607,45 +609,6 @@ build_all_slots(KVL, SC, Pos, SlotID, SlotIdx, BlockIdxA, SlotsBin) -> array:set(SlotID - 1, BlockIndex, BlockIdxA), <>). -read_slot(Handle, Slot) -> - {ok, SlotBin} = file:pread(Handle, - Slot#slot_index_value.start_position, - Slot#slot_index_value.length), - SlotBin. - -read_slots(Handle, SlotList) -> - PointerMapFun = - fun(Pointer) -> - {Slot, SK, EK} = - case Pointer of - {pointer, _Pid, Slot0, SK0, EK0} -> - {Slot0, SK0, EK0}; - {pointer, Slot0, SK0, EK0} -> - {Slot0, SK0, EK0} - end, - - {Slot#slot_index_value.start_position, - Slot#slot_index_value.length, - SK, - EK} - end, - - LengthList = lists:map(PointerMapFun, SlotList), - StartPos = element(1, lists:nth(1, LengthList)), - EndPos = element(1, lists:last(LengthList)) - + element(2, lists:last(LengthList)), - {ok, MultiSlotBin} = file:pread(Handle, StartPos, EndPos - StartPos), - - BinSplitMapFun = - fun({SP, L, SK, EK}) -> - Start = SP - StartPos, - <<_Pre:Start/binary, - SlotBin:L/binary, - _Post/binary>> = MultiSlotBin, - {SlotBin, SK, EK} - end, - - lists:map(BinSplitMapFun, LengthList). generate_filenames(RootFilename) -> Ext = filename:extension(RootFilename), @@ -808,7 +771,7 @@ generate_binary_slot(KVL) -> B2L = byte_size(B2), B3L = byte_size(B3), B4L = byte_size(B4), - Lengths = < CRC32 = erlang:crc32(SlotBin), FullBin = <>, - {PosBinIndex1, FullBin, HashL}. + {<>, FullBin, HashL}. 
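The fetch path above caches each slot's serialised position index (prefixed with the 20-byte block-length header) in an array keyed by slot ID, so only the first fetch against a slot pays for reading and CRC-checking the whole slot binary. A minimal sketch of that cache pattern, using hypothetical function names rather than anything from the patch:

%% Sketch only: a fixed-size array defaulting to none, filled lazily the
%% first time a slot is read, then consulted on every later fetch.
new_cache(SlotCount) ->
    array:new([{size, SlotCount}, {default, none}]).

cached_index(SlotID, Cache) ->
    array:get(SlotID - 1, Cache).   % none until the slot has been read once

cache_index(SlotID, HeaderAndPosIndex, Cache)
        when is_binary(HeaderAndPosIndex) ->
    array:set(SlotID - 1, HeaderAndPosIndex, Cache).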
-binaryslot_get(FullBin, Key, Hash, CachedPosLookup) -> +check_blocks([], _Handle, _Slot, _BlockLengths, _LedgerKey) -> + not_present; +check_blocks([Pos|Rest], Handle, Slot, BlockLengths, LedgerKey) -> + {BlockNumber, BlockPos} = revert_position(Pos), + io:format("Checking BlockNumber ~w in BlockPos ~w~n", + [BlockNumber, BlockPos]), + BlockBin = read_block(Handle, Slot, BlockLengths, BlockNumber), + BlockL = binary_to_term(BlockBin), + {K, V} = lists:nth(BlockPos, BlockL), + case K of + LedgerKey -> + io:format("Key mismatch in check_blocks~n"), + {K, V}; + _ -> + check_blocks(Rest, Handle, Slot, BlockLengths, LedgerKey) + end. + + +read_block(Handle, Slot, BlockLengths, BlockID) -> + {BlockPos, Offset, Length} = block_offsetandlength(BlockLengths, BlockID), + io:format("Reading offset ~w Length ~w~n", [Offset, Length]), + {ok, BlockBin} = file:pread(Handle, + Slot#slot_index_value.start_position + + BlockPos + + Offset + + 24, + % 4-byte CRC, 4 byte pos, 4x4 byte lengths + Length), + BlockBin. + +read_slot(Handle, Slot) -> + {ok, SlotBin} = file:pread(Handle, + Slot#slot_index_value.start_position, + Slot#slot_index_value.length), + SlotBin. + +read_slots(Handle, SlotList) -> + PointerMapFun = + fun(Pointer) -> + {Slot, SK, EK} = + case Pointer of + {pointer, _Pid, Slot0, SK0, EK0} -> + {Slot0, SK0, EK0}; + {pointer, Slot0, SK0, EK0} -> + {Slot0, SK0, EK0} + end, + + {Slot#slot_index_value.start_position, + Slot#slot_index_value.length, + SK, + EK} + end, + + LengthList = lists:map(PointerMapFun, SlotList), + StartPos = element(1, lists:nth(1, LengthList)), + EndPos = element(1, lists:last(LengthList)) + + element(2, lists:last(LengthList)), + {ok, MultiSlotBin} = file:pread(Handle, StartPos, EndPos - StartPos), + + BinSplitMapFun = + fun({SP, L, SK, EK}) -> + Start = SP - StartPos, + <<_Pre:Start/binary, + SlotBin:L/binary, + _Post/binary>> = MultiSlotBin, + {SlotBin, SK, EK} + end, + + lists:map(BinSplitMapFun, LengthList). + + +binaryslot_get(FullBin, Key, Hash) -> case crc_check_slot(FullBin) of - {Lengths, Rest} -> - B1P = element(1, Lengths), - case CachedPosLookup of - {true, PosList} -> - <<_PosBinIndex:B1P/binary, Blocks/binary>> = Rest, - {fetch_value(PosList, Lengths, Blocks, Key), none}; - none -> - <> = Rest, - PosList = find_pos(PosBinIndex, - double_hash(Hash, Key), - [], - 0), - {fetch_value(PosList, Lengths, Blocks, Key), PosBinIndex} - end; + {BlockLengths, Rest} -> + <> = BlockLengths, + <> = Rest, + PosList = find_pos(PosBinIndex, + double_hash(Hash, Key), + [], + 0), + {fetch_value(PosList, BlockLengths, Blocks, Key), + BlockLengths, + PosBinIndex}; crc_wonky -> - {not_present, none} + {not_present, + none, + none} end. binaryslot_tolist(FullBin) -> @@ -856,8 +888,12 @@ binaryslot_tolist(FullBin) -> {Out, _Rem} = case crc_check_slot(FullBin) of - {Lengths, RestBin} -> - {B1P, B1L, B2L, B3L, B4L} = Lengths, + {BlockLengths, RestBin} -> + <> = BlockLengths, <<_PosBinIndex:B1P/binary, Blocks/binary>> = RestBin, lists:foldl(BlockFetchFun, {[], Blocks}, [B1L, B2L, B3L, B4L]); crc_wonky -> @@ -908,8 +944,12 @@ binaryslot_trimmedlist(FullBin, StartKey, EndKey) -> {Out, _Rem} = case crc_check_slot(FullBin) of - {Lengths, RestBin} -> - {B1P, B1L, B2L, B3L, B4L} = Lengths, + {BlockLengths, RestBin} -> + <> = BlockLengths, <<_PosBinIndex:B1P/binary, Blocks/binary>> = RestBin, lists:foldl(BlockFetchFun, {[], Blocks}, [B1L, B2L, B3L, B4L]); crc_wonky -> @@ -957,65 +997,68 @@ trim_booleans(FirstKey, LastKey, StartKey, EndKey) -> end. 
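For readers new to the slot format: binaryslot_get/3 above works against a compact position index rather than the key list itself. A worked illustration of that index's layout, derived from the HashFoldFun and find_pos clauses in this file (byte values are examples only):

%% Each key that may be point-fetched contributes a two-byte entry
%% <<1:1, Hash:15>>, where Hash is the 15-bit double_hash/2 of the key;
%% runs of keys that are never point-fetched (e.g. index entries) are
%% collapsed into a single byte <<0:1, RunLength-1:7>>.  So a slot holding
%% 128 index-only keys has the one-byte index <<0:1, 127:7>>, which is why
%% the tests assert ?assertMatch(<<_BL:20/binary, 127:8/integer>>, ...)
%% once the 20-byte block-length header is prepended.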
- - crc_check_slot(FullBin) -> <> = FullBin, case erlang:crc32(SlotBin) of CRC32 -> - <> = SlotBin, - Lengths = {B1P, B1L, B2L, B3L, B4L}, - {Lengths, Rest}; + <> = SlotBin, + {BlockLengths, Rest}; _ -> leveled_log:log("SST09", []), crc_wonky end. +block_offsetandlength(BlockLengths, BlockID) -> + <> = BlockLengths, + case BlockID of + 1 -> + <> = BlockLengths0, + {BlocksPos, 0, B1L}; + 2 -> + <> = BlockLengths0, + {BlocksPos, B1L, B2L}; + 3 -> + <> = BlockLengths0, + {BlocksPos, B1L + B2L, B3L}; + 4 -> + <> = BlockLengths0, + {BlocksPos, B1L + B2L + B3L, B4L} + end. + double_hash(Hash, Key) -> H2 = erlang:phash2(Key), (Hash bxor H2) band 32767. -fetch_value([], _Lengths, _Blocks, _Key) -> +fetch_value([], _BlockLengths, _Blocks, _Key) -> not_present; -fetch_value([Pos|Rest], Lengths, Blocks, Key) -> - BlockNumber = (Pos div 32) + 1, - BlockPos = (Pos rem 32) + 1, - BlockL = - case BlockNumber of - 1 -> - B1L = element(2, Lengths), - <> = Blocks, - binary_to_term(Block); - 2 -> - B1L = element(2, Lengths), - B2L = element(3, Lengths), - <<_Pass:B1L/binary, Block:B2L/binary, _Rest/binary>> = Blocks, - binary_to_term(Block); - 3 -> - PreL = element(2, Lengths) + element(3, Lengths), - B3L = element(4, Lengths), - <<_Pass:PreL/binary, Block:B3L/binary, _Rest/binary>> = Blocks, - binary_to_term(Block); - 4 -> - {_B1P, B1L, B2L, B3L, B4L} = Lengths, - PreL = B1L + B2L + B3L, - <<_Pass:PreL/binary, Block:B4L/binary>> = Blocks, - binary_to_term(Block) - end, - +fetch_value([Pos|Rest], BlockLengths, Blocks, Key) -> + {BlockNumber, BlockPos} = revert_position(Pos), + {_BlockPos, + Offset, + Length} = block_offsetandlength(BlockLengths, BlockNumber), + <<_Pre:Offset/binary, Block:Length/binary, _Rest/binary>> = Blocks, + BlockL = binary_to_term(Block), {K, V} = lists:nth(BlockPos, BlockL), case K of Key -> + io:format("Key mismatch in fetch_value~n"), {K, V}; _ -> - fetch_value(Rest, Lengths, Blocks, Key) + fetch_value(Rest, BlockLengths, Blocks, Key) end. + +revert_position(Pos) -> + BlockNumber = (Pos div 32) + 1, + BlockPos = (Pos rem 32) + 1, + {BlockNumber, BlockPos}. + find_pos(<<>>, _Hash, PosList, _Count) -> PosList; find_pos(<<1:1/integer, Hash:15/integer, T/binary>>, Hash, PosList, Count) -> @@ -1272,7 +1315,7 @@ indexed_list_mixedkeys2_test() -> indexed_list_allindexkeys_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128), {PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys), - ?assertMatch(<<127:8/integer>>, PosBinIndex1), + ?assertMatch(<<_BL:20/binary, 127:8/integer>>, PosBinIndex1), % SW = os:timestamp(), BinToList = binaryslot_tolist(FullBin), % io:format(user, @@ -1285,7 +1328,7 @@ indexed_list_allindexkeys_test() -> indexed_list_allindexkeys_trimmed_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128), {PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys), - ?assertMatch(<<127:8/integer>>, PosBinIndex1), + ?assertMatch(<<_BL:20/binary, 127:8/integer>>, PosBinIndex1), ?assertMatch(Keys, binaryslot_trimmedlist(FullBin, {i, "Bucket", @@ -1351,7 +1394,7 @@ indexed_list_mixedkeys_bitflip_test() -> test_binary_slot(FullBin, Key, Hash, ExpectedValue) -> % SW = os:timestamp(), - {ReturnedValue, _} = binaryslot_get(FullBin, Key, Hash, none), + {ReturnedValue, _BLs, _Idx} = binaryslot_get(FullBin, Key, Hash), ?assertMatch(ExpectedValue, ReturnedValue). % io:format(user, "Fetch success in ~w microseconds ~n", % [timer:now_diff(os:timestamp(), SW)]). 
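Taken together, the layout written by generate_binary_slot/1 and read back by read_block/4 means a single key can be fetched with one small pread per candidate block. A minimal sketch of the offset arithmetic (the 24-byte constant is the 4-byte CRC plus the 20-byte length header; the module and function names here are illustrative only):

-module(slot_offset_sketch).
-export([block_range/2]).

%% Given the 20-byte length header cached per slot, return the byte range
%% of BlockID relative to the slot's start position in the file.
block_range(<<PosIdxLen:32/integer,
              B1L:32/integer, B2L:32/integer,
              B3L:32/integer, B4L:32/integer>>, BlockID) ->
    Header = 24,  % 4-byte CRC32 + 4-byte index length + 4 x 4-byte lengths
    {Skip, Len} =
        case BlockID of
            1 -> {0, B1L};
            2 -> {B1L, B2L};
            3 -> {B1L + B2L, B3L};
            4 -> {B1L + B2L + B3L, B4L}
        end,
    {Header + PosIdxLen + Skip, Len}.

A file:pread/3 over that range then yields just the one compressed block to hand to binary_to_term/1, rather than the whole slot.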
From 730ab2ec485f4c75ca2202061213f02581251b5a Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 10 Mar 2017 11:10:15 +0000 Subject: [PATCH 02/23] tidy out io:format --- src/leveled_sst.erl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 8f12fcc..76dc9b5 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -414,7 +414,6 @@ fetch(LedgerKey, Hash, State) -> State#state.blockindex_cache), case CachedBlockIdx of none -> - io:format("Looking for key without cache~n"), SlotBin = read_slot(State#state.handle, Slot), {Result, BlockLengths, @@ -428,7 +427,6 @@ fetch(LedgerKey, Hash, State) -> Slot#slot_index_value.slot_id, State#state{blockindex_cache = BlockIndexCache}}; <> -> - io:format("Looking for key with cache~n"), PosList = find_pos(BlockIdx, double_hash(Hash, LedgerKey), [], @@ -800,14 +798,11 @@ check_blocks([], _Handle, _Slot, _BlockLengths, _LedgerKey) -> not_present; check_blocks([Pos|Rest], Handle, Slot, BlockLengths, LedgerKey) -> {BlockNumber, BlockPos} = revert_position(Pos), - io:format("Checking BlockNumber ~w in BlockPos ~w~n", - [BlockNumber, BlockPos]), BlockBin = read_block(Handle, Slot, BlockLengths, BlockNumber), BlockL = binary_to_term(BlockBin), {K, V} = lists:nth(BlockPos, BlockL), case K of LedgerKey -> - io:format("Key mismatch in check_blocks~n"), {K, V}; _ -> check_blocks(Rest, Handle, Slot, BlockLengths, LedgerKey) @@ -816,7 +811,6 @@ check_blocks([Pos|Rest], Handle, Slot, BlockLengths, LedgerKey) -> read_block(Handle, Slot, BlockLengths, BlockID) -> {BlockPos, Offset, Length} = block_offsetandlength(BlockLengths, BlockID), - io:format("Reading offset ~w Length ~w~n", [Offset, Length]), {ok, BlockBin} = file:pread(Handle, Slot#slot_index_value.start_position + BlockPos @@ -1058,7 +1052,6 @@ fetch_value([Pos|Rest], BlockLengths, Blocks, Key) -> {K, V} = lists:nth(BlockPos, BlockL), case K of Key -> - io:format("Key mismatch in fetch_value~n"), {K, V}; _ -> fetch_value(Rest, BlockLengths, Blocks, Key) From b2f3d882a92b66a5ab59ca146cd14d2d85fb272a Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 10 Mar 2017 20:43:37 +0000 Subject: [PATCH 03/23] Draft of branch to condense range_only keys --- src/leveled_codec.erl | 11 +- src/leveled_log.erl | 2 +- src/leveled_sst.erl | 303 +++++++++++++++++++++++++++--------------- 3 files changed, 210 insertions(+), 106 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index ad25c58..656e210 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -61,7 +61,8 @@ generate_uuid/0, integer_now/0, riak_extract_metadata/2, - magic_hash/1]). + magic_hash/1, + to_lookup/1]). -define(V1_VERS, 1). -define(MAGIC, 53). % riak_kv -> riak_object @@ -73,6 +74,14 @@ %% what they are - %% http://stackoverflow.com/questions/10696223/reason-for-5381-number-in-djb-hash-function +to_lookup(Key) -> + case element(1, Key) of + ?IDX_TAG -> + no_lookup; + _ -> + lookup + end. 
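to_lookup/1 above is the hinge for the rest of this branch: it decides, per ledger key, whether the key will ever be point-fetched. A usage illustration, assuming the tag macros defined in the project's include file:

%% Index (2i) entries are only ever read by range scan, so they need no
%% hash entry or bloom coverage and can be packed into bigger slots:
%%   to_lookup({?IDX_TAG, <<"B">>, {<<"idx1_bin">>, <<"1990">>}, <<"K1">>})
%%     =:= no_lookup
%%   to_lookup({?STD_TAG, <<"B">>, <<"K1">>, null})
%%     =:= lookup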
+ magic_hash({?RIAK_TAG, Bucket, Key, _SubKey}) -> magic_hash({Bucket, Key}); magic_hash({?STD_TAG, Bucket, Key, _SubKey}) -> diff --git a/src/leveled_log.erl b/src/leveled_log.erl index c89fbea..47a64bd 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -259,7 +259,7 @@ {error, "False result returned from SST with filename ~s as " ++ "slot ~w has failed crc check"}}, {"SST03", - {info, "Opening SST file with filename ~s keys ~w slots ~w and" + {info, "Opening SST file with filename ~s slots ~w and" ++ " max sqn ~w"}}, {"SST04", {info, "Exit called for reason ~w on filename ~s"}}, diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 76dc9b5..c4ae1be 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -66,6 +66,8 @@ -define(MAX_SLOTS, 256). -define(SLOT_SIZE, 128). % This is not configurable +-define(NOLOOK_MULT, 2). % How much bigger is a slot/block with no lookups +-define(NOLOOK_SLOTSIZE, ?SLOT_SIZE * ?NOLOOK_MULT). -define(COMPRESSION_LEVEL, 1). -define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_LEVEL}]). % -define(LEVEL_BLOOM_BITS, [{0, 8}, {1, 10}, {2, 8}, {default, 6}]). @@ -143,22 +145,23 @@ sst_open(RootPath, Filename) -> sst_new(RootPath, Filename, Level, KVList, MaxSQN) -> {ok, Pid} = gen_fsm:start(?MODULE, [], []), + SlotList = merge_lists(KVList), case gen_fsm:sync_send_event(Pid, {sst_new, RootPath, Filename, Level, - KVList, + SlotList, MaxSQN}, infinity) of {ok, {SK, EK}} -> {ok, Pid, {SK, EK}} end. -sst_new(RootPath, Filename, KL1, KL2, IsBasement, Level, MaxSQN) -> - {{Rem1, Rem2}, MergedList} = merge_lists(KL1, KL2, {IsBasement, Level}), - case MergedList of - [] -> +sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN) -> + {Rem1, Rem2, SlotList} = merge_lists(KVL1, KVL2, {IsBasement, Level}), + case SlotList of + {_, []} -> empty; _ -> {ok, Pid} = gen_fsm:start(?MODULE, [], []), @@ -167,7 +170,7 @@ sst_new(RootPath, Filename, KL1, KL2, IsBasement, Level, MaxSQN) -> RootPath, Filename, Level, - MergedList, + SlotList, MaxSQN}, infinity) of {ok, {SK, EK}} -> @@ -241,13 +244,14 @@ starting({sst_open, RootPath, Filename}, _From, State) -> {ok, {Summary#summary.first_key, Summary#summary.last_key}}, reader, UpdState}; -starting({sst_new, RootPath, Filename, Level, KVList, MaxSQN}, _From, State) -> +starting({sst_new, RootPath, Filename, Level, SlotList, MaxSQN}, + _From, State) -> SW = os:timestamp(), {FirstKey, Length, SlotIndex, BlockIndex, - SlotsBin} = build_all_slots(KVList), + SlotsBin} = build_all_slots(SlotList), SummaryBin = build_table_summary(SlotIndex, Level, FirstKey, @@ -268,15 +272,16 @@ starting({sst_newlevelzero, RootPath, Filename, Slots, FetchFun, Penciller, MaxSQN}, State) -> SW = os:timestamp(), KVList = leveled_pmem:to_list(Slots, FetchFun), + SlotList = merge_lists(KVList), {FirstKey, - Length, + SlotCount, SlotIndex, BlockIndex, - SlotsBin} = build_all_slots(KVList), + SlotsBin} = build_all_slots(SlotList), SummaryBin = build_table_summary(SlotIndex, 0, FirstKey, - Length, + SlotCount, MaxSQN), ActualFilename = write_file(RootPath, Filename, SummaryBin, SlotsBin), UpdState = read_file(ActualFilename, State#state{root_path=RootPath}), @@ -534,14 +539,13 @@ read_file(Filename, State) -> {Handle, SummaryBin} = open_reader(filename:join(State#state.root_path, Filename)), {Summary, SlotList} = read_table_summary(SummaryBin), - SlotCount = length(SlotList), - BlockIndexCache = array:new([{size, SlotCount}, {default, none}]), + BlockIndexCache = array:new([{size, Summary#summary.size}, + {default, none}]), UpdState 
= State#state{blockindex_cache = BlockIndexCache}, SlotIndex = from_list(SlotList), UpdSummary = Summary#summary{index = SlotIndex}, leveled_log:log("SST03", [Filename, Summary#summary.size, - SlotCount, Summary#summary.max_sqn]), UpdState#state{summary = UpdSummary, handle = Handle, @@ -554,13 +558,13 @@ open_reader(Filename) -> {ok, SummaryBin} = file:pread(Handle, SlotsLength + 8, SummaryLength), {Handle, SummaryBin}. -build_table_summary(SlotList, _Level, FirstKey, L, MaxSQN) -> - [{LastKey, _LastV}|_Rest] = SlotList, +build_table_summary(SlotIndex, _Level, FirstKey, SlotCount, MaxSQN) -> + [{LastKey, _LastV}|_Rest] = SlotIndex, Summary = #summary{first_key = FirstKey, last_key = LastKey, - size = L, + size = SlotCount, max_sqn = MaxSQN}, - SummBin = term_to_binary({Summary, lists:reverse(SlotList)}, + SummBin = term_to_binary({Summary, lists:reverse(SlotIndex)}, ?BINARY_SETTINGS), SummCRC = erlang:crc32(SummBin), <>. @@ -574,15 +578,12 @@ read_table_summary(BinWithCheck) -> binary_to_term(SummBin) end. -build_all_slots(KVList) -> - L = length(KVList), - % The length is not a constant time command and the list may be large, - % but otherwise length must be called each iteration to avoid exception - % on split or sublist + +build_all_slots(SlotList) -> + [{_Lookup, KVList}|_R] = SlotList, [{FirstKey, _FirstV}|_Rest] = KVList, - SlotCount = L div ?SLOT_SIZE + 1, - BuildResponse = build_all_slots(KVList, - SlotCount, + SlotCount = length(SlotList), + BuildResponse = build_all_slots(SlotList, 8, 1, [], @@ -590,33 +591,26 @@ build_all_slots(KVList) -> {default, none}]), <<>>), {SlotIndex, BlockIndex, SlotsBin} = BuildResponse, - {FirstKey, L, SlotIndex, BlockIndex, SlotsBin}. + {FirstKey, SlotCount, SlotIndex, BlockIndex, SlotsBin}. -build_all_slots([], _SC, _Pos, _SlotID, SlotIdx, BlockIdxA, SlotsBin) -> - {SlotIdx, BlockIdxA, SlotsBin}; -build_all_slots(KVL, SC, Pos, SlotID, SlotIdx, BlockIdxA, SlotsBin) -> - {SlotList, KVRem} = - case SC of - 1 -> - {lists:sublist(KVL, ?SLOT_SIZE), []}; - _N -> - lists:split(?SLOT_SIZE, KVL) - end, - {LastKey, _V} = lists:last(SlotList), - {BlockIndex, SlotBin, HashList} = generate_binary_slot(SlotList), +build_all_slots([], _Pos, _SlotID, SlotIdxAcc, BlockIdxAcc, SlotBinAcc) -> + {SlotIdxAcc, BlockIdxAcc, SlotBinAcc}; +build_all_slots(SlotList, Pos, SlotID, SlotIdxAcc, BlockIdxAcc, SlotBinAcc) -> + [{Lookup, KVList}|Rest] = SlotList, + {LastKey, _LastV} = lists:last(KVList), + {BlockIdx, SlotBin, HashList} = generate_binary_slot(Lookup, KVList), Length = byte_size(SlotBin), Bloom = leveled_tinybloom:create_bloom(HashList), SlotIndexV = #slot_index_value{slot_id = SlotID, start_position = Pos, length = Length, bloom = Bloom}, - build_all_slots(KVRem, - SC - 1, + build_all_slots(Rest, Pos + Length, SlotID + 1, - [{LastKey, SlotIndexV}|SlotIdx], - array:set(SlotID - 1, BlockIndex, BlockIdxA), - <>). + [{LastKey, SlotIndexV}|SlotIdxAcc], + array:set(SlotID - 1, BlockIdx, BlockIdxAcc), + <>). generate_filenames(RootFilename) -> @@ -700,7 +694,7 @@ lookup_slots(StartKey, EndKey, Tree) -> %% based on a 17-bit hash (so 0.0039 fpr). 
-generate_binary_slot(KVL) -> +generate_binary_slot(Lookup, KVL) -> HashFoldFun = fun({K, V}, {PosBinAcc, NoHashCount, HashAcc}) -> @@ -733,49 +727,66 @@ generate_binary_slot(KVL) -> end end, - - {PosBinIndex0, NHC, HashL} = lists:foldr(HashFoldFun, {<<>>, 0, []}, KVL), - PosBinIndex1 = - case NHC of - 0 -> - PosBinIndex0; - _ -> - N = NHC - 1, - <<0:1/integer, N:7/integer, PosBinIndex0/binary>> + + {HashL, PosBinIndex} = + case Lookup of + lookup -> + {PosBinIndex0, + NHC, + HashL0} = lists:foldr(HashFoldFun, {<<>>, 0, []}, KVL), + PosBinIndex1 = + case NHC of + 0 -> + PosBinIndex0; + _ -> + N = NHC - 1, + <<0:1/integer, N:7/integer, PosBinIndex0/binary>> + end, + {HashL0, PosBinIndex1}; + no_lookup -> + {[], <<0:1/integer, 127:7/integer>>} end, + BlockSize = + case Lookup of + lookup -> + ?SLOT_SIZE div 4; + no_lookup -> + ?NOLOOK_SLOTSIZE div 4 + end, + {B1, B2, B3, B4} = case length(KVL) of - L when L =< 32 -> + L when L =< BlockSize -> {term_to_binary(KVL, ?BINARY_SETTINGS), <<0:0>>, <<0:0>>, <<0:0>>}; - L when L =< 64 -> - {KVLA_32, KVLB_32} = lists:split(32, KVL), - {term_to_binary(KVLA_32, ?BINARY_SETTINGS), - term_to_binary(KVLB_32, ?BINARY_SETTINGS), + L when L =< 2 * BlockSize -> + {KVLA, KVLB} = lists:split(BlockSize, KVL), + {term_to_binary(KVLA, ?BINARY_SETTINGS), + term_to_binary(KVLB, ?BINARY_SETTINGS), <<0:0>>, <<0:0>>}; - L when L =< 96 -> - {KVLA_32, KVLB_64} = lists:split(32, KVL), - {KVLB_32, KVLC_32} = lists:split(32, KVLB_64), - {term_to_binary(KVLA_32, ?BINARY_SETTINGS), - term_to_binary(KVLB_32, ?BINARY_SETTINGS), - term_to_binary(KVLC_32, ?BINARY_SETTINGS), + L when L =< 3 * BlockSize -> + {KVLA, KVLB_Rest} = lists:split(BlockSize, KVL), + {KVLB, KVLC} = lists:split(BlockSize, KVLB_Rest), + {term_to_binary(KVLA, ?BINARY_SETTINGS), + term_to_binary(KVLB, ?BINARY_SETTINGS), + term_to_binary(KVLC, ?BINARY_SETTINGS), <<0:0>>}; - L when L =< 128 -> - {KVLA_32, KVLB_96} = lists:split(32, KVL), - {KVLB_32, KVLC_64} = lists:split(32, KVLB_96), - {KVLC_32, KVLD_32} = lists:split(32, KVLC_64), - {term_to_binary(KVLA_32, ?BINARY_SETTINGS), - term_to_binary(KVLB_32, ?BINARY_SETTINGS), - term_to_binary(KVLC_32, ?BINARY_SETTINGS), - term_to_binary(KVLD_32, ?BINARY_SETTINGS)} + L when L =< 4 * BlockSize -> + {KVLA, KVLB_Rest} = lists:split(BlockSize, KVL), + {KVLB, KVLC_Rest} = lists:split(BlockSize, KVLB_Rest), + {KVLC, KVLD} = lists:split(BlockSize, KVLC_Rest), + {term_to_binary(KVLA, ?BINARY_SETTINGS), + term_to_binary(KVLB, ?BINARY_SETTINGS), + term_to_binary(KVLC, ?BINARY_SETTINGS), + term_to_binary(KVLD, ?BINARY_SETTINGS)} end, - B1P = byte_size(PosBinIndex1), + B1P = byte_size(PosBinIndex), B1L = byte_size(B1), B2L = byte_size(B2), B3L = byte_size(B3), @@ -786,12 +797,12 @@ generate_binary_slot(KVL) -> B3L:32/integer, B4L:32/integer>>, SlotBin = <>, CRC32 = erlang:crc32(SlotBin), FullBin = <>, - {<>, FullBin, HashL}. + {<>, FullBin, HashL}. 
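The repeated lists:split calls above implement one fixed rule: a slot becomes at most four term_to_binary blocks of BlockSize keys each (32 for lookup slots, 64 for no_lookup slots), with unused blocks padded by empty binaries. An equivalent, more general sketch of that split, returning a list of four binaries rather than the 4-tuple used in the module (illustrative only, not a drop-in replacement):

%% Split KVL (at most 4 * BlockSize keys) into exactly four binaries.
split_blocks(KVL, BlockSize) when length(KVL) =< 4 * BlockSize ->
    split_blocks(KVL, BlockSize, 4, []).

split_blocks([], _BlockSize, 0, Acc) ->
    lists:reverse(Acc);
split_blocks([], BlockSize, N, Acc) ->
    split_blocks([], BlockSize, N - 1, [<<>>|Acc]);     % pad unused blocks
split_blocks(KVL, BlockSize, N, Acc) when length(KVL) =< BlockSize ->
    Block = term_to_binary(KVL, [{compressed, 1}]),
    split_blocks([], BlockSize, N - 1, [Block|Acc]);
split_blocks(KVL, BlockSize, N, Acc) ->
    {Part, Rest} = lists:split(BlockSize, KVL),
    Block = term_to_binary(Part, [{compressed, 1}]),
    split_blocks(Rest, BlockSize, N - 1, [Block|Acc]).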
check_blocks([], _Handle, _Slot, _BlockLengths, _LedgerKey) -> @@ -1078,8 +1089,25 @@ find_pos(<<0:1/integer, NHC:7/integer, T/binary>>, Hash, PosList, Count) -> %%% Merge Functions %%%============================================================================ -%% functions for merging two KV lists with pointers - +%% The source lists are merged into lists of slots before the file is created +%% At Level zero, there will be a single source list - and this will always be +%% split into standard size slots +%% +%% At lower levels there will be two source lists and they will need to be +%% merged to ensure that the best conflicting answer survives and compactable +%% KV pairs are discarded. +%% +%% At lower levels slots can be larger if there are no lookup keys present in +%% the slot. This is to slow the growth of the manifest/number-of-files when +%% large numbers of index keys are present - as well as improving compression +%% ratios in the Ledger. +%% +%% The outcome of merge_lists/1 and merge_lists/3 should be an list of slots. +%% Each slot should be ordered by Key and be of the form {Flag, KVList}, where +%% Flag can either be lookup or no-lookup. The list of slots should also be +%% ordered by Key (i.e. the first key in the slot) +%% +%% For merging ... %% Compare the keys at the head of the list, and either skip that "best" key or %% identify as the next key. %% @@ -1090,23 +1118,82 @@ find_pos(<<0:1/integer, NHC:7/integer, T/binary>>, Hash, PosList, Count) -> %% there are matching keys then the highest sequence number must be chosen and %% any lower sequence numbers should be compacted out of existence -merge_lists(KeyList1, KeyList2, LevelInfo) -> - merge_lists(KeyList1, KeyList2, LevelInfo, [], ?MAX_SLOTS * ?SLOT_SIZE). +merge_lists(KVList1) -> + SlotCount = length(KVList1) div ?SLOT_SIZE, + split_lists(KVList1, [], SlotCount). -merge_lists([], [], _LevelR, MergedList, _MaxSize) -> - {{[], []}, lists:reverse(MergedList)}; -merge_lists(Rem1, Rem2, _LevelR, MergedList, 0) -> - {{Rem1, Rem2}, lists:reverse(MergedList)}; -merge_lists(KeyList1, KeyList2, {IsBasement, TS}, MergedList, MaxSize) -> - case key_dominates(KeyList1, KeyList2, {IsBasement, TS}) of - {{next_key, TopKey}, Rem1, Rem2} -> - merge_lists(Rem1, +split_lists([], SlotLists, 0) -> + lists:reverse(SlotLists); +split_lists(LastPuff, SlotLists, 0) -> + lists:reverse([{lookup, LastPuff}|SlotLists]); +split_lists(KVList1, SlotLists, N) -> + {Slot, KVListRem} = lists:split(?SLOT_SIZE, KVList1), + split_lists(KVListRem, [{lookup, Slot}|SlotLists], N - 1). + +merge_lists(KVList1, KVList2, LevelInfo) -> + merge_lists(KVList1, KVList2, LevelInfo, [], 0). + +merge_lists(KVList1, KVList2, _LI, SlotList, ?MAX_SLOTS) -> + {KVList1, KVList2, lists:reverse(SlotList)}; +merge_lists([], [], _LI, SlotList, _SlotCount) -> + {[], [], lists:reverse(SlotList)}; +merge_lists(KVList1, KVList2, LI, SlotList, SlotCount) -> + {KVRem1, + KVRem2, + Slot} = form_slot(KVList1, KVList2, LI, no_lookup, 0, []), + case Slot of + [] -> + merge_lists(KVRem1, KVRem2, LI, SlotList, SlotCount); + _ -> + merge_lists(KVRem1, KVRem2, LI, [Slot|SlotList], SlotCount + 1) + end. 
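The reason for the lookup/no_lookup distinction described in the comment block above can be put in numbers using the constants defined earlier in this module, and the form_slot clauses that follow make the boundary behaviour concrete:

%% ?SLOT_SIZE        = 128            keys per lookup slot
%% ?NOLOOK_SLOTSIZE  = 128 * 2 = 256  keys per no_lookup slot
%% ?MAX_SLOTS        = 256            slots per SST file
%%
%% => a file of lookup keys tops out at 128 * 256 = 32768 keys, while a
%%    file of index-only keys can hold 256 * 256 = 65536 keys, roughly
%%    halving manifest growth for 2i-heavy workloads.
%%
%% Worked traces for form_slot (a slot starts in no_lookup mode):
%%   * 100 index keys, then an object key: the slot flips to lookup mode
%%     and is capped at 128 entries in total;
%%   * 200 index keys, then an object key: 200 >= ?SLOT_SIZE, so the slot
%%     is closed as a 200-entry no_lookup slot and the object key starts
%%     the next slot.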
+ +form_slot([], [], _LI, Type, _Size, Slot) -> + {[], [], {Type, lists:reverse(Slot)}}; +form_slot(KVList1, KVList2, _LI, lookup, ?SLOT_SIZE, Slot) -> + {KVList1, KVList2, {lookup, lists:reverse(Slot)}}; +form_slot(KVList1, KVList2, _LI, no_lookup, ?NOLOOK_SLOTSIZE, Slot) -> + {KVList1, KVList2, {no_lookup, lists:reverse(Slot)}}; +form_slot(KVList1, KVList2, {IsBasement, TS}, lookup, Size, Slot) -> + case key_dominates(KVList1, KVList2, {IsBasement, TS}) of + {{next_key, TopKV}, Rem1, Rem2} -> + form_slot(Rem1, Rem2, {IsBasement, TS}, - [TopKey|MergedList], - MaxSize - 1); + lookup, + Size + 1, + [TopKV|Slot]); {skipped_key, Rem1, Rem2} -> - merge_lists(Rem1, Rem2, {IsBasement, TS}, MergedList, MaxSize) + form_slot(Rem1, Rem2, {IsBasement, TS}, lookup, Size, Slot) + end; +form_slot(KVList1, KVList2, {IsBasement, TS}, no_lookup, Size, Slot) -> + case key_dominates(KVList1, KVList2, {IsBasement, TS}) of + {{next_key, {TopK, TopV}}, Rem1, Rem2} -> + case leveled_codec:to_lookup(TopK) of + no_lookup -> + form_slot(Rem1, + Rem2, + {IsBasement, TS}, + no_lookup, + Size + 1, + [{TopK, TopV}|Slot]); + lookup -> + case Size >= ?SLOT_SIZE of + true -> + {KVList1, + KVList2, + {no_lookup, lists:reverse(Slot)}}; + false -> + form_slot(Rem1, + Rem2, + {IsBasement, TS}, + lookup, + Size + 1, + [{TopK, TopV}|Slot]) + end + end; + {skipped_key, Rem1, Rem2} -> + form_slot(Rem1, Rem2, {IsBasement, TS}, lookup, Size, Slot) end. key_dominates(KL1, KL2, Level) -> @@ -1255,7 +1342,7 @@ indexed_list_test() -> SW0 = os:timestamp(), - {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(KVL1), + {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, KVL1), io:format(user, "Indexed list created slot in ~w microseconds of size ~w~n", [timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]), @@ -1283,7 +1370,7 @@ indexed_list_mixedkeys_test() -> KVL1 = lists:sublist(KVL0, 33), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), - {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys), + {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), {TestK1, TestV1} = lists:nth(4, KVL1), MH1 = leveled_codec:magic_hash(TestK1), @@ -1309,7 +1396,7 @@ indexed_list_mixedkeys2_test() -> IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)), % this isn't actually ordered correctly Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2, - {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys), + {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), lists:foreach(fun({K, V}) -> MH = leveled_codec:magic_hash(K), test_binary_slot(FullBin, K, MH, {K, V}) @@ -1318,7 +1405,7 @@ indexed_list_mixedkeys2_test() -> indexed_list_allindexkeys_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128), - {PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys), + {PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), ?assertMatch(<<_BL:20/binary, 127:8/integer>>, PosBinIndex1), % SW = os:timestamp(), BinToList = binaryslot_tolist(FullBin), @@ -1328,10 +1415,22 @@ indexed_list_allindexkeys_test() -> ?assertMatch(Keys, BinToList), ?assertMatch(Keys, binaryslot_trimmedlist(FullBin, all, all)). 
+indexed_list_allindexkeys_nolookup_test() -> + Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)), + 128 * ?NOLOOK_MULT), + {PosBinIndex1, FullBin, _HL} = generate_binary_slot(no_lookup, Keys), + ?assertMatch(<<_BL:20/binary, 127:8/integer>>, PosBinIndex1), + % SW = os:timestamp(), + BinToList = binaryslot_tolist(FullBin), + % io:format(user, + % "Indexed list flattened in ~w microseconds ~n", + % [timer:now_diff(os:timestamp(), SW)]), + ?assertMatch(Keys, BinToList), + ?assertMatch(Keys, binaryslot_trimmedlist(FullBin, all, all)). indexed_list_allindexkeys_trimmed_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128), - {PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys), + {PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), ?assertMatch(<<_BL:20/binary, 127:8/integer>>, PosBinIndex1), ?assertMatch(Keys, binaryslot_trimmedlist(FullBin, {i, @@ -1369,7 +1468,7 @@ indexed_list_mixedkeys_bitflip_test() -> KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)), KVL1 = lists:sublist(KVL0, 33), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), - {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(Keys), + {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), L = byte_size(FullBin), Byte1 = random:uniform(L), <> = FullBin, @@ -1423,16 +1522,12 @@ merge_test() -> ?assertMatch(ExpLK2, LK2), ML1 = [{next, #manifest_entry{owner = P1}, FK1}], ML2 = [{next, #manifest_entry{owner = P2}, FK2}], - {ok, P3, {{Rem1, Rem2}, FK3, LK3}} = sst_new("../test/", - "level2_merge", - ML1, - ML2, - false, - 2, - N * 2), + NewR = sst_new("../test/", "level2_merge", ML1, ML2, false, 2, N * 2), + {ok, P3, {{Rem1, Rem2}, FK3, LK3}} = NewR, ?assertMatch([], Rem1), ?assertMatch([], Rem2), ?assertMatch(true, FK3 == min(FK1, FK2)), + io:format("LK1 ~w LK2 ~w LK3 ~w~n", [LK1, LK2, LK3]), ?assertMatch(true, LK3 == max(LK1, LK2)), io:format(user, "Created and merged two files of size ~w in ~w microseconds~n", From 181331712137e89103d6d15fe484a32763db21cf Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 10 Mar 2017 22:49:00 +0000 Subject: [PATCH 04/23] Correctly identify empty slotlist --- src/leveled_sst.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index c4ae1be..c8135f6 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -161,7 +161,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN) -> sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN) -> {Rem1, Rem2, SlotList} = merge_lists(KVL1, KVL2, {IsBasement, Level}), case SlotList of - {_, []} -> + [{_, []}] -> empty; _ -> {ok, Pid} = gen_fsm:start(?MODULE, [], []), From 4e4f498f208a64aac2041e97aa76a9b107f5f872 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Fri, 10 Mar 2017 23:48:17 +0000 Subject: [PATCH 05/23] Correctly set no_lookup on skip_key Otherwise could change to lookup after the size limit has been reached --- src/leveled_sst.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index c8135f6..d398937 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1193,7 +1193,7 @@ form_slot(KVList1, KVList2, {IsBasement, TS}, no_lookup, Size, Slot) -> end end; {skipped_key, Rem1, Rem2} -> - form_slot(Rem1, Rem2, {IsBasement, TS}, lookup, Size, Slot) + form_slot(Rem1, Rem2, {IsBasement, TS}, no_lookup, Size, Slot) end. 
key_dominates(KL1, KL2, Level) -> From a07770a3df067c1e129d224257c6d34202806f70 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 11 Mar 2017 00:03:55 +0000 Subject: [PATCH 06/23] Unit tets of lookup over-size issue A mistake meant resetting to lookup on a skipped key would cause issues if the skipped key ocurred under a no_lookup slot after the ?SLOT_SIZE had been reached. This caused the slot to switch to lookup, but beyond the maximum size --- src/leveled_sst.erl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index d398937..4914f40 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1334,6 +1334,18 @@ generate_indexkeys(Count, IndexList) -> generate_indexkeys(Count - 1, IndexList ++ Changes). +form_slot_test() -> + % If a skip key happens, mustn't switch to loookup by accident as could be + % over the expected size + SkippingKV = {{o, "B1", "K9999", null}, {9999, tomb, 1234567, {}}}, + Slot = [{{o, "B1", "K5", null}, {5, active, 99234567, {}}}], + R1 = form_slot([SkippingKV], [], + {true, 99999999}, + no_lookup, + ?SLOT_SIZE + 1, + Slot), + ?assertMatch({[], [], {no_lookup, Slot}}, R1). + indexed_list_test() -> io:format(user, "~nIndexed list timing test:~n", []), N = 150, From 1f8de798bd8998a181c935b5f9baf12f8eb35fde Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 11 Mar 2017 12:41:30 +0000 Subject: [PATCH 07/23] Fix empty slot issue --- src/leveled_sst.erl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 4914f40..7451d01 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -1142,7 +1142,7 @@ merge_lists(KVList1, KVList2, LI, SlotList, SlotCount) -> KVRem2, Slot} = form_slot(KVList1, KVList2, LI, no_lookup, 0, []), case Slot of - [] -> + {_, []} -> merge_lists(KVRem1, KVRem2, LI, SlotList, SlotCount); _ -> merge_lists(KVRem1, KVRem2, LI, [Slot|SlotList], SlotCount + 1) @@ -1346,6 +1346,18 @@ form_slot_test() -> Slot), ?assertMatch({[], [], {no_lookup, Slot}}, R1). +merge_tombstonelist_test() -> + % Merge lists wiht nothing but tombstones + SkippingKV1 = {{o, "B1", "K9995", null}, {9995, tomb, 1234567, {}}}, + SkippingKV2 = {{o, "B1", "K9996", null}, {9996, tomb, 1234567, {}}}, + SkippingKV3 = {{o, "B1", "K9997", null}, {9997, tomb, 1234567, {}}}, + SkippingKV4 = {{o, "B1", "K9998", null}, {9998, tomb, 1234567, {}}}, + SkippingKV5 = {{o, "B1", "K9999", null}, {9999, tomb, 1234567, {}}}, + R = merge_lists([SkippingKV1, SkippingKV3, SkippingKV5], + [SkippingKV2, SkippingKV4], + {true, 9999999}), + ?assertMatch({[], [], []}, R). + indexed_list_test() -> io:format(user, "~nIndexed list timing test:~n", []), N = 150, From f2cd9b3f3375020b8bc69f52c2968fb5d951f184 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 11 Mar 2017 13:04:55 +0000 Subject: [PATCH 08/23] Consistency of empty slotlist references Need to return an empty slotlist in a consistent way --- src/leveled_sst.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 7451d01..f5bf7f8 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -68,6 +68,7 @@ -define(SLOT_SIZE, 128). % This is not configurable -define(NOLOOK_MULT, 2). % How much bigger is a slot/block with no lookups -define(NOLOOK_SLOTSIZE, ?SLOT_SIZE * ?NOLOOK_MULT). +-define(EMPTY_SLOTLIST, [{no_lookup, []}]). -define(COMPRESSION_LEVEL, 1). -define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_LEVEL}]). 
% -define(LEVEL_BLOOM_BITS, [{0, 8}, {1, 10}, {2, 8}, {default, 6}]). @@ -161,7 +162,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN) -> sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN) -> {Rem1, Rem2, SlotList} = merge_lists(KVL1, KVL2, {IsBasement, Level}), case SlotList of - [{_, []}] -> + ?EMPTY_SLOTLIST -> empty; _ -> {ok, Pid} = gen_fsm:start(?MODULE, [], []), @@ -1135,6 +1136,8 @@ merge_lists(KVList1, KVList2, LevelInfo) -> merge_lists(KVList1, KVList2, _LI, SlotList, ?MAX_SLOTS) -> {KVList1, KVList2, lists:reverse(SlotList)}; +merge_lists([], [], _LI, [], _SlotCount) -> + {[], [], ?EMPTY_SLOTLIST}; merge_lists([], [], _LI, SlotList, _SlotCount) -> {[], [], lists:reverse(SlotList)}; merge_lists(KVList1, KVList2, LI, SlotList, SlotCount) -> @@ -1356,7 +1359,7 @@ merge_tombstonelist_test() -> R = merge_lists([SkippingKV1, SkippingKV3, SkippingKV5], [SkippingKV2, SkippingKV4], {true, 9999999}), - ?assertMatch({[], [], []}, R). + ?assertMatch({[], [], ?EMPTY_SLOTLIST}, R). indexed_list_test() -> io:format(user, "~nIndexed list timing test:~n", []), From 62c3ba8b6fd1dab3eb0243b19800e9b9169f18de Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 13 Mar 2017 11:54:46 +0000 Subject: [PATCH 09/23] Passing ETS reference not tree ETS reference gets converted to tree by Penciller --- src/leveled_bookie.erl | 2 +- src/leveled_penciller.erl | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 5d441da..b450bcc 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -1181,7 +1181,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) -> TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize), if TimeToPush -> - CacheToLoad = {leveled_tree:from_orderedset(Tab, ?CACHE_TYPE), + CacheToLoad = {Tab, Cache#ledger_cache.index, Cache#ledger_cache.min_sqn, Cache#ledger_cache.max_sqn}, diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 2f5c1a5..bb67e21 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -341,7 +341,7 @@ init([PCLopts]) -> end. 
-handle_call({push_mem, {PushedTree, PushedIdx, MinSQN, MaxSQN}}, +handle_call({push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}}, From, State=#state{is_snapshot=Snap}) when Snap == false -> % The push_mem process is as follows: @@ -370,7 +370,16 @@ handle_call({push_mem, {PushedTree, PushedIdx, MinSQN, MaxSQN}}, {reply, returned, State}; false -> leveled_log:log("P0018", [ok, false, false]), - gen_server:reply(From, ok), + PushedTree = + case is_tuple(LedgerTable) of + true -> + LedgerTable; + false -> + leveled_tree:from_orderedset(LedgerTable, + ?CACHE_TYPE) + end, + % Reply ust happen after the table has been converted + gen_server:reply(From, ok), {noreply, update_levelzero(State#state.levelzero_size, {PushedTree, PushedIdx, MinSQN, MaxSQN}, From f3e962c43ac46170fa26a27ef4697d11e1cd31ce Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 13 Mar 2017 12:16:36 +0000 Subject: [PATCH 10/23] Add level to SST slow fetch log --- src/leveled_log.erl | 3 ++- src/leveled_penciller.erl | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index c89fbea..7f42bf3 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -166,7 +166,8 @@ {"PC015", {info, "File created"}}, {"PC016", - {info, "Slow fetch from SFT ~w of ~w microseconds with result ~w"}}, + {info, "Slow fetch from SFT ~w of ~w microseconds at level ~w " + ++ "with result ~w"}}, {"PC017", {info, "Notified clerk of manifest change"}}, {"PC018", diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 2f5c1a5..110889a 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -853,7 +853,7 @@ fetch_mem(Key, Hash, Manifest, L0Cache, L0Index) -> L0Check = leveled_pmem:check_levelzero(Key, Hash, PosList, L0Cache), case L0Check of {false, not_found} -> - fetch(Key, Hash, Manifest, 0, fun timed_sst_get/3); + fetch(Key, Hash, Manifest, 0, fun timed_sst_get/4); {true, KV} -> {KV, 0} end. @@ -865,7 +865,7 @@ fetch(Key, Hash, Manifest, Level, FetchFun) -> false -> fetch(Key, Hash, Manifest, Level + 1, FetchFun); FP -> - case FetchFun(FP, Key, Hash) of + case FetchFun(FP, Key, Hash, Level) of not_present -> fetch(Key, Hash, Manifest, Level + 1, FetchFun); ObjectFound -> @@ -873,21 +873,21 @@ fetch(Key, Hash, Manifest, Level, FetchFun) -> end end. -timed_sst_get(PID, Key, Hash) -> +timed_sst_get(PID, Key, Hash, Level) -> SW = os:timestamp(), R = leveled_sst:sst_get(PID, Key, Hash), T0 = timer:now_diff(os:timestamp(), SW), - log_slowfetch(T0, R, PID, ?SLOW_FETCH). + log_slowfetch(T0, R, PID, Level, ?SLOW_FETCH). -log_slowfetch(T0, R, PID, FetchTolerance) -> +log_slowfetch(T0, R, PID, Level, FetchTolerance) -> case {T0, R} of {T, R} when T < FetchTolerance -> R; {T, not_present} -> - leveled_log:log("PC016", [PID, T, not_present]), + leveled_log:log("PC016", [PID, T, Level, not_present]), not_present; {T, R} -> - leveled_log:log("PC016", [PID, T, found]), + leveled_log:log("PC016", [PID, T, Level, found]), R end. @@ -1498,7 +1498,7 @@ create_file_test() -> ?assertMatch("hello", binary_to_term(Bin)). slow_fetch_test() -> - ?assertMatch(not_present, log_slowfetch(2, not_present, "fake", 1)). + ?assertMatch(not_present, log_slowfetch(2, not_present, "fake", 0, 1)). 
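The push_mem change in [PATCH 09/23] above (passing the raw ETS reference) moves the ordered_set-to-tree conversion out of the bookie and into the penciller, and only on the accept path. A stripped-down sketch of that pattern; worth_taking/1, to_tree/1 and accept/2 are hypothetical stand-ins, not functions from leveled:

handle_call({push_mem, EtsTable}, From, State) ->
    case worth_taking(State) of
        false ->
            % cheap rejection: nothing is converted, the caller keeps its cache
            {reply, returned, State};
        true ->
            Tree = to_tree(EtsTable),        % pay the conversion cost here
            gen_server:reply(From, ok),      % caller may now wipe the table
            {noreply, accept(Tree, State)}
    end.

Replying only after to_tree/1 has finished matters: as soon as the caller sees ok it is free to clear or reuse the ETS table.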
checkready(Pid) -> try From c787e0cd784eac80d5d6440304f0760c6bbbddaa Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 13 Mar 2017 14:32:46 +0000 Subject: [PATCH 11/23] Handle corrupted Ledger Key when applying recovery strategy Otherwise may blow up in journal_compaction_bustedjournal test --- src/leveled_codec.erl | 61 +++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index ad25c58..a01f008 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -198,32 +198,41 @@ compact_inkerkvc({_InkerKey, crc_wonky, false}, _Strategy) -> compact_inkerkvc({{_SQN, ?INKT_TOMB, _LK}, _V, _CrcCheck}, _Strategy) -> skip; compact_inkerkvc({{SQN, ?INKT_KEYD, LK}, V, CrcCheck}, Strategy) -> - {Tag, _, _, _} = LK, - {Tag, TagStrat} = lists:keyfind(Tag, 1, Strategy), - case TagStrat of + case get_tagstrategy(LK, Strategy) of + skip -> + skip; retain -> {retain, {{SQN, ?INKT_KEYD, LK}, V, CrcCheck}}; TagStrat -> {TagStrat, null} end; compact_inkerkvc({{SQN, ?INKT_STND, LK}, V, CrcCheck}, Strategy) -> - {Tag, _, _, _} = LK, - case lists:keyfind(Tag, 1, Strategy) of - {Tag, TagStrat} -> - case TagStrat of - retain -> - {_V, KeyDeltas} = split_inkvalue(V), - {TagStrat, {{SQN, ?INKT_KEYD, LK}, {null, KeyDeltas}, CrcCheck}}; - TagStrat -> - {TagStrat, null} - end; - false -> - leveled_log:log("IC012", [Tag, Strategy]), - skip + case get_tagstrategy(LK, Strategy) of + skip -> + skip; + retain -> + {_V, KeyDeltas} = split_inkvalue(V), + {retain, {{SQN, ?INKT_KEYD, LK}, {null, KeyDeltas}, CrcCheck}}; + TagStrat -> + {TagStrat, null} end; compact_inkerkvc(_KVC, _Strategy) -> skip. +get_tagstrategy(LK, Strategy) -> + case LK of + {Tag, _, _, _} -> + case lists:keyfind(Tag, 1, Strategy) of + {Tag, TagStrat} -> + TagStrat; + false -> + leveled_log:log("IC012", [Tag, Strategy]), + skip + end; + _ -> + skip + end. + split_inkvalue(VBin) -> case is_binary(VBin) of true -> @@ -429,6 +438,26 @@ endkey_passed_test() -> ?assertMatch(true, endkey_passed(TestKey, K2)). +corrupted_ledgerkey_test() -> + % When testing for compacted journal which has been corrupted, there may + % be a corruptes ledger key. Always skip these keys + % Key has become a 3-tuple not a 4-tuple + TagStrat1 = compact_inkerkvc({{1, + ?INKT_STND, + {?STD_TAG, "B1", "K1andSK"}}, + {}, + true}, + [{?STD_TAG, retain}]), + ?assertMatch(skip, TagStrat1), + TagStrat2 = compact_inkerkvc({{1, + ?INKT_KEYD, + {?STD_TAG, "B1", "K1andSK"}}, + {}, + true}, + [{?STD_TAG, retain}]), + ?assertMatch(skip, TagStrat2). + + %% Test below proved that the overhead of performing hashes was trivial %% Maybe 5 microseconds per hash From 54534e725fb13ba84cd873e4ee3534cfa23504d5 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 13 Mar 2017 19:53:12 +0000 Subject: [PATCH 12/23] Experiment with smaller scan width When testing with large numbers of 2i terms (and hence more Riak Metadata), there is a surge in slow response times when there are multiple concurrent merge events. This could be veyr short term CPU starvation because of the merge process. Perhaps it is delays waiting for the scan to complete - smaller scanwidth may mena more interleaving and less latency? --- src/leveled_sst.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 76dc9b5..5e4670b 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -69,8 +69,7 @@ -define(COMPRESSION_LEVEL, 1). 
-define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_LEVEL}]). % -define(LEVEL_BLOOM_BITS, [{0, 8}, {1, 10}, {2, 8}, {default, 6}]). --define(MERGE_SCANWIDTH, 16). --define(INDEX_MARKER_WIDTH, 16). +-define(MERGE_SCANWIDTH, 4). -define(DISCARD_EXT, ".discarded"). -define(DELETE_TIMEOUT, 10000). -define(TREE_TYPE, idxt). From c5bb150f970c464411d685d63ac6be89d486ba53 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 13 Mar 2017 20:30:33 +0000 Subject: [PATCH 13/23] Drop some logs Not found to be interesting so far --- src/leveled_log.erl | 2 +- src/leveled_penciller.erl | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/leveled_log.erl b/src/leveled_log.erl index 7f42bf3..8eb56e4 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -267,7 +267,7 @@ {"SST05", {warn, "Rename rogue filename ~s to ~s"}}, {"SST06", - {info, "File ~s has been set for delete"}}, + {debug, "File ~s has been set for delete"}}, {"SST07", {info, "Exit called and now clearing ~s"}}, {"SST08", diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 110889a..5460f59 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -369,7 +369,6 @@ handle_call({push_mem, {PushedTree, PushedIdx, MinSQN, MaxSQN}}, State#state.work_backlog]), {reply, returned, State}; false -> - leveled_log:log("P0018", [ok, false, false]), gen_server:reply(From, ok), {noreply, update_levelzero(State#state.levelzero_size, From 2b0ec1d9cce9718283a2423265f8145275a9819f Mon Sep 17 00:00:00 2001 From: martinsumner Date: Mon, 13 Mar 2017 23:51:48 +0000 Subject: [PATCH 14/23] Don't double-loop on slots Previous version built a list of slots, then iterated over it to build a list of binaries. This converts the slot to a binary before building the list --- src/leveled_sst.erl | 160 +++++++++++++++++++++++++++----------------- 1 file changed, 97 insertions(+), 63 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 5c6db08..3f1a021 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -145,13 +145,13 @@ sst_open(RootPath, Filename) -> sst_new(RootPath, Filename, Level, KVList, MaxSQN) -> {ok, Pid} = gen_fsm:start(?MODULE, [], []), - SlotList = merge_lists(KVList), + {[], [], SlotList, FK} = merge_lists(KVList), case gen_fsm:sync_send_event(Pid, {sst_new, RootPath, Filename, Level, - SlotList, + {SlotList, FK}, MaxSQN}, infinity) of {ok, {SK, EK}} -> @@ -159,7 +159,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN) -> end. 
sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN) -> - {Rem1, Rem2, SlotList} = merge_lists(KVL1, KVL2, {IsBasement, Level}), + {Rem1, Rem2, SlotList, FK} = merge_lists(KVL1, KVL2, {IsBasement, Level}), case SlotList of ?EMPTY_SLOTLIST -> empty; @@ -170,7 +170,7 @@ sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN) -> RootPath, Filename, Level, - SlotList, + {SlotList, FK}, MaxSQN}, infinity) of {ok, {SK, EK}} -> @@ -244,11 +244,10 @@ starting({sst_open, RootPath, Filename}, _From, State) -> {ok, {Summary#summary.first_key, Summary#summary.last_key}}, reader, UpdState}; -starting({sst_new, RootPath, Filename, Level, SlotList, MaxSQN}, - _From, State) -> +starting({sst_new, RootPath, Filename, Level, {SlotList, FirstKey}, MaxSQN}, + _From, State) -> SW = os:timestamp(), - {FirstKey, - Length, + {Length, SlotIndex, BlockIndex, SlotsBin} = build_all_slots(SlotList), @@ -272,9 +271,8 @@ starting({sst_newlevelzero, RootPath, Filename, Slots, FetchFun, Penciller, MaxSQN}, State) -> SW = os:timestamp(), KVList = leveled_pmem:to_list(Slots, FetchFun), - SlotList = merge_lists(KVList), - {FirstKey, - SlotCount, + {[], [], SlotList, FirstKey} = merge_lists(KVList), + {SlotCount, SlotIndex, BlockIndex, SlotsBin} = build_all_slots(SlotList), @@ -580,8 +578,6 @@ read_table_summary(BinWithCheck) -> build_all_slots(SlotList) -> - [{_Lookup, KVList}|_R] = SlotList, - [{FirstKey, _FirstV}|_Rest] = KVList, SlotCount = length(SlotList), BuildResponse = build_all_slots(SlotList, 8, @@ -591,14 +587,14 @@ build_all_slots(SlotList) -> {default, none}]), <<>>), {SlotIndex, BlockIndex, SlotsBin} = BuildResponse, - {FirstKey, SlotCount, SlotIndex, BlockIndex, SlotsBin}. + {SlotCount, SlotIndex, BlockIndex, SlotsBin}. -build_all_slots([], _Pos, _SlotID, SlotIdxAcc, BlockIdxAcc, SlotBinAcc) -> +build_all_slots([], _Pos, _SlotID, + SlotIdxAcc, BlockIdxAcc, SlotBinAcc) -> {SlotIdxAcc, BlockIdxAcc, SlotBinAcc}; -build_all_slots(SlotList, Pos, SlotID, SlotIdxAcc, BlockIdxAcc, SlotBinAcc) -> - [{Lookup, KVList}|Rest] = SlotList, - {LastKey, _LastV} = lists:last(KVList), - {BlockIdx, SlotBin, HashList} = generate_binary_slot(Lookup, KVList), +build_all_slots([SlotD|Rest], Pos, SlotID, + SlotIdxAcc, BlockIdxAcc, SlotBinAcc) -> + {BlockIdx, SlotBin, HashList, LastKey} = SlotD, Length = byte_size(SlotBin), Bloom = leveled_tinybloom:create_bloom(HashList), SlotIndexV = #slot_index_value{slot_id = SlotID, @@ -801,8 +797,10 @@ generate_binary_slot(Lookup, KVL) -> B1/binary, B2/binary, B3/binary, B4/binary>>, CRC32 = erlang:crc32(SlotBin), FullBin = <>, + + {LastKey, _LV} = lists:last(KVL), - {<>, FullBin, HashL}. + {<>, FullBin, HashL, LastKey}. check_blocks([], _Handle, _Slot, _BlockLengths, _LedgerKey) -> @@ -1120,57 +1118,89 @@ find_pos(<<0:1/integer, NHC:7/integer, T/binary>>, Hash, PosList, Count) -> merge_lists(KVList1) -> SlotCount = length(KVList1) div ?SLOT_SIZE, - split_lists(KVList1, [], SlotCount). + {[], + [], + split_lists(KVList1, [], SlotCount), + element(1, lists:nth(1, KVList1))}. split_lists([], SlotLists, 0) -> lists:reverse(SlotLists); split_lists(LastPuff, SlotLists, 0) -> - lists:reverse([{lookup, LastPuff}|SlotLists]); + SlotD = generate_binary_slot(lookup, LastPuff), + lists:reverse([SlotD|SlotLists]); split_lists(KVList1, SlotLists, N) -> {Slot, KVListRem} = lists:split(?SLOT_SIZE, KVList1), - split_lists(KVListRem, [{lookup, Slot}|SlotLists], N - 1). + SlotD = generate_binary_slot(lookup, Slot), + split_lists(KVListRem, [SlotD|SlotLists], N - 1). 
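After this change each element of SlotList is already a serialised slot rather than a raw key list, so build_all_slots/1 no longer re-walks the keys. A per-slot type written out for illustration (the module itself does not declare it, and the field names are descriptive rather than taken from the source):

-type slot_descriptor() ::
        {BlockIdx :: binary(),   % 20-byte length header ++ position index,
                                 % later cached in blockindex_cache
         SlotBin  :: binary(),   % CRC32 ++ serialised blocks, written to disk
         HashList :: list(),     % hashes feeding leveled_tinybloom:create_bloom/1
         LastKey  :: term()}.    % last ledger key, keys the slot index entry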
merge_lists(KVList1, KVList2, LevelInfo) -> - merge_lists(KVList1, KVList2, LevelInfo, [], 0). + merge_lists(KVList1, KVList2, LevelInfo, [], null, 0). -merge_lists(KVList1, KVList2, _LI, SlotList, ?MAX_SLOTS) -> - {KVList1, KVList2, lists:reverse(SlotList)}; -merge_lists([], [], _LI, [], _SlotCount) -> - {[], [], ?EMPTY_SLOTLIST}; -merge_lists([], [], _LI, SlotList, _SlotCount) -> - {[], [], lists:reverse(SlotList)}; -merge_lists(KVList1, KVList2, LI, SlotList, SlotCount) -> - {KVRem1, - KVRem2, - Slot} = form_slot(KVList1, KVList2, LI, no_lookup, 0, []), +merge_lists(KVList1, KVList2, _LI, SlotList, FirstKey, ?MAX_SLOTS) -> + {KVList1, KVList2, lists:reverse(SlotList), FirstKey}; +merge_lists([], [], _LI, [], null, _SlotCount) -> + {[], [], ?EMPTY_SLOTLIST, null}; +merge_lists([], [], _LI, SlotList, FirstKey, _SlotCount) -> + {[], [], lists:reverse(SlotList), FirstKey}; +merge_lists(KVList1, KVList2, LI, SlotList, FirstKey, SlotCount) -> + {KVRem1, KVRem2, Slot, FK0} = + form_slot(KVList1, KVList2, LI, no_lookup, 0, [], FirstKey), case Slot of {_, []} -> - merge_lists(KVRem1, KVRem2, LI, SlotList, SlotCount); - _ -> - merge_lists(KVRem1, KVRem2, LI, [Slot|SlotList], SlotCount + 1) + merge_lists(KVRem1, + KVRem2, + LI, + SlotList, + FK0, + SlotCount); + {Lookup, KVL} -> + SlotD = generate_binary_slot(Lookup, KVL), + merge_lists(KVRem1, + KVRem2, + LI, + [SlotD|SlotList], + FK0, + SlotCount + 1) end. -form_slot([], [], _LI, Type, _Size, Slot) -> - {[], [], {Type, lists:reverse(Slot)}}; -form_slot(KVList1, KVList2, _LI, lookup, ?SLOT_SIZE, Slot) -> - {KVList1, KVList2, {lookup, lists:reverse(Slot)}}; -form_slot(KVList1, KVList2, _LI, no_lookup, ?NOLOOK_SLOTSIZE, Slot) -> - {KVList1, KVList2, {no_lookup, lists:reverse(Slot)}}; -form_slot(KVList1, KVList2, {IsBasement, TS}, lookup, Size, Slot) -> - case key_dominates(KVList1, KVList2, {IsBasement, TS}) of - {{next_key, TopKV}, Rem1, Rem2} -> +form_slot([], [], _LI, Type, _Size, Slot, FK) -> + {[], [], {Type, lists:reverse(Slot)}, FK}; +form_slot(KVList1, KVList2, _LI, lookup, ?SLOT_SIZE, Slot, FK) -> + {KVList1, KVList2, {lookup, lists:reverse(Slot)}, FK}; +form_slot(KVList1, KVList2, _LI, no_lookup, ?NOLOOK_SLOTSIZE, Slot, FK) -> + {KVList1, KVList2, {no_lookup, lists:reverse(Slot)}, FK}; +form_slot(KVList1, KVList2, {IsBasement, TS}, lookup, Size, Slot, FK) -> + case {key_dominates(KVList1, KVList2, {IsBasement, TS}), FK} of + {{{next_key, TopKV}, Rem1, Rem2}, null} -> + {TopK, _TopV} = TopKV, form_slot(Rem1, Rem2, {IsBasement, TS}, lookup, Size + 1, - [TopKV|Slot]); - {skipped_key, Rem1, Rem2} -> - form_slot(Rem1, Rem2, {IsBasement, TS}, lookup, Size, Slot) + [TopKV|Slot], + TopK); + {{{next_key, TopKV}, Rem1, Rem2}, _} -> + form_slot(Rem1, + Rem2, + {IsBasement, TS}, + lookup, + Size + 1, + [TopKV|Slot], + FK); + {{skipped_key, Rem1, Rem2}, _} -> + form_slot(Rem1, Rem2, {IsBasement, TS}, lookup, Size, Slot, FK) end; -form_slot(KVList1, KVList2, {IsBasement, TS}, no_lookup, Size, Slot) -> +form_slot(KVList1, KVList2, {IsBasement, TS}, no_lookup, Size, Slot, FK) -> case key_dominates(KVList1, KVList2, {IsBasement, TS}) of {{next_key, {TopK, TopV}}, Rem1, Rem2} -> + FK0 = + case FK of + null -> + TopK; + _ -> + FK + end, case leveled_codec:to_lookup(TopK) of no_lookup -> form_slot(Rem1, @@ -1178,7 +1208,8 @@ form_slot(KVList1, KVList2, {IsBasement, TS}, no_lookup, Size, Slot) -> {IsBasement, TS}, no_lookup, Size + 1, - [{TopK, TopV}|Slot]); + [{TopK, TopV}|Slot], + FK0); lookup -> case Size >= ?SLOT_SIZE of true -> @@ -1191,11 +1222,12 @@ 
form_slot(KVList1, KVList2, {IsBasement, TS}, no_lookup, Size, Slot) -> {IsBasement, TS}, lookup, Size + 1, - [{TopK, TopV}|Slot]) + [{TopK, TopV}|Slot], + FK0) end end; {skipped_key, Rem1, Rem2} -> - form_slot(Rem1, Rem2, {IsBasement, TS}, no_lookup, Size, Slot) + form_slot(Rem1, Rem2, {IsBasement, TS}, no_lookup, Size, Slot, FK) end. key_dominates(KL1, KL2, Level) -> @@ -1345,8 +1377,9 @@ form_slot_test() -> {true, 99999999}, no_lookup, ?SLOT_SIZE + 1, - Slot), - ?assertMatch({[], [], {no_lookup, Slot}}, R1). + Slot, + {o, "B1", "K5", null}), + ?assertMatch({[], [], {no_lookup, Slot}, {o, "B1", "K5", null}}, R1). merge_tombstonelist_test() -> % Merge lists wiht nothing but tombstones @@ -1358,7 +1391,7 @@ merge_tombstonelist_test() -> R = merge_lists([SkippingKV1, SkippingKV3, SkippingKV5], [SkippingKV2, SkippingKV4], {true, 9999999}), - ?assertMatch({[], [], ?EMPTY_SLOTLIST}, R). + ?assertMatch({[], [], ?EMPTY_SLOTLIST, null}, R). indexed_list_test() -> io:format(user, "~nIndexed list timing test:~n", []), @@ -1368,7 +1401,7 @@ indexed_list_test() -> SW0 = os:timestamp(), - {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, KVL1), + {_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, KVL1), io:format(user, "Indexed list created slot in ~w microseconds of size ~w~n", [timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]), @@ -1396,7 +1429,7 @@ indexed_list_mixedkeys_test() -> KVL1 = lists:sublist(KVL0, 33), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), - {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), + {_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys), {TestK1, TestV1} = lists:nth(4, KVL1), MH1 = leveled_codec:magic_hash(TestK1), @@ -1422,7 +1455,7 @@ indexed_list_mixedkeys2_test() -> IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)), % this isn't actually ordered correctly Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2, - {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), + {_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys), lists:foreach(fun({K, V}) -> MH = leveled_codec:magic_hash(K), test_binary_slot(FullBin, K, MH, {K, V}) @@ -1431,7 +1464,7 @@ indexed_list_mixedkeys2_test() -> indexed_list_allindexkeys_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128), - {PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), + {PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys), ?assertMatch(<<_BL:20/binary, 127:8/integer>>, PosBinIndex1), % SW = os:timestamp(), BinToList = binaryslot_tolist(FullBin), @@ -1444,7 +1477,7 @@ indexed_list_allindexkeys_test() -> indexed_list_allindexkeys_nolookup_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)), 128 * ?NOLOOK_MULT), - {PosBinIndex1, FullBin, _HL} = generate_binary_slot(no_lookup, Keys), + {PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(no_lookup, Keys), ?assertMatch(<<_BL:20/binary, 127:8/integer>>, PosBinIndex1), % SW = os:timestamp(), BinToList = binaryslot_tolist(FullBin), @@ -1456,7 +1489,7 @@ indexed_list_allindexkeys_nolookup_test() -> indexed_list_allindexkeys_trimmed_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128), - {PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), + {PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys), ?assertMatch(<<_BL:20/binary, 127:8/integer>>, PosBinIndex1), ?assertMatch(Keys, binaryslot_trimmedlist(FullBin, {i, @@ -1494,7 +1527,8 @@ 
indexed_list_mixedkeys_bitflip_test() -> KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)), KVL1 = lists:sublist(KVL0, 33), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), - {_PosBinIndex1, FullBin, _HL} = generate_binary_slot(lookup, Keys), + {_PosBinIndex1, FullBin, _HL, LK} = generate_binary_slot(lookup, Keys), + ?assertMatch(LK, element(1, lists:last(Keys))), L = byte_size(FullBin), Byte1 = random:uniform(L), <> = FullBin, From a1c49b668a205b6afe93274dba80f21add3ed8fe Mon Sep 17 00:00:00 2001 From: martinsumner Date: Tue, 14 Mar 2017 00:17:09 +0000 Subject: [PATCH 15/23] Fix empty file again No special definition of empty required, as now an empty list when empty --- src/leveled_sst.erl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 3f1a021..b2025a8 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -68,7 +68,6 @@ -define(SLOT_SIZE, 128). % This is not configurable -define(NOLOOK_MULT, 2). % How much bigger is a slot/block with no lookups -define(NOLOOK_SLOTSIZE, ?SLOT_SIZE * ?NOLOOK_MULT). --define(EMPTY_SLOTLIST, [{no_lookup, []}]). -define(COMPRESSION_LEVEL, 1). -define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_LEVEL}]). % -define(LEVEL_BLOOM_BITS, [{0, 8}, {1, 10}, {2, 8}, {default, 6}]). @@ -161,7 +160,7 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN) -> sst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN) -> {Rem1, Rem2, SlotList, FK} = merge_lists(KVL1, KVL2, {IsBasement, Level}), case SlotList of - ?EMPTY_SLOTLIST -> + [] -> empty; _ -> {ok, Pid} = gen_fsm:start(?MODULE, [], []), @@ -1138,8 +1137,6 @@ merge_lists(KVList1, KVList2, LevelInfo) -> merge_lists(KVList1, KVList2, _LI, SlotList, FirstKey, ?MAX_SLOTS) -> {KVList1, KVList2, lists:reverse(SlotList), FirstKey}; -merge_lists([], [], _LI, [], null, _SlotCount) -> - {[], [], ?EMPTY_SLOTLIST, null}; merge_lists([], [], _LI, SlotList, FirstKey, _SlotCount) -> {[], [], lists:reverse(SlotList), FirstKey}; merge_lists(KVList1, KVList2, LI, SlotList, FirstKey, SlotCount) -> @@ -1391,7 +1388,7 @@ merge_tombstonelist_test() -> R = merge_lists([SkippingKV1, SkippingKV3, SkippingKV5], [SkippingKV2, SkippingKV4], {true, 9999999}), - ?assertMatch({[], [], ?EMPTY_SLOTLIST, null}, R). + ?assertMatch({[], [], [], null}, R). 
 indexed_list_test() ->
     io:format(user, "~nIndexed list timing test:~n", []),
 

From 19bc838d90c8f3cf4be18aa09a33aca609ff4305 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Tue, 14 Mar 2017 00:52:07 +0000
Subject: [PATCH 16/23] Fix bad exit with no FK

---
 src/leveled_sst.erl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl
index b2025a8..2b304ba 100644
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@@ -1212,7 +1212,8 @@ form_slot(KVList1, KVList2, {IsBasement, TS}, no_lookup, Size, Slot, FK) ->
                         true ->
                             {KVList1,
                                 KVList2,
-                                {no_lookup, lists:reverse(Slot)}};
+                                {no_lookup, lists:reverse(Slot)},
+                                FK};
                         false ->
                             form_slot(Rem1,
                                         Rem2,

From 4b60c0e35bc099bbf8c5030ae708898032de33c4 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Tue, 14 Mar 2017 01:18:50 +0000
Subject: [PATCH 17/23] Scan width semi-reverted

No evidence from the volume test that the scan width has made a positive
difference - so reverting, but not fully, as slots may now be twice as big;
sticking to half the previous value.
---
 src/leveled_sst.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl
index 2b304ba..ea3bb27 100644
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@@ -71,7 +71,7 @@
 -define(COMPRESSION_LEVEL, 1).
 -define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_LEVEL}]).
 % -define(LEVEL_BLOOM_BITS, [{0, 8}, {1, 10}, {2, 8}, {default, 6}]).
--define(MERGE_SCANWIDTH, 4).
+-define(MERGE_SCANWIDTH, 8).
 -define(DISCARD_EXT, ".discarded").
 -define(DELETE_TIMEOUT, 10000).
 -define(TREE_TYPE, idxt).

From 8a5ed1e1980003761a9f67ce5fe3b76b4ec818c3 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Tue, 14 Mar 2017 17:26:39 +0000
Subject: [PATCH 18/23] Confirm skip on unknowns when compacting journal

---
 src/leveled_codec.erl | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index 8ea7fb4..aae0b6b 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -465,7 +465,33 @@ corrupted_ledgerkey_test() ->
                                     true},
                                 [{?STD_TAG, retain}]),
     ?assertMatch(skip, TagStrat2).
+
+general_skip_strategy_test() ->
+    % Confirm that we will skip if the strategy says so
+    TagStrat1 = compact_inkerkvc({{1,
+                                        ?INKT_STND,
+                                        {?STD_TAG, "B1", "K1andSK"}},
+                                    {},
+                                    true},
+                                [{?STD_TAG, skip}]),
+    ?assertMatch(skip, TagStrat1),
+    TagStrat2 = compact_inkerkvc({{1,
+                                        ?INKT_KEYD,
+                                        {?STD_TAG, "B1", "K1andSK"}},
+                                    {},
+                                    true},
+                                [{?STD_TAG, skip}]),
+    ?assertMatch(skip, TagStrat2).
+corrupted_inker_tag_test() ->
+    % Confirm that we will skip on unknown inker tag
+    TagStrat1 = compact_inkerkvc({{1,
+                                        foo,
+                                        {?STD_TAG, "B1", "K1andSK"}},
+                                    {},
+                                    true},
+                                [{?STD_TAG, retain}]),
+    ?assertMatch(skip, TagStrat1).
 
 %% Test below proved that the overhead of performing hashes was trivial
 %% Maybe 5 microseconds per hash

From 508da0be45849939e34c645f6b649e1ccc13513e Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Tue, 14 Mar 2017 22:47:48 +0000
Subject: [PATCH 19/23] Additional unit tests

---
 src/leveled_codec.erl | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
index aae0b6b..0e5d17f 100644
--- a/src/leveled_codec.erl
+++ b/src/leveled_codec.erl
@@ -481,7 +481,28 @@ general_skip_strategy_test() ->
                                     {},
                                     true},
                                 [{?STD_TAG, skip}]),
-    ?assertMatch(skip, TagStrat2).
+ ?assertMatch(skip, TagStrat2), + TagStrat3 = compact_inkerkvc({{1, + ?INKT_KEYD, + {?IDX_TAG, "B1", "K1", "SK"}}, + {}, + true}, + [{?STD_TAG, skip}]), + ?assertMatch(skip, TagStrat3), + TagStrat4 = compact_inkerkvc({{1, + ?INKT_KEYD, + {?IDX_TAG, "B1", "K1", "SK"}}, + {}, + true}, + [{?STD_TAG, skip}, {?IDX_TAG, recalc}]), + ?assertMatch({recalc, null}, TagStrat4), + TagStrat5 = compact_inkerkvc({{1, + ?INKT_TOMB, + {?IDX_TAG, "B1", "K1", "SK"}}, + {}, + true}, + [{?STD_TAG, skip}, {?IDX_TAG, recalc}]), + ?assertMatch(skip, TagStrat5). corrupted_inker_tag_test() -> % Confirm that we will skip on unknown inker tag From c6d17b998e8073e2f2b34743af11170191f80506 Mon Sep 17 00:00:00 2001 From: martinsumner Date: Wed, 15 Mar 2017 11:27:46 +0000 Subject: [PATCH 20/23] Additional unit tests for SST range fetches Resolve some of coverage issues --- src/leveled_sst.erl | 72 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 6 deletions(-) diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index ea3bb27..e73b00d 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -454,6 +454,7 @@ fetch_range(StartKey, EndKey, ScanWidth, State) -> {Slots, RTrim} = lookup_slots(StartKey, EndKey, Summary#summary.index), Self = self(), SL = length(Slots), + ExpandedSlots = case SL of 0 -> @@ -1357,14 +1358,16 @@ generate_indexkeys(Count) -> generate_indexkeys(0, IndexList) -> IndexList; generate_indexkeys(Count, IndexList) -> - IndexSpecs = [{add, "t1_int", random:uniform(80000)}], - Changes = leveled_codec:convert_indexspecs(IndexSpecs, - "Bucket", - "Key" ++ integer_to_list(Count), - Count, - infinity), + Changes = generate_indexkey(random:uniform(8000), Count), generate_indexkeys(Count - 1, IndexList ++ Changes). +generate_indexkey(Term, Count) -> + IndexSpecs = [{add, "t1_int", Term}], + leveled_codec:convert_indexspecs(IndexSpecs, + "Bucket", + "Key" ++ integer_to_list(Count), + Count, + infinity). form_slot_test() -> % If a skip key happens, mustn't switch to loookup by accident as could be @@ -1648,6 +1651,63 @@ simple_persisted_range_test() -> TL4 = lists:map(fun(EK) -> {SK4, EK} end, [EK2, EK3, EK4, EK5]), TL5 = lists:map(fun(EK) -> {SK5, EK} end, [EK2, EK3, EK4, EK5]), lists:foreach(TestFun, TL2 ++ TL3 ++ TL4 ++ TL5). 
+
+additional_range_test() ->
+    % Test fetching ranges that fall into odd situations with regard to the
+    % summary index
+    % - ranges which fall between entries in summary
+    % - ranges which go beyond the end of the range of the sst
+    % - ranges which match to an end key in the summary index
+    IK1 = lists:foldl(fun(X, Acc) ->
+                            Acc ++ generate_indexkey(X, X)
+                        end,
+                        [],
+                        lists:seq(1, ?NOLOOK_SLOTSIZE)),
+    Gap = 2,
+    IK2 = lists:foldl(fun(X, Acc) ->
+                            Acc ++ generate_indexkey(X, X)
+                        end,
+                        [],
+                        lists:seq(?NOLOOK_SLOTSIZE + Gap + 1,
+                                    2 * ?NOLOOK_SLOTSIZE + Gap)),
+    {ok,
+        P1,
+        {{Rem1, Rem2},
+            SK,
+            EK}} = sst_new("../test/", "range1_src", IK1, IK2, false, 1, 9999),
+    ?assertMatch([], Rem1),
+    ?assertMatch([], Rem2),
+    ?assertMatch(SK, element(1, lists:nth(1, IK1))),
+    ?assertMatch(EK, element(1, lists:last(IK2))),
+
+    % Basic test - checking scanwidth
+    R1 = sst_getkvrange(P1, SK, EK, 1),
+    ?assertMatch(?NOLOOK_SLOTSIZE + 1, length(R1)),
+    QR1 = lists:sublist(R1, ?NOLOOK_SLOTSIZE),
+    ?assertMatch(IK1, QR1),
+    R2 = sst_getkvrange(P1, SK, EK, 2),
+    ?assertMatch(?NOLOOK_SLOTSIZE * 2, length(R2)),
+    QR2 = lists:sublist(R2, ?NOLOOK_SLOTSIZE),
+    QR3 = lists:sublist(R2, ?NOLOOK_SLOTSIZE + 1, 2 * ?NOLOOK_SLOTSIZE),
+    ?assertMatch(IK1, QR2),
+    ?assertMatch(IK2, QR3),
+
+    % Testing the gap
+    [GapSKV] = generate_indexkey(?NOLOOK_SLOTSIZE + 1, ?NOLOOK_SLOTSIZE + 1),
+    [GapEKV] = generate_indexkey(?NOLOOK_SLOTSIZE + 2, ?NOLOOK_SLOTSIZE + 2),
+    R3 = sst_getkvrange(P1, element(1, GapSKV), element(1, GapEKV), 1),
+    ?assertMatch([], R3),
+
+    % Testing beyond the range
+    [PastEKV] = generate_indexkey(2 * ?NOLOOK_SLOTSIZE + Gap + 1,
+                                    2 * ?NOLOOK_SLOTSIZE + Gap + 1),
+    R4 = sst_getkvrange(P1, element(1, GapSKV), element(1, PastEKV), 2),
+    ?assertMatch(IK2, R4),
+
+    % Testing at a slot end
+    Slot1EK = element(1, lists:last(IK1)),
+    R5 = sst_getkvrange(P1, SK, Slot1EK, 2),
+    ?assertMatch(IK1, R5).

From dde37566b9b33ab48594fb06e848015cbc4fe5c9 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Wed, 15 Mar 2017 16:40:43 +0000
Subject: [PATCH 21/23] Add unit test for more than one slot

---
 src/leveled_sst.erl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl
index e73b00d..f76ac54 100644
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@@ -1703,11 +1703,14 @@ additional_range_test() ->
                                     2 * ?NOLOOK_SLOTSIZE + Gap + 1),
     R4 = sst_getkvrange(P1, element(1, GapSKV), element(1, PastEKV), 2),
     ?assertMatch(IK2, R4),
+    R5 = sst_getkvrange(P1, SK, element(1, PastEKV), 2),
+    IKAll = IK1 ++ IK2,
+    ?assertMatch(IKAll, R5),
 
     % Testing at a slot end
     Slot1EK = element(1, lists:last(IK1)),
-    R5 = sst_getkvrange(P1, SK, Slot1EK, 2),
-    ?assertMatch(IK1, R5).
+    R6 = sst_getkvrange(P1, SK, Slot1EK, 2),
+    ?assertMatch(IK1, R6).

From 6199a2c3520515544a4075ca1da72e063e8b0321 Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Thu, 16 Mar 2017 08:37:36 +0000
Subject: [PATCH 23/23] RTrim

RTrim only worked in the special case of an exact end-key match, which would
never occur in a real-world range query. RTrim should really check whether
the query's end key has been passed.

Returning an empty list should not be possible - unless the query is outside
of the range entirely (and such a query should never go to this SST).
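A minimal sketch of the intended decision (the rtrim_required/2 wrapper is
illustrative only; it assumes leveled_codec:endkey_passed(EndKeyBound, Key),
the helper used in the change below, returns true once Key is beyond the
bound):

    %% True if the final slot in the range needs right-trimming: once the
    %% query's end key has gone past the slot's own end key the whole slot
    %% is within the query, so no trim is required.
    rtrim_required(LastSlotEndKey, QueryEndKey) ->
        not leveled_codec:endkey_passed(LastSlotEndKey, QueryEndKey).

Previously the flag was only cleared on an exact end-key match; checking
whether the end key has been passed also covers end keys that fall between
stored keys or beyond the final slot.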
---
 src/leveled_sst.erl | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl
index f76ac54..05afac6 100644
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@@ -457,8 +457,6 @@ fetch_range(StartKey, EndKey, ScanWidth, State) ->
     ExpandedSlots =
         case SL of
-            0 ->
-                [];
             1 ->
                 [Slot] = Slots,
                 case RTrim of
@@ -657,12 +655,7 @@ lookup_slots(StartKey, EndKey, Tree) ->
         end,
     SlotList = leveled_tree:search_range(StartKey, EndKey, Tree, StartKeyFun),
     {EK, _EndSlot} = lists:last(SlotList),
-    case EK of
-        EndKey ->
-            {lists:map(MapFun, SlotList), false};
-        _ ->
-            {lists:map(MapFun, SlotList), true}
-    end.
+    {lists:map(MapFun, SlotList), not leveled_codec:endkey_passed(EK, EndKey)}.
 
 
 %%%============================================================================
@@ -1706,11 +1699,17 @@ additional_range_test() ->
     R5 = sst_getkvrange(P1, SK, element(1, PastEKV), 2),
     IKAll = IK1 ++ IK2,
     ?assertMatch(IKAll, R5),
+    [MidREKV] = generate_indexkey(?NOLOOK_SLOTSIZE + Gap + 2,
+                                    ?NOLOOK_SLOTSIZE + Gap + 2),
+    io:format(user, "Mid second range to past range test~n", []),
+    R6 = sst_getkvrange(P1, element(1, MidREKV), element(1, PastEKV), 2),
+    Exp6 = lists:sublist(IK2, 2, length(IK2)),
+    ?assertMatch(Exp6, R6),
 
     % Testing at a slot end
     Slot1EK = element(1, lists:last(IK1)),
-    R6 = sst_getkvrange(P1, SK, Slot1EK, 2),
-    ?assertMatch(IK1, R6).
+    R7 = sst_getkvrange(P1, SK, Slot1EK, 2),
+    ?assertMatch(IK1, R7).

From 5dbd7a2bc2b8e98ec115c38e237d08dfe3d384dd Mon Sep 17 00:00:00 2001
From: martinsumner
Date: Thu, 16 Mar 2017 08:43:18 +0000
Subject: [PATCH 23/23] Check query out of range

It doesn't work - so protecting against it in fetch_range is pointless, as it
will blow up in lookup_slots anyway.
---
 src/leveled_sst.erl | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl
index 05afac6..489715e 100644
--- a/src/leveled_sst.erl
+++ b/src/leveled_sst.erl
@@ -319,7 +319,7 @@ reader(get_maxsequencenumber, _From, State) ->
     Summary = State#state.summary,
     {reply, Summary#summary.max_sqn, reader, State};
 reader(print_timings, _From, State) ->
-    io:format(user, "Timings of ~w~n", [State#state.sst_timings]),
+    io:format(user, "~nTimings of ~w~n", [State#state.sst_timings]),
     {reply, ok, reader, State#state{sst_timings = undefined}};
 reader({set_for_delete, Penciller}, _From, State) ->
     leveled_log:log("SST06", [State#state.filename]),
@@ -1711,6 +1711,11 @@ additional_range_test() ->
     R7 = sst_getkvrange(P1, SK, Slot1EK, 2),
     ?assertMatch(IK1, R7).
 
+    % Testing beyond end (should never happen if manifest behaves)
+    % Test blows up anyway
+    % R8 = sst_getkvrange(P1, element(1, PastEKV), element(1, PastEKV), 2),
+    % ?assertMatch([], R8).
+
 simple_persisted_test() ->