diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index 1a4ee97..382ce85 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -51,14 +51,20 @@ -include_lib("eunit/include/eunit.hrl"). --define(SLOT_WIDTH, {4096, 12}). --define(SKIP_WIDTH, 128). - +-define(SLOT_WIDTH, {2048, 11}). +-define(SKIP_WIDTH, 32). +-define(INFINITE_KEY, {null, null, null, null, null}). +-define(EMPTY_SKIPLIST, [{?INFINITE_KEY, []}]). %%%============================================================================ %%% API %%%============================================================================ + + + + + add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) -> SW = os:timestamp(), SlotInTreeList = length(TreeList) + 1, @@ -152,8 +158,73 @@ merge_trees(StartKey, EndKey, TreeList, LevelMinus1) -> %%%============================================================================ -generate_skiplist(Dict) -> - KVL = lists:ukeysort(1, dict:to_list(Dict)), +addkey_to_index(HashIndex, Key, Count) -> + {Hash, Slot} = hash_to_slot(Key), + L = array:get(Slot, HashIndex), + case lists:member(Hash, L) of + true -> + {HashIndex, Count}; + false -> + {array:set(Slot, [Hash|L], HashIndex), Count + 1} + end. + +merge_indexes(HashIndex, MergedIndex, Count, L0Slot) -> + lists:foldl(fun(Slot, {MHI, AccCount}) -> + HashList = array:get(Slot, HashIndex), + case length(HashList) > 0 of + true -> + merge_indexes_singleslot(HashList, + Slot, + MHI, + L0Slot, + AccCount); + false -> + {MHI, AccCount} + end end, + {MergedIndex, Count}, + lists:seq(0, element(1, ?SLOT_WIDTH) - 1)). + +merge_indexes_singleslot(HashList, IndexSlot, MergedIndex, L0Slot, Count) -> + L = array:get(IndexSlot, MergedIndex), + {UpdHL, UpdCount} = lists:foldl(fun(H, {HL, C}) -> + case lists:keymember(H, 1, L) of + true -> + {[{H, L0Slot}|HL], C + 1}; + false -> + {[{H, L0Slot}|HL], C} + end end, + {L, Count}, + HashList), + {array:set(IndexSlot, UpdHL, MergedIndex), UpdCount}. + +load_dynamic_skiplist(SkipList, Key, Value, Hash) -> + {MarkerKey, SubList} = lists:foldl(fun({Marker, SL}, Acc) -> + case Acc of + false -> + case Marker >= Key of + true -> + {Marker, SL}; + false -> + Acc + end; + _ -> + Acc + end end, + false, + SkipList), + case Hash rem ?SKIP_WIDTH of + 0 -> + {LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < Key end, SubList), + SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), + SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1], + lists:ukeysort(1, SkpL2); + _ -> + UpdSubList = lists:ukeysort(1, [{Key, Value}|SubList]), + lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList}) + end. + +generate_balanced_skiplist(UnsortedKVL) -> + KVL = lists:ukeysort(1, UnsortedKVL), Slots = length(KVL) div ?SKIP_WIDTH, SkipList0 = lists:map(fun(X) -> N = X * ?SKIP_WIDTH, @@ -171,7 +242,6 @@ generate_skiplist(Dict) -> SkipList0 end. - fetchkey_from_skiplist(SkipList, Key) -> SubList = lists:foldl(fun({SkipKey, SL}, Acc) -> case {Acc, SkipKey} of @@ -196,13 +266,22 @@ fetchkey_from_skiplist(SkipList, Key) -> fetchrange_from_skiplist(SkipList, StartKey, EndKey) -> R = lists:foldl(fun({SkipKey, SL}, {Continue, Acc}) -> + % io:format("SkipKey ~w StartKey ~w EndKey ~w~n", [SkipKey, StartKey, EndKey]), case Continue of true -> - case SkipKey of - SkipKey when StartKey >= SkipKey -> + case StartKey > SkipKey of + true -> + % io:format("StartKey after SkipKey~n"), {true, Acc}; - SkipKey when EndKey < SkipKey -> - {false, Acc ++ SL} + false -> + case leveled_codec:endkey_passed(EndKey, SkipKey) of + true -> + % io:format("EndKey after SkipKey~n"), + {false, Acc ++ SL}; + false -> + % io:format("EndKey before SkipKey~n"), + {true, Acc ++ SL} + end end; false -> {false, Acc} @@ -333,25 +412,29 @@ compare_method_test() -> gb_trees:empty(), DumpList), Sz0 = gb_trees:size(Q0), - io:format("Crude method took ~w microseconds resulting in tree of " ++ - "size ~w~n", + io:format(user, "Crude method took ~w microseconds resulting in tree of " + ++ "size ~w~n", [timer:now_diff(os:timestamp(), SWa), Sz0]), SWb = os:timestamp(), Q1 = merge_trees(StartKey, EndKey, TreeList, gb_trees:empty()), Sz1 = gb_trees:size(Q1), - io:format("Merge method took ~w microseconds resulting in tree of " ++ - "size ~w~n", + io:format(user, "Merge method took ~w microseconds resulting in tree of " + ++ "size ~w~n", [timer:now_diff(os:timestamp(), SWb), Sz1]), ?assertMatch(Sz0, Sz1). skiplist_test() -> KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)), - D = lists:foldl(fun({K, V}, Acc) -> dict:store(K, V, Acc) end, + SWaD = os:timestamp(), + _D = lists:foldl(fun({K, V}, AccD) -> dict:store(K, V, AccD) end, dict:new(), KL), + io:format(user, "Loading dict with 4000 keys in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWaD)]), + SWa = os:timestamp(), - SkipList = generate_skiplist(D), - io:format("Generating skip list with 4000 keys in ~w microseconds~n", + SkipList = generate_balanced_skiplist(KL), + io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n", [timer:now_diff(os:timestamp(), SWa)]), CheckList1 = lists:sublist(KL, 1200, 100), @@ -370,48 +453,141 @@ skiplist_test() -> fetchkey_from_skiplist(SkipList, K)) end, CheckAll), - io:format("Finding 520 keys took ~w microseconds~n", + io:format(user, "Finding 520 keys took ~w microseconds~n", [timer:now_diff(os:timestamp(), SWb)]), SWc = os:timestamp(), KR1 = fetchrange_from_skiplist(SkipList, - lists:nth(1, CheckList1), - lists:last(CheckList1)), + element(1, lists:nth(1, CheckList1)), + element(1, lists:last(CheckList1))), + io:format("Result length ~w ~n", [length(KR1)]), ?assertMatch(true, length(KR1) >= 100), ?assertMatch(true, length(KR1) < 400), KR2 = fetchrange_from_skiplist(SkipList, - lists:nth(1, CheckList2), - lists:last(CheckList2)), + element(1, lists:nth(1, CheckList2)), + element(1, lists:last(CheckList2))), ?assertMatch(true, length(KR2) >= 100), ?assertMatch(true, length(KR2) < 400), KR3 = fetchrange_from_skiplist(SkipList, - lists:nth(1, CheckList3), - lists:last(CheckList3)), + element(1, lists:nth(1, CheckList3)), + element(1, lists:last(CheckList3))), ?assertMatch(true, length(KR3) >= 100), ?assertMatch(true, length(KR3) < 400), KR4 = fetchrange_from_skiplist(SkipList, - lists:nth(1, CheckList4), - lists:last(CheckList4)), + element(1, lists:nth(1, CheckList4)), + element(1, lists:last(CheckList4))), ?assertMatch(true, length(KR4) >= 100), ?assertMatch(true, length(KR4) < 400), KR5 = fetchrange_from_skiplist(SkipList, - lists:nth(1, CheckList5), - lists:last(CheckList5)), + element(1, lists:nth(1, CheckList5)), + element(1, lists:last(CheckList5))), ?assertMatch(true, length(KR5) >= 100), ?assertMatch(true, length(KR5) < 400), KR6 = fetchrange_from_skiplist(SkipList, - lists:nth(1, CheckList6), - lists:last(CheckList6)), + element(1, lists:nth(1, CheckList6)), + element(1, lists:last(CheckList6))), ?assertMatch(true, length(KR6) >= 10), ?assertMatch(true, length(KR6) < 200), KR7 = fetchrange_from_skiplist(SkipList, - lists:nth(1, CheckList7), - lists:last(CheckList7)), + element(1, lists:nth(1, CheckList7)), + element(1, lists:last(CheckList7))), ?assertMatch(true, length(KR7) >= 10), ?assertMatch(true, length(KR7) < 200), - io:format("Finding 7 ranges took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWc)]), + io:format(user, "Finding 7 ranges took ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWc)]). + +hash_index_test() -> + KeyCount = 4000, + SlotWidth = element(1, ?SLOT_WIDTH), + HI0 = new_index(), + MHI0 = new_index(), + KL0 = gb_trees:to_list(generate_randomkeys(1, KeyCount, 1, 200)), + CheckList1 = lists:sublist(KL0, 1200, 100), + CheckList2 = lists:sublist(KL0, 1600, 100), + CheckList3 = lists:sublist(KL0, 2000, 100), + CheckList4 = lists:sublist(KL0, 2400, 100), + CheckList5 = lists:sublist(KL0, 2800, 100), + CheckList6 = lists:sublist(KL0, 1, 10), + CheckList7 = lists:nthtail(3800, KL0), + CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++ + CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7, - ?assertMatch(true, false). + SWa = os:timestamp(), + SkipList1 = + lists:foldl(fun({K, V}, Acc) -> + {H, _Slot} = hash_to_slot(K), + load_dynamic_skiplist(Acc, K, V, H) end, + ?EMPTY_SKIPLIST, + KL0), + io:format(user, "Dynamic load of skiplist took ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWa)]), + + {LL, LN} = lists:foldl(fun({K, SL}, {Count, Number}) -> + {Count + length(SL), Number + 1} end, + {0, 0}, + SkipList1), + io:format(user, + "Skip list has ~w markers with total members of ~w~n", + [LN, LL]), + ?assertMatch(true, LL / LN > ?SKIP_WIDTH / 2 ), + ?assertMatch(true, LL / LN < ?SKIP_WIDTH * 2 ), + + SWb = os:timestamp(), + lists:foreach(fun({K, V}) -> + ?assertMatch({K, V}, + fetchkey_from_skiplist(SkipList1, K)) + end, + CheckAll), + io:format(user, "Fetching ~w keys from skiplist took ~w microseconds~n", + [KeyCount, timer:now_diff(os:timestamp(), SWb)]), + + SWc = os:timestamp(), + {HI1, _C1} = lists:foldl(fun({K, _V}, {HI, C}) -> + addkey_to_index(HI, K, C) end, + {HI0, 0}, + KL0), + io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n", + [KeyCount, timer:now_diff(os:timestamp(), SWc)]), + ?assertMatch(SlotWidth, array:size(HI1)), + + SWd = os:timestamp(), + {MHI1, TC1} = merge_indexes(HI1, MHI0, 0, 0), + io:format(user, "First merge to hashindex took ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWd)]), + ?assertMatch(SlotWidth, array:size(MHI1)), + + KL1 = gb_trees:to_list(generate_randomkeys(1, KeyCount, 1, 200)), + + SWe = os:timestamp(), + HI2 = new_index(), + {HI3, _C2} = lists:foldl(fun({K, _V}, {HI, C}) -> + addkey_to_index(HI, K, C) end, + {HI2, 0}, + KL1), + io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n", + [KeyCount, timer:now_diff(os:timestamp(), SWe)]), + + SWf = os:timestamp(), + {MHI2, TC2} = merge_indexes(HI3, MHI1, TC1, 1), + io:format(user, "Second merge to hashindex took ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWf)]), + ?assertMatch(SlotWidth, array:size(MHI2)), + + SWg = os:timestamp(), + HI4 = new_index(), + {HI5, _C3} = lists:foldl(fun({K, _V}, {HI, C}) -> + addkey_to_index(HI, K, C) end, + {HI4, 0}, + KL1), + io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n", + [KeyCount, timer:now_diff(os:timestamp(), SWg)]), + + SWh = os:timestamp(), + {MHI3, _TC3} = merge_indexes(HI5, MHI2, TC2, 2), + io:format(user, "Third merge to hashindex took ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWh)]), + ?assertMatch(SlotWidth, array:size(MHI2)). + + -endif. \ No newline at end of file