From 626a8e63f914b416e1db7dba603bf120b485c32f Mon Sep 17 00:00:00 2001 From: martinsumner Date: Sat, 10 Dec 2016 10:55:35 +0000 Subject: [PATCH] Experiment converting CDB to use skiplist not gb_tree Might insertion time be faster? --- src/leveled_bookie.erl | 6 +- src/leveled_cdb.erl | 52 ++++++------ src/leveled_penciller.erl | 2 +- src/leveled_skiplist.erl | 169 ++++++++++++++++++++++++++++++-------- 4 files changed, 166 insertions(+), 63 deletions(-) diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 3e335a2..a50e9fa 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -238,14 +238,14 @@ init([Opts]) -> {ok, #state{inker=Inker, penciller=Penciller, cache_size=CacheSize, - ledger_cache=leveled_skiplist:empty(), + ledger_cache=leveled_skiplist:empty(true), is_snapshot=false}}; Bookie -> {ok, {Penciller, LedgerCache}, Inker} = book_snapshotstore(Bookie, self(), ?SNAPSHOT_TIMEOUT), ok = leveled_penciller:pcl_loadsnapshot(Penciller, - leveled_skiplist:empty()), + leveled_skiplist:empty(true)), leveled_log:log("B0002", [Inker, Penciller]), {ok, #state{penciller=Penciller, inker=Inker, @@ -885,7 +885,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) -> TimeToPush -> case leveled_penciller:pcl_pushmem(Penciller, Cache) of ok -> - {ok, leveled_skiplist:empty()}; + {ok, leveled_skiplist:empty(true)}; returned -> {returned, Cache} end; diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index 1354571..8e4451c 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -861,24 +861,28 @@ get_hashtree(Key, HashTree) -> Hash = hash(Key), Index = hash_to_index(Hash), Tree = array:get(Index, HashTree), - case gb_trees:lookup(Hash, Tree) of + case leveled_skiplist:lookup(Hash, Tree) of {value, List} -> List; _ -> [] end. -%% Add to hash tree - this is an array of 256 gb_trees that contains the Hash +%% Add to hash tree - this is an array of 256 skiplists that contains the Hash %% and position of objects which have been added to an open CDB file put_hashtree(Key, Position, HashTree) -> Hash = hash(Key), Index = hash_to_index(Hash), Tree = array:get(Index, HashTree), - case gb_trees:lookup(Hash, Tree) of + case leveled_skiplist:lookup(Hash, Tree) of none -> - array:set(Index, gb_trees:insert(Hash, [Position], Tree), HashTree); + array:set(Index, + leveled_skiplist:enter(Hash, [Position], Tree), + HashTree); {value, L} -> - array:set(Index, gb_trees:update(Hash, [Position|L], Tree), HashTree) + array:set(Index, + leveled_skiplist:enter(Hash, [Position|L], Tree), + HashTree) end. %% Function to extract a Key-Value pair given a file handle and a position @@ -920,7 +924,7 @@ extract_key_value_check(Handle, Position) -> %% Scan through the file until there is a failure to crc check an input, and %% at that point return the position and the key dictionary scanned so far startup_scan_over_file(Handle, Position) -> - HashTree = array:new(256, {default, gb_trees:empty()}), + HashTree = array:new(256, {default, leveled_skiplist:empty()}), scan_over_file(Handle, Position, fun startup_filter/5, @@ -1148,7 +1152,7 @@ search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key, QuickCheck) -> % key/value binary in the file. write_key_value_pairs(Handle, KeyValueList) -> {ok, Position} = file:position(Handle, cur), - HashTree = array:new(256, {default, gb_trees:empty()}), + HashTree = array:new(256, {default, leveled_skiplist:empty()}), write_key_value_pairs(Handle, KeyValueList, {Position, HashTree}). write_key_value_pairs(_, [], Acc) -> @@ -1181,11 +1185,11 @@ write_hash_tables([], _HashTree, _CurrPos, IndexList, HashTreeBin) -> {IndexList, HashTreeBin}; write_hash_tables([Index|Rest], HashTree, CurrPos, IndexList, HashTreeBin) -> Tree = array:get(Index, HashTree), - case gb_trees:keys(Tree) of - [] -> + case leveled_skiplist:size(Tree) of + 0 -> write_hash_tables(Rest, HashTree, CurrPos, IndexList, HashTreeBin); _ -> - HashList = gb_trees:to_list(Tree), + HashList = leveled_skiplist:to_list(Tree), BinList = build_binaryhashlist(HashList, []), IndexLength = length(BinList) * 2, SlotList = lists:duplicate(IndexLength, <<0:32, 0:32>>), @@ -1402,16 +1406,16 @@ write_key_value_pairs_1_test() -> Index1 = hash_to_index(Hash1), Hash2 = hash("key2"), Index2 = hash_to_index(Hash2), - R0 = array:new(256, {default, gb_trees:empty()}), + R0 = array:new(256, {default, leveled_skiplist:empty()}), R1 = array:set(Index1, - gb_trees:insert(Hash1, - [0], - array:get(Index1, R0)), + leveled_skiplist:enter(Hash1, + [0], + array:get(Index1, R0)), R0), R2 = array:set(Index2, - gb_trees:insert(Hash2, - [30], - array:get(Index2, R1)), + leveled_skiplist:enter(Hash2, + [30], + array:get(Index2, R1)), R1), io:format("HashTree is ~w~n", [HashTree]), io:format("Expected HashTree is ~w~n", [R2]), @@ -1421,16 +1425,16 @@ write_key_value_pairs_1_test() -> write_hash_tables_1_test() -> {ok, Handle} = file:open("../test/testx.cdb", [write]), - R0 = array:new(256, {default, gb_trees:empty()}), + R0 = array:new(256, {default, leveled_skiplist:empty()}), R1 = array:set(64, - gb_trees:insert(6383014720, - [18], - array:get(64, R0)), + leveled_skiplist:enter(6383014720, + [18], + array:get(64, R0)), R0), R2 = array:set(67, - gb_trees:insert(6383014723, - [0], - array:get(67, R1)), + leveled_skiplist:enter(6383014723, + [0], + array:get(67, R1)), R1), Result = write_hash_tables(Handle, R2), io:format("write hash tables result of ~w ~n", [Result]), diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index fb8ef02..dc83474 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -1283,7 +1283,7 @@ confirm_delete_test() -> maybe_pause_push(PCL, KL) -> - T0 = leveled_skiplist:empty(), + T0 = leveled_skiplist:empty(true), T1 = lists:foldl(fun({K, V}, Acc) -> leveled_skiplist:enter(K, V, Acc) end, T0, KL), diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl index 5cf8961..63a3842 100644 --- a/src/leveled_skiplist.erl +++ b/src/leveled_skiplist.erl @@ -17,7 +17,9 @@ -export([ from_list/1, + from_list/2, from_sortedlist/1, + from_sortedlist/2, to_list/1, enter/3, to_range/2, @@ -25,6 +27,7 @@ lookup/2, lookup/3, empty/0, + empty/1, size/1 ]). @@ -41,28 +44,49 @@ enter(Key, Value, SkipList) -> Hash = erlang:phash2(Key), - SkipList0 = add_to_array(Hash, SkipList), - NewListPart = enter(Key, Value, Hash, - dict:fetch(?SKIP_WIDTH, SkipList0), - ?SKIP_WIDTH, ?LIST_HEIGHT), - dict:store(?SKIP_WIDTH, NewListPart, SkipList0). + case is_list(SkipList) of + true -> + enter(Key, Value, Hash, SkipList, ?SKIP_WIDTH, ?LIST_HEIGHT); + false -> + SkipList0 = add_to_array(Hash, SkipList), + NewListPart = enter(Key, Value, Hash, + dict:fetch(?SKIP_WIDTH, SkipList0), + ?SKIP_WIDTH, ?LIST_HEIGHT), + dict:store(?SKIP_WIDTH, NewListPart, SkipList0) + end. from_list(UnsortedKVL) -> + from_list(UnsortedKVL, false). + +from_list(UnsortedKVL, BloomProtect) -> KVL = lists:ukeysort(1, UnsortedKVL), - from_sortedlist(KVL). + from_sortedlist(KVL, BloomProtect). from_sortedlist(SortedKVL) -> - SL0 = lists:foldr(fun({K, _V}, SkipL) -> - H = erlang:phash2(K), - add_to_array(H, SkipL) end, - empty(), - SortedKVL), - dict:store(?SKIP_WIDTH, - from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT), - SL0). + from_sortedlist(SortedKVL, false). + +from_sortedlist(SortedKVL, BloomProtect) -> + case BloomProtect of + true -> + SL0 = lists:foldr(fun({K, _V}, SkipL) -> + H = erlang:phash2(K), + add_to_array(H, SkipL) end, + empty(true), + SortedKVL), + dict:store(?SKIP_WIDTH, + from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT), + SL0); + false -> + from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT) + end. lookup(Key, SkipList) -> - lookup(Key, erlang:phash2(Key), SkipList). + case is_list(SkipList) of + true -> + list_lookup(Key, SkipList, ?LIST_HEIGHT); + false -> + lookup(Key, erlang:phash2(Key), SkipList) + end. lookup(Key, Hash, SkipList) -> {Slot, Bit} = hash_toslotbit(Hash), @@ -81,27 +105,57 @@ lookup(Key, Hash, SkipList) -> %% Rather than support iterator_from like gb_trees, will just an output a key %% sorted list for the desired range, which can the be iterated over as normal to_range(SkipList, Start) -> - to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT). + case is_list(SkipList) of + true -> + to_range(SkipList, Start, ?INFINITY_KEY, ?LIST_HEIGHT); + false -> + to_range(dict:fetch(?SKIP_WIDTH, SkipList), + Start, ?INFINITY_KEY, + ?LIST_HEIGHT) + end. to_range(SkipList, Start, End) -> - to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, End, ?LIST_HEIGHT). + case is_list(SkipList) of + true -> + to_range(SkipList, Start, End, ?LIST_HEIGHT); + false -> + to_range(dict:fetch(?SKIP_WIDTH, SkipList), + Start, End, + ?LIST_HEIGHT) + end. to_list(SkipList) -> - to_list(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT). + case is_list(SkipList) of + true -> + to_list(SkipList, ?LIST_HEIGHT); + false -> + to_list(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT) + end. empty() -> - FoldFun = - fun(X, Acc) -> dict:store(X, <<0:?BITARRAY_SIZE>>, Acc) end, - lists:foldl(FoldFun, - dict:store(?SKIP_WIDTH, - empty([], ?LIST_HEIGHT), - dict:new()), - lists:seq(0, ?SKIP_WIDTH - 1)). - + empty(false). +empty(BloomProtect) -> + case BloomProtect of + true -> + FoldFun = + fun(X, Acc) -> dict:store(X, <<0:?BITARRAY_SIZE>>, Acc) end, + lists:foldl(FoldFun, + dict:store(?SKIP_WIDTH, + empty([], ?LIST_HEIGHT), + dict:new()), + lists:seq(0, ?SKIP_WIDTH - 1)); + false -> + empty([], ?LIST_HEIGHT) + end. size(SkipList) -> - size(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT). + case is_list(SkipList) of + true -> + size(SkipList, ?LIST_HEIGHT); + false -> + size(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT) + end. @@ -432,7 +486,54 @@ dotest_skiplist_small(N) -> end, lists:ukeysort(1, lists:reverse(KL))). -skiplist_test() -> +skiplist_withbloom_test() -> + io:format(user, "~n~nBloom protected skiplist test:~n~n", []), + N = 4000, + KL = generate_randomkeys(1, N, 1, N div 5), + + SWaGSL = os:timestamp(), + SkipList = from_list(lists:reverse(KL), true), + io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++ + "Top level key count of ~w~n", + [N, + timer:now_diff(os:timestamp(), SWaGSL), + length(dict:fetch(?SKIP_WIDTH, SkipList))]), + io:format(user, "Second tier key counts of ~w~n", + [lists:map(fun({_L, SL}) -> length(SL) end, + dict:fetch(?SKIP_WIDTH, SkipList))]), + KLSorted = lists:ukeysort(1, lists:reverse(KL)), + + SWaGSL2 = os:timestamp(), + SkipList = from_sortedlist(KLSorted, true), + io:format(user, "Generating skip list with ~w sorted keys in ~w " ++ + "microseconds~n", + [N, timer:now_diff(os:timestamp(), SWaGSL2)]), + + SWaDSL = os:timestamp(), + SkipList1 = + lists:foldl(fun({K, V}, SL) -> + enter(K, V, SL) + end, + empty(true), + KL), + io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++ + "microseconds~n" ++ + "Top level key count of ~w~n", + [N, + timer:now_diff(os:timestamp(), SWaDSL), + length(dict:fetch(?SKIP_WIDTH, SkipList1))]), + io:format(user, "Second tier key counts of ~w~n", + [lists:map(fun({_L, SL}) -> length(SL) end, + dict:fetch(?SKIP_WIDTH, SkipList1))]), + + io:format(user, "~nRunning timing tests for generated skiplist:~n", []), + skiplist_timingtest(KLSorted, SkipList, N), + + io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), + skiplist_timingtest(KLSorted, SkipList1, N). + +skiplist_nobloom_test() -> + io:format(user, "~n~nBloom free skiplist test:~n~n", []), N = 4000, KL = generate_randomkeys(1, N, 1, N div 5), @@ -442,10 +543,9 @@ skiplist_test() -> "Top level key count of ~w~n", [N, timer:now_diff(os:timestamp(), SWaGSL), - length(dict:fetch(?SKIP_WIDTH, SkipList))]), + length(SkipList)]), io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - dict:fetch(?SKIP_WIDTH, SkipList))]), + [lists:map(fun({_L, SL}) -> length(SL) end, SkipList)]), KLSorted = lists:ukeysort(1, lists:reverse(KL)), SWaGSL2 = os:timestamp(), @@ -466,17 +566,16 @@ skiplist_test() -> "Top level key count of ~w~n", [N, timer:now_diff(os:timestamp(), SWaDSL), - length(dict:fetch(?SKIP_WIDTH, SkipList1))]), + length(SkipList1)]), io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - dict:fetch(?SKIP_WIDTH, SkipList1))]), + [lists:map(fun({_L, SL}) -> length(SL) end, SkipList1)]), io:format(user, "~nRunning timing tests for generated skiplist:~n", []), skiplist_timingtest(KLSorted, SkipList, N), io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), skiplist_timingtest(KLSorted, SkipList1, N). - + skiplist_timingtest(KL, SkipList, N) -> io:format(user, "Timing tests on skiplist of size ~w~n",