Experiment converting CDB to use a skiplist rather than gb_trees
Might insertion time be faster?
This commit is contained in:
parent
a3f60e3609
commit
626a8e63f9
4 changed files with 166 additions and 63 deletions
|
@ -238,14 +238,14 @@ init([Opts]) ->
|
|||
{ok, #state{inker=Inker,
|
||||
penciller=Penciller,
|
||||
cache_size=CacheSize,
|
||||
ledger_cache=leveled_skiplist:empty(),
|
||||
ledger_cache=leveled_skiplist:empty(true),
|
||||
is_snapshot=false}};
|
||||
Bookie ->
|
||||
{ok,
|
||||
{Penciller, LedgerCache},
|
||||
Inker} = book_snapshotstore(Bookie, self(), ?SNAPSHOT_TIMEOUT),
|
||||
ok = leveled_penciller:pcl_loadsnapshot(Penciller,
|
||||
leveled_skiplist:empty()),
|
||||
leveled_skiplist:empty(true)),
|
||||
leveled_log:log("B0002", [Inker, Penciller]),
|
||||
{ok, #state{penciller=Penciller,
|
||||
inker=Inker,
|
||||
|
@ -885,7 +885,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) ->
|
|||
TimeToPush ->
|
||||
case leveled_penciller:pcl_pushmem(Penciller, Cache) of
|
||||
ok ->
|
||||
{ok, leveled_skiplist:empty()};
|
||||
{ok, leveled_skiplist:empty(true)};
|
||||
returned ->
|
||||
{returned, Cache}
|
||||
end;
|
||||
|
|
|
@ -861,24 +861,28 @@ get_hashtree(Key, HashTree) ->
|
|||
Hash = hash(Key),
|
||||
Index = hash_to_index(Hash),
|
||||
Tree = array:get(Index, HashTree),
|
||||
case gb_trees:lookup(Hash, Tree) of
|
||||
case leveled_skiplist:lookup(Hash, Tree) of
|
||||
{value, List} ->
|
||||
List;
|
||||
_ ->
|
||||
[]
|
||||
end.
|
||||
|
||||
%% Add to hash tree - this is an array of 256 gb_trees that contains the Hash
|
||||
%% Add to hash tree - this is an array of 256 skiplists that contains the Hash
|
||||
%% and position of objects which have been added to an open CDB file
|
||||
put_hashtree(Key, Position, HashTree) ->
|
||||
Hash = hash(Key),
|
||||
Index = hash_to_index(Hash),
|
||||
Tree = array:get(Index, HashTree),
|
||||
case gb_trees:lookup(Hash, Tree) of
|
||||
case leveled_skiplist:lookup(Hash, Tree) of
|
||||
none ->
|
||||
array:set(Index, gb_trees:insert(Hash, [Position], Tree), HashTree);
|
||||
array:set(Index,
|
||||
leveled_skiplist:enter(Hash, [Position], Tree),
|
||||
HashTree);
|
||||
{value, L} ->
|
||||
array:set(Index, gb_trees:update(Hash, [Position|L], Tree), HashTree)
|
||||
array:set(Index,
|
||||
leveled_skiplist:enter(Hash, [Position|L], Tree),
|
||||
HashTree)
|
||||
end.
|
||||
|
||||
%% Function to extract a Key-Value pair given a file handle and a position
|
||||
|
@ -920,7 +924,7 @@ extract_key_value_check(Handle, Position) ->
|
|||
%% Scan through the file until there is a failure to crc check an input, and
|
||||
%% at that point return the position and the key dictionary scanned so far
|
||||
startup_scan_over_file(Handle, Position) ->
|
||||
HashTree = array:new(256, {default, gb_trees:empty()}),
|
||||
HashTree = array:new(256, {default, leveled_skiplist:empty()}),
|
||||
scan_over_file(Handle,
|
||||
Position,
|
||||
fun startup_filter/5,
|
||||
|
@ -1148,7 +1152,7 @@ search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key, QuickCheck) ->
|
|||
% key/value binary in the file.
|
||||
write_key_value_pairs(Handle, KeyValueList) ->
|
||||
{ok, Position} = file:position(Handle, cur),
|
||||
HashTree = array:new(256, {default, gb_trees:empty()}),
|
||||
HashTree = array:new(256, {default, leveled_skiplist:empty()}),
|
||||
write_key_value_pairs(Handle, KeyValueList, {Position, HashTree}).
|
||||
|
||||
write_key_value_pairs(_, [], Acc) ->
|
||||
|
@ -1181,11 +1185,11 @@ write_hash_tables([], _HashTree, _CurrPos, IndexList, HashTreeBin) ->
|
|||
{IndexList, HashTreeBin};
|
||||
write_hash_tables([Index|Rest], HashTree, CurrPos, IndexList, HashTreeBin) ->
|
||||
Tree = array:get(Index, HashTree),
|
||||
case gb_trees:keys(Tree) of
|
||||
[] ->
|
||||
case leveled_skiplist:size(Tree) of
|
||||
0 ->
|
||||
write_hash_tables(Rest, HashTree, CurrPos, IndexList, HashTreeBin);
|
||||
_ ->
|
||||
HashList = gb_trees:to_list(Tree),
|
||||
HashList = leveled_skiplist:to_list(Tree),
|
||||
BinList = build_binaryhashlist(HashList, []),
|
||||
IndexLength = length(BinList) * 2,
|
||||
SlotList = lists:duplicate(IndexLength, <<0:32, 0:32>>),
|
||||
|
@ -1402,16 +1406,16 @@ write_key_value_pairs_1_test() ->
|
|||
Index1 = hash_to_index(Hash1),
|
||||
Hash2 = hash("key2"),
|
||||
Index2 = hash_to_index(Hash2),
|
||||
R0 = array:new(256, {default, gb_trees:empty()}),
|
||||
R0 = array:new(256, {default, leveled_skiplist:empty()}),
|
||||
R1 = array:set(Index1,
|
||||
gb_trees:insert(Hash1,
|
||||
[0],
|
||||
array:get(Index1, R0)),
|
||||
leveled_skiplist:enter(Hash1,
|
||||
[0],
|
||||
array:get(Index1, R0)),
|
||||
R0),
|
||||
R2 = array:set(Index2,
|
||||
gb_trees:insert(Hash2,
|
||||
[30],
|
||||
array:get(Index2, R1)),
|
||||
leveled_skiplist:enter(Hash2,
|
||||
[30],
|
||||
array:get(Index2, R1)),
|
||||
R1),
|
||||
io:format("HashTree is ~w~n", [HashTree]),
|
||||
io:format("Expected HashTree is ~w~n", [R2]),
|
||||
|
@ -1421,16 +1425,16 @@ write_key_value_pairs_1_test() ->
|
|||
|
||||
write_hash_tables_1_test() ->
|
||||
{ok, Handle} = file:open("../test/testx.cdb", [write]),
|
||||
R0 = array:new(256, {default, gb_trees:empty()}),
|
||||
R0 = array:new(256, {default, leveled_skiplist:empty()}),
|
||||
R1 = array:set(64,
|
||||
gb_trees:insert(6383014720,
|
||||
[18],
|
||||
array:get(64, R0)),
|
||||
leveled_skiplist:enter(6383014720,
|
||||
[18],
|
||||
array:get(64, R0)),
|
||||
R0),
|
||||
R2 = array:set(67,
|
||||
gb_trees:insert(6383014723,
|
||||
[0],
|
||||
array:get(67, R1)),
|
||||
leveled_skiplist:enter(6383014723,
|
||||
[0],
|
||||
array:get(67, R1)),
|
||||
R1),
|
||||
Result = write_hash_tables(Handle, R2),
|
||||
io:format("write hash tables result of ~w ~n", [Result]),
|
||||
|
|
|
@ -1283,7 +1283,7 @@ confirm_delete_test() ->
|
|||
|
||||
|
||||
maybe_pause_push(PCL, KL) ->
|
||||
T0 = leveled_skiplist:empty(),
|
||||
T0 = leveled_skiplist:empty(true),
|
||||
T1 = lists:foldl(fun({K, V}, Acc) -> leveled_skiplist:enter(K, V, Acc) end,
|
||||
T0,
|
||||
KL),
|
||||
|
|
|
@ -17,7 +17,9 @@
|
|||
|
||||
-export([
|
||||
from_list/1,
|
||||
from_list/2,
|
||||
from_sortedlist/1,
|
||||
from_sortedlist/2,
|
||||
to_list/1,
|
||||
enter/3,
|
||||
to_range/2,
|
||||
|
@ -25,6 +27,7 @@
|
|||
lookup/2,
|
||||
lookup/3,
|
||||
empty/0,
|
||||
empty/1,
|
||||
size/1
|
||||
]).
|
||||
|
||||
|
@ -41,28 +44,49 @@
|
|||
|
||||
enter(Key, Value, SkipList) ->
|
||||
Hash = erlang:phash2(Key),
|
||||
SkipList0 = add_to_array(Hash, SkipList),
|
||||
NewListPart = enter(Key, Value, Hash,
|
||||
dict:fetch(?SKIP_WIDTH, SkipList0),
|
||||
?SKIP_WIDTH, ?LIST_HEIGHT),
|
||||
dict:store(?SKIP_WIDTH, NewListPart, SkipList0).
|
||||
case is_list(SkipList) of
|
||||
true ->
|
||||
enter(Key, Value, Hash, SkipList, ?SKIP_WIDTH, ?LIST_HEIGHT);
|
||||
false ->
|
||||
SkipList0 = add_to_array(Hash, SkipList),
|
||||
NewListPart = enter(Key, Value, Hash,
|
||||
dict:fetch(?SKIP_WIDTH, SkipList0),
|
||||
?SKIP_WIDTH, ?LIST_HEIGHT),
|
||||
dict:store(?SKIP_WIDTH, NewListPart, SkipList0)
|
||||
end.
|
||||
|
||||
from_list(UnsortedKVL) ->
|
||||
from_list(UnsortedKVL, false).
|
||||
|
||||
from_list(UnsortedKVL, BloomProtect) ->
|
||||
KVL = lists:ukeysort(1, UnsortedKVL),
|
||||
from_sortedlist(KVL).
|
||||
from_sortedlist(KVL, BloomProtect).
|
||||
|
||||
from_sortedlist(SortedKVL) ->
|
||||
SL0 = lists:foldr(fun({K, _V}, SkipL) ->
|
||||
H = erlang:phash2(K),
|
||||
add_to_array(H, SkipL) end,
|
||||
empty(),
|
||||
SortedKVL),
|
||||
dict:store(?SKIP_WIDTH,
|
||||
from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT),
|
||||
SL0).
|
||||
from_sortedlist(SortedKVL, false).
|
||||
|
||||
from_sortedlist(SortedKVL, BloomProtect) ->
|
||||
case BloomProtect of
|
||||
true ->
|
||||
SL0 = lists:foldr(fun({K, _V}, SkipL) ->
|
||||
H = erlang:phash2(K),
|
||||
add_to_array(H, SkipL) end,
|
||||
empty(true),
|
||||
SortedKVL),
|
||||
dict:store(?SKIP_WIDTH,
|
||||
from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT),
|
||||
SL0);
|
||||
false ->
|
||||
from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)
|
||||
end.
|
||||
|
||||
lookup(Key, SkipList) ->
|
||||
lookup(Key, erlang:phash2(Key), SkipList).
|
||||
case is_list(SkipList) of
|
||||
true ->
|
||||
list_lookup(Key, SkipList, ?LIST_HEIGHT);
|
||||
false ->
|
||||
lookup(Key, erlang:phash2(Key), SkipList)
|
||||
end.
|
||||
|
||||
lookup(Key, Hash, SkipList) ->
|
||||
{Slot, Bit} = hash_toslotbit(Hash),
|
||||
|
@ -81,27 +105,57 @@ lookup(Key, Hash, SkipList) ->
|
|||
%% Rather than support iterator_from like gb_trees, will just output a key
|
||||
%% sorted list for the desired range, which can then be iterated over as normal
|
||||
to_range(SkipList, Start) ->
|
||||
to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT).
|
||||
case is_list(SkipList) of
|
||||
true ->
|
||||
to_range(SkipList, Start, ?INFINITY_KEY, ?LIST_HEIGHT);
|
||||
false ->
|
||||
to_range(dict:fetch(?SKIP_WIDTH, SkipList),
|
||||
Start, ?INFINITY_KEY,
|
||||
?LIST_HEIGHT)
|
||||
end.
|
||||
|
||||
to_range(SkipList, Start, End) ->
|
||||
to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, End, ?LIST_HEIGHT).
|
||||
case is_list(SkipList) of
|
||||
true ->
|
||||
to_range(SkipList, Start, End, ?LIST_HEIGHT);
|
||||
false ->
|
||||
to_range(dict:fetch(?SKIP_WIDTH, SkipList),
|
||||
Start, End,
|
||||
?LIST_HEIGHT)
|
||||
end.
|
||||
|
||||
to_list(SkipList) ->
|
||||
to_list(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT).
|
||||
case is_list(SkipList) of
|
||||
true ->
|
||||
to_list(SkipList, ?LIST_HEIGHT);
|
||||
false ->
|
||||
to_list(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT)
|
||||
end.
|
||||
|
||||
empty() ->
|
||||
FoldFun =
|
||||
fun(X, Acc) -> dict:store(X, <<0:?BITARRAY_SIZE>>, Acc) end,
|
||||
lists:foldl(FoldFun,
|
||||
dict:store(?SKIP_WIDTH,
|
||||
empty([], ?LIST_HEIGHT),
|
||||
dict:new()),
|
||||
lists:seq(0, ?SKIP_WIDTH - 1)).
|
||||
|
||||
empty(false).
|
||||
|
||||
empty(BloomProtect) ->
|
||||
case BloomProtect of
|
||||
true ->
|
||||
FoldFun =
|
||||
fun(X, Acc) -> dict:store(X, <<0:?BITARRAY_SIZE>>, Acc) end,
|
||||
lists:foldl(FoldFun,
|
||||
dict:store(?SKIP_WIDTH,
|
||||
empty([], ?LIST_HEIGHT),
|
||||
dict:new()),
|
||||
lists:seq(0, ?SKIP_WIDTH - 1));
|
||||
false ->
|
||||
empty([], ?LIST_HEIGHT)
|
||||
end.
|
||||
|
||||
size(SkipList) ->
|
||||
size(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT).
|
||||
case is_list(SkipList) of
|
||||
true ->
|
||||
size(SkipList, ?LIST_HEIGHT);
|
||||
false ->
|
||||
size(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT)
|
||||
end.
|
||||
|
||||
|
||||
|
||||
|
@ -432,7 +486,54 @@ dotest_skiplist_small(N) ->
|
|||
end,
|
||||
lists:ukeysort(1, lists:reverse(KL))).
|
||||
|
||||
skiplist_test() ->
|
||||
skiplist_withbloom_test() ->
|
||||
io:format(user, "~n~nBloom protected skiplist test:~n~n", []),
|
||||
N = 4000,
|
||||
KL = generate_randomkeys(1, N, 1, N div 5),
|
||||
|
||||
SWaGSL = os:timestamp(),
|
||||
SkipList = from_list(lists:reverse(KL), true),
|
||||
io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++
|
||||
"Top level key count of ~w~n",
|
||||
[N,
|
||||
timer:now_diff(os:timestamp(), SWaGSL),
|
||||
length(dict:fetch(?SKIP_WIDTH, SkipList))]),
|
||||
io:format(user, "Second tier key counts of ~w~n",
|
||||
[lists:map(fun({_L, SL}) -> length(SL) end,
|
||||
dict:fetch(?SKIP_WIDTH, SkipList))]),
|
||||
KLSorted = lists:ukeysort(1, lists:reverse(KL)),
|
||||
|
||||
SWaGSL2 = os:timestamp(),
|
||||
SkipList = from_sortedlist(KLSorted, true),
|
||||
io:format(user, "Generating skip list with ~w sorted keys in ~w " ++
|
||||
"microseconds~n",
|
||||
[N, timer:now_diff(os:timestamp(), SWaGSL2)]),
|
||||
|
||||
SWaDSL = os:timestamp(),
|
||||
SkipList1 =
|
||||
lists:foldl(fun({K, V}, SL) ->
|
||||
enter(K, V, SL)
|
||||
end,
|
||||
empty(true),
|
||||
KL),
|
||||
io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++
|
||||
"microseconds~n" ++
|
||||
"Top level key count of ~w~n",
|
||||
[N,
|
||||
timer:now_diff(os:timestamp(), SWaDSL),
|
||||
length(dict:fetch(?SKIP_WIDTH, SkipList1))]),
|
||||
io:format(user, "Second tier key counts of ~w~n",
|
||||
[lists:map(fun({_L, SL}) -> length(SL) end,
|
||||
dict:fetch(?SKIP_WIDTH, SkipList1))]),
|
||||
|
||||
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
|
||||
skiplist_timingtest(KLSorted, SkipList, N),
|
||||
|
||||
io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
|
||||
skiplist_timingtest(KLSorted, SkipList1, N).
|
||||
|
||||
skiplist_nobloom_test() ->
|
||||
io:format(user, "~n~nBloom free skiplist test:~n~n", []),
|
||||
N = 4000,
|
||||
KL = generate_randomkeys(1, N, 1, N div 5),
|
||||
|
||||
|
@ -442,10 +543,9 @@ skiplist_test() ->
|
|||
"Top level key count of ~w~n",
|
||||
[N,
|
||||
timer:now_diff(os:timestamp(), SWaGSL),
|
||||
length(dict:fetch(?SKIP_WIDTH, SkipList))]),
|
||||
length(SkipList)]),
|
||||
io:format(user, "Second tier key counts of ~w~n",
|
||||
[lists:map(fun({_L, SL}) -> length(SL) end,
|
||||
dict:fetch(?SKIP_WIDTH, SkipList))]),
|
||||
[lists:map(fun({_L, SL}) -> length(SL) end, SkipList)]),
|
||||
KLSorted = lists:ukeysort(1, lists:reverse(KL)),
|
||||
|
||||
SWaGSL2 = os:timestamp(),
|
||||
|
@ -466,17 +566,16 @@ skiplist_test() ->
|
|||
"Top level key count of ~w~n",
|
||||
[N,
|
||||
timer:now_diff(os:timestamp(), SWaDSL),
|
||||
length(dict:fetch(?SKIP_WIDTH, SkipList1))]),
|
||||
length(SkipList1)]),
|
||||
io:format(user, "Second tier key counts of ~w~n",
|
||||
[lists:map(fun({_L, SL}) -> length(SL) end,
|
||||
dict:fetch(?SKIP_WIDTH, SkipList1))]),
|
||||
[lists:map(fun({_L, SL}) -> length(SL) end, SkipList1)]),
|
||||
|
||||
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
|
||||
skiplist_timingtest(KLSorted, SkipList, N),
|
||||
|
||||
io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
|
||||
skiplist_timingtest(KLSorted, SkipList1, N).
|
||||
|
||||
|
||||
|
||||
skiplist_timingtest(KL, SkipList, N) ->
|
||||
io:format(user, "Timing tests on skiplist of size ~w~n",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue