Add fast fail to skiplist
Add a bloom filter to the skiplist, to make it faster at returning not found. The SkipList is now encapsulated within a dict().
This commit is contained in:
parent
f0db730f07
commit
d2bd01eaf1
3 changed files with 81 additions and 27 deletions
|
@ -151,7 +151,7 @@
|
||||||
-record(state, {inker :: pid(),
|
-record(state, {inker :: pid(),
|
||||||
penciller :: pid(),
|
penciller :: pid(),
|
||||||
cache_size :: integer(),
|
cache_size :: integer(),
|
||||||
ledger_cache :: list(), % a skiplist
|
ledger_cache :: dict:dict(), % a skiplist
|
||||||
is_snapshot :: boolean(),
|
is_snapshot :: boolean(),
|
||||||
slow_offer = false :: boolean()}).
|
slow_offer = false :: boolean()}).
|
||||||
|
|
||||||
|
|
|
@ -222,7 +222,7 @@
|
||||||
is_snapshot = false :: boolean(),
|
is_snapshot = false :: boolean(),
|
||||||
snapshot_fully_loaded = false :: boolean(),
|
snapshot_fully_loaded = false :: boolean(),
|
||||||
source_penciller :: pid(),
|
source_penciller :: pid(),
|
||||||
levelzero_astree :: list(), % skiplist
|
levelzero_astree :: list(),
|
||||||
|
|
||||||
ongoing_work = [] :: list(),
|
ongoing_work = [] :: list(),
|
||||||
work_backlog = false :: boolean()}).
|
work_backlog = false :: boolean()}).
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
to_range/2,
|
to_range/2,
|
||||||
to_range/3,
|
to_range/3,
|
||||||
lookup/2,
|
lookup/2,
|
||||||
|
lookup/3,
|
||||||
empty/0,
|
empty/0,
|
||||||
size/1
|
size/1
|
||||||
]).
|
]).
|
||||||
|
@ -32,50 +33,98 @@
|
||||||
-define(SKIP_WIDTH, 16).
|
-define(SKIP_WIDTH, 16).
|
||||||
-define(LIST_HEIGHT, 2).
|
-define(LIST_HEIGHT, 2).
|
||||||
-define(INFINITY_KEY, {null, null, null, null, null}).
|
-define(INFINITY_KEY, {null, null, null, null, null}).
|
||||||
|
-define(BITARRAY_SIZE, 2048).
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% SkipList API
|
%%% SkipList API
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
||||||
enter(Key, Value, SkipList) ->
|
enter(Key, Value, SkipList) ->
|
||||||
enter(Key, Value, SkipList, ?SKIP_WIDTH, ?LIST_HEIGHT).
|
Hash = erlang:phash2(Key),
|
||||||
|
SkipList0 = add_to_array(Hash, SkipList),
|
||||||
|
NewListPart = enter(Key, Value, Hash,
|
||||||
|
dict:fetch(?SKIP_WIDTH, SkipList0),
|
||||||
|
?SKIP_WIDTH, ?LIST_HEIGHT),
|
||||||
|
dict:store(?SKIP_WIDTH, NewListPart, SkipList0).
|
||||||
|
|
||||||
from_list(UnsortedKVL) ->
|
from_list(UnsortedKVL) ->
|
||||||
KVL = lists:ukeysort(1, UnsortedKVL),
|
KVL = lists:ukeysort(1, UnsortedKVL),
|
||||||
from_list(KVL, ?SKIP_WIDTH, ?LIST_HEIGHT).
|
from_sortedlist(KVL).
|
||||||
|
|
||||||
from_sortedlist(SortedKVL) ->
|
from_sortedlist(SortedKVL) ->
|
||||||
from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT).
|
SL0 = lists:foldr(fun({K, _V}, SkipL) ->
|
||||||
|
H = erlang:phash2(K),
|
||||||
|
add_to_array(H, SkipL) end,
|
||||||
|
empty(),
|
||||||
|
SortedKVL),
|
||||||
|
dict:store(?SKIP_WIDTH,
|
||||||
|
from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT),
|
||||||
|
SL0).
|
||||||
|
|
||||||
lookup(Key, SkipList) ->
|
lookup(Key, SkipList) ->
|
||||||
lookup(Key, SkipList, ?LIST_HEIGHT).
|
lookup(Key, erlang:phash2(Key), SkipList).
|
||||||
|
|
||||||
|
lookup(Key, Hash, SkipList) ->
|
||||||
|
{Slot, Bit} = hash_toslotbit(Hash),
|
||||||
|
RestLen = ?BITARRAY_SIZE - Bit - 1,
|
||||||
|
<<_Head:Bit/bitstring,
|
||||||
|
B:1/bitstring,
|
||||||
|
_Rest:RestLen/bitstring>> = dict:fetch(Slot, SkipList),
|
||||||
|
case B of
|
||||||
|
<<0:1>> ->
|
||||||
|
none;
|
||||||
|
<<1:1>> ->
|
||||||
|
list_lookup(Key, dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT)
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
%% Rather than support iterator_from like gb_trees, will just an output a key
|
%% Rather than support iterator_from like gb_trees, will just an output a key
|
||||||
%% sorted list for the desired range, which can the be iterated over as normal
|
%% sorted list for the desired range, which can the be iterated over as normal
|
||||||
to_range(SkipList, Start) ->
|
to_range(SkipList, Start) ->
|
||||||
to_range(SkipList, Start, ?INFINITY_KEY, ?LIST_HEIGHT).
|
to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT).
|
||||||
|
|
||||||
to_range(SkipList, Start, End) ->
|
to_range(SkipList, Start, End) ->
|
||||||
to_range(SkipList, Start, End, ?LIST_HEIGHT).
|
to_range(dict:fetch(?SKIP_WIDTH, SkipList), Start, End, ?LIST_HEIGHT).
|
||||||
|
|
||||||
to_list(SkipList) ->
|
to_list(SkipList) ->
|
||||||
to_list(SkipList, ?LIST_HEIGHT).
|
to_list(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT).
|
||||||
|
|
||||||
empty() ->
|
empty() ->
|
||||||
empty([], ?LIST_HEIGHT).
|
FoldFun =
|
||||||
|
fun(X, Acc) -> dict:store(X, <<0:?BITARRAY_SIZE>>, Acc) end,
|
||||||
|
lists:foldl(FoldFun,
|
||||||
|
dict:store(?SKIP_WIDTH,
|
||||||
|
empty([], ?LIST_HEIGHT),
|
||||||
|
dict:new()),
|
||||||
|
lists:seq(0, ?SKIP_WIDTH - 1)).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
size(SkipList) ->
|
size(SkipList) ->
|
||||||
size(SkipList, ?LIST_HEIGHT).
|
size(dict:fetch(?SKIP_WIDTH, SkipList), ?LIST_HEIGHT).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% SkipList Base Functions
|
%%% SkipList Base Functions
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
||||||
enter(Key, Value, SkipList, Width, 1) ->
|
hash_toslotbit(Hash) ->
|
||||||
Hash = erlang:phash2(Key),
|
Slot = Hash band (?SKIP_WIDTH - 1),
|
||||||
|
Bit = (Hash bsr ?SKIP_WIDTH) band (?BITARRAY_SIZE - 1),
|
||||||
|
{Slot, Bit}.
|
||||||
|
|
||||||
|
|
||||||
|
add_to_array(Hash, SkipList) ->
|
||||||
|
{Slot, Bit} = hash_toslotbit(Hash),
|
||||||
|
RestLen = ?BITARRAY_SIZE - Bit - 1,
|
||||||
|
<<Head:Bit/bitstring,
|
||||||
|
_B:1/bitstring,
|
||||||
|
Rest:RestLen/bitstring>> = dict:fetch(Slot, SkipList),
|
||||||
|
BitArray = <<Head/bitstring, 1:1, Rest/bitstring>>,
|
||||||
|
dict:store(Slot, BitArray, SkipList).
|
||||||
|
|
||||||
|
enter(Key, Value, Hash, SkipList, Width, 1) ->
|
||||||
{MarkerKey, SubList} = find_mark(Key, SkipList),
|
{MarkerKey, SubList} = find_mark(Key, SkipList),
|
||||||
case Hash rem Width of
|
case Hash rem Width of
|
||||||
0 ->
|
0 ->
|
||||||
|
@ -101,11 +150,10 @@ enter(Key, Value, SkipList, Width, 1) ->
|
||||||
end,
|
end,
|
||||||
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
|
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
|
||||||
end;
|
end;
|
||||||
enter(Key, Value, SkipList, Width, Level) ->
|
enter(Key, Value, Hash, SkipList, Width, Level) ->
|
||||||
Hash = erlang:phash2(Key),
|
|
||||||
HashMatch = width(Level, Width),
|
HashMatch = width(Level, Width),
|
||||||
{MarkerKey, SubSkipList} = find_mark(Key, SkipList),
|
{MarkerKey, SubSkipList} = find_mark(Key, SkipList),
|
||||||
UpdSubSkipList = enter(Key, Value, SubSkipList, Width, Level - 1),
|
UpdSubSkipList = enter(Key, Value, Hash, SubSkipList, Width, Level - 1),
|
||||||
case Hash rem HashMatch of
|
case Hash rem HashMatch of
|
||||||
0 ->
|
0 ->
|
||||||
%
|
%
|
||||||
|
@ -171,7 +219,7 @@ from_list(KVL, Width, Level) ->
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
lookup(Key, SkipList, 1) ->
|
list_lookup(Key, SkipList, 1) ->
|
||||||
SubList = get_sublist(Key, SkipList),
|
SubList = get_sublist(Key, SkipList),
|
||||||
case lists:keyfind(Key, 1, SubList) of
|
case lists:keyfind(Key, 1, SubList) of
|
||||||
false ->
|
false ->
|
||||||
|
@ -179,13 +227,13 @@ lookup(Key, SkipList, 1) ->
|
||||||
{Key, V} ->
|
{Key, V} ->
|
||||||
{value, V}
|
{value, V}
|
||||||
end;
|
end;
|
||||||
lookup(Key, SkipList, Level) ->
|
list_lookup(Key, SkipList, Level) ->
|
||||||
SubList = get_sublist(Key, SkipList),
|
SubList = get_sublist(Key, SkipList),
|
||||||
case SubList of
|
case SubList of
|
||||||
null ->
|
null ->
|
||||||
none;
|
none;
|
||||||
_ ->
|
_ ->
|
||||||
lookup(Key, SubList, Level - 1)
|
list_lookup(Key, SubList, Level - 1)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
@ -385,16 +433,19 @@ dotest_skiplist_small(N) ->
|
||||||
lists:ukeysort(1, lists:reverse(KL))).
|
lists:ukeysort(1, lists:reverse(KL))).
|
||||||
|
|
||||||
skiplist_test() ->
|
skiplist_test() ->
|
||||||
N = 8000,
|
N = 4000,
|
||||||
KL = generate_randomkeys(1, N, 1, N div 5),
|
KL = generate_randomkeys(1, N, 1, N div 5),
|
||||||
|
|
||||||
SWaGSL = os:timestamp(),
|
SWaGSL = os:timestamp(),
|
||||||
SkipList = from_list(lists:reverse(KL)),
|
SkipList = from_list(lists:reverse(KL)),
|
||||||
io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++
|
io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++
|
||||||
"Top level key count of ~w~n",
|
"Top level key count of ~w~n",
|
||||||
[N, timer:now_diff(os:timestamp(), SWaGSL), length(SkipList)]),
|
[N,
|
||||||
|
timer:now_diff(os:timestamp(), SWaGSL),
|
||||||
|
length(dict:fetch(?SKIP_WIDTH, SkipList))]),
|
||||||
io:format(user, "Second tier key counts of ~w~n",
|
io:format(user, "Second tier key counts of ~w~n",
|
||||||
[lists:map(fun({_L, SL}) -> length(SL) end, SkipList)]),
|
[lists:map(fun({_L, SL}) -> length(SL) end,
|
||||||
|
dict:fetch(?SKIP_WIDTH, SkipList))]),
|
||||||
KLSorted = lists:ukeysort(1, lists:reverse(KL)),
|
KLSorted = lists:ukeysort(1, lists:reverse(KL)),
|
||||||
|
|
||||||
SWaGSL2 = os:timestamp(),
|
SWaGSL2 = os:timestamp(),
|
||||||
|
@ -413,9 +464,12 @@ skiplist_test() ->
|
||||||
io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++
|
io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++
|
||||||
"microseconds~n" ++
|
"microseconds~n" ++
|
||||||
"Top level key count of ~w~n",
|
"Top level key count of ~w~n",
|
||||||
[N, timer:now_diff(os:timestamp(), SWaDSL), length(SkipList1)]),
|
[N,
|
||||||
|
timer:now_diff(os:timestamp(), SWaDSL),
|
||||||
|
length(dict:fetch(?SKIP_WIDTH, SkipList1))]),
|
||||||
io:format(user, "Second tier key counts of ~w~n",
|
io:format(user, "Second tier key counts of ~w~n",
|
||||||
[lists:map(fun({_L, SL}) -> length(SL) end, SkipList1)]),
|
[lists:map(fun({_L, SL}) -> length(SL) end,
|
||||||
|
dict:fetch(?SKIP_WIDTH, SkipList1))]),
|
||||||
|
|
||||||
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
|
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
|
||||||
skiplist_timingtest(KLSorted, SkipList, N),
|
skiplist_timingtest(KLSorted, SkipList, N),
|
||||||
|
@ -482,13 +536,13 @@ skiplist_timingtest(KL, SkipList, N) ->
|
||||||
io:format(user, "Finding 10 ranges took ~w microseconds~n",
|
io:format(user, "Finding 10 ranges took ~w microseconds~n",
|
||||||
[timer:now_diff(os:timestamp(), SWc)]),
|
[timer:now_diff(os:timestamp(), SWc)]),
|
||||||
|
|
||||||
AltKL1 = generate_randomkeys(1, 1000, 1, 200),
|
AltKL1 = generate_randomkeys(1, 2000, 1, 200),
|
||||||
SWd = os:timestamp(),
|
SWd = os:timestamp(),
|
||||||
lists:foreach(fun({K, _V}) ->
|
lists:foreach(fun({K, _V}) ->
|
||||||
lookup(K, SkipList)
|
lookup(K, SkipList)
|
||||||
end,
|
end,
|
||||||
AltKL1),
|
AltKL1),
|
||||||
io:format(user, "Getting 1000 mainly missing keys took ~w microseconds~n",
|
io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n",
|
||||||
[timer:now_diff(os:timestamp(), SWd)]),
|
[timer:now_diff(os:timestamp(), SWd)]),
|
||||||
AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300),
|
AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300),
|
||||||
SWe = os:timestamp(),
|
SWe = os:timestamp(),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue