Merge pull request #18 from martinsumner/mas-leveledtree

Mas leveledtree

Commit 266e851a96
13 changed files with 1190 additions and 1081 deletions
@@ -15,6 +15,8 @@
 %% Inker key type used for tombstones
 -define(INKT_TOMB, tomb).
 
+-define(CACHE_TYPE, skpl).
+
 -record(sft_options,
         {wait = true :: boolean(),
          expire_tombstones = false :: boolean(),
@@ -139,6 +139,7 @@
          get_opt/3,
          load_snapshot/2,
          empty_ledgercache/0,
+         loadqueue_ledgercache/1,
          push_ledgercache/2]).
 
 -include_lib("eunit/include/eunit.hrl").
@@ -153,7 +154,8 @@
 -define(LONG_RUNNING, 80000).
 
 -record(ledger_cache, {mem :: ets:tab(),
-                        loader = leveled_skiplist:empty(false) :: tuple(),
+                        loader = leveled_tree:empty(?CACHE_TYPE) :: tuple(),
+                        load_queue = [] :: list(),
                         index = leveled_pmem:new_index(), % array
                         min_sqn = infinity :: integer()|infinity,
                         max_sqn = 0 :: integer()}).
@@ -474,6 +476,11 @@ push_ledgercache(Penciller, Cache) ->
                     Cache#ledger_cache.max_sqn},
     leveled_penciller:pcl_pushmem(Penciller, CacheToLoad).
 
+loadqueue_ledgercache(Cache) ->
+    SL = lists:ukeysort(1, Cache#ledger_cache.load_queue),
+    T = leveled_tree:from_orderedlist(SL, ?CACHE_TYPE),
+    Cache#ledger_cache{load_queue = [], loader = T}.
+
 %%%============================================================================
 %%% Internal functions
 %%%============================================================================
@@ -719,11 +726,12 @@ snapshot_store(State, SnapType) ->
 
 readycache_forsnapshot(LedgerCache) ->
     % Need to convert the Ledger Cache away from using the ETS table
-    SkipList = leveled_skiplist:from_orderedset(LedgerCache#ledger_cache.mem),
+    Tree = leveled_tree:from_orderedset(LedgerCache#ledger_cache.mem,
+                                        ?CACHE_TYPE),
     Idx = LedgerCache#ledger_cache.index,
     MinSQN = LedgerCache#ledger_cache.min_sqn,
     MaxSQN = LedgerCache#ledger_cache.max_sqn,
-    #ledger_cache{loader=SkipList, index=Idx, min_sqn=MinSQN, max_sqn=MaxSQN}.
+    #ledger_cache{loader=Tree, index=Idx, min_sqn=MinSQN, max_sqn=MaxSQN}.
 
 set_options(Opts) ->
     MaxJournalSize0 = get_opt(max_journalsize, Opts, 10000000000),
@@ -961,14 +969,10 @@ addto_ledgercache({H, SQN, KeyChanges}, Cache) ->
                         max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}.
 
 addto_ledgercache({H, SQN, KeyChanges}, Cache, loader) ->
-    FoldChangesFun =
-        fun({K, V}, SL0) ->
-            leveled_skiplist:enter_nolookup(K, V, SL0)
-        end,
-    UpdSL = lists:foldl(FoldChangesFun, Cache#ledger_cache.loader, KeyChanges),
+    UpdQ = KeyChanges ++ Cache#ledger_cache.load_queue,
     UpdIndex = leveled_pmem:prepare_for_index(Cache#ledger_cache.index, H),
     Cache#ledger_cache{index = UpdIndex,
-                        loader = UpdSL,
+                        load_queue = UpdQ,
                         min_sqn=min(SQN, Cache#ledger_cache.min_sqn),
                         max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}.
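Note (illustration, not part of the diff): in loader mode new key changes are prepended to load_queue, so the later lists:ukeysort/2 in loadqueue_ledgercache both sorts and de-duplicates on the key while keeping the most recently queued value. A minimal sketch with hypothetical keys and values:

    Q0 = [],
    Q1 = [{k1, v1_old}] ++ Q0,
    Q2 = [{k1, v1_new}, {k2, v2}] ++ Q1,    % newer changes are prepended
    lists:ukeysort(1, Q2).
    %% -> [{k1, v1_new}, {k2, v2}]          % sorted, duplicates dropped, newest kept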
@@ -979,7 +983,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) ->
     TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize),
     if
         TimeToPush ->
-            CacheToLoad = {leveled_skiplist:from_orderedset(Tab),
+            CacheToLoad = {leveled_tree:from_orderedset(Tab, ?CACHE_TYPE),
                             Cache#ledger_cache.index,
                             Cache#ledger_cache.min_sqn,
                             Cache#ledger_cache.max_sqn},
@@ -34,7 +34,6 @@
 
 -export([
         inker_reload_strategy/1,
-        strip_to_keyonly/1,
        strip_to_seqonly/1,
        strip_to_statusonly/1,
        strip_to_keyseqonly/1,
@@ -44,7 +43,6 @@
        endkey_passed/2,
        key_dominates/2,
        maybe_reap_expiredkey/2,
-        print_key/1,
        to_ledgerkey/3,
        to_ledgerkey/5,
        from_ledgerkey/1,
@@ -108,8 +106,6 @@ inker_reload_strategy(AltList) ->
                 ReloadStrategy0,
                 AltList).
 
-strip_to_keyonly({K, _V}) -> K.
-
 strip_to_statusonly({_, {_, St, _, _}}) -> St.
 
 strip_to_seqonly({_, {SeqN, _, _, _}}) -> SeqN.
@@ -252,33 +248,6 @@ create_value_for_journal(Value) ->
 hash(Obj) ->
     erlang:phash2(term_to_binary(Obj)).
 
-% Return a tuple of strings to ease the printing of keys to logs
-print_key(Key) ->
-    {A_STR, B_TERM, C_TERM} = case Key of
-                                    {?STD_TAG, B, K, _SK} ->
-                                        {"Object", B, K};
-                                    {?RIAK_TAG, B, K, _SK} ->
-                                        {"RiakObject", B, K};
-                                    {?IDX_TAG, B, {F, _V}, _K} ->
-                                        {"Index", B, F}
-                                end,
-    B_STR = turn_to_string(B_TERM),
-    C_STR = turn_to_string(C_TERM),
-    {A_STR, B_STR, C_STR}.
-
-turn_to_string(Item) ->
-    if
-        is_binary(Item) == true ->
-            binary_to_list(Item);
-        is_integer(Item) == true ->
-            integer_to_list(Item);
-        is_list(Item) == true ->
-            Item;
-        true ->
-            [Output] = io_lib:format("~w", [Item]),
-            Output
-    end.
-
 % Compare a key against a query key, only comparing elements that are non-null
 % in the Query key. This is used for comparing against end keys in queries.
@@ -461,10 +430,6 @@ endkey_passed_test() ->
     ?assertMatch(false, endkey_passed(TestKey, K1)),
     ?assertMatch(true, endkey_passed(TestKey, K2)).
 
-stringcheck_test() ->
-    ?assertMatch("Bucket", turn_to_string("Bucket")),
-    ?assertMatch("Bucket", turn_to_string(<<"Bucket">>)),
-    ?assertMatch("bucket", turn_to_string(bucket)).
-
 %% Test below proved that the overhead of performing hashes was trivial
 %% Maybe 5 microseconds per hash
@@ -673,10 +673,14 @@ load_between_sequence(MinSQN, MaxSQN, FilterFun, Penciller,
 push_to_penciller(Penciller, LedgerCache) ->
     % The push to penciller must start as a tree to correctly de-duplicate
     % the list by order before becoming a de-duplicated list for loading
+    LC0 = leveled_bookie:loadqueue_ledgercache(LedgerCache),
+    push_to_penciller_loop(Penciller, LC0).
+
+push_to_penciller_loop(Penciller, LedgerCache) ->
     case leveled_bookie:push_ledgercache(Penciller, LedgerCache) of
         returned ->
             timer:sleep(?LOADING_PAUSE),
-            push_to_penciller(Penciller, LedgerCache);
+            push_to_penciller_loop(Penciller, LedgerCache);
         ok ->
             ok
     end.
@@ -15,8 +15,8 @@
          sst_timing/3]).
 
 -define(PUT_LOGPOINT, 20000).
--define(HEAD_LOGPOINT, 160000).
--define(GET_LOGPOINT, 160000).
+-define(HEAD_LOGPOINT, 50000).
+-define(GET_LOGPOINT, 50000).
 -define(SST_LOGPOINT, 20000).
 -define(LOG_LEVEL, [info, warn, error, critical]).
 -define(SAMPLE_RATE, 16).
@@ -309,10 +309,12 @@
 
 
 log(LogReference, Subs) ->
-    {ok, {LogLevel, LogText}} = dict:find(LogReference, ?LOGBASE),
+    {LogLevel, LogText} = dict:fetch(LogReference, ?LOGBASE),
     case lists:member(LogLevel, ?LOG_LEVEL) of
         true ->
-            io:format(LogReference ++ " ~w " ++ LogText ++ "~n",
+            io:format(format_time()
+                        ++ " " ++ LogReference ++ " ~w "
+                        ++ LogText ++ "~n",
                         [self()|Subs]);
         false ->
             ok
@@ -320,7 +322,7 @@ log(LogReference, Subs) ->
 
 
 log_timer(LogReference, Subs, StartTime) ->
-    {ok, {LogLevel, LogText}} = dict:find(LogReference, ?LOGBASE),
+    {LogLevel, LogText} = dict:fetch(LogReference, ?LOGBASE),
     case lists:member(LogLevel, ?LOG_LEVEL) of
         true ->
             MicroS = timer:now_diff(os:timestamp(), StartTime),
@@ -330,7 +332,9 @@ log_timer(LogReference, Subs, StartTime) ->
                     MicroS ->
                         {"ms", MicroS div 1000}
                 end,
-            io:format(LogReference ++ " ~w " ++ LogText
+            io:format(format_time()
+                        ++ " " ++ LogReference ++ " ~w "
+                        ++ LogText
                         ++ " with time taken ~w " ++ Unit ++ "~n",
                         [self()|Subs] ++ [Time]);
         false ->
@@ -510,6 +514,17 @@ gen_timing_int({N, TimerD}, T0, TimerType, _KeyListFun, _LogPoint, _LogRef) ->
                                         TimerD)}.
 
 
+format_time() ->
+    format_time(localtime_ms()).
+
+localtime_ms() ->
+    {_, _, Micro} = Now = os:timestamp(),
+    {Date, {Hours, Minutes, Seconds}} = calendar:now_to_local_time(Now),
+    {Date, {Hours, Minutes, Seconds, Micro div 1000 rem 1000}}.
+
+format_time({{Y, M, D}, {H, Mi, S, Ms}}) ->
+    io_lib:format("~b-~2..0b-~2..0b", [Y, M, D]) ++ "T" ++
+        io_lib:format("~2..0b:~2..0b:~2..0b.~3..0b", [H, Mi, S, Ms]).
+
 
 %%%============================================================================
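Note (illustration, not part of the diff): the new format_time/1 produces an ISO8601-style local timestamp with millisecond precision as the log-line prefix. A quick sketch with a hypothetical date:

    lists:flatten(format_time({{2016, 12, 20}, {14, 3, 9, 42}})).
    %% -> "2016-12-20T14:03:09.042"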
@@ -183,16 +183,12 @@ perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN) ->
                 ME
         end,
     SinkManifestList = lists:map(RevertPointerFun, SinkList),
-    Man0 = leveled_pmanifest:remove_manifest_entry(Manifest,
-                                                    NewSQN,
-                                                    SinkLevel,
-                                                    SinkManifestList),
-    Man1 = leveled_pmanifest:insert_manifest_entry(Man0,
-                                                    NewSQN,
-                                                    SinkLevel,
-                                                    Additions),
-    Man2 = leveled_pmanifest:remove_manifest_entry(Man1,
+    Man0 = leveled_pmanifest:replace_manifest_entry(Manifest,
+                                                    NewSQN,
+                                                    SinkLevel,
+                                                    SinkManifestList,
+                                                    Additions),
+    Man2 = leveled_pmanifest:remove_manifest_entry(Man0,
                                                     NewSQN,
                                                     SrcLevel,
                                                     Src),
@@ -9,7 +9,7 @@
 %% the Penciller's Clerk
 %% - The Penciller can be cloned and maintains a register of clones who have
 %% requested snapshots of the Ledger
-%% - The accepts new dumps (in the form of a leveled_skiplist accomponied by
+%% - The accepts new dumps (in the form of a leveled_tree accomponied by
 %% an array of hash-listing binaries) from the Bookie, and responds either 'ok'
 %% to the bookie if the information is accepted nad the Bookie can refresh its
 %% memory, or 'returned' if the bookie must continue without refreshing as the
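Note (illustration, not part of the diff): a caller-side sketch of the push contract described above, using the four-part cache shape shown in handle_call({push_mem, ...}) later in this changeset. The wrapper function and its arguments are hypothetical.

    push_cache(Penciller, Tree, HashIndexArray, MinSQN, MaxSQN) ->
        CacheToLoad = {Tree, HashIndexArray, MinSQN, MaxSQN},
        case leveled_penciller:pcl_pushmem(Penciller, CacheToLoad) of
            ok ->
                ok;             % dump accepted; the Bookie can refresh its memory
            returned ->
                returned        % levelzero file pending; keep the cache and retry
        end.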
@@ -224,7 +224,7 @@
 
                 levelzero_pending = false :: boolean(),
                 levelzero_constructor :: pid(),
-                levelzero_cache = [] :: list(), % a list of skiplists
+                levelzero_cache = [] :: list(), % a list of trees
                 levelzero_size = 0 :: integer(),
                 levelzero_maxcachesize :: integer(),
                 levelzero_cointoss = false :: boolean(),
@@ -345,9 +345,9 @@ handle_call({push_mem, {PushedTree, PushedIdx, MinSQN, MaxSQN}},
             State=#state{is_snapshot=Snap}) when Snap == false ->
     % The push_mem process is as follows:
     %
-    % 1 - Receive a cache. The cache has four parts: a skiplist of keys and
+    % 1 - Receive a cache. The cache has four parts: a tree of keys and
     % values, an array of 256 binaries listing the hashes present in the
-    % skiplist, a min SQN and a max SQN
+    % tree, a min SQN and a max SQN
     %
     % 2 - Check to see if there is a levelzero file pending. If so, the
     % update must be returned. If not the update can be accepted
@@ -404,7 +404,7 @@ handle_call({fetch_keys, StartKey, EndKey, AccFun, InitAcc, MaxKeys},
                 leveled_pmem:merge_trees(StartKey,
                                             EndKey,
                                             State#state.levelzero_cache,
-                                            leveled_skiplist:empty());
+                                            leveled_tree:empty(?CACHE_TYPE));
             List ->
                 List
         end,
@@ -1072,10 +1072,10 @@ clean_subdir(DirPath) ->
 
 
 maybe_pause_push(PCL, KL) ->
-    T0 = leveled_skiplist:empty(true),
+    T0 = [],
     I0 = leveled_pmem:new_index(),
     T1 = lists:foldl(fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) ->
-                        UpdSL = leveled_skiplist:enter(K, V, AccSL),
+                        UpdSL = [{K, V}|AccSL],
                         SQN = leveled_codec:strip_to_seqonly({K, V}),
                         H = leveled_codec:magic_hash(K),
                         UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H),
@@ -1083,7 +1083,10 @@ maybe_pause_push(PCL, KL) ->
                     end,
                     {T0, I0, infinity, 0},
                     KL),
-    case pcl_pushmem(PCL, T1) of
+    SL = element(1, T1),
+    Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, SL), ?CACHE_TYPE),
+    T2 = setelement(1, T1, Tree),
+    case pcl_pushmem(PCL, T2) of
         returned ->
             timer:sleep(50),
             maybe_pause_push(PCL, KL);
@@ -1315,63 +1318,63 @@ sqnoverlap_otherway_findnextkey_test() ->
 
 foldwithimm_simple_test() ->
     QueryArray = [
-        {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}},
-            {{o, "Bucket1", "Key5"}, {1, {active, infinity}, 0, null}}]},
-        {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]},
-        {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]}
+        {2, [{{o, "Bucket1", "Key1", null},
+                {5, {active, infinity}, 0, null}},
+            {{o, "Bucket1", "Key5", null},
+                {1, {active, infinity}, 0, null}}]},
+        {3, [{{o, "Bucket1", "Key3", null},
+                {3, {active, infinity}, 0, null}}]},
+        {5, [{{o, "Bucket1", "Key5", null},
+                {2, {active, infinity}, 0, null}}]}
     ],
-    IMM0 = leveled_skiplist:enter({o, "Bucket1", "Key6"},
-                                    {7, {active, infinity}, 0, null},
-                                    leveled_skiplist:empty()),
-    IMM1 = leveled_skiplist:enter({o, "Bucket1", "Key1"},
-                                    {8, {active, infinity}, 0, null},
-                                    IMM0),
-    IMM2 = leveled_skiplist:enter({o, "Bucket1", "Key8"},
-                                    {9, {active, infinity}, 0, null},
-                                    IMM1),
-    IMMiter = leveled_skiplist:to_range(IMM2, {o, "Bucket1", "Key1"}),
+    KL1A = [{{o, "Bucket1", "Key6", null}, {7, {active, infinity}, 0, null}},
+            {{o, "Bucket1", "Key1", null}, {8, {active, infinity}, 0, null}},
+            {{o, "Bucket1", "Key8", null}, {9, {active, infinity}, 0, null}}],
+    IMM2 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1A), ?CACHE_TYPE),
+    IMMiter = leveled_tree:match_range({o, "Bucket1", "Key1", null},
+                                        {o, null, null, null},
+                                        IMM2),
     AccFun = fun(K, V, Acc) -> SQN = leveled_codec:strip_to_seqonly({K, V}),
                                 Acc ++ [{K, SQN}] end,
     Acc = keyfolder(IMMiter,
                     QueryArray,
-                    {o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"},
+                    {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null},
                     {AccFun, []}),
-    ?assertMatch([{{o, "Bucket1", "Key1"}, 8},
-                    {{o, "Bucket1", "Key3"}, 3},
-                    {{o, "Bucket1", "Key5"}, 2},
-                    {{o, "Bucket1", "Key6"}, 7}], Acc),
+    ?assertMatch([{{o, "Bucket1", "Key1", null}, 8},
+                    {{o, "Bucket1", "Key3", null}, 3},
+                    {{o, "Bucket1", "Key5", null}, 2},
+                    {{o, "Bucket1", "Key6", null}, 7}], Acc),
 
-    IMM1A = leveled_skiplist:enter({o, "Bucket1", "Key1"},
-                                    {8, {active, infinity}, 0, null},
-                                    leveled_skiplist:empty()),
-    IMMiterA = leveled_skiplist:to_range(IMM1A, {o, "Bucket1", "Key1"}),
+    IMMiterA = [{{o, "Bucket1", "Key1", null},
+                    {8, {active, infinity}, 0, null}}],
     AccA = keyfolder(IMMiterA,
                     QueryArray,
-                    {o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"},
+                    {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null},
                     {AccFun, []}),
-    ?assertMatch([{{o, "Bucket1", "Key1"}, 8},
-                    {{o, "Bucket1", "Key3"}, 3},
-                    {{o, "Bucket1", "Key5"}, 2}], AccA),
+    ?assertMatch([{{o, "Bucket1", "Key1", null}, 8},
+                    {{o, "Bucket1", "Key3", null}, 3},
+                    {{o, "Bucket1", "Key5", null}, 2}], AccA),
 
-    IMM3 = leveled_skiplist:enter({o, "Bucket1", "Key4"},
-                                    {10, {active, infinity}, 0, null},
-                                    IMM2),
-    IMMiterB = leveled_skiplist:to_range(IMM3, {o, "Bucket1", "Key1"}),
+    KL1B = [{{o, "Bucket1", "Key4", null}, {10, {active, infinity}, 0, null}}|KL1A],
+    IMM3 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1B), ?CACHE_TYPE),
+    IMMiterB = leveled_tree:match_range({o, "Bucket1", "Key1", null},
+                                        {o, null, null, null},
+                                        IMM3),
    AccB = keyfolder(IMMiterB,
                     QueryArray,
-                    {o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"},
+                    {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null},
                     {AccFun, []}),
-    ?assertMatch([{{o, "Bucket1", "Key1"}, 8},
-                    {{o, "Bucket1", "Key3"}, 3},
-                    {{o, "Bucket1", "Key4"}, 10},
-                    {{o, "Bucket1", "Key5"}, 2},
-                    {{o, "Bucket1", "Key6"}, 7}], AccB).
+    ?assertMatch([{{o, "Bucket1", "Key1", null}, 8},
+                    {{o, "Bucket1", "Key3", null}, 3},
+                    {{o, "Bucket1", "Key4", null}, 10},
+                    {{o, "Bucket1", "Key5", null}, 2},
+                    {{o, "Bucket1", "Key6", null}, 7}], AccB).
 
 create_file_test() ->
     Filename = "../test/new_file.sst",
     ok = file:write_file(Filename, term_to_binary("hello")),
     KVL = lists:usort(generate_randomkeys(10000)),
-    Tree = leveled_skiplist:from_list(KVL),
+    Tree = leveled_tree:from_orderedlist(KVL, ?CACHE_TYPE),
     FetchFun = fun(Slot) -> lists:nth(Slot, [Tree]) end,
     {ok,
         SP,
@@ -32,6 +32,7 @@
         merge_lookup/4,
         insert_manifest_entry/4,
         remove_manifest_entry/4,
+        replace_manifest_entry/5,
         switch_manifest_entry/4,
         mergefile_selector/2,
         add_snapshot/3,
@@ -51,6 +52,8 @@
 -define(MANIFEST_FILEX, "man").
 -define(MANIFEST_FP, "ledger_manifest").
 -define(MAX_LEVELS, 8).
+-define(TREE_TYPE, idxt).
+-define(TREE_WIDTH, 8).
 
 -record(manifest, {levels,
                         % an array of lists or trees representing the manifest
@@ -73,8 +76,16 @@
 %%%============================================================================
 
 new_manifest() ->
+    LevelArray0 = array:new([{size, ?MAX_LEVELS + 1}, {default, []}]),
+    SetLowerLevelFun =
+        fun(IDX, Acc) ->
+            array:set(IDX, leveled_tree:empty(?TREE_TYPE), Acc)
+        end,
+    LevelArray1 = lists:foldl(SetLowerLevelFun,
+                                LevelArray0,
+                                lists:seq(2, ?MAX_LEVELS)),
     #manifest{
-        levels = array:new([{size, ?MAX_LEVELS + 1}, {default, []}]),
+        levels = LevelArray1,
         manifest_sqn = 0,
         snapshots = [],
         pending_deletes = dict:new(),
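Note (illustration, not part of the diff, and assuming leveled_tree:tsize/1 returns 0 for an empty tree): after new_manifest/0, levels 0 and 1 keep the array default (a plain list) while levels 2 and deeper are initialised as empty idxt trees.

    Levels = (new_manifest())#manifest.levels,
    [] = array:get(0, Levels),                      % level 0: a list
    [] = array:get(1, Levels),                      % level 1: a list
    0 = leveled_tree:tsize(array:get(2, Levels)).   % level 2 upwards: an empty idxt tree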
|
@ -139,6 +150,30 @@ save_manifest(Manifest, RootPath) ->
|
||||||
CRC = erlang:crc32(ManBin),
|
CRC = erlang:crc32(ManBin),
|
||||||
ok = file:write_file(FP, <<CRC:32/integer, ManBin/binary>>).
|
ok = file:write_file(FP, <<CRC:32/integer, ManBin/binary>>).
|
||||||
|
|
||||||
|
|
||||||
|
replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) ->
|
||||||
|
Levels = Manifest#manifest.levels,
|
||||||
|
Level = array:get(LevelIdx, Levels),
|
||||||
|
UpdLevel = replace_entry(LevelIdx, Level, Removals, Additions),
|
||||||
|
leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]),
|
||||||
|
PendingDeletes = update_pendingdeletes(ManSQN,
|
||||||
|
Removals,
|
||||||
|
Manifest#manifest.pending_deletes),
|
||||||
|
UpdLevels = array:set(LevelIdx, UpdLevel, Levels),
|
||||||
|
case is_empty(LevelIdx, UpdLevel) of
|
||||||
|
true ->
|
||||||
|
Manifest#manifest{levels = UpdLevels,
|
||||||
|
basement = get_basement(UpdLevels),
|
||||||
|
manifest_sqn = ManSQN,
|
||||||
|
pending_deletes = PendingDeletes};
|
||||||
|
false ->
|
||||||
|
Basement = max(LevelIdx, Manifest#manifest.basement),
|
||||||
|
Manifest#manifest{levels = UpdLevels,
|
||||||
|
basement = Basement,
|
||||||
|
manifest_sqn = ManSQN,
|
||||||
|
pending_deletes = PendingDeletes}
|
||||||
|
end.
|
||||||
|
|
||||||
insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
|
insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
|
||||||
Levels = Manifest#manifest.levels,
|
Levels = Manifest#manifest.levels,
|
||||||
Level = array:get(LevelIdx, Levels),
|
Level = array:get(LevelIdx, Levels),
|
||||||
|
@@ -154,22 +189,9 @@ remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
     Level = array:get(LevelIdx, Levels),
     UpdLevel = remove_entry(LevelIdx, Level, Entry),
     leveled_log:log("PC019", ["remove", LevelIdx, UpdLevel]),
-    DelFun =
-        fun(E, Acc) ->
-            dict:store(E#manifest_entry.filename,
-                        {ManSQN, E},
-                        Acc)
-        end,
-    Entries =
-        case is_list(Entry) of
-            true ->
-                Entry;
-            false ->
-                [Entry]
-        end,
-    PendingDeletes = lists:foldl(DelFun,
-                                    Manifest#manifest.pending_deletes,
-                                    Entries),
+    PendingDeletes = update_pendingdeletes(ManSQN,
+                                            Entry,
+                                            Manifest#manifest.pending_deletes),
     UpdLevels = array:set(LevelIdx, UpdLevel, Levels),
     case is_empty(LevelIdx, UpdLevel) of
         true ->
@@ -322,58 +344,181 @@ levelzero_present(Manifest) ->
 %%% Internal Functions
 %%%============================================================================
 
 
 %% All these internal functions that work on a level are also passed LeveIdx
 %% even if this is not presently relevant. Currnetly levels are lists, but
 %% future branches may make lower levels trees or skiplists to improve fetch
 %% efficiency
 
-load_level(_LevelIdx, Level, PidFun, SQNFun) ->
-    LevelLoadFun =
+load_level(LevelIdx, Level, PidFun, SQNFun) ->
+    HigherLevelLoadFun =
         fun(ME, {L_Out, L_MaxSQN}) ->
             FN = ME#manifest_entry.filename,
             P = PidFun(FN),
             SQN = SQNFun(P),
             {[ME#manifest_entry{owner=P}|L_Out], max(SQN, L_MaxSQN)}
         end,
-    lists:foldr(LevelLoadFun, {[], 0}, Level).
+    LowerLevelLoadFun =
+        fun({EK, ME}, {L_Out, L_MaxSQN}) ->
+            FN = ME#manifest_entry.filename,
+            P = PidFun(FN),
+            SQN = SQNFun(P),
+            {[{EK, ME#manifest_entry{owner=P}}|L_Out], max(SQN, L_MaxSQN)}
+        end,
+    case LevelIdx =< 1 of
+        true ->
+            lists:foldr(HigherLevelLoadFun, {[], 0}, Level);
+        false ->
+            {L0, MaxSQN} = lists:foldr(LowerLevelLoadFun,
+                                        {[], 0},
+                                        leveled_tree:to_list(Level)),
+            {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), MaxSQN}
+    end.
 
+close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 ->
+    lists:foreach(CloseEntryFun, Level);
 close_level(_LevelIdx, Level, CloseEntryFun) ->
-    lists:foreach(CloseEntryFun, Level).
+    lists:foreach(CloseEntryFun, leveled_tree:to_list(Level)).
 
 is_empty(_LevelIdx, []) ->
     true;
-is_empty(_LevelIdx, _Level) ->
-    false.
-
-size(_LevelIdx, Level) ->
-    length(Level).
-
-add_entry(_LevelIdx, Level, Entries) when is_list(Entries) ->
-    lists:sort(Level ++ Entries);
-add_entry(_LevelIdx, Level, Entry) ->
-    lists:sort([Entry|Level]).
-
-remove_entry(_LevelIdx, Level, Entries) when is_list(Entries) ->
-    % We're assuming we're removing a sorted sublist
-    RemLength = length(Entries),
-    [RemStart|_Tail] = Entries,
-    remove_section(Level, RemStart#manifest_entry.start_key, RemLength);
-remove_entry(_LevelIdx, Level, Entry) ->
-    remove_section(Level, Entry#manifest_entry.start_key, 1).
-
-remove_section(Level, SectionStartKey, SectionLength) ->
-    PredFun =
-        fun(E) ->
-            E#manifest_entry.start_key < SectionStartKey
-        end,
-    {Pre, Rest} = lists:splitwith(PredFun, Level),
-    Post = lists:nthtail(SectionLength, Rest),
-    Pre ++ Post.
-
-key_lookup_level(_LevelIdx, [], _Key) ->
-    false;
-key_lookup_level(LevelIdx, [Entry|Rest], Key) ->
+is_empty(LevelIdx, _Level) when LevelIdx =< 1 ->
+    false;
+is_empty(_LevelIdx, Level) ->
+    leveled_tree:tsize(Level) == 0.
+
+size(LevelIdx, Level) when LevelIdx =< 1 ->
+    length(Level);
+size(_LevelIdx, Level) ->
+    leveled_tree:tsize(Level).
+
+pred_fun(LevelIdx, StartKey, _EndKey) when LevelIdx =< 1 ->
+    fun(ME) ->
+        ME#manifest_entry.start_key < StartKey
+    end;
+pred_fun(_LevelIdx, _StartKey, EndKey) ->
+    fun({EK, _ME}) ->
+        EK < EndKey
+    end.
+
+add_entry(_LevelIdx, Level, []) ->
+    Level;
+add_entry(LevelIdx, Level, Entries) when is_list(Entries) ->
+    FirstEntry = lists:nth(1, Entries),
+    PredFun = pred_fun(LevelIdx,
+                        FirstEntry#manifest_entry.start_key,
+                        FirstEntry#manifest_entry.end_key),
+    case LevelIdx =< 1 of
+        true ->
+            {LHS, RHS} = lists:splitwith(PredFun, Level),
+            lists:append([LHS, Entries, RHS]);
+        false ->
+            {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)),
+            MapFun =
+                fun(ME) ->
+                    {ME#manifest_entry.end_key, ME}
+                end,
+            Entries0 = lists:map(MapFun, Entries),
+            leveled_tree:from_orderedlist(lists:append([LHS, Entries0, RHS]),
+                                            ?TREE_TYPE,
+                                            ?TREE_WIDTH)
+    end;
+add_entry(LevelIdx, Level, Entry) ->
+    add_entry(LevelIdx, Level, [Entry]).
+
+remove_entry(LevelIdx, Level, Entries) ->
+    % We're assuming we're removing a sorted sublist
+    {RemLength, FirstRemoval} = measure_removals(Entries),
+    remove_section(LevelIdx, Level, FirstRemoval, RemLength).
+
+measure_removals(Removals) ->
+    case is_list(Removals) of
+        true ->
+            {length(Removals), lists:nth(1, Removals)};
+        false ->
+            {1, Removals}
+    end.
+
+remove_section(LevelIdx, Level, FirstEntry, SectionLength) ->
+    PredFun = pred_fun(LevelIdx,
+                        FirstEntry#manifest_entry.start_key,
+                        FirstEntry#manifest_entry.end_key),
+    case LevelIdx =< 1 of
+        true ->
+            {LHS, RHS} = lists:splitwith(PredFun, Level),
+            Post = lists:nthtail(SectionLength, RHS),
+            lists:append([LHS, Post]);
+        false ->
+            {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)),
+            Post = lists:nthtail(SectionLength, RHS),
+            leveled_tree:from_orderedlist(lists:append([LHS, Post]),
+                                            ?TREE_TYPE,
+                                            ?TREE_WIDTH)
+    end.
+
+replace_entry(LevelIdx, Level, Removals, Additions) when LevelIdx =< 1 ->
+    {SectionLength, FirstEntry} = measure_removals(Removals),
+    PredFun = pred_fun(LevelIdx,
+                        FirstEntry#manifest_entry.start_key,
+                        FirstEntry#manifest_entry.end_key),
+    {LHS, RHS} = lists:splitwith(PredFun, Level),
+    Post = lists:nthtail(SectionLength, RHS),
+    case is_list(Additions) of
+        true ->
+            lists:append([LHS, Additions, Post]);
+        false ->
+            lists:append([LHS, [Additions], Post])
+    end;
+replace_entry(LevelIdx, Level, Removals, Additions) ->
+    {SectionLength, FirstEntry} = measure_removals(Removals),
+    PredFun = pred_fun(LevelIdx,
+                        FirstEntry#manifest_entry.start_key,
+                        FirstEntry#manifest_entry.end_key),
+    {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)),
+    Post =
+        case RHS of
+            [] ->
+                [];
+            _ ->
+                lists:nthtail(SectionLength, RHS)
+        end,
+    UpdList =
+        case is_list(Additions) of
+            true ->
+                MapFun =
+                    fun(ME) ->
+                        {ME#manifest_entry.end_key, ME}
+                    end,
+                Additions0 = lists:map(MapFun, Additions),
+                lists:append([LHS, Additions0, Post]);
+            false ->
+                lists:append([LHS,
+                                [{Additions#manifest_entry.end_key,
+                                    Additions}],
+                                Post])
+        end,
+    leveled_tree:from_orderedlist(UpdList, ?TREE_TYPE, ?TREE_WIDTH).
+
+
+update_pendingdeletes(ManSQN, Removals, PendingDeletes) ->
+    DelFun =
+        fun(E, Acc) ->
+            dict:store(E#manifest_entry.filename,
+                        {ManSQN, E},
+                        Acc)
+        end,
+    Entries =
+        case is_list(Removals) of
+            true ->
+                Removals;
+            false ->
+                [Removals]
+        end,
+    lists:foldl(DelFun, PendingDeletes, Entries).
+
+key_lookup_level(LevelIdx, [], _Key) when LevelIdx =< 1 ->
+    false;
+key_lookup_level(LevelIdx, [Entry|Rest], Key) when LevelIdx =< 1 ->
     case Entry#manifest_entry.end_key >= Key of
         true ->
             case Key >= Entry#manifest_entry.start_key of
@@ -384,8 +529,20 @@ key_lookup_level(LevelIdx, [Entry|Rest], Key) ->
             end;
         false ->
             key_lookup_level(LevelIdx, Rest, Key)
+    end;
+key_lookup_level(_LevelIdx, Level, Key) ->
+    StartKeyFun =
+        fun(ME) ->
+            ME#manifest_entry.start_key
+        end,
+    case leveled_tree:search(Key, Level, StartKeyFun) of
+        none ->
+            false;
+        {_EK, ME} ->
+            ME#manifest_entry.owner
     end.
 
 
 range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) ->
     Range =
         case LevelIdx > Manifest#manifest.basement of
@@ -400,7 +557,7 @@ range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) ->
         end,
     lists:map(MakePointerFun, Range).
 
-range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) ->
+range_lookup_level(LevelIdx, Level, QStartKey, QEndKey) when LevelIdx =< 1 ->
     BeforeFun =
         fun(M) ->
             QStartKey > M#manifest_entry.end_key
|
||||||
end,
|
end,
|
||||||
{_Before, MaybeIn} = lists:splitwith(BeforeFun, Level),
|
{_Before, MaybeIn} = lists:splitwith(BeforeFun, Level),
|
||||||
{In, _After} = lists:splitwith(NotAfterFun, MaybeIn),
|
{In, _After} = lists:splitwith(NotAfterFun, MaybeIn),
|
||||||
In.
|
In;
|
||||||
|
range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) ->
|
||||||
|
StartKeyFun =
|
||||||
|
fun(ME) ->
|
||||||
|
ME#manifest_entry.start_key
|
||||||
|
end,
|
||||||
|
Range = leveled_tree:search_range(QStartKey, QEndKey, Level, StartKeyFun),
|
||||||
|
MapFun =
|
||||||
|
fun({_EK, ME}) ->
|
||||||
|
ME
|
||||||
|
end,
|
||||||
|
lists:map(MapFun, Range).
|
||||||
|
|
||||||
|
|
||||||
get_basement(Levels) ->
|
get_basement(Levels) ->
|
||||||
GetBaseFun =
|
GetBaseFun =
|
||||||
|
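Note (illustration, not part of the diff, hypothetical keys): at a list level (LevelIdx =< 1) pred_fun/3 splits on start_key, so lists:splitwith/2 leaves every entry strictly before the section being changed on the left-hand side.

    PredFun = fun(ME) -> ME#manifest_entry.start_key < {o, "B", "K40", null} end,
    Level = [#manifest_entry{start_key={o, "B", "K10", null}},
             #manifest_entry{start_key={o, "B", "K40", null}},
             #manifest_entry{start_key={o, "B", "K70", null}}],
    {LHS, RHS} = lists:splitwith(PredFun, Level),
    1 = length(LHS),    % only the K10 entry precedes the section
    2 = length(RHS).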
@@ -456,6 +625,7 @@ open_manifestfile(RootPath, [TopManSQN|Rest]) ->
             open_manifestfile(RootPath, Rest)
     end.
 
+
 %%%============================================================================
 %%% Test
 %%%============================================================================
@@ -587,6 +757,98 @@ keylookup_manifest_test() ->
     ?assertMatch("pid_y3", key_lookup(Man13, 1, LK1_4)),
     ?assertMatch("pid_z5", key_lookup(Man13, 2, LK1_4)).
 
+ext_keylookup_manifest_test() ->
+    RP = "../test",
+    {_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(),
+    save_manifest(Man6, RP),
+
+    E7 = #manifest_entry{start_key={o, "Bucket1", "K997", null},
+                            end_key={o, "Bucket1", "K999", null},
+                            filename="Z7",
+                            owner="pid_z7"},
+    Man7 = insert_manifest_entry(Man6, 2, 2, E7),
+    save_manifest(Man7, RP),
+    ManOpen1 = open_manifest(RP),
+    ?assertMatch(2, get_manifest_sqn(ManOpen1)),
+
+    Man7FN = filepath(RP, 2, current_manifest),
+    Man7FNAlt = filename:rootname(Man7FN) ++ ".pnd",
+    {ok, BytesCopied} = file:copy(Man7FN, Man7FNAlt),
+    {ok, Bin} = file:read_file(Man7FN),
+    ?assertMatch(BytesCopied, byte_size(Bin)),
+    RandPos = random:uniform(bit_size(Bin) - 1),
+    <<Pre:RandPos/bitstring, BitToFlip:1/integer, Rest/bitstring>> = Bin,
+    Flipped = BitToFlip bxor 1,
+    ok = file:write_file(Man7FN,
+                            <<Pre:RandPos/bitstring,
+                                Flipped:1/integer,
+                                Rest/bitstring>>),
+
+    ?assertMatch(2, get_manifest_sqn(Man7)),
+
+    ManOpen2 = open_manifest(RP),
+    ?assertMatch(1, get_manifest_sqn(ManOpen2)),
+
+    E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
+                            end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
+                            filename="Z1",
+                            owner="pid_z1"},
+    E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
+                            end_key={o, "Bucket1", "K71", null},
+                            filename="Z2",
+                            owner="pid_z2"},
+    E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
+                            end_key={o, "Bucket1", "K993", null},
+                            filename="Z3",
+                            owner="pid_z3"},
+
+    E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"},
+                            end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"},
+                            owner="pid_y1",
+                            filename="Y1"},
+    E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
+                            end_key={o, "Bucket1", "K45", null},
+                            owner="pid_y2",
+                            filename="Y2"},
+    E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null},
+                            end_key={o, "Bucket1", "K812", null},
+                            owner="pid_y3",
+                            filename="Y3"},
+    E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null},
+                            end_key={o, "Bucket1", "K998", null},
+                            owner="pid_y4",
+                            filename="Y4"},
+
+    Man8 = replace_manifest_entry(ManOpen2, 2, 1, E1, E1_2),
+    Man9 = remove_manifest_entry(Man8, 2, 1, [E2, E3]),
+    Man10 = insert_manifest_entry(Man9, 2, 1, [E2_2, E3_2, E4_2]),
+    ?assertMatch(2, get_manifest_sqn(Man10)),
+
+    LK1_4 = {o, "Bucket1", "K75", null},
+    ?assertMatch("pid_y3", key_lookup(Man10, 1, LK1_4)),
+    ?assertMatch("pid_z5", key_lookup(Man10, 2, LK1_4)),
+
+    E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"},
+                            end_key={o, "Bucket1", "K78", null},
+                            filename="Z5",
+                            owner="pid_z5"},
+    E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null},
+                            end_key={o, "Bucket1", "K996", null},
+                            filename="Z6",
+                            owner="pid_z6"},
+
+    Man11 = remove_manifest_entry(Man10, 3, 2, [E5, E6]),
+    ?assertMatch(3, get_manifest_sqn(Man11)),
+    ?assertMatch(false, key_lookup(Man11, 2, LK1_4)),
+
+    E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
+                            end_key={o, "Bucket1", "K45", null},
+                            owner="pid_y2",
+                            filename="Y2"},
+
+    Man12 = replace_manifest_entry(Man11, 4, 2, E2_2, E5),
+    ?assertMatch(4, get_manifest_sqn(Man12)),
+    ?assertMatch("pid_z5", key_lookup(Man12, 2, LK1_4)).
+
 rangequery_manifest_test() ->
     {_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(),
@@ -57,7 +57,7 @@ prepare_for_index(IndexArray, Hash) ->
 
 
 add_to_cache(L0Size, {LevelMinus1, MinSQN, MaxSQN}, LedgerSQN, TreeList) ->
-    LM1Size = leveled_skiplist:size(LevelMinus1),
+    LM1Size = leveled_tree:tsize(LevelMinus1),
     case LM1Size of
         0 ->
             {LedgerSQN, L0Size, TreeList};
@@ -99,7 +99,7 @@ to_list(Slots, FetchFun) ->
     SlotList = lists:reverse(lists:seq(1, Slots)),
     FullList = lists:foldl(fun(Slot, Acc) ->
                                 Tree = FetchFun(Slot),
-                                L = leveled_skiplist:to_list(Tree),
+                                L = leveled_tree:to_list(Tree),
                                 lists:ukeymerge(1, Acc, L)
                             end,
                             [],
|
@ -119,14 +119,14 @@ check_levelzero(Key, Hash, PosList, TreeList) ->
|
||||||
check_slotlist(Key, Hash, PosList, TreeList).
|
check_slotlist(Key, Hash, PosList, TreeList).
|
||||||
|
|
||||||
|
|
||||||
merge_trees(StartKey, EndKey, SkipListList, LevelMinus1) ->
|
merge_trees(StartKey, EndKey, TreeList, LevelMinus1) ->
|
||||||
lists:foldl(fun(SkipList, Acc) ->
|
lists:foldl(fun(Tree, Acc) ->
|
||||||
R = leveled_skiplist:to_range(SkipList,
|
R = leveled_tree:match_range(StartKey,
|
||||||
StartKey,
|
EndKey,
|
||||||
EndKey),
|
Tree),
|
||||||
lists:ukeymerge(1, Acc, R) end,
|
lists:ukeymerge(1, Acc, R) end,
|
||||||
[],
|
[],
|
||||||
[LevelMinus1|lists:reverse(SkipListList)]).
|
[LevelMinus1|lists:reverse(TreeList)]).
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% Internal Functions
|
%%% Internal Functions
|
||||||
|
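Note (illustration, not part of the diff, hypothetical keys/values): merge_trees folds newest-first over [LevelMinus1 | reversed TreeList], and lists:ukeymerge/3 keeps the element from its first argument on a key clash, so newer values shadow older ones.

    Newer = [{k1, new}, {k3, n3}],
    Older = [{k1, old}, {k2, o2}],
    lists:ukeymerge(1, Newer, Older).
    %% -> [{k1, new}, {k2, o2}, {k3, n3}]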
@@ -148,7 +148,7 @@ split_hash(Hash) ->
     H0 = (Hash bsr 8) band 8388607,
     {Slot, H0}.
 
-check_slotlist(Key, Hash, CheckList, TreeList) ->
+check_slotlist(Key, _Hash, CheckList, TreeList) ->
     SlotCheckFun =
         fun(SlotToCheck, {Found, KV}) ->
             case Found of
@@ -156,7 +156,7 @@ check_slotlist(Key, Hash, CheckList, TreeList) ->
                     {Found, KV};
                 false ->
                     CheckTree = lists:nth(SlotToCheck, TreeList),
-                    case leveled_skiplist:lookup(Key, Hash, CheckTree) of
+                    case leveled_tree:match(Key, CheckTree) of
                         none ->
                             {Found, KV};
                         {value, Value} ->
@@ -188,7 +188,7 @@ generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
                                 [],
                                 BucketRangeLow,
                                 BucketRangeHigh),
-    leveled_skiplist:from_list(KVL).
+    leveled_tree:from_orderedlist(lists:ukeysort(1, KVL), ?CACHE_TYPE).
 
 generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
     Acc;
@@ -223,7 +223,7 @@ compare_method_test() ->
     ?assertMatch(32000, SQN),
     ?assertMatch(true, Size =< 32000),
 
-    TestList = leveled_skiplist:to_list(generate_randomkeys(1, 2000, 1, 800)),
+    TestList = leveled_tree:to_list(generate_randomkeys(1, 2000, 1, 800)),
 
     FindKeyFun =
         fun(Key) ->
@@ -232,7 +232,7 @@ compare_method_test() ->
                 true ->
                     {true, KV};
                 false ->
-                    L0 = leveled_skiplist:lookup(Key, Tree),
+                    L0 = leveled_tree:match(Key, Tree),
                     case L0 of
                         none ->
                             {false, not_found};
@@ -270,19 +270,20 @@ compare_method_test() ->
                     P = leveled_codec:endkey_passed(EndKey, K),
                     case {K, P} of
                         {K, false} when K >= StartKey ->
-                            leveled_skiplist:enter(K, V, Acc);
+                            [{K, V}|Acc];
                         _ ->
                             Acc
                     end
                 end,
-                leveled_skiplist:empty(),
+                [],
                 DumpList),
-    Sz0 = leveled_skiplist:size(Q0),
+    Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, Q0), ?CACHE_TYPE),
+    Sz0 = leveled_tree:tsize(Tree),
     io:format("Crude method took ~w microseconds resulting in tree of " ++
                 "size ~w~n",
                 [timer:now_diff(os:timestamp(), SWa), Sz0]),
     SWb = os:timestamp(),
-    Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_skiplist:empty()),
+    Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_tree:empty(?CACHE_TYPE)),
     Sz1 = length(Q1),
     io:format("Merge method took ~w microseconds resulting in tree of " ++
                 "size ~w~n",
@@ -299,7 +300,7 @@ with_index_test() ->
         fun(_X, {{LedgerSQN, L0Size, L0TreeList}, L0Idx, SrcList}) ->
             LM1 = generate_randomkeys_aslist(LedgerSQN + 1, 2000, 1, 500),
             LM1Array = lists:foldl(IndexPrepareFun, new_index(), LM1),
-            LM1SL = leveled_skiplist:from_list(LM1),
+            LM1SL = leveled_tree:from_orderedlist(lists:ukeysort(1, LM1), ?CACHE_TYPE),
             UpdL0Index = add_to_index(LM1Array, L0Idx, length(L0TreeList) + 1),
             R = add_to_cache(L0Size,
                                 {LM1SL, LedgerSQN + 1, LedgerSQN + 2000},
@@ -1,661 +0,0 @@
-%% -------- SKIPLIST ---------
-%%
-%% For storing small numbers of {K, V} pairs where reasonable insertion and
-%% fetch times, but with fast support for flattening to a list or a sublist
-%% within a certain key range
-%%
-%% Used instead of gb_trees to retain compatability of OTP16 (and Riak's
-%% ongoing dependency on OTP16)
-%%
-%% Not a proper skip list. Only supports a fixed depth. Good enough for the
-%% purposes of leveled. Also uses peculiar enkey_passed function within
-%% leveled. Not tested beyond a depth of 2.
-
--module(leveled_skiplist).
-
--include("include/leveled.hrl").
-
--export([
-        from_list/1,
-        from_list/2,
-        from_sortedlist/1,
-        from_sortedlist/2,
-        from_orderedset/1,
-        from_orderedset/2,
-        to_list/1,
-        enter/3,
-        enter/4,
-        enter_nolookup/3,
-        to_range/2,
-        to_range/3,
-        lookup/2,
-        lookup/3,
-        empty/0,
-        empty/1,
-        size/1
-        ]).
-
--include_lib("eunit/include/eunit.hrl").
-
--define(SKIP_WIDTH, 16).
--define(LIST_HEIGHT, 2).
--define(INFINITY_KEY, {null, null, null, null, null}).
--define(BITARRAY_SIZE, 2048).
-
-%%%============================================================================
-%%% SkipList API
-%%%============================================================================
-
-enter(Key, Value, SkipList) ->
-    Hash = leveled_codec:magic_hash(Key),
-    enter(Key, Hash, Value, SkipList).
-
-enter(Key, Hash, Value, SkipList) ->
-    Bloom0 =
-        case element(1, SkipList) of
-            list_only ->
-                list_only;
-            Bloom ->
-                leveled_tinybloom:enter({hash, Hash}, Bloom)
-        end,
-    {Bloom0,
-        enter(Key, Value, erlang:phash2(Key),
-                element(2, SkipList),
-                ?SKIP_WIDTH, ?LIST_HEIGHT)}.
-
-%% Can iterate over a key entered this way, but never lookup the key
-%% used for index terms
-%% The key may still be a marker key - and the much cheaper native hash
-%% is used to dtermine this, avoiding the more expensive magic hash
-enter_nolookup(Key, Value, SkipList) ->
-    {element(1, SkipList),
-        enter(Key, Value, erlang:phash2(Key),
-                element(2, SkipList),
-                ?SKIP_WIDTH, ?LIST_HEIGHT)}.
-
-from_orderedset(Table) ->
-    from_orderedset(Table, false).
-
-from_orderedset(Table, Bloom) ->
-    from_sortedlist(ets:tab2list(Table), Bloom).
-
-from_list(UnsortedKVL) ->
-    from_list(UnsortedKVL, false).
-
-from_list(UnsortedKVL, BloomProtect) ->
-    KVL = lists:ukeysort(1, UnsortedKVL),
-    from_sortedlist(KVL, BloomProtect).
-
-from_sortedlist(SortedKVL) ->
-    from_sortedlist(SortedKVL, false).
-
-from_sortedlist([], BloomProtect) ->
-    empty(BloomProtect);
-from_sortedlist(SortedKVL, BloomProtect) ->
-    Bloom0 =
-        case BloomProtect of
-            true ->
-                lists:foldr(fun({K, _V}, Bloom) ->
-                                leveled_tinybloom:enter(K, Bloom) end,
-                            leveled_tinybloom:empty(?SKIP_WIDTH),
-                            SortedKVL);
-            false ->
-                list_only
-        end,
-    {Bloom0, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}.
-
-lookup(Key, SkipList) ->
-    case element(1, SkipList) of
-        list_only ->
-            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
-        _ ->
-            lookup(Key, leveled_codec:magic_hash(Key), SkipList)
-    end.
-
-lookup(Key, Hash, SkipList) ->
-    case element(1, SkipList) of
-        list_only ->
-            list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
-        _ ->
-            case leveled_tinybloom:check({hash, Hash}, element(1, SkipList)) of
-                false ->
-                    none;
-                true ->
-                    list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT)
-            end
-    end.
-
-
-%% Rather than support iterator_from like gb_trees, will just an output a key
-%% sorted list for the desired range, which can the be iterated over as normal
-to_range(SkipList, Start) ->
-    to_range(element(2, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT).
-
-to_range(SkipList, Start, End) ->
-    to_range(element(2, SkipList), Start, End, ?LIST_HEIGHT).
-
-to_list(SkipList) ->
-    to_list(element(2, SkipList), ?LIST_HEIGHT).
-
-empty() ->
-    empty(false).
-
-empty(BloomProtect) ->
-    case BloomProtect of
-        true ->
-            {leveled_tinybloom:empty(?SKIP_WIDTH),
-                empty([], ?LIST_HEIGHT)};
-        false ->
-            {list_only, empty([], ?LIST_HEIGHT)}
-    end.
-
-size(SkipList) ->
-    size(element(2, SkipList), ?LIST_HEIGHT).
-
-
-%%%============================================================================
-%%% SkipList Base Functions
-%%%============================================================================
-
-enter(Key, Value, Hash, SkipList, Width, 1) ->
-    {MarkerKey, SubList} = find_mark(Key, SkipList),
-    case Hash rem Width of
-        0 ->
-            {LHS, RHS} = lists:splitwith(fun({K, _V}) ->
-                                                K =< Key end,
-                                            SubList),
-            SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
-            SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1],
-            lists:ukeysort(1, SkpL2);
-        _ ->
-            {LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < Key end, SubList),
-            UpdSubList =
-                case RHS of
-                    [] ->
-                        LHS ++ [{Key, Value}];
-                    [{FirstKey, _V}|RHSTail] ->
-                        case FirstKey of
-                            Key ->
-                                LHS ++ [{Key, Value}] ++ RHSTail;
-                            _ ->
-                                LHS ++ [{Key, Value}] ++ RHS
-                        end
-                end,
-            lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
-    end;
-enter(Key, Value, Hash, SkipList, Width, Level) ->
-    HashMatch = width(Level, Width),
-    {MarkerKey, SubSkipList} = find_mark(Key, SkipList),
-    UpdSubSkipList = enter(Key, Value, Hash, SubSkipList, Width, Level - 1),
-    case Hash rem HashMatch of
-        0 ->
-            %
-            {LHS, RHS} = lists:splitwith(fun({K, _V}) ->
-                                                K =< Key end,
-                                            UpdSubSkipList),
-            SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
-            lists:ukeysort(1, [{Key, LHS}|SkpL1]);
-        _ ->
-            % Need to replace Marker Key with sublist
-            lists:keyreplace(MarkerKey,
-                                1,
-                                SkipList,
-                                {MarkerKey, UpdSubSkipList})
-    end.
-
-from_list(SkipList, _SkipWidth, 0) ->
-    SkipList;
-from_list(KVList, SkipWidth, ListHeight) ->
-    L0 = length(KVList),
-    SL0 =
-        case L0 > SkipWidth of
-            true ->
-                from_list(KVList, L0, [], SkipWidth);
-            false ->
-                {LastK, _LastSL} = lists:last(KVList),
-                [{LastK, KVList}]
-        end,
-    from_list(SL0, SkipWidth, ListHeight - 1).
-
-from_list([], 0, SkipList, _SkipWidth) ->
-    SkipList;
-from_list(KVList, L, SkipList, SkipWidth) ->
-    SubLL = min(SkipWidth, L),
-    {Head, Tail} = lists:split(SubLL, KVList),
-    {LastK, _LastV} = lists:last(Head),
-    from_list(Tail, L - SubLL, SkipList ++ [{LastK, Head}], SkipWidth).
-
-
-list_lookup(Key, SkipList, 1) ->
-    SubList = get_sublist(Key, SkipList),
-    case lists:keyfind(Key, 1, SubList) of
-        false ->
-            none;
-        {Key, V} ->
-            {value, V}
-    end;
-list_lookup(Key, SkipList, Level) ->
-    SubList = get_sublist(Key, SkipList),
-    case SubList of
-        null ->
-            none;
-        _ ->
-            list_lookup(Key, SubList, Level - 1)
-    end.
-
-
-to_list(SkipList, 1) ->
-    lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList);
-to_list(SkipList, Level) ->
-    lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list(SL, Level - 1) end,
-                [],
-                SkipList).
-
-
-to_range(SkipList, StartKey, EndKey, ListHeight) ->
-    to_range(SkipList, StartKey, EndKey, ListHeight, [], true).
-
-to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) ->
-    SL = sublist_above(SkipList, StartKey, ListHeight, StartIncl),
-    case SL of
-        [] ->
-            Acc;
-        _ ->
-            {LK, _LV} = lists:last(SL),
-            case leveled_codec:endkey_passed(EndKey, LK) of
-                false ->
-                    to_range(SkipList,
-                                LK,
-                                EndKey,
-                                ListHeight,
-                                Acc ++ SL,
-                                false);
-                true ->
-                    SplitFun =
-                        fun({K, _V}) ->
-                            not leveled_codec:endkey_passed(EndKey, K) end,
-                    LHS = lists:takewhile(SplitFun, SL),
-                    Acc ++ LHS
-            end
-    end.
|
|
||||||
sublist_above(SkipList, StartKey, 0, StartIncl) ->
|
|
||||||
TestFun =
|
|
||||||
fun({K, _V}) ->
|
|
||||||
case StartIncl of
|
|
||||||
true ->
|
|
||||||
K < StartKey;
|
|
||||||
false ->
|
|
||||||
K =< StartKey
|
|
||||||
end end,
|
|
||||||
lists:dropwhile(TestFun, SkipList);
|
|
||||||
sublist_above(SkipList, StartKey, Level, StartIncl) ->
|
|
||||||
TestFun =
|
|
||||||
fun({K, _SL}) ->
|
|
||||||
case StartIncl of
|
|
||||||
true ->
|
|
||||||
K < StartKey;
|
|
||||||
false ->
|
|
||||||
K =< StartKey
|
|
||||||
end end,
|
|
||||||
RHS = lists:dropwhile(TestFun, SkipList),
|
|
||||||
case RHS of
|
|
||||||
[] ->
|
|
||||||
[];
|
|
||||||
[{_K, SL}|_Rest] ->
|
|
||||||
sublist_above(SL, StartKey, Level - 1, StartIncl)
|
|
||||||
end.
|
|
||||||
|
|
||||||
empty(SkipList, 1) ->
|
|
||||||
[{?INFINITY_KEY, SkipList}];
|
|
||||||
empty(SkipList, Level) ->
|
|
||||||
empty([{?INFINITY_KEY, SkipList}], Level - 1).
|
|
||||||
|
|
||||||
size(SkipList, 1) ->
|
|
||||||
lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList);
|
|
||||||
size(SkipList, Level) ->
|
|
||||||
lists:foldl(fun({_Mark, SL}, Acc) -> size(SL, Level - 1) + Acc end,
|
|
||||||
0,
|
|
||||||
SkipList).
|
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Internal Functions
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
width(1, Width) ->
|
|
||||||
Width;
|
|
||||||
width(N, Width) ->
|
|
||||||
width(N - 1, Width * Width).
|
|
||||||
|
|
||||||
find_mark(Key, SkipList) ->
|
|
||||||
lists:foldl(fun({Marker, SL}, Acc) ->
|
|
||||||
case Acc of
|
|
||||||
false ->
|
|
||||||
case Marker >= Key of
|
|
||||||
true ->
|
|
||||||
{Marker, SL};
|
|
||||||
false ->
|
|
||||||
Acc
|
|
||||||
end;
|
|
||||||
_ ->
|
|
||||||
Acc
|
|
||||||
end end,
|
|
||||||
false,
|
|
||||||
SkipList).
|
|
||||||
|
|
||||||
get_sublist(Key, SkipList) ->
|
|
||||||
lists:foldl(fun({SkipKey, SL}, Acc) ->
|
|
||||||
case {Acc, SkipKey} of
|
|
||||||
{null, SkipKey} when SkipKey >= Key ->
|
|
||||||
SL;
|
|
||||||
_ ->
|
|
||||||
Acc
|
|
||||||
end end,
|
|
||||||
null,
|
|
||||||
SkipList).
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Test
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
|
|
||||||
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
|
|
||||||
generate_randomkeys(Seqn,
|
|
||||||
Count,
|
|
||||||
[],
|
|
||||||
BucketRangeLow,
|
|
||||||
BucketRangeHigh).
|
|
||||||
|
|
||||||
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
|
|
||||||
Acc;
|
|
||||||
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
|
|
||||||
BNumber =
|
|
||||||
case BRange of
|
|
||||||
0 ->
|
|
||||||
string:right(integer_to_list(BucketLow), 4, $0);
|
|
||||||
_ ->
|
|
||||||
BRand = random:uniform(BRange),
|
|
||||||
string:right(integer_to_list(BucketLow + BRand), 4, $0)
|
|
||||||
end,
|
|
||||||
KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
|
|
||||||
{K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
|
|
||||||
{Seqn, {active, infinity}, null}},
|
|
||||||
generate_randomkeys(Seqn + 1,
|
|
||||||
Count - 1,
|
|
||||||
[{K, V}|Acc],
|
|
||||||
BucketLow,
|
|
||||||
BRange).
|
|
||||||
|
|
||||||
skiplist_small_test() ->
|
|
||||||
% Check nothing bad happens with very small lists
|
|
||||||
lists:foreach(fun(N) -> dotest_skiplist_small(N) end, lists:seq(1, 32)).
|
|
||||||
|
|
||||||
|
|
||||||
dotest_skiplist_small(N) ->
|
|
||||||
KL = generate_randomkeys(1, N, 1, 2),
|
|
||||||
SkipList1 =
|
|
||||||
lists:foldl(fun({K, V}, SL) ->
|
|
||||||
enter(K, V, SL)
|
|
||||||
end,
|
|
||||||
empty(),
|
|
||||||
KL),
|
|
||||||
SkipList2 = from_list(lists:reverse(KL)),
|
|
||||||
lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList1))
|
|
||||||
end,
|
|
||||||
lists:ukeysort(1, lists:reverse(KL))),
|
|
||||||
lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList2))
|
|
||||||
end,
|
|
||||||
lists:ukeysort(1, lists:reverse(KL))).
|
|
||||||
|
|
||||||
skiplist_withbloom_test() ->
|
|
||||||
io:format(user, "~n~nBloom protected skiplist test:~n~n", []),
|
|
||||||
skiplist_tester(true).
|
|
||||||
|
|
||||||
skiplist_nobloom_test() ->
|
|
||||||
io:format(user, "~n~nBloom free skiplist test:~n~n", []),
|
|
||||||
skiplist_tester(false).
|
|
||||||
|
|
||||||
skiplist_tester(Bloom) ->
|
|
||||||
N = 4000,
|
|
||||||
KL = generate_randomkeys(1, N, 1, N div 5),
|
|
||||||
|
|
||||||
OS = ets:new(test, [ordered_set, private]),
|
|
||||||
ets:insert(OS, KL),
|
|
||||||
SWaETS = os:timestamp(),
|
|
||||||
SkipList = from_orderedset(OS, Bloom),
|
|
||||||
io:format(user, "Generating skip list with ~w keys in ~w microseconds " ++
|
|
||||||
"from ordered set~n",
|
|
||||||
[N, timer:now_diff(os:timestamp(), SWaETS)]),
|
|
||||||
|
|
||||||
SWaGSL = os:timestamp(),
|
|
||||||
SkipList = from_list(lists:reverse(KL), Bloom),
|
|
||||||
io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++
|
|
||||||
"Top level key count of ~w~n",
|
|
||||||
[N,
|
|
||||||
timer:now_diff(os:timestamp(), SWaGSL),
|
|
||||||
length(element(2, SkipList))]),
|
|
||||||
io:format(user, "Second tier key counts of ~w~n",
|
|
||||||
[lists:map(fun({_L, SL}) -> length(SL) end,
|
|
||||||
element(2, SkipList))]),
|
|
||||||
KLSorted = lists:ukeysort(1, lists:reverse(KL)),
|
|
||||||
|
|
||||||
SWaGSL2 = os:timestamp(),
|
|
||||||
SkipList = from_sortedlist(KLSorted, Bloom),
|
|
||||||
io:format(user, "Generating skip list with ~w sorted keys in ~w " ++
|
|
||||||
"microseconds~n",
|
|
||||||
[N, timer:now_diff(os:timestamp(), SWaGSL2)]),
|
|
||||||
|
|
||||||
SWaDSL = os:timestamp(),
|
|
||||||
SkipList1 =
|
|
||||||
lists:foldl(fun({K, V}, SL) ->
|
|
||||||
enter(K, V, SL)
|
|
||||||
end,
|
|
||||||
empty(Bloom),
|
|
||||||
KL),
|
|
||||||
io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++
|
|
||||||
"microseconds~n" ++
|
|
||||||
"Top level key count of ~w~n",
|
|
||||||
[N,
|
|
||||||
timer:now_diff(os:timestamp(), SWaDSL),
|
|
||||||
length(element(2, SkipList1))]),
|
|
||||||
io:format(user, "Second tier key counts of ~w~n",
|
|
||||||
[lists:map(fun({_L, SL}) -> length(SL) end,
|
|
||||||
element(2, SkipList1))]),
|
|
||||||
|
|
||||||
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
|
|
||||||
skiplist_timingtest(KLSorted, SkipList, N, Bloom),
|
|
||||||
|
|
||||||
io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
|
|
||||||
skiplist_timingtest(KLSorted, SkipList1, N, Bloom).
|
|
||||||
|
|
||||||
|
|
||||||
skiplist_timingtest(KL, SkipList, N, Bloom) ->
|
|
||||||
io:format(user, "Timing tests on skiplist of size ~w~n",
|
|
||||||
[leveled_skiplist:size(SkipList)]),
|
|
||||||
CheckList1 = lists:sublist(KL, N div 4, 200),
|
|
||||||
CheckList2 = lists:sublist(KL, N div 3, 200),
|
|
||||||
CheckList3 = lists:sublist(KL, N div 2, 200),
|
|
||||||
CheckList4 = lists:sublist(KL, N - 1000, 200),
|
|
||||||
CheckList5 = lists:sublist(KL, N - 500, 200),
|
|
||||||
CheckList6 = lists:sublist(KL, 1, 10),
|
|
||||||
CheckList7 = lists:nthtail(N - 200, KL),
|
|
||||||
CheckList8 = lists:sublist(KL, N div 2, 1),
|
|
||||||
CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
|
|
||||||
CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,
|
|
||||||
|
|
||||||
SWb = os:timestamp(),
|
|
||||||
lists:foreach(fun({K, V}) ->
|
|
||||||
?assertMatch({value, V}, lookup(K, SkipList))
|
|
||||||
end,
|
|
||||||
CheckAll),
|
|
||||||
io:format(user, "Finding 1020 keys took ~w microseconds~n",
|
|
||||||
[timer:now_diff(os:timestamp(), SWb)]),
|
|
||||||
|
|
||||||
RangeFun =
|
|
||||||
fun(SkipListToQuery, CheckListForQ, Assert) ->
|
|
||||||
KR =
|
|
||||||
to_range(SkipListToQuery,
|
|
||||||
element(1, lists:nth(1, CheckListForQ)),
|
|
||||||
element(1, lists:last(CheckListForQ))),
|
|
||||||
case Assert of
|
|
||||||
true ->
|
|
||||||
CompareL = length(lists:usort(CheckListForQ)),
|
|
||||||
?assertMatch(CompareL, length(KR));
|
|
||||||
false ->
|
|
||||||
KR
|
|
||||||
end
|
|
||||||
end,
|
|
||||||
|
|
||||||
SWc = os:timestamp(),
|
|
||||||
RangeFun(SkipList, CheckList1, true),
|
|
||||||
RangeFun(SkipList, CheckList2, true),
|
|
||||||
RangeFun(SkipList, CheckList3, true),
|
|
||||||
RangeFun(SkipList, CheckList4, true),
|
|
||||||
RangeFun(SkipList, CheckList5, true),
|
|
||||||
RangeFun(SkipList, CheckList6, true),
|
|
||||||
RangeFun(SkipList, CheckList7, true),
|
|
||||||
RangeFun(SkipList, CheckList8, true),
|
|
||||||
|
|
||||||
KL_OOR1 = generate_randomkeys(1, 4, N div 5 + 1, N div 5 + 10),
|
|
||||||
KR9 = RangeFun(SkipList, KL_OOR1, false),
|
|
||||||
?assertMatch([], KR9),
|
|
||||||
|
|
||||||
KL_OOR2 = generate_randomkeys(1, 4, 0, 0),
|
|
||||||
KR10 = RangeFun(SkipList, KL_OOR2, false),
|
|
||||||
?assertMatch([], KR10),
|
|
||||||
|
|
||||||
io:format(user, "Finding 10 ranges took ~w microseconds~n",
|
|
||||||
[timer:now_diff(os:timestamp(), SWc)]),
|
|
||||||
|
|
||||||
AltKL1 = generate_randomkeys(1, 2000, 1, 200),
|
|
||||||
SWd0 = os:timestamp(),
|
|
||||||
lists:foreach(fun({K, _V}) ->
|
|
||||||
lookup(K, SkipList)
|
|
||||||
end,
|
|
||||||
AltKL1),
|
|
||||||
io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n",
|
|
||||||
[timer:now_diff(os:timestamp(), SWd0)]),
|
|
||||||
SWd1 = os:timestamp(),
|
|
||||||
lists:foreach(fun({K, _V}) ->
|
|
||||||
leveled_codec:magic_hash(K)
|
|
||||||
end,
|
|
||||||
AltKL1),
|
|
||||||
io:format(user, "Generating 2000 magic hashes took ~w microseconds~n",
|
|
||||||
[timer:now_diff(os:timestamp(), SWd1)]),
|
|
||||||
SWd2 = os:timestamp(),
|
|
||||||
lists:foreach(fun({K, _V}) ->
|
|
||||||
erlang:phash2(K)
|
|
||||||
end,
|
|
||||||
AltKL1),
|
|
||||||
io:format(user, "Generating 2000 not so magic hashes took ~w microseconds~n",
|
|
||||||
[timer:now_diff(os:timestamp(), SWd2)]),
|
|
||||||
|
|
||||||
AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300),
|
|
||||||
SWe = os:timestamp(),
|
|
||||||
lists:foreach(fun({K, _V}) ->
|
|
||||||
none = lookup(K, SkipList)
|
|
||||||
end,
|
|
||||||
AltKL2),
|
|
||||||
io:format(user, "Getting 1000 missing keys above range took ~w " ++
|
|
||||||
"microseconds~n",
|
|
||||||
[timer:now_diff(os:timestamp(), SWe)]),
|
|
||||||
AltKL3 = generate_randomkeys(1, 1000, 0, 0),
|
|
||||||
SWf = os:timestamp(),
|
|
||||||
lists:foreach(fun({K, _V}) ->
|
|
||||||
none = lookup(K, SkipList)
|
|
||||||
end,
|
|
||||||
AltKL3),
|
|
||||||
io:format(user, "Getting 1000 missing keys below range took ~w " ++
|
|
||||||
"microseconds~n",
|
|
||||||
[timer:now_diff(os:timestamp(), SWf)]),
|
|
||||||
|
|
||||||
SWg = os:timestamp(),
|
|
||||||
FlatList = to_list(SkipList),
|
|
||||||
io:format(user, "Flattening skiplist took ~w microseconds~n",
|
|
||||||
[timer:now_diff(os:timestamp(), SWg)]),
|
|
||||||
?assertMatch(KL, FlatList),
|
|
||||||
|
|
||||||
case Bloom of
|
|
||||||
true ->
|
|
||||||
HashList = lists:map(fun(_X) ->
|
|
||||||
random:uniform(4294967295) end,
|
|
||||||
lists:seq(1, 2000)),
|
|
||||||
SWh = os:timestamp(),
|
|
||||||
lists:foreach(fun(X) ->
|
|
||||||
lookup(X, X, SkipList) end,
|
|
||||||
HashList),
|
|
||||||
io:format(user,
|
|
||||||
"Getting 2000 missing keys when hash was known " ++
|
|
||||||
"took ~w microseconds~n",
|
|
||||||
[timer:now_diff(os:timestamp(), SWh)]);
|
|
||||||
false ->
|
|
||||||
ok
|
|
||||||
end.
|
|
||||||
|
|
||||||
define_kv(X) ->
|
|
||||||
{{o, "Bucket", "Key" ++ string:right(integer_to_list(X), 6), null},
|
|
||||||
{X, {active, infinity}, null}}.
|
|
||||||
|
|
||||||
skiplist_roundsize_test() ->
|
|
||||||
KVL = lists:map(fun(X) -> define_kv(X) end, lists:seq(1, 4096)),
|
|
||||||
SkipList = from_list(KVL),
|
|
||||||
lists:foreach(fun({K, V}) ->
|
|
||||||
?assertMatch({value, V}, lookup(K, SkipList)) end,
|
|
||||||
KVL),
|
|
||||||
lists:foreach(fun(X) ->
|
|
||||||
{KS, _VS} = define_kv(X * 32 + 1),
|
|
||||||
{KE, _VE} = define_kv((X + 1) * 32),
|
|
||||||
R = to_range(SkipList, KS, KE),
|
|
||||||
L = lists:sublist(KVL,
|
|
||||||
X * 32 + 1,
|
|
||||||
32),
|
|
||||||
?assertMatch(L, R) end,
|
|
||||||
lists:seq(0, 24)).
|
|
||||||
|
|
||||||
skiplist_nolookup_test() ->
|
|
||||||
N = 4000,
|
|
||||||
KL = generate_randomkeys(1, N, 1, N div 5),
|
|
||||||
SkipList = lists:foldl(fun({K, V}, Acc) ->
|
|
||||||
enter_nolookup(K, V, Acc) end,
|
|
||||||
empty(true),
|
|
||||||
KL),
|
|
||||||
KLSorted = lists:ukeysort(1, lists:reverse(KL)),
|
|
||||||
lists:foreach(fun({K, _V}) ->
|
|
||||||
?assertMatch(none, lookup(K, SkipList)) end,
|
|
||||||
KL),
|
|
||||||
?assertMatch(KLSorted, to_list(SkipList)).
|
|
||||||
|
|
||||||
skiplist_range_test() ->
|
|
||||||
N = 150,
|
|
||||||
KL = generate_randomkeys(1, N, 1, N div 5),
|
|
||||||
|
|
||||||
KLSL1 = lists:sublist(lists:ukeysort(1, KL), 128),
|
|
||||||
SkipList1 = from_list(KLSL1),
|
|
||||||
{LastK1, V1} = lists:last(KLSL1),
|
|
||||||
R1 = to_range(SkipList1, LastK1, LastK1),
|
|
||||||
?assertMatch([{LastK1, V1}], R1),
|
|
||||||
|
|
||||||
KLSL2 = lists:sublist(lists:ukeysort(1, KL), 127),
|
|
||||||
SkipList2 = from_list(KLSL2),
|
|
||||||
{LastK2, V2} = lists:last(KLSL2),
|
|
||||||
R2 = to_range(SkipList2, LastK2, LastK2),
|
|
||||||
?assertMatch([{LastK2, V2}], R2),
|
|
||||||
|
|
||||||
KLSL3 = lists:sublist(lists:ukeysort(1, KL), 129),
|
|
||||||
SkipList3 = from_list(KLSL3),
|
|
||||||
{LastK3, V3} = lists:last(KLSL3),
|
|
||||||
R3 = to_range(SkipList3, LastK3, LastK3),
|
|
||||||
?assertMatch([{LastK3, V3}], R3),
|
|
||||||
|
|
||||||
{FirstK4, V4} = lists:nth(1, KLSL3),
|
|
||||||
R4 = to_range(SkipList3, FirstK4, FirstK4),
|
|
||||||
?assertMatch([{FirstK4, V4}], R4).
|
|
||||||
|
|
||||||
|
|
||||||
empty_skiplist_size_test() ->
|
|
||||||
?assertMatch(0, leveled_skiplist:size(empty(false))),
|
|
||||||
?assertMatch(0, leveled_skiplist:size(empty(true))).
|
|
||||||
|
|
||||||
-endif.
@ -77,6 +77,8 @@
 -define(INDEX_MARKER_WIDTH, 16).
 -define(DISCARD_EXT, ".discarded").
 -define(DELETE_TIMEOUT, 10000).
+-define(TREE_TYPE, idxt).
+-define(TREE_SIZE, 4).

 -include_lib("eunit/include/eunit.hrl").
@ -676,93 +678,37 @@ generate_filenames(RootFilename) ->
 %% The Slot Index is stored as a flat (sorted) list of {Key, Slot} where Key
 %% is the last key within the slot.
 %%
-%% This implementation of the SlotIndex stores it as a tuple with the original
-%% list as the second element and a list of mark points as the first element
-%% containing every 16th key. The Mark points are stored as {Mark, Index},
-%% where the Index correspnds with the nth point in the original list that the
-%% Mark occurs.
-
-from_list(SlotList) ->
-    L = length(SlotList),
-    MarkerList = set_marks(lists:reverse(SlotList),
-                            {?INDEX_MARKER_WIDTH, L rem ?INDEX_MARKER_WIDTH},
-                            L,
-                            []),
-    {MarkerList, SlotList}.
-
-set_marks([], _MarkInfo, 0, MarkerList) ->
-    MarkerList;
-set_marks([{Key, _Slot}|Rest], {MarkerWidth, MarkPoint}, Count, MarkerList) ->
-    case Count rem MarkerWidth of
-        MarkPoint ->
-            set_marks(Rest,
-                        {MarkerWidth, MarkPoint},
-                        Count - 1,
-                        [{Key, Count}|MarkerList]);
-        _ ->
-            set_marks(Rest,
-                        {MarkerWidth, MarkPoint},
-                        Count - 1,
-                        MarkerList)
-    end.
-
-find_mark(Key, [{Mark, Pos}|_Rest]) when Mark >= Key ->
-    Pos;
-find_mark(Key, [_H|T]) ->
-    find_mark(Key, T).
-
-lookup_slot(Key, {MarkerList, SlotList}) ->
-    Pos = find_mark(Key, MarkerList),
-    SubList = lists:sublist(SlotList, max(1, Pos - ?INDEX_MARKER_WIDTH), Pos),
-    Slot = find_mark(Key, SubList),
-    Slot.
-
-%% Returns a section from the summary index and two booleans to indicate if
-%% the first slot needs trimming, or the last slot
-lookup_slots(StartKey, EndKey, {_MarkerList, SlotList}) ->
-    SlotsOnlyFun = fun({_K, V}) -> V end,
-    {KSL, LTrim, RTrim} = lookup_slots_int(StartKey, EndKey, SlotList),
-    {lists:map(SlotsOnlyFun, KSL), LTrim, RTrim}.
-
-lookup_slots_int(all, all, SlotList) ->
-    {SlotList, false, false};
-lookup_slots_int(StartKey, all, SlotList) ->
-    LTrimFun = fun({K, _V}) -> K < StartKey end,
-    {_LDrop, RKeep0} = lists:splitwith(LTrimFun, SlotList),
-    {RKeep0, true, false};
-lookup_slots_int(StartKey, EndKey, SlotList) ->
-    {RKeep, true, false} = lookup_slots_int(StartKey, all, SlotList),
-    [LeftMost|RKeep0] = RKeep,
-    {LeftMostK, LeftMostV} = LeftMost,
-    RTrimFun = fun({K, _V}) -> not leveled_codec:endkey_passed(EndKey, K) end,
-    case leveled_codec:endkey_passed(EndKey, LeftMostK) of
-        true ->
-            {[{LeftMostK, LeftMostV}],
-                true,
-                true};
-        false ->
-            case LeftMostK of
-                EndKey ->
-                    {[{LeftMostK, LeftMostV}],
-                        true,
-                        false};
-                _ ->
-                    {LKeep, RDisc} = lists:splitwith(RTrimFun, RKeep0),
-                    case RDisc of
-                        [] ->
-                            {[LeftMost|LKeep],
-                                true,
-                                true};
-                        [{RDiscK1, RDiscV1}|_Rest] when RDiscK1 == EndKey ->
-                            {[LeftMost|LKeep] ++ [{RDiscK1, RDiscV1}],
-                                true,
-                                false};
-                        [{RDiscK1, RDiscV1}|_Rest] ->
-                            {[LeftMost|LKeep] ++ [{RDiscK1, RDiscV1}],
-                                true,
-                                true}
-                    end
-            end
-    end.
+%% This implementation of the SlotIndex uses leveled_tree
+
+from_list(SlotList) ->
+    leveled_tree:from_orderedlist(SlotList, ?TREE_TYPE, ?TREE_SIZE).
+
+lookup_slot(Key, Tree) ->
+    StartKeyFun =
+        fun(_V) ->
+            all
+        end,
+    % The penciller should never ask for presence out of range - so will
+    % always return a slot (As we don't compare to StartKey)
+    {_LK, Slot} = leveled_tree:search(Key, Tree, StartKeyFun),
+    Slot.
+
+lookup_slots(StartKey, EndKey, Tree) ->
+    StartKeyFun =
+        fun(_V) ->
+            all
+        end,
+    MapFun =
+        fun({_LK, Slot}) ->
+            Slot
+        end,
+    SlotList = leveled_tree:search_range(StartKey, EndKey, Tree, StartKeyFun),
+    {EK, _EndSlot} = lists:last(SlotList),
+    case EK of
+        EndKey ->
+            {lists:map(MapFun, SlotList), true, false};
+        _ ->
+            {lists:map(MapFun, SlotList), true, true}
+    end.
|
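To make the new lookup path concrete, here is an illustrative sketch under assumptions (not code from this commit): the slot index is now a leveled_tree of {LastKeyInSlot, Slot} pairs, and a point lookup returns the first slot whose last key is greater than or equal to the searched key. The key and slot terms below are invented for the example.

%% Illustrative only - builds a small idxt-type slot index and looks up a key.
slotindex_sketch() ->
    SlotList = [{{o, "B1", "K" ++ integer_to_list(N * 100), null}, {slot, N}}
                    || N <- lists:seq(1, 8)],
    Tree = leveled_tree:from_orderedlist(SlotList, idxt, 4),
    StartKeyFun = fun(_V) -> all end,
    %% "K150" falls in the slot whose last key is "K200", so {slot, 2} comes back
    {_LastKey, {slot, 2}} = leveled_tree:search({o, "B1", "K150", null},
                                                Tree,
                                                StartKeyFun),
    ok.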
@ -1,159 +0,0 @@
|
||||||
%% -------- TINY BLOOM ---------
|
|
||||||
%%
|
|
||||||
%% For sheltering relatively expensive lookups with a probabilistic check
|
|
||||||
%%
|
|
||||||
%% Uses multiple 512 byte blooms. Can sensibly hold up to 1000 keys per array.
|
|
||||||
%% Even at 1000 keys should still offer only a 20% false positive
|
|
||||||
%%
|
|
||||||
%% Restricted to no more than 256 arrays - so can't handle more than 250K keys
|
|
||||||
%% in total
|
|
||||||
%%
|
|
||||||
%% Implemented this way to make it easy to control false positive (just by
|
|
||||||
%% setting the width). Also only requires binary manipulations of a single
|
|
||||||
%% hash
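For reference while reviewing the removal, a minimal sketch (not from this commit) of how the module was used: keys go in with enter/2 and membership is tested with check/2, which may return false positives but never false negatives.

%% Illustrative only - round-trip a few keys through the (removed) bloom.
bloom_sketch() ->
    Keys = ["alpha", "beta", "gamma"],
    Bloom = lists:foldl(fun leveled_tinybloom:enter/2,
                        leveled_tinybloom:empty(4),
                        Keys),
    true = leveled_tinybloom:check("alpha", Bloom),
    %% An absent key is normally false, but may occasionally be a false positive
    _ = leveled_tinybloom:check("delta", Bloom),
    ok.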
|
|
||||||
|
|
||||||
-module(leveled_tinybloom).
|
|
||||||
|
|
||||||
-include("include/leveled.hrl").
|
|
||||||
|
|
||||||
-export([
|
|
||||||
enter/2,
|
|
||||||
check/2,
|
|
||||||
empty/1
|
|
||||||
]).
|
|
||||||
|
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Bloom API
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
empty(Width) when Width =< 256 ->
|
|
||||||
FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end,
|
|
||||||
lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)).
|
|
||||||
|
|
||||||
enter({hash, no_lookup}, Bloom) ->
|
|
||||||
Bloom;
|
|
||||||
enter({hash, Hash}, Bloom) ->
|
|
||||||
{Slot0, Bit1, Bit2} = split_hash(Hash),
|
|
||||||
Slot = Slot0 rem dict:size(Bloom),
|
|
||||||
BitArray0 = dict:fetch(Slot, Bloom),
|
|
||||||
FoldFun =
|
|
||||||
fun(Bit, Arr) -> add_to_array(Bit, Arr, 4096) end,
|
|
||||||
BitArray1 = lists:foldl(FoldFun,
|
|
||||||
BitArray0,
|
|
||||||
lists:usort([Bit1, Bit2])),
|
|
||||||
dict:store(Slot, <<BitArray1/binary>>, Bloom);
|
|
||||||
enter(Key, Bloom) ->
|
|
||||||
Hash = leveled_codec:magic_hash(Key),
|
|
||||||
enter({hash, Hash}, Bloom).
|
|
||||||
|
|
||||||
|
|
||||||
check({hash, Hash}, Bloom) ->
|
|
||||||
{Slot0, Bit1, Bit2} = split_hash(Hash),
|
|
||||||
Slot = Slot0 rem dict:size(Bloom),
|
|
||||||
BitArray = dict:fetch(Slot, Bloom),
|
|
||||||
|
|
||||||
case getbit(Bit1, BitArray, 4096) of
|
|
||||||
<<0:1>> ->
|
|
||||||
false;
|
|
||||||
<<1:1>> ->
|
|
||||||
case getbit(Bit2, BitArray, 4096) of
|
|
||||||
<<0:1>> ->
|
|
||||||
false;
|
|
||||||
<<1:1>> ->
|
|
||||||
true
|
|
||||||
end
|
|
||||||
end;
|
|
||||||
check(Key, Bloom) ->
|
|
||||||
Hash = leveled_codec:magic_hash(Key),
|
|
||||||
check({hash, Hash}, Bloom).
|
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Internal Functions
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
split_hash(Hash) ->
|
|
||||||
H0 = Hash band 255,
|
|
||||||
H1 = (Hash bsr 8) band 4095,
|
|
||||||
H2 = Hash bsr 20,
|
|
||||||
{H0, H1, H2}.
|
|
||||||
|
|
||||||
add_to_array(Bit, BitArray, ArrayLength) ->
|
|
||||||
RestLen = ArrayLength - Bit - 1,
|
|
||||||
<<Head:Bit/bitstring,
|
|
||||||
_B:1/integer,
|
|
||||||
Rest:RestLen/bitstring>> = BitArray,
|
|
||||||
<<Head/bitstring, 1:1, Rest/bitstring>>.
|
|
||||||
|
|
||||||
getbit(Bit, BitArray, ArrayLength) ->
|
|
||||||
RestLen = ArrayLength - Bit - 1,
|
|
||||||
<<_Head:Bit/bitstring,
|
|
||||||
B:1/bitstring,
|
|
||||||
_Rest:RestLen/bitstring>> = BitArray,
|
|
||||||
B.
|
|
||||||
|
|
||||||
|
|
||||||
%%%============================================================================
|
|
||||||
%%% Test
|
|
||||||
%%%============================================================================
|
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
|
|
||||||
simple_test() ->
|
|
||||||
N = 4000,
|
|
||||||
W = 6,
|
|
||||||
KLin = lists:map(fun(X) -> "Key_" ++
|
|
||||||
integer_to_list(X) ++
|
|
||||||
integer_to_list(random:uniform(100)) ++
|
|
||||||
binary_to_list(crypto:rand_bytes(2))
|
|
||||||
end,
|
|
||||||
lists:seq(1, N)),
|
|
||||||
KLout = lists:map(fun(X) ->
|
|
||||||
"NotKey_" ++
|
|
||||||
integer_to_list(X) ++
|
|
||||||
integer_to_list(random:uniform(100)) ++
|
|
||||||
binary_to_list(crypto:rand_bytes(2))
|
|
||||||
end,
|
|
||||||
lists:seq(1, N)),
|
|
||||||
SW0_PH = os:timestamp(),
|
|
||||||
lists:foreach(fun(X) -> erlang:phash2(X) end, KLin),
|
|
||||||
io:format(user,
|
|
||||||
"~nNative hash function hashes ~w keys in ~w microseconds~n",
|
|
||||||
[N, timer:now_diff(os:timestamp(), SW0_PH)]),
|
|
||||||
SW0_MH = os:timestamp(),
|
|
||||||
lists:foreach(fun(X) -> leveled_codec:magic_hash(X) end, KLin),
|
|
||||||
io:format(user,
|
|
||||||
"~nMagic hash function hashes ~w keys in ~w microseconds~n",
|
|
||||||
[N, timer:now_diff(os:timestamp(), SW0_MH)]),
|
|
||||||
|
|
||||||
SW1 = os:timestamp(),
|
|
||||||
Bloom = lists:foldr(fun enter/2, empty(W), KLin),
|
|
||||||
io:format(user,
|
|
||||||
"~nAdding ~w keys to bloom took ~w microseconds~n",
|
|
||||||
[N, timer:now_diff(os:timestamp(), SW1)]),
|
|
||||||
|
|
||||||
SW2 = os:timestamp(),
|
|
||||||
lists:foreach(fun(X) -> ?assertMatch(true, check(X, Bloom)) end, KLin),
|
|
||||||
io:format(user,
|
|
||||||
"~nChecking ~w keys in bloom took ~w microseconds~n",
|
|
||||||
[N, timer:now_diff(os:timestamp(), SW2)]),
|
|
||||||
|
|
||||||
SW3 = os:timestamp(),
|
|
||||||
FP = lists:foldr(fun(X, Acc) -> case check(X, Bloom) of
|
|
||||||
true -> Acc + 1;
|
|
||||||
false -> Acc
|
|
||||||
end end,
|
|
||||||
0,
|
|
||||||
KLout),
|
|
||||||
io:format(user,
|
|
||||||
"~nChecking ~w keys out of bloom took ~w microseconds " ++
|
|
||||||
"with ~w false positive rate~n",
|
|
||||||
[N, timer:now_diff(os:timestamp(), SW3), FP / N]),
|
|
||||||
?assertMatch(true, FP < (N div 4)).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-endif.
|
|
src/leveled_tree.erl (new file, 731 lines)
|
@ -0,0 +1,731 @@
|
||||||
|
%% -------- TREE ---------
|
||||||
|
%%
|
||||||
|
%% This module is intended to address two issues
|
||||||
|
%% - the lack of iterator_from support in OTP16 gb_trees
|
||||||
|
%% - the time to convert from/to list in gb_trees
|
||||||
|
%%
|
||||||
|
%% Leveled had had a skiplist implementation previously, and this is a
|
||||||
|
%% variation on that. The Tree in this case is a bunch of sublists of length
|
||||||
|
%% SKIP_WIDTH with the start_keys in a gb_tree.
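As a quick orientation for the new module (an illustrative sketch, not part of the file): the same ordered {Key, Value} list can back any of the three tree types, match/2 gives gb_trees-style point lookups, and match_range/3 stands in for the iterator_from support that OTP16 gb_trees lacks.

%% Illustrative only - not part of leveled_tree.erl.
tree_sketch() ->
    KVL = [{N * 4, N} || N <- lists:seq(1, 100)],
    Tree = leveled_tree:from_orderedlist(KVL, skpl),   % tree | idxt | skpl
    {value, 10} = leveled_tree:match(40, Tree),
    none = leveled_tree:match(41, Tree),
    %% Every entry from key 40 up to and including key 80
    Range = leveled_tree:match_range(40, 80, Tree),
    11 = length(Range),
    ok.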
|
||||||
|
|
||||||
|
-module(leveled_tree).
|
||||||
|
|
||||||
|
-include("include/leveled.hrl").
|
||||||
|
|
||||||
|
-export([
|
||||||
|
from_orderedlist/2,
|
||||||
|
from_orderedset/2,
|
||||||
|
from_orderedlist/3,
|
||||||
|
from_orderedset/3,
|
||||||
|
to_list/1,
|
||||||
|
match_range/3,
|
||||||
|
search_range/4,
|
||||||
|
match/2,
|
||||||
|
search/3,
|
||||||
|
tsize/1,
|
||||||
|
empty/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
|
||||||
|
-define(SKIP_WIDTH, 16).
|
||||||
|
|
||||||
|
|
||||||
|
%%%============================================================================
|
||||||
|
%%% API
|
||||||
|
%%%============================================================================
|
||||||
|
|
||||||
|
from_orderedset(Table, Type) ->
|
||||||
|
from_orderedlist(ets:tab2list(Table), Type, ?SKIP_WIDTH).
|
||||||
|
|
||||||
|
from_orderedset(Table, Type, SkipWidth) ->
|
||||||
|
from_orderedlist(ets:tab2list(Table), Type, SkipWidth).
|
||||||
|
|
||||||
|
|
||||||
|
from_orderedlist(OrderedList, Type) ->
|
||||||
|
from_orderedlist(OrderedList, Type, ?SKIP_WIDTH).
|
||||||
|
|
||||||
|
from_orderedlist(OrderedList, tree, SkipWidth) ->
|
||||||
|
L = length(OrderedList),
|
||||||
|
{tree, L, tree_fromorderedlist(OrderedList, [], L, SkipWidth)};
|
||||||
|
from_orderedlist(OrderedList, idxt, SkipWidth) ->
|
||||||
|
L = length(OrderedList),
|
||||||
|
{idxt, L, idxt_fromorderedlist(OrderedList, {[], [], 1}, L, SkipWidth)};
|
||||||
|
from_orderedlist(OrderedList, skpl, _SkipWidth) ->
|
||||||
|
L = length(OrderedList),
|
||||||
|
SkipWidth =
|
||||||
|
% Autosize the skip width
|
||||||
|
case L of
|
||||||
|
L when L > 4096 -> 32;
|
||||||
|
L when L > 512 -> 16;
|
||||||
|
L when L > 64 -> 8;
|
||||||
|
_ -> 4
|
||||||
|
end,
|
||||||
|
{skpl, L, skpl_fromorderedlist(OrderedList, L, SkipWidth, 2)}.
|
||||||
|
|
||||||
|
|
||||||
|
match(Key, {tree, _L, Tree}) ->
|
||||||
|
Iter = tree_iterator_from(Key, Tree),
|
||||||
|
case tree_next(Iter) of
|
||||||
|
none ->
|
||||||
|
none;
|
||||||
|
{_NK, SL, _Iter} ->
|
||||||
|
lookup_match(Key, SL)
|
||||||
|
end;
|
||||||
|
match(Key, {idxt, _L, {TLI, IDX}}) ->
|
||||||
|
Iter = tree_iterator_from(Key, IDX),
|
||||||
|
case tree_next(Iter) of
|
||||||
|
none ->
|
||||||
|
none;
|
||||||
|
{_NK, ListID, _Iter} ->
|
||||||
|
lookup_match(Key, element(ListID, TLI))
|
||||||
|
end;
|
||||||
|
match(Key, {skpl, _L, SkipList}) ->
|
||||||
|
SL0 = skpl_getsublist(Key, SkipList),
|
||||||
|
lookup_match(Key, SL0).
|
||||||
|
|
||||||
|
search(Key, {tree, _L, Tree}, StartKeyFun) ->
|
||||||
|
Iter = tree_iterator_from(Key, Tree),
|
||||||
|
case tree_next(Iter) of
|
||||||
|
none ->
|
||||||
|
none;
|
||||||
|
{_NK, SL, _Iter} ->
|
||||||
|
{K, V} = lookup_best(Key, SL),
|
||||||
|
case Key < StartKeyFun(V) of
|
||||||
|
true ->
|
||||||
|
none;
|
||||||
|
false ->
|
||||||
|
{K, V}
|
||||||
|
end
|
||||||
|
end;
|
||||||
|
search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) ->
|
||||||
|
Iter = tree_iterator_from(Key, IDX),
|
||||||
|
case tree_next(Iter) of
|
||||||
|
none ->
|
||||||
|
none;
|
||||||
|
{_NK, ListID, _Iter} ->
|
||||||
|
{K, V} = lookup_best(Key, element(ListID, TLI)),
|
||||||
|
case Key < StartKeyFun(V) of
|
||||||
|
true ->
|
||||||
|
none;
|
||||||
|
false ->
|
||||||
|
{K, V}
|
||||||
|
end
|
||||||
|
end;
|
||||||
|
search(Key, {skpl, _L, SkipList}, StartKeyFun) ->
|
||||||
|
SL0 = skpl_getsublist(Key, SkipList),
|
||||||
|
case lookup_best(Key, SL0) of
|
||||||
|
{K, V} ->
|
||||||
|
case Key < StartKeyFun(V) of
|
||||||
|
true ->
|
||||||
|
none;
|
||||||
|
false ->
|
||||||
|
{K, V}
|
||||||
|
end;
|
||||||
|
none ->
|
||||||
|
none
|
||||||
|
end.
|
||||||
|
|
||||||
|
match_range(StartRange, EndRange, Tree) ->
|
||||||
|
EndRangeFun =
|
||||||
|
fun(ER, FirstRHSKey, _FirstRHSValue) ->
|
||||||
|
ER == FirstRHSKey
|
||||||
|
end,
|
||||||
|
match_range(StartRange, EndRange, Tree, EndRangeFun).
|
||||||
|
|
||||||
|
match_range(StartRange, EndRange, {tree, _L, Tree}, EndRangeFun) ->
|
||||||
|
treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun);
|
||||||
|
match_range(StartRange, EndRange, {idxt, _L, Tree}, EndRangeFun) ->
|
||||||
|
idxtlookup_range_start(StartRange, EndRange, Tree, EndRangeFun);
|
||||||
|
match_range(StartRange, EndRange, {skpl, _L, SkipList}, EndRangeFun) ->
|
||||||
|
skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun).
|
||||||
|
|
||||||
|
|
||||||
|
search_range(StartRange, EndRange, Tree, StartKeyFun) ->
|
||||||
|
EndRangeFun =
|
||||||
|
fun(ER, _FirstRHSKey, FirstRHSValue) ->
|
||||||
|
StartRHSKey = StartKeyFun(FirstRHSValue),
|
||||||
|
ER >= StartRHSKey
|
||||||
|
end,
|
||||||
|
case Tree of
|
||||||
|
{tree, _L, T} ->
|
||||||
|
treelookup_range_start(StartRange, EndRange, T, EndRangeFun);
|
||||||
|
{idxt, _L, T} ->
|
||||||
|
idxtlookup_range_start(StartRange, EndRange, T, EndRangeFun);
|
||||||
|
{skpl, _L, SL} ->
|
||||||
|
skpllookup_to_range(StartRange, EndRange, SL, EndRangeFun)
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
to_list({tree, _L, Tree}) ->
|
||||||
|
FoldFun =
|
||||||
|
fun({_MK, SL}, Acc) ->
|
||||||
|
Acc ++ SL
|
||||||
|
end,
|
||||||
|
lists:foldl(FoldFun, [], tree_to_list(Tree));
|
||||||
|
to_list({idxt, _L, {TLI, _IDX}}) ->
|
||||||
|
lists:append(tuple_to_list(TLI));
|
||||||
|
to_list({skpl, _L, SkipList}) ->
|
||||||
|
FoldFun =
|
||||||
|
fun({_M, SL}, Acc) ->
|
||||||
|
[SL|Acc]
|
||||||
|
end,
|
||||||
|
|
||||||
|
Lv1List = lists:reverse(lists:foldl(FoldFun, [], SkipList)),
|
||||||
|
Lv0List = lists:reverse(lists:foldl(FoldFun, [], lists:append(Lv1List))),
|
||||||
|
lists:append(Lv0List).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
tsize({_Type, L, _Tree}) ->
|
||||||
|
L.
|
||||||
|
|
||||||
|
empty(tree) ->
|
||||||
|
{tree, 0, empty_tree()};
|
||||||
|
empty(idxt) ->
|
||||||
|
{idxt, 0, {{}, empty_tree()}};
|
||||||
|
empty(skpl) ->
|
||||||
|
{skpl, 0, []}.
|
||||||
|
|
||||||
|
%%%============================================================================
|
||||||
|
%%% Internal Functions
|
||||||
|
%%%============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
tree_fromorderedlist([], TmpList, _L, _SkipWidth) ->
|
||||||
|
gb_trees:from_orddict(lists:reverse(TmpList));
|
||||||
|
tree_fromorderedlist(OrdList, TmpList, L, SkipWidth) ->
|
||||||
|
SubLL = min(SkipWidth, L),
|
||||||
|
{Head, Tail} = lists:split(SubLL, OrdList),
|
||||||
|
{LastK, _LastV} = lists:last(Head),
|
||||||
|
tree_fromorderedlist(Tail, [{LastK, Head}|TmpList], L - SubLL, SkipWidth).
|
||||||
|
|
||||||
|
idxt_fromorderedlist([], {TmpListElements, TmpListIdx, _C}, _L, _SkipWidth) ->
|
||||||
|
{list_to_tuple(lists:reverse(TmpListElements)),
|
||||||
|
gb_trees:from_orddict(lists:reverse(TmpListIdx))};
|
||||||
|
idxt_fromorderedlist(OrdList, {TmpListElements, TmpListIdx, C}, L, SkipWidth) ->
|
||||||
|
SubLL = min(SkipWidth, L),
|
||||||
|
{Head, Tail} = lists:split(SubLL, OrdList),
|
||||||
|
{LastK, _LastV} = lists:last(Head),
|
||||||
|
idxt_fromorderedlist(Tail,
|
||||||
|
{[Head|TmpListElements],
|
||||||
|
[{LastK, C}|TmpListIdx],
|
||||||
|
C + 1},
|
||||||
|
L - SubLL,
|
||||||
|
SkipWidth).
|
||||||
|
|
||||||
|
skpl_fromorderedlist(SkipList, _L, _SkipWidth, 0) ->
|
||||||
|
SkipList;
|
||||||
|
skpl_fromorderedlist(SkipList, L, SkipWidth, Height) ->
|
||||||
|
SkipList0 = roll_list(SkipList, L, [], SkipWidth),
|
||||||
|
skpl_fromorderedlist(SkipList0, length(SkipList0), SkipWidth, Height - 1).
|
||||||
|
|
||||||
|
roll_list([], 0, SkipList, _SkipWidth) ->
|
||||||
|
lists:reverse(SkipList);
|
||||||
|
roll_list(KVList, L, SkipList, SkipWidth) ->
|
||||||
|
SubLL = min(SkipWidth, L),
|
||||||
|
{Head, Tail} = lists:split(SubLL, KVList),
|
||||||
|
{LastK, _LastV} = lists:last(Head),
|
||||||
|
roll_list(Tail, L - SubLL, [{LastK, Head}|SkipList], SkipWidth).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
% lookup_match(_Key, []) ->
|
||||||
|
% none;
|
||||||
|
% lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key ->
|
||||||
|
% none;
|
||||||
|
% lookup_match(Key, [{Key, EV}|_Tail]) ->
|
||||||
|
% {value, EV};
|
||||||
|
% lookup_match(Key, [_Top|Tail]) ->
|
||||||
|
% lookup_match(Key, Tail).
|
||||||
|
|
||||||
|
lookup_match(Key, KVList) ->
|
||||||
|
case lists:keyfind(Key, 1, KVList) of
|
||||||
|
false ->
|
||||||
|
none;
|
||||||
|
{Key, Value} ->
|
||||||
|
{value, Value}
|
||||||
|
end.
|
||||||
|
|
||||||
|
lookup_best(_Key, []) ->
|
||||||
|
none;
|
||||||
|
lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key ->
|
||||||
|
{EK, EV};
|
||||||
|
lookup_best(Key, [_Top|Tail]) ->
|
||||||
|
lookup_best(Key, Tail).
|
||||||
|
|
||||||
|
treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun) ->
|
||||||
|
Iter0 = tree_iterator_from(StartRange, Tree),
|
||||||
|
case tree_next(Iter0) of
|
||||||
|
none ->
|
||||||
|
[];
|
||||||
|
{NK, SL, Iter1} ->
|
||||||
|
PredFun =
|
||||||
|
fun({K, _V}) ->
|
||||||
|
K < StartRange
|
||||||
|
end,
|
||||||
|
{_LHS, RHS} = lists:splitwith(PredFun, SL),
|
||||||
|
treelookup_range_end(EndRange, {NK, RHS}, Iter1, [], EndRangeFun)
|
||||||
|
end.
|
||||||
|
|
||||||
|
treelookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) ->
|
||||||
|
PredFun =
|
||||||
|
fun({K, _V}) ->
|
||||||
|
not leveled_codec:endkey_passed(EndRange, K)
|
||||||
|
end,
|
||||||
|
case leveled_codec:endkey_passed(EndRange, NK0) of
|
||||||
|
true ->
|
||||||
|
{LHS, RHS} = lists:splitwith(PredFun, SL0),
|
||||||
|
case RHS of
|
||||||
|
[] ->
|
||||||
|
Output ++ LHS;
|
||||||
|
[{FirstRHSKey, FirstRHSValue}|_Rest] ->
|
||||||
|
case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of
|
||||||
|
true ->
|
||||||
|
Output ++ LHS ++ [{FirstRHSKey, FirstRHSValue}];
|
||||||
|
false ->
|
||||||
|
Output ++ LHS
|
||||||
|
end
|
||||||
|
end;
|
||||||
|
false ->
|
||||||
|
UpdOutput = Output ++ SL0,
|
||||||
|
case tree_next(Iter0) of
|
||||||
|
none ->
|
||||||
|
UpdOutput;
|
||||||
|
{NK1, SL1, Iter1} ->
|
||||||
|
treelookup_range_end(EndRange,
|
||||||
|
{NK1, SL1},
|
||||||
|
Iter1,
|
||||||
|
UpdOutput,
|
||||||
|
EndRangeFun)
|
||||||
|
end
|
||||||
|
end.
|
||||||
|
|
||||||
|
idxtlookup_range_start(StartRange, EndRange, {TLI, IDX}, EndRangeFun) ->
|
||||||
|
Iter0 = tree_iterator_from(StartRange, IDX),
|
||||||
|
case tree_next(Iter0) of
|
||||||
|
none ->
|
||||||
|
[];
|
||||||
|
{NK, ListID, Iter1} ->
|
||||||
|
PredFun =
|
||||||
|
fun({K, _V}) ->
|
||||||
|
K < StartRange
|
||||||
|
end,
|
||||||
|
{_LHS, RHS} = lists:splitwith(PredFun, element(ListID, TLI)),
|
||||||
|
idxtlookup_range_end(EndRange, {TLI, NK, RHS}, Iter1, [], EndRangeFun)
|
||||||
|
end.
|
||||||
|
|
||||||
|
idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) ->
|
||||||
|
PredFun =
|
||||||
|
fun({K, _V}) ->
|
||||||
|
not leveled_codec:endkey_passed(EndRange, K)
|
||||||
|
end,
|
||||||
|
case leveled_codec:endkey_passed(EndRange, NK0) of
|
||||||
|
true ->
|
||||||
|
{LHS, RHS} = lists:splitwith(PredFun, SL0),
|
||||||
|
case RHS of
|
||||||
|
[] ->
|
||||||
|
Output ++ LHS;
|
||||||
|
[{FirstRHSKey, FirstRHSValue}|_Rest] ->
|
||||||
|
case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of
|
||||||
|
true ->
|
||||||
|
Output ++ LHS ++ [{FirstRHSKey, FirstRHSValue}];
|
||||||
|
false ->
|
||||||
|
Output ++ LHS
|
||||||
|
end
|
||||||
|
end;
|
||||||
|
false ->
|
||||||
|
UpdOutput = Output ++ SL0,
|
||||||
|
case tree_next(Iter0) of
|
||||||
|
none ->
|
||||||
|
UpdOutput;
|
||||||
|
{NK1, ListID, Iter1} ->
|
||||||
|
idxtlookup_range_end(EndRange,
|
||||||
|
{TLI, NK1, element(ListID, TLI)},
|
||||||
|
Iter1,
|
||||||
|
UpdOutput,
|
||||||
|
EndRangeFun)
|
||||||
|
end
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) ->
|
||||||
|
FoldFun =
|
||||||
|
fun({K, SL}, {PassedStart, PassedEnd, Acc}) ->
|
||||||
|
case {PassedStart, PassedEnd} of
|
||||||
|
{false, false} ->
|
||||||
|
case StartRange > K of
|
||||||
|
true ->
|
||||||
|
{PassedStart, PassedEnd, Acc};
|
||||||
|
false ->
|
||||||
|
case leveled_codec:endkey_passed(EndRange, K) of
|
||||||
|
true ->
|
||||||
|
{true, true, [SL|Acc]};
|
||||||
|
false ->
|
||||||
|
{true, false, [SL|Acc]}
|
||||||
|
end
|
||||||
|
end;
|
||||||
|
{true, false} ->
|
||||||
|
case leveled_codec:endkey_passed(EndRange, K) of
|
||||||
|
true ->
|
||||||
|
{true, true, [SL|Acc]};
|
||||||
|
false ->
|
||||||
|
{true, false, [SL|Acc]}
|
||||||
|
end;
|
||||||
|
{true, true} ->
|
||||||
|
{PassedStart, PassedEnd, Acc}
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
Lv1List = lists:reverse(element(3,
|
||||||
|
lists:foldl(FoldFun,
|
||||||
|
{false, false, []},
|
||||||
|
SkipList))),
|
||||||
|
Lv0List = lists:reverse(element(3,
|
||||||
|
lists:foldl(FoldFun,
|
||||||
|
{false, false, []},
|
||||||
|
lists:append(Lv1List)))),
|
||||||
|
BeforeFun =
|
||||||
|
fun({K, _V}) ->
|
||||||
|
K < StartRange
|
||||||
|
end,
|
||||||
|
AfterFun =
|
||||||
|
fun({K, V}) ->
|
||||||
|
case leveled_codec:endkey_passed(EndRange, K) of
|
||||||
|
false ->
|
||||||
|
true;
|
||||||
|
true ->
|
||||||
|
EndRangeFun(EndRange, K, V)
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
|
||||||
|
case length(Lv0List) of
|
||||||
|
0 ->
|
||||||
|
[];
|
||||||
|
1 ->
|
||||||
|
RHS = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)),
|
||||||
|
lists:takewhile(AfterFun, RHS);
|
||||||
|
2 ->
|
||||||
|
RHSofLHL = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)),
|
||||||
|
LHSofRHL = lists:takewhile(AfterFun, lists:last(Lv0List)),
|
||||||
|
RHSofLHL ++ LHSofRHL;
|
||||||
|
L ->
|
||||||
|
RHSofLHL = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)),
|
||||||
|
LHSofRHL = lists:takewhile(AfterFun, lists:last(Lv0List)),
|
||||||
|
MidLists = lists:sublist(Lv0List, 2, L - 2),
|
||||||
|
lists:append([RHSofLHL] ++ MidLists ++ [LHSofRHL])
|
||||||
|
end.
|
||||||
|
|
||||||
|
|
||||||
|
skpl_getsublist(Key, SkipList) ->
|
||||||
|
FoldFun =
|
||||||
|
fun({Mark, SL}, Acc) ->
|
||||||
|
case {Acc, Mark} of
|
||||||
|
{[], Mark} when Mark >= Key ->
|
||||||
|
SL;
|
||||||
|
_ ->
|
||||||
|
Acc
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
SL1 = lists:foldl(FoldFun, [], SkipList),
|
||||||
|
lists:foldl(FoldFun, [], SL1).
|
||||||
|
|
||||||
|
%%%============================================================================
|
||||||
|
%%% Balance tree implementation
|
||||||
|
%%%============================================================================
|
||||||
|
|
||||||
|
empty_tree() ->
|
||||||
|
gb_trees:empty().
|
||||||
|
|
||||||
|
tree_to_list(T) ->
|
||||||
|
gb_trees:to_list(T).
|
||||||
|
|
||||||
|
tree_iterator_from(K, T) ->
|
||||||
|
% For OTP 16 compatibility with gb_trees
|
||||||
|
iterator_from(K, T).
|
||||||
|
|
||||||
|
tree_next(I) ->
|
||||||
|
% For OTP 16 compatibility with gb_trees
|
||||||
|
next(I).
|
||||||
|
|
||||||
|
|
||||||
|
iterator_from(S, {_, T}) ->
|
||||||
|
iterator_1_from(S, T).
|
||||||
|
|
||||||
|
iterator_1_from(S, T) ->
|
||||||
|
iterator_from(S, T, []).
|
||||||
|
|
||||||
|
iterator_from(S, {K, _, _, T}, As) when K < S ->
|
||||||
|
iterator_from(S, T, As);
|
||||||
|
iterator_from(_, {_, _, nil, _} = T, As) ->
|
||||||
|
[T | As];
|
||||||
|
iterator_from(S, {_, _, L, _} = T, As) ->
|
||||||
|
iterator_from(S, L, [T | As]);
|
||||||
|
iterator_from(_, nil, As) ->
|
||||||
|
As.
|
||||||
|
|
||||||
|
next([{X, V, _, T} | As]) ->
|
||||||
|
{X, V, iterator(T, As)};
|
||||||
|
next([]) ->
|
||||||
|
none.
|
||||||
|
|
||||||
|
%% The iterator structure is really just a list corresponding to
|
||||||
|
%% the call stack of an in-order traversal. This is quite fast.
|
||||||
|
|
||||||
|
iterator({_, _, nil, _} = T, As) ->
|
||||||
|
[T | As];
|
||||||
|
iterator({_, _, L, _} = T, As) ->
|
||||||
|
iterator(L, [T | As]);
|
||||||
|
iterator(nil, As) ->
|
||||||
|
As.
|
||||||
|
|
||||||
|
%%%============================================================================
|
||||||
|
%%% Test
|
||||||
|
%%%============================================================================
|
||||||
|
|
||||||
|
-ifdef(TEST).
|
||||||
|
|
||||||
|
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
|
||||||
|
generate_randomkeys(Seqn,
|
||||||
|
Count,
|
||||||
|
[],
|
||||||
|
BucketRangeLow,
|
||||||
|
BucketRangeHigh).
|
||||||
|
|
||||||
|
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
|
||||||
|
Acc;
|
||||||
|
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
|
||||||
|
BRand = random:uniform(BRange),
|
||||||
|
BNumber = string:right(integer_to_list(BucketLow + BRand), 4, $0),
|
||||||
|
KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
|
||||||
|
{K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
|
||||||
|
{Seqn, {active, infinity}, null}},
|
||||||
|
generate_randomkeys(Seqn + 1,
|
||||||
|
Count - 1,
|
||||||
|
[{K, V}|Acc],
|
||||||
|
BucketLow,
|
||||||
|
BRange).
|
||||||
|
|
||||||
|
|
||||||
|
tree_search_test() ->
|
||||||
|
search_test_by_type(tree).
|
||||||
|
|
||||||
|
idxt_search_test() ->
|
||||||
|
search_test_by_type(idxt).
|
||||||
|
|
||||||
|
skpl_search_test() ->
|
||||||
|
search_test_by_type(skpl).
|
||||||
|
|
||||||
|
search_test_by_type(Type) ->
|
||||||
|
MapFun =
|
||||||
|
fun(N) ->
|
||||||
|
{N * 4, N * 4 - 2}
|
||||||
|
end,
|
||||||
|
KL = lists:map(MapFun, lists:seq(1, 50)),
|
||||||
|
T = from_orderedlist(KL, Type),
|
||||||
|
|
||||||
|
StartKeyFun = fun(V) -> V end,
|
||||||
|
statistics(runtime),
|
||||||
|
?assertMatch([], search_range(0, 1, T, StartKeyFun)),
|
||||||
|
?assertMatch([], search_range(201, 202, T, StartKeyFun)),
|
||||||
|
?assertMatch([{4, 2}], search_range(2, 4, T, StartKeyFun)),
|
||||||
|
?assertMatch([{4, 2}], search_range(2, 5, T, StartKeyFun)),
|
||||||
|
?assertMatch([{4, 2}, {8, 6}], search_range(2, 6, T, StartKeyFun)),
|
||||||
|
?assertMatch(50, length(search_range(2, 200, T, StartKeyFun))),
|
||||||
|
?assertMatch(50, length(search_range(2, 198, T, StartKeyFun))),
|
||||||
|
?assertMatch(49, length(search_range(2, 197, T, StartKeyFun))),
|
||||||
|
?assertMatch(49, length(search_range(4, 197, T, StartKeyFun))),
|
||||||
|
?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))),
|
||||||
|
{_, T1} = statistics(runtime),
|
||||||
|
io:format(user, "10 range tests with type ~w in ~w microseconds~n",
|
||||||
|
[Type, T1]).
|
||||||
|
|
||||||
|
|
||||||
|
tree_oor_test() ->
|
||||||
|
outofrange_test_by_type(tree).
|
||||||
|
|
||||||
|
idxt_oor_test() ->
|
||||||
|
outofrange_test_by_type(idxt).
|
||||||
|
|
||||||
|
skpl_oor_test() ->
|
||||||
|
outofrange_test_by_type(skpl).
|
||||||
|
|
||||||
|
outofrange_test_by_type(Type) ->
|
||||||
|
MapFun =
|
||||||
|
fun(N) ->
|
||||||
|
{N * 4, N * 4 - 2}
|
||||||
|
end,
|
||||||
|
KL = lists:map(MapFun, lists:seq(1, 50)),
|
||||||
|
T = from_orderedlist(KL, Type),
|
||||||
|
|
||||||
|
io:format("Out of range searches~n"),
|
||||||
|
?assertMatch(none, match(0, T)),
|
||||||
|
?assertMatch(none, match(5, T)),
|
||||||
|
?assertMatch(none, match(97, T)),
|
||||||
|
?assertMatch(none, match(197, T)),
|
||||||
|
?assertMatch(none, match(201, T)),
|
||||||
|
|
||||||
|
StartKeyFun = fun(V) -> V end,
|
||||||
|
|
||||||
|
?assertMatch(none, search(0, T, StartKeyFun)),
|
||||||
|
?assertMatch(none, search(5, T, StartKeyFun)),
|
||||||
|
?assertMatch(none, search(97, T, StartKeyFun)),
|
||||||
|
?assertMatch(none, search(197, T, StartKeyFun)),
|
||||||
|
?assertMatch(none, search(201, T, StartKeyFun)).
|
||||||
|
|
||||||
|
tree_tolist_test() ->
|
||||||
|
tolist_test_by_type(tree).
|
||||||
|
|
||||||
|
idxt_tolist_test() ->
|
||||||
|
tolist_test_by_type(idxt).
|
||||||
|
|
||||||
|
skpl_tolist_test() ->
|
||||||
|
tolist_test_by_type(skpl).
|
||||||
|
|
||||||
|
tolist_test_by_type(Type) ->
|
||||||
|
MapFun =
|
||||||
|
fun(N) ->
|
||||||
|
{N * 4, N * 4 - 2}
|
||||||
|
end,
|
||||||
|
KL = lists:map(MapFun, lists:seq(1, 50)),
|
||||||
|
T = from_orderedlist(KL, Type),
|
||||||
|
T_Reverse = to_list(T),
|
||||||
|
?assertMatch(KL, T_Reverse).
|
||||||
|
|
||||||
|
tree_timing_test() ->
|
||||||
|
log_tree_test_by_(16, tree, 4000),
|
||||||
|
tree_test_by_(8, tree, 1000),
|
||||||
|
tree_test_by_(4, tree, 256).
|
||||||
|
|
||||||
|
idxt_timing_test() ->
|
||||||
|
log_tree_test_by_(16, idxt, 4000),
|
||||||
|
tree_test_by_(8, idxt, 1000),
|
||||||
|
tree_test_by_(4, idxt, 256).
|
||||||
|
|
||||||
|
skpl_timing_test() ->
|
||||||
|
tree_test_by_(auto, skpl, 6000),
|
||||||
|
log_tree_test_by_(auto, skpl, 4000),
|
||||||
|
tree_test_by_(auto, skpl, 1000),
|
||||||
|
tree_test_by_(auto, skpl, 256).
|
||||||
|
|
||||||
|
log_tree_test_by_(Width, Type, N) ->
|
||||||
|
erlang:statistics(runtime),
|
||||||
|
G0 = erlang:statistics(garbage_collection),
|
||||||
|
tree_test_by_(Width, Type, N),
|
||||||
|
{_, T1} = erlang:statistics(runtime),
|
||||||
|
G1 = erlang:statistics(garbage_collection),
|
||||||
|
io:format(user, "Test took ~w ms and GC transitioned from ~w to ~w~n",
|
||||||
|
[T1, G0, G1]).
|
||||||
|
|
||||||
|
tree_test_by_(Width, Type, N) ->
|
||||||
|
io:format(user, "~nTree test for type and width: ~w ~w~n", [Type, Width]),
|
||||||
|
KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)),
|
||||||
|
|
||||||
|
OS = ets:new(test, [ordered_set, private]),
|
||||||
|
ets:insert(OS, KL),
|
||||||
|
SWaETS = os:timestamp(),
|
||||||
|
Tree0 = from_orderedset(OS, Type, Width),
|
||||||
|
io:format(user, "Generating tree from ETS in ~w microseconds" ++
|
||||||
|
" of size ~w~n",
|
||||||
|
[timer:now_diff(os:timestamp(), SWaETS),
|
||||||
|
tsize(Tree0)]),
|
||||||
|
|
||||||
|
SWaGSL = os:timestamp(),
|
||||||
|
Tree1 = from_orderedlist(KL, Type, Width),
|
||||||
|
io:format(user, "Generating tree from orddict in ~w microseconds" ++
|
||||||
|
" of size ~w~n",
|
||||||
|
[timer:now_diff(os:timestamp(), SWaGSL),
|
||||||
|
tsize(Tree1)]),
|
||||||
|
SWaLUP = os:timestamp(),
|
||||||
|
lists:foreach(match_fun(Tree0), KL),
|
||||||
|
lists:foreach(match_fun(Tree1), KL),
|
||||||
|
io:format(user, "Looked up all keys twice in ~w microseconds~n",
|
||||||
|
[timer:now_diff(os:timestamp(), SWaLUP)]),
|
||||||
|
|
||||||
|
?assertMatch(Tree0, Tree1),
|
||||||
|
|
||||||
|
SWaSRCH1 = os:timestamp(),
|
||||||
|
lists:foreach(search_exactmatch_fun(Tree0), KL),
|
||||||
|
lists:foreach(search_exactmatch_fun(Tree1), KL),
|
||||||
|
io:format(user, "Search all keys twice for exact match in ~w microseconds~n",
|
||||||
|
[timer:now_diff(os:timestamp(), SWaSRCH1)]),
|
||||||
|
|
||||||
|
BitBiggerKeyFun =
|
||||||
|
fun(Idx) ->
|
||||||
|
{K, _V} = lists:nth(Idx, KL),
|
||||||
|
{o, B, FullKey, null} = K,
|
||||||
|
{{o, B, FullKey ++ "0", null}, lists:nth(Idx + 1, KL)}
|
||||||
|
end,
|
||||||
|
SrchKL = lists:map(BitBiggerKeyFun, lists:seq(1, length(KL) - 1)),
|
||||||
|
|
||||||
|
SWaSRCH2 = os:timestamp(),
|
||||||
|
lists:foreach(search_nearmatch_fun(Tree0), SrchKL),
|
||||||
|
lists:foreach(search_nearmatch_fun(Tree1), SrchKL),
|
||||||
|
io:format(user, "Search all keys twice for near match in ~w microseconds~n",
|
||||||
|
[timer:now_diff(os:timestamp(), SWaSRCH2)]).
|
||||||
|
|
||||||
|
|
||||||
|
tree_matchrange_test() ->
|
||||||
|
matchrange_test_by_type(tree).
|
||||||
|
|
||||||
|
idxt_matchrange_test() ->
|
||||||
|
matchrange_test_by_type(idxt).
|
||||||
|
|
||||||
|
skpl_matchrange_test() ->
|
||||||
|
matchrange_test_by_type(skpl).
|
||||||
|
|
||||||
|
|
||||||
|
matchrange_test_by_type(Type) ->
|
||||||
|
N = 4000,
|
||||||
|
KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)),
|
||||||
|
Tree0 = from_orderedlist(KL, Type),
|
||||||
|
|
||||||
|
FirstKey = element(1, lists:nth(1, KL)),
|
||||||
|
FinalKey = element(1, lists:last(KL)),
|
||||||
|
PenultimateKey = element(1, lists:nth(length(KL) - 1, KL)),
|
||||||
|
AfterFirstKey = setelement(3, FirstKey, element(3, FirstKey) ++ "0"),
|
||||||
|
AfterPenultimateKey = setelement(3,
|
||||||
|
PenultimateKey,
|
||||||
|
element(3, PenultimateKey) ++ "0"),
|
||||||
|
|
||||||
|
LengthR =
|
||||||
|
fun(SK, EK, T) ->
|
||||||
|
length(match_range(SK, EK, T))
|
||||||
|
end,
|
||||||
|
|
||||||
|
KL_Length = length(KL),
|
||||||
|
io:format("KL_Length ~w~n", [KL_Length]),
|
||||||
|
?assertMatch(KL_Length, LengthR(FirstKey, FinalKey, Tree0)),
|
||||||
|
?assertMatch(KL_Length, LengthR(FirstKey, PenultimateKey, Tree0) + 1),
|
||||||
|
?assertMatch(1, LengthR(all, FirstKey, Tree0)),
|
||||||
|
?assertMatch(KL_Length, LengthR(all, PenultimateKey, Tree0) + 1),
|
||||||
|
?assertMatch(KL_Length, LengthR(all, all, Tree0)),
|
||||||
|
?assertMatch(2, LengthR(PenultimateKey, FinalKey, Tree0)),
|
||||||
|
?assertMatch(KL_Length, LengthR(AfterFirstKey, PenultimateKey, Tree0) + 2),
|
||||||
|
?assertMatch(1, LengthR(AfterPenultimateKey, FinalKey, Tree0)).
|
||||||
|
|
||||||
|
match_fun(Tree) ->
|
||||||
|
fun({K, V}) ->
|
||||||
|
?assertMatch({value, V}, match(K, Tree))
|
||||||
|
end.
|
||||||
|
|
||||||
|
search_exactmatch_fun(Tree) ->
|
||||||
|
StartKeyFun = fun(_V) -> all end,
|
||||||
|
fun({K, V}) ->
|
||||||
|
?assertMatch({K, V}, search(K, Tree, StartKeyFun))
|
||||||
|
end.
|
||||||
|
|
||||||
|
search_nearmatch_fun(Tree) ->
|
||||||
|
StartKeyFun = fun(_V) -> all end,
|
||||||
|
fun({K, {NK, NV}}) ->
|
||||||
|
?assertMatch({NK, NV}, search(K, Tree, StartKeyFun))
|
||||||
|
end.
|
||||||
|
|
||||||
|
empty_test() ->
|
||||||
|
T0 = empty(tree),
|
||||||
|
?assertMatch(0, tsize(T0)),
|
||||||
|
T1 = empty(skpl),
|
||||||
|
?assertMatch(0, tsize(T1)),
|
||||||
|
T2 = empty(idxt),
|
||||||
|
?assertMatch(0, tsize(T2)).
|
||||||
|
|
||||||
|
-endif.
|