Merge pull request #18 from martinsumner/mas-leveledtree

Mas leveledtree

Commit 266e851a96: 13 changed files with 1190 additions and 1081 deletions
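The change below swaps the leveled_skiplist structures used for the Bookie's ledger cache and the Penciller's level-zero cache and manifest levels for leveled_tree. A minimal illustrative sketch of the new cache pattern, using only the leveled_tree calls and the ?CACHE_TYPE macro that appear in this diff; the sketch itself is not part of the commit:

%% Build a cache tree from an unsorted list of key changes, then look a
%% key up and measure the tree, mirroring how the Bookie and Penciller
%% use leveled_tree elsewhere in this diff.
-define(CACHE_TYPE, skpl).

cache_sketch(KeyChanges, Key) ->
    SL = lists:ukeysort(1, KeyChanges),                    % de-duplicate and sort by key
    Tree = leveled_tree:from_orderedlist(SL, ?CACHE_TYPE), % replaces leveled_skiplist:from_list/1
    {leveled_tree:tsize(Tree),                             % replaces leveled_skiplist:size/1
     leveled_tree:match(Key, Tree)}.                       % replaces leveled_skiplist:lookup/2 (none | {value, V})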
@@ -15,6 +15,8 @@
%% Inker key type used for tombstones
-define(INKT_TOMB, tomb).

-define(CACHE_TYPE, skpl).

-record(sft_options,
{wait = true :: boolean(),
expire_tombstones = false :: boolean(),

@@ -139,6 +139,7 @@
get_opt/3,
load_snapshot/2,
empty_ledgercache/0,
loadqueue_ledgercache/1,
push_ledgercache/2]).

-include_lib("eunit/include/eunit.hrl").

@@ -153,7 +154,8 @@
-define(LONG_RUNNING, 80000).

-record(ledger_cache, {mem :: ets:tab(),
loader = leveled_skiplist:empty(false) :: tuple(),
loader = leveled_tree:empty(?CACHE_TYPE) :: tuple(),
load_queue = [] :: list(),
index = leveled_pmem:new_index(), % array
min_sqn = infinity :: integer()|infinity,
max_sqn = 0 :: integer()}).

@@ -474,6 +476,11 @@ push_ledgercache(Penciller, Cache) ->
Cache#ledger_cache.max_sqn},
leveled_penciller:pcl_pushmem(Penciller, CacheToLoad).

loadqueue_ledgercache(Cache) ->
SL = lists:ukeysort(1, Cache#ledger_cache.load_queue),
T = leveled_tree:from_orderedlist(SL, ?CACHE_TYPE),
Cache#ledger_cache{load_queue = [], loader = T}.

%%%============================================================================
%%% Internal functions
%%%============================================================================

@@ -719,11 +726,12 @@ snapshot_store(State, SnapType) ->

readycache_forsnapshot(LedgerCache) ->
% Need to convert the Ledger Cache away from using the ETS table
SkipList = leveled_skiplist:from_orderedset(LedgerCache#ledger_cache.mem),
Tree = leveled_tree:from_orderedset(LedgerCache#ledger_cache.mem,
?CACHE_TYPE),
Idx = LedgerCache#ledger_cache.index,
MinSQN = LedgerCache#ledger_cache.min_sqn,
MaxSQN = LedgerCache#ledger_cache.max_sqn,
#ledger_cache{loader=SkipList, index=Idx, min_sqn=MinSQN, max_sqn=MaxSQN}.
#ledger_cache{loader=Tree, index=Idx, min_sqn=MinSQN, max_sqn=MaxSQN}.

set_options(Opts) ->
MaxJournalSize0 = get_opt(max_journalsize, Opts, 10000000000),

@@ -961,14 +969,10 @@ addto_ledgercache({H, SQN, KeyChanges}, Cache) ->
max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}.

addto_ledgercache({H, SQN, KeyChanges}, Cache, loader) ->
FoldChangesFun =
fun({K, V}, SL0) ->
leveled_skiplist:enter_nolookup(K, V, SL0)
end,
UpdSL = lists:foldl(FoldChangesFun, Cache#ledger_cache.loader, KeyChanges),
UpdQ = KeyChanges ++ Cache#ledger_cache.load_queue,
UpdIndex = leveled_pmem:prepare_for_index(Cache#ledger_cache.index, H),
Cache#ledger_cache{index = UpdIndex,
loader = UpdSL,
load_queue = UpdQ,
min_sqn=min(SQN, Cache#ledger_cache.min_sqn),
max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}.

@@ -979,7 +983,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) ->
TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize),
if
TimeToPush ->
CacheToLoad = {leveled_skiplist:from_orderedset(Tab),
CacheToLoad = {leveled_tree:from_orderedset(Tab, ?CACHE_TYPE),
Cache#ledger_cache.index,
Cache#ledger_cache.min_sqn,
Cache#ledger_cache.max_sqn},
@@ -34,7 +34,6 @@

-export([
inker_reload_strategy/1,
strip_to_keyonly/1,
strip_to_seqonly/1,
strip_to_statusonly/1,
strip_to_keyseqonly/1,

@@ -44,7 +43,6 @@
endkey_passed/2,
key_dominates/2,
maybe_reap_expiredkey/2,
print_key/1,
to_ledgerkey/3,
to_ledgerkey/5,
from_ledgerkey/1,

@@ -108,8 +106,6 @@ inker_reload_strategy(AltList) ->
ReloadStrategy0,
AltList).

strip_to_keyonly({K, _V}) -> K.

strip_to_statusonly({_, {_, St, _, _}}) -> St.

strip_to_seqonly({_, {SeqN, _, _, _}}) -> SeqN.

@@ -252,33 +248,6 @@ create_value_for_journal(Value) ->
hash(Obj) ->
erlang:phash2(term_to_binary(Obj)).

% Return a tuple of strings to ease the printing of keys to logs
print_key(Key) ->
{A_STR, B_TERM, C_TERM} = case Key of
{?STD_TAG, B, K, _SK} ->
{"Object", B, K};
{?RIAK_TAG, B, K, _SK} ->
{"RiakObject", B, K};
{?IDX_TAG, B, {F, _V}, _K} ->
{"Index", B, F}
end,
B_STR = turn_to_string(B_TERM),
C_STR = turn_to_string(C_TERM),
{A_STR, B_STR, C_STR}.

turn_to_string(Item) ->
if
is_binary(Item) == true ->
binary_to_list(Item);
is_integer(Item) == true ->
integer_to_list(Item);
is_list(Item) == true ->
Item;
true ->
[Output] = io_lib:format("~w", [Item]),
Output
end.


% Compare a key against a query key, only comparing elements that are non-null
% in the Query key. This is used for comparing against end keys in queries.

@@ -461,10 +430,6 @@ endkey_passed_test() ->
?assertMatch(false, endkey_passed(TestKey, K1)),
?assertMatch(true, endkey_passed(TestKey, K2)).

stringcheck_test() ->
?assertMatch("Bucket", turn_to_string("Bucket")),
?assertMatch("Bucket", turn_to_string(<<"Bucket">>)),
?assertMatch("bucket", turn_to_string(bucket)).

%% Test below proved that the overhead of performing hashes was trivial
%% Maybe 5 microseconds per hash
@@ -673,10 +673,14 @@ load_between_sequence(MinSQN, MaxSQN, FilterFun, Penciller,
push_to_penciller(Penciller, LedgerCache) ->
% The push to penciller must start as a tree to correctly de-duplicate
% the list by order before becoming a de-duplicated list for loading
LC0 = leveled_bookie:loadqueue_ledgercache(LedgerCache),
push_to_penciller_loop(Penciller, LC0).

push_to_penciller_loop(Penciller, LedgerCache) ->
case leveled_bookie:push_ledgercache(Penciller, LedgerCache) of
returned ->
timer:sleep(?LOADING_PAUSE),
push_to_penciller(Penciller, LedgerCache);
push_to_penciller_loop(Penciller, LedgerCache);
ok ->
ok
end.
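A note on the de-duplication step above, as an illustration rather than part of the commit: lists:ukeysort/2 keeps only the first element it sees for each key, and addto_ledgercache/3 prepends newer batches of key changes to the load queue, so the most recent value for a duplicated key is the one retained when loadqueue_ledgercache/1 builds the tree.

%% Assumed shell session, for illustration only:
%% 1> lists:ukeysort(1, [{k1, newest}, {k2, v2}, {k1, older}]).
%% [{k1,newest},{k2,v2}]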
@@ -15,8 +15,8 @@
sst_timing/3]).

-define(PUT_LOGPOINT, 20000).
-define(HEAD_LOGPOINT, 160000).
-define(GET_LOGPOINT, 160000).
-define(HEAD_LOGPOINT, 50000).
-define(GET_LOGPOINT, 50000).
-define(SST_LOGPOINT, 20000).
-define(LOG_LEVEL, [info, warn, error, critical]).
-define(SAMPLE_RATE, 16).

@@ -309,10 +309,12 @@


log(LogReference, Subs) ->
{ok, {LogLevel, LogText}} = dict:find(LogReference, ?LOGBASE),
{LogLevel, LogText} = dict:fetch(LogReference, ?LOGBASE),
case lists:member(LogLevel, ?LOG_LEVEL) of
true ->
io:format(LogReference ++ " ~w " ++ LogText ++ "~n",
io:format(format_time()
++ " " ++ LogReference ++ " ~w "
++ LogText ++ "~n",
[self()|Subs]);
false ->
ok

@@ -320,7 +322,7 @@ log(LogReference, Subs) ->


log_timer(LogReference, Subs, StartTime) ->
{ok, {LogLevel, LogText}} = dict:find(LogReference, ?LOGBASE),
{LogLevel, LogText} = dict:fetch(LogReference, ?LOGBASE),
case lists:member(LogLevel, ?LOG_LEVEL) of
true ->
MicroS = timer:now_diff(os:timestamp(), StartTime),

@@ -330,7 +332,9 @@ log_timer(LogReference, Subs, StartTime) ->
MicroS ->
{"ms", MicroS div 1000}
end,
io:format(LogReference ++ " ~w " ++ LogText
io:format(format_time()
++ " " ++ LogReference ++ " ~w "
++ LogText
++ " with time taken ~w " ++ Unit ++ "~n",
[self()|Subs] ++ [Time]);
false ->

@@ -510,6 +514,17 @@ gen_timing_int({N, TimerD}, T0, TimerType, _KeyListFun, _LogPoint, _LogRef) ->
TimerD)}.


format_time() ->
format_time(localtime_ms()).

localtime_ms() ->
{_, _, Micro} = Now = os:timestamp(),
{Date, {Hours, Minutes, Seconds}} = calendar:now_to_local_time(Now),
{Date, {Hours, Minutes, Seconds, Micro div 1000 rem 1000}}.

format_time({{Y, M, D}, {H, Mi, S, Ms}}) ->
io_lib:format("~b-~2..0b-~2..0b", [Y, M, D]) ++ "T" ++
io_lib:format("~2..0b:~2..0b:~2..0b.~3..0b", [H, Mi, S, Ms]).


%%%============================================================================
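For reference, a hypothetical example of the timestamp prefix the new format_time/1 clause produces (illustration only, not part of the commit):

%% Assumed shell session:
%% 1> lists:flatten(format_time({{2016, 12, 11}, {9, 5, 7, 23}})).
%% "2016-12-11T09:05:07.023"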
@@ -183,16 +183,12 @@ perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN) ->
ME
end,
SinkManifestList = lists:map(RevertPointerFun, SinkList),
Man0 = leveled_pmanifest:remove_manifest_entry(Manifest,
NewSQN,
SinkLevel,
SinkManifestList),
Man1 = leveled_pmanifest:insert_manifest_entry(Man0,
Man0 = leveled_pmanifest:replace_manifest_entry(Manifest,
NewSQN,
SinkLevel,
SinkManifestList,
Additions),

Man2 = leveled_pmanifest:remove_manifest_entry(Man1,
Man2 = leveled_pmanifest:remove_manifest_entry(Man0,
NewSQN,
SrcLevel,
Src),
@@ -9,7 +9,7 @@
%% the Penciller's Clerk
%% - The Penciller can be cloned and maintains a register of clones who have
%% requested snapshots of the Ledger
%% - The Penciller accepts new dumps (in the form of a leveled_skiplist accompanied by
%% - The Penciller accepts new dumps (in the form of a leveled_tree accompanied by
%% an array of hash-listing binaries) from the Bookie, and responds either 'ok'
%% to the bookie if the information is accepted and the Bookie can refresh its
%% memory, or 'returned' if the bookie must continue without refreshing as the

@@ -224,7 +224,7 @@

levelzero_pending = false :: boolean(),
levelzero_constructor :: pid(),
levelzero_cache = [] :: list(), % a list of skiplists
levelzero_cache = [] :: list(), % a list of trees
levelzero_size = 0 :: integer(),
levelzero_maxcachesize :: integer(),
levelzero_cointoss = false :: boolean(),

@@ -345,9 +345,9 @@ handle_call({push_mem, {PushedTree, PushedIdx, MinSQN, MaxSQN}},
State=#state{is_snapshot=Snap}) when Snap == false ->
% The push_mem process is as follows:
%
% 1 - Receive a cache. The cache has four parts: a skiplist of keys and
% 1 - Receive a cache. The cache has four parts: a tree of keys and
% values, an array of 256 binaries listing the hashes present in the
% skiplist, a min SQN and a max SQN
% tree, a min SQN and a max SQN
%
% 2 - Check to see if there is a levelzero file pending. If so, the
% update must be returned. If not the update can be accepted
@@ -404,7 +404,7 @@ handle_call({fetch_keys, StartKey, EndKey, AccFun, InitAcc, MaxKeys},
leveled_pmem:merge_trees(StartKey,
EndKey,
State#state.levelzero_cache,
leveled_skiplist:empty());
leveled_tree:empty(?CACHE_TYPE));
List ->
List
end,
@@ -1072,10 +1072,10 @@ clean_subdir(DirPath) ->


maybe_pause_push(PCL, KL) ->
T0 = leveled_skiplist:empty(true),
T0 = [],
I0 = leveled_pmem:new_index(),
T1 = lists:foldl(fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) ->
UpdSL = leveled_skiplist:enter(K, V, AccSL),
UpdSL = [{K, V}|AccSL],
SQN = leveled_codec:strip_to_seqonly({K, V}),
H = leveled_codec:magic_hash(K),
UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H),

@@ -1083,7 +1083,10 @@ maybe_pause_push(PCL, KL) ->
end,
{T0, I0, infinity, 0},
KL),
case pcl_pushmem(PCL, T1) of
SL = element(1, T1),
Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, SL), ?CACHE_TYPE),
T2 = setelement(1, T1, Tree),
case pcl_pushmem(PCL, T2) of
returned ->
timer:sleep(50),
maybe_pause_push(PCL, KL);
@ -1315,63 +1318,63 @@ sqnoverlap_otherway_findnextkey_test() ->
|
|||
|
||||
foldwithimm_simple_test() ->
|
||||
QueryArray = [
|
||||
{2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}},
|
||||
{{o, "Bucket1", "Key5"}, {1, {active, infinity}, 0, null}}]},
|
||||
{3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]},
|
||||
{5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]}
|
||||
{2, [{{o, "Bucket1", "Key1", null},
|
||||
{5, {active, infinity}, 0, null}},
|
||||
{{o, "Bucket1", "Key5", null},
|
||||
{1, {active, infinity}, 0, null}}]},
|
||||
{3, [{{o, "Bucket1", "Key3", null},
|
||||
{3, {active, infinity}, 0, null}}]},
|
||||
{5, [{{o, "Bucket1", "Key5", null},
|
||||
{2, {active, infinity}, 0, null}}]}
|
||||
],
|
||||
IMM0 = leveled_skiplist:enter({o, "Bucket1", "Key6"},
|
||||
{7, {active, infinity}, 0, null},
|
||||
leveled_skiplist:empty()),
|
||||
IMM1 = leveled_skiplist:enter({o, "Bucket1", "Key1"},
|
||||
{8, {active, infinity}, 0, null},
|
||||
IMM0),
|
||||
IMM2 = leveled_skiplist:enter({o, "Bucket1", "Key8"},
|
||||
{9, {active, infinity}, 0, null},
|
||||
IMM1),
|
||||
IMMiter = leveled_skiplist:to_range(IMM2, {o, "Bucket1", "Key1"}),
|
||||
KL1A = [{{o, "Bucket1", "Key6", null}, {7, {active, infinity}, 0, null}},
|
||||
{{o, "Bucket1", "Key1", null}, {8, {active, infinity}, 0, null}},
|
||||
{{o, "Bucket1", "Key8", null}, {9, {active, infinity}, 0, null}}],
|
||||
IMM2 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1A), ?CACHE_TYPE),
|
||||
IMMiter = leveled_tree:match_range({o, "Bucket1", "Key1", null},
|
||||
{o, null, null, null},
|
||||
IMM2),
|
||||
AccFun = fun(K, V, Acc) -> SQN = leveled_codec:strip_to_seqonly({K, V}),
|
||||
Acc ++ [{K, SQN}] end,
|
||||
Acc = keyfolder(IMMiter,
|
||||
QueryArray,
|
||||
{o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"},
|
||||
{o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null},
|
||||
{AccFun, []}),
|
||||
?assertMatch([{{o, "Bucket1", "Key1"}, 8},
|
||||
{{o, "Bucket1", "Key3"}, 3},
|
||||
{{o, "Bucket1", "Key5"}, 2},
|
||||
{{o, "Bucket1", "Key6"}, 7}], Acc),
|
||||
?assertMatch([{{o, "Bucket1", "Key1", null}, 8},
|
||||
{{o, "Bucket1", "Key3", null}, 3},
|
||||
{{o, "Bucket1", "Key5", null}, 2},
|
||||
{{o, "Bucket1", "Key6", null}, 7}], Acc),
|
||||
|
||||
IMM1A = leveled_skiplist:enter({o, "Bucket1", "Key1"},
|
||||
{8, {active, infinity}, 0, null},
|
||||
leveled_skiplist:empty()),
|
||||
IMMiterA = leveled_skiplist:to_range(IMM1A, {o, "Bucket1", "Key1"}),
|
||||
IMMiterA = [{{o, "Bucket1", "Key1", null},
|
||||
{8, {active, infinity}, 0, null}}],
|
||||
AccA = keyfolder(IMMiterA,
|
||||
QueryArray,
|
||||
{o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"},
|
||||
{AccFun, []}),
|
||||
?assertMatch([{{o, "Bucket1", "Key1"}, 8},
|
||||
{{o, "Bucket1", "Key3"}, 3},
|
||||
{{o, "Bucket1", "Key5"}, 2}], AccA),
|
||||
QueryArray,
|
||||
{o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null},
|
||||
{AccFun, []}),
|
||||
?assertMatch([{{o, "Bucket1", "Key1", null}, 8},
|
||||
{{o, "Bucket1", "Key3", null}, 3},
|
||||
{{o, "Bucket1", "Key5", null}, 2}], AccA),
|
||||
|
||||
IMM3 = leveled_skiplist:enter({o, "Bucket1", "Key4"},
|
||||
{10, {active, infinity}, 0, null},
|
||||
IMM2),
|
||||
IMMiterB = leveled_skiplist:to_range(IMM3, {o, "Bucket1", "Key1"}),
|
||||
KL1B = [{{o, "Bucket1", "Key4", null}, {10, {active, infinity}, 0, null}}|KL1A],
|
||||
IMM3 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1B), ?CACHE_TYPE),
|
||||
IMMiterB = leveled_tree:match_range({o, "Bucket1", "Key1", null},
|
||||
{o, null, null, null},
|
||||
IMM3),
|
||||
AccB = keyfolder(IMMiterB,
|
||||
QueryArray,
|
||||
{o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"},
|
||||
{o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null},
|
||||
{AccFun, []}),
|
||||
?assertMatch([{{o, "Bucket1", "Key1"}, 8},
|
||||
{{o, "Bucket1", "Key3"}, 3},
|
||||
{{o, "Bucket1", "Key4"}, 10},
|
||||
{{o, "Bucket1", "Key5"}, 2},
|
||||
{{o, "Bucket1", "Key6"}, 7}], AccB).
|
||||
?assertMatch([{{o, "Bucket1", "Key1", null}, 8},
|
||||
{{o, "Bucket1", "Key3", null}, 3},
|
||||
{{o, "Bucket1", "Key4", null}, 10},
|
||||
{{o, "Bucket1", "Key5", null}, 2},
|
||||
{{o, "Bucket1", "Key6", null}, 7}], AccB).
|
||||
|
||||
create_file_test() ->
|
||||
Filename = "../test/new_file.sst",
|
||||
ok = file:write_file(Filename, term_to_binary("hello")),
|
||||
KVL = lists:usort(generate_randomkeys(10000)),
|
||||
Tree = leveled_skiplist:from_list(KVL),
|
||||
Tree = leveled_tree:from_orderedlist(KVL, ?CACHE_TYPE),
|
||||
FetchFun = fun(Slot) -> lists:nth(Slot, [Tree]) end,
|
||||
{ok,
|
||||
SP,
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
merge_lookup/4,
|
||||
insert_manifest_entry/4,
|
||||
remove_manifest_entry/4,
|
||||
replace_manifest_entry/5,
|
||||
switch_manifest_entry/4,
|
||||
mergefile_selector/2,
|
||||
add_snapshot/3,
|
||||
|
@ -51,6 +52,8 @@
|
|||
-define(MANIFEST_FILEX, "man").
|
||||
-define(MANIFEST_FP, "ledger_manifest").
|
||||
-define(MAX_LEVELS, 8).
|
||||
-define(TREE_TYPE, idxt).
|
||||
-define(TREE_WIDTH, 8).
|
||||
|
||||
-record(manifest, {levels,
|
||||
% an array of lists or trees representing the manifest
|
||||
|
@ -73,8 +76,16 @@
|
|||
%%%============================================================================
|
||||
|
||||
new_manifest() ->
|
||||
LevelArray0 = array:new([{size, ?MAX_LEVELS + 1}, {default, []}]),
|
||||
SetLowerLevelFun =
|
||||
fun(IDX, Acc) ->
|
||||
array:set(IDX, leveled_tree:empty(?TREE_TYPE), Acc)
|
||||
end,
|
||||
LevelArray1 = lists:foldl(SetLowerLevelFun,
|
||||
LevelArray0,
|
||||
lists:seq(2, ?MAX_LEVELS)),
|
||||
#manifest{
|
||||
levels = array:new([{size, ?MAX_LEVELS + 1}, {default, []}]),
|
||||
levels = LevelArray1,
|
||||
manifest_sqn = 0,
|
||||
snapshots = [],
|
||||
pending_deletes = dict:new(),
|
||||
|
@ -139,6 +150,30 @@ save_manifest(Manifest, RootPath) ->
|
|||
CRC = erlang:crc32(ManBin),
|
||||
ok = file:write_file(FP, <<CRC:32/integer, ManBin/binary>>).
|
||||
|
||||
|
||||
replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) ->
|
||||
Levels = Manifest#manifest.levels,
|
||||
Level = array:get(LevelIdx, Levels),
|
||||
UpdLevel = replace_entry(LevelIdx, Level, Removals, Additions),
|
||||
leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]),
|
||||
PendingDeletes = update_pendingdeletes(ManSQN,
|
||||
Removals,
|
||||
Manifest#manifest.pending_deletes),
|
||||
UpdLevels = array:set(LevelIdx, UpdLevel, Levels),
|
||||
case is_empty(LevelIdx, UpdLevel) of
|
||||
true ->
|
||||
Manifest#manifest{levels = UpdLevels,
|
||||
basement = get_basement(UpdLevels),
|
||||
manifest_sqn = ManSQN,
|
||||
pending_deletes = PendingDeletes};
|
||||
false ->
|
||||
Basement = max(LevelIdx, Manifest#manifest.basement),
|
||||
Manifest#manifest{levels = UpdLevels,
|
||||
basement = Basement,
|
||||
manifest_sqn = ManSQN,
|
||||
pending_deletes = PendingDeletes}
|
||||
end.
|
||||
|
||||
insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
|
||||
Levels = Manifest#manifest.levels,
|
||||
Level = array:get(LevelIdx, Levels),
|
||||
|
@ -154,22 +189,9 @@ remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) ->
|
|||
Level = array:get(LevelIdx, Levels),
|
||||
UpdLevel = remove_entry(LevelIdx, Level, Entry),
|
||||
leveled_log:log("PC019", ["remove", LevelIdx, UpdLevel]),
|
||||
DelFun =
|
||||
fun(E, Acc) ->
|
||||
dict:store(E#manifest_entry.filename,
|
||||
{ManSQN, E},
|
||||
Acc)
|
||||
end,
|
||||
Entries =
|
||||
case is_list(Entry) of
|
||||
true ->
|
||||
Entry;
|
||||
false ->
|
||||
[Entry]
|
||||
end,
|
||||
PendingDeletes = lists:foldl(DelFun,
|
||||
Manifest#manifest.pending_deletes,
|
||||
Entries),
|
||||
PendingDeletes = update_pendingdeletes(ManSQN,
|
||||
Entry,
|
||||
Manifest#manifest.pending_deletes),
|
||||
UpdLevels = array:set(LevelIdx, UpdLevel, Levels),
|
||||
case is_empty(LevelIdx, UpdLevel) of
|
||||
true ->
|
||||
|
@ -322,58 +344,181 @@ levelzero_present(Manifest) ->
|
|||
%%% Internal Functions
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
%% All these internal functions that work on a level are also passed LevelIdx
%% even if this is not presently relevant. Currently levels are lists, but
%% future branches may make lower levels trees or skiplists to improve fetch
%% efficiency
|
||||
|
||||
load_level(_LevelIdx, Level, PidFun, SQNFun) ->
|
||||
LevelLoadFun =
|
||||
load_level(LevelIdx, Level, PidFun, SQNFun) ->
|
||||
HigherLevelLoadFun =
|
||||
fun(ME, {L_Out, L_MaxSQN}) ->
|
||||
FN = ME#manifest_entry.filename,
|
||||
P = PidFun(FN),
|
||||
SQN = SQNFun(P),
|
||||
{[ME#manifest_entry{owner=P}|L_Out], max(SQN, L_MaxSQN)}
|
||||
end,
|
||||
lists:foldr(LevelLoadFun, {[], 0}, Level).
|
||||
LowerLevelLoadFun =
|
||||
fun({EK, ME}, {L_Out, L_MaxSQN}) ->
|
||||
FN = ME#manifest_entry.filename,
|
||||
P = PidFun(FN),
|
||||
SQN = SQNFun(P),
|
||||
{[{EK, ME#manifest_entry{owner=P}}|L_Out], max(SQN, L_MaxSQN)}
|
||||
end,
|
||||
case LevelIdx =< 1 of
|
||||
true ->
|
||||
lists:foldr(HigherLevelLoadFun, {[], 0}, Level);
|
||||
false ->
|
||||
{L0, MaxSQN} = lists:foldr(LowerLevelLoadFun,
|
||||
{[], 0},
|
||||
leveled_tree:to_list(Level)),
|
||||
{leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), MaxSQN}
|
||||
end.
|
||||
|
||||
close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 ->
|
||||
lists:foreach(CloseEntryFun, Level);
|
||||
close_level(_LevelIdx, Level, CloseEntryFun) ->
|
||||
lists:foreach(CloseEntryFun, Level).
|
||||
lists:foreach(CloseEntryFun, leveled_tree:to_list(Level)).
|
||||
|
||||
is_empty(_LevelIdx, []) ->
|
||||
true;
|
||||
is_empty(_LevelIdx, _Level) ->
|
||||
false.
|
||||
|
||||
size(_LevelIdx, Level) ->
|
||||
length(Level).
|
||||
|
||||
add_entry(_LevelIdx, Level, Entries) when is_list(Entries) ->
|
||||
lists:sort(Level ++ Entries);
|
||||
add_entry(_LevelIdx, Level, Entry) ->
|
||||
lists:sort([Entry|Level]).
|
||||
|
||||
remove_entry(_LevelIdx, Level, Entries) when is_list(Entries) ->
|
||||
% We're assuming we're removing a sorted sublist
|
||||
RemLength = length(Entries),
|
||||
[RemStart|_Tail] = Entries,
|
||||
remove_section(Level, RemStart#manifest_entry.start_key, RemLength);
|
||||
remove_entry(_LevelIdx, Level, Entry) ->
|
||||
remove_section(Level, Entry#manifest_entry.start_key, 1).
|
||||
|
||||
remove_section(Level, SectionStartKey, SectionLength) ->
|
||||
PredFun =
|
||||
fun(E) ->
|
||||
E#manifest_entry.start_key < SectionStartKey
|
||||
end,
|
||||
{Pre, Rest} = lists:splitwith(PredFun, Level),
|
||||
Post = lists:nthtail(SectionLength, Rest),
|
||||
Pre ++ Post.
|
||||
|
||||
|
||||
key_lookup_level(_LevelIdx, [], _Key) ->
|
||||
is_empty(LevelIdx, _Level) when LevelIdx =< 1 ->
|
||||
false;
|
||||
key_lookup_level(LevelIdx, [Entry|Rest], Key) ->
|
||||
is_empty(_LevelIdx, Level) ->
|
||||
leveled_tree:tsize(Level) == 0.
|
||||
|
||||
size(LevelIdx, Level) when LevelIdx =< 1 ->
|
||||
length(Level);
|
||||
size(_LevelIdx, Level) ->
|
||||
leveled_tree:tsize(Level).
|
||||
|
||||
pred_fun(LevelIdx, StartKey, _EndKey) when LevelIdx =< 1 ->
|
||||
fun(ME) ->
|
||||
ME#manifest_entry.start_key < StartKey
|
||||
end;
|
||||
pred_fun(_LevelIdx, _StartKey, EndKey) ->
|
||||
fun({EK, _ME}) ->
|
||||
EK < EndKey
|
||||
end.
|
||||
|
||||
add_entry(_LevelIdx, Level, []) ->
|
||||
Level;
|
||||
add_entry(LevelIdx, Level, Entries) when is_list(Entries) ->
|
||||
FirstEntry = lists:nth(1, Entries),
|
||||
PredFun = pred_fun(LevelIdx,
|
||||
FirstEntry#manifest_entry.start_key,
|
||||
FirstEntry#manifest_entry.end_key),
|
||||
case LevelIdx =< 1 of
|
||||
true ->
|
||||
{LHS, RHS} = lists:splitwith(PredFun, Level),
|
||||
lists:append([LHS, Entries, RHS]);
|
||||
false ->
|
||||
{LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)),
|
||||
MapFun =
|
||||
fun(ME) ->
|
||||
{ME#manifest_entry.end_key, ME}
|
||||
end,
|
||||
Entries0 = lists:map(MapFun, Entries),
|
||||
leveled_tree:from_orderedlist(lists:append([LHS, Entries0, RHS]),
|
||||
?TREE_TYPE,
|
||||
?TREE_WIDTH)
|
||||
end;
|
||||
add_entry(LevelIdx, Level, Entry) ->
|
||||
add_entry(LevelIdx, Level, [Entry]).
|
||||
|
||||
remove_entry(LevelIdx, Level, Entries) ->
|
||||
% We're assuming we're removing a sorted sublist
|
||||
{RemLength, FirstRemoval} = measure_removals(Entries),
|
||||
remove_section(LevelIdx, Level, FirstRemoval, RemLength).
|
||||
|
||||
measure_removals(Removals) ->
|
||||
case is_list(Removals) of
|
||||
true ->
|
||||
{length(Removals), lists:nth(1, Removals)};
|
||||
false ->
|
||||
{1, Removals}
|
||||
end.
|
||||
|
||||
remove_section(LevelIdx, Level, FirstEntry, SectionLength) ->
|
||||
PredFun = pred_fun(LevelIdx,
|
||||
FirstEntry#manifest_entry.start_key,
|
||||
FirstEntry#manifest_entry.end_key),
|
||||
case LevelIdx =< 1 of
|
||||
true ->
|
||||
{LHS, RHS} = lists:splitwith(PredFun, Level),
|
||||
Post = lists:nthtail(SectionLength, RHS),
|
||||
lists:append([LHS, Post]);
|
||||
false ->
|
||||
{LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)),
|
||||
Post = lists:nthtail(SectionLength, RHS),
|
||||
leveled_tree:from_orderedlist(lists:append([LHS, Post]),
|
||||
?TREE_TYPE,
|
||||
?TREE_WIDTH)
|
||||
end.
|
||||
|
||||
replace_entry(LevelIdx, Level, Removals, Additions) when LevelIdx =< 1 ->
|
||||
{SectionLength, FirstEntry} = measure_removals(Removals),
|
||||
PredFun = pred_fun(LevelIdx,
|
||||
FirstEntry#manifest_entry.start_key,
|
||||
FirstEntry#manifest_entry.end_key),
|
||||
{LHS, RHS} = lists:splitwith(PredFun, Level),
|
||||
Post = lists:nthtail(SectionLength, RHS),
|
||||
case is_list(Additions) of
|
||||
true ->
|
||||
lists:append([LHS, Additions, Post]);
|
||||
false ->
|
||||
lists:append([LHS, [Additions], Post])
|
||||
end;
|
||||
replace_entry(LevelIdx, Level, Removals, Additions) ->
|
||||
{SectionLength, FirstEntry} = measure_removals(Removals),
|
||||
PredFun = pred_fun(LevelIdx,
|
||||
FirstEntry#manifest_entry.start_key,
|
||||
FirstEntry#manifest_entry.end_key),
|
||||
{LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)),
|
||||
Post =
|
||||
case RHS of
|
||||
[] ->
|
||||
[];
|
||||
_ ->
|
||||
lists:nthtail(SectionLength, RHS)
|
||||
end,
|
||||
UpdList =
|
||||
case is_list(Additions) of
|
||||
true ->
|
||||
MapFun =
|
||||
fun(ME) ->
|
||||
{ME#manifest_entry.end_key, ME}
|
||||
end,
|
||||
Additions0 = lists:map(MapFun, Additions),
|
||||
lists:append([LHS, Additions0, Post]);
|
||||
false ->
|
||||
lists:append([LHS,
|
||||
[{Additions#manifest_entry.end_key,
|
||||
Additions}],
|
||||
Post])
|
||||
end,
|
||||
leveled_tree:from_orderedlist(UpdList, ?TREE_TYPE, ?TREE_WIDTH).
|
||||
|
||||
|
||||
update_pendingdeletes(ManSQN, Removals, PendingDeletes) ->
|
||||
DelFun =
|
||||
fun(E, Acc) ->
|
||||
dict:store(E#manifest_entry.filename,
|
||||
{ManSQN, E},
|
||||
Acc)
|
||||
end,
|
||||
Entries =
|
||||
case is_list(Removals) of
|
||||
true ->
|
||||
Removals;
|
||||
false ->
|
||||
[Removals]
|
||||
end,
|
||||
lists:foldl(DelFun, PendingDeletes, Entries).
|
||||
|
||||
key_lookup_level(LevelIdx, [], _Key) when LevelIdx =< 1 ->
|
||||
false;
|
||||
key_lookup_level(LevelIdx, [Entry|Rest], Key) when LevelIdx =< 1 ->
|
||||
case Entry#manifest_entry.end_key >= Key of
|
||||
true ->
|
||||
case Key >= Entry#manifest_entry.start_key of
|
||||
|
@ -384,8 +529,20 @@ key_lookup_level(LevelIdx, [Entry|Rest], Key) ->
|
|||
end;
|
||||
false ->
|
||||
key_lookup_level(LevelIdx, Rest, Key)
|
||||
end;
|
||||
key_lookup_level(_LevelIdx, Level, Key) ->
|
||||
StartKeyFun =
|
||||
fun(ME) ->
|
||||
ME#manifest_entry.start_key
|
||||
end,
|
||||
case leveled_tree:search(Key, Level, StartKeyFun) of
|
||||
none ->
|
||||
false;
|
||||
{_EK, ME} ->
|
||||
ME#manifest_entry.owner
|
||||
end.
|
||||
|
||||
|
||||
range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) ->
|
||||
Range =
|
||||
case LevelIdx > Manifest#manifest.basement of
|
||||
|
@ -400,7 +557,7 @@ range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) ->
|
|||
end,
|
||||
lists:map(MakePointerFun, Range).
|
||||
|
||||
range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) ->
|
||||
range_lookup_level(LevelIdx, Level, QStartKey, QEndKey) when LevelIdx =< 1 ->
|
||||
BeforeFun =
|
||||
fun(M) ->
|
||||
QStartKey > M#manifest_entry.end_key
|
||||
|
@ -412,7 +569,19 @@ range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) ->
|
|||
end,
|
||||
{_Before, MaybeIn} = lists:splitwith(BeforeFun, Level),
|
||||
{In, _After} = lists:splitwith(NotAfterFun, MaybeIn),
|
||||
In.
|
||||
In;
|
||||
range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) ->
|
||||
StartKeyFun =
|
||||
fun(ME) ->
|
||||
ME#manifest_entry.start_key
|
||||
end,
|
||||
Range = leveled_tree:search_range(QStartKey, QEndKey, Level, StartKeyFun),
|
||||
MapFun =
|
||||
fun({_EK, ME}) ->
|
||||
ME
|
||||
end,
|
||||
lists:map(MapFun, Range).
|
||||
|
||||
|
||||
get_basement(Levels) ->
|
||||
GetBaseFun =
|
||||
|
@ -456,6 +625,7 @@ open_manifestfile(RootPath, [TopManSQN|Rest]) ->
|
|||
open_manifestfile(RootPath, Rest)
|
||||
end.
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% Test
|
||||
%%%============================================================================
|
||||
|
@ -587,6 +757,98 @@ keylookup_manifest_test() ->
|
|||
?assertMatch("pid_y3", key_lookup(Man13, 1, LK1_4)),
|
||||
?assertMatch("pid_z5", key_lookup(Man13, 2, LK1_4)).
|
||||
|
||||
ext_keylookup_manifest_test() ->
|
||||
RP = "../test",
|
||||
{_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(),
|
||||
save_manifest(Man6, RP),
|
||||
|
||||
E7 = #manifest_entry{start_key={o, "Bucket1", "K997", null},
|
||||
end_key={o, "Bucket1", "K999", null},
|
||||
filename="Z7",
|
||||
owner="pid_z7"},
|
||||
Man7 = insert_manifest_entry(Man6, 2, 2, E7),
|
||||
save_manifest(Man7, RP),
|
||||
ManOpen1 = open_manifest(RP),
|
||||
?assertMatch(2, get_manifest_sqn(ManOpen1)),
|
||||
|
||||
Man7FN = filepath(RP, 2, current_manifest),
|
||||
Man7FNAlt = filename:rootname(Man7FN) ++ ".pnd",
|
||||
{ok, BytesCopied} = file:copy(Man7FN, Man7FNAlt),
|
||||
{ok, Bin} = file:read_file(Man7FN),
|
||||
?assertMatch(BytesCopied, byte_size(Bin)),
|
||||
RandPos = random:uniform(bit_size(Bin) - 1),
|
||||
<<Pre:RandPos/bitstring, BitToFlip:1/integer, Rest/bitstring>> = Bin,
|
||||
Flipped = BitToFlip bxor 1,
|
||||
ok = file:write_file(Man7FN,
|
||||
<<Pre:RandPos/bitstring,
|
||||
Flipped:1/integer,
|
||||
Rest/bitstring>>),
|
||||
|
||||
?assertMatch(2, get_manifest_sqn(Man7)),
|
||||
|
||||
ManOpen2 = open_manifest(RP),
|
||||
?assertMatch(1, get_manifest_sqn(ManOpen2)),
|
||||
|
||||
E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"},
|
||||
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"},
|
||||
filename="Z1",
|
||||
owner="pid_z1"},
|
||||
E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"},
|
||||
end_key={o, "Bucket1", "K71", null},
|
||||
filename="Z2",
|
||||
owner="pid_z2"},
|
||||
E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null},
|
||||
end_key={o, "Bucket1", "K993", null},
|
||||
filename="Z3",
|
||||
owner="pid_z3"},
|
||||
|
||||
E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"},
|
||||
end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"},
|
||||
owner="pid_y1",
|
||||
filename="Y1"},
|
||||
E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
|
||||
end_key={o, "Bucket1", "K45", null},
|
||||
owner="pid_y2",
|
||||
filename="Y2"},
|
||||
E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null},
|
||||
end_key={o, "Bucket1", "K812", null},
|
||||
owner="pid_y3",
|
||||
filename="Y3"},
|
||||
E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null},
|
||||
end_key={o, "Bucket1", "K998", null},
|
||||
owner="pid_y4",
|
||||
filename="Y4"},
|
||||
|
||||
Man8 = replace_manifest_entry(ManOpen2, 2, 1, E1, E1_2),
|
||||
Man9 = remove_manifest_entry(Man8, 2, 1, [E2, E3]),
|
||||
Man10 = insert_manifest_entry(Man9, 2, 1, [E2_2, E3_2, E4_2]),
|
||||
?assertMatch(2, get_manifest_sqn(Man10)),
|
||||
|
||||
LK1_4 = {o, "Bucket1", "K75", null},
|
||||
?assertMatch("pid_y3", key_lookup(Man10, 1, LK1_4)),
|
||||
?assertMatch("pid_z5", key_lookup(Man10, 2, LK1_4)),
|
||||
|
||||
E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"},
|
||||
end_key={o, "Bucket1", "K78", null},
|
||||
filename="Z5",
|
||||
owner="pid_z5"},
|
||||
E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null},
|
||||
end_key={o, "Bucket1", "K996", null},
|
||||
filename="Z6",
|
||||
owner="pid_z6"},
|
||||
|
||||
Man11 = remove_manifest_entry(Man10, 3, 2, [E5, E6]),
|
||||
?assertMatch(3, get_manifest_sqn(Man11)),
|
||||
?assertMatch(false, key_lookup(Man11, 2, LK1_4)),
|
||||
|
||||
E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"},
|
||||
end_key={o, "Bucket1", "K45", null},
|
||||
owner="pid_y2",
|
||||
filename="Y2"},
|
||||
|
||||
Man12 = replace_manifest_entry(Man11, 4, 2, E2_2, E5),
|
||||
?assertMatch(4, get_manifest_sqn(Man12)),
|
||||
?assertMatch("pid_z5", key_lookup(Man12, 2, LK1_4)).
|
||||
|
||||
rangequery_manifest_test() ->
|
||||
{_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(),
|
||||
|
|
|
@ -57,7 +57,7 @@ prepare_for_index(IndexArray, Hash) ->
|
|||
|
||||
|
||||
add_to_cache(L0Size, {LevelMinus1, MinSQN, MaxSQN}, LedgerSQN, TreeList) ->
|
||||
LM1Size = leveled_skiplist:size(LevelMinus1),
|
||||
LM1Size = leveled_tree:tsize(LevelMinus1),
|
||||
case LM1Size of
|
||||
0 ->
|
||||
{LedgerSQN, L0Size, TreeList};
|
||||
|
@ -99,7 +99,7 @@ to_list(Slots, FetchFun) ->
|
|||
SlotList = lists:reverse(lists:seq(1, Slots)),
|
||||
FullList = lists:foldl(fun(Slot, Acc) ->
|
||||
Tree = FetchFun(Slot),
|
||||
L = leveled_skiplist:to_list(Tree),
|
||||
L = leveled_tree:to_list(Tree),
|
||||
lists:ukeymerge(1, Acc, L)
|
||||
end,
|
||||
[],
|
||||
|
@ -119,14 +119,14 @@ check_levelzero(Key, Hash, PosList, TreeList) ->
|
|||
check_slotlist(Key, Hash, PosList, TreeList).
|
||||
|
||||
|
||||
merge_trees(StartKey, EndKey, SkipListList, LevelMinus1) ->
|
||||
lists:foldl(fun(SkipList, Acc) ->
|
||||
R = leveled_skiplist:to_range(SkipList,
|
||||
StartKey,
|
||||
EndKey),
|
||||
merge_trees(StartKey, EndKey, TreeList, LevelMinus1) ->
|
||||
lists:foldl(fun(Tree, Acc) ->
|
||||
R = leveled_tree:match_range(StartKey,
|
||||
EndKey,
|
||||
Tree),
|
||||
lists:ukeymerge(1, Acc, R) end,
|
||||
[],
|
||||
[LevelMinus1|lists:reverse(SkipListList)]).
|
||||
[LevelMinus1|lists:reverse(TreeList)]).
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
|
@ -148,7 +148,7 @@ split_hash(Hash) ->
|
|||
H0 = (Hash bsr 8) band 8388607,
|
||||
{Slot, H0}.
|
||||
|
||||
check_slotlist(Key, Hash, CheckList, TreeList) ->
|
||||
check_slotlist(Key, _Hash, CheckList, TreeList) ->
|
||||
SlotCheckFun =
|
||||
fun(SlotToCheck, {Found, KV}) ->
|
||||
case Found of
|
||||
|
@ -156,7 +156,7 @@ check_slotlist(Key, Hash, CheckList, TreeList) ->
|
|||
{Found, KV};
|
||||
false ->
|
||||
CheckTree = lists:nth(SlotToCheck, TreeList),
|
||||
case leveled_skiplist:lookup(Key, Hash, CheckTree) of
|
||||
case leveled_tree:match(Key, CheckTree) of
|
||||
none ->
|
||||
{Found, KV};
|
||||
{value, Value} ->
|
||||
|
@ -188,7 +188,7 @@ generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
|
|||
[],
|
||||
BucketRangeLow,
|
||||
BucketRangeHigh),
|
||||
leveled_skiplist:from_list(KVL).
|
||||
leveled_tree:from_orderedlist(lists:ukeysort(1, KVL), ?CACHE_TYPE).
|
||||
|
||||
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
|
||||
Acc;
|
||||
|
@ -223,7 +223,7 @@ compare_method_test() ->
|
|||
?assertMatch(32000, SQN),
|
||||
?assertMatch(true, Size =< 32000),
|
||||
|
||||
TestList = leveled_skiplist:to_list(generate_randomkeys(1, 2000, 1, 800)),
|
||||
TestList = leveled_tree:to_list(generate_randomkeys(1, 2000, 1, 800)),
|
||||
|
||||
FindKeyFun =
|
||||
fun(Key) ->
|
||||
|
@ -232,7 +232,7 @@ compare_method_test() ->
|
|||
true ->
|
||||
{true, KV};
|
||||
false ->
|
||||
L0 = leveled_skiplist:lookup(Key, Tree),
|
||||
L0 = leveled_tree:match(Key, Tree),
|
||||
case L0 of
|
||||
none ->
|
||||
{false, not_found};
|
||||
|
@ -270,19 +270,20 @@ compare_method_test() ->
|
|||
P = leveled_codec:endkey_passed(EndKey, K),
|
||||
case {K, P} of
|
||||
{K, false} when K >= StartKey ->
|
||||
leveled_skiplist:enter(K, V, Acc);
|
||||
[{K, V}|Acc];
|
||||
_ ->
|
||||
Acc
|
||||
end
|
||||
end,
|
||||
leveled_skiplist:empty(),
|
||||
[],
|
||||
DumpList),
|
||||
Sz0 = leveled_skiplist:size(Q0),
|
||||
Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, Q0), ?CACHE_TYPE),
|
||||
Sz0 = leveled_tree:tsize(Tree),
|
||||
io:format("Crude method took ~w microseconds resulting in tree of " ++
|
||||
"size ~w~n",
|
||||
[timer:now_diff(os:timestamp(), SWa), Sz0]),
|
||||
SWb = os:timestamp(),
|
||||
Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_skiplist:empty()),
|
||||
Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_tree:empty(?CACHE_TYPE)),
|
||||
Sz1 = length(Q1),
|
||||
io:format("Merge method took ~w microseconds resulting in tree of " ++
|
||||
"size ~w~n",
|
||||
|
@ -299,7 +300,7 @@ with_index_test() ->
|
|||
fun(_X, {{LedgerSQN, L0Size, L0TreeList}, L0Idx, SrcList}) ->
|
||||
LM1 = generate_randomkeys_aslist(LedgerSQN + 1, 2000, 1, 500),
|
||||
LM1Array = lists:foldl(IndexPrepareFun, new_index(), LM1),
|
||||
LM1SL = leveled_skiplist:from_list(LM1),
|
||||
LM1SL = leveled_tree:from_orderedlist(lists:ukeysort(1, LM1), ?CACHE_TYPE),
|
||||
UpdL0Index = add_to_index(LM1Array, L0Idx, length(L0TreeList) + 1),
|
||||
R = add_to_cache(L0Size,
|
||||
{LM1SL, LedgerSQN + 1, LedgerSQN + 2000},
|
||||
|
|
|
@ -1,661 +0,0 @@
|
|||
%% -------- SKIPLIST ---------
|
||||
%%
|
||||
%% For storing small numbers of {K, V} pairs where reasonable insertion and
|
||||
%% fetch times, but with fast support for flattening to a list or a sublist
|
||||
%% within a certain key range
|
||||
%%
|
||||
%% Used instead of gb_trees to retain compatibility with OTP16 (and Riak's
|
||||
%% ongoing dependency on OTP16)
|
||||
%%
|
||||
%% Not a proper skip list. Only supports a fixed depth. Good enough for the
|
||||
%% purposes of leveled. Also uses the peculiar endkey_passed function within
|
||||
%% leveled. Not tested beyond a depth of 2.
|
||||
|
||||
-module(leveled_skiplist).
|
||||
|
||||
-include("include/leveled.hrl").
|
||||
|
||||
-export([
|
||||
from_list/1,
|
||||
from_list/2,
|
||||
from_sortedlist/1,
|
||||
from_sortedlist/2,
|
||||
from_orderedset/1,
|
||||
from_orderedset/2,
|
||||
to_list/1,
|
||||
enter/3,
|
||||
enter/4,
|
||||
enter_nolookup/3,
|
||||
to_range/2,
|
||||
to_range/3,
|
||||
lookup/2,
|
||||
lookup/3,
|
||||
empty/0,
|
||||
empty/1,
|
||||
size/1
|
||||
]).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
-define(SKIP_WIDTH, 16).
|
||||
-define(LIST_HEIGHT, 2).
|
||||
-define(INFINITY_KEY, {null, null, null, null, null}).
|
||||
-define(BITARRAY_SIZE, 2048).
|
||||
|
||||
%%%============================================================================
|
||||
%%% SkipList API
|
||||
%%%============================================================================
|
||||
|
||||
enter(Key, Value, SkipList) ->
|
||||
Hash = leveled_codec:magic_hash(Key),
|
||||
enter(Key, Hash, Value, SkipList).
|
||||
|
||||
enter(Key, Hash, Value, SkipList) ->
|
||||
Bloom0 =
|
||||
case element(1, SkipList) of
|
||||
list_only ->
|
||||
list_only;
|
||||
Bloom ->
|
||||
leveled_tinybloom:enter({hash, Hash}, Bloom)
|
||||
end,
|
||||
{Bloom0,
|
||||
enter(Key, Value, erlang:phash2(Key),
|
||||
element(2, SkipList),
|
||||
?SKIP_WIDTH, ?LIST_HEIGHT)}.
|
||||
|
||||
%% Can iterate over a key entered this way, but never lookup the key
|
||||
%% used for index terms
|
||||
%% The key may still be a marker key - and the much cheaper native hash
|
||||
%% is used to determine this, avoiding the more expensive magic hash
|
||||
enter_nolookup(Key, Value, SkipList) ->
|
||||
{element(1, SkipList),
|
||||
enter(Key, Value, erlang:phash2(Key),
|
||||
element(2, SkipList),
|
||||
?SKIP_WIDTH, ?LIST_HEIGHT)}.
|
||||
|
||||
from_orderedset(Table) ->
|
||||
from_orderedset(Table, false).
|
||||
|
||||
from_orderedset(Table, Bloom) ->
|
||||
from_sortedlist(ets:tab2list(Table), Bloom).
|
||||
|
||||
from_list(UnsortedKVL) ->
|
||||
from_list(UnsortedKVL, false).
|
||||
|
||||
from_list(UnsortedKVL, BloomProtect) ->
|
||||
KVL = lists:ukeysort(1, UnsortedKVL),
|
||||
from_sortedlist(KVL, BloomProtect).
|
||||
|
||||
from_sortedlist(SortedKVL) ->
|
||||
from_sortedlist(SortedKVL, false).
|
||||
|
||||
from_sortedlist([], BloomProtect) ->
|
||||
empty(BloomProtect);
|
||||
from_sortedlist(SortedKVL, BloomProtect) ->
|
||||
Bloom0 =
|
||||
case BloomProtect of
|
||||
true ->
|
||||
lists:foldr(fun({K, _V}, Bloom) ->
|
||||
leveled_tinybloom:enter(K, Bloom) end,
|
||||
leveled_tinybloom:empty(?SKIP_WIDTH),
|
||||
SortedKVL);
|
||||
false ->
|
||||
list_only
|
||||
end,
|
||||
{Bloom0, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}.
|
||||
|
||||
lookup(Key, SkipList) ->
|
||||
case element(1, SkipList) of
|
||||
list_only ->
|
||||
list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
|
||||
_ ->
|
||||
lookup(Key, leveled_codec:magic_hash(Key), SkipList)
|
||||
end.
|
||||
|
||||
lookup(Key, Hash, SkipList) ->
|
||||
case element(1, SkipList) of
|
||||
list_only ->
|
||||
list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT);
|
||||
_ ->
|
||||
case leveled_tinybloom:check({hash, Hash}, element(1, SkipList)) of
|
||||
false ->
|
||||
none;
|
||||
true ->
|
||||
list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT)
|
||||
end
|
||||
end.
|
||||
|
||||
|
||||
%% Rather than support iterator_from like gb_trees, will just output a key
%% sorted list for the desired range, which can then be iterated over as normal
|
||||
to_range(SkipList, Start) ->
|
||||
to_range(element(2, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT).
|
||||
|
||||
to_range(SkipList, Start, End) ->
|
||||
to_range(element(2, SkipList), Start, End, ?LIST_HEIGHT).
|
||||
|
||||
to_list(SkipList) ->
|
||||
to_list(element(2, SkipList), ?LIST_HEIGHT).
|
||||
|
||||
empty() ->
|
||||
empty(false).
|
||||
|
||||
empty(BloomProtect) ->
|
||||
case BloomProtect of
|
||||
true ->
|
||||
{leveled_tinybloom:empty(?SKIP_WIDTH),
|
||||
empty([], ?LIST_HEIGHT)};
|
||||
false ->
|
||||
{list_only, empty([], ?LIST_HEIGHT)}
|
||||
end.
|
||||
|
||||
size(SkipList) ->
|
||||
size(element(2, SkipList), ?LIST_HEIGHT).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% SkipList Base Functions
|
||||
%%%============================================================================
|
||||
|
||||
enter(Key, Value, Hash, SkipList, Width, 1) ->
|
||||
{MarkerKey, SubList} = find_mark(Key, SkipList),
|
||||
case Hash rem Width of
|
||||
0 ->
|
||||
{LHS, RHS} = lists:splitwith(fun({K, _V}) ->
|
||||
K =< Key end,
|
||||
SubList),
|
||||
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
|
||||
SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1],
|
||||
lists:ukeysort(1, SkpL2);
|
||||
_ ->
|
||||
{LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < Key end, SubList),
|
||||
UpdSubList =
|
||||
case RHS of
|
||||
[] ->
|
||||
LHS ++ [{Key, Value}];
|
||||
[{FirstKey, _V}|RHSTail] ->
|
||||
case FirstKey of
|
||||
Key ->
|
||||
LHS ++ [{Key, Value}] ++ RHSTail;
|
||||
_ ->
|
||||
LHS ++ [{Key, Value}] ++ RHS
|
||||
end
|
||||
end,
|
||||
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
|
||||
end;
|
||||
enter(Key, Value, Hash, SkipList, Width, Level) ->
|
||||
HashMatch = width(Level, Width),
|
||||
{MarkerKey, SubSkipList} = find_mark(Key, SkipList),
|
||||
UpdSubSkipList = enter(Key, Value, Hash, SubSkipList, Width, Level - 1),
|
||||
case Hash rem HashMatch of
|
||||
0 ->
|
||||
%
|
||||
{LHS, RHS} = lists:splitwith(fun({K, _V}) ->
|
||||
K =< Key end,
|
||||
UpdSubSkipList),
|
||||
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
|
||||
lists:ukeysort(1, [{Key, LHS}|SkpL1]);
|
||||
_ ->
|
||||
% Need to replace Marker Key with sublist
|
||||
lists:keyreplace(MarkerKey,
|
||||
1,
|
||||
SkipList,
|
||||
{MarkerKey, UpdSubSkipList})
|
||||
end.
|
||||
|
||||
from_list(SkipList, _SkipWidth, 0) ->
|
||||
SkipList;
|
||||
from_list(KVList, SkipWidth, ListHeight) ->
|
||||
L0 = length(KVList),
|
||||
SL0 =
|
||||
case L0 > SkipWidth of
|
||||
true ->
|
||||
from_list(KVList, L0, [], SkipWidth);
|
||||
false ->
|
||||
{LastK, _LastSL} = lists:last(KVList),
|
||||
[{LastK, KVList}]
|
||||
end,
|
||||
from_list(SL0, SkipWidth, ListHeight - 1).
|
||||
|
||||
from_list([], 0, SkipList, _SkipWidth) ->
|
||||
SkipList;
|
||||
from_list(KVList, L, SkipList, SkipWidth) ->
|
||||
SubLL = min(SkipWidth, L),
|
||||
{Head, Tail} = lists:split(SubLL, KVList),
|
||||
{LastK, _LastV} = lists:last(Head),
|
||||
from_list(Tail, L - SubLL, SkipList ++ [{LastK, Head}], SkipWidth).
|
||||
|
||||
|
||||
list_lookup(Key, SkipList, 1) ->
|
||||
SubList = get_sublist(Key, SkipList),
|
||||
case lists:keyfind(Key, 1, SubList) of
|
||||
false ->
|
||||
none;
|
||||
{Key, V} ->
|
||||
{value, V}
|
||||
end;
|
||||
list_lookup(Key, SkipList, Level) ->
|
||||
SubList = get_sublist(Key, SkipList),
|
||||
case SubList of
|
||||
null ->
|
||||
none;
|
||||
_ ->
|
||||
list_lookup(Key, SubList, Level - 1)
|
||||
end.
|
||||
|
||||
|
||||
to_list(SkipList, 1) ->
|
||||
lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList);
|
||||
to_list(SkipList, Level) ->
|
||||
lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list(SL, Level - 1) end,
|
||||
[],
|
||||
SkipList).
|
||||
|
||||
|
||||
to_range(SkipList, StartKey, EndKey, ListHeight) ->
|
||||
to_range(SkipList, StartKey, EndKey, ListHeight, [], true).
|
||||
|
||||
to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) ->
|
||||
SL = sublist_above(SkipList, StartKey, ListHeight, StartIncl),
|
||||
case SL of
|
||||
[] ->
|
||||
Acc;
|
||||
_ ->
|
||||
{LK, _LV} = lists:last(SL),
|
||||
case leveled_codec:endkey_passed(EndKey, LK) of
|
||||
false ->
|
||||
to_range(SkipList,
|
||||
LK,
|
||||
EndKey,
|
||||
ListHeight,
|
||||
Acc ++ SL,
|
||||
false);
|
||||
true ->
|
||||
SplitFun =
|
||||
fun({K, _V}) ->
|
||||
not leveled_codec:endkey_passed(EndKey, K) end,
|
||||
LHS = lists:takewhile(SplitFun, SL),
|
||||
Acc ++ LHS
|
||||
end
|
||||
end.
|
||||
|
||||
sublist_above(SkipList, StartKey, 0, StartIncl) ->
|
||||
TestFun =
|
||||
fun({K, _V}) ->
|
||||
case StartIncl of
|
||||
true ->
|
||||
K < StartKey;
|
||||
false ->
|
||||
K =< StartKey
|
||||
end end,
|
||||
lists:dropwhile(TestFun, SkipList);
|
||||
sublist_above(SkipList, StartKey, Level, StartIncl) ->
|
||||
TestFun =
|
||||
fun({K, _SL}) ->
|
||||
case StartIncl of
|
||||
true ->
|
||||
K < StartKey;
|
||||
false ->
|
||||
K =< StartKey
|
||||
end end,
|
||||
RHS = lists:dropwhile(TestFun, SkipList),
|
||||
case RHS of
|
||||
[] ->
|
||||
[];
|
||||
[{_K, SL}|_Rest] ->
|
||||
sublist_above(SL, StartKey, Level - 1, StartIncl)
|
||||
end.
|
||||
|
||||
empty(SkipList, 1) ->
|
||||
[{?INFINITY_KEY, SkipList}];
|
||||
empty(SkipList, Level) ->
|
||||
empty([{?INFINITY_KEY, SkipList}], Level - 1).
|
||||
|
||||
size(SkipList, 1) ->
|
||||
lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList);
|
||||
size(SkipList, Level) ->
|
||||
lists:foldl(fun({_Mark, SL}, Acc) -> size(SL, Level - 1) + Acc end,
|
||||
0,
|
||||
SkipList).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
%%%============================================================================
|
||||
|
||||
width(1, Width) ->
|
||||
Width;
|
||||
width(N, Width) ->
|
||||
width(N - 1, Width * Width).
|
||||
|
||||
find_mark(Key, SkipList) ->
|
||||
lists:foldl(fun({Marker, SL}, Acc) ->
|
||||
case Acc of
|
||||
false ->
|
||||
case Marker >= Key of
|
||||
true ->
|
||||
{Marker, SL};
|
||||
false ->
|
||||
Acc
|
||||
end;
|
||||
_ ->
|
||||
Acc
|
||||
end end,
|
||||
false,
|
||||
SkipList).
|
||||
|
||||
get_sublist(Key, SkipList) ->
|
||||
lists:foldl(fun({SkipKey, SL}, Acc) ->
|
||||
case {Acc, SkipKey} of
|
||||
{null, SkipKey} when SkipKey >= Key ->
|
||||
SL;
|
||||
_ ->
|
||||
Acc
|
||||
end end,
|
||||
null,
|
||||
SkipList).
|
||||
|
||||
%%%============================================================================
|
||||
%%% Test
|
||||
%%%============================================================================
|
||||
|
||||
-ifdef(TEST).
|
||||
|
||||
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
|
||||
generate_randomkeys(Seqn,
|
||||
Count,
|
||||
[],
|
||||
BucketRangeLow,
|
||||
BucketRangeHigh).
|
||||
|
||||
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
|
||||
Acc;
|
||||
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
|
||||
BNumber =
|
||||
case BRange of
|
||||
0 ->
|
||||
string:right(integer_to_list(BucketLow), 4, $0);
|
||||
_ ->
|
||||
BRand = random:uniform(BRange),
|
||||
string:right(integer_to_list(BucketLow + BRand), 4, $0)
|
||||
end,
|
||||
KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
|
||||
{K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
|
||||
{Seqn, {active, infinity}, null}},
|
||||
generate_randomkeys(Seqn + 1,
|
||||
Count - 1,
|
||||
[{K, V}|Acc],
|
||||
BucketLow,
|
||||
BRange).
|
||||
|
||||
skiplist_small_test() ->
|
||||
% Check nothing bad happens with very small lists
|
||||
lists:foreach(fun(N) -> dotest_skiplist_small(N) end, lists:seq(1, 32)).
|
||||
|
||||
|
||||
dotest_skiplist_small(N) ->
|
||||
KL = generate_randomkeys(1, N, 1, 2),
|
||||
SkipList1 =
|
||||
lists:foldl(fun({K, V}, SL) ->
|
||||
enter(K, V, SL)
|
||||
end,
|
||||
empty(),
|
||||
KL),
|
||||
SkipList2 = from_list(lists:reverse(KL)),
|
||||
lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList1))
|
||||
end,
|
||||
lists:ukeysort(1, lists:reverse(KL))),
|
||||
lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList2))
|
||||
end,
|
||||
lists:ukeysort(1, lists:reverse(KL))).
|
||||
|
||||
skiplist_withbloom_test() ->
|
||||
io:format(user, "~n~nBloom protected skiplist test:~n~n", []),
|
||||
skiplist_tester(true).
|
||||
|
||||
skiplist_nobloom_test() ->
|
||||
io:format(user, "~n~nBloom free skiplist test:~n~n", []),
|
||||
skiplist_tester(false).
|
||||
|
||||
skiplist_tester(Bloom) ->
|
||||
N = 4000,
|
||||
KL = generate_randomkeys(1, N, 1, N div 5),
|
||||
|
||||
OS = ets:new(test, [ordered_set, private]),
|
||||
ets:insert(OS, KL),
|
||||
SWaETS = os:timestamp(),
|
||||
SkipList = from_orderedset(OS, Bloom),
|
||||
io:format(user, "Generating skip list with ~w keys in ~w microseconds " ++
|
||||
"from ordered set~n",
|
||||
[N, timer:now_diff(os:timestamp(), SWaETS)]),
|
||||
|
||||
SWaGSL = os:timestamp(),
|
||||
SkipList = from_list(lists:reverse(KL), Bloom),
|
||||
io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++
|
||||
"Top level key count of ~w~n",
|
||||
[N,
|
||||
timer:now_diff(os:timestamp(), SWaGSL),
|
||||
length(element(2, SkipList))]),
|
||||
io:format(user, "Second tier key counts of ~w~n",
|
||||
[lists:map(fun({_L, SL}) -> length(SL) end,
|
||||
element(2, SkipList))]),
|
||||
KLSorted = lists:ukeysort(1, lists:reverse(KL)),
|
||||
|
||||
SWaGSL2 = os:timestamp(),
|
||||
SkipList = from_sortedlist(KLSorted, Bloom),
|
||||
io:format(user, "Generating skip list with ~w sorted keys in ~w " ++
|
||||
"microseconds~n",
|
||||
[N, timer:now_diff(os:timestamp(), SWaGSL2)]),
|
||||
|
||||
SWaDSL = os:timestamp(),
|
||||
SkipList1 =
|
||||
lists:foldl(fun({K, V}, SL) ->
|
||||
enter(K, V, SL)
|
||||
end,
|
||||
empty(Bloom),
|
||||
KL),
|
||||
io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++
|
||||
"microseconds~n" ++
|
||||
"Top level key count of ~w~n",
|
||||
[N,
|
||||
timer:now_diff(os:timestamp(), SWaDSL),
|
||||
length(element(2, SkipList1))]),
|
||||
io:format(user, "Second tier key counts of ~w~n",
|
||||
[lists:map(fun({_L, SL}) -> length(SL) end,
|
||||
element(2, SkipList1))]),
|
||||
|
||||
io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
|
||||
skiplist_timingtest(KLSorted, SkipList, N, Bloom),
|
||||
|
||||
io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
|
||||
skiplist_timingtest(KLSorted, SkipList1, N, Bloom).
|
||||
|
||||
|
||||
skiplist_timingtest(KL, SkipList, N, Bloom) ->
|
||||
io:format(user, "Timing tests on skiplist of size ~w~n",
|
||||
[leveled_skiplist:size(SkipList)]),
CheckList1 = lists:sublist(KL, N div 4, 200),
CheckList2 = lists:sublist(KL, N div 3, 200),
CheckList3 = lists:sublist(KL, N div 2, 200),
CheckList4 = lists:sublist(KL, N - 1000, 200),
CheckList5 = lists:sublist(KL, N - 500, 200),
CheckList6 = lists:sublist(KL, 1, 10),
CheckList7 = lists:nthtail(N - 200, KL),
CheckList8 = lists:sublist(KL, N div 2, 1),
CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,

SWb = os:timestamp(),
lists:foreach(fun({K, V}) ->
?assertMatch({value, V}, lookup(K, SkipList))
end,
CheckAll),
io:format(user, "Finding 1020 keys took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWb)]),

RangeFun =
fun(SkipListToQuery, CheckListForQ, Assert) ->
KR =
to_range(SkipListToQuery,
element(1, lists:nth(1, CheckListForQ)),
element(1, lists:last(CheckListForQ))),
case Assert of
true ->
CompareL = length(lists:usort(CheckListForQ)),
?assertMatch(CompareL, length(KR));
false ->
KR
end
end,

SWc = os:timestamp(),
RangeFun(SkipList, CheckList1, true),
RangeFun(SkipList, CheckList2, true),
RangeFun(SkipList, CheckList3, true),
RangeFun(SkipList, CheckList4, true),
RangeFun(SkipList, CheckList5, true),
RangeFun(SkipList, CheckList6, true),
RangeFun(SkipList, CheckList7, true),
RangeFun(SkipList, CheckList8, true),

KL_OOR1 = generate_randomkeys(1, 4, N div 5 + 1, N div 5 + 10),
KR9 = RangeFun(SkipList, KL_OOR1, false),
?assertMatch([], KR9),

KL_OOR2 = generate_randomkeys(1, 4, 0, 0),
KR10 = RangeFun(SkipList, KL_OOR2, false),
?assertMatch([], KR10),

io:format(user, "Finding 10 ranges took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWc)]),

AltKL1 = generate_randomkeys(1, 2000, 1, 200),
SWd0 = os:timestamp(),
lists:foreach(fun({K, _V}) ->
lookup(K, SkipList)
end,
AltKL1),
io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWd0)]),
SWd1 = os:timestamp(),
lists:foreach(fun({K, _V}) ->
leveled_codec:magic_hash(K)
end,
AltKL1),
io:format(user, "Generating 2000 magic hashes took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWd1)]),
SWd2 = os:timestamp(),
lists:foreach(fun({K, _V}) ->
erlang:phash2(K)
end,
AltKL1),
io:format(user, "Generating 2000 not so magic hashes took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWd2)]),

AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300),
SWe = os:timestamp(),
lists:foreach(fun({K, _V}) ->
none = lookup(K, SkipList)
end,
AltKL2),
io:format(user, "Getting 1000 missing keys above range took ~w " ++
"microseconds~n",
[timer:now_diff(os:timestamp(), SWe)]),
AltKL3 = generate_randomkeys(1, 1000, 0, 0),
SWf = os:timestamp(),
lists:foreach(fun({K, _V}) ->
none = lookup(K, SkipList)
end,
AltKL3),
io:format(user, "Getting 1000 missing keys below range took ~w " ++
"microseconds~n",
[timer:now_diff(os:timestamp(), SWf)]),

SWg = os:timestamp(),
FlatList = to_list(SkipList),
io:format(user, "Flattening skiplist took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWg)]),
?assertMatch(KL, FlatList),

case Bloom of
true ->
HashList = lists:map(fun(_X) ->
random:uniform(4294967295) end,
lists:seq(1, 2000)),
SWh = os:timestamp(),
lists:foreach(fun(X) ->
lookup(X, X, SkipList) end,
HashList),
io:format(user,
"Getting 2000 missing keys when hash was known " ++
"took ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWh)]);
false ->
ok
end.

define_kv(X) ->
{{o, "Bucket", "Key" ++ string:right(integer_to_list(X), 6), null},
{X, {active, infinity}, null}}.

skiplist_roundsize_test() ->
KVL = lists:map(fun(X) -> define_kv(X) end, lists:seq(1, 4096)),
SkipList = from_list(KVL),
lists:foreach(fun({K, V}) ->
?assertMatch({value, V}, lookup(K, SkipList)) end,
KVL),
lists:foreach(fun(X) ->
{KS, _VS} = define_kv(X * 32 + 1),
{KE, _VE} = define_kv((X + 1) * 32),
R = to_range(SkipList, KS, KE),
L = lists:sublist(KVL,
X * 32 + 1,
32),
?assertMatch(L, R) end,
lists:seq(0, 24)).

skiplist_nolookup_test() ->
N = 4000,
KL = generate_randomkeys(1, N, 1, N div 5),
SkipList = lists:foldl(fun({K, V}, Acc) ->
enter_nolookup(K, V, Acc) end,
empty(true),
KL),
KLSorted = lists:ukeysort(1, lists:reverse(KL)),
lists:foreach(fun({K, _V}) ->
?assertMatch(none, lookup(K, SkipList)) end,
KL),
?assertMatch(KLSorted, to_list(SkipList)).

skiplist_range_test() ->
N = 150,
KL = generate_randomkeys(1, N, 1, N div 5),

KLSL1 = lists:sublist(lists:ukeysort(1, KL), 128),
SkipList1 = from_list(KLSL1),
{LastK1, V1} = lists:last(KLSL1),
R1 = to_range(SkipList1, LastK1, LastK1),
?assertMatch([{LastK1, V1}], R1),

KLSL2 = lists:sublist(lists:ukeysort(1, KL), 127),
SkipList2 = from_list(KLSL2),
{LastK2, V2} = lists:last(KLSL2),
R2 = to_range(SkipList2, LastK2, LastK2),
?assertMatch([{LastK2, V2}], R2),

KLSL3 = lists:sublist(lists:ukeysort(1, KL), 129),
SkipList3 = from_list(KLSL3),
{LastK3, V3} = lists:last(KLSL3),
R3 = to_range(SkipList3, LastK3, LastK3),
?assertMatch([{LastK3, V3}], R3),

{FirstK4, V4} = lists:nth(1, KLSL3),
R4 = to_range(SkipList3, FirstK4, FirstK4),
?assertMatch([{FirstK4, V4}], R4).

empty_skiplist_size_test() ->
?assertMatch(0, leveled_skiplist:size(empty(false))),
?assertMatch(0, leveled_skiplist:size(empty(true))).

-endif.
@ -77,6 +77,8 @@
-define(INDEX_MARKER_WIDTH, 16).
-define(DISCARD_EXT, ".discarded").
-define(DELETE_TIMEOUT, 10000).
-define(TREE_TYPE, idxt).
-define(TREE_SIZE, 4).

-include_lib("eunit/include/eunit.hrl").

@ -676,93 +678,37 @@ generate_filenames(RootFilename) ->
%% The Slot Index is stored as a flat (sorted) list of {Key, Slot} where Key
%% is the last key within the slot.
%%
%% This implementation of the SlotIndex stores it as a tuple with the original
%% list as the second element and a list of mark points as the first element
%% containing every 16th key. The Mark points are stored as {Mark, Index},
%% where the Index corresponds with the nth point in the original list that the
%% Mark occurs.
%% This implementation of the SlotIndex uses leveled_tree

from_list(SlotList) ->
L = length(SlotList),
MarkerList = set_marks(lists:reverse(SlotList),
{?INDEX_MARKER_WIDTH, L rem ?INDEX_MARKER_WIDTH},
L,
[]),
{MarkerList, SlotList}.
leveled_tree:from_orderedlist(SlotList, ?TREE_TYPE, ?TREE_SIZE).

set_marks([], _MarkInfo, 0, MarkerList) ->
MarkerList;
set_marks([{Key, _Slot}|Rest], {MarkerWidth, MarkPoint}, Count, MarkerList) ->
case Count rem MarkerWidth of
MarkPoint ->
set_marks(Rest,
{MarkerWidth, MarkPoint},
Count - 1,
[{Key, Count}|MarkerList]);
_ ->
set_marks(Rest,
{MarkerWidth, MarkPoint},
Count - 1,
MarkerList)
end.

find_mark(Key, [{Mark, Pos}|_Rest]) when Mark >= Key ->
Pos;
find_mark(Key, [_H|T]) ->
find_mark(Key, T).

lookup_slot(Key, {MarkerList, SlotList}) ->
Pos = find_mark(Key, MarkerList),
SubList = lists:sublist(SlotList, max(1, Pos - ?INDEX_MARKER_WIDTH), Pos),
Slot = find_mark(Key, SubList),
lookup_slot(Key, Tree) ->
StartKeyFun =
fun(_V) ->
all
end,
% The penciller should never ask for presence out of range - so will
% always return a slot (As we don't compare to StartKey)
{_LK, Slot} = leveled_tree:search(Key, Tree, StartKeyFun),
Slot.

%% Returns a section from the summary index and two booleans to indicate if
%% the first slot needs trimming, or the last slot
lookup_slots(StartKey, EndKey, {_MarkerList, SlotList}) ->
SlotsOnlyFun = fun({_K, V}) -> V end,
{KSL, LTrim, RTrim} = lookup_slots_int(StartKey, EndKey, SlotList),
{lists:map(SlotsOnlyFun, KSL), LTrim, RTrim}.

lookup_slots_int(all, all, SlotList) ->
{SlotList, false, false};
lookup_slots_int(StartKey, all, SlotList) ->
LTrimFun = fun({K, _V}) -> K < StartKey end,
{_LDrop, RKeep0} = lists:splitwith(LTrimFun, SlotList),
{RKeep0, true, false};
lookup_slots_int(StartKey, EndKey, SlotList) ->
{RKeep, true, false} = lookup_slots_int(StartKey, all, SlotList),
[LeftMost|RKeep0] = RKeep,
{LeftMostK, LeftMostV} = LeftMost,
RTrimFun = fun({K, _V}) -> not leveled_codec:endkey_passed(EndKey, K) end,
case leveled_codec:endkey_passed(EndKey, LeftMostK) of
true ->
{[{LeftMostK, LeftMostV}],
true,
true};
false ->
case LeftMostK of
EndKey ->
{[{LeftMostK, LeftMostV}],
true,
false};
_ ->
{LKeep, RDisc} = lists:splitwith(RTrimFun, RKeep0),
case RDisc of
[] ->
{[LeftMost|LKeep],
true,
true};
[{RDiscK1, RDiscV1}|_Rest] when RDiscK1 == EndKey ->
{[LeftMost|LKeep] ++ [{RDiscK1, RDiscV1}],
true,
false};
[{RDiscK1, RDiscV1}|_Rest] ->
{[LeftMost|LKeep] ++ [{RDiscK1, RDiscV1}],
true,
true}
end
end
end
lookup_slots(StartKey, EndKey, Tree) ->
StartKeyFun =
fun(_V) ->
all
end,
MapFun =
fun({_LK, Slot}) ->
Slot
end,
SlotList = leveled_tree:search_range(StartKey, EndKey, Tree, StartKeyFun),
{EK, _EndSlot} = lists:last(SlotList),
case EK of
EndKey ->
{lists:map(MapFun, SlotList), true, false};
_ ->
{lists:map(MapFun, SlotList), true, true}
end.
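To make the rewritten SlotIndex concrete, the sketch below (the bucket and key values are hypothetical, not taken from this diff) builds the index with the ?TREE_TYPE and ?TREE_SIZE settings defined above, and then resolves a key to its slot in the same way as the new lookup_slot/2:

%% Minimal sketch of the new SlotIndex shape - hypothetical keys.
SlotList = [{{o, "B1", "Key0100", null}, 1},
{{o, "B1", "Key0200", null}, 2},
{{o, "B1", "Key0300", null}, 3}],
SlotIndex = leveled_tree:from_orderedlist(SlotList, idxt, 4),
%% A key at or before the last key of slot 2 resolves to slot 2; the
%% StartKeyFun of 'all' disables the lower-bound check, as in lookup_slot/2.
{_LK, 2} = leveled_tree:search({o, "B1", "Key0150", null},
SlotIndex,
fun(_V) -> all end).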
@ -1,159 +0,0 @@
%% -------- TINY BLOOM ---------
%%
%% For sheltering relatively expensive lookups with a probabilistic check
%%
%% Uses multiple 512 byte blooms. Can sensibly hold up to 1000 keys per array.
%% Even at 1000 keys should still offer only a 20% false positive
%%
%% Restricted to no more than 256 arrays - so can't handle more than 250K keys
%% in total
%%
%% Implemented this way to make it easy to control false positive (just by
%% setting the width). Also only requires binary manipulations of a single
%% hash

-module(leveled_tinybloom).

-include("include/leveled.hrl").

-export([
enter/2,
check/2,
empty/1
]).

-include_lib("eunit/include/eunit.hrl").

%%%============================================================================
%%% Bloom API
%%%============================================================================

empty(Width) when Width =< 256 ->
FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end,
lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)).

enter({hash, no_lookup}, Bloom) ->
Bloom;
enter({hash, Hash}, Bloom) ->
{Slot0, Bit1, Bit2} = split_hash(Hash),
Slot = Slot0 rem dict:size(Bloom),
BitArray0 = dict:fetch(Slot, Bloom),
FoldFun =
fun(Bit, Arr) -> add_to_array(Bit, Arr, 4096) end,
BitArray1 = lists:foldl(FoldFun,
BitArray0,
lists:usort([Bit1, Bit2])),
dict:store(Slot, <<BitArray1/binary>>, Bloom);
enter(Key, Bloom) ->
Hash = leveled_codec:magic_hash(Key),
enter({hash, Hash}, Bloom).

check({hash, Hash}, Bloom) ->
{Slot0, Bit1, Bit2} = split_hash(Hash),
Slot = Slot0 rem dict:size(Bloom),
BitArray = dict:fetch(Slot, Bloom),

case getbit(Bit1, BitArray, 4096) of
<<0:1>> ->
false;
<<1:1>> ->
case getbit(Bit2, BitArray, 4096) of
<<0:1>> ->
false;
<<1:1>> ->
true
end
end;
check(Key, Bloom) ->
Hash = leveled_codec:magic_hash(Key),
check({hash, Hash}, Bloom).

%%%============================================================================
%%% Internal Functions
%%%============================================================================

split_hash(Hash) ->
H0 = Hash band 255,
H1 = (Hash bsr 8) band 4095,
H2 = Hash bsr 20,
{H0, H1, H2}.

add_to_array(Bit, BitArray, ArrayLength) ->
RestLen = ArrayLength - Bit - 1,
<<Head:Bit/bitstring,
_B:1/integer,
Rest:RestLen/bitstring>> = BitArray,
<<Head/bitstring, 1:1, Rest/bitstring>>.

getbit(Bit, BitArray, ArrayLength) ->
RestLen = ArrayLength - Bit - 1,
<<_Head:Bit/bitstring,
B:1/bitstring,
_Rest:RestLen/bitstring>> = BitArray,
B.
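As a worked illustration of the hash split in this (now removed) module, the low 8 bits choose which 512-byte bloom to use, the next 12 bits and the remaining high bits choose the two bit positions within its 4096-bit array. The hash value below is an arbitrary example, not from the codebase:

%% Hypothetical worked example for split_hash/1.
Hash = 16#ABCDE123,
{Slot0, Bit1, Bit2} = split_hash(Hash),
35 = Slot0, % Hash band 255 - selects the bloom array (mod the width)
3553 = Bit1, % (Hash bsr 8) band 4095 - first bit to set/check
2748 = Bit2. % Hash bsr 20 - second bit to set/check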
%%%============================================================================
%%% Test
%%%============================================================================

-ifdef(TEST).

simple_test() ->
N = 4000,
W = 6,
KLin = lists:map(fun(X) -> "Key_" ++
integer_to_list(X) ++
integer_to_list(random:uniform(100)) ++
binary_to_list(crypto:rand_bytes(2))
end,
lists:seq(1, N)),
KLout = lists:map(fun(X) ->
"NotKey_" ++
integer_to_list(X) ++
integer_to_list(random:uniform(100)) ++
binary_to_list(crypto:rand_bytes(2))
end,
lists:seq(1, N)),
SW0_PH = os:timestamp(),
lists:foreach(fun(X) -> erlang:phash2(X) end, KLin),
io:format(user,
"~nNative hash function hashes ~w keys in ~w microseconds~n",
[N, timer:now_diff(os:timestamp(), SW0_PH)]),
SW0_MH = os:timestamp(),
lists:foreach(fun(X) -> leveled_codec:magic_hash(X) end, KLin),
io:format(user,
"~nMagic hash function hashes ~w keys in ~w microseconds~n",
[N, timer:now_diff(os:timestamp(), SW0_MH)]),

SW1 = os:timestamp(),
Bloom = lists:foldr(fun enter/2, empty(W), KLin),
io:format(user,
"~nAdding ~w keys to bloom took ~w microseconds~n",
[N, timer:now_diff(os:timestamp(), SW1)]),

SW2 = os:timestamp(),
lists:foreach(fun(X) -> ?assertMatch(true, check(X, Bloom)) end, KLin),
io:format(user,
"~nChecking ~w keys in bloom took ~w microseconds~n",
[N, timer:now_diff(os:timestamp(), SW2)]),

SW3 = os:timestamp(),
FP = lists:foldr(fun(X, Acc) -> case check(X, Bloom) of
true -> Acc + 1;
false -> Acc
end end,
0,
KLout),
io:format(user,
"~nChecking ~w keys out of bloom took ~w microseconds " ++
"with ~w false positive rate~n",
[N, timer:now_diff(os:timestamp(), SW3), FP / N]),
?assertMatch(true, FP < (N div 4)).

-endif.
731
src/leveled_tree.erl
Normal file
@ -0,0 +1,731 @@
%% -------- TREE ---------
%%
%% This module is intended to address two issues
%% - the lack of iterator_from support in OTP16 gb_trees
%% - the time to convert from/to list in gb_trees
%%
%% Leveled had had a skiplist implementation previously, and this is a
%% variation on that. The Tree in this case is a bunch of sublists of length
%% SKIP_WIDTH with the start_keys in a gb_tree.

-module(leveled_tree).

-include("include/leveled.hrl").

-export([
from_orderedlist/2,
from_orderedset/2,
from_orderedlist/3,
from_orderedset/3,
to_list/1,
match_range/3,
search_range/4,
match/2,
search/3,
tsize/1,
empty/1
]).

-include_lib("eunit/include/eunit.hrl").

-define(SKIP_WIDTH, 16).

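For orientation, here is a small usage sketch of this API (the keys and values are hypothetical). The same ordered list can back any of the three tree types, and the skpl type additionally autosizes its skip width from the list length (4 for short lists, then 8, 16 and 32 as the list grows), as shown by the from_orderedlist/3 clauses further down:

%% Minimal usage sketch - hypothetical keys and values.
KVL = [{{o, "B1", "K01", null}, v1},
{{o, "B1", "K02", null}, v2},
{{o, "B1", "K03", null}, v3}],
T = leveled_tree:from_orderedlist(KVL, skpl), % or tree | idxt
{value, v2} = leveled_tree:match({o, "B1", "K02", null}, T),
3 = leveled_tree:tsize(T),
KVL = leveled_tree:to_list(T).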
%%%============================================================================
%%% API
%%%============================================================================

from_orderedset(Table, Type) ->
from_orderedlist(ets:tab2list(Table), Type, ?SKIP_WIDTH).

from_orderedset(Table, Type, SkipWidth) ->
from_orderedlist(ets:tab2list(Table), Type, SkipWidth).

from_orderedlist(OrderedList, Type) ->
from_orderedlist(OrderedList, Type, ?SKIP_WIDTH).

from_orderedlist(OrderedList, tree, SkipWidth) ->
L = length(OrderedList),
{tree, L, tree_fromorderedlist(OrderedList, [], L, SkipWidth)};
from_orderedlist(OrderedList, idxt, SkipWidth) ->
L = length(OrderedList),
{idxt, L, idxt_fromorderedlist(OrderedList, {[], [], 1}, L, SkipWidth)};
from_orderedlist(OrderedList, skpl, _SkipWidth) ->
L = length(OrderedList),
SkipWidth =
% Autosize the skip width
case L of
L when L > 4096 -> 32;
L when L > 512 -> 16;
L when L > 64 -> 8;
_ -> 4
end,
{skpl, L, skpl_fromorderedlist(OrderedList, L, SkipWidth, 2)}.

match(Key, {tree, _L, Tree}) ->
Iter = tree_iterator_from(Key, Tree),
case tree_next(Iter) of
none ->
none;
{_NK, SL, _Iter} ->
lookup_match(Key, SL)
end;
match(Key, {idxt, _L, {TLI, IDX}}) ->
Iter = tree_iterator_from(Key, IDX),
case tree_next(Iter) of
none ->
none;
{_NK, ListID, _Iter} ->
lookup_match(Key, element(ListID, TLI))
end;
match(Key, {skpl, _L, SkipList}) ->
SL0 = skpl_getsublist(Key, SkipList),
lookup_match(Key, SL0).

search(Key, {tree, _L, Tree}, StartKeyFun) ->
Iter = tree_iterator_from(Key, Tree),
case tree_next(Iter) of
none ->
none;
{_NK, SL, _Iter} ->
{K, V} = lookup_best(Key, SL),
case Key < StartKeyFun(V) of
true ->
none;
false ->
{K, V}
end
end;
search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) ->
Iter = tree_iterator_from(Key, IDX),
case tree_next(Iter) of
none ->
none;
{_NK, ListID, _Iter} ->
{K, V} = lookup_best(Key, element(ListID, TLI)),
case Key < StartKeyFun(V) of
true ->
none;
false ->
{K, V}
end
end;
search(Key, {skpl, _L, SkipList}, StartKeyFun) ->
SL0 = skpl_getsublist(Key, SkipList),
case lookup_best(Key, SL0) of
{K, V} ->
case Key < StartKeyFun(V) of
true ->
none;
false ->
{K, V}
end;
none ->
none
end.

match_range(StartRange, EndRange, Tree) ->
EndRangeFun =
fun(ER, FirstRHSKey, _FirstRHSValue) ->
ER == FirstRHSKey
end,
match_range(StartRange, EndRange, Tree, EndRangeFun).

match_range(StartRange, EndRange, {tree, _L, Tree}, EndRangeFun) ->
treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun);
match_range(StartRange, EndRange, {idxt, _L, Tree}, EndRangeFun) ->
idxtlookup_range_start(StartRange, EndRange, Tree, EndRangeFun);
match_range(StartRange, EndRange, {skpl, _L, SkipList}, EndRangeFun) ->
skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun).

search_range(StartRange, EndRange, Tree, StartKeyFun) ->
EndRangeFun =
fun(ER, _FirstRHSKey, FirstRHSValue) ->
StartRHSKey = StartKeyFun(FirstRHSValue),
ER >= StartRHSKey
end,
case Tree of
{tree, _L, T} ->
treelookup_range_start(StartRange, EndRange, T, EndRangeFun);
{idxt, _L, T} ->
idxtlookup_range_start(StartRange, EndRange, T, EndRangeFun);
{skpl, _L, SL} ->
skpllookup_to_range(StartRange, EndRange, SL, EndRangeFun)
end.

to_list({tree, _L, Tree}) ->
FoldFun =
fun({_MK, SL}, Acc) ->
Acc ++ SL
end,
lists:foldl(FoldFun, [], tree_to_list(Tree));
to_list({idxt, _L, {TLI, _IDX}}) ->
lists:append(tuple_to_list(TLI));
to_list({skpl, _L, SkipList}) ->
FoldFun =
fun({_M, SL}, Acc) ->
[SL|Acc]
end,

Lv1List = lists:reverse(lists:foldl(FoldFun, [], SkipList)),
Lv0List = lists:reverse(lists:foldl(FoldFun, [], lists:append(Lv1List))),
lists:append(Lv0List).

tsize({_Type, L, _Tree}) ->
L.

empty(tree) ->
{tree, 0, empty_tree()};
empty(idxt) ->
{idxt, 0, {{}, empty_tree()}};
empty(skpl) ->
{skpl, 0, []}.

%%%============================================================================
%%% Internal Functions
%%%============================================================================

tree_fromorderedlist([], TmpList, _L, _SkipWidth) ->
gb_trees:from_orddict(lists:reverse(TmpList));
tree_fromorderedlist(OrdList, TmpList, L, SkipWidth) ->
SubLL = min(SkipWidth, L),
{Head, Tail} = lists:split(SubLL, OrdList),
{LastK, _LastV} = lists:last(Head),
tree_fromorderedlist(Tail, [{LastK, Head}|TmpList], L - SubLL, SkipWidth).

idxt_fromorderedlist([], {TmpListElements, TmpListIdx, _C}, _L, _SkipWidth) ->
{list_to_tuple(lists:reverse(TmpListElements)),
gb_trees:from_orddict(lists:reverse(TmpListIdx))};
idxt_fromorderedlist(OrdList, {TmpListElements, TmpListIdx, C}, L, SkipWidth) ->
SubLL = min(SkipWidth, L),
{Head, Tail} = lists:split(SubLL, OrdList),
{LastK, _LastV} = lists:last(Head),
idxt_fromorderedlist(Tail,
{[Head|TmpListElements],
[{LastK, C}|TmpListIdx],
C + 1},
L - SubLL,
SkipWidth).

skpl_fromorderedlist(SkipList, _L, _SkipWidth, 0) ->
SkipList;
skpl_fromorderedlist(SkipList, L, SkipWidth, Height) ->
SkipList0 = roll_list(SkipList, L, [], SkipWidth),
skpl_fromorderedlist(SkipList0, length(SkipList0), SkipWidth, Height - 1).

roll_list([], 0, SkipList, _SkipWidth) ->
lists:reverse(SkipList);
roll_list(KVList, L, SkipList, SkipWidth) ->
SubLL = min(SkipWidth, L),
{Head, Tail} = lists:split(SubLL, KVList),
{LastK, _LastV} = lists:last(Head),
roll_list(Tail, L - SubLL, [{LastK, Head}|SkipList], SkipWidth).

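To illustrate what skpl_fromorderedlist/4 and roll_list/4 produce, here is a sketch with a hypothetical skip width of 2 rather than the autosized widths above: each level groups the level below into sublists keyed by their last key, giving two levels of marks above the raw key-value pairs.

%% Hypothetical shape, SkipWidth = 2, four keys k1 < k2 < k3 < k4.
KVL = [{k1, v1}, {k2, v2}, {k3, v3}, {k4, v4}],
Lv1 = roll_list(KVL, 4, [], 2),
%% Lv1 = [{k2, [{k1, v1}, {k2, v2}]}, {k4, [{k3, v3}, {k4, v4}]}]
Lv0 = roll_list(Lv1, 2, [], 2),
%% Lv0 = [{k4, Lv1}] - the level stored as the third element of {skpl, 4, Lv0}
Lv0 = skpl_fromorderedlist(KVL, 4, 2, 2).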
% lookup_match(_Key, []) ->
% none;
% lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key ->
% none;
% lookup_match(Key, [{Key, EV}|_Tail]) ->
% {value, EV};
% lookup_match(Key, [_Top|Tail]) ->
% lookup_match(Key, Tail).

lookup_match(Key, KVList) ->
case lists:keyfind(Key, 1, KVList) of
false ->
none;
{Key, Value} ->
{value, Value}
end.

lookup_best(_Key, []) ->
none;
lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key ->
{EK, EV};
lookup_best(Key, [_Top|Tail]) ->
lookup_best(Key, Tail).

treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun) ->
Iter0 = tree_iterator_from(StartRange, Tree),
case tree_next(Iter0) of
none ->
[];
{NK, SL, Iter1} ->
PredFun =
fun({K, _V}) ->
K < StartRange
end,
{_LHS, RHS} = lists:splitwith(PredFun, SL),
treelookup_range_end(EndRange, {NK, RHS}, Iter1, [], EndRangeFun)
end.

treelookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) ->
PredFun =
fun({K, _V}) ->
not leveled_codec:endkey_passed(EndRange, K)
end,
case leveled_codec:endkey_passed(EndRange, NK0) of
true ->
{LHS, RHS} = lists:splitwith(PredFun, SL0),
case RHS of
[] ->
Output ++ LHS;
[{FirstRHSKey, FirstRHSValue}|_Rest] ->
case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of
true ->
Output ++ LHS ++ [{FirstRHSKey, FirstRHSValue}];
false ->
Output ++ LHS
end
end;
false ->
UpdOutput = Output ++ SL0,
case tree_next(Iter0) of
none ->
UpdOutput;
{NK1, SL1, Iter1} ->
treelookup_range_end(EndRange,
{NK1, SL1},
Iter1,
UpdOutput,
EndRangeFun)
end
end.

idxtlookup_range_start(StartRange, EndRange, {TLI, IDX}, EndRangeFun) ->
Iter0 = tree_iterator_from(StartRange, IDX),
case tree_next(Iter0) of
none ->
[];
{NK, ListID, Iter1} ->
PredFun =
fun({K, _V}) ->
K < StartRange
end,
{_LHS, RHS} = lists:splitwith(PredFun, element(ListID, TLI)),
idxtlookup_range_end(EndRange, {TLI, NK, RHS}, Iter1, [], EndRangeFun)
end.

idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) ->
PredFun =
fun({K, _V}) ->
not leveled_codec:endkey_passed(EndRange, K)
end,
case leveled_codec:endkey_passed(EndRange, NK0) of
true ->
{LHS, RHS} = lists:splitwith(PredFun, SL0),
case RHS of
[] ->
Output ++ LHS;
[{FirstRHSKey, FirstRHSValue}|_Rest] ->
case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of
true ->
Output ++ LHS ++ [{FirstRHSKey, FirstRHSValue}];
false ->
Output ++ LHS
end
end;
false ->
UpdOutput = Output ++ SL0,
case tree_next(Iter0) of
none ->
UpdOutput;
{NK1, ListID, Iter1} ->
idxtlookup_range_end(EndRange,
{TLI, NK1, element(ListID, TLI)},
Iter1,
UpdOutput,
EndRangeFun)
end
end.

skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) ->
FoldFun =
fun({K, SL}, {PassedStart, PassedEnd, Acc}) ->
case {PassedStart, PassedEnd} of
{false, false} ->
case StartRange > K of
true ->
{PassedStart, PassedEnd, Acc};
false ->
case leveled_codec:endkey_passed(EndRange, K) of
true ->
{true, true, [SL|Acc]};
false ->
{true, false, [SL|Acc]}
end
end;
{true, false} ->
case leveled_codec:endkey_passed(EndRange, K) of
true ->
{true, true, [SL|Acc]};
false ->
{true, false, [SL|Acc]}
end;
{true, true} ->
{PassedStart, PassedEnd, Acc}
end
end,
Lv1List = lists:reverse(element(3,
lists:foldl(FoldFun,
{false, false, []},
SkipList))),
Lv0List = lists:reverse(element(3,
lists:foldl(FoldFun,
{false, false, []},
lists:append(Lv1List)))),
BeforeFun =
fun({K, _V}) ->
K < StartRange
end,
AfterFun =
fun({K, V}) ->
case leveled_codec:endkey_passed(EndRange, K) of
false ->
true;
true ->
EndRangeFun(EndRange, K, V)
end
end,

case length(Lv0List) of
0 ->
[];
1 ->
RHS = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)),
lists:takewhile(AfterFun, RHS);
2 ->
RHSofLHL = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)),
LHSofRHL = lists:takewhile(AfterFun, lists:last(Lv0List)),
RHSofLHL ++ LHSofRHL;
L ->
RHSofLHL = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)),
LHSofRHL = lists:takewhile(AfterFun, lists:last(Lv0List)),
MidLists = lists:sublist(Lv0List, 2, L - 2),
lists:append([RHSofLHL] ++ MidLists ++ [LHSofRHL])
end.

skpl_getsublist(Key, SkipList) ->
FoldFun =
fun({Mark, SL}, Acc) ->
case {Acc, Mark} of
{[], Mark} when Mark >= Key ->
SL;
_ ->
Acc
end
end,
SL1 = lists:foldl(FoldFun, [], SkipList),
lists:foldl(FoldFun, [], SL1).

%%%============================================================================
%%% Balance tree implementation
%%%============================================================================

empty_tree() ->
gb_trees:empty().

tree_to_list(T) ->
gb_trees:to_list(T).

tree_iterator_from(K, T) ->
% For OTP 16 compatibility with gb_trees
iterator_from(K, T).

tree_next(I) ->
% For OTP 16 compatibility with gb_trees
next(I).

iterator_from(S, {_, T}) ->
iterator_1_from(S, T).

iterator_1_from(S, T) ->
iterator_from(S, T, []).

iterator_from(S, {K, _, _, T}, As) when K < S ->
iterator_from(S, T, As);
iterator_from(_, {_, _, nil, _} = T, As) ->
[T | As];
iterator_from(S, {_, _, L, _} = T, As) ->
iterator_from(S, L, [T | As]);
iterator_from(_, nil, As) ->
As.

next([{X, V, _, T} | As]) ->
{X, V, iterator(T, As)};
next([]) ->
none.

%% The iterator structure is really just a list corresponding to
%% the call stack of an in-order traversal. This is quite fast.

iterator({_, _, nil, _} = T, As) ->
[T | As];
iterator({_, _, L, _} = T, As) ->
iterator(L, [T | As]);
iterator(nil, As) ->
As.

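A brief sketch of how this backported iterator is used internally (the keys below are hypothetical): tree_iterator_from/2 positions the iterator at the first entry whose key is not less than the search key, and tree_next/1 then walks the remaining entries in order, which is what match/2, search/3 and the range lookups above rely on.

%% Hypothetical walk over a small gb_tree of {LastKey, SubList} entries.
T = gb_trees:from_orddict([{10, sublist_a}, {20, sublist_b}, {30, sublist_c}]),
Iter0 = tree_iterator_from(15, T),
{20, sublist_b, Iter1} = tree_next(Iter0),
{30, sublist_c, _Iter2} = tree_next(Iter1).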
%%%============================================================================
%%% Test
%%%============================================================================

-ifdef(TEST).

generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
generate_randomkeys(Seqn,
Count,
[],
BucketRangeLow,
BucketRangeHigh).

generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BRand = random:uniform(BRange),
BNumber = string:right(integer_to_list(BucketLow + BRand), 4, $0),
KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
{K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
{Seqn, {active, infinity}, null}},
generate_randomkeys(Seqn + 1,
Count - 1,
[{K, V}|Acc],
BucketLow,
BRange).

tree_search_test() ->
search_test_by_type(tree).

idxt_search_test() ->
search_test_by_type(idxt).

skpl_search_test() ->
search_test_by_type(skpl).

search_test_by_type(Type) ->
MapFun =
fun(N) ->
{N * 4, N * 4 - 2}
end,
KL = lists:map(MapFun, lists:seq(1, 50)),
T = from_orderedlist(KL, Type),

StartKeyFun = fun(V) -> V end,
statistics(runtime),
?assertMatch([], search_range(0, 1, T, StartKeyFun)),
?assertMatch([], search_range(201, 202, T, StartKeyFun)),
?assertMatch([{4, 2}], search_range(2, 4, T, StartKeyFun)),
?assertMatch([{4, 2}], search_range(2, 5, T, StartKeyFun)),
?assertMatch([{4, 2}, {8, 6}], search_range(2, 6, T, StartKeyFun)),
?assertMatch(50, length(search_range(2, 200, T, StartKeyFun))),
?assertMatch(50, length(search_range(2, 198, T, StartKeyFun))),
?assertMatch(49, length(search_range(2, 197, T, StartKeyFun))),
?assertMatch(49, length(search_range(4, 197, T, StartKeyFun))),
?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))),
{_, T1} = statistics(runtime),
io:format(user, "10 range tests with type ~w in ~w microseconds~n",
[Type, T1]).

tree_oor_test() ->
outofrange_test_by_type(tree).

idxt_oor_test() ->
outofrange_test_by_type(idxt).

skpl_oor_test() ->
outofrange_test_by_type(skpl).

outofrange_test_by_type(Type) ->
MapFun =
fun(N) ->
{N * 4, N * 4 - 2}
end,
KL = lists:map(MapFun, lists:seq(1, 50)),
T = from_orderedlist(KL, Type),

io:format("Out of range searches~n"),
?assertMatch(none, match(0, T)),
?assertMatch(none, match(5, T)),
?assertMatch(none, match(97, T)),
?assertMatch(none, match(197, T)),
?assertMatch(none, match(201, T)),

StartKeyFun = fun(V) -> V end,

?assertMatch(none, search(0, T, StartKeyFun)),
?assertMatch(none, search(5, T, StartKeyFun)),
?assertMatch(none, search(97, T, StartKeyFun)),
?assertMatch(none, search(197, T, StartKeyFun)),
?assertMatch(none, search(201, T, StartKeyFun)).

tree_tolist_test() ->
tolist_test_by_type(tree).

idxt_tolist_test() ->
tolist_test_by_type(idxt).

skpl_tolist_test() ->
tolist_test_by_type(skpl).

tolist_test_by_type(Type) ->
MapFun =
fun(N) ->
{N * 4, N * 4 - 2}
end,
KL = lists:map(MapFun, lists:seq(1, 50)),
T = from_orderedlist(KL, Type),
T_Reverse = to_list(T),
?assertMatch(KL, T_Reverse).

tree_timing_test() ->
log_tree_test_by_(16, tree, 4000),
tree_test_by_(8, tree, 1000),
tree_test_by_(4, tree, 256).

idxt_timing_test() ->
log_tree_test_by_(16, idxt, 4000),
tree_test_by_(8, idxt, 1000),
tree_test_by_(4, idxt, 256).

skpl_timing_test() ->
tree_test_by_(auto, skpl, 6000),
log_tree_test_by_(auto, skpl, 4000),
tree_test_by_(auto, skpl, 1000),
tree_test_by_(auto, skpl, 256).

log_tree_test_by_(Width, Type, N) ->
erlang:statistics(runtime),
G0 = erlang:statistics(garbage_collection),
tree_test_by_(Width, Type, N),
{_, T1} = erlang:statistics(runtime),
G1 = erlang:statistics(garbage_collection),
io:format(user, "Test took ~w ms and GC transitioned from ~w to ~w~n",
[T1, G0, G1]).

tree_test_by_(Width, Type, N) ->
io:format(user, "~nTree test for type and width: ~w ~w~n", [Type, Width]),
KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)),

OS = ets:new(test, [ordered_set, private]),
ets:insert(OS, KL),
SWaETS = os:timestamp(),
Tree0 = from_orderedset(OS, Type, Width),
io:format(user, "Generating tree from ETS in ~w microseconds" ++
" of size ~w~n",
[timer:now_diff(os:timestamp(), SWaETS),
tsize(Tree0)]),

SWaGSL = os:timestamp(),
Tree1 = from_orderedlist(KL, Type, Width),
io:format(user, "Generating tree from orddict in ~w microseconds" ++
" of size ~w~n",
[timer:now_diff(os:timestamp(), SWaGSL),
tsize(Tree1)]),
SWaLUP = os:timestamp(),
lists:foreach(match_fun(Tree0), KL),
lists:foreach(match_fun(Tree1), KL),
io:format(user, "Looked up all keys twice in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaLUP)]),

?assertMatch(Tree0, Tree1),

SWaSRCH1 = os:timestamp(),
lists:foreach(search_exactmatch_fun(Tree0), KL),
lists:foreach(search_exactmatch_fun(Tree1), KL),
io:format(user, "Search all keys twice for exact match in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaSRCH1)]),

BitBiggerKeyFun =
fun(Idx) ->
{K, _V} = lists:nth(Idx, KL),
{o, B, FullKey, null} = K,
{{o, B, FullKey ++ "0", null}, lists:nth(Idx + 1, KL)}
end,
SrchKL = lists:map(BitBiggerKeyFun, lists:seq(1, length(KL) - 1)),

SWaSRCH2 = os:timestamp(),
lists:foreach(search_nearmatch_fun(Tree0), SrchKL),
lists:foreach(search_nearmatch_fun(Tree1), SrchKL),
io:format(user, "Search all keys twice for near match in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaSRCH2)]).

tree_matchrange_test() ->
matchrange_test_by_type(tree).

idxt_matchrange_test() ->
matchrange_test_by_type(idxt).

skpl_matchrange_test() ->
matchrange_test_by_type(skpl).

matchrange_test_by_type(Type) ->
N = 4000,
KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)),
Tree0 = from_orderedlist(KL, Type),

FirstKey = element(1, lists:nth(1, KL)),
FinalKey = element(1, lists:last(KL)),
PenultimateKey = element(1, lists:nth(length(KL) - 1, KL)),
AfterFirstKey = setelement(3, FirstKey, element(3, FirstKey) ++ "0"),
AfterPenultimateKey = setelement(3,
PenultimateKey,
element(3, PenultimateKey) ++ "0"),

LengthR =
fun(SK, EK, T) ->
length(match_range(SK, EK, T))
end,

KL_Length = length(KL),
io:format("KL_Length ~w~n", [KL_Length]),
?assertMatch(KL_Length, LengthR(FirstKey, FinalKey, Tree0)),
?assertMatch(KL_Length, LengthR(FirstKey, PenultimateKey, Tree0) + 1),
?assertMatch(1, LengthR(all, FirstKey, Tree0)),
?assertMatch(KL_Length, LengthR(all, PenultimateKey, Tree0) + 1),
?assertMatch(KL_Length, LengthR(all, all, Tree0)),
?assertMatch(2, LengthR(PenultimateKey, FinalKey, Tree0)),
?assertMatch(KL_Length, LengthR(AfterFirstKey, PenultimateKey, Tree0) + 2),
?assertMatch(1, LengthR(AfterPenultimateKey, FinalKey, Tree0)).

match_fun(Tree) ->
fun({K, V}) ->
?assertMatch({value, V}, match(K, Tree))
end.

search_exactmatch_fun(Tree) ->
StartKeyFun = fun(_V) -> all end,
fun({K, V}) ->
?assertMatch({K, V}, search(K, Tree, StartKeyFun))
end.

search_nearmatch_fun(Tree) ->
StartKeyFun = fun(_V) -> all end,
fun({K, {NK, NV}}) ->
?assertMatch({NK, NV}, search(K, Tree, StartKeyFun))
end.

empty_test() ->
T0 = empty(tree),
?assertMatch(0, tsize(T0)),
T1 = empty(skpl),
?assertMatch(0, tsize(T1)),
T2 = empty(idxt),
?assertMatch(0, tsize(T2)).

-endif.