diff --git a/include/leveled.hrl b/include/leveled.hrl index 13d862e..fa4dd11 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -15,6 +15,8 @@ %% Inker key type used for tombstones -define(INKT_TOMB, tomb). +-define(CACHE_TYPE, skpl). + -record(sft_options, {wait = true :: boolean(), expire_tombstones = false :: boolean(), diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 0960c68..245aa1c 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -139,6 +139,7 @@ get_opt/3, load_snapshot/2, empty_ledgercache/0, + loadqueue_ledgercache/1, push_ledgercache/2]). -include_lib("eunit/include/eunit.hrl"). @@ -153,7 +154,8 @@ -define(LONG_RUNNING, 80000). -record(ledger_cache, {mem :: ets:tab(), - loader = leveled_skiplist:empty(false) :: tuple(), + loader = leveled_tree:empty(?CACHE_TYPE) :: tuple(), + load_queue = [] :: list(), index = leveled_pmem:new_index(), % array min_sqn = infinity :: integer()|infinity, max_sqn = 0 :: integer()}). @@ -474,6 +476,11 @@ push_ledgercache(Penciller, Cache) -> Cache#ledger_cache.max_sqn}, leveled_penciller:pcl_pushmem(Penciller, CacheToLoad). +loadqueue_ledgercache(Cache) -> + SL = lists:ukeysort(1, Cache#ledger_cache.load_queue), + T = leveled_tree:from_orderedlist(SL, ?CACHE_TYPE), + Cache#ledger_cache{load_queue = [], loader = T}. 
+ %%%============================================================================ %%% Internal functions %%%============================================================================ @@ -719,11 +726,12 @@ snapshot_store(State, SnapType) -> readycache_forsnapshot(LedgerCache) -> % Need to convert the Ledger Cache away from using the ETS table - SkipList = leveled_skiplist:from_orderedset(LedgerCache#ledger_cache.mem), + Tree = leveled_tree:from_orderedset(LedgerCache#ledger_cache.mem, + ?CACHE_TYPE), Idx = LedgerCache#ledger_cache.index, MinSQN = LedgerCache#ledger_cache.min_sqn, MaxSQN = LedgerCache#ledger_cache.max_sqn, - #ledger_cache{loader=SkipList, index=Idx, min_sqn=MinSQN, max_sqn=MaxSQN}. + #ledger_cache{loader=Tree, index=Idx, min_sqn=MinSQN, max_sqn=MaxSQN}. set_options(Opts) -> MaxJournalSize0 = get_opt(max_journalsize, Opts, 10000000000), @@ -961,14 +969,10 @@ addto_ledgercache({H, SQN, KeyChanges}, Cache) -> max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}. addto_ledgercache({H, SQN, KeyChanges}, Cache, loader) -> - FoldChangesFun = - fun({K, V}, SL0) -> - leveled_skiplist:enter_nolookup(K, V, SL0) - end, - UpdSL = lists:foldl(FoldChangesFun, Cache#ledger_cache.loader, KeyChanges), + UpdQ = KeyChanges ++ Cache#ledger_cache.load_queue, UpdIndex = leveled_pmem:prepare_for_index(Cache#ledger_cache.index, H), Cache#ledger_cache{index = UpdIndex, - loader = UpdSL, + load_queue = UpdQ, min_sqn=min(SQN, Cache#ledger_cache.min_sqn), max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}. 
@@ -979,7 +983,7 @@ maybepush_ledgercache(MaxCacheSize, Cache, Penciller) -> TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize), if TimeToPush -> - CacheToLoad = {leveled_skiplist:from_orderedset(Tab), + CacheToLoad = {leveled_tree:from_orderedset(Tab, ?CACHE_TYPE), Cache#ledger_cache.index, Cache#ledger_cache.min_sqn, Cache#ledger_cache.max_sqn}, diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 6dbbff4..6360e2b 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -34,7 +34,6 @@ -export([ inker_reload_strategy/1, - strip_to_keyonly/1, strip_to_seqonly/1, strip_to_statusonly/1, strip_to_keyseqonly/1, @@ -44,7 +43,6 @@ endkey_passed/2, key_dominates/2, maybe_reap_expiredkey/2, - print_key/1, to_ledgerkey/3, to_ledgerkey/5, from_ledgerkey/1, @@ -108,8 +106,6 @@ inker_reload_strategy(AltList) -> ReloadStrategy0, AltList). -strip_to_keyonly({K, _V}) -> K. - strip_to_statusonly({_, {_, St, _, _}}) -> St. strip_to_seqonly({_, {SeqN, _, _, _}}) -> SeqN. @@ -252,33 +248,6 @@ create_value_for_journal(Value) -> hash(Obj) -> erlang:phash2(term_to_binary(Obj)). -% Return a tuple of strings to ease the printing of keys to logs -print_key(Key) -> - {A_STR, B_TERM, C_TERM} = case Key of - {?STD_TAG, B, K, _SK} -> - {"Object", B, K}; - {?RIAK_TAG, B, K, _SK} -> - {"RiakObject", B, K}; - {?IDX_TAG, B, {F, _V}, _K} -> - {"Index", B, F} - end, - B_STR = turn_to_string(B_TERM), - C_STR = turn_to_string(C_TERM), - {A_STR, B_STR, C_STR}. - -turn_to_string(Item) -> - if - is_binary(Item) == true -> - binary_to_list(Item); - is_integer(Item) == true -> - integer_to_list(Item); - is_list(Item) == true -> - Item; - true -> - [Output] = io_lib:format("~w", [Item]), - Output - end. - % Compare a key against a query key, only comparing elements that are non-null % in the Query key. This is used for comparing against end keys in queries. 
@@ -461,10 +430,6 @@ endkey_passed_test() -> ?assertMatch(false, endkey_passed(TestKey, K1)), ?assertMatch(true, endkey_passed(TestKey, K2)). -stringcheck_test() -> - ?assertMatch("Bucket", turn_to_string("Bucket")), - ?assertMatch("Bucket", turn_to_string(<<"Bucket">>)), - ?assertMatch("bucket", turn_to_string(bucket)). %% Test below proved that the overhead of performing hashes was trivial %% Maybe 5 microseconds per hash diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index abb7851..cb915dd 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -673,10 +673,14 @@ load_between_sequence(MinSQN, MaxSQN, FilterFun, Penciller, push_to_penciller(Penciller, LedgerCache) -> % The push to penciller must start as a tree to correctly de-duplicate % the list by order before becoming a de-duplicated list for loading + LC0 = leveled_bookie:loadqueue_ledgercache(LedgerCache), + push_to_penciller_loop(Penciller, LC0). + +push_to_penciller_loop(Penciller, LedgerCache) -> case leveled_bookie:push_ledgercache(Penciller, LedgerCache) of returned -> timer:sleep(?LOADING_PAUSE), - push_to_penciller(Penciller, LedgerCache); + push_to_penciller_loop(Penciller, LedgerCache); ok -> ok end. diff --git a/src/leveled_log.erl b/src/leveled_log.erl index f3dbb25..c736fe9 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -15,8 +15,8 @@ sst_timing/3]). -define(PUT_LOGPOINT, 20000). --define(HEAD_LOGPOINT, 160000). --define(GET_LOGPOINT, 160000). +-define(HEAD_LOGPOINT, 50000). +-define(GET_LOGPOINT, 50000). -define(SST_LOGPOINT, 20000). -define(LOG_LEVEL, [info, warn, error, critical]). -define(SAMPLE_RATE, 16). 
@@ -309,10 +309,12 @@ log(LogReference, Subs) -> - {ok, {LogLevel, LogText}} = dict:find(LogReference, ?LOGBASE), + {LogLevel, LogText} = dict:fetch(LogReference, ?LOGBASE), case lists:member(LogLevel, ?LOG_LEVEL) of true -> - io:format(LogReference ++ " ~w " ++ LogText ++ "~n", + io:format(format_time() + ++ " " ++ LogReference ++ " ~w " + ++ LogText ++ "~n", [self()|Subs]); false -> ok @@ -320,7 +322,7 @@ log(LogReference, Subs) -> log_timer(LogReference, Subs, StartTime) -> - {ok, {LogLevel, LogText}} = dict:find(LogReference, ?LOGBASE), + {LogLevel, LogText} = dict:fetch(LogReference, ?LOGBASE), case lists:member(LogLevel, ?LOG_LEVEL) of true -> MicroS = timer:now_diff(os:timestamp(), StartTime), @@ -330,7 +332,9 @@ log_timer(LogReference, Subs, StartTime) -> MicroS -> {"ms", MicroS div 1000} end, - io:format(LogReference ++ " ~w " ++ LogText + io:format(format_time() + ++ " " ++ LogReference ++ " ~w " + ++ LogText ++ " with time taken ~w " ++ Unit ++ "~n", [self()|Subs] ++ [Time]); false -> @@ -510,6 +514,17 @@ gen_timing_int({N, TimerD}, T0, TimerType, _KeyListFun, _LogPoint, _LogRef) -> TimerD)}. +format_time() -> + format_time(localtime_ms()). + +localtime_ms() -> + {_, _, Micro} = Now = os:timestamp(), + {Date, {Hours, Minutes, Seconds}} = calendar:now_to_local_time(Now), + {Date, {Hours, Minutes, Seconds, Micro div 1000 rem 1000}}. + +format_time({{Y, M, D}, {H, Mi, S, Ms}}) -> + io_lib:format("~b-~2..0b-~2..0b", [Y, M, D]) ++ "T" ++ + io_lib:format("~2..0b:~2..0b:~2..0b.~3..0b", [H, Mi, S, Ms]). 
%%%============================================================================ diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 66b7c74..5055a01 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -183,16 +183,12 @@ perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN) -> ME end, SinkManifestList = lists:map(RevertPointerFun, SinkList), - Man0 = leveled_pmanifest:remove_manifest_entry(Manifest, - NewSQN, - SinkLevel, - SinkManifestList), - Man1 = leveled_pmanifest:insert_manifest_entry(Man0, + Man0 = leveled_pmanifest:replace_manifest_entry(Manifest, NewSQN, SinkLevel, + SinkManifestList, Additions), - - Man2 = leveled_pmanifest:remove_manifest_entry(Man1, + Man2 = leveled_pmanifest:remove_manifest_entry(Man0, NewSQN, SrcLevel, Src), diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 9a6daf3..57b522c 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -9,7 +9,7 @@ %% the Penciller's Clerk %% - The Penciller can be cloned and maintains a register of clones who have %% requested snapshots of the Ledger -%% - The accepts new dumps (in the form of a leveled_skiplist accomponied by +%% - The accepts new dumps (in the form of a leveled_tree accomponied by %% an array of hash-listing binaries) from the Bookie, and responds either 'ok' %% to the bookie if the information is accepted nad the Bookie can refresh its %% memory, or 'returned' if the bookie must continue without refreshing as the @@ -224,7 +224,7 @@ levelzero_pending = false :: boolean(), levelzero_constructor :: pid(), - levelzero_cache = [] :: list(), % a list of skiplists + levelzero_cache = [] :: list(), % a list of trees levelzero_size = 0 :: integer(), levelzero_maxcachesize :: integer(), levelzero_cointoss = false :: boolean(), @@ -345,9 +345,9 @@ handle_call({push_mem, {PushedTree, PushedIdx, MinSQN, MaxSQN}}, State=#state{is_snapshot=Snap}) when Snap == false -> % The push_mem process is as follows: % - % 1 - 
Receive a cache. The cache has four parts: a skiplist of keys and + % 1 - Receive a cache. The cache has four parts: a tree of keys and % values, an array of 256 binaries listing the hashes present in the - % skiplist, a min SQN and a max SQN + % tree, a min SQN and a max SQN % % 2 - Check to see if there is a levelzero file pending. If so, the % update must be returned. If not the update can be accepted @@ -404,7 +404,7 @@ handle_call({fetch_keys, StartKey, EndKey, AccFun, InitAcc, MaxKeys}, leveled_pmem:merge_trees(StartKey, EndKey, State#state.levelzero_cache, - leveled_skiplist:empty()); + leveled_tree:empty(?CACHE_TYPE)); List -> List end, @@ -1072,10 +1072,10 @@ clean_subdir(DirPath) -> maybe_pause_push(PCL, KL) -> - T0 = leveled_skiplist:empty(true), + T0 = [], I0 = leveled_pmem:new_index(), T1 = lists:foldl(fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) -> - UpdSL = leveled_skiplist:enter(K, V, AccSL), + UpdSL = [{K, V}|AccSL], SQN = leveled_codec:strip_to_seqonly({K, V}), H = leveled_codec:magic_hash(K), UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H), @@ -1083,7 +1083,10 @@ maybe_pause_push(PCL, KL) -> end, {T0, I0, infinity, 0}, KL), - case pcl_pushmem(PCL, T1) of + SL = element(1, T1), + Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, SL), ?CACHE_TYPE), + T2 = setelement(1, T1, Tree), + case pcl_pushmem(PCL, T2) of returned -> timer:sleep(50), maybe_pause_push(PCL, KL); @@ -1315,63 +1318,63 @@ sqnoverlap_otherway_findnextkey_test() -> foldwithimm_simple_test() -> QueryArray = [ - {2, [{{o, "Bucket1", "Key1"}, {5, {active, infinity}, 0, null}}, - {{o, "Bucket1", "Key5"}, {1, {active, infinity}, 0, null}}]}, - {3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, 0, null}}]}, - {5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, 0, null}}]} + {2, [{{o, "Bucket1", "Key1", null}, + {5, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key5", null}, + {1, {active, infinity}, 0, null}}]}, + {3, [{{o, "Bucket1", "Key3", null}, + {3, {active, 
infinity}, 0, null}}]}, + {5, [{{o, "Bucket1", "Key5", null}, + {2, {active, infinity}, 0, null}}]} ], - IMM0 = leveled_skiplist:enter({o, "Bucket1", "Key6"}, - {7, {active, infinity}, 0, null}, - leveled_skiplist:empty()), - IMM1 = leveled_skiplist:enter({o, "Bucket1", "Key1"}, - {8, {active, infinity}, 0, null}, - IMM0), - IMM2 = leveled_skiplist:enter({o, "Bucket1", "Key8"}, - {9, {active, infinity}, 0, null}, - IMM1), - IMMiter = leveled_skiplist:to_range(IMM2, {o, "Bucket1", "Key1"}), + KL1A = [{{o, "Bucket1", "Key6", null}, {7, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key1", null}, {8, {active, infinity}, 0, null}}, + {{o, "Bucket1", "Key8", null}, {9, {active, infinity}, 0, null}}], + IMM2 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1A), ?CACHE_TYPE), + IMMiter = leveled_tree:match_range({o, "Bucket1", "Key1", null}, + {o, null, null, null}, + IMM2), AccFun = fun(K, V, Acc) -> SQN = leveled_codec:strip_to_seqonly({K, V}), Acc ++ [{K, SQN}] end, Acc = keyfolder(IMMiter, QueryArray, - {o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"}, + {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null}, {AccFun, []}), - ?assertMatch([{{o, "Bucket1", "Key1"}, 8}, - {{o, "Bucket1", "Key3"}, 3}, - {{o, "Bucket1", "Key5"}, 2}, - {{o, "Bucket1", "Key6"}, 7}], Acc), + ?assertMatch([{{o, "Bucket1", "Key1", null}, 8}, + {{o, "Bucket1", "Key3", null}, 3}, + {{o, "Bucket1", "Key5", null}, 2}, + {{o, "Bucket1", "Key6", null}, 7}], Acc), - IMM1A = leveled_skiplist:enter({o, "Bucket1", "Key1"}, - {8, {active, infinity}, 0, null}, - leveled_skiplist:empty()), - IMMiterA = leveled_skiplist:to_range(IMM1A, {o, "Bucket1", "Key1"}), + IMMiterA = [{{o, "Bucket1", "Key1", null}, + {8, {active, infinity}, 0, null}}], AccA = keyfolder(IMMiterA, - QueryArray, - {o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"}, - {AccFun, []}), - ?assertMatch([{{o, "Bucket1", "Key1"}, 8}, - {{o, "Bucket1", "Key3"}, 3}, - {{o, "Bucket1", "Key5"}, 2}], AccA), + QueryArray, + {o, "Bucket1", "Key1", 
null}, {o, "Bucket1", "Key6", null}, + {AccFun, []}), + ?assertMatch([{{o, "Bucket1", "Key1", null}, 8}, + {{o, "Bucket1", "Key3", null}, 3}, + {{o, "Bucket1", "Key5", null}, 2}], AccA), - IMM3 = leveled_skiplist:enter({o, "Bucket1", "Key4"}, - {10, {active, infinity}, 0, null}, - IMM2), - IMMiterB = leveled_skiplist:to_range(IMM3, {o, "Bucket1", "Key1"}), + KL1B = [{{o, "Bucket1", "Key4", null}, {10, {active, infinity}, 0, null}}|KL1A], + IMM3 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1B), ?CACHE_TYPE), + IMMiterB = leveled_tree:match_range({o, "Bucket1", "Key1", null}, + {o, null, null, null}, + IMM3), AccB = keyfolder(IMMiterB, QueryArray, - {o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"}, + {o, "Bucket1", "Key1", null}, {o, "Bucket1", "Key6", null}, {AccFun, []}), - ?assertMatch([{{o, "Bucket1", "Key1"}, 8}, - {{o, "Bucket1", "Key3"}, 3}, - {{o, "Bucket1", "Key4"}, 10}, - {{o, "Bucket1", "Key5"}, 2}, - {{o, "Bucket1", "Key6"}, 7}], AccB). + ?assertMatch([{{o, "Bucket1", "Key1", null}, 8}, + {{o, "Bucket1", "Key3", null}, 3}, + {{o, "Bucket1", "Key4", null}, 10}, + {{o, "Bucket1", "Key5", null}, 2}, + {{o, "Bucket1", "Key6", null}, 7}], AccB). create_file_test() -> Filename = "../test/new_file.sst", ok = file:write_file(Filename, term_to_binary("hello")), KVL = lists:usort(generate_randomkeys(10000)), - Tree = leveled_skiplist:from_list(KVL), + Tree = leveled_tree:from_orderedlist(KVL, ?CACHE_TYPE), FetchFun = fun(Slot) -> lists:nth(Slot, [Tree]) end, {ok, SP, diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 9fa50ef..47e7120 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -32,6 +32,7 @@ merge_lookup/4, insert_manifest_entry/4, remove_manifest_entry/4, + replace_manifest_entry/5, switch_manifest_entry/4, mergefile_selector/2, add_snapshot/3, @@ -51,6 +52,8 @@ -define(MANIFEST_FILEX, "man"). -define(MANIFEST_FP, "ledger_manifest"). -define(MAX_LEVELS, 8). +-define(TREE_TYPE, idxt). +-define(TREE_WIDTH, 8). 
-record(manifest, {levels, % an array of lists or trees representing the manifest @@ -73,8 +76,16 @@ %%%============================================================================ new_manifest() -> + LevelArray0 = array:new([{size, ?MAX_LEVELS + 1}, {default, []}]), + SetLowerLevelFun = + fun(IDX, Acc) -> + array:set(IDX, leveled_tree:empty(?TREE_TYPE), Acc) + end, + LevelArray1 = lists:foldl(SetLowerLevelFun, + LevelArray0, + lists:seq(2, ?MAX_LEVELS)), #manifest{ - levels = array:new([{size, ?MAX_LEVELS + 1}, {default, []}]), + levels = LevelArray1, manifest_sqn = 0, snapshots = [], pending_deletes = dict:new(), @@ -139,6 +150,30 @@ save_manifest(Manifest, RootPath) -> CRC = erlang:crc32(ManBin), ok = file:write_file(FP, <>). + +replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) -> + Levels = Manifest#manifest.levels, + Level = array:get(LevelIdx, Levels), + UpdLevel = replace_entry(LevelIdx, Level, Removals, Additions), + leveled_log:log("PC019", ["insert", LevelIdx, UpdLevel]), + PendingDeletes = update_pendingdeletes(ManSQN, + Removals, + Manifest#manifest.pending_deletes), + UpdLevels = array:set(LevelIdx, UpdLevel, Levels), + case is_empty(LevelIdx, UpdLevel) of + true -> + Manifest#manifest{levels = UpdLevels, + basement = get_basement(UpdLevels), + manifest_sqn = ManSQN, + pending_deletes = PendingDeletes}; + false -> + Basement = max(LevelIdx, Manifest#manifest.basement), + Manifest#manifest{levels = UpdLevels, + basement = Basement, + manifest_sqn = ManSQN, + pending_deletes = PendingDeletes} + end. 
+ insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> Levels = Manifest#manifest.levels, Level = array:get(LevelIdx, Levels), @@ -154,22 +189,9 @@ remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> Level = array:get(LevelIdx, Levels), UpdLevel = remove_entry(LevelIdx, Level, Entry), leveled_log:log("PC019", ["remove", LevelIdx, UpdLevel]), - DelFun = - fun(E, Acc) -> - dict:store(E#manifest_entry.filename, - {ManSQN, E}, - Acc) - end, - Entries = - case is_list(Entry) of - true -> - Entry; - false -> - [Entry] - end, - PendingDeletes = lists:foldl(DelFun, - Manifest#manifest.pending_deletes, - Entries), + PendingDeletes = update_pendingdeletes(ManSQN, + Entry, + Manifest#manifest.pending_deletes), UpdLevels = array:set(LevelIdx, UpdLevel, Levels), case is_empty(LevelIdx, UpdLevel) of true -> @@ -322,58 +344,181 @@ levelzero_present(Manifest) -> %%% Internal Functions %%%============================================================================ + %% All these internal functions that work on a level are also passed LeveIdx %% even if this is not presently relevant. Currnetly levels are lists, but %% future branches may make lower levels trees or skiplists to improve fetch %% efficiency -load_level(_LevelIdx, Level, PidFun, SQNFun) -> - LevelLoadFun = +load_level(LevelIdx, Level, PidFun, SQNFun) -> + HigherLevelLoadFun = fun(ME, {L_Out, L_MaxSQN}) -> FN = ME#manifest_entry.filename, P = PidFun(FN), SQN = SQNFun(P), {[ME#manifest_entry{owner=P}|L_Out], max(SQN, L_MaxSQN)} end, - lists:foldr(LevelLoadFun, {[], 0}, Level). 
+ LowerLevelLoadFun = + fun({EK, ME}, {L_Out, L_MaxSQN}) -> + FN = ME#manifest_entry.filename, + P = PidFun(FN), + SQN = SQNFun(P), + {[{EK, ME#manifest_entry{owner=P}}|L_Out], max(SQN, L_MaxSQN)} + end, + case LevelIdx =< 1 of + true -> + lists:foldr(HigherLevelLoadFun, {[], 0}, Level); + false -> + {L0, MaxSQN} = lists:foldr(LowerLevelLoadFun, + {[], 0}, + leveled_tree:to_list(Level)), + {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), MaxSQN} + end. +close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 -> + lists:foreach(CloseEntryFun, Level); close_level(_LevelIdx, Level, CloseEntryFun) -> - lists:foreach(CloseEntryFun, Level). + lists:foreach(CloseEntryFun, leveled_tree:to_list(Level)). is_empty(_LevelIdx, []) -> true; -is_empty(_LevelIdx, _Level) -> - false. - -size(_LevelIdx, Level) -> - length(Level). - -add_entry(_LevelIdx, Level, Entries) when is_list(Entries) -> - lists:sort(Level ++ Entries); -add_entry(_LevelIdx, Level, Entry) -> - lists:sort([Entry|Level]). - -remove_entry(_LevelIdx, Level, Entries) when is_list(Entries) -> - % We're assuming we're removing a sorted sublist - RemLength = length(Entries), - [RemStart|_Tail] = Entries, - remove_section(Level, RemStart#manifest_entry.start_key, RemLength); -remove_entry(_LevelIdx, Level, Entry) -> - remove_section(Level, Entry#manifest_entry.start_key, 1). - -remove_section(Level, SectionStartKey, SectionLength) -> - PredFun = - fun(E) -> - E#manifest_entry.start_key < SectionStartKey - end, - {Pre, Rest} = lists:splitwith(PredFun, Level), - Post = lists:nthtail(SectionLength, Rest), - Pre ++ Post. - - -key_lookup_level(_LevelIdx, [], _Key) -> +is_empty(LevelIdx, _Level) when LevelIdx =< 1 -> false; -key_lookup_level(LevelIdx, [Entry|Rest], Key) -> +is_empty(_LevelIdx, Level) -> + leveled_tree:tsize(Level) == 0. + +size(LevelIdx, Level) when LevelIdx =< 1 -> + length(Level); +size(_LevelIdx, Level) -> + leveled_tree:tsize(Level). 
+ +pred_fun(LevelIdx, StartKey, _EndKey) when LevelIdx =< 1 -> + fun(ME) -> + ME#manifest_entry.start_key < StartKey + end; +pred_fun(_LevelIdx, _StartKey, EndKey) -> + fun({EK, _ME}) -> + EK < EndKey + end. + +add_entry(_LevelIdx, Level, []) -> + Level; +add_entry(LevelIdx, Level, Entries) when is_list(Entries) -> + FirstEntry = lists:nth(1, Entries), + PredFun = pred_fun(LevelIdx, + FirstEntry#manifest_entry.start_key, + FirstEntry#manifest_entry.end_key), + case LevelIdx =< 1 of + true -> + {LHS, RHS} = lists:splitwith(PredFun, Level), + lists:append([LHS, Entries, RHS]); + false -> + {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)), + MapFun = + fun(ME) -> + {ME#manifest_entry.end_key, ME} + end, + Entries0 = lists:map(MapFun, Entries), + leveled_tree:from_orderedlist(lists:append([LHS, Entries0, RHS]), + ?TREE_TYPE, + ?TREE_WIDTH) + end; +add_entry(LevelIdx, Level, Entry) -> + add_entry(LevelIdx, Level, [Entry]). + +remove_entry(LevelIdx, Level, Entries) -> + % We're assuming we're removing a sorted sublist + {RemLength, FirstRemoval} = measure_removals(Entries), + remove_section(LevelIdx, Level, FirstRemoval, RemLength). + +measure_removals(Removals) -> + case is_list(Removals) of + true -> + {length(Removals), lists:nth(1, Removals)}; + false -> + {1, Removals} + end. + +remove_section(LevelIdx, Level, FirstEntry, SectionLength) -> + PredFun = pred_fun(LevelIdx, + FirstEntry#manifest_entry.start_key, + FirstEntry#manifest_entry.end_key), + case LevelIdx =< 1 of + true -> + {LHS, RHS} = lists:splitwith(PredFun, Level), + Post = lists:nthtail(SectionLength, RHS), + lists:append([LHS, Post]); + false -> + {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)), + Post = lists:nthtail(SectionLength, RHS), + leveled_tree:from_orderedlist(lists:append([LHS, Post]), + ?TREE_TYPE, + ?TREE_WIDTH) + end. 
+ +replace_entry(LevelIdx, Level, Removals, Additions) when LevelIdx =< 1 -> + {SectionLength, FirstEntry} = measure_removals(Removals), + PredFun = pred_fun(LevelIdx, + FirstEntry#manifest_entry.start_key, + FirstEntry#manifest_entry.end_key), + {LHS, RHS} = lists:splitwith(PredFun, Level), + Post = lists:nthtail(SectionLength, RHS), + case is_list(Additions) of + true -> + lists:append([LHS, Additions, Post]); + false -> + lists:append([LHS, [Additions], Post]) + end; +replace_entry(LevelIdx, Level, Removals, Additions) -> + {SectionLength, FirstEntry} = measure_removals(Removals), + PredFun = pred_fun(LevelIdx, + FirstEntry#manifest_entry.start_key, + FirstEntry#manifest_entry.end_key), + {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)), + Post = + case RHS of + [] -> + []; + _ -> + lists:nthtail(SectionLength, RHS) + end, + UpdList = + case is_list(Additions) of + true -> + MapFun = + fun(ME) -> + {ME#manifest_entry.end_key, ME} + end, + Additions0 = lists:map(MapFun, Additions), + lists:append([LHS, Additions0, Post]); + false -> + lists:append([LHS, + [{Additions#manifest_entry.end_key, + Additions}], + Post]) + end, + leveled_tree:from_orderedlist(UpdList, ?TREE_TYPE, ?TREE_WIDTH). + + +update_pendingdeletes(ManSQN, Removals, PendingDeletes) -> + DelFun = + fun(E, Acc) -> + dict:store(E#manifest_entry.filename, + {ManSQN, E}, + Acc) + end, + Entries = + case is_list(Removals) of + true -> + Removals; + false -> + [Removals] + end, + lists:foldl(DelFun, PendingDeletes, Entries). 
+ +key_lookup_level(LevelIdx, [], _Key) when LevelIdx =< 1 -> + false; +key_lookup_level(LevelIdx, [Entry|Rest], Key) when LevelIdx =< 1 -> case Entry#manifest_entry.end_key >= Key of true -> case Key >= Entry#manifest_entry.start_key of @@ -384,8 +529,20 @@ key_lookup_level(LevelIdx, [Entry|Rest], Key) -> end; false -> key_lookup_level(LevelIdx, Rest, Key) + end; +key_lookup_level(_LevelIdx, Level, Key) -> + StartKeyFun = + fun(ME) -> + ME#manifest_entry.start_key + end, + case leveled_tree:search(Key, Level, StartKeyFun) of + none -> + false; + {_EK, ME} -> + ME#manifest_entry.owner end. + range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) -> Range = case LevelIdx > Manifest#manifest.basement of @@ -400,7 +557,7 @@ range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) -> end, lists:map(MakePointerFun, Range). -range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) -> +range_lookup_level(LevelIdx, Level, QStartKey, QEndKey) when LevelIdx =< 1 -> BeforeFun = fun(M) -> QStartKey > M#manifest_entry.end_key @@ -412,7 +569,19 @@ range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) -> end, {_Before, MaybeIn} = lists:splitwith(BeforeFun, Level), {In, _After} = lists:splitwith(NotAfterFun, MaybeIn), - In. + In; +range_lookup_level(_LevelIdx, Level, QStartKey, QEndKey) -> + StartKeyFun = + fun(ME) -> + ME#manifest_entry.start_key + end, + Range = leveled_tree:search_range(QStartKey, QEndKey, Level, StartKeyFun), + MapFun = + fun({_EK, ME}) -> + ME + end, + lists:map(MapFun, Range). + get_basement(Levels) -> GetBaseFun = @@ -456,6 +625,7 @@ open_manifestfile(RootPath, [TopManSQN|Rest]) -> open_manifestfile(RootPath, Rest) end. 
+ %%%============================================================================ %%% Test %%%============================================================================ @@ -587,6 +757,98 @@ keylookup_manifest_test() -> ?assertMatch("pid_y3", key_lookup(Man13, 1, LK1_4)), ?assertMatch("pid_z5", key_lookup(Man13, 2, LK1_4)). +ext_keylookup_manifest_test() -> + RP = "../test", + {_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(), + save_manifest(Man6, RP), + + E7 = #manifest_entry{start_key={o, "Bucket1", "K997", null}, + end_key={o, "Bucket1", "K999", null}, + filename="Z7", + owner="pid_z7"}, + Man7 = insert_manifest_entry(Man6, 2, 2, E7), + save_manifest(Man7, RP), + ManOpen1 = open_manifest(RP), + ?assertMatch(2, get_manifest_sqn(ManOpen1)), + + Man7FN = filepath(RP, 2, current_manifest), + Man7FNAlt = filename:rootname(Man7FN) ++ ".pnd", + {ok, BytesCopied} = file:copy(Man7FN, Man7FNAlt), + {ok, Bin} = file:read_file(Man7FN), + ?assertMatch(BytesCopied, byte_size(Bin)), + RandPos = random:uniform(bit_size(Bin) - 1), + <> = Bin, + Flipped = BitToFlip bxor 1, + ok = file:write_file(Man7FN, + <>), + + ?assertMatch(2, get_manifest_sqn(Man7)), + + ManOpen2 = open_manifest(RP), + ?assertMatch(1, get_manifest_sqn(ManOpen2)), + + E1 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld1"}, "K8"}, + end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K93"}, + filename="Z1", + owner="pid_z1"}, + E2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K97"}, + end_key={o, "Bucket1", "K71", null}, + filename="Z2", + owner="pid_z2"}, + E3 = #manifest_entry{start_key={o, "Bucket1", "K75", null}, + end_key={o, "Bucket1", "K993", null}, + filename="Z3", + owner="pid_z3"}, + + E1_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld4"}, "K8"}, + end_key={i, "Bucket1", {"Idx1", "Fld9"}, "K62"}, + owner="pid_y1", + filename="Y1"}, + E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"}, + end_key={o, "Bucket1", "K45", null}, + 
owner="pid_y2", + filename="Y2"}, + E3_2 = #manifest_entry{start_key={o, "Bucket1", "K47", null}, + end_key={o, "Bucket1", "K812", null}, + owner="pid_y3", + filename="Y3"}, + E4_2 = #manifest_entry{start_key={o, "Bucket1", "K815", null}, + end_key={o, "Bucket1", "K998", null}, + owner="pid_y4", + filename="Y4"}, + + Man8 = replace_manifest_entry(ManOpen2, 2, 1, E1, E1_2), + Man9 = remove_manifest_entry(Man8, 2, 1, [E2, E3]), + Man10 = insert_manifest_entry(Man9, 2, 1, [E2_2, E3_2, E4_2]), + ?assertMatch(2, get_manifest_sqn(Man10)), + + LK1_4 = {o, "Bucket1", "K75", null}, + ?assertMatch("pid_y3", key_lookup(Man10, 1, LK1_4)), + ?assertMatch("pid_z5", key_lookup(Man10, 2, LK1_4)), + + E5 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld7"}, "K97"}, + end_key={o, "Bucket1", "K78", null}, + filename="Z5", + owner="pid_z5"}, + E6 = #manifest_entry{start_key={o, "Bucket1", "K81", null}, + end_key={o, "Bucket1", "K996", null}, + filename="Z6", + owner="pid_z6"}, + + Man11 = remove_manifest_entry(Man10, 3, 2, [E5, E6]), + ?assertMatch(3, get_manifest_sqn(Man11)), + ?assertMatch(false, key_lookup(Man11, 2, LK1_4)), + + E2_2 = #manifest_entry{start_key={i, "Bucket1", {"Idx1", "Fld9"}, "K67"}, + end_key={o, "Bucket1", "K45", null}, + owner="pid_y2", + filename="Y2"}, + + Man12 = replace_manifest_entry(Man11, 4, 2, E2_2, E5), + ?assertMatch(4, get_manifest_sqn(Man12)), + ?assertMatch("pid_z5", key_lookup(Man12, 2, LK1_4)). 
rangequery_manifest_test() -> {_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(), diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index 9480abe..97e4d5c 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -57,7 +57,7 @@ prepare_for_index(IndexArray, Hash) -> add_to_cache(L0Size, {LevelMinus1, MinSQN, MaxSQN}, LedgerSQN, TreeList) -> - LM1Size = leveled_skiplist:size(LevelMinus1), + LM1Size = leveled_tree:tsize(LevelMinus1), case LM1Size of 0 -> {LedgerSQN, L0Size, TreeList}; @@ -99,7 +99,7 @@ to_list(Slots, FetchFun) -> SlotList = lists:reverse(lists:seq(1, Slots)), FullList = lists:foldl(fun(Slot, Acc) -> Tree = FetchFun(Slot), - L = leveled_skiplist:to_list(Tree), + L = leveled_tree:to_list(Tree), lists:ukeymerge(1, Acc, L) end, [], @@ -119,14 +119,14 @@ check_levelzero(Key, Hash, PosList, TreeList) -> check_slotlist(Key, Hash, PosList, TreeList). -merge_trees(StartKey, EndKey, SkipListList, LevelMinus1) -> - lists:foldl(fun(SkipList, Acc) -> - R = leveled_skiplist:to_range(SkipList, - StartKey, - EndKey), +merge_trees(StartKey, EndKey, TreeList, LevelMinus1) -> + lists:foldl(fun(Tree, Acc) -> + R = leveled_tree:match_range(StartKey, + EndKey, + Tree), lists:ukeymerge(1, Acc, R) end, [], - [LevelMinus1|lists:reverse(SkipListList)]). + [LevelMinus1|lists:reverse(TreeList)]). %%%============================================================================ %%% Internal Functions @@ -148,7 +148,7 @@ split_hash(Hash) -> H0 = (Hash bsr 8) band 8388607, {Slot, H0}. 
-check_slotlist(Key, Hash, CheckList, TreeList) -> +check_slotlist(Key, _Hash, CheckList, TreeList) -> SlotCheckFun = fun(SlotToCheck, {Found, KV}) -> case Found of @@ -156,7 +156,7 @@ check_slotlist(Key, Hash, CheckList, TreeList) -> {Found, KV}; false -> CheckTree = lists:nth(SlotToCheck, TreeList), - case leveled_skiplist:lookup(Key, Hash, CheckTree) of + case leveled_tree:match(Key, CheckTree) of none -> {Found, KV}; {value, Value} -> @@ -188,7 +188,7 @@ generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> [], BucketRangeLow, BucketRangeHigh), - leveled_skiplist:from_list(KVL). + leveled_tree:from_orderedlist(lists:ukeysort(1, KVL), ?CACHE_TYPE). generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> Acc; @@ -223,7 +223,7 @@ compare_method_test() -> ?assertMatch(32000, SQN), ?assertMatch(true, Size =< 32000), - TestList = leveled_skiplist:to_list(generate_randomkeys(1, 2000, 1, 800)), + TestList = leveled_tree:to_list(generate_randomkeys(1, 2000, 1, 800)), FindKeyFun = fun(Key) -> @@ -232,7 +232,7 @@ compare_method_test() -> true -> {true, KV}; false -> - L0 = leveled_skiplist:lookup(Key, Tree), + L0 = leveled_tree:match(Key, Tree), case L0 of none -> {false, not_found}; @@ -270,19 +270,20 @@ compare_method_test() -> P = leveled_codec:endkey_passed(EndKey, K), case {K, P} of {K, false} when K >= StartKey -> - leveled_skiplist:enter(K, V, Acc); + [{K, V}|Acc]; _ -> Acc end end, - leveled_skiplist:empty(), + [], DumpList), - Sz0 = leveled_skiplist:size(Q0), + Tree = leveled_tree:from_orderedlist(lists:ukeysort(1, Q0), ?CACHE_TYPE), + Sz0 = leveled_tree:tsize(Tree), io:format("Crude method took ~w microseconds resulting in tree of " ++ "size ~w~n", [timer:now_diff(os:timestamp(), SWa), Sz0]), SWb = os:timestamp(), - Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_skiplist:empty()), + Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_tree:empty(?CACHE_TYPE)), Sz1 = length(Q1), io:format("Merge method took ~w microseconds resulting in 
tree of " ++ "size ~w~n", @@ -299,7 +300,7 @@ with_index_test() -> fun(_X, {{LedgerSQN, L0Size, L0TreeList}, L0Idx, SrcList}) -> LM1 = generate_randomkeys_aslist(LedgerSQN + 1, 2000, 1, 500), LM1Array = lists:foldl(IndexPrepareFun, new_index(), LM1), - LM1SL = leveled_skiplist:from_list(LM1), + LM1SL = leveled_tree:from_orderedlist(lists:ukeysort(1, LM1), ?CACHE_TYPE), UpdL0Index = add_to_index(LM1Array, L0Idx, length(L0TreeList) + 1), R = add_to_cache(L0Size, {LM1SL, LedgerSQN + 1, LedgerSQN + 2000}, diff --git a/src/leveled_skiplist.erl b/src/leveled_skiplist.erl deleted file mode 100644 index b79d050..0000000 --- a/src/leveled_skiplist.erl +++ /dev/null @@ -1,661 +0,0 @@ -%% -------- SKIPLIST --------- -%% -%% For storing small numbers of {K, V} pairs where reasonable insertion and -%% fetch times, but with fast support for flattening to a list or a sublist -%% within a certain key range -%% -%% Used instead of gb_trees to retain compatability of OTP16 (and Riak's -%% ongoing dependency on OTP16) -%% -%% Not a proper skip list. Only supports a fixed depth. Good enough for the -%% purposes of leveled. Also uses peculiar enkey_passed function within -%% leveled. Not tested beyond a depth of 2. - --module(leveled_skiplist). - --include("include/leveled.hrl"). - --export([ - from_list/1, - from_list/2, - from_sortedlist/1, - from_sortedlist/2, - from_orderedset/1, - from_orderedset/2, - to_list/1, - enter/3, - enter/4, - enter_nolookup/3, - to_range/2, - to_range/3, - lookup/2, - lookup/3, - empty/0, - empty/1, - size/1 - ]). - --include_lib("eunit/include/eunit.hrl"). - --define(SKIP_WIDTH, 16). --define(LIST_HEIGHT, 2). --define(INFINITY_KEY, {null, null, null, null, null}). --define(BITARRAY_SIZE, 2048). 
- -%%%============================================================================ -%%% SkipList API -%%%============================================================================ - -enter(Key, Value, SkipList) -> - Hash = leveled_codec:magic_hash(Key), - enter(Key, Hash, Value, SkipList). - -enter(Key, Hash, Value, SkipList) -> - Bloom0 = - case element(1, SkipList) of - list_only -> - list_only; - Bloom -> - leveled_tinybloom:enter({hash, Hash}, Bloom) - end, - {Bloom0, - enter(Key, Value, erlang:phash2(Key), - element(2, SkipList), - ?SKIP_WIDTH, ?LIST_HEIGHT)}. - -%% Can iterate over a key entered this way, but never lookup the key -%% used for index terms -%% The key may still be a marker key - and the much cheaper native hash -%% is used to dtermine this, avoiding the more expensive magic hash -enter_nolookup(Key, Value, SkipList) -> - {element(1, SkipList), - enter(Key, Value, erlang:phash2(Key), - element(2, SkipList), - ?SKIP_WIDTH, ?LIST_HEIGHT)}. - -from_orderedset(Table) -> - from_orderedset(Table, false). - -from_orderedset(Table, Bloom) -> - from_sortedlist(ets:tab2list(Table), Bloom). - -from_list(UnsortedKVL) -> - from_list(UnsortedKVL, false). - -from_list(UnsortedKVL, BloomProtect) -> - KVL = lists:ukeysort(1, UnsortedKVL), - from_sortedlist(KVL, BloomProtect). - -from_sortedlist(SortedKVL) -> - from_sortedlist(SortedKVL, false). - -from_sortedlist([], BloomProtect) -> - empty(BloomProtect); -from_sortedlist(SortedKVL, BloomProtect) -> - Bloom0 = - case BloomProtect of - true -> - lists:foldr(fun({K, _V}, Bloom) -> - leveled_tinybloom:enter(K, Bloom) end, - leveled_tinybloom:empty(?SKIP_WIDTH), - SortedKVL); - false -> - list_only - end, - {Bloom0, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}. - -lookup(Key, SkipList) -> - case element(1, SkipList) of - list_only -> - list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT); - _ -> - lookup(Key, leveled_codec:magic_hash(Key), SkipList) - end. 
- -lookup(Key, Hash, SkipList) -> - case element(1, SkipList) of - list_only -> - list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT); - _ -> - case leveled_tinybloom:check({hash, Hash}, element(1, SkipList)) of - false -> - none; - true -> - list_lookup(Key, element(2, SkipList), ?LIST_HEIGHT) - end - end. - - -%% Rather than support iterator_from like gb_trees, will just an output a key -%% sorted list for the desired range, which can the be iterated over as normal -to_range(SkipList, Start) -> - to_range(element(2, SkipList), Start, ?INFINITY_KEY, ?LIST_HEIGHT). - -to_range(SkipList, Start, End) -> - to_range(element(2, SkipList), Start, End, ?LIST_HEIGHT). - -to_list(SkipList) -> - to_list(element(2, SkipList), ?LIST_HEIGHT). - -empty() -> - empty(false). - -empty(BloomProtect) -> - case BloomProtect of - true -> - {leveled_tinybloom:empty(?SKIP_WIDTH), - empty([], ?LIST_HEIGHT)}; - false -> - {list_only, empty([], ?LIST_HEIGHT)} - end. - -size(SkipList) -> - size(element(2, SkipList), ?LIST_HEIGHT). 
- - -%%%============================================================================ -%%% SkipList Base Functions -%%%============================================================================ - -enter(Key, Value, Hash, SkipList, Width, 1) -> - {MarkerKey, SubList} = find_mark(Key, SkipList), - case Hash rem Width of - 0 -> - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> - K =< Key end, - SubList), - SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), - SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1], - lists:ukeysort(1, SkpL2); - _ -> - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < Key end, SubList), - UpdSubList = - case RHS of - [] -> - LHS ++ [{Key, Value}]; - [{FirstKey, _V}|RHSTail] -> - case FirstKey of - Key -> - LHS ++ [{Key, Value}] ++ RHSTail; - _ -> - LHS ++ [{Key, Value}] ++ RHS - end - end, - lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList}) - end; -enter(Key, Value, Hash, SkipList, Width, Level) -> - HashMatch = width(Level, Width), - {MarkerKey, SubSkipList} = find_mark(Key, SkipList), - UpdSubSkipList = enter(Key, Value, Hash, SubSkipList, Width, Level - 1), - case Hash rem HashMatch of - 0 -> - % - {LHS, RHS} = lists:splitwith(fun({K, _V}) -> - K =< Key end, - UpdSubSkipList), - SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}), - lists:ukeysort(1, [{Key, LHS}|SkpL1]); - _ -> - % Need to replace Marker Key with sublist - lists:keyreplace(MarkerKey, - 1, - SkipList, - {MarkerKey, UpdSubSkipList}) - end. - -from_list(SkipList, _SkipWidth, 0) -> - SkipList; -from_list(KVList, SkipWidth, ListHeight) -> - L0 = length(KVList), - SL0 = - case L0 > SkipWidth of - true -> - from_list(KVList, L0, [], SkipWidth); - false -> - {LastK, _LastSL} = lists:last(KVList), - [{LastK, KVList}] - end, - from_list(SL0, SkipWidth, ListHeight - 1). 
- -from_list([], 0, SkipList, _SkipWidth) -> - SkipList; -from_list(KVList, L, SkipList, SkipWidth) -> - SubLL = min(SkipWidth, L), - {Head, Tail} = lists:split(SubLL, KVList), - {LastK, _LastV} = lists:last(Head), - from_list(Tail, L - SubLL, SkipList ++ [{LastK, Head}], SkipWidth). - - -list_lookup(Key, SkipList, 1) -> - SubList = get_sublist(Key, SkipList), - case lists:keyfind(Key, 1, SubList) of - false -> - none; - {Key, V} -> - {value, V} - end; -list_lookup(Key, SkipList, Level) -> - SubList = get_sublist(Key, SkipList), - case SubList of - null -> - none; - _ -> - list_lookup(Key, SubList, Level - 1) - end. - - -to_list(SkipList, 1) -> - lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ SL end, [], SkipList); -to_list(SkipList, Level) -> - lists:foldl(fun({_Mark, SL}, Acc) -> Acc ++ to_list(SL, Level - 1) end, - [], - SkipList). - - -to_range(SkipList, StartKey, EndKey, ListHeight) -> - to_range(SkipList, StartKey, EndKey, ListHeight, [], true). - -to_range(SkipList, StartKey, EndKey, ListHeight, Acc, StartIncl) -> - SL = sublist_above(SkipList, StartKey, ListHeight, StartIncl), - case SL of - [] -> - Acc; - _ -> - {LK, _LV} = lists:last(SL), - case leveled_codec:endkey_passed(EndKey, LK) of - false -> - to_range(SkipList, - LK, - EndKey, - ListHeight, - Acc ++ SL, - false); - true -> - SplitFun = - fun({K, _V}) -> - not leveled_codec:endkey_passed(EndKey, K) end, - LHS = lists:takewhile(SplitFun, SL), - Acc ++ LHS - end - end. - -sublist_above(SkipList, StartKey, 0, StartIncl) -> - TestFun = - fun({K, _V}) -> - case StartIncl of - true -> - K < StartKey; - false -> - K =< StartKey - end end, - lists:dropwhile(TestFun, SkipList); -sublist_above(SkipList, StartKey, Level, StartIncl) -> - TestFun = - fun({K, _SL}) -> - case StartIncl of - true -> - K < StartKey; - false -> - K =< StartKey - end end, - RHS = lists:dropwhile(TestFun, SkipList), - case RHS of - [] -> - []; - [{_K, SL}|_Rest] -> - sublist_above(SL, StartKey, Level - 1, StartIncl) - end. 
- -empty(SkipList, 1) -> - [{?INFINITY_KEY, SkipList}]; -empty(SkipList, Level) -> - empty([{?INFINITY_KEY, SkipList}], Level - 1). - -size(SkipList, 1) -> - lists:foldl(fun({_Mark, SL}, Acc) -> length(SL) + Acc end, 0, SkipList); -size(SkipList, Level) -> - lists:foldl(fun({_Mark, SL}, Acc) -> size(SL, Level - 1) + Acc end, - 0, - SkipList). - - -%%%============================================================================ -%%% Internal Functions -%%%============================================================================ - -width(1, Width) -> - Width; -width(N, Width) -> - width(N - 1, Width * Width). - -find_mark(Key, SkipList) -> - lists:foldl(fun({Marker, SL}, Acc) -> - case Acc of - false -> - case Marker >= Key of - true -> - {Marker, SL}; - false -> - Acc - end; - _ -> - Acc - end end, - false, - SkipList). - -get_sublist(Key, SkipList) -> - lists:foldl(fun({SkipKey, SL}, Acc) -> - case {Acc, SkipKey} of - {null, SkipKey} when SkipKey >= Key -> - SL; - _ -> - Acc - end end, - null, - SkipList). - -%%%============================================================================ -%%% Test -%%%============================================================================ - --ifdef(TEST). - -generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> - generate_randomkeys(Seqn, - Count, - [], - BucketRangeLow, - BucketRangeHigh). - -generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> - Acc; -generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> - BNumber = - case BRange of - 0 -> - string:right(integer_to_list(BucketLow), 4, $0); - _ -> - BRand = random:uniform(BRange), - string:right(integer_to_list(BucketLow + BRand), 4, $0) - end, - KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0), - {K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null}, - {Seqn, {active, infinity}, null}}, - generate_randomkeys(Seqn + 1, - Count - 1, - [{K, V}|Acc], - BucketLow, - BRange). 
- -skiplist_small_test() -> - % Check nothing bad happens with very small lists - lists:foreach(fun(N) -> dotest_skiplist_small(N) end, lists:seq(1, 32)). - - -dotest_skiplist_small(N) -> - KL = generate_randomkeys(1, N, 1, 2), - SkipList1 = - lists:foldl(fun({K, V}, SL) -> - enter(K, V, SL) - end, - empty(), - KL), - SkipList2 = from_list(lists:reverse(KL)), - lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList1)) - end, - lists:ukeysort(1, lists:reverse(KL))), - lists:foreach(fun({K, V}) -> ?assertMatch({value, V}, lookup(K, SkipList2)) - end, - lists:ukeysort(1, lists:reverse(KL))). - -skiplist_withbloom_test() -> - io:format(user, "~n~nBloom protected skiplist test:~n~n", []), - skiplist_tester(true). - -skiplist_nobloom_test() -> - io:format(user, "~n~nBloom free skiplist test:~n~n", []), - skiplist_tester(false). - -skiplist_tester(Bloom) -> - N = 4000, - KL = generate_randomkeys(1, N, 1, N div 5), - - OS = ets:new(test, [ordered_set, private]), - ets:insert(OS, KL), - SWaETS = os:timestamp(), - SkipList = from_orderedset(OS, Bloom), - io:format(user, "Generating skip list with ~w keys in ~w microseconds " ++ - "from ordered set~n", - [N, timer:now_diff(os:timestamp(), SWaETS)]), - - SWaGSL = os:timestamp(), - SkipList = from_list(lists:reverse(KL), Bloom), - io:format(user, "Generating skip list with ~w keys in ~w microseconds~n" ++ - "Top level key count of ~w~n", - [N, - timer:now_diff(os:timestamp(), SWaGSL), - length(element(2, SkipList))]), - io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - element(2, SkipList))]), - KLSorted = lists:ukeysort(1, lists:reverse(KL)), - - SWaGSL2 = os:timestamp(), - SkipList = from_sortedlist(KLSorted, Bloom), - io:format(user, "Generating skip list with ~w sorted keys in ~w " ++ - "microseconds~n", - [N, timer:now_diff(os:timestamp(), SWaGSL2)]), - - SWaDSL = os:timestamp(), - SkipList1 = - lists:foldl(fun({K, V}, SL) -> - enter(K, V, SL) - end, - 
empty(Bloom), - KL), - io:format(user, "Dynamic load of skiplist with ~w keys took ~w " ++ - "microseconds~n" ++ - "Top level key count of ~w~n", - [N, - timer:now_diff(os:timestamp(), SWaDSL), - length(element(2, SkipList1))]), - io:format(user, "Second tier key counts of ~w~n", - [lists:map(fun({_L, SL}) -> length(SL) end, - element(2, SkipList1))]), - - io:format(user, "~nRunning timing tests for generated skiplist:~n", []), - skiplist_timingtest(KLSorted, SkipList, N, Bloom), - - io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []), - skiplist_timingtest(KLSorted, SkipList1, N, Bloom). - - -skiplist_timingtest(KL, SkipList, N, Bloom) -> - io:format(user, "Timing tests on skiplist of size ~w~n", - [leveled_skiplist:size(SkipList)]), - CheckList1 = lists:sublist(KL, N div 4, 200), - CheckList2 = lists:sublist(KL, N div 3, 200), - CheckList3 = lists:sublist(KL, N div 2, 200), - CheckList4 = lists:sublist(KL, N - 1000, 200), - CheckList5 = lists:sublist(KL, N - 500, 200), - CheckList6 = lists:sublist(KL, 1, 10), - CheckList7 = lists:nthtail(N - 200, KL), - CheckList8 = lists:sublist(KL, N div 2, 1), - CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++ - CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7, - - SWb = os:timestamp(), - lists:foreach(fun({K, V}) -> - ?assertMatch({value, V}, lookup(K, SkipList)) - end, - CheckAll), - io:format(user, "Finding 1020 keys took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWb)]), - - RangeFun = - fun(SkipListToQuery, CheckListForQ, Assert) -> - KR = - to_range(SkipListToQuery, - element(1, lists:nth(1, CheckListForQ)), - element(1, lists:last(CheckListForQ))), - case Assert of - true -> - CompareL = length(lists:usort(CheckListForQ)), - ?assertMatch(CompareL, length(KR)); - false -> - KR - end - end, - - SWc = os:timestamp(), - RangeFun(SkipList, CheckList1, true), - RangeFun(SkipList, CheckList2, true), - RangeFun(SkipList, CheckList3, true), - RangeFun(SkipList, CheckList4, true), - 
RangeFun(SkipList, CheckList5, true), - RangeFun(SkipList, CheckList6, true), - RangeFun(SkipList, CheckList7, true), - RangeFun(SkipList, CheckList8, true), - - KL_OOR1 = generate_randomkeys(1, 4, N div 5 + 1, N div 5 + 10), - KR9 = RangeFun(SkipList, KL_OOR1, false), - ?assertMatch([], KR9), - - KL_OOR2 = generate_randomkeys(1, 4, 0, 0), - KR10 = RangeFun(SkipList, KL_OOR2, false), - ?assertMatch([], KR10), - - io:format(user, "Finding 10 ranges took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWc)]), - - AltKL1 = generate_randomkeys(1, 2000, 1, 200), - SWd0 = os:timestamp(), - lists:foreach(fun({K, _V}) -> - lookup(K, SkipList) - end, - AltKL1), - io:format(user, "Getting 2000 mainly missing keys took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWd0)]), - SWd1 = os:timestamp(), - lists:foreach(fun({K, _V}) -> - leveled_codec:magic_hash(K) - end, - AltKL1), - io:format(user, "Generating 2000 magic hashes took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWd1)]), - SWd2 = os:timestamp(), - lists:foreach(fun({K, _V}) -> - erlang:phash2(K) - end, - AltKL1), - io:format(user, "Generating 2000 not so magic hashes took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWd2)]), - - AltKL2 = generate_randomkeys(1, 1000, N div 5 + 1, N div 5 + 300), - SWe = os:timestamp(), - lists:foreach(fun({K, _V}) -> - none = lookup(K, SkipList) - end, - AltKL2), - io:format(user, "Getting 1000 missing keys above range took ~w " ++ - "microseconds~n", - [timer:now_diff(os:timestamp(), SWe)]), - AltKL3 = generate_randomkeys(1, 1000, 0, 0), - SWf = os:timestamp(), - lists:foreach(fun({K, _V}) -> - none = lookup(K, SkipList) - end, - AltKL3), - io:format(user, "Getting 1000 missing keys below range took ~w " ++ - "microseconds~n", - [timer:now_diff(os:timestamp(), SWf)]), - - SWg = os:timestamp(), - FlatList = to_list(SkipList), - io:format(user, "Flattening skiplist took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWg)]), - ?assertMatch(KL, 
FlatList), - - case Bloom of - true -> - HashList = lists:map(fun(_X) -> - random:uniform(4294967295) end, - lists:seq(1, 2000)), - SWh = os:timestamp(), - lists:foreach(fun(X) -> - lookup(X, X, SkipList) end, - HashList), - io:format(user, - "Getting 2000 missing keys when hash was known " ++ - "took ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWh)]); - false -> - ok - end. - -define_kv(X) -> - {{o, "Bucket", "Key" ++ string:right(integer_to_list(X), 6), null}, - {X, {active, infinity}, null}}. - -skiplist_roundsize_test() -> - KVL = lists:map(fun(X) -> define_kv(X) end, lists:seq(1, 4096)), - SkipList = from_list(KVL), - lists:foreach(fun({K, V}) -> - ?assertMatch({value, V}, lookup(K, SkipList)) end, - KVL), - lists:foreach(fun(X) -> - {KS, _VS} = define_kv(X * 32 + 1), - {KE, _VE} = define_kv((X + 1) * 32), - R = to_range(SkipList, KS, KE), - L = lists:sublist(KVL, - X * 32 + 1, - 32), - ?assertMatch(L, R) end, - lists:seq(0, 24)). - -skiplist_nolookup_test() -> - N = 4000, - KL = generate_randomkeys(1, N, 1, N div 5), - SkipList = lists:foldl(fun({K, V}, Acc) -> - enter_nolookup(K, V, Acc) end, - empty(true), - KL), - KLSorted = lists:ukeysort(1, lists:reverse(KL)), - lists:foreach(fun({K, _V}) -> - ?assertMatch(none, lookup(K, SkipList)) end, - KL), - ?assertMatch(KLSorted, to_list(SkipList)). 
- -skiplist_range_test() -> - N = 150, - KL = generate_randomkeys(1, N, 1, N div 5), - - KLSL1 = lists:sublist(lists:ukeysort(1, KL), 128), - SkipList1 = from_list(KLSL1), - {LastK1, V1} = lists:last(KLSL1), - R1 = to_range(SkipList1, LastK1, LastK1), - ?assertMatch([{LastK1, V1}], R1), - - KLSL2 = lists:sublist(lists:ukeysort(1, KL), 127), - SkipList2 = from_list(KLSL2), - {LastK2, V2} = lists:last(KLSL2), - R2 = to_range(SkipList2, LastK2, LastK2), - ?assertMatch([{LastK2, V2}], R2), - - KLSL3 = lists:sublist(lists:ukeysort(1, KL), 129), - SkipList3 = from_list(KLSL3), - {LastK3, V3} = lists:last(KLSL3), - R3 = to_range(SkipList3, LastK3, LastK3), - ?assertMatch([{LastK3, V3}], R3), - - {FirstK4, V4} = lists:nth(1, KLSL3), - R4 = to_range(SkipList3, FirstK4, FirstK4), - ?assertMatch([{FirstK4, V4}], R4). - - -empty_skiplist_size_test() -> - ?assertMatch(0, leveled_skiplist:size(empty(false))), - ?assertMatch(0, leveled_skiplist:size(empty(true))). - --endif. \ No newline at end of file diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index c9102d1..58573a8 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -77,6 +77,8 @@ -define(INDEX_MARKER_WIDTH, 16). -define(DISCARD_EXT, ".discarded"). -define(DELETE_TIMEOUT, 10000). +-define(TREE_TYPE, idxt). +-define(TREE_SIZE, 4). -include_lib("eunit/include/eunit.hrl"). @@ -676,93 +678,37 @@ generate_filenames(RootFilename) -> %% The Slot Index is stored as a flat (sorted) list of {Key, Slot} where Key %% is the last key within the slot. %% -%% This implementation of the SlotIndex stores it as a tuple with the original -%% list as the second element and a list of mark points as the first element -%% containing every 16th key. The Mark points are stored as {Mark, Index}, -%% where the Index correspnds with the nth point in the original list that the -%% Mark occurs. 
+%% This implementation of the SlotIndex uses leveled_tree from_list(SlotList) -> - L = length(SlotList), - MarkerList = set_marks(lists:reverse(SlotList), - {?INDEX_MARKER_WIDTH, L rem ?INDEX_MARKER_WIDTH}, - L, - []), - {MarkerList, SlotList}. + leveled_tree:from_orderedlist(SlotList, ?TREE_TYPE, ?TREE_SIZE). -set_marks([], _MarkInfo, 0, MarkerList) -> - MarkerList; -set_marks([{Key, _Slot}|Rest], {MarkerWidth, MarkPoint}, Count, MarkerList) -> - case Count rem MarkerWidth of - MarkPoint -> - set_marks(Rest, - {MarkerWidth, MarkPoint}, - Count - 1, - [{Key, Count}|MarkerList]); - _ -> - set_marks(Rest, - {MarkerWidth, MarkPoint}, - Count - 1, - MarkerList) - end. - -find_mark(Key, [{Mark, Pos}|_Rest]) when Mark >= Key -> - Pos; -find_mark(Key, [_H|T]) -> - find_mark(Key, T). - -lookup_slot(Key, {MarkerList, SlotList}) -> - Pos = find_mark(Key, MarkerList), - SubList = lists:sublist(SlotList, max(1, Pos - ?INDEX_MARKER_WIDTH), Pos), - Slot = find_mark(Key, SubList), +lookup_slot(Key, Tree) -> + StartKeyFun = + fun(_V) -> + all + end, + % The penciller should never ask for presence out of range - so will + % always return a slot (As we don't compare to StartKey) + {_LK, Slot} = leveled_tree:search(Key, Tree, StartKeyFun), Slot. -%% Returns a section from the summary index and two booleans to indicate if -%% the first slot needs trimming, or the last slot -lookup_slots(StartKey, EndKey, {_MarkerList, SlotList}) -> - SlotsOnlyFun = fun({_K, V}) -> V end, - {KSL, LTrim, RTrim} = lookup_slots_int(StartKey, EndKey, SlotList), - {lists:map(SlotsOnlyFun, KSL), LTrim, RTrim}. 
- -lookup_slots_int(all, all, SlotList) -> - {SlotList, false, false}; -lookup_slots_int(StartKey, all, SlotList) -> - LTrimFun = fun({K, _V}) -> K < StartKey end, - {_LDrop, RKeep0} = lists:splitwith(LTrimFun, SlotList), - {RKeep0, true, false}; -lookup_slots_int(StartKey, EndKey, SlotList) -> - {RKeep, true, false} = lookup_slots_int(StartKey, all, SlotList), - [LeftMost|RKeep0] = RKeep, - {LeftMostK, LeftMostV} = LeftMost, - RTrimFun = fun({K, _V}) -> not leveled_codec:endkey_passed(EndKey, K) end, - case leveled_codec:endkey_passed(EndKey, LeftMostK) of - true -> - {[{LeftMostK, LeftMostV}], - true, - true}; - false -> - case LeftMostK of - EndKey -> - {[{LeftMostK, LeftMostV}], - true, - false}; - _ -> - {LKeep, RDisc} = lists:splitwith(RTrimFun, RKeep0), - case RDisc of - [] -> - {[LeftMost|LKeep], - true, - true}; - [{RDiscK1, RDiscV1}|_Rest] when RDiscK1 == EndKey -> - {[LeftMost|LKeep] ++ [{RDiscK1, RDiscV1}], - true, - false}; - [{RDiscK1, RDiscV1}|_Rest] -> - {[LeftMost|LKeep] ++ [{RDiscK1, RDiscV1}], - true, - true} - end - end +lookup_slots(StartKey, EndKey, Tree) -> + StartKeyFun = + fun(_V) -> + all + end, + MapFun = + fun({_LK, Slot}) -> + Slot + end, + SlotList = leveled_tree:search_range(StartKey, EndKey, Tree, StartKeyFun), + {EK, _EndSlot} = lists:last(SlotList), + case EK of + EndKey -> + {lists:map(MapFun, SlotList), true, false}; + _ -> + {lists:map(MapFun, SlotList), true, true} end. diff --git a/src/leveled_tinybloom.erl b/src/leveled_tinybloom.erl deleted file mode 100644 index 2278c2a..0000000 --- a/src/leveled_tinybloom.erl +++ /dev/null @@ -1,159 +0,0 @@ -%% -------- TINY BLOOM --------- -%% -%% For sheltering relatively expensive lookups with a probabilistic check -%% -%% Uses multiple 512 byte blooms. Can sensibly hold up to 1000 keys per array. 
-%% Even at 1000 keys should still offer only a 20% false positive -%% -%% Restricted to no more than 256 arrays - so can't handle more than 250K keys -%% in total -%% -%% Implemented this way to make it easy to control false positive (just by -%% setting the width). Also only requires binary manipulations of a single -%% hash - --module(leveled_tinybloom). - --include("include/leveled.hrl"). - --export([ - enter/2, - check/2, - empty/1 - ]). - - --include_lib("eunit/include/eunit.hrl"). - -%%%============================================================================ -%%% Bloom API -%%%============================================================================ - -empty(Width) when Width =< 256 -> - FoldFun = fun(X, Acc) -> dict:store(X, <<0:4096>>, Acc) end, - lists:foldl(FoldFun, dict:new(), lists:seq(0, Width - 1)). - -enter({hash, no_lookup}, Bloom) -> - Bloom; -enter({hash, Hash}, Bloom) -> - {Slot0, Bit1, Bit2} = split_hash(Hash), - Slot = Slot0 rem dict:size(Bloom), - BitArray0 = dict:fetch(Slot, Bloom), - FoldFun = - fun(Bit, Arr) -> add_to_array(Bit, Arr, 4096) end, - BitArray1 = lists:foldl(FoldFun, - BitArray0, - lists:usort([Bit1, Bit2])), - dict:store(Slot, <>, Bloom); -enter(Key, Bloom) -> - Hash = leveled_codec:magic_hash(Key), - enter({hash, Hash}, Bloom). - - -check({hash, Hash}, Bloom) -> - {Slot0, Bit1, Bit2} = split_hash(Hash), - Slot = Slot0 rem dict:size(Bloom), - BitArray = dict:fetch(Slot, Bloom), - - case getbit(Bit1, BitArray, 4096) of - <<0:1>> -> - false; - <<1:1>> -> - case getbit(Bit2, BitArray, 4096) of - <<0:1>> -> - false; - <<1:1>> -> - true - end - end; -check(Key, Bloom) -> - Hash = leveled_codec:magic_hash(Key), - check({hash, Hash}, Bloom). 
- - -%%%============================================================================ -%%% Internal Functions -%%%============================================================================ - -split_hash(Hash) -> - H0 = Hash band 255, - H1 = (Hash bsr 8) band 4095, - H2 = Hash bsr 20, - {H0, H1, H2}. - -add_to_array(Bit, BitArray, ArrayLength) -> - RestLen = ArrayLength - Bit - 1, - <> = BitArray, - <>. - -getbit(Bit, BitArray, ArrayLength) -> - RestLen = ArrayLength - Bit - 1, - <<_Head:Bit/bitstring, - B:1/bitstring, - _Rest:RestLen/bitstring>> = BitArray, - B. - - -%%%============================================================================ -%%% Test -%%%============================================================================ - --ifdef(TEST). - -simple_test() -> - N = 4000, - W = 6, - KLin = lists:map(fun(X) -> "Key_" ++ - integer_to_list(X) ++ - integer_to_list(random:uniform(100)) ++ - binary_to_list(crypto:rand_bytes(2)) - end, - lists:seq(1, N)), - KLout = lists:map(fun(X) -> - "NotKey_" ++ - integer_to_list(X) ++ - integer_to_list(random:uniform(100)) ++ - binary_to_list(crypto:rand_bytes(2)) - end, - lists:seq(1, N)), - SW0_PH = os:timestamp(), - lists:foreach(fun(X) -> erlang:phash2(X) end, KLin), - io:format(user, - "~nNative hash function hashes ~w keys in ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW0_PH)]), - SW0_MH = os:timestamp(), - lists:foreach(fun(X) -> leveled_codec:magic_hash(X) end, KLin), - io:format(user, - "~nMagic hash function hashes ~w keys in ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW0_MH)]), - - SW1 = os:timestamp(), - Bloom = lists:foldr(fun enter/2, empty(W), KLin), - io:format(user, - "~nAdding ~w keys to bloom took ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW1)]), - - SW2 = os:timestamp(), - lists:foreach(fun(X) -> ?assertMatch(true, check(X, Bloom)) end, KLin), - io:format(user, - "~nChecking ~w keys in bloom took ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW2)]), 
- - SW3 = os:timestamp(), - FP = lists:foldr(fun(X, Acc) -> case check(X, Bloom) of - true -> Acc + 1; - false -> Acc - end end, - 0, - KLout), - io:format(user, - "~nChecking ~w keys out of bloom took ~w microseconds " ++ - "with ~w false positive rate~n", - [N, timer:now_diff(os:timestamp(), SW3), FP / N]), - ?assertMatch(true, FP < (N div 4)). - - - --endif. \ No newline at end of file diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl new file mode 100644 index 0000000..ba07a3f --- /dev/null +++ b/src/leveled_tree.erl @@ -0,0 +1,731 @@ +%% -------- TREE --------- +%% +%% This module is intended to address two issues +%% - the lack of iterator_from support in OTP16 gb_trees +%% - the time to convert from/to list in gb_trees +%% +%% Leveled previously had a skiplist implementation, and this is a +%% variation on that. The Tree in this case is a bunch of sublists of length +%% SKIP_WIDTH, with the last key of each sublist held in a gb_tree. + +-module(leveled_tree). + +-include("include/leveled.hrl"). + +-export([ + from_orderedlist/2, + from_orderedset/2, + from_orderedlist/3, + from_orderedset/3, + to_list/1, + match_range/3, + search_range/4, + match/2, + search/3, + tsize/1, + empty/1 + ]). + +-include_lib("eunit/include/eunit.hrl"). + +-define(SKIP_WIDTH, 16). + + +%%%============================================================================ +%%% API +%%%============================================================================ + +from_orderedset(Table, Type) -> + from_orderedlist(ets:tab2list(Table), Type, ?SKIP_WIDTH). + +from_orderedset(Table, Type, SkipWidth) -> + from_orderedlist(ets:tab2list(Table), Type, SkipWidth). + + +from_orderedlist(OrderedList, Type) -> + from_orderedlist(OrderedList, Type, ?SKIP_WIDTH). 
+
+%% Build a tree of the requested type from a list of {Key, Value} pairs.
+%% The list is expected to be sorted by key already (the callers visible in
+%% this change pass the output of lists:ukeysort/2 or an ordered_set table).
+%%
+%% The result is always {Type, Size, Structure} where Size is the length of
+%% the input list:
+%%   tree - Structure is built by tree_fromorderedlist/4
+%%   idxt - Structure is built by idxt_fromorderedlist/4
+%%   skpl - Structure is a two-level nested list built by
+%%          skpl_fromorderedlist/4; the SkipWidth argument is ignored and a
+%%          width of 4, 8, 16 or 32 is chosen from the size of the input
+from_orderedlist(OrderedList, tree, SkipWidth) ->
+    Size = length(OrderedList),
+    Tree = tree_fromorderedlist(OrderedList, [], Size, SkipWidth),
+    {tree, Size, Tree};
+from_orderedlist(OrderedList, idxt, SkipWidth) ->
+    Size = length(OrderedList),
+    IdxTree = idxt_fromorderedlist(OrderedList, {[], [], 1}, Size, SkipWidth),
+    {idxt, Size, IdxTree};
+from_orderedlist(OrderedList, skpl, _SkipWidth) ->
+    Size = length(OrderedList),
+    % Autosize the skip width
+    AutoWidth =
+        if
+            Size > 4096 -> 32;
+            Size > 512 -> 16;
+            Size > 64 -> 8;
+            true -> 4
+        end,
+    {skpl, Size, skpl_fromorderedlist(OrderedList, Size, AutoWidth, 2)}.
+
+
+%% Exact-match lookup.  Returns {value, Value} when Key is present in the
+%% sublist selected for it, and none otherwise.
+%% NOTE(review): tree_iterator_from/2 and tree_next/1 are defined elsewhere in
+%% the module - presumably they yield the first sublist whose marker key is
+%% >= Key; confirm against their definitions.
+match(Key, {tree, _L, Tree}) ->
+    case tree_next(tree_iterator_from(Key, Tree)) of
+        {_NK, SubList, _Iter} ->
+            lookup_match(Key, SubList);
+        none ->
+            none
+    end;
+match(Key, {idxt, _L, {TLI, IDX}}) ->
+    case tree_next(tree_iterator_from(Key, IDX)) of
+        {_NK, ListID, _Iter} ->
+            % The index tree holds the position of the sublist in the tuple
+            lookup_match(Key, element(ListID, TLI));
+        none ->
+            none
+    end;
+match(Key, {skpl, _L, SkipList}) ->
+    lookup_match(Key, skpl_getsublist(Key, SkipList)).
+
+%% Nearest-match lookup.  Finds the first entry {K, V} in the selected sublist
+%% with K >= Key (see lookup_best/2).  StartKeyFun is applied to V, and if Key
+%% sorts before the result the entry is rejected and none is returned;
+%% otherwise {K, V} is returned.  Also returns none when no sublist is found.
+search(Key, {tree, _L, Tree}, StartKeyFun) ->
+    case tree_next(tree_iterator_from(Key, Tree)) of
+        {_NK, SubList, _Iter} ->
+            {_K, _V} = Best = lookup_best(Key, SubList),
+            startkey_filter(Key, Best, StartKeyFun);
+        none ->
+            none
+    end;
+search(Key, {idxt, _L, {TLI, IDX}}, StartKeyFun) ->
+    case tree_next(tree_iterator_from(Key, IDX)) of
+        {_NK, ListID, _Iter} ->
+            {_K, _V} = Best = lookup_best(Key, element(ListID, TLI)),
+            startkey_filter(Key, Best, StartKeyFun);
+        none ->
+            none
+    end;
+search(Key, {skpl, _L, SkipList}, StartKeyFun) ->
+    case lookup_best(Key, skpl_getsublist(Key, SkipList)) of
+        {_K, _V} = Best ->
+            startkey_filter(Key, Best, StartKeyFun);
+        none ->
+            none
+    end.
+
+%% Reject a search result when Key sorts before StartKeyFun(Value)
+startkey_filter(Key, {_K, V} = Found, StartKeyFun) ->
+    case Key < StartKeyFun(V) of
+        true ->
+            none;
+        false ->
+            Found
+    end.
+
+%% Return the {Key, Value} pairs from StartRange up to EndRange.  The first
+%% entry found past EndRange is also included, but only when its key == EndRange.
+match_range(StartRange, EndRange, Tree) ->
+    EndRangeFun =
+        fun(ER, FirstRHSKey, _FirstRHSValue) ->
+            ER == FirstRHSKey
+        end,
+    match_range(StartRange, EndRange, Tree, EndRangeFun).
+
+%% Dispatch a range lookup to the implementation for the tree type.
+%% EndRangeFun(EndRange, Key, Value) is asked whether the first entry found
+%% beyond the end of the range should still be included in the output.
+match_range(StartRange, EndRange, {tree, _L, Tree}, EndRangeFun) ->
+    treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun);
+match_range(StartRange, EndRange, {idxt, _L, Tree}, EndRangeFun) ->
+    idxtlookup_range_start(StartRange, EndRange, Tree, EndRangeFun);
+match_range(StartRange, EndRange, {skpl, _L, SkipList}, EndRangeFun) ->
+    skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun).
+
+
+%% As match_range/3, except that the first entry beyond EndRange is included
+%% whenever EndRange >= StartKeyFun(Value) for that entry.
+search_range(StartRange, EndRange, Tree, StartKeyFun) ->
+    EndRangeFun =
+        fun(ER, _FirstRHSKey, FirstRHSValue) ->
+            ER >= StartKeyFun(FirstRHSValue)
+        end,
+    % Same per-type dispatch as match_range/3, only the end test differs
+    match_range(StartRange, EndRange, Tree, EndRangeFun).
+
+
+%% Flatten any tree type back to a single list of {Key, Value} pairs, in the
+%% order the sublists are stored.
+to_list({tree, _L, Tree}) ->
+    % lists:flatmap/2 rather than folding with Acc ++ SL, which re-copies
+    % the accumulated output for every sublist
+    lists:flatmap(fun({_MK, SL}) -> SL end, tree_to_list(Tree));
+to_list({idxt, _L, {TLI, _IDX}}) ->
+    lists:append(tuple_to_list(TLI));
+to_list({skpl, _L, SkipList}) ->
+    % Two levels of {Marker, SubList} to unwrap
+    DropMarkFun = fun({_M, SL}) -> SL end,
+    lists:flatmap(DropMarkFun, lists:flatmap(DropMarkFun, SkipList)).
+
+
+
+%% Number of entries the tree was built from (recorded at build time)
+tsize({_Type, L, _Tree}) ->
+    L.
+
+%% An empty tree of the given type
+empty(tree) ->
+    {tree, 0, empty_tree()};
+empty(idxt) ->
+    {idxt, 0, {{}, empty_tree()}};
+empty(skpl) ->
+    {skpl, 0, []}.
%%%============================================================================
%%% Internal Functions
%%%============================================================================


%% Cut the ordered list into sublists of at most SkipWidth entries and build
%% a gb_tree that maps the last key of each sublist to that sublist.  L is
%% the number of entries still to be consumed from OrdList.
tree_fromorderedlist([], Chunks, _L, _SkipWidth) ->
    gb_trees:from_orddict(lists:reverse(Chunks));
tree_fromorderedlist(OrdList, Chunks, L, SkipWidth) ->
    Width = min(SkipWidth, L),
    {Chunk, Rest} = lists:split(Width, OrdList),
    {MarkKey, _MarkValue} = lists:last(Chunk),
    tree_fromorderedlist(Rest,
                            [{MarkKey, Chunk}|Chunks],
                            L - Width,
                            SkipWidth).

%% As tree_fromorderedlist/4, but the sublists are kept in a tuple and the
%% gb_tree maps the last key of each sublist to its position in that tuple.
%% Returns {TupleOfSubLists, IndexTree}.
idxt_fromorderedlist([], {Chunks, IdxEntries, _NextPos}, _L, _SkipWidth) ->
    TupleOfLists = list_to_tuple(lists:reverse(Chunks)),
    Index = gb_trees:from_orddict(lists:reverse(IdxEntries)),
    {TupleOfLists, Index};
idxt_fromorderedlist(OrdList, {Chunks, IdxEntries, Pos}, L, SkipWidth) ->
    Width = min(SkipWidth, L),
    {Chunk, Rest} = lists:split(Width, OrdList),
    {MarkKey, _MarkValue} = lists:last(Chunk),
    UpdAcc = {[Chunk|Chunks], [{MarkKey, Pos}|IdxEntries], Pos + 1},
    idxt_fromorderedlist(Rest, UpdAcc, L - Width, SkipWidth).

%% Roll the list up Height times; each pass groups the current level into
%% {LastKey, SubList} entries of at most SkipWidth elements.
skpl_fromorderedlist(Level, _L, _SkipWidth, 0) ->
    Level;
skpl_fromorderedlist(Level, L, SkipWidth, Height) ->
    NextLevel = roll_list(Level, L, [], SkipWidth),
    skpl_fromorderedlist(NextLevel,
                            length(NextLevel),
                            SkipWidth,
                            Height - 1).

%% One roll-up pass: split a list of L two-tuples into {LastKey, SubList}
%% groups of at most SkipWidth elements, preserving order.
roll_list([], 0, Groups, _SkipWidth) ->
    lists:reverse(Groups);
roll_list(KVList, L, Groups, SkipWidth) ->
    Width = min(SkipWidth, L),
    {Group, Rest} = lists:split(Width, KVList),
    {MarkKey, _MarkValue} = lists:last(Group),
    roll_list(Rest, L - Width, [{MarkKey, Group}|Groups], SkipWidth).


%% Exact lookup of Key within a single sublist of {Key, Value} pairs.
lookup_match(Key, KVList) ->
    case lists:keyfind(Key, 1, KVList) of
        {Key, Value} ->
            {value, Value};
        false ->
            none
    end.
%% Return the first {Key, Value} in the sublist whose key is at or after Key,
%% or none if every key in the sublist is smaller.
lookup_best(_Key, []) ->
    none;
lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key ->
    {EK, EV};
lookup_best(Key, [_Top|Tail]) ->
    lookup_best(Key, Tail).

%% Range lookup for the tree type.  Finds the first sublist that can contain
%% StartRange, drops the entries before StartRange and then gathers entries
%% up to the end of the range.
treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun) ->
    case tree_next(tree_iterator_from(StartRange, Tree)) of
        none ->
            [];
        {NK, SL, Iter1} ->
            RHS = lists:dropwhile(fun({K, _V}) -> K < StartRange end, SL),
            treelookup_range_end(EndRange, {NK, RHS}, Iter1, [], EndRangeFun)
    end.

%% Gather sublists from the iterator until one is found whose mark key has
%% passed EndRange (that sublist is trimmed) or the iterator is exhausted.
%% The gathered entries are returned appended to Output.
treelookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) ->
    % Sublists are accumulated in reverse and appended once at the end, so
    % the output is not re-copied for every sublist visited
    RevChunks =
        treelookup_collect(EndRange, NK0, SL0, Iter0, [], EndRangeFun),
    Output ++ lists:append(lists:reverse(RevChunks)).

%% Walk the iterator, pushing each in-range sublist onto RevChunks.
treelookup_collect(EndRange, NK0, SL0, Iter0, RevChunks, EndRangeFun) ->
    case leveled_codec:endkey_passed(EndRange, NK0) of
        true ->
            [treelookup_trim(EndRange, SL0, EndRangeFun)|RevChunks];
        false ->
            case tree_next(Iter0) of
                none ->
                    [SL0|RevChunks];
                {NK1, SL1, Iter1} ->
                    treelookup_collect(EndRange,
                                        NK1,
                                        SL1,
                                        Iter1,
                                        [SL0|RevChunks],
                                        EndRangeFun)
            end
    end.

%% Trim the final sublist: keep the entries that have not passed EndRange,
%% plus the first entry past the end if EndRangeFun accepts it.
treelookup_trim(EndRange, SL, EndRangeFun) ->
    InRangeFun =
        fun({K, _V}) ->
            not leveled_codec:endkey_passed(EndRange, K)
        end,
    case lists:splitwith(InRangeFun, SL) of
        {LHS, []} ->
            LHS;
        {LHS, [{FirstRHSKey, FirstRHSValue}|_Rest]} ->
            case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of
                true ->
                    LHS ++ [{FirstRHSKey, FirstRHSValue}];
                false ->
                    LHS
            end
    end.

%% Range lookup for the idxt type.  As treelookup_range_start/4, except that
%% the index tree yields positions into the tuple of sublists (TLI).
idxtlookup_range_start(StartRange, EndRange, {TLI, IDX}, EndRangeFun) ->
    case tree_next(tree_iterator_from(StartRange, IDX)) of
        none ->
            [];
        {NK, ListID, Iter1} ->
            RHS = lists:dropwhile(fun({K, _V}) -> K < StartRange end,
                                    element(ListID, TLI)),
            idxtlookup_range_end(EndRange,
                                    {TLI, NK, RHS},
                                    Iter1,
                                    [],
                                    EndRangeFun)
    end.
%% Gather sublists for the idxt type until one is found whose mark key has
%% passed EndRange (that sublist is trimmed) or the iterator is exhausted.
%% The gathered entries are returned appended to Output.
idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) ->
    % Sublists are accumulated in reverse and appended once at the end, so
    % the output is not re-copied for every sublist visited
    RevChunks =
        idxtlookup_collect(EndRange, TLI, NK0, SL0, Iter0, [], EndRangeFun),
    Output ++ lists:append(lists:reverse(RevChunks)).

%% Walk the index iterator, pushing each in-range sublist onto RevChunks.
idxtlookup_collect(EndRange, TLI, NK0, SL0, Iter0, RevChunks, EndRangeFun) ->
    case leveled_codec:endkey_passed(EndRange, NK0) of
        true ->
            [idxtlookup_trim(EndRange, SL0, EndRangeFun)|RevChunks];
        false ->
            case tree_next(Iter0) of
                none ->
                    [SL0|RevChunks];
                {NK1, ListID, Iter1} ->
                    idxtlookup_collect(EndRange,
                                        TLI,
                                        NK1,
                                        element(ListID, TLI),
                                        Iter1,
                                        [SL0|RevChunks],
                                        EndRangeFun)
            end
    end.

%% Trim the final sublist: keep the entries that have not passed EndRange,
%% plus the first entry past the end if EndRangeFun accepts it.
idxtlookup_trim(EndRange, SL, EndRangeFun) ->
    InRangeFun =
        fun({K, _V}) ->
            not leveled_codec:endkey_passed(EndRange, K)
        end,
    case lists:splitwith(InRangeFun, SL) of
        {LHS, []} ->
            LHS;
        {LHS, [{FirstRHSKey, FirstRHSValue}|_Rest]} ->
            case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of
                true ->
                    LHS ++ [{FirstRHSKey, FirstRHSValue}];
                false ->
                    LHS
            end
    end.


%% Range lookup for the skpl type.  The sublists covering the range are
%% selected at each of the two levels; the first selected sublist then has
%% the entries before StartRange removed, and the last is cut at the end of
%% the range (entries past the end are kept while EndRangeFun accepts them).
skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) ->
    Lv1List = skpl_sublists_in_range(StartRange, EndRange, SkipList),
    Lv0List = skpl_sublists_in_range(StartRange,
                                        EndRange,
                                        lists:append(Lv1List)),
    BeforeFun =
        fun({K, _V}) ->
            K < StartRange
        end,
    AfterFun =
        fun({K, V}) ->
            case leveled_codec:endkey_passed(EndRange, K) of
                false ->
                    true;
                true ->
                    EndRangeFun(EndRange, K, V)
            end
        end,
    case Lv0List of
        [] ->
            [];
        [Only] ->
            % A single sublist holds both the start and the end of the range
            lists:takewhile(AfterFun, lists:dropwhile(BeforeFun, Only));
        [First|Others] ->
            {MidLists, [Last]} = lists:split(length(Others) - 1, Others),
            RHSofLHL = lists:dropwhile(BeforeFun, First),
            LHSofRHL = lists:takewhile(AfterFun, Last),
            lists:append([RHSofLHL|MidLists] ++ [LHSofRHL])
    end.

%% From a list of {Mark, SubList}, return the sublists starting at the first
%% whose mark is not below StartRange, up to and including the first whose
%% mark has passed EndRange.
skpl_sublists_in_range(StartRange, EndRange, MarkedLists) ->
    FromStart =
        lists:dropwhile(fun({Mark, _SL}) -> StartRange > Mark end,
                        MarkedLists),
    skpl_sublists_to_end(EndRange, FromStart).

skpl_sublists_to_end(_EndRange, []) ->
    [];
skpl_sublists_to_end(EndRange, [{Mark, SL}|Rest]) ->
    case leveled_codec:endkey_passed(EndRange, Mark) of
        true ->
            % Nothing beyond this sublist can be in range, so stop here
            [SL];
        false ->
            [SL|skpl_sublists_to_end(EndRange, Rest)]
    end.


%% Descend both levels of the skiplist to the sublist of {Key, Value} pairs
%% that could contain Key.  Returns [] if every mark is below Key.
skpl_getsublist(Key, SkipList) ->
    skpl_firstsublist(Key, skpl_firstsublist(Key, SkipList)).

%% First non-empty sublist whose mark is at or after Key; stops scanning as
%% soon as it is found.
skpl_firstsublist(_Key, []) ->
    [];
skpl_firstsublist(Key, [{Mark, [_|_] = SL}|_Rest]) when Mark >= Key ->
    SL;
skpl_firstsublist(Key, [{_Mark, _SL}|Rest]) ->
    skpl_firstsublist(Key, Rest).

%%%============================================================================
%%% Balance tree implementation
%%%============================================================================

empty_tree() ->
    gb_trees:empty().

tree_to_list(T) ->
    gb_trees:to_list(T).

tree_iterator_from(K, T) ->
    % For OTP 16 compatibility with gb_trees
    iterator_from(K, T).

tree_next(I) ->
    % For OTP 16 compatibility with gb_trees
    next(I).


%% Local iterator over a gb_tree.  The patterns below take the tree to be
%% {Size, Root} with nodes of the form {Key, Value, Smaller, Bigger} or nil.
%% iterator_from/2 returns an iterator positioned at the first key that is
%% not smaller than S.
iterator_from(S, {_, T}) ->
    iterator_1_from(S, T).

iterator_1_from(S, T) ->
    iterator_from(S, T, []).

iterator_from(S, {K, _, _, T}, As) when K < S ->
    % Everything in this node and its smaller branch is before S
    iterator_from(S, T, As);
iterator_from(_, {_, _, nil, _} = T, As) ->
    [T | As];
iterator_from(S, {_, _, L, _} = T, As) ->
    iterator_from(S, L, [T | As]);
iterator_from(_, nil, As) ->
    As.

%% Return {Key, Value, NextIterator} for the next entry, or none when the
%% iterator is exhausted.
next([{X, V, _, T} | As]) ->
    {X, V, iterator(T, As)};
next([]) ->
    none.

%% The iterator structure is really just a list corresponding to
%% the call stack of an in-order traversal. This is quite fast.

iterator({_, _, nil, _} = T, As) ->
    [T | As];
iterator({_, _, L, _} = T, As) ->
    iterator(L, [T | As]);
iterator(nil, As) ->
    As.

%%%============================================================================
%%% Test
%%%============================================================================

-ifdef(TEST).

%% Generate Count random {Key, Value} pairs (unsorted, possibly with repeated
%% keys).  Sequence numbers start at Seqn and rise by one per pair.  The
%% bucket number is BucketRangeLow plus a random number in the range
%% 1..BucketRangeHigh, so the last argument is used as a width rather than
%% as an upper bound.
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    generate_randomkeys(Seqn,
                        Count,
                        [],
                        BucketRangeLow,
                        BucketRangeHigh).

generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    % The random module is kept (rather than rand) as other parts of this
    % file note the need for OTP 16 compatibility
    BRand = random:uniform(BRange),
    BNumber = string:right(integer_to_list(BucketLow + BRand), 4, $0),
    KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
    {K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
                {Seqn, {active, infinity}, null}},
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        [{K, V}|Acc],
                        BucketLow,
                        BRange).


tree_search_test() ->
    search_test_by_type(tree).

idxt_search_test() ->
    search_test_by_type(idxt).

skpl_search_test() ->
    search_test_by_type(skpl).

%% Range searches over entries {4N, 4N - 2} for N in 1..50, where the value
%% of each entry is treated as the start key of the range ending at its key.
search_test_by_type(Type) ->
    KL = [{N * 4, N * 4 - 2} || N <- lists:seq(1, 50)],
    T = from_orderedlist(KL, Type),

    StartKeyFun = fun(V) -> V end,
    statistics(runtime),
    ?assertMatch([], search_range(0, 1, T, StartKeyFun)),
    ?assertMatch([], search_range(201, 202, T, StartKeyFun)),
    ?assertMatch([{4, 2}], search_range(2, 4, T, StartKeyFun)),
    ?assertMatch([{4, 2}], search_range(2, 5, T, StartKeyFun)),
    ?assertMatch([{4, 2}, {8, 6}], search_range(2, 6, T, StartKeyFun)),
    ?assertMatch(50, length(search_range(2, 200, T, StartKeyFun))),
    ?assertMatch(50, length(search_range(2, 198, T, StartKeyFun))),
    ?assertMatch(49, length(search_range(2, 197, T, StartKeyFun))),
    ?assertMatch(49, length(search_range(4, 197, T, StartKeyFun))),
    ?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))),
    {_, T1} = statistics(runtime),
    % statistics(runtime) reports milliseconds, so label the figure as such
    io:format(user, "10 range tests with type ~w in ~w milliseconds~n",
                [Type, T1]).


tree_oor_test() ->
    outofrange_test_by_type(tree).

idxt_oor_test() ->
    outofrange_test_by_type(idxt).

skpl_oor_test() ->
    outofrange_test_by_type(skpl).

%% Keys that are absent from the tree (before the first key, between keys
%% and after the last key) must give none from both match/2 and search/3.
outofrange_test_by_type(Type) ->
    KL = lists:map(fun(N) -> {N * 4, N * 4 - 2} end, lists:seq(1, 50)),
    T = from_orderedlist(KL, Type),
    MissingKeys = [0, 5, 97, 197, 201],

    io:format("Out of range searches~n"),
    lists:foreach(fun(K) -> ?assertMatch(none, match(K, T)) end,
                    MissingKeys),

    StartKeyFun = fun(V) -> V end,

    lists:foreach(fun(K) -> ?assertMatch(none, search(K, T, StartKeyFun)) end,
                    MissingKeys).

tree_tolist_test() ->
    tolist_test_by_type(tree).

idxt_tolist_test() ->
    tolist_test_by_type(idxt).

skpl_tolist_test() ->
    tolist_test_by_type(skpl).

%% Building a tree from an ordered list and flattening it again must give
%% back the original list.
tolist_test_by_type(Type) ->
    KL = lists:map(fun(N) -> {N * 4, N * 4 - 2} end, lists:seq(1, 50)),
    RoundTrip = to_list(from_orderedlist(KL, Type)),
    ?assertMatch(KL, RoundTrip).

tree_timing_test() ->
    log_tree_test_by_(16, tree, 4000),
    tree_test_by_(8, tree, 1000),
    tree_test_by_(4, tree, 256).

idxt_timing_test() ->
    log_tree_test_by_(16, idxt, 4000),
    tree_test_by_(8, idxt, 1000),
    tree_test_by_(4, idxt, 256).

skpl_timing_test() ->
    tree_test_by_(auto, skpl, 6000),
    log_tree_test_by_(auto, skpl, 4000),
    tree_test_by_(auto, skpl, 1000),
    tree_test_by_(auto, skpl, 256).

%% Run tree_test_by_/3 and report the runtime consumed together with the
%% garbage collection statistics sampled before and after the run.
log_tree_test_by_(Width, Type, N) ->
    % First call resets the "since last call" runtime counter
    erlang:statistics(runtime),
    GCBefore = erlang:statistics(garbage_collection),
    tree_test_by_(Width, Type, N),
    {_, Elapsed} = erlang:statistics(runtime),
    GCAfter = erlang:statistics(garbage_collection),
    io:format(user, "Test took ~w ms and GC transitioned from ~w to ~w~n",
                [Elapsed, GCBefore, GCAfter]).

%% Build a tree of the given type from about N random keys twice (once from
%% an ETS ordered_set, once from a sorted list), check the two results are
%% equal, and time exact-match, search and near-match lookups of every key.
tree_test_by_(Width, Type, N) ->
    io:format(user, "~nTree test for type and width: ~w ~w~n", [Type, Width]),
    KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)),

    OS = ets:new(test, [ordered_set, private]),
    ets:insert(OS, KL),
    SWaETS = os:timestamp(),
    Tree0 = from_orderedset(OS, Type, Width),
    io:format(user, "Generating tree from ETS in ~w microseconds" ++
                        " of size ~w~n",
                [timer:now_diff(os:timestamp(), SWaETS),
                    tsize(Tree0)]),
    % The table has been converted, so release it instead of leaving it
    % to live for as long as the test process does
    true = ets:delete(OS),

    SWaGSL = os:timestamp(),
    Tree1 = from_orderedlist(KL, Type, Width),
    io:format(user, "Generating tree from orddict in ~w microseconds" ++
                        " of size ~w~n",
                [timer:now_diff(os:timestamp(), SWaGSL),
                    tsize(Tree1)]),
    SWaLUP = os:timestamp(),
    lists:foreach(match_fun(Tree0), KL),
    lists:foreach(match_fun(Tree1), KL),
    io:format(user, "Looked up all keys twice in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWaLUP)]),

    ?assertMatch(Tree0, Tree1),

    SWaSRCH1 = os:timestamp(),
    lists:foreach(search_exactmatch_fun(Tree0), KL),
    lists:foreach(search_exactmatch_fun(Tree1), KL),
    io:format(user, "Search all keys twice for exact match in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWaSRCH1)]),

    % Pair a key that sorts just after each entry's key with the entry that
    % follows it.  The list is zipped against its own tail in one pass,
    % rather than calling lists:nth/2 for every index.
    BitBiggerKeyFun =
        fun({{o, B, FullKey, null}, _V}, NextKV) ->
            {{o, B, FullKey ++ "0", null}, NextKV}
        end,
    SrchKL = lists:zipwith(BitBiggerKeyFun,
                            lists:sublist(KL, length(KL) - 1),
                            tl(KL)),

    SWaSRCH2 = os:timestamp(),
    lists:foreach(search_nearmatch_fun(Tree0), SrchKL),
    lists:foreach(search_nearmatch_fun(Tree1), SrchKL),
    io:format(user, "Search all keys twice for near match in ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWaSRCH2)]).


tree_matchrange_test() ->
    matchrange_test_by_type(tree).

idxt_matchrange_test() ->
    matchrange_test_by_type(idxt).

skpl_matchrange_test() ->
    matchrange_test_by_type(skpl).


%% Check the number of entries match_range/3 returns for ranges anchored on
%% the first, penultimate and final keys of a random key list, and on keys
%% that sort just after the first and penultimate keys.
matchrange_test_by_type(Type) ->
    N = 4000,
    KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)),
    Tree0 = from_orderedlist(KL, Type),
    KL_Length = length(KL),

    % A key that sorts immediately after the given key
    JustAfter =
        fun(Key) ->
            setelement(3, Key, element(3, Key) ++ "0")
        end,
    {FirstKey, _} = lists:nth(1, KL),
    {FinalKey, _} = lists:last(KL),
    {PenultimateKey, _} = lists:nth(KL_Length - 1, KL),
    AfterFirstKey = JustAfter(FirstKey),
    AfterPenultimateKey = JustAfter(PenultimateKey),

    LengthR =
        fun(SK, EK, T) ->
            length(match_range(SK, EK, T))
        end,

    io:format("KL_Length ~w~n", [KL_Length]),
    ?assertMatch(KL_Length, LengthR(FirstKey, FinalKey, Tree0)),
    ?assertMatch(KL_Length, LengthR(FirstKey, PenultimateKey, Tree0) + 1),
    ?assertMatch(1, LengthR(all, FirstKey, Tree0)),
    ?assertMatch(KL_Length, LengthR(all, PenultimateKey, Tree0) + 1),
    ?assertMatch(KL_Length, LengthR(all, all, Tree0)),
    ?assertMatch(2, LengthR(PenultimateKey, FinalKey, Tree0)),
    ?assertMatch(KL_Length, LengthR(AfterFirstKey, PenultimateKey, Tree0) + 2),
    ?assertMatch(1, LengthR(AfterPenultimateKey, FinalKey, Tree0)).

%% Returns a fun asserting that a {K, V} pair is found by match/2.
match_fun(Tree) ->
    fun({K, V}) ->
        ?assertMatch({value, V}, match(K, Tree))
    end.

%% Returns a fun asserting that search/3 on K finds exactly {K, V}.
search_exactmatch_fun(Tree) ->
    AllStartFun = fun(_V) -> all end,
    fun({K, V}) ->
        ?assertMatch({K, V}, search(K, Tree, AllStartFun))
    end.

%% Returns a fun asserting that search/3 on K finds the following entry
%% {NK, NV}.
search_nearmatch_fun(Tree) ->
    AllStartFun = fun(_V) -> all end,
    fun({K, {NK, NV}}) ->
        ?assertMatch({NK, NV}, search(K, Tree, AllStartFun))
    end.

%% An empty tree of every type reports a size of zero.
empty_test() ->
    lists:foreach(fun(Type) -> ?assertMatch(0, tsize(empty(Type))) end,
                    [tree, skpl, idxt]).

-endif.