Replace ledger-side gb_trees
Try to make a minimal change that replaces gb_trees with skiplists exposing a gb_trees-like API
This commit is contained in:
parent
c9afe34fea
commit
03d025d581
5 changed files with 490 additions and 525 deletions
|
@ -149,7 +149,7 @@
|
|||
-record(state, {inker :: pid(),
|
||||
penciller :: pid(),
|
||||
cache_size :: integer(),
|
||||
ledger_cache :: gb_trees:tree(),
|
||||
ledger_cache :: list(), % a skiplist
|
||||
is_snapshot :: boolean(),
|
||||
slow_offer = false :: boolean()}).
|
||||
|
||||
|
@ -233,14 +233,14 @@ init([Opts]) ->
|
|||
{ok, #state{inker=Inker,
|
||||
penciller=Penciller,
|
||||
cache_size=CacheSize,
|
||||
ledger_cache=gb_trees:empty(),
|
||||
ledger_cache=leveled_skiplist:empty(),
|
||||
is_snapshot=false}};
|
||||
Bookie ->
|
||||
{ok,
|
||||
{Penciller, LedgerCache},
|
||||
Inker} = book_snapshotstore(Bookie, self(), ?SNAPSHOT_TIMEOUT),
|
||||
ok = leveled_penciller:pcl_loadsnapshot(Penciller,
|
||||
gb_trees:empty()),
|
||||
leveled_skiplist:empty()),
|
||||
leveled_log:log("B0002", [Inker, Penciller]),
|
||||
{ok, #state{penciller=Penciller,
|
||||
inker=Inker,
|
||||
|
@ -431,7 +431,7 @@ bucket_stats(State, Bucket, Tag) ->
|
|||
{LedgerSnapshot, LedgerCache},
|
||||
_JournalSnapshot} = snapshot_store(State, ledger),
|
||||
Folder = fun() ->
|
||||
leveled_log:log("B0004", [gb_trees:size(LedgerCache)]),
|
||||
leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]),
|
||||
ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot,
|
||||
LedgerCache),
|
||||
StartKey = leveled_codec:to_ledgerkey(Bucket, null, Tag),
|
||||
|
@ -454,7 +454,7 @@ binary_bucketlist(State, Tag, {FoldBucketsFun, InitAcc}) ->
|
|||
{LedgerSnapshot, LedgerCache},
|
||||
_JournalSnapshot} = snapshot_store(State, ledger),
|
||||
Folder = fun() ->
|
||||
leveled_log:log("B0004", [gb_trees:size(LedgerCache)]),
|
||||
leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]),
|
||||
ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot,
|
||||
LedgerCache),
|
||||
BucketAcc = get_nextbucket(null,
|
||||
|
@ -509,7 +509,7 @@ index_query(State,
|
|||
{B, null}
|
||||
end,
|
||||
Folder = fun() ->
|
||||
leveled_log:log("B0004", [gb_trees:size(LedgerCache)]),
|
||||
leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]),
|
||||
ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot,
|
||||
LedgerCache),
|
||||
StartKey = leveled_codec:to_ledgerkey(Bucket,
|
||||
|
@ -551,7 +551,7 @@ hashtree_query(State, Tag, JournalCheck) ->
|
|||
{LedgerSnapshot, LedgerCache},
|
||||
JournalSnapshot} = snapshot_store(State, SnapType),
|
||||
Folder = fun() ->
|
||||
leveled_log:log("B0004", [gb_trees:size(LedgerCache)]),
|
||||
leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]),
|
||||
ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot,
|
||||
LedgerCache),
|
||||
StartKey = leveled_codec:to_ledgerkey(null, null, Tag),
|
||||
|
@ -602,7 +602,7 @@ foldobjects(State, Tag, StartKey, EndKey, FoldObjectsFun) ->
|
|||
{FoldObjectsFun, []}
|
||||
end,
|
||||
Folder = fun() ->
|
||||
leveled_log:log("B0004", [gb_trees:size(LedgerCache)]),
|
||||
leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]),
|
||||
ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot,
|
||||
LedgerCache),
|
||||
AccFun = accumulate_objects(FoldFun, JournalSnapshot, Tag),
|
||||
|
@ -623,7 +623,7 @@ bucketkey_query(State, Tag, Bucket, {FoldKeysFun, InitAcc}) ->
|
|||
{LedgerSnapshot, LedgerCache},
|
||||
_JournalSnapshot} = snapshot_store(State, ledger),
|
||||
Folder = fun() ->
|
||||
leveled_log:log("B0004", [gb_trees:size(LedgerCache)]),
|
||||
leveled_log:log("B0004", [leveled_skiplist:size(LedgerCache)]),
|
||||
ok = leveled_penciller:pcl_loadsnapshot(LedgerSnapshot,
|
||||
LedgerCache),
|
||||
SK = leveled_codec:to_ledgerkey(Bucket, null, Tag),
|
||||
|
@ -697,7 +697,7 @@ startup(InkerOpts, PencillerOpts) ->
|
|||
|
||||
|
||||
fetch_head(Key, Penciller, LedgerCache) ->
|
||||
case gb_trees:lookup(Key, LedgerCache) of
|
||||
case leveled_skiplist:lookup(Key, LedgerCache) of
|
||||
{value, Head} ->
|
||||
Head;
|
||||
none ->
|
||||
|
@ -863,18 +863,18 @@ preparefor_ledgercache(_Type, LedgerKey, SQN, Obj, Size, {IndexSpecs, TTL}) ->
|
|||
|
||||
|
||||
addto_ledgercache(Changes, Cache) ->
|
||||
lists:foldl(fun({K, V}, Acc) -> gb_trees:enter(K, V, Acc) end,
|
||||
lists:foldl(fun({K, V}, Acc) -> leveled_skiplist:enter(K, V, Acc) end,
|
||||
Cache,
|
||||
Changes).
|
||||
|
||||
maybepush_ledgercache(MaxCacheSize, Cache, Penciller) ->
|
||||
CacheSize = gb_trees:size(Cache),
|
||||
CacheSize = leveled_skiplist:size(Cache),
|
||||
TimeToPush = maybe_withjitter(CacheSize, MaxCacheSize),
|
||||
if
|
||||
TimeToPush ->
|
||||
case leveled_penciller:pcl_pushmem(Penciller, Cache) of
|
||||
ok ->
|
||||
{ok, gb_trees:empty()};
|
||||
{ok, leveled_skiplist:empty()};
|
||||
returned ->
|
||||
{returned, Cache}
|
||||
end;
|
||||
|
|
|
@ -633,7 +633,7 @@ load_from_sequence(MinSQN, FilterFun, Penciller, [{_LowSQN, FN, Pid}|Rest]) ->
|
|||
load_between_sequence(MinSQN, MaxSQN, FilterFun, Penciller,
|
||||
CDBpid, StartPos, FN, Rest) ->
|
||||
leveled_log:log("I0014", [FN, MinSQN]),
|
||||
InitAcc = {MinSQN, MaxSQN, gb_trees:empty()},
|
||||
InitAcc = {MinSQN, MaxSQN, leveled_skiplist:empty()},
|
||||
Res = case leveled_cdb:cdb_scan(CDBpid, FilterFun, InitAcc, StartPos) of
|
||||
{eof, {AccMinSQN, _AccMaxSQN, AccKL}} ->
|
||||
ok = push_to_penciller(Penciller, AccKL),
|
||||
|
|
|
@ -212,7 +212,7 @@
|
|||
|
||||
levelzero_pending = false :: boolean(),
|
||||
levelzero_constructor :: pid(),
|
||||
levelzero_cache = [] :: list(), % a list of gb_trees
|
||||
levelzero_cache = [] :: list(), % a list of skiplists
|
||||
levelzero_index :: array:array(),
|
||||
levelzero_size = 0 :: integer(),
|
||||
levelzero_maxcachesize :: integer(),
|
||||
|
@ -220,7 +220,7 @@
|
|||
is_snapshot = false :: boolean(),
|
||||
snapshot_fully_loaded = false :: boolean(),
|
||||
source_penciller :: pid(),
|
||||
levelzero_astree :: gb_trees:tree(),
|
||||
levelzero_astree :: list(), % skiplist
|
||||
|
||||
ongoing_work = [] :: list(),
|
||||
work_backlog = false :: boolean()}).
|
||||
|
@ -366,25 +366,24 @@ handle_call({fetch_keys, StartKey, EndKey, AccFun, InitAcc, MaxKeys},
|
|||
_From,
|
||||
State=#state{snapshot_fully_loaded=Ready})
|
||||
when Ready == true ->
|
||||
L0AsTree =
|
||||
L0AsList =
|
||||
case State#state.levelzero_astree of
|
||||
undefined ->
|
||||
leveled_pmem:merge_trees(StartKey,
|
||||
EndKey,
|
||||
State#state.levelzero_cache,
|
||||
gb_trees:empty());
|
||||
Tree ->
|
||||
Tree
|
||||
leveled_skiplist:empty());
|
||||
List ->
|
||||
List
|
||||
end,
|
||||
L0iter = gb_trees:iterator(L0AsTree),
|
||||
SFTiter = initiate_rangequery_frommanifest(StartKey,
|
||||
EndKey,
|
||||
State#state.manifest),
|
||||
Acc = keyfolder({L0iter, SFTiter},
|
||||
Acc = keyfolder({L0AsList, SFTiter},
|
||||
{StartKey, EndKey},
|
||||
{AccFun, InitAcc},
|
||||
MaxKeys),
|
||||
{reply, Acc, State#state{levelzero_astree = L0AsTree}};
|
||||
{reply, Acc, State#state{levelzero_astree = L0AsList}};
|
||||
handle_call(work_for_clerk, From, State) ->
|
||||
{UpdState, Work} = return_work(State, From),
|
||||
{reply, Work, UpdState};
|
||||
|
@ -985,41 +984,37 @@ keyfolder(IMMiter, SFTiter, StartKey, EndKey, {AccFun, Acc}) ->
|
|||
|
||||
keyfolder(_Iterators, _KeyRange, {_AccFun, Acc}, MaxKeys) when MaxKeys == 0 ->
|
||||
Acc;
|
||||
keyfolder({null, SFTiter}, KeyRange, {AccFun, Acc}, MaxKeys) ->
|
||||
keyfolder({[], SFTiter}, KeyRange, {AccFun, Acc}, MaxKeys) ->
|
||||
{StartKey, EndKey} = KeyRange,
|
||||
case find_nextkey(SFTiter, StartKey, EndKey) of
|
||||
no_more_keys ->
|
||||
Acc;
|
||||
{NxSFTiter, {SFTKey, SFTVal}} ->
|
||||
Acc1 = AccFun(SFTKey, SFTVal, Acc),
|
||||
keyfolder({null, NxSFTiter}, KeyRange, {AccFun, Acc1}, MaxKeys - 1)
|
||||
keyfolder({[], NxSFTiter}, KeyRange, {AccFun, Acc1}, MaxKeys - 1)
|
||||
end;
|
||||
keyfolder({IMMiterator, SFTiterator}, KeyRange, {AccFun, Acc}, MaxKeys) ->
|
||||
keyfolder({[{IMMKey, IMMVal}|NxIMMiterator], SFTiterator}, KeyRange,
|
||||
{AccFun, Acc}, MaxKeys) ->
|
||||
{StartKey, EndKey} = KeyRange,
|
||||
case gb_trees:next(IMMiterator) of
|
||||
none ->
|
||||
% There are no more keys in the in-memory iterator, so now
|
||||
% iterate only over the remaining keys in the SFT iterator
|
||||
keyfolder({null, SFTiterator}, KeyRange, {AccFun, Acc}, MaxKeys);
|
||||
{IMMKey, _IMMVal, NxIMMiterator} when IMMKey < StartKey ->
|
||||
case {IMMKey < StartKey, leveled_codec:endkey_passed(EndKey, IMMKey)} of
|
||||
{true, _} ->
|
||||
|
||||
% Normally everything is pre-filtered, but the IMM iterator can
|
||||
% be re-used and do may be behind the StartKey if the StartKey has
|
||||
% be re-used and so may be behind the StartKey if the StartKey has
|
||||
% advanced from the previous use
|
||||
keyfolder({NxIMMiterator, SFTiterator},
|
||||
KeyRange,
|
||||
{AccFun, Acc},
|
||||
MaxKeys);
|
||||
{IMMKey, IMMVal, NxIMMiterator} ->
|
||||
case leveled_codec:endkey_passed(EndKey, IMMKey) of
|
||||
true ->
|
||||
{false, true} ->
|
||||
% There are no more keys in-range in the in-memory
|
||||
% iterator, so take action as if this iterator is empty
|
||||
% (see above)
|
||||
keyfolder({null, SFTiterator},
|
||||
keyfolder({[], SFTiterator},
|
||||
KeyRange,
|
||||
{AccFun, Acc},
|
||||
MaxKeys);
|
||||
false ->
|
||||
{false, false} ->
|
||||
case find_nextkey(SFTiterator, StartKey, EndKey) of
|
||||
no_more_keys ->
|
||||
% No more keys in range in the persisted store, so use the
|
||||
|
@ -1045,7 +1040,8 @@ keyfolder({IMMiterator, SFTiterator}, KeyRange, {AccFun, Acc}, MaxKeys) ->
|
|||
MaxKeys - 1);
|
||||
right_hand_first ->
|
||||
Acc1 = AccFun(SFTKey, SFTVal, Acc),
|
||||
keyfolder({IMMiterator, NxSFTiterator},
|
||||
keyfolder({[{IMMKey, IMMVal}|NxIMMiterator],
|
||||
NxSFTiterator},
|
||||
KeyRange,
|
||||
{AccFun, Acc1},
|
||||
MaxKeys - 1);
|
||||
|
@ -1057,7 +1053,6 @@ keyfolder({IMMiterator, SFTiterator}, KeyRange, {AccFun, Acc}, MaxKeys) ->
|
|||
MaxKeys - 1)
|
||||
end
|
||||
end
|
||||
end
|
||||
end.
|
||||
|
||||
|
||||
|
@ -1267,8 +1262,8 @@ confirm_delete_test() ->
|
|||
|
||||
|
||||
maybe_pause_push(PCL, KL) ->
|
||||
T0 = gb_trees:empty(),
|
||||
T1 = lists:foldl(fun({K, V}, Acc) -> gb_trees:enter(K, V, Acc) end,
|
||||
T0 = leveled_skiplist:empty(),
|
||||
T1 = lists:foldl(fun({K, V}, Acc) -> leveled_skiplist:enter(K, V, Acc) end,
|
||||
T0,
|
||||
KL),
|
||||
case pcl_pushmem(PCL, T1) of
|
||||
|
@ -1335,7 +1330,7 @@ simple_server_test() ->
|
|||
SnapOpts = #penciller_options{start_snapshot = true,
|
||||
source_penciller = PCLr},
|
||||
{ok, PclSnap} = pcl_start(SnapOpts),
|
||||
ok = pcl_loadsnapshot(PclSnap, gb_trees:empty()),
|
||||
ok = pcl_loadsnapshot(PclSnap, leveled_skiplist:empty()),
|
||||
?assertMatch(Key1, pcl_fetch(PclSnap, {o,"Bucket0001", "Key0001", null})),
|
||||
?assertMatch(Key2, pcl_fetch(PclSnap, {o,"Bucket0002", "Key0002", null})),
|
||||
?assertMatch(Key3, pcl_fetch(PclSnap, {o,"Bucket0003", "Key0003", null})),
|
||||
|
@ -1384,7 +1379,7 @@ simple_server_test() ->
|
|||
term_to_binary("Hello")),
|
||||
|
||||
{ok, PclSnap2} = pcl_start(SnapOpts),
|
||||
ok = pcl_loadsnapshot(PclSnap2, gb_trees:empty()),
|
||||
ok = pcl_loadsnapshot(PclSnap2, leveled_skiplist:empty()),
|
||||
?assertMatch(false, pcl_checksequencenumber(PclSnap2,
|
||||
{o,
|
||||
"Bucket0001",
|
||||
|
@ -1543,16 +1538,16 @@ foldwithimm_simple_test() ->
|
|||
{3, [{{o, "Bucket1", "Key3"}, {3, {active, infinity}, null}}]},
|
||||
{5, [{{o, "Bucket1", "Key5"}, {2, {active, infinity}, null}}]}
|
||||
],
|
||||
IMM0 = gb_trees:enter({o, "Bucket1", "Key6"},
|
||||
IMM0 = leveled_skiplist:enter({o, "Bucket1", "Key6"},
|
||||
{7, {active, infinity}, null},
|
||||
gb_trees:empty()),
|
||||
IMM1 = gb_trees:enter({o, "Bucket1", "Key1"},
|
||||
leveled_skiplist:empty()),
|
||||
IMM1 = leveled_skiplist:enter({o, "Bucket1", "Key1"},
|
||||
{8, {active, infinity}, null},
|
||||
IMM0),
|
||||
IMM2 = gb_trees:enter({o, "Bucket1", "Key8"},
|
||||
IMM2 = leveled_skiplist:enter({o, "Bucket1", "Key8"},
|
||||
{9, {active, infinity}, null},
|
||||
IMM1),
|
||||
IMMiter = gb_trees:iterator_from({o, "Bucket1", "Key1"}, IMM2),
|
||||
IMMiter = leveled_skiplist:to_range(IMM2, {o, "Bucket1", "Key1"}),
|
||||
AccFun = fun(K, V, Acc) -> SQN = leveled_codec:strip_to_seqonly({K, V}),
|
||||
Acc ++ [{K, SQN}] end,
|
||||
Acc = keyfolder(IMMiter,
|
||||
|
@ -1564,10 +1559,10 @@ foldwithimm_simple_test() ->
|
|||
{{o, "Bucket1", "Key5"}, 2},
|
||||
{{o, "Bucket1", "Key6"}, 7}], Acc),
|
||||
|
||||
IMM1A = gb_trees:enter({o, "Bucket1", "Key1"},
|
||||
IMM1A = leveled_skiplist:enter({o, "Bucket1", "Key1"},
|
||||
{8, {active, infinity}, null},
|
||||
gb_trees:empty()),
|
||||
IMMiterA = gb_trees:iterator_from({o, "Bucket1", "Key1"}, IMM1A),
|
||||
leveled_skiplist:empty()),
|
||||
IMMiterA = leveled_skiplist:to_range(IMM1A, {o, "Bucket1", "Key1"}),
|
||||
AccA = keyfolder(IMMiterA,
|
||||
QueryArray,
|
||||
{o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"},
|
||||
|
@ -1576,10 +1571,10 @@ foldwithimm_simple_test() ->
|
|||
{{o, "Bucket1", "Key3"}, 3},
|
||||
{{o, "Bucket1", "Key5"}, 2}], AccA),
|
||||
|
||||
IMM3 = gb_trees:enter({o, "Bucket1", "Key4"},
|
||||
IMM3 = leveled_skiplist:enter({o, "Bucket1", "Key4"},
|
||||
{10, {active, infinity}, null},
|
||||
IMM2),
|
||||
IMMiterB = gb_trees:iterator_from({o, "Bucket1", "Key1"}, IMM3),
|
||||
IMMiterB = leveled_skiplist:to_range(IMM3, {o, "Bucket1", "Key1"}),
|
||||
AccB = keyfolder(IMMiterB,
|
||||
QueryArray,
|
||||
{o, "Bucket1", "Key1"}, {o, "Bucket1", "Key6"},
|
||||
|
@ -1594,7 +1589,7 @@ create_file_test() ->
|
|||
Filename = "../test/new_file.sft",
|
||||
ok = file:write_file(Filename, term_to_binary("hello")),
|
||||
KVL = lists:usort(leveled_sft:generate_randomkeys(10000)),
|
||||
Tree = gb_trees:from_orddict(KVL),
|
||||
Tree = leveled_skiplist:from_list(KVL),
|
||||
FetchFun = fun(Slot) -> lists:nth(Slot, [Tree]) end,
|
||||
{ok,
|
||||
SP,
|
||||
|
|
|
@ -51,20 +51,13 @@
|
|||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
-define(SLOT_WIDTH, {2048, 11}).
|
||||
-define(SKIP_WIDTH, 32).
|
||||
-define(INFINITE_KEY, {null, null, null, null, null}).
|
||||
-define(EMPTY_SKIPLIST, [{?INFINITE_KEY, []}]).
|
||||
-define(SLOT_WIDTH, {4096, 12}).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% API
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) ->
|
||||
SW = os:timestamp(),
|
||||
SlotInTreeList = length(TreeList) + 1,
|
||||
|
@ -83,7 +76,7 @@ add_to_index(L0Index, L0Size, LevelMinus1, LedgerSQN, TreeList) ->
|
|||
Count0,
|
||||
array:set(Slot, [{Hash, SlotInTreeList}|L], HashIndex)}
|
||||
end,
|
||||
LM1List = gb_trees:to_list(LevelMinus1),
|
||||
LM1List = leveled_skiplist:to_list(LevelMinus1),
|
||||
StartingT = {infinity, 0, L0Size, L0Index},
|
||||
{MinSQN, MaxSQN, NewL0Size, UpdL0Index} = lists:foldl(FoldFun,
|
||||
StartingT,
|
||||
|
@ -103,7 +96,7 @@ to_list(Slots, FetchFun) ->
|
|||
SlotList = lists:reverse(lists:seq(1, Slots)),
|
||||
FullList = lists:foldl(fun(Slot, Acc) ->
|
||||
Tree = FetchFun(Slot),
|
||||
L = gb_trees:to_list(Tree),
|
||||
L = leveled_skiplist:to_list(Tree),
|
||||
lists:ukeymerge(1, Acc, L)
|
||||
end,
|
||||
[],
|
||||
|
@ -135,7 +128,7 @@ check_levelzero(Key, L0Index, TreeList) ->
|
|||
{Found, KV};
|
||||
false ->
|
||||
CheckTree = lists:nth(SlotToCheck, TreeList),
|
||||
case gb_trees:lookup(Key, CheckTree) of
|
||||
case leveled_skiplist:lookup(Key, CheckTree) of
|
||||
none ->
|
||||
{Found, KV};
|
||||
{value, Value} ->
|
||||
|
@ -147,169 +140,14 @@ check_levelzero(Key, L0Index, TreeList) ->
|
|||
lists:reverse(lists:usort(SlotList))).
|
||||
|
||||
|
||||
merge_trees(StartKey, EndKey, TreeList, LevelMinus1) ->
|
||||
lists:foldl(fun(Tree, TreeAcc) ->
|
||||
merge_nexttree(Tree, TreeAcc, StartKey, EndKey) end,
|
||||
gb_trees:empty(),
|
||||
lists:append(TreeList, [LevelMinus1])).
|
||||
|
||||
%%%============================================================================
|
||||
%%% SkipList
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
addhash_to_index(HashIndex, Hash, Slot, Count) ->
|
||||
L = array:get(Slot, HashIndex),
|
||||
case lists:member(Hash, L) of
|
||||
true ->
|
||||
{HashIndex, Count};
|
||||
false ->
|
||||
{array:set(Slot, [Hash|L], HashIndex), Count + 1}
|
||||
end.
|
||||
|
||||
merge_indexes(HashIndex, MergedIndex, Count, L0Slot) ->
|
||||
lists:foldl(fun(Slot, {MHI, AccCount}) ->
|
||||
HashList = array:get(Slot, HashIndex),
|
||||
case length(HashList) > 0 of
|
||||
true ->
|
||||
merge_indexes_singleslot(HashList,
|
||||
Slot,
|
||||
MHI,
|
||||
L0Slot,
|
||||
AccCount);
|
||||
false ->
|
||||
{MHI, AccCount}
|
||||
end end,
|
||||
{MergedIndex, Count},
|
||||
lists:seq(0, element(1, ?SLOT_WIDTH) - 1)).
|
||||
|
||||
merge_indexes_singleslot(HashList, IndexSlot, MergedIndex, L0Slot, Count) ->
|
||||
L = array:get(IndexSlot, MergedIndex),
|
||||
{UpdHL, UpdCount} = lists:foldl(fun(H, {HL, C}) ->
|
||||
case lists:keymember(H, 1, L) of
|
||||
true ->
|
||||
{[{H, L0Slot}|HL], C + 1};
|
||||
false ->
|
||||
{[{H, L0Slot}|HL], C}
|
||||
end end,
|
||||
{L, Count},
|
||||
HashList),
|
||||
{array:set(IndexSlot, UpdHL, MergedIndex), UpdCount}.
|
||||
|
||||
skiplist_put(SkipList, Key, Value, Hash) ->
|
||||
{MarkerKey, SubList} = lists:foldl(fun({Marker, SL}, Acc) ->
|
||||
case Acc of
|
||||
false ->
|
||||
case Marker >= Key of
|
||||
true ->
|
||||
{Marker, SL};
|
||||
false ->
|
||||
Acc
|
||||
end;
|
||||
_ ->
|
||||
Acc
|
||||
end end,
|
||||
false,
|
||||
SkipList),
|
||||
case Hash rem ?SKIP_WIDTH of
|
||||
0 ->
|
||||
{LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < Key end, SubList),
|
||||
SkpL1 = lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, RHS}),
|
||||
SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1],
|
||||
lists:ukeysort(1, SkpL2);
|
||||
_ ->
|
||||
UpdSubList = lists:ukeysort(1, [{Key, Value}|SubList]),
|
||||
lists:keyreplace(MarkerKey, 1, SkipList, {MarkerKey, UpdSubList})
|
||||
end.
|
||||
|
||||
skiplist_generate(UnsortedKVL) ->
|
||||
KVL = lists:ukeysort(1, UnsortedKVL),
|
||||
Slots = length(KVL) div ?SKIP_WIDTH,
|
||||
SkipList0 = lists:map(fun(X) ->
|
||||
N = X * ?SKIP_WIDTH,
|
||||
{K, _V} = lists:nth(N, KVL),
|
||||
{K, lists:sublist(KVL,
|
||||
N - ?SKIP_WIDTH + 1,
|
||||
?SKIP_WIDTH)}
|
||||
end,
|
||||
lists:seq(1, length(KVL) div ?SKIP_WIDTH)),
|
||||
case Slots * ?SKIP_WIDTH < length(KVL) of
|
||||
true ->
|
||||
{LastK, _V} = lists:last(KVL),
|
||||
SkipList0 ++ [{LastK, lists:nthtail(Slots * ?SKIP_WIDTH, KVL)}];
|
||||
false ->
|
||||
SkipList0
|
||||
end.
|
||||
|
||||
skiplist_get(SkipList, Key) ->
|
||||
SubList = lists:foldl(fun({SkipKey, SL}, Acc) ->
|
||||
case {Acc, SkipKey} of
|
||||
{null, SkipKey} when SkipKey >= Key ->
|
||||
SL;
|
||||
_ ->
|
||||
Acc
|
||||
end end,
|
||||
null,
|
||||
SkipList),
|
||||
case SubList of
|
||||
null ->
|
||||
not_found;
|
||||
SubList ->
|
||||
case lists:keyfind(Key, 1, SubList) of
|
||||
false ->
|
||||
not_found;
|
||||
{Key, V} ->
|
||||
{Key, V}
|
||||
end
|
||||
end.
|
||||
|
||||
skiplist_range(SkipList, Start, End) ->
|
||||
R = lists:foldl(fun({Mark, SL}, {PassedStart, PassedEnd, Acc, PrevList}) ->
|
||||
|
||||
case {PassedStart, PassedEnd} of
|
||||
{true, true} ->
|
||||
{true, true, Acc, null};
|
||||
{false, false} ->
|
||||
case Start > Mark of
|
||||
true ->
|
||||
{false, false, Acc, SL};
|
||||
false ->
|
||||
RHS = splitlist_start(Start, PrevList ++ SL),
|
||||
case leveled_codec:endkey_passed(End, Mark) of
|
||||
true ->
|
||||
EL = splitlist_end(End, RHS),
|
||||
{true, true, EL, null};
|
||||
false ->
|
||||
{true, false, RHS, null}
|
||||
end
|
||||
end;
|
||||
{true, false} ->
|
||||
case leveled_codec:endkey_passed(End, Mark) of
|
||||
true ->
|
||||
EL = splitlist_end(End, SL),
|
||||
{true, true, Acc ++ EL, null};
|
||||
false ->
|
||||
{true, false, Acc ++ SL, null}
|
||||
end
|
||||
end end,
|
||||
|
||||
{false, false, [], []},
|
||||
SkipList),
|
||||
{_Bool1, _Bool2, SubList, _PrevList} = R,
|
||||
SubList.
|
||||
|
||||
splitlist_start(StartKey, SL) ->
|
||||
{_LHS, RHS} = lists:splitwith(fun({K, _V}) -> K < StartKey end, SL),
|
||||
RHS.
|
||||
|
||||
splitlist_end(EndKey, SL) ->
|
||||
{LHS, _RHS} = lists:splitwith(fun({K, _V}) ->
|
||||
not leveled_codec:endkey_passed(EndKey, K)
|
||||
end,
|
||||
SL),
|
||||
LHS.
|
||||
|
||||
|
||||
merge_trees(StartKey, EndKey, SkipListList, LevelMinus1) ->
|
||||
lists:foldl(fun(SkipList, Acc) ->
|
||||
R = leveled_skiplist:to_range(SkipList,
|
||||
StartKey,
|
||||
EndKey),
|
||||
lists:ukeymerge(1, Acc, R) end,
|
||||
[],
|
||||
[LevelMinus1|lists:reverse(SkipListList)]).
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
|
@ -320,24 +158,6 @@ hash_to_slot(Key) ->
|
|||
H = erlang:phash2(Key),
|
||||
{H bsr element(2, ?SLOT_WIDTH), H band (element(1, ?SLOT_WIDTH) - 1)}.
|
||||
|
||||
merge_nexttree(Tree, TreeAcc, StartKey, EndKey) ->
|
||||
Iter = gb_trees:iterator_from(StartKey, Tree),
|
||||
merge_nexttree(Iter, TreeAcc, EndKey).
|
||||
|
||||
merge_nexttree(Iter, TreeAcc, EndKey) ->
|
||||
case gb_trees:next(Iter) of
|
||||
none ->
|
||||
TreeAcc;
|
||||
{Key, Value, NewIter} ->
|
||||
case leveled_codec:endkey_passed(EndKey, Key) of
|
||||
true ->
|
||||
TreeAcc;
|
||||
false ->
|
||||
merge_nexttree(NewIter,
|
||||
gb_trees:enter(Key, Value, TreeAcc),
|
||||
EndKey)
|
||||
end
|
||||
end.
|
||||
|
||||
%%%============================================================================
|
||||
%%% Test
|
||||
|
@ -348,27 +168,21 @@ merge_nexttree(Iter, TreeAcc, EndKey) ->
|
|||
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
|
||||
generate_randomkeys(Seqn,
|
||||
Count,
|
||||
gb_trees:empty(),
|
||||
leveled_skiplist:empty(),
|
||||
BucketRangeLow,
|
||||
BucketRangeHigh).
|
||||
|
||||
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
|
||||
Acc;
|
||||
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
|
||||
BNumber =
|
||||
case BRange of
|
||||
0 ->
|
||||
string:right(integer_to_list(BucketLow), 4, $0);
|
||||
_ ->
|
||||
BRand = random:uniform(BRange),
|
||||
string:right(integer_to_list(BucketLow + BRand), 4, $0)
|
||||
end,
|
||||
BNumber = string:right(integer_to_list(BucketLow + random:uniform(BRange)),
|
||||
4, $0),
|
||||
KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
|
||||
{K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
|
||||
{Seqn, {active, infinity}, null}},
|
||||
generate_randomkeys(Seqn + 1,
|
||||
Count - 1,
|
||||
gb_trees:enter(K, V, Acc),
|
||||
leveled_skiplist:enter(K, V, Acc),
|
||||
BucketLow,
|
||||
BRange).
|
||||
|
||||
|
@ -387,7 +201,7 @@ compare_method_test() ->
|
|||
?assertMatch(32000, SQN),
|
||||
?assertMatch(true, Size =< 32000),
|
||||
|
||||
TestList = gb_trees:to_list(generate_randomkeys(1, 2000, 1, 800)),
|
||||
TestList = leveled_skiplist:to_list(generate_randomkeys(1, 2000, 1, 800)),
|
||||
|
||||
S0 = lists:foldl(fun({Key, _V}, Acc) ->
|
||||
R0 = lists:foldr(fun(Tree, {Found, KV}) ->
|
||||
|
@ -395,7 +209,7 @@ compare_method_test() ->
|
|||
true ->
|
||||
{true, KV};
|
||||
false ->
|
||||
L0 = gb_trees:lookup(Key, Tree),
|
||||
L0 = leveled_skiplist:lookup(Key, Tree),
|
||||
case L0 of
|
||||
none ->
|
||||
{false, not_found};
|
||||
|
@ -429,221 +243,25 @@ compare_method_test() ->
|
|||
P = leveled_codec:endkey_passed(EndKey, K),
|
||||
case {K, P} of
|
||||
{K, false} when K >= StartKey ->
|
||||
gb_trees:enter(K, V, Acc);
|
||||
leveled_skiplist:enter(K, V, Acc);
|
||||
_ ->
|
||||
Acc
|
||||
end
|
||||
end,
|
||||
gb_trees:empty(),
|
||||
leveled_skiplist:empty(),
|
||||
DumpList),
|
||||
Sz0 = gb_trees:size(Q0),
|
||||
io:format(user, "Crude method took ~w microseconds resulting in tree of "
|
||||
++ "size ~w~n",
|
||||
Sz0 = leveled_skiplist:size(Q0),
|
||||
io:format("Crude method took ~w microseconds resulting in tree of " ++
|
||||
"size ~w~n",
|
||||
[timer:now_diff(os:timestamp(), SWa), Sz0]),
|
||||
SWb = os:timestamp(),
|
||||
Q1 = merge_trees(StartKey, EndKey, TreeList, gb_trees:empty()),
|
||||
Sz1 = gb_trees:size(Q1),
|
||||
io:format(user, "Merge method took ~w microseconds resulting in tree of "
|
||||
++ "size ~w~n",
|
||||
Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_skiplist:empty()),
|
||||
Sz1 = length(Q1),
|
||||
io:format("Merge method took ~w microseconds resulting in tree of " ++
|
||||
"size ~w~n",
|
||||
[timer:now_diff(os:timestamp(), SWb), Sz1]),
|
||||
?assertMatch(Sz0, Sz1).
|
||||
|
||||
skiplist_test() ->
|
||||
KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)),
|
||||
SWaD = os:timestamp(),
|
||||
_D = lists:foldl(fun({K, V}, AccD) -> dict:store(K, V, AccD) end,
|
||||
dict:new(),
|
||||
KL),
|
||||
io:format(user, "Loading dict with 4000 keys in ~w microseconds~n",
|
||||
[timer:now_diff(os:timestamp(), SWaD)]),
|
||||
|
||||
SWa = os:timestamp(),
|
||||
SkipList = skiplist_generate(KL),
|
||||
io:format(user, "Generating skip list with 4000 keys in ~w microseconds~n",
|
||||
[timer:now_diff(os:timestamp(), SWa)]),
|
||||
|
||||
CheckList1 = lists:sublist(KL, 1200, 100),
|
||||
CheckList2 = lists:sublist(KL, 1600, 100),
|
||||
CheckList3 = lists:sublist(KL, 2000, 100),
|
||||
CheckList4 = lists:sublist(KL, 2400, 100),
|
||||
CheckList5 = lists:sublist(KL, 2800, 100),
|
||||
CheckList6 = lists:sublist(KL, 1, 10),
|
||||
CheckList7 = lists:nthtail(3800, KL),
|
||||
CheckList8 = lists:sublist(KL, 3000, 1),
|
||||
CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
|
||||
CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,
|
||||
|
||||
SWb = os:timestamp(),
|
||||
lists:foreach(fun({K, V}) ->
|
||||
?assertMatch({K, V}, skiplist_get(SkipList, K))
|
||||
end,
|
||||
CheckAll),
|
||||
io:format(user, "Finding 520 keys took ~w microseconds~n",
|
||||
[timer:now_diff(os:timestamp(), SWb)]),
|
||||
|
||||
SWc = os:timestamp(),
|
||||
KR1 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, CheckList1)),
|
||||
element(1, lists:last(CheckList1))),
|
||||
io:format("Result length ~w ~n", [length(KR1)]),
|
||||
CompareL1 = length(lists:usort(CheckList1)),
|
||||
?assertMatch(CompareL1, length(KR1)),
|
||||
KR2 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, CheckList2)),
|
||||
element(1, lists:last(CheckList2))),
|
||||
CompareL2 = length(lists:usort(CheckList2)),
|
||||
?assertMatch(CompareL2, length(KR2)),
|
||||
KR3 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, CheckList3)),
|
||||
element(1, lists:last(CheckList3))),
|
||||
CompareL3 = length(lists:usort(CheckList3)),
|
||||
?assertMatch(CompareL3, length(KR3)),
|
||||
KR4 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, CheckList4)),
|
||||
element(1, lists:last(CheckList4))),
|
||||
CompareL4 = length(lists:usort(CheckList4)),
|
||||
?assertMatch(CompareL4, length(KR4)),
|
||||
KR5 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, CheckList5)),
|
||||
element(1, lists:last(CheckList5))),
|
||||
CompareL5 = length(lists:usort(CheckList5)),
|
||||
?assertMatch(CompareL5, length(KR5)),
|
||||
KR6 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, CheckList6)),
|
||||
element(1, lists:last(CheckList6))),
|
||||
CompareL6 = length(lists:usort(CheckList6)),
|
||||
?assertMatch(CompareL6, length(KR6)),
|
||||
KR7 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, CheckList7)),
|
||||
element(1, lists:last(CheckList7))),
|
||||
CompareL7 = length(lists:usort(CheckList7)),
|
||||
?assertMatch(CompareL7, length(KR7)),
|
||||
KR8 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, CheckList8)),
|
||||
element(1, lists:last(CheckList8))),
|
||||
CompareL8 = length(lists:usort(CheckList8)),
|
||||
?assertMatch(CompareL8, length(KR8)),
|
||||
|
||||
KL_OOR1 = gb_trees:to_list(generate_randomkeys(1, 4, 201, 202)),
|
||||
KR9 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, KL_OOR1)),
|
||||
element(1, lists:last(KL_OOR1))),
|
||||
?assertMatch([], KR9),
|
||||
KL_OOR2 = gb_trees:to_list(generate_randomkeys(1, 4, 0, 0)),
|
||||
KR10 = skiplist_range(SkipList,
|
||||
element(1, lists:nth(1, KL_OOR2)),
|
||||
element(1, lists:last(KL_OOR2))),
|
||||
?assertMatch([], KR10),
|
||||
|
||||
io:format(user, "Finding 10 ranges took ~w microseconds~n",
|
||||
[timer:now_diff(os:timestamp(), SWc)]),
|
||||
|
||||
AltKL = gb_trees:to_list(generate_randomkeys(1, 1000, 1, 200)),
|
||||
SWd = os:timestamp(),
|
||||
lists:foreach(fun({K, _V}) ->
|
||||
skiplist_get(SkipList, K)
|
||||
end,
|
||||
AltKL),
|
||||
io:format(user, "Finding 1000 mainly missing keys took ~w microseconds~n",
|
||||
[timer:now_diff(os:timestamp(), SWd)]).
|
||||
|
||||
hash_index_test() ->
|
||||
KeyCount = 4000,
|
||||
SlotWidth = element(1, ?SLOT_WIDTH),
|
||||
HI0 = new_index(),
|
||||
MHI0 = new_index(),
|
||||
KL0 = gb_trees:to_list(generate_randomkeys(1, KeyCount, 1, 200)),
|
||||
CheckList1 = lists:sublist(KL0, 1200, 100),
|
||||
CheckList2 = lists:sublist(KL0, 1600, 100),
|
||||
CheckList3 = lists:sublist(KL0, 2000, 100),
|
||||
CheckList4 = lists:sublist(KL0, 2400, 100),
|
||||
CheckList5 = lists:sublist(KL0, 2800, 100),
|
||||
CheckList6 = lists:sublist(KL0, 1, 10),
|
||||
CheckList7 = lists:nthtail(3800, KL0),
|
||||
CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
|
||||
CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,
|
||||
|
||||
SWa = os:timestamp(),
|
||||
{HashIndex1, SkipList1, _TC} =
|
||||
lists:foldl(fun({K, V}, {HI, SL, C}) ->
|
||||
{H, S} = hash_to_slot(K),
|
||||
{UpdHI, UpdC} = addhash_to_index(HI, H, S, C),
|
||||
UpdSL = skiplist_put(SL, K, V, H),
|
||||
{UpdHI, UpdSL, UpdC} end,
|
||||
{HI0, ?EMPTY_SKIPLIST, 0},
|
||||
KL0),
|
||||
io:format(user, "Dynamic load of skiplist took ~w microseconds~n",
|
||||
[timer:now_diff(os:timestamp(), SWa)]),
|
||||
|
||||
{LL, LN} = lists:foldl(fun({K, SL}, {Count, Number}) ->
|
||||
{Count + length(SL), Number + 1} end,
|
||||
{0, 0},
|
||||
SkipList1),
|
||||
io:format(user,
|
||||
"Skip list has ~w markers with total members of ~w~n",
|
||||
[LN, LL]),
|
||||
?assertMatch(true, LL / LN > ?SKIP_WIDTH / 2 ),
|
||||
?assertMatch(true, LL / LN < ?SKIP_WIDTH * 2 ),
|
||||
|
||||
SWb = os:timestamp(),
|
||||
lists:foreach(fun({K, V}) ->
|
||||
?assertMatch({K, V},
|
||||
skiplist_get(SkipList1, K))
|
||||
end,
|
||||
CheckAll),
|
||||
io:format(user, "Fetching ~w keys from skiplist took ~w microseconds~n",
|
||||
[KeyCount, timer:now_diff(os:timestamp(), SWb)]),
|
||||
|
||||
SWc = os:timestamp(),
|
||||
{HI1, _C1} = lists:foldl(fun({K, _V}, {HI, C}) ->
|
||||
{H, S} = hash_to_slot(K),
|
||||
addhash_to_index(HI, H, S, C) end,
|
||||
{HI0, 0},
|
||||
KL0),
|
||||
io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n",
|
||||
[KeyCount, timer:now_diff(os:timestamp(), SWc)]),
|
||||
?assertMatch(SlotWidth, array:size(HI1)),
|
||||
|
||||
SWd = os:timestamp(),
|
||||
{MHI1, TC1} = merge_indexes(HI1, MHI0, 0, 0),
|
||||
io:format(user, "First merge to hashindex took ~w microseconds~n",
|
||||
[timer:now_diff(os:timestamp(), SWd)]),
|
||||
?assertMatch(SlotWidth, array:size(MHI1)),
|
||||
|
||||
KL1 = gb_trees:to_list(generate_randomkeys(1, KeyCount, 1, 200)),
|
||||
|
||||
SWe = os:timestamp(),
|
||||
HI2 = new_index(),
|
||||
{HI3, _C2} = lists:foldl(fun({K, _V}, {HI, C}) ->
|
||||
{H, S} = hash_to_slot(K),
|
||||
addhash_to_index(HI, H, S, C) end,
|
||||
{HI2, 0},
|
||||
KL1),
|
||||
io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n",
|
||||
[KeyCount, timer:now_diff(os:timestamp(), SWe)]),
|
||||
|
||||
SWf = os:timestamp(),
|
||||
{MHI2, TC2} = merge_indexes(HI3, MHI1, TC1, 1),
|
||||
io:format(user, "Second merge to hashindex took ~w microseconds~n",
|
||||
[timer:now_diff(os:timestamp(), SWf)]),
|
||||
?assertMatch(SlotWidth, array:size(MHI2)),
|
||||
|
||||
SWg = os:timestamp(),
|
||||
HI4 = new_index(),
|
||||
{HI5, _C3} = lists:foldl(fun({K, _V}, {HI, C}) ->
|
||||
{H, S} = hash_to_slot(K),
|
||||
addhash_to_index(HI, H, S, C) end,
|
||||
{HI4, 0},
|
||||
KL1),
|
||||
io:format(user, "Adding ~w keys to hashindex took ~w microseconds~n",
|
||||
[KeyCount, timer:now_diff(os:timestamp(), SWg)]),
|
||||
|
||||
SWh = os:timestamp(),
|
||||
{MHI3, _TC3} = merge_indexes(HI5, MHI2, TC2, 2),
|
||||
io:format(user, "Third merge to hashindex took ~w microseconds~n",
|
||||
[timer:now_diff(os:timestamp(), SWh)]),
|
||||
?assertMatch(SlotWidth, array:size(MHI2)).
|
||||
|
||||
|
||||
|
||||
-endif.
|
352
src/leveled_skiplist.erl
Normal file
352
src/leveled_skiplist.erl
Normal file
|
@ -0,0 +1,352 @@
|
|||
%% -------- SKIPLIST ---------
|
||||
%%
|
||||
%% For storing small numbers of {K, V} pairs where reasonable insertion and
%% fetch times are required, but with fast support for flattening to a list or
%% a sublist within a certain key range
|
||||
%%
|
||||
%% Used instead of gb_trees to retain compatibility with OTP16 (and Riak's
|
||||
%% ongoing dependency on OTP16)
|
||||
%%
|
||||
%% Not a proper skip list. Only supports a single depth. Good enough for the
|
||||
%% purposes of leveled. Also uses the peculiar endkey_passed function within
|
||||
%% leveled
|
||||
|
||||
-module(leveled_skiplist).
|
||||
|
||||
-include("include/leveled.hrl").
|
||||
|
||||
-export([
|
||||
from_list/1,
|
||||
to_list/1,
|
||||
enter/3,
|
||||
to_range/2,
|
||||
to_range/3,
|
||||
lookup/2,
|
||||
empty/0,
|
||||
size/1
|
||||
]).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
-define(SKIP_WIDTH, 32).
|
||||
-define(INFINITY_KEY, {null, null, null, null, null}).
|
||||
-define(EMPTY_SKIPLIST, [{?INFINITY_KEY, []}]).
|
||||
|
||||
|
||||
%%%============================================================================
|
||||
%%% SkipList API
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
%% @doc Insert {Key, Value} into SkipList, replacing any existing value for
%% Key, and return the updated skiplist.
%%
%% The skiplist is a key-ordered list of {Marker, SubList} pairs; a key is
%% stored in the sublist of the first marker that is >= the key.  Keys whose
%% phash2 hash is a multiple of ?SKIP_WIDTH are promoted to become markers
%% themselves, splitting the sublist they land in.
%%
%% Skiplists produced by from_list/1 carry no terminating ?INFINITY_KEY
%% marker, so a key above the last marker has no owning sublist.  In that case
%% a terminating marker is appended first rather than failing with badmatch.
%% Keys sorting above ?INFINITY_KEY are not supported and raise badarg.
enter(Key, Value, SkipList) ->
    Hash = erlang:phash2(Key),
    {MarkerKey, SubList, SkipList0} =
        case lists:dropwhile(fun({M, _SL}) -> M < Key end, SkipList) of
            [{FoundMarker, FoundSL}|_Rest] ->
                {FoundMarker, FoundSL, SkipList};
            [] when Key =< ?INFINITY_KEY ->
                %% No marker covers Key - add the terminating marker
                {?INFINITY_KEY, [], SkipList ++ [{?INFINITY_KEY, []}]};
            [] ->
                erlang:error(badarg, [Key, Value, SkipList])
        end,
    case Hash rem ?SKIP_WIDTH of
        0 ->
            %% Key becomes a new marker owning everything below it in the
            %% sublist; the remainder stays with the original marker.
            {LHS, RHS0} =
                lists:splitwith(fun({K, _V}) -> K < Key end, SubList),
            %% If Key was already present it heads RHS0; drop the stale pair
            %% so that it is not left behind as a duplicate.
            RHS =
                case RHS0 of
                    [{K0, _OldValue}|Tail] when K0 == Key ->
                        Tail;
                    _ ->
                        RHS0
                end,
            SkpL1 =
                lists:keyreplace(MarkerKey, 1, SkipList0, {MarkerKey, RHS}),
            SkpL2 = [{Key, lists:ukeysort(1, [{Key, Value}|LHS])}|SkpL1],
            %% ukeysort keeps the first of equal markers, i.e. the new entry
            %% when Key was already a marker.
            lists:ukeysort(1, SkpL2);
        _ ->
            %% ukeysort keeps the first of equal keys, so the new pair
            %% replaces any existing pair for Key.
            UpdSubList = lists:ukeysort(1, [{Key, Value}|SubList]),
            lists:keyreplace(MarkerKey,
                                1,
                                SkipList0,
                                {MarkerKey, UpdSubList})
    end.
|
||||
|
||||
%% @doc Build a skiplist from an unsorted list of {Key, Value} pairs.
%%
%% The input is sorted and de-duplicated on key (lists:ukeysort/2 keeps the
%% first pair of any that compare equal), then cut into consecutive sublists
%% of ?SKIP_WIDTH pairs.  Each sublist is stored under a marker equal to its
%% last (highest) key; a shorter final sublist holds any remainder.  An empty
%% input yields [].  The list is chunked in a single pass rather than
%% re-walking it from the head for every slot.
from_list(UnsortedKVL) ->
    KVL = lists:ukeysort(1, UnsortedKVL),
    from_sorted_list(KVL, length(KVL), []).

%% Peel off up to ?SKIP_WIDTH pairs at a time, accumulating the
%% {Marker, SubList} entries in reverse order.
from_sorted_list([], 0, Acc) ->
    lists:reverse(Acc);
from_sorted_list(KVL, Remaining, Acc) ->
    Take = erlang:min(Remaining, ?SKIP_WIDTH),
    {SubList, Rest} = lists:split(Take, KVL),
    {MarkerKey, _V} = lists:last(SubList),
    from_sorted_list(Rest, Remaining - Take, [{MarkerKey, SubList}|Acc]).
|
||||
|
||||
%% @doc Fetch the value stored for Key.
%%
%% Returns {value, V} when Key is present and none otherwise.  Only the
%% sublist of the first marker that is >= Key is searched, as that is the
%% only sublist which can hold the key.
lookup(Key, SkipList) ->
    BelowKey = fun({Marker, _KVL}) -> Marker < Key end,
    case lists:dropwhile(BelowKey, SkipList) of
        [] ->
            %% Key is above every marker
            none;
        [{_Marker, KVL}|_Higher] ->
            case lists:keyfind(Key, 1, KVL) of
                {Key, Value} ->
                    {value, Value};
                false ->
                    none
            end
    end.
|
||||
|
||||
%% @doc Flatten the skiplist into a single list of {Key, Value} pairs, in
%% marker order (and so in key order when the skiplist is well-formed).
%%
%% The sublists are appended right-to-left via lists:flatmap/2, so each pair
%% is copied once rather than re-copying a growing accumulator per marker.
to_list(SkipList) ->
    lists:flatmap(fun({_Mark, SL}) -> SL end, SkipList).
|
||||
|
||||
%% Rather than support iterator_from like gb_trees, this will just output a
%% key-sorted list for the desired range, which can then be iterated over as
%% normal
|
||||
%% @doc Return the {Key, Value} pairs from Start onwards, using
%% ?INFINITY_KEY as the end of the range.
to_range(SkipList, Start) ->
    End = ?INFINITY_KEY,
    to_range(SkipList, Start, End).
|
||||
|
||||
%% @doc Return the key-ordered list of {Key, Value} pairs lying between Start
%% and End.
%%
%% Works in two phases: markers below Start are skipped, then whole sublists
%% are collected until leveled_codec:endkey_passed/2 reports that a marker is
%% beyond End (presumably meaning the marker sorts after End - confirm against
%% leveled_codec).  Only the first and last sublists touched are trimmed.
to_range(SkipList, Start, End) ->
    range_from_start(SkipList, Start, End, []).

%% Phase one: walk past markers lower than Start, remembering the sublist
%% most recently skipped, then trim the first sublist in range.
range_from_start([], _Start, _End, _PrevList) ->
    [];
range_from_start([{Mark, SL}|Rest], Start, End, _PrevList) when Start > Mark ->
    range_from_start(Rest, Start, End, SL);
range_from_start([{Mark, SL}|Rest], Start, End, PrevList) ->
    RHS = splitlist_start(Start, PrevList ++ SL),
    case leveled_codec:endkey_passed(End, Mark) of
        true ->
            %% Start and End fall within the same sublist
            splitlist_end(End, RHS);
        false ->
            range_to_end(Rest, End, RHS)
    end.

%% Phase two: append whole sublists until the end key is passed, trimming the
%% final sublist at End.
range_to_end([], _End, Acc) ->
    Acc;
range_to_end([{Mark, SL}|Rest], End, Acc) ->
    case leveled_codec:endkey_passed(End, Mark) of
        true ->
            Acc ++ splitlist_end(End, SL);
        false ->
            range_to_end(Rest, End, Acc ++ SL)
    end.
|
||||
|
||||
%% @doc Return a skiplist holding no keys: a single ?INFINITY_KEY marker with
%% an empty sublist (as defined by ?EMPTY_SKIPLIST).
empty() ->
    EmptySkipList = ?EMPTY_SKIPLIST,
    EmptySkipList.
|
||||
|
||||
%% @doc Count the {Key, Value} pairs held across all sublists of the skiplist.
%% Note that this walks every sublist on each call.
size(SkipList) ->
    SubListLengths = lists:map(fun({_Mark, SL}) -> length(SL) end, SkipList),
    lists:sum(SubListLengths).
|
||||
|
||||
%%%============================================================================
|
||||
%%% Internal Functions
|
||||
%%%============================================================================
|
||||
|
||||
|
||||
%% Drop the leading pairs of a key-sorted sublist whose keys are lower than
%% StartKey, returning the remainder (which begins at the first key that is
%% >= StartKey).
splitlist_start(StartKey, SL) ->
    lists:dropwhile(fun({K, _V}) -> K < StartKey end, SL).
|
||||
|
||||
%% Keep the leading pairs of a key-sorted sublist up to (but excluding) the
%% first key for which leveled_codec:endkey_passed(EndKey, Key) is true.
splitlist_end(EndKey, SL) ->
    WithinRange =
        fun({K, _V}) -> not leveled_codec:endkey_passed(EndKey, K) end,
    lists:takewhile(WithinRange, SL).
|
||||
|
||||
%%%============================================================================
|
||||
%%% Test
|
||||
%%%============================================================================
|
||||
|
||||
-ifdef(TEST).
|
||||
|
||||
%% Test helper: build a gb_tree of up to Count random ledger-style keys, with
%% sequence numbers counting up from Seqn.  Fewer than Count entries result
%% when random keys collide, as a later entry replaces an earlier one.
%%
%% NOTE: the fourth argument is used as the *width* of the bucket range, not
%% as an upper bound - buckets are drawn from BucketRangeLow + 1 up to
%% BucketRangeLow + BucketRangeHigh.  A width of 0 pins every key to the
%% single bucket BucketRangeLow.
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
    EmptyTree = gb_trees:empty(),
    generate_randomkeys(Seqn, Count, EmptyTree, BucketRangeLow, BucketRangeHigh).

generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
    Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
    %% Zero-pad a number to four characters
    Pad4 = fun(N) -> string:right(integer_to_list(N), 4, $0) end,
    %% The bucket offset is drawn before the key number, so the sequence of
    %% random draws is stable for a given seed
    BucketOffset =
        case BRange of
            0 ->
                0;
            _ ->
                random:uniform(BRange)
        end,
    Bucket = "Bucket" ++ Pad4(BucketLow + BucketOffset),
    KeyName = "Key" ++ Pad4(random:uniform(1000)),
    LedgerKey = {o, Bucket, KeyName, null},
    LedgerValue = {Seqn, {active, infinity}, null},
    generate_randomkeys(Seqn + 1,
                        Count - 1,
                        gb_trees:enter(LedgerKey, LedgerValue, Acc),
                        BucketLow,
                        BRange).
|
||||
|
||||
%% Builds the same random key set into a dict (as a timing baseline), a
%% skiplist made in bulk with from_list/1 and a skiplist grown key-by-key with
%% enter/3, then runs the shared timing/consistency checks on both skiplists.
skiplist_test() ->
    KL = gb_trees:to_list(generate_randomkeys(1, 4000, 1, 200)),

    DictStart = os:timestamp(),
    StoreFun = fun({K, V}, AccD) -> dict:store(K, V, AccD) end,
    _D = lists:foldl(StoreFun, dict:new(), KL),
    DictTime = timer:now_diff(os:timestamp(), DictStart),
    io:format(user,
                "Loading dict with 4000 keys in ~w microseconds~n",
                [DictTime]),

    BulkStart = os:timestamp(),
    BulkSkipList = from_list(KL),
    BulkTime = timer:now_diff(os:timestamp(), BulkStart),
    io:format(user,
                "Generating skip list with 4000 keys in ~w microseconds~n",
                [BulkTime]),

    DynamicStart = os:timestamp(),
    EnterFun = fun({K, V}, SL) -> enter(K, V, SL) end,
    DynamicSkipList = lists:foldl(EnterFun, ?EMPTY_SKIPLIST, KL),
    DynamicTime = timer:now_diff(os:timestamp(), DynamicStart),
    io:format(user,
                "Dynamic load of skiplist took ~w microseconds~n~n",
                [DynamicTime]),

    io:format(user, "~nRunning timing tests for generated skiplist:~n", []),
    skiplist_timingtest(KL, BulkSkipList),

    io:format(user, "~nRunning timing tests for dynamic skiplist:~n", []),
    skiplist_timingtest(KL, DynamicSkipList),
    io:format(user, "~n", []).
|
||||
|
||||
|
||||
%% Shared checks (with timings written to the user console) for a skiplist
%% that is expected to hold exactly the pairs in the key-sorted list KL:
%%  - lookup/2 finds a sample of keys known to be present;
%%  - to_range/3 over eight slices of KL returns as many pairs as the slice
%%    holds, and returns [] for two key ranges outside the loaded buckets;
%%  - lookup/2 returns none for keys in buckets above and below the loaded
%%    range;
%%  - to_list/1 flattens back to KL.
%% KL is assumed to hold more than 3800 pairs, as the slices are taken at
%% fixed offsets.
skiplist_timingtest(KL, SkipList) ->
    io:format(user, "Timing tests on skiplist of size ~w~n",
                [leveled_skiplist:size(SkipList)]),
    CheckList1 = lists:sublist(KL, 1200, 100),
    CheckList2 = lists:sublist(KL, 1600, 100),
    CheckList3 = lists:sublist(KL, 2000, 100),
    CheckList4 = lists:sublist(KL, 2400, 100),
    CheckList5 = lists:sublist(KL, 2800, 100),
    CheckList6 = lists:sublist(KL, 1, 10),
    CheckList7 = lists:nthtail(3800, KL),
    CheckList8 = lists:sublist(KL, 3000, 1),
    CheckAll = CheckList1 ++ CheckList2 ++ CheckList3 ++
                    CheckList4 ++ CheckList5 ++ CheckList6 ++ CheckList7,

    SWb = os:timestamp(),
    lists:foreach(fun({K, V}) ->
                        ?assertMatch({value, V}, lookup(K, SkipList))
                    end,
                    CheckAll),
    %% Report the real sample size - the tail slice varies with how many
    %% distinct keys were generated
    io:format(user, "Finding ~w keys took ~w microseconds~n",
                [length(CheckAll), timer:now_diff(os:timestamp(), SWb)]),

    SWc = os:timestamp(),
    %% Range query spanning from the first to the last key of a key list
    RangeOf =
        fun(KeyList) ->
            to_range(SkipList,
                        element(1, hd(KeyList)),
                        element(1, lists:last(KeyList)))
        end,
    lists:foreach(fun(CheckList) ->
                        ?assertEqual(length(lists:usort(CheckList)),
                                        length(RangeOf(CheckList)))
                    end,
                    [CheckList1, CheckList2, CheckList3, CheckList4,
                        CheckList5, CheckList6, CheckList7, CheckList8]),
    %% Ranges wholly above, then wholly below, the loaded buckets are empty
    lists:foreach(fun({BucketLow, BRange}) ->
                        KL_OOR =
                            gb_trees:to_list(generate_randomkeys(1,
                                                                    4,
                                                                    BucketLow,
                                                                    BRange)),
                        ?assertEqual([], RangeOf(KL_OOR))
                    end,
                    [{201, 202}, {0, 0}]),
    io:format(user, "Finding 10 ranges took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWc)]),

    AltKL1 = gb_trees:to_list(generate_randomkeys(1, 1000, 1, 200)),
    SWd = os:timestamp(),
    lists:foreach(fun({K, _V}) ->
                        lookup(K, SkipList)
                    end,
                    AltKL1),
    io:format(user, "Getting 1000 mainly missing keys took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWd)]),

    AltKL2 = gb_trees:to_list(generate_randomkeys(1, 1000, 201, 300)),
    SWe = os:timestamp(),
    lists:foreach(fun({K, _V}) ->
                        none = lookup(K, SkipList)
                    end,
                    AltKL2),
    io:format(user, "Getting 1000 missing keys above range took ~w " ++
                        "microseconds~n",
                [timer:now_diff(os:timestamp(), SWe)]),

    AltKL3 = gb_trees:to_list(generate_randomkeys(1, 1000, 0, 0)),
    SWf = os:timestamp(),
    lists:foreach(fun({K, _V}) ->
                        none = lookup(K, SkipList)
                    end,
                    AltKL3),
    io:format(user, "Getting 1000 missing keys below range took ~w " ++
                        "microseconds~n",
                [timer:now_diff(os:timestamp(), SWf)]),

    SWg = os:timestamp(),
    FlatList = to_list(SkipList),
    io:format(user, "Flattening skiplist took ~w microseconds~n",
                [timer:now_diff(os:timestamp(), SWg)]),
    ?assertEqual(KL, FlatList).
|
||||
|
||||
|
||||
|
||||
-endif.
|
Loading…
Add table
Add a link
Reference in a new issue