Initial version of potential new tree module

with basic Unit Tests.  Intended to replace skiplist
This commit is contained in:
martinsumner 2017-01-19 22:49:32 +00:00
parent 213a2e93fb
commit c722f3132d

255
src/leveled_tree.erl Normal file
View file

@ -0,0 +1,255 @@
%% -------- TREE ---------
%%
%% This module is intended to address two issues
%% - the lack of iterator_from support in OTP16 gb_trees
%% - the time to convert from/to list in gb_trees
%%
%% Leveled had had a skiplist implementation previously, and this is a
%% variation on that. The Treein this case is a bunch of sublists of length
%% SKIP_WIDTH with the start_keys in a gb_tree.
-module(leveled_tree).
-include("include/leveled.hrl").
-export([
from_orderedlist/1,
from_orderedset/1,
to_list/1,
match_range/3,
% search_range/3,
match/2,
search/2,
tsize/1
]).
-include_lib("eunit/include/eunit.hrl").
-define(SKIP_WIDTH, 16).
%%%============================================================================
%%% API
%%%============================================================================
from_orderedlist(OrderedList) ->
L = length(OrderedList),
{tree, L, from_orderedlist(OrderedList, gb_trees:empty(), L)}.
from_orderedset(Table) ->
from_orderedlist(ets:tab2list(Table)).
match(Key, {tree, _L, Tree}) ->
Iter = gb_trees:iterator_from(Key, Tree),
case gb_trees:next(Iter) of
none ->
none;
{_NK, SL, _Iter} ->
lookup_match(Key, SL)
end.
match_range(StartKey, EndKey, {tree, _L, Tree}) ->
Iter0 = gb_trees:iterator_from(StartKey, Tree),
case gb_trees:next(Iter0) of
none ->
[];
{NK, SL, Iter1} ->
PredFun =
fun({K, _V}) ->
K < StartKey
end,
{_LHS, RHS} = lists:splitwith(PredFun, SL),
lookup_match_range(EndKey, {NK, RHS}, Iter1, [])
end.
search(Key, {tree, _L, Tree}) ->
Iter = gb_trees:iterator_from(Key, Tree),
case gb_trees:next(Iter) of
none ->
none;
{_NK, SL, _Iter} ->
lookup_best(Key, SL)
end.
to_list({tree, _L, Tree}) ->
FoldFun =
fun({_MK, SL}, Acc) ->
Acc ++ SL
end,
lists:foldl(FoldFun, [], gb_trees:to_list(Tree)).
tsize({tree, L, _Tree}) ->
L.
%%%============================================================================
%%% Internal Functions
%%%============================================================================
from_orderedlist([], Tree, _L) ->
Tree;
from_orderedlist(OrdList, Tree, L) ->
SubLL = min(?SKIP_WIDTH, L),
{Head, Tail} = lists:split(SubLL, OrdList),
{LastK, _LastV} = lists:last(Head),
from_orderedlist(Tail, gb_trees:insert(LastK, Head, Tree), L - SubLL).
lookup_match(_Key, []) ->
none;
lookup_match(Key, [{EK, _EV}|_Tail]) when EK > Key ->
none;
lookup_match(Key, [{Key, EV}|_Tail]) ->
{value, EV};
lookup_match(Key, [_Top|Tail]) ->
lookup_match(Key, Tail).
lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key ->
{EK, EV};
lookup_best(Key, [_Top|Tail]) ->
lookup_best(Key, Tail).
lookup_match_range(EndKey, {NK0, SL0}, Iter0, Output) ->
PredFun =
fun({K, _V}) ->
not leveled_codec:endkey_passed(EndKey, K)
end,
case leveled_codec:endkey_passed(EndKey, NK0) of
true ->
{LHS, RHS} = lists:splitwith(PredFun, SL0),
case RHS of
[{EndKey, FirstValue}|_Tail] ->
Output ++ LHS ++ [{EndKey, FirstValue}];
_ ->
Output ++ LHS
end;
false ->
UpdOutput = Output ++ SL0,
case gb_trees:next(Iter0) of
none ->
UpdOutput;
{NK1, SL1, Iter1} ->
lookup_match_range(EndKey, {NK1, SL1}, Iter1, UpdOutput)
end
end.
%%%============================================================================
%%% Test
%%%============================================================================
-ifdef(TEST).
generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) ->
generate_randomkeys(Seqn,
Count,
[],
BucketRangeLow,
BucketRangeHigh).
generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) ->
Acc;
generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
BNumber =
case BRange of
0 ->
string:right(integer_to_list(BucketLow), 4, $0);
_ ->
BRand = random:uniform(BRange),
string:right(integer_to_list(BucketLow + BRand), 4, $0)
end,
KNumber = string:right(integer_to_list(random:uniform(1000)), 4, $0),
{K, V} = {{o, "Bucket" ++ BNumber, "Key" ++ KNumber, null},
{Seqn, {active, infinity}, null}},
generate_randomkeys(Seqn + 1,
Count - 1,
[{K, V}|Acc],
BucketLow,
BRange).
tree_test() ->
N = 4000,
KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)),
OS = ets:new(test, [ordered_set, private]),
ets:insert(OS, KL),
SWaETS = os:timestamp(),
Tree0 = from_orderedset(OS),
io:format(user, "Generating tree from ETS in ~w microseconds" ++
" of size ~w~n",
[timer:now_diff(os:timestamp(), SWaETS),
tsize(Tree0)]),
SWaGSL = os:timestamp(),
Tree1 = from_orderedlist(KL),
io:format(user, "Generating tree from orddict in ~w microseconds" ++
" of size ~w~n",
[timer:now_diff(os:timestamp(), SWaGSL),
tsize(Tree1)]),
SWaLUP = os:timestamp(),
lists:foreach(match_fun(Tree0), KL),
lists:foreach(match_fun(Tree1), KL),
io:format(user, "Looked up all keys twice in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaLUP)]),
?assertMatch(Tree0, Tree1),
SWaSRCH1 = os:timestamp(),
lists:foreach(search_exactmatch_fun(Tree0), KL),
lists:foreach(search_exactmatch_fun(Tree1), KL),
io:format(user, "Search all keys twice for exact match in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaSRCH1)]),
BitBiggerKeyFun =
fun(Idx) ->
{K, _V} = lists:nth(Idx, KL),
{o, B, FullKey, null} = K,
{{o, B, FullKey ++ "0", null}, lists:nth(Idx + 1, KL)}
end,
SrchKL = lists:map(BitBiggerKeyFun, lists:seq(1, length(KL) - 1)),
SWaSRCH2 = os:timestamp(),
lists:foreach(search_nearmatch_fun(Tree0), SrchKL),
lists:foreach(search_nearmatch_fun(Tree1), SrchKL),
io:format(user, "Search all keys twice for near match in ~w microseconds~n",
[timer:now_diff(os:timestamp(), SWaSRCH2)]),
FirstKey = element(1, lists:nth(1, KL)),
FinalKey = element(1, lists:last(KL)),
PenultimateKey = element(1, lists:nth(length(KL) - 1, KL)),
AfterFirstKey = setelement(3, FirstKey, element(3, FirstKey) ++ "0"),
AfterPenultimateKey = setelement(3,
PenultimateKey,
element(3, PenultimateKey) ++ "0"),
LengthR =
fun(SK, EK, T) ->
length(match_range(SK, EK, T))
end,
KL_Length = length(KL),
?assertMatch(KL_Length, LengthR(FirstKey, FinalKey, Tree0)),
?assertMatch(KL_Length, LengthR(FirstKey, PenultimateKey, Tree0) + 1),
?assertMatch(1, LengthR(all, FirstKey, Tree0)),
?assertMatch(KL_Length, LengthR(all, PenultimateKey, Tree0) + 1),
?assertMatch(KL_Length, LengthR(all, all, Tree0)),
?assertMatch(2, LengthR(PenultimateKey, FinalKey, Tree0)),
?assertMatch(KL_Length, LengthR(AfterFirstKey, PenultimateKey, Tree0) + 2),
?assertMatch(1, LengthR(AfterPenultimateKey, FinalKey, Tree0)).
match_fun(Tree) ->
fun({K, V}) ->
?assertMatch({value, V}, match(K, Tree))
end.
search_exactmatch_fun(Tree) ->
fun({K, V}) ->
?assertMatch({K, V}, search(K, Tree))
end.
search_nearmatch_fun(Tree) ->
fun({K, {NK, NV}}) ->
?assertMatch({NK, NV}, search(K, Tree))
end.
-endif.