WIP - Recent Modifications
Just some initial WIP code for this. Will revisit this again after exploring some ideas as to how to reduce the cost of the get_keys_by_segment. The overlal idea is that there are trees of recent modifications, with recent being some rolling time window made up of hourly blocks, and recency being dtermined by the last-modified date on the object metadata - which should be conistent across a cluster. So if we were at 15:30 we would get the tree for 14:00 - 15:00 and the tree for 15:00-16:00 from two different queries which cover the same partitions and then compare. Comparison may find differences, and we know what segment the difference is in - but how to then find all keys in that segment which have been modified in the period? Three ways: Do it inefficeintly and infrequently using a fold_keys and a filter (perhaps with SST files having a highest LMD in the metadata so that they can be skipped). Add a special index, where verye entry has a TTL, and the Key is {$segment, Segment, Bucket, Key} so that a normal 2i query cna be used. Align hashing for segments with hashing for penciller lookup so that a query over the actual keys cna be optimised skipping chunks of the in-memory part, and chunks of the SST file
This commit is contained in:
parent
fde9af28dd
commit
9fca17d56a
2 changed files with 77 additions and 4 deletions
|
@ -84,7 +84,12 @@
|
||||||
level2 :: any() % an array - but OTP compatibility
|
level2 :: any() % an array - but OTP compatibility
|
||||||
}).
|
}).
|
||||||
|
|
||||||
|
-record(recenttrees, {trees :: list(),
|
||||||
|
size:: small|medium|large|xlarge
|
||||||
|
}).
|
||||||
|
|
||||||
-type tictactree() :: #tictactree{}.
|
-type tictactree() :: #tictactree{}.
|
||||||
|
-type recenttrees() :: #recenttrees{}.
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% External functions
|
%%% External functions
|
||||||
|
@ -95,7 +100,10 @@
|
||||||
%% Create a new tree, zeroed out.
|
%% Create a new tree, zeroed out.
|
||||||
new_tree(TreeID) ->
|
new_tree(TreeID) ->
|
||||||
new_tree(TreeID, small).
|
new_tree(TreeID, small).
|
||||||
|
|
||||||
|
-spec new_tree(any(), small|medium|large|xlarge) -> tictactree().
|
||||||
|
%% @doc
|
||||||
|
%% Create a new tree, zeroed out. Specify the t-shirt siz eof the tree
|
||||||
new_tree(TreeID, Size) ->
|
new_tree(TreeID, Size) ->
|
||||||
{BitWidth, Width, SegmentCount} =
|
{BitWidth, Width, SegmentCount} =
|
||||||
case Size of
|
case Size of
|
||||||
|
@ -121,10 +129,12 @@ new_tree(TreeID, Size) ->
|
||||||
level1 = Lv1Init,
|
level1 = Lv1Init,
|
||||||
level2 = Lv2Init}.
|
level2 = Lv2Init}.
|
||||||
|
|
||||||
-spec add_kv(tictactree(), tuple(), tuple(), fun()) -> tictactree().
|
-spec add_kv(tictactree(), tuple(), tuple(), fun((_,_) -> integer())) ->
|
||||||
|
tictactree().
|
||||||
%% @doc
|
%% @doc
|
||||||
%% Add a Key and value to a tictactree using the HashFun to calculate the Hash
|
%% Add a Key and value to a tictactree using the HashFun to calculate the Hash
|
||||||
%% based on that key and value
|
%% based on that key and value (or extract the Hash if it is present within
|
||||||
|
%% the value).
|
||||||
add_kv(TicTacTree, Key, Value, HashFun) ->
|
add_kv(TicTacTree, Key, Value, HashFun) ->
|
||||||
HashV = HashFun(Key, Value),
|
HashV = HashFun(Key, Value),
|
||||||
SegChangeHash = erlang:phash2(Key, HashV),
|
SegChangeHash = erlang:phash2(Key, HashV),
|
||||||
|
@ -236,10 +246,73 @@ merge_trees(TreeA, TreeB) ->
|
||||||
|
|
||||||
MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}.
|
MergedTree#tictactree{level1 = NewLevel1, level2 = NewLevel2}.
|
||||||
|
|
||||||
|
-spec get_segment(tuple(), integer()) -> integer().
|
||||||
|
%% @doc
|
||||||
|
%% Map the key to a segmen.
|
||||||
get_segment(Key, SegmentCount) ->
|
get_segment(Key, SegmentCount) ->
|
||||||
erlang:phash2(Key) band (SegmentCount - 1).
|
erlang:phash2(Key) band (SegmentCount - 1).
|
||||||
|
|
||||||
|
|
||||||
|
-spec match_hour(tuple(), tuple(), fun((_,_) -> tuple()), tuple(), integer())
|
||||||
|
-> {integer(), integer()}|no_match.
|
||||||
|
%% @doc
|
||||||
|
%% Match the modified date of the object to an hour of day, where the hour of
|
||||||
|
%% the day is within a threshold. Used for identifying recently added keys and
|
||||||
|
%% mapping those keys to the right tictac tree of recent additions
|
||||||
|
%%
|
||||||
|
%% The ModDateFun must return a datetime tuple e.g. {{Y, M, D}, {H, M, S}}
|
||||||
|
match_hour(Key, Value, ModDateFun, Now, HoursToKeep) ->
|
||||||
|
{ModDate, {ModHr, _ModMin, _ModSec}} = ModDateFun(Key, Value),
|
||||||
|
{NowDate, {NowHr, _NowMin, _NowSec}} = calendar:now_to_datetime(Now),
|
||||||
|
{DayDiff, {HourDiff, _MinDiff, _SecDiff}}
|
||||||
|
= calendar:time_difference({ModDate, {ModHr, 0, 0}},
|
||||||
|
{NowDate, {NowHr, 0, 0}}),
|
||||||
|
case HoursToKeep >= DayDiff * 24 + HourDiff of
|
||||||
|
true ->
|
||||||
|
{ModDate, ModHr};
|
||||||
|
false ->
|
||||||
|
no_match
|
||||||
|
end.
|
||||||
|
|
||||||
|
-spec add_recent_kv(tuple(), tuple(),
|
||||||
|
fun((_,_) -> integer()),
|
||||||
|
integer(), recenttrees()) -> recenttrees().
|
||||||
|
%% @doc
|
||||||
|
%% Add a recently modified key and value to the appropriate tree of recent
|
||||||
|
%% keys and values.
|
||||||
|
add_recent_kv({ModDate, ModHour}, {Key, Value},
|
||||||
|
HashFun, HoursToKeep, RecentTrees) ->
|
||||||
|
case lists:keyfind({ModDate, ModHour}, 1, RecentTrees#recenttrees.trees) of
|
||||||
|
{{ModDate, ModHour}, Tree0} ->
|
||||||
|
Tree1 = add_kv(Tree0, Key, Value, HashFun),
|
||||||
|
RT1 = lists:keyreplace({ModDate, ModHour},
|
||||||
|
1,
|
||||||
|
RecentTrees#recenttrees.trees,
|
||||||
|
{{ModDate, ModHour}, Tree1}),
|
||||||
|
RecentTrees#recenttrees{trees = RT1};
|
||||||
|
not_found ->
|
||||||
|
NT0 = new_tree(recent, RecentTrees#recenttrees.size),
|
||||||
|
NT1 = add_kv(NT0, Key, Value, HashFun),
|
||||||
|
RT0 = [{{ModDate, ModHour}, NT1}|RecentTrees#recenttrees.trees],
|
||||||
|
case length(RT0) > HoursToKeep of
|
||||||
|
true ->
|
||||||
|
FoldFun =
|
||||||
|
fun({K, _V} , Acc) ->
|
||||||
|
case K < Acc of
|
||||||
|
true ->
|
||||||
|
K;
|
||||||
|
false ->
|
||||||
|
Acc
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
OldestK = lists:foldl(FoldFun, NT1, {ModDate, ModHour}),
|
||||||
|
RT1 = lists:keydelete(OldestK, 1, RT0),
|
||||||
|
RecentTrees#recenttrees{trees = RT1};
|
||||||
|
false ->
|
||||||
|
RecentTrees#recenttrees{trees = RT0}
|
||||||
|
end
|
||||||
|
end.
|
||||||
|
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
%%% Internal functions
|
%%% Internal functions
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
|
@ -67,7 +67,7 @@ many_put_compare(_Config) ->
|
||||||
{ok, Bookie3} = leveled_bookie:book_start(StartOpts3),
|
{ok, Bookie3} = leveled_bookie:book_start(StartOpts3),
|
||||||
lists:foreach(fun(ObjL) -> testutil:riakload(Bookie3, ObjL) end, CLs),
|
lists:foreach(fun(ObjL) -> testutil:riakload(Bookie3, ObjL) end, CLs),
|
||||||
|
|
||||||
% Now run a tictac query against both stores to see th extent to which
|
% Now run a tictac query against both stores to see the extent to which
|
||||||
% state between stores is consistent
|
% state between stores is consistent
|
||||||
|
|
||||||
TicTacQ = {tictactree_obj,
|
TicTacQ = {tictactree_obj,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue