Merge pull request #98 from martinsumner/mas-segid-cryptohash

Mas segid cryptohash

Commit 7763df3cef

12 changed files with 231 additions and 201 deletions
@@ -947,7 +947,7 @@ fetch_head(Key, Penciller, LedgerCache) ->
         [{Key, Head}] ->
             Head;
         [] ->
-            Hash = leveled_codec:magic_hash(Key),
+            Hash = leveled_codec:segment_hash(Key),
             case leveled_penciller:pcl_fetch(Penciller, Key, Hash) of
                 {Key, Head} ->
                     maybe_longrunning(SW, pcl_head),
@@ -65,6 +65,7 @@
          integer_now/0,
          riak_extract_metadata/2,
          magic_hash/1,
+         segment_hash/1,
          to_lookup/1]).

 -define(V1_VERS, 1).
@@ -79,6 +80,20 @@
                         integer()|null, % Hash of vclock - non-exportable
                         integer()}. % Size in bytes of real object

+
+-spec segment_hash(any()) -> {integer(), integer()}.
+%% @doc
+%% Return two integers - a 16-bit segment ID and a second integer of spare
+%% entropy.  The hashes should be used in blooms or indexes such that some
+%% speed can be gained if just the segment ID is known - but more can be
+%% gained should the extended hash (with the second element) also be known
+segment_hash(Key) when is_binary(Key) ->
+    <<SegmentID:16/integer, ExtraHash:32/integer, _Rest/binary>> =
+        crypto:hash(md5, Key),
+    {SegmentID, ExtraHash};
+segment_hash(Key) ->
+    segment_hash(term_to_binary(Key)).
+
 -spec magic_hash(any()) -> integer().
 %% @doc
 %% Use DJ Bernstein magic hash function. Note, this is more expensive than
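Note (not part of the committed diff): a minimal sketch of what the new hash pair looks like when called from an Erlang shell; the bucket and key values are illustrative only.

    %% Keys that are not binaries go through term_to_binary/1 before the md5.
    Key = {o, <<"Bucket">>, <<"Key">>, null},
    {SegmentID, ExtraHash} = leveled_codec:segment_hash(Key),
    %% SegmentID is taken from a 16-bit field, so it always sits in 0..65535.
    true = (SegmentID >= 0) andalso (SegmentID < 65536).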
@@ -87,10 +102,6 @@
 %% Hash function contains mysterious constants, some explanation here as to
 %% what they are -
 %% http://stackoverflow.com/questions/10696223/reason-for-5381-number-in-djb-hash-function
-magic_hash({?RIAK_TAG, Bucket, Key, _SubKey}) ->
-    magic_hash({Bucket, Key});
-magic_hash({?STD_TAG, Bucket, Key, _SubKey}) ->
-    magic_hash({Bucket, Key});
 magic_hash({binary, BinaryKey}) ->
     H = 5381,
     hash1(H, BinaryKey) band 16#FFFFFFFF;
@@ -516,7 +527,9 @@ parse_date(LMD, UnitMins, LimitMins, Now) ->

 -spec generate_ledgerkv(
         tuple(), integer(), any(), integer(), tuple()|infinity) ->
-        {any(), any(), any(), {integer()|no_lookup, integer()}, list()}.
+        {any(), any(), any(),
+            {{integer(), integer()}|no_lookup, integer()},
+            list()}.
 %% @doc
 %% Function to extract from an object the information necessary to populate
 %% the Penciller's ledger.
@@ -537,7 +550,7 @@ generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) ->
             _ ->
                 {active, TS}
         end,
-    Hash = magic_hash(PrimaryKey),
+    Hash = segment_hash(PrimaryKey),
    {MD, LastMods} = extract_metadata(Obj, Size, Tag),
    ObjHash = get_objhash(Tag, MD),
    Value = {SQN,
@@ -648,8 +648,8 @@ schedule_test_bycount(N) ->
     ?assertMatch(true, SecondsToCompaction0 < 5700),
     SecondsToCompaction1 = schedule_compaction([14], N, CurrentTS), % tomorrow!
     io:format("Seconds to compaction ~w~n", [SecondsToCompaction1]),
-    ?assertMatch(true, SecondsToCompaction1 > 81000),
-    ?assertMatch(true, SecondsToCompaction1 < 84300).
+    ?assertMatch(true, SecondsToCompaction1 >= 81180),
+    ?assertMatch(true, SecondsToCompaction1 =< 84780).


 simple_score_test() ->
@@ -144,6 +144,8 @@
             ++ "leaving SnapshotCount=~w and MinSQN=~w"}},
         {"P0040",
             {info, "Archiving filename ~s as unused at startup"}},
+        {"P0041",
+            {info, "Penciller manifest switched from SQN ~w to ~w"}},

         {"PC001",
             {info, "Penciller's clerk ~w started with owner ~w"}},
@@ -254,7 +254,7 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) ->
     K = {o, "Bucket" ++ BNumber, "Key" ++ KNumber},
     RandKey = {K, {Count + 1,
                     {active, infinity},
-                    leveled_codec:magic_hash(K),
+                    leveled_codec:segment_hash(K),
                     null}},
     generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange).

@@ -315,21 +315,22 @@ pcl_fetchlevelzero(Pid, Slot) ->
 %% The Key needs to be hashable (i.e. have a tag which indicates that the key
 %% can be looked up) - index entries are not hashable for example.
 %%
-%% If the hash is already known, call pcl_fetch/3 as magic_hash is a
+%% If the hash is already known, call pcl_fetch/3 as segment_hash is a
 %% relatively expensive hash function
 pcl_fetch(Pid, Key) ->
-    Hash = leveled_codec:magic_hash(Key),
+    Hash = leveled_codec:segment_hash(Key),
     if
         Hash /= no_lookup ->
             gen_server:call(Pid, {fetch, Key, Hash}, infinity)
     end.

--spec pcl_fetch(pid(), tuple(), integer()) -> {tuple(), tuple()}|not_present.
+-spec pcl_fetch(pid(), tuple(), {integer(), integer()}) ->
+        {tuple(), tuple()}|not_present.
 %% @doc
 %% Fetch a key, return the first (highest SQN) occurrence of that Key along
 %% with the value.
 %%
-%% Hash should be result of leveled_codec:magic_hash(Key)
+%% Hash should be result of leveled_codec:segment_hash(Key)
 pcl_fetch(Pid, Key, Hash) ->
     gen_server:call(Pid, {fetch, Key, Hash}, infinity).

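Note (not part of the committed diff): a hedged sketch of the pattern the doc comment above describes - a caller that already holds the hash pair can call pcl_fetch/3 and avoid a second md5-based segment_hash/1 calculation. Penciller is assumed here to be the pid of a running leveled_penciller; the key is illustrative only.

    Key = {o, <<"Bucket">>, <<"Key1">>, null},
    Hash = leveled_codec:segment_hash(Key),
    %% pcl_fetch/2 would recompute this hash internally; pcl_fetch/3 does not.
    Result = leveled_penciller:pcl_fetch(Penciller, Key, Hash).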
@@ -367,7 +368,7 @@ pcl_fetchnextkey(Pid, StartKey, EndKey, AccFun, InitAcc) ->
 %% If the key is not present, it will be assumed that a higher sequence number
 %% tombstone once existed, and false will be returned.
 pcl_checksequencenumber(Pid, Key, SQN) ->
-    Hash = leveled_codec:magic_hash(Key),
+    Hash = leveled_codec:segment_hash(Key),
     if
         Hash /= no_lookup ->
             gen_server:call(Pid, {check_sqn, Key, Hash, SQN}, infinity)
@@ -672,6 +673,8 @@ handle_call(doom, _From, State) ->

 handle_cast({manifest_change, NewManifest}, State) ->
     NewManSQN = leveled_pmanifest:get_manifest_sqn(NewManifest),
+    OldManSQN = leveled_pmanifest:get_manifest_sqn(State#state.manifest),
+    leveled_log:log("P0041", [OldManSQN, NewManSQN]),
     ok = leveled_pclerk:clerk_promptdeletions(State#state.clerk, NewManSQN),
     UpdManifest = leveled_pmanifest:merge_snapshot(State#state.manifest,
                                                    NewManifest),
@@ -1317,7 +1320,7 @@ generate_randomkeys(Count, SQN, Acc) ->
     RandKey = {K,
                 {SQN,
                     {active, infinity},
-                    leveled_codec:magic_hash(K),
+                    leveled_codec:segment_hash(K),
                     null}},
     generate_randomkeys(Count - 1, SQN + 1, [RandKey|Acc]).

@@ -1347,7 +1350,7 @@ maybe_pause_push(PCL, KL) ->
     T1 = lists:foldl(fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) ->
                         UpdSL = [{K, V}|AccSL],
                         SQN = leveled_codec:strip_to_seqonly({K, V}),
-                        H = leveled_codec:magic_hash(K),
+                        H = leveled_codec:segment_hash(K),
                         UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H),
                         {UpdSL, UpdIdx, min(SQN, MinSQN), max(SQN, MaxSQN)}
                     end,
@@ -1366,7 +1369,7 @@ maybe_pause_push(PCL, KL) ->

 %% old test data doesn't have the magic hash
 add_missing_hash({K, {SQN, ST, MD}}) ->
-    {K, {SQN, ST, leveled_codec:magic_hash(K), MD}}.
+    {K, {SQN, ST, leveled_codec:segment_hash(K), MD}}.


 clean_dir_test() ->
@@ -1128,6 +1128,49 @@ snapshot_timeout_test() ->
     Man10 = release_snapshot(Man9, ?PHANTOM_PID),
     ?assertMatch(0, length(Man10#manifest.snapshots)).

+potential_issue_test() ->
+    Manifest =
+        {manifest,{array,9,0,[],
+            {[],
+             [{manifest_entry,{o_rkv,"Bucket","Key10",null},
+                  {o_rkv,"Bucket","Key12949",null},
+                  "<0.313.0>","./16_1_0.sst"},
+              {manifest_entry,{o_rkv,"Bucket","Key129490",null},
+                  {o_rkv,"Bucket","Key158981",null},
+                  "<0.315.0>","./16_1_1.sst"},
+              {manifest_entry,{o_rkv,"Bucket","Key158982",null},
+                  {o_rkv,"Bucket","Key188472",null},
+                  "<0.316.0>","./16_1_2.sst"}],
+             {idxt,1,
+                 {{[{{o_rkv,"Bucket1","Key1",null},
+                     {manifest_entry,{o_rkv,"Bucket","Key9083",null},
+                         {o_rkv,"Bucket1","Key1",null},
+                         "<0.320.0>","./16_1_6.sst"}}]},
+                  {1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}},
+             {idxt,0,{{},{0,nil}}},
+             {idxt,0,{{},{0,nil}}},
+             {idxt,0,{{},{0,nil}}},
+             {idxt,0,{{},{0,nil}}},
+             {idxt,0,{{},{0,nil}}},
+             {idxt,0,{{},{0,nil}}},
+             []}},
+         19,[],0,
+         {dict,0,16,16,8,80,48,
+             {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
+             {{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}},
+         2},
+    Range1 = range_lookup(Manifest,
+                            1,
+                            {o_rkv, "Bucket", null, null},
+                            {o_rkv, "Bucket", null, null}),
+    Range2 = range_lookup(Manifest,
+                            2,
+                            {o_rkv, "Bucket", null, null},
+                            {o_rkv, "Bucket", null, null}),
+    io:format("Range in Level 1 ~w~n", [Range1]),
+    io:format("Range in Level 2 ~w~n", [Range2]),
+    ?assertMatch(3, length(Range1)),
+    ?assertMatch(1, length(Range2)).


 -endif.
@@ -50,7 +50,8 @@
 %%% API
 %%%============================================================================

--spec prepare_for_index(index_array(), integer()|no_lookup) -> index_array().
+-spec prepare_for_index(index_array(), {integer(), integer()}|no_lookup)
+                                                            -> index_array().
 %% @doc
 %% Add the hash of a key to the index.  This is 'prepared' in the sense that
 %% this index is not used until it is loaded into the main index.
@@ -95,7 +96,7 @@ new_index() ->
 clear_index(_L0Index) ->
     new_index().

--spec check_index(integer(), index_array()) -> list(integer()).
+-spec check_index({integer(), integer()}, index_array()) -> list(integer()).
 %% @doc
 %% return a list of positions in the list of cache arrays that may contain the
 %% key associated with the hash being checked
@@ -158,9 +159,9 @@ to_list(Slots, FetchFun) ->
 %% checked (with the most recently received cache being checked first) until a
 %% match is found.
 check_levelzero(Key, PosList, TreeList) ->
-    check_levelzero(Key, leveled_codec:magic_hash(Key), PosList, TreeList).
+    check_levelzero(Key, leveled_codec:segment_hash(Key), PosList, TreeList).

--spec check_levelzero(tuple(), integer(), list(integer()), list())
+-spec check_levelzero(tuple(), {integer(), integer()}, list(integer()), list())
                                             -> {boolean(), tuple|not_found}.
 %% @doc
 %% Check for the presence of a given Key in the Level Zero cache, with the
@@ -204,10 +205,10 @@ find_pos(<<0:1/integer, NxtSlot:7/integer, T/binary>>, Hash, PosList, _SlotID) ->
     find_pos(T, Hash, PosList, NxtSlot).


-split_hash(Hash) ->
-    Slot = Hash band 255,
-    H0 = (Hash bsr 8) band 8388607,
-    {Slot, H0}.
+split_hash({SegmentID, ExtraHash}) ->
+    Slot = SegmentID band 255,
+    H0 = (SegmentID bsr 8) bor (ExtraHash bsl 8),
+    {Slot, H0 band 8388607}.

 check_slotlist(Key, _Hash, CheckList, TreeList) ->
     SlotCheckFun =
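Note (not part of the committed diff): reading the replacement split_hash/1 above, the low byte of the segment ID now picks one of 256 level-zero index slots, and the remaining segment bits are combined with the extra hash and masked to 23 bits (8388607 is 2^23 - 1). A small sketch with made-up values:

    {SegmentID, ExtraHash} = {16#ABCD, 16#12345678},          % illustrative pair
    Slot = SegmentID band 255,                                 % slot choice, 0..255
    H0 = ((SegmentID bsr 8) bor (ExtraHash bsl 8)) band 8388607, % 23-bit position hash
    {Slot, H0}.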
@@ -358,7 +359,7 @@ with_index_test_() ->
 with_index_test2() ->
     IndexPrepareFun =
         fun({K, _V}, Acc) ->
-            H = leveled_codec:magic_hash(K),
+            H = leveled_codec:segment_hash(K),
             prepare_for_index(Acc, H)
         end,
     LoadFun =
@@ -382,7 +383,7 @@ with_index_test2() ->

     CheckFun =
         fun({K, V}, {L0Idx, L0Cache}) ->
-            H = leveled_codec:magic_hash(K),
+            H = leveled_codec:segment_hash(K),
             PosList = check_index(H, L0Idx),
             ?assertMatch({true, {K, V}},
                             check_slotlist(K, H, PosList, L0Cache)),
@@ -65,13 +65,12 @@
 -include("include/leveled.hrl").

 -define(MAX_SLOTS, 256).
--define(LOOK_SLOTSIZE, 128). % This is not configurable
--define(LOOK_BLOCKSIZE, {24, 32}).
+-define(LOOK_SLOTSIZE, 128). % Maximum of 128
+-define(LOOK_BLOCKSIZE, {24, 32}). % 4x + y = ?LOOK_SLOTSIZE
 -define(NOLOOK_SLOTSIZE, 256).
--define(NOLOOK_BLOCKSIZE, {56, 32}).
+-define(NOLOOK_BLOCKSIZE, {56, 32}). % 4x + y = ?NOLOOK_SLOTSIZE
 -define(COMPRESSION_LEVEL, 1).
 -define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_LEVEL}]).
-% -define(LEVEL_BLOOM_BITS, [{0, 8}, {1, 10}, {2, 8}, {default, 6}]).
 -define(MERGE_SCANWIDTH, 16).
 -define(DISCARD_EXT, ".discarded").
 -define(DELETE_TIMEOUT, 10000).
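Note (not part of the committed diff): the new "4x + y" comments appear to record the slot arithmetic implied by the block sizes - four blocks of 24 keys plus a final block of 32 gives 4*24 + 32 = 128 = ?LOOK_SLOTSIZE, and four blocks of 56 plus a final block of 32 gives 4*56 + 32 = 256 = ?NOLOOK_SLOTSIZE.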
@@ -237,12 +236,12 @@ sst_newlevelzero(RootPath, Filename, Slots, FetchFun, Penciller, MaxSQN) ->
 -spec sst_get(pid(), tuple()) -> tuple()|not_present.
 %% @doc
 %% Return a Key, Value pair matching a Key or not_present if the Key is not in
-%% the store.  The magic_hash function is used to accelerate the seeking of
+%% the store.  The segment_hash function is used to accelerate the seeking of
 %% keys, sst_get/3 should be used directly if this has already been calculated
 sst_get(Pid, LedgerKey) ->
-    sst_get(Pid, LedgerKey, leveled_codec:magic_hash(LedgerKey)).
+    sst_get(Pid, LedgerKey, leveled_codec:segment_hash(LedgerKey)).

--spec sst_get(pid(), tuple(), integer()) -> tuple()|not_present.
+-spec sst_get(pid(), tuple(), {integer(), integer()}) -> tuple()|not_present.
 %% @doc
 %% Return a Key, Value pair matching a Key or not_present if the Key is not in
 %% the store (with the magic hash precalculated).
@@ -554,7 +553,7 @@ fetch(LedgerKey, Hash, State) ->
             State#state{blockindex_cache = BlockIndexCache}};
         <<BlockLengths:24/binary, BlockIdx/binary>> ->
             PosList = find_pos(BlockIdx,
-                                double_hash(Hash, LedgerKey),
+                                extra_hash(Hash),
                                 [],
                                 0),
             case PosList of
@@ -808,9 +807,9 @@ generate_binary_slot(Lookup, KVL) ->
         fun({K, V}, {PosBinAcc, NoHashCount, HashAcc}) ->

             {_SQN, H1} = leveled_codec:strip_to_seqnhashonly({K, V}),
-            case is_integer(H1) of
+            PosH1 = extra_hash(H1),
+            case is_integer(PosH1) of
                 true ->
-                    PosH1 = double_hash(H1, K),
                     case NoHashCount of
                         0 ->
                             {<<1:1/integer,
@@ -1003,7 +1002,7 @@ binaryslot_get(FullBin, Key, Hash) ->
     <<B1P:32/integer, _R/binary>> = BlockLengths,
     <<PosBinIndex:B1P/binary, Blocks/binary>> = Rest,
     PosList = find_pos(PosBinIndex,
-                        double_hash(Hash, Key),
+                        extra_hash(Hash),
                         [],
                         0),
     {fetch_value(PosList, BlockLengths, Blocks, Key),
@@ -1186,9 +1185,10 @@ block_offsetandlength(BlockLengths, BlockID) ->
             {BlocksPos, B1L + B2L + B3L + B4L, B5L}
     end.

-double_hash(Hash, Key) ->
-    H2 = erlang:phash2(Key),
-    (Hash bxor H2) band 32767.
+extra_hash({SegHash, _ExtraHash}) when is_integer(SegHash) ->
+    SegHash band 32767;
+extra_hash(NotHash) ->
+    NotHash.

 fetch_value([], _BlockLengths, _Blocks, _Key) ->
     not_present;
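Note (not part of the committed diff): extra_hash/1 above takes the {SegmentID, ExtraHash} pair produced by segment_hash/1, keeps only the low 15 bits of the first element for the slot position index, and passes non-hash terms through unchanged. Inside the module these illustrative calls would evaluate as follows (values made up):

    15 = extra_hash({32783, 912345678}),    % 32783 band 32767 = 15
    no_lookup = extra_hash(no_lookup).      % non-tuple terms pass straight through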
@@ -1538,7 +1538,7 @@ indexed_list_test() ->
     io:format(user, "~nIndexed list timing test:~n", []),
     N = 150,
     KVL0 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 4)),
-    KVL1 = lists:sublist(KVL0, 128),
+    KVL1 = lists:sublist(KVL0, ?LOOK_SLOTSIZE),

     SW0 = os:timestamp(),

@@ -1548,15 +1548,15 @@ indexed_list_test() ->
                 [timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]),

     {TestK1, TestV1} = lists:nth(20, KVL1),
-    MH1 = leveled_codec:magic_hash(TestK1),
+    MH1 = leveled_codec:segment_hash(TestK1),
     {TestK2, TestV2} = lists:nth(40, KVL1),
-    MH2 = leveled_codec:magic_hash(TestK2),
+    MH2 = leveled_codec:segment_hash(TestK2),
     {TestK3, TestV3} = lists:nth(60, KVL1),
-    MH3 = leveled_codec:magic_hash(TestK3),
+    MH3 = leveled_codec:segment_hash(TestK3),
     {TestK4, TestV4} = lists:nth(80, KVL1),
-    MH4 = leveled_codec:magic_hash(TestK4),
+    MH4 = leveled_codec:segment_hash(TestK4),
     {TestK5, TestV5} = lists:nth(100, KVL1),
-    MH5 = leveled_codec:magic_hash(TestK5),
+    MH5 = leveled_codec:segment_hash(TestK5),

     test_binary_slot(FullBin, TestK1, MH1, {TestK1, TestV1}),
     test_binary_slot(FullBin, TestK2, MH2, {TestK2, TestV2}),
@@ -1573,15 +1573,15 @@ indexed_list_mixedkeys_test() ->
     {_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),

     {TestK1, TestV1} = lists:nth(4, KVL1),
-    MH1 = leveled_codec:magic_hash(TestK1),
+    MH1 = leveled_codec:segment_hash(TestK1),
     {TestK2, TestV2} = lists:nth(8, KVL1),
-    MH2 = leveled_codec:magic_hash(TestK2),
+    MH2 = leveled_codec:segment_hash(TestK2),
     {TestK3, TestV3} = lists:nth(12, KVL1),
-    MH3 = leveled_codec:magic_hash(TestK3),
+    MH3 = leveled_codec:segment_hash(TestK3),
     {TestK4, TestV4} = lists:nth(16, KVL1),
-    MH4 = leveled_codec:magic_hash(TestK4),
+    MH4 = leveled_codec:segment_hash(TestK4),
     {TestK5, TestV5} = lists:nth(20, KVL1),
-    MH5 = leveled_codec:magic_hash(TestK5),
+    MH5 = leveled_codec:segment_hash(TestK5),

     test_binary_slot(FullBin, TestK1, MH1, {TestK1, TestV1}),
     test_binary_slot(FullBin, TestK2, MH2, {TestK2, TestV2}),
@@ -1598,15 +1598,17 @@ indexed_list_mixedkeys2_test() ->
     Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2,
     {_PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
     lists:foreach(fun({K, V}) ->
-                        MH = leveled_codec:magic_hash(K),
+                        MH = leveled_codec:segment_hash(K),
                         test_binary_slot(FullBin, K, MH, {K, V})
                     end,
                     KVL1).

 indexed_list_allindexkeys_test() ->
-    Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
+    Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
+                            ?LOOK_SLOTSIZE),
     {PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
-    ?assertMatch(<<_BL:24/binary, 127:8/integer>>, PosBinIndex1),
+    EmptySlotSize = ?LOOK_SLOTSIZE - 1,
+    ?assertMatch(<<_BL:24/binary, EmptySlotSize:8/integer>>, PosBinIndex1),
     % SW = os:timestamp(),
     BinToList = binaryslot_tolist(FullBin),
     % io:format(user,
@@ -1629,9 +1631,11 @@ indexed_list_allindexkeys_nolookup_test() ->
     ?assertMatch(Keys, binaryslot_trimmedlist(FullBin, all, all)).

 indexed_list_allindexkeys_trimmed_test() ->
-    Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), 128),
+    Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)),
+                            ?LOOK_SLOTSIZE),
     {PosBinIndex1, FullBin, _HL, _LK} = generate_binary_slot(lookup, Keys),
-    ?assertMatch(<<_BL:24/binary, 127:8/integer>>, PosBinIndex1),
+    EmptySlotSize = ?LOOK_SLOTSIZE - 1,
+    ?assertMatch(<<_BL:24/binary, EmptySlotSize:8/integer>>, PosBinIndex1),
     ?assertMatch(Keys, binaryslot_trimmedlist(FullBin,
                                                 {i,
                                                     "Bucket",
@@ -1656,9 +1660,9 @@ indexed_list_allindexkeys_trimmed_test() ->
     ?assertMatch(11, length(O2)),
     ?assertMatch(R2, O2),

-    {SK3, _} = lists:nth(127, Keys),
-    {EK3, _} = lists:nth(128, Keys),
-    R3 = lists:sublist(Keys, 127, 2),
+    {SK3, _} = lists:nth(?LOOK_SLOTSIZE - 1, Keys),
+    {EK3, _} = lists:nth(?LOOK_SLOTSIZE, Keys),
+    R3 = lists:sublist(Keys, ?LOOK_SLOTSIZE - 1, 2),
     O3 = binaryslot_trimmedlist(FullBin, SK3, EK3),
     ?assertMatch(2, length(O3)),
     ?assertMatch(R3, O3).
@@ -1682,7 +1686,7 @@ indexed_list_mixedkeys_bitflip_test() ->
         end,

     {TestK1, _TestV1} = lists:nth(20, KVL1),
-    MH1 = leveled_codec:magic_hash(TestK1),
+    MH1 = leveled_codec:segment_hash(TestK1),

     test_binary_slot(FullBin0, TestK1, MH1, not_present),
     ToList = binaryslot_tolist(FullBin0),
@@ -1920,7 +1924,7 @@ simple_persisted_test() ->
                         In = lists:keymember(K, 1, KVList1),
                         case {K > FirstKey, LastKey > K, In} of
                             {true, true, false} ->
-                                [{K, leveled_codec:magic_hash(K), V}|Acc];
+                                [{K, leveled_codec:segment_hash(K), V}|Acc];
                             _ ->
                                 Acc
                         end
@@ -16,8 +16,8 @@
          check_hash/2
          ]).

--define(BITS_PER_KEY, 8). % Must be 8 or 4
--define(INTEGER_SIZE, ?BITS_PER_KEY * 8).
+-define(BLOOM_SIZE_BYTES, 16).
+-define(INTEGER_SIZE, 128).
 -define(BAND_MASK, ?INTEGER_SIZE - 1).


@@ -34,9 +34,8 @@ create_bloom(HashList) ->
             <<>>;
         L when L > 32 ->
             add_hashlist(HashList,
-                            15,
-                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                            0, 0, 0, 0, 0, 0);
+                            7,
+                            0, 0, 0, 0, 0, 0, 0, 0);
         L when L > 16 ->
             add_hashlist(HashList, 3, 0, 0, 0, 0);
         _ ->
@@ -48,11 +47,11 @@ create_bloom(HashList) ->
 %% Check for the presence of a given hash within a bloom
 check_hash(_Hash, <<>>) ->
     false;
-check_hash(Hash, BloomBin) ->
-    SlotSplit = (byte_size(BloomBin) div ?BITS_PER_KEY) - 1,
-    {Slot, H0, H1} = split_hash(Hash, SlotSplit),
-    Mask = get_mask(H0, H1),
-    Pos = Slot * ?BITS_PER_KEY,
+check_hash({_SegHash, Hash}, BloomBin) ->
+    SlotSplit = (byte_size(BloomBin) div ?BLOOM_SIZE_BYTES) - 1,
+    {Slot, Hashes} = split_hash(Hash, SlotSplit),
+    Mask = get_mask(Hashes),
+    Pos = Slot * ?BLOOM_SIZE_BYTES,
     IntSize = ?INTEGER_SIZE,
     <<_H:Pos/binary, CheckInt:IntSize/integer, _T/binary>> = BloomBin,
     case CheckInt band Mask of
@@ -69,19 +68,13 @@ check_hash(Hash, BloomBin) ->
 split_hash(Hash, SlotSplit) ->
     Slot = Hash band SlotSplit,
     H0 = (Hash bsr 4) band (?BAND_MASK),
-    H1 = (Hash bsr 10) band (?BAND_MASK),
-    H3 = (Hash bsr 16) band (?BAND_MASK),
-    H4 = (Hash bsr 22) band (?BAND_MASK),
-    Slot0 = (Hash bsr 28) band SlotSplit,
-    {Slot bxor Slot0, H0 bxor H3, H1 bxor H4}.
+    H1 = (Hash bsr 11) band (?BAND_MASK),
+    H2 = (Hash bsr 18) band (?BAND_MASK),
+    H3 = (Hash bsr 25) band (?BAND_MASK),
+    {Slot, [H0, H1, H2, H3]}.

-get_mask(H0, H1) ->
-    case H0 == H1 of
-        true ->
-            1 bsl H0;
-        false ->
-            (1 bsl H0) + (1 bsl H1)
-    end.
+get_mask([H0, H1, H2, H3]) ->
+    (1 bsl H0) bor (1 bsl H1) bor (1 bsl H2) bor (1 bsl H3).


 %% This looks ugly and clunky, but in tests it was quicker than modifying an
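Note (not part of the committed diff): a short sketch, using a made-up hash value, of how the reworked bloom now sets bits - four positions are taken from different shifts of the extra hash, each masked with ?BAND_MASK (?INTEGER_SIZE - 1 = 127, so positions fall in 0..127), and OR'd into a single mask for the chosen 128-bit slot integer.

    Hash = 16#9E3779B9,                     % illustrative 32-bit extra hash
    H0 = (Hash bsr 4) band 127,
    H1 = (Hash bsr 11) band 127,
    H2 = (Hash bsr 18) band 127,
    H3 = (Hash bsr 25) band 127,
    Mask = (1 bsl H0) bor (1 bsl H1) bor (1 bsl H2) bor (1 bsl H3).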
@@ -90,9 +83,9 @@ get_mask(H0, H1) ->
 add_hashlist([], _S, S0, S1) ->
     IntSize = ?INTEGER_SIZE,
     <<S0:IntSize/integer, S1:IntSize/integer>>;
-add_hashlist([TopHash|T], SlotSplit, S0, S1) ->
-    {Slot, H0, H1} = split_hash(TopHash, SlotSplit),
-    Mask = get_mask(H0, H1),
+add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1) ->
+    {Slot, Hashes} = split_hash(TopHash, SlotSplit),
+    Mask = get_mask(Hashes),
     case Slot of
         0 ->
             add_hashlist(T, SlotSplit, S0 bor Mask, S1);
@@ -104,9 +97,9 @@ add_hashlist([], _S, S0, S1, S2, S3) ->
     IntSize = ?INTEGER_SIZE,
     <<S0:IntSize/integer, S1:IntSize/integer,
         S2:IntSize/integer, S3:IntSize/integer>>;
-add_hashlist([TopHash|T], SlotSplit, S0, S1, S2, S3) ->
-    {Slot, H0, H1} = split_hash(TopHash, SlotSplit),
-    Mask = get_mask(H0, H1),
+add_hashlist([{_SegHash, TopHash}|T], SlotSplit, S0, S1, S2, S3) ->
+    {Slot, Hashes} = split_hash(TopHash, SlotSplit),
+    Mask = get_mask(Hashes),
     case Slot of
         0 ->
             add_hashlist(T, SlotSplit, S0 bor Mask, S1, S2, S3);
@@ -118,104 +111,50 @@ add_hashlist([TopHash|T], SlotSplit, S0, S1, S2, S3) ->
             add_hashlist(T, SlotSplit, S0, S1, S2, S3 bor Mask)
     end.

-add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
-                SA, SB, SC, SD, SE, SF) ->
+add_hashlist([], _S, S0, S1, S2, S3, S4, S5, S6, S7) ->
     IntSize = ?INTEGER_SIZE,
     <<S0:IntSize/integer, S1:IntSize/integer,
         S2:IntSize/integer, S3:IntSize/integer,
         S4:IntSize/integer, S5:IntSize/integer,
-        S6:IntSize/integer, S7:IntSize/integer,
-        S8:IntSize/integer, S9:IntSize/integer,
-        SA:IntSize/integer, SB:IntSize/integer,
-        SC:IntSize/integer, SD:IntSize/integer,
-        SE:IntSize/integer, SF:IntSize/integer>>;
-add_hashlist([TopHash|T],
+        S6:IntSize/integer, S7:IntSize/integer>>;
+add_hashlist([{_SegHash, TopHash}|T],
                 SlotSplit,
-                S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
-                SA, SB, SC, SD, SE, SF) ->
-    {Slot, H0, H1} = split_hash(TopHash, SlotSplit),
-    Mask = get_mask(H0, H1),
+                S0, S1, S2, S3, S4, S5, S6, S7) ->
+    {Slot, Hashes} = split_hash(TopHash, SlotSplit),
+    Mask = get_mask(Hashes),
     case Slot of
         0 ->
             add_hashlist(T,
                             SlotSplit,
-                            S0 bor Mask, S1, S2, S3, S4, S5, S6, S7, S8, S9,
-                            SA, SB, SC, SD, SE, SF);
+                            S0 bor Mask, S1, S2, S3, S4, S5, S6, S7);
         1 ->
             add_hashlist(T,
                             SlotSplit,
-                            S0, S1 bor Mask, S2, S3, S4, S5, S6, S7, S8, S9,
-                            SA, SB, SC, SD, SE, SF);
+                            S0, S1 bor Mask, S2, S3, S4, S5, S6, S7);
         2 ->
             add_hashlist(T,
                             SlotSplit,
-                            S0, S1, S2 bor Mask, S3, S4, S5, S6, S7, S8, S9,
-                            SA, SB, SC, SD, SE, SF);
+                            S0, S1, S2 bor Mask, S3, S4, S5, S6, S7);
         3 ->
             add_hashlist(T,
                             SlotSplit,
-                            S0, S1, S2, S3 bor Mask, S4, S5, S6, S7, S8, S9,
-                            SA, SB, SC, SD, SE, SF);
+                            S0, S1, S2, S3 bor Mask, S4, S5, S6, S7);
         4 ->
             add_hashlist(T,
                             SlotSplit,
-                            S0, S1, S2, S3, S4 bor Mask, S5, S6, S7, S8, S9,
-                            SA, SB, SC, SD, SE, SF);
+                            S0, S1, S2, S3, S4 bor Mask, S5, S6, S7);
         5 ->
             add_hashlist(T,
                             SlotSplit,
-                            S0, S1, S2, S3, S4, S5 bor Mask, S6, S7, S8, S9,
-                            SA, SB, SC, SD, SE, SF);
+                            S0, S1, S2, S3, S4, S5 bor Mask, S6, S7);
         6 ->
             add_hashlist(T,
                             SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6 bor Mask, S7, S8, S9,
-                            SA, SB, SC, SD, SE, SF);
+                            S0, S1, S2, S3, S4, S5, S6 bor Mask, S7);
         7 ->
             add_hashlist(T,
                             SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6, S7 bor Mask, S8, S9,
-                            SA, SB, SC, SD, SE, SF);
-        8 ->
-            add_hashlist(T,
-                            SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6, S7, S8 bor Mask, S9,
-                            SA, SB, SC, SD, SE, SF);
-        9 ->
-            add_hashlist(T,
-                            SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6, S7, S8, S9 bor Mask,
-                            SA, SB, SC, SD, SE, SF);
-        10 ->
-            add_hashlist(T,
-                            SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
-                            SA bor Mask, SB, SC, SD, SE, SF);
-        11 ->
-            add_hashlist(T,
-                            SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
-                            SA, SB bor Mask, SC, SD, SE, SF);
-        12 ->
-            add_hashlist(T,
-                            SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
-                            SA, SB, SC bor Mask, SD, SE, SF);
-        13 ->
-            add_hashlist(T,
-                            SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
-                            SA, SB, SC, SD bor Mask, SE, SF);
-        14 ->
-            add_hashlist(T,
-                            SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
-                            SA, SB, SC, SD, SE bor Mask, SF);
-        15 ->
-            add_hashlist(T,
-                            SlotSplit,
-                            S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
-                            SA, SB, SC, SD, SE, SF bor Mask)
+                            S0, S1, S2, S3, S4, S5, S6, S7 bor Mask)
     end.


@@ -239,7 +178,7 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) ->
     BNumber = string:right(integer_to_list(BucketLow + BRand), 4, $0),
     KNumber = string:right(integer_to_list(leveled_rand:uniform(10000)), 6, $0),
     LK = leveled_codec:to_ledgerkey("Bucket" ++ BNumber, "Key" ++ KNumber, o),
-    Chunk = leveled_rand:rand_bytes(64),
+    Chunk = leveled_rand:rand_bytes(16),
     {_B, _K, MV, _H, _LMs} =
         leveled_codec:generate_ledgerkv(LK, Seqn, Chunk, 64, infinity),
     generate_randomkeys(Seqn + 1,
@@ -254,7 +193,7 @@ get_hashlist(N) ->
     KVL = lists:sublist(KVL0, N),
     HashFun =
         fun({K, _V}) ->
-            leveled_codec:magic_hash(K)
+            leveled_codec:segment_hash(K)
         end,
     lists:map(HashFun, KVL).

@@ -283,46 +222,50 @@ empty_bloom_test() ->
     ?assertMatch({0, 4},
                     check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0})).

-bloom_test() ->
-    test_bloom(128),
-    test_bloom(64),
-    test_bloom(32),
-    test_bloom(16),
-    test_bloom(8).
+bloom_test_() ->
+    {timeout, 20, fun bloom_test_ranges/0}.

-test_bloom(N) ->
-    HashList1 = get_hashlist(N),
-    HashList2 = get_hashlist(N),
-    HashList3 = get_hashlist(N),
-    HashList4 = get_hashlist(N),
+bloom_test_ranges() ->
+    test_bloom(128, 2000),
+    test_bloom(64, 100),
+    test_bloom(32, 100),
+    test_bloom(16, 100),
+    test_bloom(8, 100).
+
+test_bloom(N, Runs) ->
+    ListOfHashLists =
+        lists:map(fun(_X) -> get_hashlist(N) end, lists:seq(1, Runs)),

     SWa = os:timestamp(),
-    BloomBin1 = create_bloom(HashList1),
-    BloomBin2 = create_bloom(HashList2),
-    BloomBin3 = create_bloom(HashList3),
-    BloomBin4 = create_bloom(HashList4),
+    ListOfBlooms =
+        lists:map(fun(HL) -> create_bloom(HL) end, ListOfHashLists),
     TSa = timer:now_diff(os:timestamp(), SWa),

     SWb = os:timestamp(),
-    check_all_hashes(BloomBin1, HashList1),
-    check_all_hashes(BloomBin2, HashList2),
-    check_all_hashes(BloomBin3, HashList3),
-    check_all_hashes(BloomBin4, HashList4),
+    lists:foreach(fun(Nth) ->
+                        HL = lists:nth(Nth, ListOfHashLists),
+                        BB = lists:nth(Nth, ListOfBlooms),
+                        check_all_hashes(BB, HL)
+                    end,
+                    lists:seq(1, Runs)),
     TSb = timer:now_diff(os:timestamp(), SWb),

     HashPool = get_hashlist(N * 2),
-    HashListOut1 = lists:sublist(lists:subtract(HashPool, HashList1), N),
-    HashListOut2 = lists:sublist(lists:subtract(HashPool, HashList2), N),
-    HashListOut3 = lists:sublist(lists:subtract(HashPool, HashList3), N),
-    HashListOut4 = lists:sublist(lists:subtract(HashPool, HashList4), N),
+    ListOfMisses =
+        lists:map(fun(HL) ->
+                        lists:sublist(lists:subtract(HashPool, HL), N)
+                    end,
+                    ListOfHashLists),

     SWc = os:timestamp(),
-    C0 = {0, 0},
-    C1 = check_neg_hashes(BloomBin1, HashListOut1, C0),
-    C2 = check_neg_hashes(BloomBin2, HashListOut2, C1),
-    C3 = check_neg_hashes(BloomBin3, HashListOut3, C2),
-    C4 = check_neg_hashes(BloomBin4, HashListOut4, C3),
-    {Pos, Neg} = C4,
+    {Pos, Neg} =
+        lists:foldl(fun(Nth, Acc) ->
+                        HL = lists:nth(Nth, ListOfMisses),
+                        BB = lists:nth(Nth, ListOfBlooms),
+                        check_neg_hashes(BB, HL, Acc)
+                    end,
+                    {0, 0},
+                    lists:seq(1, Runs)),
     FPR = Pos / (Pos + Neg),
     TSc = timer:now_diff(os:timestamp(), SWc),
@@ -332,5 +275,4 @@ test_bloom(N) ->
                 [N, TSa, TSb, TSc, FPR]).


-
 -endif.
@@ -214,7 +214,7 @@ search_range(StartRange, EndRange, Tree, StartKeyFun) ->
     EndRangeFun =
         fun(ER, _FirstRHSKey, FirstRHSValue) ->
             StartRHSKey = StartKeyFun(FirstRHSValue),
-            ER >= StartRHSKey
+            not leveled_codec:endkey_passed(ER, StartRHSKey)
         end,
     case Tree of
         {tree, _L, T} ->
@@ -405,8 +405,12 @@ idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) ->
         [{FirstRHSKey, FirstRHSValue}|_Rest] ->
             case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of
                 true ->
+                    % The start key is not after the end of the range
+                    % and so this should be included in the range
                     Output ++ LHS ++ [{FirstRHSKey, FirstRHSValue}];
                 false ->
+                    % the start key of the next key is after the end
+                    % of the range and so should not be included
                     Output ++ LHS
             end
     end;
@@ -804,4 +808,22 @@ empty_test() ->
     T2 = empty(idxt),
     ?assertMatch(0, tsize(T2)).

+search_range_idx_test() ->
+    Tree =
+        {idxt,1,
+            {{[{{o_rkv,"Bucket1","Key1",null},
+                {manifest_entry,{o_rkv,"Bucket","Key9083",null},
+                    {o_rkv,"Bucket1","Key1",null},
+                    "<0.320.0>","./16_1_6.sst"}}]},
+             {1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}},
+    StartKeyFun =
+        fun(ME) ->
+            ME#manifest_entry.start_key
+        end,
+    R = search_range({o_rkv, "Bucket", null, null},
+                        {o_rkv, "Bucket", null, null},
+                        Tree,
+                        StartKeyFun),
+    ?assertMatch(1, length(R)).
+
 -endif.
@@ -333,8 +333,8 @@ load_and_count(_Config) ->
                             Bookie1,
                             TestObject,
                             G1),
-                    {_S, Count} = testutil:check_bucket_stats(Bookie1,
-                                                                "Bucket"),
+                    {_S, Count} =
+                        testutil:check_bucket_stats(Bookie1, "Bucket"),
                     if
                         Acc + 5000 == Count ->
                             ok
@@ -351,8 +351,8 @@ load_and_count(_Config) ->
                             Bookie1,
                             TestObject,
                             G2),
-                    {_S, Count} = testutil:check_bucket_stats(Bookie1,
-                                                                "Bucket"),
+                    {_S, Count} =
+                        testutil:check_bucket_stats(Bookie1, "Bucket"),
                     if
                         Acc + 5000 == Count ->
                             ok
@@ -368,8 +368,8 @@ load_and_count(_Config) ->
                             Bookie1,
                             TestObject,
                             G1),
-                    {_S, Count} = testutil:check_bucket_stats(Bookie1,
-                                                                "Bucket"),
+                    {_S, Count} =
+                        testutil:check_bucket_stats(Bookie1, "Bucket"),
                     if
                         Count == 200000 ->
                             ok
@@ -385,8 +385,8 @@ load_and_count(_Config) ->
                             Bookie1,
                             TestObject,
                             G2),
-                    {_S, Count} = testutil:check_bucket_stats(Bookie1,
-                                                                "Bucket"),
+                    {_S, Count} =
+                        testutil:check_bucket_stats(Bookie1, "Bucket"),
                     if
                         Acc + 5000 == Count ->
                             ok
|
Loading…
Add table
Add a link
Reference in a new issue