Settled on sizes
Also removed length check due to warning in Erlang guidance about non-constant time nature of this command. Intend to remove lengths from elsewhere (especially when used simply for logging).
This commit is contained in:
parent
b1a3b4ad13
commit
4f838f6f88
2 changed files with 42 additions and 11 deletions
|
@ -18,7 +18,6 @@
|
||||||
-export([
|
-export([
|
||||||
from_list/1,
|
from_list/1,
|
||||||
from_list/2,
|
from_list/2,
|
||||||
to_sstlist/1,
|
|
||||||
from_sortedlist/1,
|
from_sortedlist/1,
|
||||||
from_sortedlist/2,
|
from_sortedlist/2,
|
||||||
to_list/1,
|
to_list/1,
|
||||||
|
@ -38,7 +37,6 @@
|
||||||
|
|
||||||
-define(SKIP_WIDTH, 16).
|
-define(SKIP_WIDTH, 16).
|
||||||
-define(LIST_HEIGHT, 2).
|
-define(LIST_HEIGHT, 2).
|
||||||
-define(SST_WIDTH, 16).
|
|
||||||
-define(INFINITY_KEY, {null, null, null, null, null}).
|
-define(INFINITY_KEY, {null, null, null, null, null}).
|
||||||
-define(BITARRAY_SIZE, 2048).
|
-define(BITARRAY_SIZE, 2048).
|
||||||
|
|
||||||
|
@ -96,9 +94,6 @@ from_sortedlist(SortedKVL, BloomProtect) ->
|
||||||
end,
|
end,
|
||||||
{Bloom0, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}.
|
{Bloom0, from_list(SortedKVL, ?SKIP_WIDTH, ?LIST_HEIGHT)}.
|
||||||
|
|
||||||
to_sstlist(SortedKVL) ->
|
|
||||||
{list_only, from_list(SortedKVL, ?SST_WIDTH, ?LIST_HEIGHT)}.
|
|
||||||
|
|
||||||
lookup(Key, SkipList) ->
|
lookup(Key, SkipList) ->
|
||||||
case element(1, SkipList) of
|
case element(1, SkipList) of
|
||||||
list_only ->
|
list_only ->
|
||||||
|
|
|
@ -3,8 +3,7 @@
|
||||||
%% A FSM module intended to wrap a persisted, ordered view of Keys and Values
|
%% A FSM module intended to wrap a persisted, ordered view of Keys and Values
|
||||||
%%
|
%%
|
||||||
%% The persisted view is built from a list (which may be created by merging
|
%% The persisted view is built from a list (which may be created by merging
|
||||||
%% multiple lists).
|
%% multiple lists). The list is built first, then the view is created in bulk.
|
||||||
%%
|
|
||||||
%%
|
%%
|
||||||
%% -------- Slots ---------
|
%% -------- Slots ---------
|
||||||
%%
|
%%
|
||||||
|
@ -21,7 +20,7 @@
|
||||||
%% flatten back to list - 164 microseconds
|
%% flatten back to list - 164 microseconds
|
||||||
%%
|
%%
|
||||||
%% GBTree:
|
%% GBTree:
|
||||||
%% build and serialise tree 402 microseconds
|
%% build and serialise tree 1433 microseconds
|
||||||
%% de-serialise and check * 128 - 15263 microseconds
|
%% de-serialise and check * 128 - 15263 microseconds
|
||||||
%% flatten back to list - 175 microseconds
|
%% flatten back to list - 175 microseconds
|
||||||
%%
|
%%
|
||||||
|
@ -30,6 +29,28 @@
|
||||||
%% the size of the slot, wherease the serialisation time is relatively constant
|
%% the size of the slot, wherease the serialisation time is relatively constant
|
||||||
%% with growth. So bigger slots would be quicker to build, but the penalty for
|
%% with growth. So bigger slots would be quicker to build, but the penalty for
|
||||||
%% that speed is too high at lookup time.
|
%% that speed is too high at lookup time.
|
||||||
|
%%
|
||||||
|
%% -------- Blooms ---------
|
||||||
|
%%
|
||||||
|
%% There are two different tiny blooms for each table. One is split by the
|
||||||
|
%% first byte of the hash, and consists of two hashes (derived from the
|
||||||
|
%% remainder of the hash). This is the top bloom, and the size vaires by
|
||||||
|
%% level.
|
||||||
|
%% Level 0 has 8 bits per key - 0.05 fpr
|
||||||
|
%% Level 1 has 6 bits per key - 0.08 fpr
|
||||||
|
%% Other Levels have 4 bits per key - 0.15 fpr
|
||||||
|
%%
|
||||||
|
%% If this level is passed, then each slot has its own bloom based on the
|
||||||
|
%% same hash, but now split into three hashes and having a fixed 8 bit per
|
||||||
|
%% key size at all levels.
|
||||||
|
%% Slot Bloom has 8 bits per key - 0.03 fpr
|
||||||
|
%%
|
||||||
|
%% All blooms are base don the DJ Bernstein magic hash which proved to give
|
||||||
|
%% the predicted fpr in tests (unlike phash2 which has significantly higher
|
||||||
|
%% fpr). Due to the cost of producing the magic hash, it is read from the
|
||||||
|
%% value not reproduced each time. If the value is set to no_lookup no bloom
|
||||||
|
%% entry is added, and if all hashes are no_lookup in the slot then no bloom
|
||||||
|
%% is produced.
|
||||||
|
|
||||||
|
|
||||||
-module(leveled_sst).
|
-module(leveled_sst).
|
||||||
|
@ -38,6 +59,7 @@
|
||||||
|
|
||||||
-define(SLOT_SIZE, 128).
|
-define(SLOT_SIZE, 128).
|
||||||
-define(COMPRESSION_LEVEL, 1).
|
-define(COMPRESSION_LEVEL, 1).
|
||||||
|
-define(LEVEL_BLOOM_SLOTS, [{0, 64}, {1, 48}, {default, 32}]).
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
|
||||||
|
@ -57,6 +79,20 @@
|
||||||
%%% Internal Functions
|
%%% Internal Functions
|
||||||
%%%============================================================================
|
%%%============================================================================
|
||||||
|
|
||||||
|
build_table_summary(SlotIndex, AllHashes, Level) ->
|
||||||
|
BloomSlots =
|
||||||
|
case lists:keyfind(Level, ?LEVEL_BLOOM_SLOTS) of
|
||||||
|
{Level, N} ->
|
||||||
|
N;
|
||||||
|
false ->
|
||||||
|
element(2, lists:keyfind(default, ?LEVEL_BLOOM_SLOTS))
|
||||||
|
end,
|
||||||
|
Bloom = lists:foldr(fun leveled_tinybloom:enter/2,
|
||||||
|
leveled_bloom:empty(BloomSlots),
|
||||||
|
AllHashes),
|
||||||
|
SkipSlot = leveled_skiplist:from_sortedlist(lists:reverse(SlotIndex)),
|
||||||
|
term_to_binary({SkipSlot, Bloom}, [{comprressed, ?COMPRESSION_LEVEL}]).
|
||||||
|
|
||||||
build_all_slots(KVList, BasePosition) ->
|
build_all_slots(KVList, BasePosition) ->
|
||||||
build_all_slots(KVList, BasePosition, [], 1, []).
|
build_all_slots(KVList, BasePosition, [], 1, []).
|
||||||
|
|
||||||
|
@ -75,8 +111,8 @@ build_all_slots(KVList, StartPosition, AllHashes, SlotID, SlotIndex) ->
|
||||||
[{hash, H}|Acc]
|
[{hash, H}|Acc]
|
||||||
end
|
end
|
||||||
end,
|
end,
|
||||||
HashList = lists:foldr(ExtractHashFun, [], KVList),
|
HashList = lists:foldr(ExtractHashFun, [], SlotList),
|
||||||
{SlotBin, Bloom} = build_slot(KVList, HashList),
|
{SlotBin, Bloom} = build_slot(SlotList, HashList),
|
||||||
Length = byte_size(SlotBin),
|
Length = byte_size(SlotBin),
|
||||||
SlotIndexV = #slot_index_value{slot_id = SlotID,
|
SlotIndexV = #slot_index_value{slot_id = SlotID,
|
||||||
bloom = Bloom,
|
bloom = Bloom,
|
||||||
|
@ -89,7 +125,7 @@ build_all_slots(KVList, StartPosition, AllHashes, SlotID, SlotIndex) ->
|
||||||
[{LastKey, SlotIndexV}|SlotIndex]).
|
[{LastKey, SlotIndexV}|SlotIndex]).
|
||||||
|
|
||||||
|
|
||||||
build_slot(KVList, HashList) when length(KVList) =< ?SLOT_SIZE ->
|
build_slot(KVList, HashList) ->
|
||||||
Tree = gb_trees:from_orddict(KVList),
|
Tree = gb_trees:from_orddict(KVList),
|
||||||
Bloom = lists:foldr(fun leveled_tinybloom:tiny_enter/2,
|
Bloom = lists:foldr(fun leveled_tinybloom:tiny_enter/2,
|
||||||
leveled_tinybloom:tiny_empty(),
|
leveled_tinybloom:tiny_empty(),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue