Switch to ordered_set

Improved performance through a combination of two changes: switching the
hash tree ETS table from a bag to an ordered_set (so that a list can be
extracted in a sane way), and building the binary from an ordered list.
This commit is contained in:
Martin Sumner 2016-12-13 12:35:30 +00:00
parent aa2d19df1d
commit cfc6a67638

View file

@ -1236,36 +1236,45 @@ multi_key_value_to_record(KVList, BinaryMode, LastPosition) ->
%%%============================================================================ %%%============================================================================
lookup_positions(HashTree, Index, Hash) -> lookup_positions(HashTree, Index, Hash) ->
ConvertObjFun = fun({{_Idx, _H}, P}) -> P end, lookup_positions(HashTree, Index, Hash, -1, []).
lists:map(ConvertObjFun, ets:lookup(HashTree, {Index, Hash})).
lookup_positions(HashTree, Index, Hash, Pos, PosList) ->
case ets:next(HashTree, {Index, Hash, Pos}) of
{Index, Hash, NewPos} ->
lookup_positions(HashTree, Index, Hash, NewPos, [NewPos|PosList]);
_ ->
PosList
end.
add_position_tohashtree(HashTree, Index, Hash, Position) -> add_position_tohashtree(HashTree, Index, Hash, Position) ->
ets:insert(HashTree, {{Index, Hash}, Position}), ets:insert(HashTree, {{Index, Hash, Position}}),
HashTree. HashTree.
new_hashtree() -> new_hashtree() ->
ets:new(hashtree, [bag]). ets:new(hashtree, [ordered_set]).
is_empty(HashTree, Index) -> to_list(HashTree, Index) ->
case ets:match(HashTree, {{Index, '_'}, '_'}) of to_list(HashTree, Index, {0, -1}, []).
'$end_of_table' ->
true; to_list(HashTree, Index, {LastHash, LastPos}, Acc) ->
case ets:next(HashTree, {Index, LastHash, LastPos}) of
{Index, Hash, Pos} ->
to_list(HashTree, Index, {Hash, Pos}, [{Hash, Pos}|Acc]);
_ -> _ ->
false Acc
end. end.
to_slotmap(HashTree, Index) -> to_slotmap(HashTree, Index) ->
ObjList = ets:match_object(HashTree, {{Index, '_'}, '_'}), HPList = to_list(HashTree, Index),
IndexLength = length(ObjList) * 2, IndexLength = length(HPList),
ConvertObjFun = ConvertObjFun =
fun({{_Idx, Hash}, Position}) -> fun({Hash, Position}) ->
HashLE = endian_flip(Hash), HashLE = endian_flip(Hash),
PosLE = endian_flip(Position), PosLE = endian_flip(Position),
NewBin = <<HashLE:32, PosLE:32>>, NewBin = <<HashLE:32, PosLE:32>>,
{hash_to_slot(Hash, IndexLength), NewBin} {hash_to_slot(Hash, IndexLength), NewBin}
end, end,
lists:keysort(1, lists:map(ConvertObjFun, ObjList)). lists:map(ConvertObjFun, HPList).
build_hashtree_binary(SlotMap, IndexLength) -> build_hashtree_binary(SlotMap, IndexLength) ->
build_hashtree_binary(SlotMap, IndexLength, 0, []). build_hashtree_binary(SlotMap, IndexLength, 0, []).
@ -1318,31 +1327,42 @@ find_firstzero(Bin, Pos) ->
write_hash_tables(Indexes, HashTree, CurrPos) -> write_hash_tables(Indexes, HashTree, CurrPos) ->
write_hash_tables(Indexes, HashTree, CurrPos, CurrPos, [], []). write_hash_tables(Indexes, HashTree, CurrPos, CurrPos, [], [], {0, 0, 0}).
write_hash_tables([], _HashTree, _CurrPos, _BasePos, IndexList, HT_BinList) -> write_hash_tables([], _HashTree, _CurrPos, _BasePos,
IndexList, HT_BinList, {T1, T2, T3}) ->
io:format("CDB99 ~w T1 ~w T2 ~w T3 ~w~n", [self(), T1, T2, T3]),
IL = lists:reverse(IndexList), IL = lists:reverse(IndexList),
{IL, list_to_binary(HT_BinList)}; {IL, list_to_binary(HT_BinList)};
write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos, write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos,
IndexList, HT_BinList) -> IndexList, HT_BinList, Timers) ->
case is_empty(HashTree, Index) of SW1 = os:timestamp(),
true -> SlotMap = to_slotmap(HashTree, Index),
T1 = timer:now_diff(os:timestamp(), SW1) + element(1, Timers),
case SlotMap of
[] ->
write_hash_tables(Rest, write_hash_tables(Rest,
HashTree, HashTree,
CurrPos, CurrPos,
BasePos, BasePos,
[{Index, BasePos, 0}|IndexList], [{Index, BasePos, 0}|IndexList],
HT_BinList); HT_BinList,
false -> Timers);
SlotMap = to_slotmap(HashTree, Index), _ ->
SW2 = os:timestamp(),
IndexLength = length(SlotMap) * 2, IndexLength = length(SlotMap) * 2,
NewSlotBin = build_hashtree_binary(SlotMap, IndexLength), SortedMap = lists:keysort(1, SlotMap),
T2 = timer:now_diff(os:timestamp(), SW2) + element(2, Timers),
SW3 = os:timestamp(),
NewSlotBin = build_hashtree_binary(SortedMap, IndexLength),
T3 = timer:now_diff(os:timestamp(), SW3) + element(3, Timers),
write_hash_tables(Rest, write_hash_tables(Rest,
HashTree, HashTree,
CurrPos + IndexLength * ?DWORD_SIZE, CurrPos + IndexLength * ?DWORD_SIZE,
BasePos, BasePos,
[{Index, CurrPos, IndexLength}|IndexList], [{Index, CurrPos, IndexLength}|IndexList],
HT_BinList ++ NewSlotBin) HT_BinList ++ NewSlotBin,
{T1, T2, T3})
end. end.