Ongoing improvements - in particular CDB now supports general erlang terms not just lists

2015-06-04 21:15:31 +01:00 · 2015-06-04 21:15:31 +01:00 · c5f50c613d
commit c5f50c613d
parent 647a7f44dc
4 changed files with 412 additions and 159 deletions
--- a/src/leveled_bst.erl
+++ b/src/leveled_bst.erl
@ -25,14 +25,14 @@
 %% The slots is a series of references
 %% - 4 byte bloom-filter length 
 %% - 4 byte key-helper length 
-%% - a variable-length compressed bloom filter for all keys in slot (approx 1KB)
+%% - a variable-length compressed bloom filter for all keys in slot (approx 3KB)
-%% - 32 ordered variable-length key helpers pointing to first key in each 
+%% - 64 ordered variable-length key helpers pointing to first key in each 
 %% block (in slot) of the form Key Length, Key, Block Position
 %% - 4 byte CRC for the slot
 %%
 %% The slot index in the footer is made up of 128 keys and pointers at the 
 %% the start of each slot
-%% - 128 Key Length (4 byte), Key, Position (4 byte) indexes
+%% - 64 x Key Length (4 byte), Key, Position (4 byte) indexes
 %% - 4 bytes CRC for the index
 %%
 %% The format of the file is intended to support quick lookups, whilst 
@ -54,8 +54,9 @@
 -record(metadata, {version = ?CURRENT_VERSION :: tuple(),
 					   mutable = false :: true | false,
-					   compressed = true :: tre | false,
+					   compressed = true :: true | false,
-					   slot_list :: list(),
+					   slot_array,
 					   open_slot :: integer(),
 					   cache :: tuple(),
 					   smallest_key :: tuple(),
 					   largest_key :: tuple(),
@ -73,9 +74,9 @@ start_file(Handle) ->
 	{ok, _} = file:position(Handle, bof),
 	file:write(Handle, Header),
 	{Version, {M, C}, _, _} = convert_header(Header),
-	FileMD = #metadata{version=Version, mutable=M, compressed=C},
+	FileMD = #metadata{version = Version, mutable = M, compressed = C, 
-	SlotArray = array:new(?SLOT_COUNT),
+	slot_array = array:new(?SLOT_COUNT), open_slot = 0},
-	{Handle, FileMD, SlotArray}.
+	{Handle, FileMD}.
 create_header(initial) ->
@ -119,6 +120,8 @@ convert_header_v01(Header) ->
 	{{0, 1}, {M, C}, {FooterP, SlotLng, HlpLng}, none}.
 % add_slot(Handle, FileMD, SlotArray)
 %%%%%%%%%%%%%%%%
@ -148,7 +151,7 @@ bad_header_test() ->
 	?assertMatch(unknown_version, convert_header(NewHdr2)).
 record_onstartfile_test() ->
-	{_, FileMD, _} = start_file("onstartfile.bst"),
+	{_, FileMD} = start_file("onstartfile.bst"),
 	?assertMatch({0, 1}, FileMD#metadata.version).
--- a/src/leveled_cdb.erl
+++ b/src/leveled_cdb.erl
@ -54,24 +54,22 @@
  put/4,
  open_active_file/1,
  get_nextkey/1,
-  get_nextkey/2]).
+  get_nextkey/2,
  fold/3,
  fold_keys/3]).
 -include_lib("eunit/include/eunit.hrl").
 -define(DWORD_SIZE, 8).
 -define(WORD_SIZE, 4).
 -define(CRC_CHECK, true).
 -define(MAX_FILE_SIZE, 3221225472).
 -define(BASE_POSITION, 2048).
 %%
 %% from_dict(FileName,ListOfKeyValueTuples)
 %% Given a filename and a dictionary, create a cdb
 %% using the key value pairs from the dict.
 %%
 %% @spec from_dict(filename(),dictionary()) -> ok
 %% where
 %%	filename() = string(),
 %%	dictionary() = dict()
 %%
 from_dict(FileName,Dict) ->
  KeyValueList = dict:to_list(Dict),
  create(FileName, KeyValueList).
@ -82,30 +80,21 @@ from_dict(FileName,Dict) ->
 %% this function creates a CDB
 %%
 create(FileName,KeyValueList) ->
-  {ok, Handle} = file:open(FileName, [write]),
+  {ok, Handle} = file:open(FileName, [binary, raw, read, write]),
-  {ok, _} = file:position(Handle, {bof, 2048}),
+  {ok, _} = file:position(Handle, {bof, ?BASE_POSITION}),
  {BasePos, HashTree} = write_key_value_pairs(Handle, KeyValueList),
-  io:format("KVs has been written to base position ~w~n", [BasePos]),
+  close_file(Handle, HashTree, BasePos).
  L2 = write_hash_tables(Handle, HashTree),
  io:format("Index list output of ~w~n", [L2]),
  write_top_index_table(Handle, BasePos, L2),
  file:close(Handle).
 %%
 %% dump(FileName) -> List
 %% Given a file name, this function returns a list
 %% of {key,value} tuples from the CDB.
 %%
 %%
 %% @spec dump(filename()) -> key_value_list()
 %% where
 %%  filename() = string(),
 %%  key_value_list() = [{key,value}]
 dump(FileName) ->
  dump(FileName, ?CRC_CHECK).
 dump(FileName, CRCCheck) ->
-  {ok, Handle} = file:open(FileName, [binary,raw]),
+  {ok, Handle} = file:open(FileName, [binary, raw, read]),
  Fn = fun(Index, Acc) ->
    {ok, _} = file:position(Handle, ?DWORD_SIZE * Index),
    {_, Count} = read_next_2_integers(Handle),
@ -117,8 +106,9 @@ dump(FileName, CRCCheck) ->
  {ok, _} = file:position(Handle, {bof, 2048}),
  Fn1 = fun(_I,Acc) ->
    {KL,VL} = read_next_2_integers(Handle),
-    Key = read_next_string(Handle, KL),
+    Key = read_next_term(Handle, KL),
-    case read_next_string(Handle, VL, crc, CRCCheck) of
+    io:format("Key read of ~w~n", [Key]),
    case read_next_term(Handle, VL, crc, CRCCheck) of
      {false, _} ->
        {ok, CurrLoc} = file:position(Handle, cur),
        Return = {crc_wonky, get(Handle, Key)};
@ -169,9 +159,15 @@ put(FileName, Key, Value, {LastPosition, HashTree}) when is_list(FileName) ->
    [binary, raw, read, write, delayed_write]),
  put(Handle, Key, Value, {LastPosition, HashTree});
 put(Handle, Key, Value, {LastPosition, HashTree}) ->
-  Bin = key_value_to_record({Key, Value}), % create binary for Key and Value
+  Bin = key_value_to_record({Key, Value}), 
  PotentialNewSize = LastPosition + byte_size(Bin),
  if PotentialNewSize > ?MAX_FILE_SIZE ->
    close_file(Handle, HashTree, LastPosition),
    roll;
  true ->
    ok = file:pwrite(Handle, LastPosition, Bin),
-  {LastPosition + byte_size(Bin), put_hashtree(Key, LastPosition, HashTree)}.
+    {Handle, PotentialNewSize, put_hashtree(Key, LastPosition, HashTree)}
  end.
 %%
@ -182,7 +178,7 @@ get(FileNameOrHandle, Key) ->
  get(FileNameOrHandle, Key, ?CRC_CHECK).
 get(FileName, Key, CRCCheck) when is_list(FileName), is_list(Key) ->
-  {ok,Handle} = file:open(FileName,[binary,raw]),
+  {ok,Handle} = file:open(FileName,[binary, raw, read]),
  get(Handle,Key, CRCCheck);
 get(Handle, Key, CRCCheck) when is_tuple(Handle), is_list(Key) ->
@ -230,7 +226,7 @@ get_nextkey(Handle, {Position, FirstHashPosition}) ->
  {ok, Position} = file:position(Handle, Position),
  case read_next_2_integers(Handle) of 
    {KeyLength, ValueLength} ->
-      NextKey = read_next_string(Handle, KeyLength),
+      NextKey = read_next_term(Handle, KeyLength),
      NextPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE,
      case NextPosition of 
        FirstHashPosition ->
@ -243,10 +239,76 @@ get_nextkey(Handle, {Position, FirstHashPosition}) ->
  end.
 %% Fold over all of the objects in the file, applying FoldFun to each object
 %% where FoldFun(K, V, Acc0) -> Acc , or FoldFun(K, Acc0) -> Acc if KeyOnly is
 %% set to true
 fold(FileName, FoldFun, Acc0) when is_list(FileName) ->
  {ok, Handle} = file:open(FileName, [binary, raw, read]),
  fold(Handle, FoldFun, Acc0);
 fold(Handle, FoldFun, Acc0) ->
  {ok, _} = file:position(Handle, bof),
  {FirstHashPosition, _} = read_next_2_integers(Handle),
  fold(Handle, FoldFun, Acc0, {256 * ?DWORD_SIZE, FirstHashPosition}, false).
 fold(Handle, FoldFun, Acc0, {Position, FirstHashPosition}, KeyOnly) ->
  {ok, Position} = file:position(Handle, Position),
  case Position of 
    FirstHashPosition ->
      Acc0;
    _ ->
      case read_next_2_integers(Handle) of 
        {KeyLength, ValueLength} ->
          NextKey = read_next_term(Handle, KeyLength),
          NextPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE,
          case KeyOnly of 
            true ->
              fold(Handle, FoldFun, FoldFun(NextKey, Acc0), 
                {NextPosition, FirstHashPosition}, KeyOnly);
            false ->
              case read_next_term(Handle, ValueLength, crc, ?CRC_CHECK) of
                {false, _} -> 
                  io:format("Skipping value for Key ~w as CRC check failed~n", 
                    [NextKey]),
                  fold(Handle, FoldFun, Acc0, 
                    {NextPosition, FirstHashPosition}, KeyOnly);
                {_, Value} ->
                  fold(Handle, FoldFun, FoldFun(NextKey, Value, Acc0), 
                    {NextPosition, FirstHashPosition}, KeyOnly)
              end
          end;
      eof ->
        Acc0
      end
  end.
 fold_keys(FileName, FoldFun, Acc0) when is_list(FileName) ->
  {ok, Handle} = file:open(FileName, [binary, raw, read]),
  fold_keys(Handle, FoldFun, Acc0);
 fold_keys(Handle, FoldFun, Acc0) ->
  {ok, _} = file:position(Handle, bof),
  {FirstHashPosition, _} = read_next_2_integers(Handle),
  fold(Handle, FoldFun, Acc0, {256 * ?DWORD_SIZE, FirstHashPosition}, true).
 %%%%%%%%%%%%%%%%%%%%
 %% Internal functions
 %%%%%%%%%%%%%%%%%%%%
 %% Take an active file and write the hash details necessary to close that
 %% file and roll a new active file if requested.  
 %%
 %% Base Pos should be at the end of the KV pairs written (the position for)
 %% the hash tables
 close_file(Handle, HashTree, BasePos) ->
  {ok, BasePos} = file:position(Handle, BasePos),
  L2 = write_hash_tables(Handle, HashTree),
  write_top_index_table(Handle, BasePos, L2),
  file:close(Handle).
 %% Fetch a list of positions by passing a key to the HashTree
 get_hashtree(Key, HashTree) ->
  Hash = hash(Key),
@ -282,9 +344,9 @@ extract_kvpair(_, [], _, _) ->
 extract_kvpair(Handle, [Position|Rest], Key, Check) ->
  {ok, _} = file:position(Handle, Position),
  {KeyLength, ValueLength} = read_next_2_integers(Handle),
-  case read_next_string(Handle, KeyLength) of
+  case read_next_term(Handle, KeyLength) of
    Key ->  % If same key as passed in, then found!
-      case read_next_string(Handle, ValueLength, crc, Check) of
+      case read_next_term(Handle, ValueLength, crc, Check) of
        {false, _} -> 
          crc_wonky;
        {_, Value} ->
@ -301,10 +363,10 @@ scan_over_file(Handle, Position) ->
  scan_over_file(Handle, Position, HashTree).
 scan_over_file(Handle, Position, HashTree) ->
-  case read_next_2_integers(Handle) of 
+  case saferead_keyvalue(Handle) of
-    {KeyLength, ValueLength} -> 
+    false ->
-      Key = read_next_string(Handle, KeyLength),
+      {Position, HashTree};
-      {ok, ValueAsBin} = file:read(Handle, ValueLength),
+    {Key, ValueAsBin, KeyLength, ValueLength} ->
      case crccheck_value(ValueAsBin) of
        true ->
          NewPosition = Position + KeyLength + ValueLength + ?DWORD_SIZE,
@ -318,6 +380,34 @@ scan_over_file(Handle, Position, HashTree) ->
      {Position, HashTree}
  end.
 %% Read the Key/Value at this point, returning {ok, Key, Value}
 %% catch expected exceptiosn associated with file corruption (or end) and 
 %% return eof
 saferead_keyvalue(Handle) ->
  case read_next_2_integers(Handle) of 
    {error, einval} ->
      false;
    eof ->
      false;
    {KeyL, ValueL} ->
      case read_next_term(Handle, KeyL) of 
        {error, einval} ->
          false;
        eof ->
          false;
        Key ->
          case file:read(Handle, ValueL) of 
            {error, einval} ->
              false;
            eof ->
              false;
            {ok, Value} ->
              {Key, Value, KeyL, ValueL}
          end 
      end
  end.
 %% The first four bytes of the value are the crc check
 crccheck_value(Value) when byte_size(Value) >4 ->
  << Hash:32/integer, Tail/bitstring>> = Value,
@ -356,26 +446,30 @@ to_dict(FileName) ->
  KeyValueList = dump(FileName),
  dict:from_list(KeyValueList).
-read_next_string(Handle, Length) ->
+read_next_term(Handle, Length) ->
-  {ok, Bin} = file:read(Handle, Length),
+  case file:read(Handle, Length) of  
-  binary_to_list(Bin).
+    {ok, Bin} ->
      binary_to_term(Bin);
    ReadError ->
      ReadError
  end.
 %% Read next string where the string has a CRC prepended - stripping the crc 
 %% and checking if requested
-read_next_string(Handle, Length, crc, Check) ->
+read_next_term(Handle, Length, crc, Check) ->
  case Check of 
    true ->
      {ok, <<CRC:32/integer, Bin/binary>>} = file:read(Handle, Length),
      case calc_crc(Bin) of 
        CRC ->
-          {true, binary_to_list(Bin)};
+          {true, binary_to_term(Bin)};
        _ ->
-          {false, binary_to_list(Bin)}
+          {false, binary_to_term(Bin)}
      end;
    _ ->
      {ok, _} = file:position(Handle, {cur, 4}),
      {ok, Bin} = file:read(Handle, Length - 4),
-      {unchecked, binary_to_list(Bin)}
+      {unchecked, binary_to_term(Bin)}
  end.
@ -386,9 +480,9 @@ read_next_2_integers(Handle) ->
  case file:read(Handle,?DWORD_SIZE) of 
    {ok, <<Int1:32,Int2:32>>} -> 
      {endian_flip(Int1), endian_flip(Int2)};
-    MatchError
+    ReadError
      ->
-        MatchError
+        ReadError
  end.
 %% Seach the hash table for the matching hash and key.  Be prepared for 
@ -398,7 +492,6 @@ search_hash_table(_Handle, [], _Hash, _Key, _CRCCHeck) ->
 search_hash_table(Handle, [Entry|RestOfEntries], Hash, Key, CRCCheck) ->
  {ok, _} = file:position(Handle, Entry),
  {StoredHash, DataLoc} = read_next_2_integers(Handle),
  io:format("looking in data location ~w~n", [DataLoc]),
  case StoredHash of
    Hash ->
      KV = extract_kvpair(Handle, [DataLoc], Key, CRCCheck),
@ -432,7 +525,7 @@ write_key_value_pairs(_, [], Acc) ->
  Acc;
 write_key_value_pairs(Handle, [HeadPair|TailList], Acc) -> 
  {Key, Value} = HeadPair,
-  {NewPosition, HashTree} = put(Handle, Key, Value, Acc),
+  {Handle, NewPosition, HashTree} = put(Handle, Key, Value, Acc),
  write_key_value_pairs(Handle, TailList, {NewPosition, HashTree}).
 %% Write the actual hashtables at the bottom of the file.  Each hash table
@ -549,14 +642,16 @@ endian_flip(Int) ->
  X.
 hash(Key) ->
  BK = term_to_binary(Key),
  H = 5381,
-  hash1(H,Key) band 16#FFFFFFFF.
+  hash1(H, BK) band 16#FFFFFFFF.
-hash1(H,[]) ->H;
+hash1(H, <<>>) -> 
-hash1(H,[B|Rest]) ->
+  H;
 hash1(H, <<B:8/integer, Rest/bytes>>) ->
  H1 = H * 33,
  H2 = H1 bxor B,
-  hash1(H2,Rest).
+  hash1(H2, Rest).
 % Get the least significant 8 bits from the hash.
 hash_to_index(Hash) ->
@ -567,41 +662,21 @@ hash_to_slot(Hash,L) ->
 %% Create a binary of the LengthKeyLengthValue, adding a CRC check
 %% at the front of the value
-key_value_to_record({Key,Value}) ->
+key_value_to_record({Key, Value}) ->
-  L1 = endian_flip(length(Key)),
+  BK = term_to_binary(Key), 
-  L2 = endian_flip(length(Value) + 4),
+  BV = term_to_binary(Value), 
-  LB1 = list_to_binary(Key), 
+  LK = byte_size(BK),
-  LB2 = list_to_binary(Value), 
+  LV = byte_size(BV),
-  CRC = calc_crc(LB2),
+  LK_FL = endian_flip(LK),
-  <<L1:32,L2:32,LB1/binary,CRC:32/integer,LB2/binary>>.
+  LV_FL = endian_flip(LV + 4),
  CRC = calc_crc(BV),
  <<LK_FL:32, LV_FL:32, BK:LK/binary, CRC:32/integer, BV:LV/binary>>.
 %%%%%%%%%%%%%%%%
 % T E S T 
 %%%%%%%%%%%%%%%  
-
+-ifdef(TEST).
 hash_1_test() ->
  Hash = hash("key1"),
  ?assertMatch(Hash,2088047427).
 hash_to_index_1_test() ->
  Hash = hash("key1"),
  Index = hash_to_index(Hash),
  ?assertMatch(Index,67).
 hash_to_index_2_test() ->
  Hash = 256,
  I = hash_to_index(Hash),
  ?assertMatch(I,0).
 hash_to_index_3_test() ->
  Hash = 268,
  I = hash_to_index(Hash),
  ?assertMatch(I,12).
 hash_to_index_4_test() ->
  Hash = hash("key2"),
  Index = hash_to_index(Hash),
  ?assertMatch(Index,64).
 write_key_value_pairs_1_test() ->
  {ok,Handle} = file:open("test.cdb",write),
@ -612,8 +687,11 @@ write_key_value_pairs_1_test() ->
  Index2 = hash_to_index(Hash2),
  R0 = array:new(256, {default, gb_trees:empty()}),
  R1 = array:set(Index1, gb_trees:insert(Hash1, [0], array:get(Index1, R0)), R0),
-  R2 = array:set(Index2, gb_trees:insert(Hash2, [22], array:get(Index2, R1)), R1),
+  R2 = array:set(Index2, gb_trees:insert(Hash2, [30], array:get(Index2, R1)), R1),
-  ?assertMatch(R2, HashTree).
+  io:format("HashTree is ~w~n", [HashTree]),
  io:format("Expected HashTree is ~w~n", [R2]),
  ?assertMatch(R2, HashTree),
  ok = file:delete("test.cdb").
 write_hash_tables_1_test() ->
@ -623,7 +701,8 @@ write_hash_tables_1_test() ->
  R2 = array:set(67, gb_trees:insert(6383014723, [0], array:get(67, R1)), R1),
  Result = write_hash_tables(Handle, R2),
  io:format("write hash tables result of ~w ~n", [Result]),
-  ?assertMatch(Result,[{67,16,2},{64,0,2}]).
+  ?assertMatch(Result,[{67,16,2},{64,0,2}]),
  ok = file:delete("test.cdb").
 find_open_slot_1_test() ->
  List = [<<1:32,1:32>>,<<0:32,0:32>>,<<1:32,1:32>>,<<1:32,1:32>>],
@ -654,7 +733,8 @@ full_1_test() ->
  List1 = lists:sort([{"key1","value1"},{"key2","value2"}]),
  create("simple.cdb",lists:sort([{"key1","value1"},{"key2","value2"}])),
  List2 = lists:sort(dump("simple.cdb")),
-  ?assertMatch(List1,List2).
+  ?assertMatch(List1,List2),
  ok = file:delete("simple.cdb").
 full_2_test() ->
  List1 = lists:sort([{lists:flatten(io_lib:format("~s~p",[Prefix,Plug])),
@ -664,7 +744,8 @@ full_2_test() ->
      "tiep4||","qweq"]]),
  create("full.cdb",List1),
  List2 = lists:sort(dump("full.cdb")),
-  ?assertMatch(List1,List2).
+  ?assertMatch(List1,List2),
  ok = file:delete("full.cdb").
 from_dict_test() ->
  D = dict:new(),
@ -676,7 +757,8 @@ from_dict_test() ->
  D3 = lists:sort(dict:to_list(D2)),
  io:format("KVP is ~w~n", [KVP]),
  io:format("D3 is ~w~n", [D3]),
-  ?assertMatch(KVP,D3).
+  ?assertMatch(KVP, D3),
  ok = file:delete("from_dict_test.cdb").
 to_dict_test() ->
  D = dict:new(),
@ -686,7 +768,8 @@ to_dict_test() ->
  Dict = to_dict("from_dict_test.cdb"),
  D3 = lists:sort(dict:to_list(D2)),
  D4 = lists:sort(dict:to_list(Dict)),
-  ?assertMatch(D4,D3).
+  ?assertMatch(D4,D3),
  ok = file:delete("from_dict_test.cdb").
 crccheck_emptyvalue_test() ->
  ?assertMatch(false, crccheck_value(<<>>)).
@ -729,9 +812,10 @@ activewrite_singlewrite_test() ->
  {LastPosition, KeyDict} = open_active_file("test_mem.cdb"),
  io:format("File opened as new active file " 
    "with LastPosition=~w ~n", [LastPosition]),
-  {_, UpdKeyDict} = put("test_mem.cdb", Key, Value, {LastPosition, KeyDict}),
+  {_, _, UpdKeyDict} = put("test_mem.cdb", Key, Value, {LastPosition, KeyDict}),
  io:format("New key and value added to active file ~n", []),
-  ?assertMatch({Key, Value}, get_mem(Key, "test_mem.cdb", UpdKeyDict)).
+  ?assertMatch({Key, Value}, get_mem(Key, "test_mem.cdb", UpdKeyDict)),
  ok = file:delete("test_mem.cdb").
 search_hash_table_findinslot_test() ->
  Key1 = "key1", % this is in slot 3 if count is 8
@ -766,10 +850,11 @@ search_hash_table_findinslot_test() ->
  ok = file:pwrite(Handle, FirstHashPosition + (Slot -1) * ?DWORD_SIZE, RBin),
  ok = file:close(Handle),
  io:format("Find key following change to hash table~n"),
-  ?assertMatch(missing, get("hashtable1_test.cdb", Key1)).
+  ?assertMatch(missing, get("hashtable1_test.cdb", Key1)),
  ok = file:delete("hashtable1_test.cdb").
-getnextkey_test() ->
+getnextkey_inclemptyvalue_test() ->
-  L = [{"K9", "V9"}, {"K2", "V2"}, {"K3", "V3"}, 
+  L = [{"K9", "V9"}, {"K2", "V2"}, {"K3", ""}, 
    {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"}, 
    {"K8", "V8"}, {"K1", "V1"}],
  ok = create("hashtable1_test.cdb", L),
@ -778,7 +863,8 @@ getnextkey_test() ->
  ?assertMatch("K9", FirstKey),
  {SecondKey, Handle, P2} = get_nextkey(Handle, P1),
  ?assertMatch("K2", SecondKey),
-  {_, Handle, P3} = get_nextkey(Handle, P2),
+  {ThirdKeyNoValue, Handle, P3} = get_nextkey(Handle, P2),
  ?assertMatch("K3", ThirdKeyNoValue),
  {_, Handle, P4} = get_nextkey(Handle, P3),
  {_, Handle, P5} = get_nextkey(Handle, P4),
  {_, Handle, P6} = get_nextkey(Handle, P5),
@ -786,19 +872,114 @@ getnextkey_test() ->
  {_, Handle, P8} = get_nextkey(Handle, P7),
  {LastKey, Info} = get_nextkey(Handle, P8),
  ?assertMatch(nomorekeys, Info),
-  ?assertMatch("K1", LastKey).
+  ?assertMatch("K1", LastKey),
  ok = file:delete("hashtable1_test.cdb").
 newactivefile_test() ->
  {LastPosition, _} = open_active_file("activefile_test.cdb"),
  ?assertMatch(256 * ?DWORD_SIZE, LastPosition),
  Response = get_nextkey("activefile_test.cdb"),
-  ?assertMatch(nomorekeys, Response).
+  ?assertMatch(nomorekeys, Response),
-
+  ok = file:delete("activefile_test.cdb").
 emptyvalue_fromdict_test() ->
  D = dict:new(),
  D1 = dict:store("K1", "V1", D),
  D2 = dict:store("K2", "", D1),
  D3 = dict:store("K3", "V3", D2),
  D4 = dict:store("K4", "", D3),
  ok = from_dict("from_dict_test_ev.cdb",D4),
  io:format("Store created ~n", []),
  KVP = lists:sort(dump("from_dict_test_ev.cdb")),
  D_Result = lists:sort(dict:to_list(D4)),
  io:format("KVP is ~w~n", [KVP]),
  io:format("D_Result is ~w~n", [D_Result]),
  ?assertMatch(KVP, D_Result),
  ok = file:delete("from_dict_test_ev.cdb").
 fold_test() ->
  K1 = {"Key1", 1},
  V1 = 2,
  K2 = {"Key1", 2},
  V2 = 4,
  K3 = {"Key1", 3},
  V3 = 8,
  K4 = {"Key1", 4},
  V4 = 16,
  K5 = {"Key1", 5},
  V5 = 32,
  D = dict:from_list([{K1, V1}, {K2, V2}, {K3, V3}, {K4, V4}, {K5, V5}]),
  ok = from_dict("fold_test.cdb", D),
  FromSN = 2,
  FoldFun = fun(K, V, Acc) ->
    {_Key, Seq} = K,
    if Seq > FromSN ->
      Acc + V;
    true ->
      Acc
    end
  end,
  ?assertMatch(56, fold("fold_test.cdb", FoldFun, 0)),
  ok = file:delete("fold_test.cdb").
 fold_keys_test() ->
  K1 = {"Key1", 1},
  V1 = 2,
  K2 = {"Key2", 2},
  V2 = 4,
  K3 = {"Key3", 3},
  V3 = 8,
  K4 = {"Key4", 4},
  V4 = 16,
  K5 = {"Key5", 5},
  V5 = 32,
  D = dict:from_list([{K1, V1}, {K2, V2}, {K3, V3}, {K4, V4}, {K5, V5}]),
  ok = from_dict("fold_keys_test.cdb", D),
  FromSN = 2,
  FoldFun = fun(K, Acc) ->
    {Key, Seq} = K,
    if Seq > FromSN ->
      lists:append(Acc, [Key]);
    true ->
      Acc
    end
  end,
  Result = fold_keys("fold_keys_test.cdb", FoldFun, []),
  ?assertMatch(["Key3", "Key4", "Key5"], lists:sort(Result)),
  ok = file:delete("fold_keys_test.cdb").
 fold2_test() ->
  K1 = {"Key1", 1},
  V1 = 2,
  K2 = {"Key1", 2},
  V2 = 4,
  K3 = {"Key1", 3},
  V3 = 8,
  K4 = {"Key1", 4},
  V4 = 16,
  K5 = {"Key1", 5},
  V5 = 32,
  K6 = {"Key2", 1},
  V6 = 64,
  D = dict:from_list([{K1, V1}, {K2, V2}, {K3, V3}, 
    {K4, V4}, {K5, V5}, {K6, V6}]),
  ok = from_dict("fold2_test.cdb", D),
  FoldFun = fun(K, V, Acc) ->
    {Key, Seq} = K,
    case dict:find(Key, Acc) of 
      error ->
        dict:store(Key, {Seq, V}, Acc);
      {ok, {LSN, _V}} when Seq > LSN ->
        dict:store(Key, {Seq, V}, Acc);
      _ ->
        Acc 
    end 
  end,
  RD = dict:new(),
  RD1 = dict:store("Key1", {5, 32}, RD),
  RD2 = dict:store("Key2", {1, 64}, RD1),
  Result = fold("fold2_test.cdb", FoldFun, dict:new()),
  ?assertMatch(RD2, Result),
  ok = file:delete("fold2_test.cdb").
 -endif.
--- a/src/leveled_iterator.erl
+++ b/src/leveled_iterator.erl
@ -1,58 +1,69 @@
-module(leveled_internal).
+-module(leveled_iterator).
-export([termiterator/6]).
+-export([termiterator/3]).
 -include_lib("eunit/include/eunit.hrl").
-%% We will have a sorted list of terms
+%% Takes a list of terms to iterate - the terms being sorted in Erlang term
-%% Some terms will be dummy terms which are pointers to more terms which can be 
+%% order
 %% found.  If a pointer is hit need to replenish the term list before 
 %% proceeding.
 %%
 %% Helper Functions should have free functions - 
-%% {FolderFun, CompareFun, PointerCheck}
+%% {FolderFun, CompareFun, PointerCheck, PointerFetch}
 %% FolderFun - function which takes the next item and the accumulator and 
-%% returns an updated accumulator
+%% returns an updated accumulator.  Note FolderFun can only increase the 
 %% accumulator by one entry each time
 %% CompareFun - function which should be able to compare two keys (which are 
 %% not pointers), and return a winning item (or combination of items)
 %% PointerCheck - function for differentiating between keys and pointer
 %% PointerFetch - function that takes a pointer an EndKey (which may be 
 %% infinite) and returns a ne wslice of ordered results from that pointer
 %% 
 %% Range can be for the form
 %% {StartKey, EndKey, MaxKeys} where EndKey or MaxKeys can be infinite (but 
 %% not both)
-termiterator(HeadItem, [], Acc, HelperFuns, 
+
-	_StartKey, _EndKey) ->
+termiterator(ListToIterate, HelperFuns, Range) ->
 	case Range of 
 		{_, infinte, infinite} ->
 			bad_iterator;
 		_ ->
 			termiterator(null, ListToIterate, [], HelperFuns, Range)
 	end.
 termiterator(HeadItem, [], Acc, HelperFuns, _) ->
 	case HeadItem of 
 		null ->
 			Acc;
 		_ ->
-			{FolderFun, _, _} = HelperFuns,
+			{FolderFun, _, _, _} = HelperFuns,
 			FolderFun(Acc, HeadItem)
 	end;
-termiterator(null, [NextItem|TailList], Acc, HelperFuns, 
+termiterator(null, [NextItem|TailList], Acc, HelperFuns, Range) ->
 	StartKey, EndKey) ->
 	%% Check that the NextItem is not a pointer before promoting to HeadItem
 	%% Cannot now promote a HeadItem which is a pointer
-	{_, _, PointerCheck} = HelperFuns,
+	{_, _, PointerCheck, PointerFetch} = HelperFuns,
 	case PointerCheck(NextItem) of 
 		{true, Pointer} ->
-			NewSlice = getnextslice(Pointer, EndKey),
+			{_, EndKey, _} = Range,
 			NewSlice = PointerFetch(Pointer, EndKey),
 			ExtendedList = lists:merge(NewSlice, TailList),
-			termiterator(null, ExtendedList, Acc, HelperFuns, 
+			termiterator(null, ExtendedList, Acc, HelperFuns, Range);
 				StartKey, EndKey);
 		false ->
-			termiterator(NextItem, TailList, Acc, HelperFuns, 
+			termiterator(NextItem, TailList, Acc, HelperFuns, Range)
 				StartKey, EndKey)
 	end;
-termiterator(HeadItem, [NextItem|TailList], Acc, HelperFuns, 
+termiterator(HeadItem, [NextItem|TailList], Acc, HelperFuns, Range) ->
-	StartKey, EndKey) ->
+	{FolderFun, CompareFun, PointerCheck, PointerFetch} = HelperFuns,
-	{FolderFun, CompareFun, PointerCheck} = HelperFuns,
+	{_, EndKey, MaxItems} = Range,
 	%% HeadItem cannot be pointer, but NextItem might be, so check before 
 	%% comparison
 	case PointerCheck(NextItem) of 
 		{true, Pointer} ->
-			NewSlice = getnextslice(Pointer, EndKey),
+			NewSlice = PointerFetch(Pointer, EndKey),
-			ExtendedList = lists:merge(NewSlice, [NextItem|TailList]),
+			ExtendedList = lists:merge(NewSlice, [HeadItem|TailList]),
-			termiterator(null, ExtendedList, Acc, HelperFuns, 
+			termiterator(null, ExtendedList, Acc, HelperFuns, Range);
 				StartKey, EndKey);
 		false ->
 			%% Compare to see if Head and Next match, or if Head is a winner 
 			%% to be added to accumulator
@ -60,39 +71,65 @@ termiterator(HeadItem, [NextItem|TailList], Acc, HelperFuns,
 				{match, StrongItem, _WeakItem} ->
 					%% Discard WeakItem, Strong Item might be an aggregation of
 					%% the items  
-					termiterator(StrongItem, TailList, Acc, HelperFuns, 
+					termiterator(StrongItem, TailList, Acc, HelperFuns, Range);
 						StartKey, EndKey);
 				{winner, HeadItem} ->
 					%% Add next item to accumulator, and proceed with next item
 					AccPlus = FolderFun(Acc, HeadItem),
-					termiterator(NextItem, TailList, AccPlus, HelperFuns, 
+					case length(AccPlus) of 
-						HeadItem, EndKey)
+						MaxItems ->
 							AccPlus;
 						_ ->
 							termiterator(NextItem, TailList, AccPlus, 
 								HelperFuns, 
 								{HeadItem, EndKey, MaxItems})
 					end
 			end
 	end.
 %% Initial forms of keys supported are Index Keys and Object Keys
 %% 
 %% All keys are of the form {Key, Value, SequenceNumber, State}
 %%
 %% The Key will be of the form:
 %% {o, Bucket, Key} - for an Object Key
 %% {i, Bucket, IndexName, IndexTerm, Key} - for an Index Key
 %%
 %% The value will be of the form: 
 %% {o, ObjectHash, [vector-clocks]} - for an Object Key
 %% null - for an Index Key
 %%
 %% Sequence number is the sequence number the key was added, and the highest
 %% sequence number in the list of keys for an index key.
 %%
 %% State can be one of the following: 
 %% live - an active key
 %% tomb - a tombstone key
 %% {timestamp, TS} - an active key to a certain timestamp
 %% {pointer, Pointer} - to be added by iterators to indicate further data 
 %% available in the range from a particular source
 pointercheck_indexkey(IndexKey) ->
 	case IndexKey of 
-		{i, _Bucket, _Index, _Term, _Key, _Sequence, {zpointer, Pointer}} ->
+		{_Key, _Values, _Sequence, {pointer, Pointer}} ->
 			{true, Pointer};
 		_ -> 
 			false
 	end.
 folder_indexkey(Acc, IndexKey) ->
 	io:format("Folding index key of - ~w~n", [IndexKey]),
 	case IndexKey of 
-		{i, _Bucket, _Index, _Term, _Key, _Sequence, tombstone} ->
+		{_Key, _Value, _Sequence, tomb} ->
 			Acc;
-		{i, _Bucket, _Index, _Term, Key, _Sequence, null} ->
+		{Key, _Value, _Sequence, live} ->
-			io:format("Adding key ~s~n", [Key]),
+			{i, _, _, _, ObjectKey} = Key,
-			lists:append(Acc, [Key])
+			lists:append(Acc, [ObjectKey])
 	end.
 compare_indexkey(IndexKey1, IndexKey2) ->
-	{i, Bucket1, Index1, Term1, Key1, Sequence1, _Value1} = IndexKey1,
+	{{i, Bucket1, Index1, Term1, Key1}, _Val1, Sequence1, _St1} = IndexKey1,
-	{i, Bucket2, Index2, Term2, Key2, Sequence2, _Value2} = IndexKey2,
+	{{i, Bucket2, Index2, Term2, Key2}, _Val2, Sequence2, _St2} = IndexKey2,
 	case {Bucket1, Index1, Term1, Key1} of 
 		{Bucket2, Index2, Term2, Key2} when Sequence1 >= Sequence2 ->
 			{match, IndexKey1, IndexKey2};
@ -105,6 +142,9 @@ compare_indexkey(IndexKey1, IndexKey2) ->
 	end.
 %% Unit testsß
 getnextslice(Pointer, _EndKey) ->
 	case Pointer of 
 		{test, NewList} ->
@ -114,18 +154,43 @@ getnextslice(Pointer, _EndKey) ->
 	end.
-%% Unit tests
+iterateoverindexkeyswithnopointer_test() ->
-
+	Key1 = {{i, "pdsRecord", "familyName_bin", "1972SMITH", "10001"}, 
-
+	null, 1, live},
-iterateoverindexkeyswithnopointer_test_() ->
+	Key2 = {{i, "pdsRecord", "familyName_bin", "1972SMITH", "10001"}, 
-	Key1 = {i, "pdsRecord", "familyName_bin", "1972SMITH", "10001", 1, null},
+	null, 2, tomb},
-	Key2 = {i, "pdsRecord", "familyName_bin", "1972SMITH", "10001", 2, tombstone},
+	Key3 = {{i, "pdsRecord", "familyName_bin", "1971SMITH", "10002"}, 
-	Key3 = {i, "pdsRecord", "familyName_bin", "1971SMITH", "10002", 2, null},
+	null, 2, live},
-	Key4 = {i, "pdsRecord", "familyName_bin", "1972JONES", "10003", 2, null},
+	Key4 = {{i, "pdsRecord", "familyName_bin", "1972JONES", "10003"}, 
 	null, 2, live},
 	KeyList = lists:sort([Key1, Key2, Key3, Key4]),
-	HelperFuns = {fun folder_indexkey/2, fun compare_indexkey/2, fun pointercheck_indexkey/1},
+	HelperFuns = {fun folder_indexkey/2, fun compare_indexkey/2, 
-	ResultList = ["10002", "10003"],
+		fun pointercheck_indexkey/1, fun getnextslice/2},
-	?_assertEqual(ResultList, termiterator(null, KeyList, [], HelperFuns, "1971", "1973")).
+	?assertMatch(["10002", "10003"], 
 		termiterator(KeyList, HelperFuns, {"1971", "1973", infinite})).
 iterateoverindexkeyswithpointer_test() ->
 	Key1 = {{i, "pdsRecord", "familyName_bin", "1972SMITH", "10001"}, 
 	null, 1, live},
 	Key2 = {{i, "pdsRecord", "familyName_bin", "1972SMITH", "10001"}, 
 	null, 2, tomb},
 	Key3 = {{i, "pdsRecord", "familyName_bin", "1971SMITH", "10002"}, 
 	null, 2, live},
 	Key4 = {{i, "pdsRecord", "familyName_bin", "1972JONES", "10003"}, 
 	null, 2, live},
 	Key5 = {{i, "pdsRecord", "familyName_bin", "1972ZAFRIDI", "10004"}, 
 	null, 2, live},
 	Key6 = {{i, "pdsRecord", "familyName_bin", "1972JONES", "10004"}, 
 	null, 0, {pointer, {test, [Key5]}}},
 	KeyList = lists:sort([Key1, Key2, Key3, Key4, Key6]),
 	HelperFuns = {fun folder_indexkey/2, fun compare_indexkey/2, 
 		fun pointercheck_indexkey/1, fun getnextslice/2},
 	?assertMatch(["10002", "10003", "10004"], 
 		termiterator(KeyList, HelperFuns, {"1971", "1973", infinite})),
 	?assertMatch(["10002", "10003"], 
 		termiterator(KeyList, HelperFuns, {"1971", "1973", 2})).
--- a/test/lookup_test.erl
+++ b/test/lookup_test.erl
@ -1,7 +1,11 @@
 -module(lookup_test).
-export([go_dict/1, go_ets/1, go_gbtree/1, 
+-export([go_dict/1, 
-    go_arrayofdict/1, go_arrayofgbtree/1, go_arrayofdict_withcache/1]).
+    go_ets/1, 
    go_gbtree/1, 
    go_arrayofdict/1, 
    go_arrayofgbtree/1, 
    go_arrayofdict_withcache/1]).
 -define(CACHE_SIZE, 512).