diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml index 8d76c7d..7507efe 100644 --- a/.github/workflows/erlang.yml +++ b/.github/workflows/erlang.yml @@ -2,9 +2,15 @@ name: Erlang CI on: push: - branches: [ develop-3.0 ] + branches: + - develop-3.1 + - develop-3.0 + - develop-2.9 pull_request: - branches: [ develop-3.0 ] + branches: + - develop-3.1 + - develop-3.0 + - develop-2.9 jobs: diff --git a/README.md b/README.md index 1cfc224..fc768b2 100644 --- a/README.md +++ b/README.md @@ -44,48 +44,13 @@ For more details on the store: - There is also a ["Why"](docs/WHY.md) section looking at lower level design choices and the rationale that supports them. -## Is this interesting? - -Making a positive contribution to this space is hard - given the superior brainpower and experience of those that have contributed to the KV store problem space in general, and the Riak backend space in particular. - -The target at inception was to do something interesting, to re-think certain key assumptions and trade-offs, and prove through working software the potential for improvements to be realised. - -[Initial volume tests](docs/VOLUME.md) indicate that it is at least interesting. With improvements in throughput for multiple configurations, with this improvement becoming more marked as the test progresses (and the base data volume becomes more realistic). - -The delta in the table below is the comparison in Riak throughput between the identical test run with a leveled backend in comparison to leveldb. The realism of the tests increase as the test progresses - so focus is given to the throughput delta in the last hour of the test. 
- -Test Description | Hardware | Duration |Avg TPS | TPS Delta (Overall) | TPS Delta (Last Hour) -:---------------------------------|:-------------|:--------:|----------:|-----------------:|-------------------: -8KB value, 60 workers, sync | 5 x i2.2x | 4 hr | 12,679.91 | + 70.81% | + 63.99% -8KB value, 100 workers, no_sync | 5 x i2.2x | 6 hr | 14,100.19 | + 16.15% | + 35.92% -8KB value, 50 workers, no_sync | 5 x d2.2x | 4 hr | 10,400.29 | + 8.37% | + 23.51% -4KB value, 100 workers, no_sync | 5 x i2.2x | 6 hr | 14,993.95 | - 10.44% | - 4.48% -16KB value, 60 workers, no_sync | 5 x i2.2x | 6 hr | 11,167.44 | + 80.48% | + 113.55% -8KB value, 80 workers, no_sync, 2i queries | 5 x i2.2x | 6 hr | 9,855.96 | + 4.48% | + 22.36% - -Tests generally show a 5:1 improvement in tail latency for leveled. - -All tests have in common: - -- Target Key volume - 200M with pareto distribution of load -- 5 GETs per 1 update -- RAID 10 (software) drives -- allow_mult=false, lww=false -- modified riak optimised for leveled used in leveled tests - -The throughput in leveled is generally CPU-bound, whereas in comparative tests for leveledb the throughput was disk bound. This potentially makes capacity planning simpler, and opens up the possibility of scaling out to equivalent throughput at much lower cost (as CPU is relatively low cost when compared to disk space at high I/O) - [offering better alignment between resource constraints and the cost of resource](docs/INTRO.md). - -More information can be found in the [volume testing section](docs/VOLUME.md). - -As a general rule though, the most interesting thing is the potential to enable [new features](docs/FUTURE.md). The tagging of different object types, with an ability to set different rules for both compaction and metadata creation by tag, is a potential enabler for further change. 
Further, having a separate key/metadata store which can be scanned without breaking the page cache or working against mitigation for write amplifications, is also potentially an enabler to offer features to both the developer and the operator. - ## Feedback Please create an issue if you have any suggestions. You can ping me @masleeds if you wish ## Running Leveled -Unit and current tests in leveled should run with rebar3. Leveled has been tested in OTP18, but it can be started with OTP16 to support Riak (although tests will not work as expected). +Unit and current tests in leveled should run with rebar3. A new database can be started by running @@ -99,13 +64,18 @@ The book_start method should respond once startup is complete. The [leveled_boo Running in Riak requires Riak 2.9 or beyond, which is available from January 2019. +There are three main branches: + +[`develop-3.1` - default](https://github.com/martinsumner/leveled/tree/develop-3.1): Target for the Riak 3.1 release with support for OTP 22 and OTP 24; + +[`develop-3.0`](https://github.com/martinsumner/leveled/tree/develop-3.0): Used in the Riak 3.0 release with support for OTP 20 and OTP 22; + +[`develop-2.9`](https://github.com/martinsumner/leveled/tree/develop-2.9): Used in the Riak 2.9 release with support for OTP R16 through to OTP 20. + ### Contributing -In order to contribute to leveled, fork the repository, make a branch -for your changes, and open a pull request. The acceptance criteria for -updating leveled is that it passes rebar3 dialyzer, xref, eunit, and -ct with 100% coverage. +In order to contribute to leveled, fork the repository, make a branch for your changes, and open a pull request. The acceptance criteria for updating leveled is that it passes rebar3 dialyzer, xref, eunit, and ct with 100% coverage. 
To have rebar3 execute the full set of tests, run: - `rebar3 as test do cover --reset, eunit --cover, ct --cover, cover --verbose` + `rebar3 as test do xref, dialyzer, cover --reset, eunit --cover, ct --cover, cover --verbose` diff --git a/docs/VOLUME.md b/docs/VOLUME.md index ce6ee85..3bd1cc0 100644 --- a/docs/VOLUME.md +++ b/docs/VOLUME.md @@ -2,7 +2,7 @@ ## Parallel Node Testing -Initial volume tests have been [based on standard basho_bench eleveldb test](../test/volume/single_node/examples) to run multiple stores in parallel on the same node and and subjecting them to concurrent pressure. +Initial volume tests have been [based on standard basho_bench eleveldb test](volume/single_node/examples) to run multiple stores in parallel on the same node and subjecting them to concurrent pressure. This showed a [relative positive performance for leveled](VOLUME_PRERIAK.md) for both population and load. This also showed that although the leveled throughput was relatively stable, it was still subject to fluctuations related to CPU constraints - especially as compaction of the ledger was a CPU intensive activity. Prior to moving on to full Riak testing, a number of changes where then made to leveled to reduce the CPU load during these merge events. 
@@ -38,7 +38,7 @@ Comparison charts for this test: Riak + leveled | Riak + eleveldb :-------------------------:|:-------------------------: -![](../test/volume/cluster_one/output/summary_leveled_5n_60t_i2_sync.png "LevelEd") | ![](../test/volume/cluster_one/output/summary_leveldb_5n_60t_i2_sync.png "LevelDB") +![](volume/cluster_one/output/summary_leveled_5n_60t_i2_sync.png "LevelEd") | ![](volume/cluster_one/output/summary_leveldb_5n_60t_i2_sync.png "LevelDB") ### Mid-Size Object, SSDs, No Sync-On-Write @@ -54,7 +54,7 @@ Comparison charts for this test: Riak + leveled | Riak + eleveldb :-------------------------:|:-------------------------: -![](../test/volume/cluster_two/output/summary_leveled_5n_100t_i2_nosync.png "LevelEd") | ![](../test/volume/cluster_two/output/summary_leveldb_5n_100t_i2_nosync.png "LevelDB") +![](volume/cluster_two/output/summary_leveled_5n_100t_i2_nosync.png "LevelEd") | ![](volume/cluster_two/output/summary_leveldb_5n_100t_i2_nosync.png "LevelDB") ### Mid-Size Object, HDDs, No Sync-On-Write @@ -70,7 +70,7 @@ Comparison charts for this test: Riak + leveled | Riak + eleveldb :-------------------------:|:-------------------------: -![](../test/volume/cluster_three/output/summary_leveled_5n_50t_d2_nosync.png "LevelEd") | ![](../test/volume/cluster_three/output/summary_leveldb_5n_50t_d2_nosync.png "LevelDB") +![](volume/cluster_three/output/summary_leveled_5n_50t_d2_nosync.png "LevelEd") | ![](volume/cluster_three/output/summary_leveldb_5n_50t_d2_nosync.png "LevelDB") Note that there is a clear inflexion point when throughput starts to drop sharply at about the hour mark into the test. This is the stage when the volume of data has begun to exceed the volume supportable in cache, and so disk activity begins to be required for GET operations with increasing frequency. 
@@ -89,7 +89,7 @@ Comparison charts for this test: Riak + leveled | Riak + eleveldb :-------------------------:|:-------------------------: -![](../test/volume/cluster_four/output/summary_leveled_5n_100t_i2_4KB_nosync.png "LevelEd") | ![](../test/volume/cluster_four/output/summary_leveldb_5n_100t_i2_4KB_nosync.png "LevelDB") +![](volume/cluster_four/output/summary_leveled_5n_100t_i2_4KB_nosync.png "LevelEd") | ![](volume/cluster_four/output/summary_leveldb_5n_100t_i2_4KB_nosync.png "LevelDB") ### Double-Size Object, SSDs, No Sync-On-Write @@ -106,14 +106,14 @@ Comparison charts for this test: Riak + leveled | Riak + eleveldb :-------------------------:|:-------------------------: -![](../test/volume/cluster_five/output/summary_leveled_5n_60t_i2_16KB_nosync.png "LevelEd") | ![](../test/volume/cluster_five/output/summary_leveldb_5n_60t_i2_16KB_nosync.png "LevelDB") +![](volume/cluster_five/output/summary_leveled_5n_60t_i2_16KB_nosync.png "LevelEd") | ![](volume/cluster_five/output/summary_leveldb_5n_60t_i2_16KB_nosync.png "LevelDB") ### Lies, damned lies etc The first thing to note about the test is the impact of the pareto distribution and the start from an empty store, on what is actually being tested. At the start of the test there is a 0% chance of a GET request actually finding an object. Normally, it will be 3 hours into the test before a GET request will have a 50% chance of finding an object. -![](../test/volume/cluster_two/output/NotPresentPerc.png "Percentage of GET requests being found at different leveled levels") +![](volume/cluster_two/output/NotPresentPerc.png "Percentage of GET requests being found at different leveled levels") Both leveled and leveldb are optimised for finding non-presence through the use of bloom filters, so the comparison is not unduly influenced by this. 
However, the workload at the end of the test is both more realistic (in that objects are found), and harder if the previous throughput had been greater (in that more objects are found). @@ -152,7 +152,7 @@ These tests have been completed using the following static characteristics which - 5 x i2.2x nodes, - 6 hour duration. -This is [a test used in Phase 1](https://github.com/martinsumner/leveled/blob/master/docs/VOLUME.md#mid-size-object-ssds-no-sync-on-write). Note that since Phase 1 was completed a number of performance improvements have been made in leveled, so that the starting gap between Riak/leveled and Riak/leveldb has widened. +This is [a test used in Phase 1](VOLUME.md#mid-size-object-ssds-no-sync-on-write). Note that since Phase 1 was completed a number of performance improvements have been made in leveled, so that the starting gap between Riak/leveled and Riak/leveldb has widened. The tests have been run using the new riak_kv_sweeper facility within develop. This feature is an alternative approach to controlling and scheduling rebuilds, allowing for other work to be scheduled into the same fold. 
As the test is focused on hashtree rebuilds, the test was run with: @@ -173,7 +173,7 @@ The comparison between leveled and leveldb shows a marked difference in throughp Riak + leveled | Riak + leveldb :-------------------------:|:-------------------------: -![](../test/volume/cluster_aae/output/summary_leveled_5n_100t_i2_nosync_inkcheckaae.png "LevelEd") | ![](../test/volume/cluster_aae/output/summary_leveldb_5n_100t_i2_nosync_sweeperaae.png "LevelDB") +![](volume/cluster_aae/output/summary_leveled_5n_100t_i2_nosync_inkcheckaae.png "LevelEd") | ![](volume/cluster_aae/output/summary_leveldb_5n_100t_i2_nosync_sweeperaae.png "LevelDB") The differences between the two tests are: @@ -231,7 +231,7 @@ As before, the Riak + leveled test had substantially lower tail latency, and ach Riak + leveled | Riak + leveldb :-------------------------:|:-------------------------: -![](../test/volume/cluster_journalcompact/output/summary_leveled_5n_80t_i2_nosync_jc.png "LevelEd") | ![](../test/volume/cluster_journalcompact/output/summary_leveldb_5n_80t_i2_nosync.png "LevelDB") +![](volume/cluster_journalcompact/output/summary_leveled_5n_80t_i2_nosync_jc.png "LevelEd") | ![](volume/cluster_journalcompact/output/summary_leveldb_5n_80t_i2_nosync.png "LevelDB") The throughput difference by hour of the test was: @@ -271,11 +271,9 @@ The secondary index test was built on a test which sent The query load is relatively light compared to GET/PUT load in-line with Basho recommendations (decline from 350 queries per second to 120 queries per second through the test). The queries return o(1000) results maximum towards the tail of the test and o(1) results at the start of the test. -Further details on the implementation of the secondary indexes for volume tests can be found in the [driver file](https://github.com/martinsumner/basho_bench/blob/mas-nhsload/src/basho_bench_driver_riakc_pb.erl) for the test. 
- Riak + leveled | Riak + leveldb :-------------------------:|:-------------------------: -![](../test/volume/cluster_2i/output/summary_leveled_5n_80t_i2_nosync_2i.png "LevelEd") | ![](../test/volume/cluster_2i/output/summary_leveldb_5n_80t_i2_nosync_2i.png "LevelDB") +![](volume/cluster_2i/output/summary_leveled_5n_80t_i2_nosync_2i.png "LevelEd") | ![](volume/cluster_2i/output/summary_leveldb_5n_80t_i2_nosync_2i.png "LevelDB") The results are similar as to previous tests. Although the test is on infrastructure with optimised disk throughput (and with no flushing to disk on write from Riak to minimise direct pressure from Riak), when running the tests with leveldb disk busyness rapidly becomes a constraining factor - and the reaction to that is volatility in throughput. Riak combined with leveldb is capable in short bursts of greater throughput than Riak + leveled, however when throttled within the cluster by a node or nodes with busy disks, the reaction is extreme. @@ -307,7 +305,7 @@ Here is a side-by-side on a standard Phase 1 test on i2, without sync, and with Riak + leveled | Riak + bitcask :-------------------------:|:-------------------------: -![](../test/volume/cluster_five/output/summary_leveled_5n_60t_i2_16KB_nosync.png "LevelEd") | ![](../test/volume/cluster_five/output/summary_bitcask_5n_60t_i2_16KB_nosync.png "LevelDB") +![](volume/cluster_five/output/summary_leveled_5n_60t_i2_16KB_nosync.png "LevelEd") | ![](volume/cluster_five/output/summary_bitcask_5n_60t_i2_16KB_nosync.png "LevelDB") In the first hour of the test, bitcask throughput is 39.13% greater than leveled. Over the whole test, the bitcask-backed cluster achieves 16.48% more throughput than leveled, but in the last hour this advantage is just 0.34%. 
diff --git a/docs/VOLUME_PRERIAK.md b/docs/VOLUME_PRERIAK.md index a307f9d..f58297d 100644 --- a/docs/VOLUME_PRERIAK.md +++ b/docs/VOLUME_PRERIAK.md @@ -2,17 +2,17 @@ ## Parallel Node Testing - Non-Riak -Initial volume tests have been [based on standard basho_bench eleveldb test](../test/volume/single_node/examples) to run multiple stores in parallel on the same node and and subjecting them to concurrent pressure. +Initial volume tests have been [based on standard basho_bench eleveldb test](volume/single_node/examples) to run multiple stores in parallel on the same node and subjecting them to concurrent pressure. This showed a relative positive performance for leveled for both population and load. Populate leveled | Populate eleveldb :-------------------------:|:-------------------------: -![](../test/volume/single_node/output/leveled_pop.png "LevelEd - Populate") | ![](../test/volume/single_node/output/leveldb_pop.png "LevelDB - Populate") +![](volume/single_node/output/leveled_pop.png "LevelEd - Populate") | ![](volume/single_node/output/leveldb_pop.png "LevelDB - Populate") Load leveled | Load eleveldb :-------------------------:|:-------------------------: -![](../test/volume/single_node/output/leveled_load.png "LevelEd - Populate") | ![](../test/volume/single_node/output/leveldb_load.png "LevelDB - Populate") +![](volume/single_node/output/leveled_load.png "LevelEd - Populate") | ![](volume/single_node/output/leveldb_load.png "LevelDB - Populate") This test was a positive comparison for LevelEd, but also showed that although the LevelEd throughput was relatively stable it was still subject to fluctuations related to CPU constraints. Prior to moving on to full Riak testing, a number of changes where then made to LevelEd to reduce the CPU load in particular during merge events. 
diff --git a/test/volume/cluster_2i/output/summary_leveldb_5n_80t_i2_nosync_2i.png b/docs/volume/cluster_2i/output/summary_leveldb_5n_80t_i2_nosync_2i.png similarity index 100% rename from test/volume/cluster_2i/output/summary_leveldb_5n_80t_i2_nosync_2i.png rename to docs/volume/cluster_2i/output/summary_leveldb_5n_80t_i2_nosync_2i.png diff --git a/test/volume/cluster_2i/output/summary_leveled_5n_80t_i2_nosync_2i.png b/docs/volume/cluster_2i/output/summary_leveled_5n_80t_i2_nosync_2i.png similarity index 100% rename from test/volume/cluster_2i/output/summary_leveled_5n_80t_i2_nosync_2i.png rename to docs/volume/cluster_2i/output/summary_leveled_5n_80t_i2_nosync_2i.png diff --git a/test/volume/cluster_aae/output/summary_leveldb_5n_100t_i2_nosync_sweeperaae.png b/docs/volume/cluster_aae/output/summary_leveldb_5n_100t_i2_nosync_sweeperaae.png similarity index 100% rename from test/volume/cluster_aae/output/summary_leveldb_5n_100t_i2_nosync_sweeperaae.png rename to docs/volume/cluster_aae/output/summary_leveldb_5n_100t_i2_nosync_sweeperaae.png diff --git a/test/volume/cluster_aae/output/summary_leveled_5n_100t_i2_nosync_inkcheckaae.png b/docs/volume/cluster_aae/output/summary_leveled_5n_100t_i2_nosync_inkcheckaae.png similarity index 100% rename from test/volume/cluster_aae/output/summary_leveled_5n_100t_i2_nosync_inkcheckaae.png rename to docs/volume/cluster_aae/output/summary_leveled_5n_100t_i2_nosync_inkcheckaae.png diff --git a/test/volume/cluster_five/output/summary_bitcask_5n_60t_i2_16KB_nosync.png b/docs/volume/cluster_five/output/summary_bitcask_5n_60t_i2_16KB_nosync.png similarity index 100% rename from test/volume/cluster_five/output/summary_bitcask_5n_60t_i2_16KB_nosync.png rename to docs/volume/cluster_five/output/summary_bitcask_5n_60t_i2_16KB_nosync.png diff --git a/test/volume/cluster_five/output/summary_leveldb_5n_60t_i2_16KB_nosync.png b/docs/volume/cluster_five/output/summary_leveldb_5n_60t_i2_16KB_nosync.png similarity index 100% rename from 
test/volume/cluster_five/output/summary_leveldb_5n_60t_i2_16KB_nosync.png rename to docs/volume/cluster_five/output/summary_leveldb_5n_60t_i2_16KB_nosync.png diff --git a/test/volume/cluster_five/output/summary_leveled_5n_60t_i2_16KB_nosync.png b/docs/volume/cluster_five/output/summary_leveled_5n_60t_i2_16KB_nosync.png similarity index 100% rename from test/volume/cluster_five/output/summary_leveled_5n_60t_i2_16KB_nosync.png rename to docs/volume/cluster_five/output/summary_leveled_5n_60t_i2_16KB_nosync.png diff --git a/test/volume/cluster_four/output/summary_leveldb_5n_100t_i2_4KB_nosync.png b/docs/volume/cluster_four/output/summary_leveldb_5n_100t_i2_4KB_nosync.png similarity index 100% rename from test/volume/cluster_four/output/summary_leveldb_5n_100t_i2_4KB_nosync.png rename to docs/volume/cluster_four/output/summary_leveldb_5n_100t_i2_4KB_nosync.png diff --git a/test/volume/cluster_four/output/summary_leveled_5n_100t_i2_4KB_nosync.png b/docs/volume/cluster_four/output/summary_leveled_5n_100t_i2_4KB_nosync.png similarity index 100% rename from test/volume/cluster_four/output/summary_leveled_5n_100t_i2_4KB_nosync.png rename to docs/volume/cluster_four/output/summary_leveled_5n_100t_i2_4KB_nosync.png diff --git a/test/volume/cluster_journalcompact/output/summary_leveldb_5n_80t_i2_nosync.png b/docs/volume/cluster_journalcompact/output/summary_leveldb_5n_80t_i2_nosync.png similarity index 100% rename from test/volume/cluster_journalcompact/output/summary_leveldb_5n_80t_i2_nosync.png rename to docs/volume/cluster_journalcompact/output/summary_leveldb_5n_80t_i2_nosync.png diff --git a/test/volume/cluster_journalcompact/output/summary_leveled_5n_80t_i2_nosync_jc.png b/docs/volume/cluster_journalcompact/output/summary_leveled_5n_80t_i2_nosync_jc.png similarity index 100% rename from test/volume/cluster_journalcompact/output/summary_leveled_5n_80t_i2_nosync_jc.png rename to docs/volume/cluster_journalcompact/output/summary_leveled_5n_80t_i2_nosync_jc.png diff --git 
a/test/volume/cluster_one/output/summary_leveldb_5n_60t_i2_sync.png b/docs/volume/cluster_one/output/summary_leveldb_5n_60t_i2_sync.png similarity index 100% rename from test/volume/cluster_one/output/summary_leveldb_5n_60t_i2_sync.png rename to docs/volume/cluster_one/output/summary_leveldb_5n_60t_i2_sync.png diff --git a/test/volume/cluster_one/output/summary_leveled_5n_60t_i2_sync.png b/docs/volume/cluster_one/output/summary_leveled_5n_60t_i2_sync.png similarity index 100% rename from test/volume/cluster_one/output/summary_leveled_5n_60t_i2_sync.png rename to docs/volume/cluster_one/output/summary_leveled_5n_60t_i2_sync.png diff --git a/test/volume/cluster_three/output/summary_leveldb_5n_50t_d2_nosync.png b/docs/volume/cluster_three/output/summary_leveldb_5n_50t_d2_nosync.png similarity index 100% rename from test/volume/cluster_three/output/summary_leveldb_5n_50t_d2_nosync.png rename to docs/volume/cluster_three/output/summary_leveldb_5n_50t_d2_nosync.png diff --git a/test/volume/cluster_three/output/summary_leveled_5n_50t_d2_nosync.png b/docs/volume/cluster_three/output/summary_leveled_5n_50t_d2_nosync.png similarity index 100% rename from test/volume/cluster_three/output/summary_leveled_5n_50t_d2_nosync.png rename to docs/volume/cluster_three/output/summary_leveled_5n_50t_d2_nosync.png diff --git a/test/volume/cluster_two/output/NotPresentPerc.png b/docs/volume/cluster_two/output/NotPresentPerc.png similarity index 100% rename from test/volume/cluster_two/output/NotPresentPerc.png rename to docs/volume/cluster_two/output/NotPresentPerc.png diff --git a/test/volume/cluster_two/output/summary_leveldb_5n_100t_i2_nosync.png b/docs/volume/cluster_two/output/summary_leveldb_5n_100t_i2_nosync.png similarity index 100% rename from test/volume/cluster_two/output/summary_leveldb_5n_100t_i2_nosync.png rename to docs/volume/cluster_two/output/summary_leveldb_5n_100t_i2_nosync.png diff --git a/test/volume/cluster_two/output/summary_leveled_5n_100t_i2_nosync.png 
b/docs/volume/cluster_two/output/summary_leveled_5n_100t_i2_nosync.png similarity index 100% rename from test/volume/cluster_two/output/summary_leveled_5n_100t_i2_nosync.png rename to docs/volume/cluster_two/output/summary_leveled_5n_100t_i2_nosync.png diff --git a/test/volume/single_node/examples/eleveldb_load.config b/docs/volume/single_node/examples/eleveldb_load.config similarity index 100% rename from test/volume/single_node/examples/eleveldb_load.config rename to docs/volume/single_node/examples/eleveldb_load.config diff --git a/test/volume/single_node/examples/eleveldb_pop.config b/docs/volume/single_node/examples/eleveldb_pop.config similarity index 100% rename from test/volume/single_node/examples/eleveldb_pop.config rename to docs/volume/single_node/examples/eleveldb_pop.config diff --git a/test/volume/single_node/examples/eleveleddb_load.config b/docs/volume/single_node/examples/eleveleddb_load.config similarity index 100% rename from test/volume/single_node/examples/eleveleddb_load.config rename to docs/volume/single_node/examples/eleveleddb_load.config diff --git a/test/volume/single_node/examples/eleveleddb_pop.config b/docs/volume/single_node/examples/eleveleddb_pop.config similarity index 100% rename from test/volume/single_node/examples/eleveleddb_pop.config rename to docs/volume/single_node/examples/eleveleddb_pop.config diff --git a/test/volume/single_node/output/leveldb_load.png b/docs/volume/single_node/output/leveldb_load.png similarity index 100% rename from test/volume/single_node/output/leveldb_load.png rename to docs/volume/single_node/output/leveldb_load.png diff --git a/test/volume/single_node/output/leveldb_pop.png b/docs/volume/single_node/output/leveldb_pop.png similarity index 100% rename from test/volume/single_node/output/leveldb_pop.png rename to docs/volume/single_node/output/leveldb_pop.png diff --git a/test/volume/single_node/output/leveled_load.png b/docs/volume/single_node/output/leveled_load.png similarity index 100% rename 
from test/volume/single_node/output/leveled_load.png rename to docs/volume/single_node/output/leveled_load.png diff --git a/test/volume/single_node/output/leveled_pop.png b/docs/volume/single_node/output/leveled_pop.png similarity index 100% rename from test/volume/single_node/output/leveled_pop.png rename to docs/volume/single_node/output/leveled_pop.png diff --git a/test/volume/single_node/src/basho_bench_driver_eleveleddb.erl b/docs/volume/single_node/src/basho_bench_driver_eleveleddb.erl similarity index 100% rename from test/volume/single_node/src/basho_bench_driver_eleveleddb.erl rename to docs/volume/single_node/src/basho_bench_driver_eleveleddb.erl diff --git a/rebar.config b/rebar.config index c205046..ed8bb76 100644 --- a/rebar.config +++ b/rebar.config @@ -9,14 +9,24 @@ {xref_checks, [undefined_function_calls,undefined_functions]}. +{cover_excl_mods, + [testutil, + appdefined_SUITE, basic_SUITE, iterator_SUITE, + perf_SUITE, recovery_SUITE, riak_SUITE, tictac_SUITE]}. + {eunit_opts, [verbose]}. {profiles, [{eqc, [{deps, [meck, fqc]}, {erl_opts, [debug_info, {parse_transform, eqc_cover}]}, - {extra_src_dirs, ["test"]}]} + {extra_src_dirs, ["test"]}]}, + {test, [ + {eunit_compile_opts, [{src_dirs, ["src", "test/end_to_end"]}]} + ]} ]}. {deps, [ {lz4, ".*", {git, "https://github.com/martinsumner/erlang-lz4", {tag, "0.2.5"}}} ]}. + +{ct_opts, [{dir, ["test/end_to_end"]}]}. 
diff --git a/rebar3 b/rebar3 index 00494f9..a592824 100755 Binary files a/rebar3 and b/rebar3 differ diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index b2d9629..45c89c2 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -2835,9 +2835,9 @@ foldobjects_vs_hashtree_testto() -> fun(B, K, ProxyV, Acc) -> {proxy_object, MD, - _Size, + _Size1, _Fetcher} = binary_to_term(ProxyV), - {Hash, _Size, _UserDefinedMD} = MD, + {Hash, _Size0, _UserDefinedMD} = MD, [{B, K, Hash}|Acc] end, diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index d6f5e7f..e62e67b 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -1594,7 +1594,7 @@ read_integerpairs(<>, Pairs) -> %% false - don't check the CRC before returning key & value %% loose_presence - confirm that the hash of the key is present search_hash_table(_Handle, - {_, _, _TotalSlots, _TotalSlots}, + {_, _, TotalSlots, TotalSlots}, _Hash, _Key, _QuickCheck, _BinaryMode, Timings) -> % We have done the full loop - value must not be present diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index 4eb34c6..9c29c1c 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -1407,7 +1407,7 @@ compact_journal_testto(WRP, ExpectedFiles) -> build_dummy_journal(fun test_ledgerkey/1), {ok, Ink1} = ink_start(InkOpts), - {ok, NewSQN1, _ObjSize} = ink_put(Ink1, + {ok, NewSQN1, ObjSize} = ink_put(Ink1, test_ledgerkey("KeyAA"), "TestValueAA", {[], infinity}), @@ -1427,7 +1427,7 @@ compact_journal_testto(WRP, ExpectedFiles) -> {SQN, test_ledgerkey(PK)} end, FunnyLoop), - {ok, NewSQN2, _ObjSize} = ink_put(Ink1, + {ok, NewSQN2, ObjSize} = ink_put(Ink1, test_ledgerkey("KeyBB"), "TestValueBB", {[], infinity}), diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index 6fc2c8a..cff1799 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -300,16 +300,16 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> {ok, Pid, {SK, EK}, Bloom} end. 
--spec sst_newmerge(string(), string(), - list(leveled_codec:ledger_kv()|sst_pointer()), +-spec sst_newmerge(string(), string(), list(leveled_codec:ledger_kv()|sst_pointer()), - boolean(), integer(), + list(leveled_codec:ledger_kv()|sst_pointer()), + boolean(), integer(), integer(), sst_options()) - -> empty|{ok, pid(), - {{list(leveled_codec:ledger_kv()), - list(leveled_codec:ledger_kv())}, - leveled_codec:ledger_key(), - leveled_codec:ledger_key()}, + -> empty|{ok, pid(), + {{list(leveled_codec:ledger_kv()), + list(leveled_codec:ledger_kv())}, + leveled_codec:ledger_key(), + leveled_codec:ledger_key()}, binary()}. %% @doc %% Start a new SST file at the assigned level passing in a two lists of @@ -322,11 +322,11 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> %% be that the merge_lists returns nothing (for example when a basement file is %% all tombstones) - and the atom empty is returned in this case so that the %% file is not added to the manifest. -sst_newmerge(RootPath, Filename, - KVL1, KVL2, IsBasement, Level, +sst_newmerge(RootPath, Filename, + KVL1, KVL2, IsBasement, Level, MaxSQN, OptsSST) -> - sst_newmerge(RootPath, Filename, - KVL1, KVL2, IsBasement, Level, + sst_newmerge(RootPath, Filename, + KVL1, KVL2, IsBasement, Level, MaxSQN, OptsSST, ?INDEX_MODDATE, ?TOMB_COUNT). sst_newmerge(RootPath, Filename, @@ -1036,10 +1036,8 @@ sst_getfilteredslots(Pid, SlotList, SegList, LowLastMod) -> non_neg_integer()) -> list(non_neg_integer()). %% @doc %% Find a list of positions where there is an element with a matching segment -%% ID to the expected segments (which cna either be a single segment, a list of +%% ID to the expected segments (which can either be a single segment, a list of %% segments or a set of segments depending on size. 
-find_pos(<<>>, _Hash, PosList, _Count) -> - PosList; find_pos(<<1:1/integer, PotentialHit:15/integer, T/binary>>, Checker, PosList, Count) -> case member_check(PotentialHit, Checker) of @@ -1049,7 +1047,12 @@ find_pos(<<1:1/integer, PotentialHit:15/integer, T/binary>>, find_pos(T, Checker, PosList, Count + 1) end; find_pos(<<0:1/integer, NHC:7/integer, T/binary>>, Checker, PosList, Count) -> - find_pos(T, Checker, PosList, Count + NHC + 1). + find_pos(T, Checker, PosList, Count + NHC + 1); +find_pos(_BinRem, _Hash, PosList, _Count) -> + %% Expect this to be <<>> - i.e. at end of binary, but if there is + %% corruption, could be some other value - so return as well in this + %% case + PosList. -spec member_check(non_neg_integer(), @@ -2858,14 +2861,14 @@ update_timings(SW, Timings, Stage, Continue) -> -define(TEST_AREA, "test/test_area/"). testsst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) -> - OptsSST = + OptsSST = #sst_options{press_method=PressMethod, log_options=leveled_log:get_opts()}, sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, false). 
-testsst_new(RootPath, Filename, +testsst_new(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, PressMethod) -> - OptsSST = + OptsSST = #sst_options{press_method=PressMethod, log_options=leveled_log:get_opts()}, sst_newmerge(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, @@ -2949,17 +2952,17 @@ tombcount_test() -> OptsSST = #sst_options{press_method=native, log_options=leveled_log:get_opts()}, - {ok, SST1, _KD, _BB} = sst_newmerge(RP, Filename, - KVL1, KVL2, false, 2, - N, OptsSST, false, false), + {ok, SST1, KD, BB} = sst_newmerge(RP, Filename, + KVL1, KVL2, false, 2, + N, OptsSST, false, false), ?assertMatch(not_counted, sst_gettombcount(SST1)), ok = sst_close(SST1), ok = file:delete(filename:join(RP, Filename ++ ".sst")), - {ok, SST2, _KD, _BB} = sst_newmerge(RP, Filename, - KVL1, KVL2, false, 2, - N, OptsSST, false, true), - + {ok, SST2, KD, BB} = sst_newmerge(RP, Filename, + KVL1, KVL2, false, 2, + N, OptsSST, false, true), + ?assertMatch(ExpectedCount, sst_gettombcount(SST2)), ok = sst_close(SST2), ok = file:delete(filename:join(RP, Filename ++ ".sst")). 
@@ -3012,7 +3015,7 @@ indexed_list_test() -> SW0 = os:timestamp(), - {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = + {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = generate_binary_slot(lookup, KVL1, native, ?INDEX_MODDATE, no_timing), io:format(user, "Indexed list created slot in ~w microseconds of size ~w~n", @@ -3041,7 +3044,7 @@ indexed_list_mixedkeys_test() -> KVL1 = lists:sublist(KVL0, 33), Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), - {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = + {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE, no_timing), {TestK1, TestV1} = lists:nth(4, KVL1), @@ -3068,7 +3071,7 @@ indexed_list_mixedkeys2_test() -> IdxKeys2 = lists:ukeysort(1, generate_indexkeys(30)), % this isn't actually ordered correctly Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2, - {{_Header, FullBin, _HL, _LK}, no_timing} = + {{_Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot(lookup, Keys, native, ?INDEX_MODDATE, no_timing), lists:foreach(fun({K, V}) -> MH = leveled_codec:segment_hash(K), @@ -3079,9 +3082,9 @@ indexed_list_mixedkeys2_test() -> indexed_list_allindexkeys_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), ?LOOK_SLOTSIZE), - {{HeaderT, FullBinT, _HL, _LK}, no_timing} = + {{HeaderT, FullBinT, HL, LK}, no_timing} = generate_binary_slot(lookup, Keys, native, true, no_timing), - {{HeaderF, FullBinF, _HL, _LK}, no_timing} = + {{HeaderF, FullBinF, HL, LK}, no_timing} = generate_binary_slot(lookup, Keys, native, false, no_timing), EmptySlotSize = ?LOOK_SLOTSIZE - 1, LMD = ?FLIPPER32, @@ -3172,6 +3175,9 @@ indexed_list_allindexkeys_trimmed_test() -> ?assertMatch(R3, O3). +findposfrag_test() -> + ?assertMatch([], find_pos(<<128:8/integer>>, 1, [], 0)). 
+ indexed_list_mixedkeys_bitflip_test() -> KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)), KVL1 = lists:sublist(KVL0, 33), @@ -3333,7 +3339,7 @@ simple_persisted_range_tester(SSTNewFun) -> KVList1 = lists:ukeysort(1, KVList0), [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), - {ok, Pid, {FirstKey, LastKey}, _Bloom} = + {ok, Pid, {FirstKey, LastKey}, _Bloom} = SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), native), {o, B, K, null} = LastKey, @@ -3375,7 +3381,7 @@ simple_persisted_rangesegfilter_tester(SSTNewFun) -> KVList1 = lists:ukeysort(1, KVList0), [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), - {ok, Pid, {FirstKey, LastKey}, _Bloom} = + {ok, Pid, {FirstKey, LastKey}, _Bloom} = SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), native), SK1 = element(1, lists:nth(124, KVList1)), @@ -3523,7 +3529,7 @@ simple_persisted_slotsize_tester(SSTNewFun) -> ?LOOK_SLOTSIZE), [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), - {ok, Pid, {FirstKey, LastKey}, _Bloom} = + {ok, Pid, {FirstKey, LastKey}, _Bloom} = SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), native), lists:foreach(fun({K, V}) -> ?assertMatch({K, V}, sst_get(Pid, K)) @@ -3564,7 +3570,7 @@ simple_persisted_tester(SSTNewFun) -> KVList1 = lists:ukeysort(1, KVList0), [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), - {ok, Pid, {FirstKey, LastKey}, _Bloom} = + {ok, Pid, {FirstKey, LastKey}, Bloom} = SSTNewFun(RP, Filename, Level, KVList1, length(KVList1), native), B0 = check_binary_references(Pid), @@ -3632,7 +3638,7 @@ simple_persisted_tester(SSTNewFun) -> ?assertMatch(SubKVList1L, length(FetchedList2)), ?assertMatch(SubKVList1, FetchedList2), - {Eight000Key, _v800} = lists:nth(800, KVList1), + {Eight000Key, V800} = lists:nth(800, KVList1), SubKVListA1 = lists:sublist(KVList1, 10, 791), SubKVListA1L = length(SubKVListA1), FetchListA2 = sst_getkvrange(Pid, TenthKey, Eight000Key, 2), @@ -3664,7 
+3670,7 @@ simple_persisted_tester(SSTNewFun) -> Eight000Key, 4), FetchedListB4 = lists:foldl(FoldFun, [], FetchListB4), - ?assertMatch([{Eight000Key, _v800}], FetchedListB4), + ?assertMatch([{Eight000Key, V800}], FetchedListB4), B1 = check_binary_references(Pid), @@ -3673,7 +3679,7 @@ simple_persisted_tester(SSTNewFun) -> io:format(user, "Reopen SST file~n", []), OptsSST = #sst_options{press_method=native, log_options=leveled_log:get_opts()}, - {ok, OpenP, {FirstKey, LastKey}, _Bloom} = + {ok, OpenP, {FirstKey, LastKey}, Bloom} = sst_open(RP, Filename ++ ".sst", OptsSST, Level), B2 = check_binary_references(OpenP), diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 4d3c256..5f5be28 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -244,8 +244,8 @@ alter_segment(Segment, Hash, Tree) -> %% Returns a list of segment IDs which hold differences between the state %% represented by the two trees. find_dirtyleaves(SrcTree, SnkTree) -> - _Size = SrcTree#tictactree.size, - _Size = SnkTree#tictactree.size, + Size = SrcTree#tictactree.size, + Size = SnkTree#tictactree.size, IdxList = find_dirtysegments(fetch_root(SrcTree), fetch_root(SnkTree)), SrcLeaves = fetch_leaves(SrcTree, IdxList), diff --git a/src/leveled_util.erl b/src/leveled_util.erl index c81d814..d654669 100644 --- a/src/leveled_util.erl +++ b/src/leveled_util.erl @@ -103,11 +103,11 @@ magichashperf_test() -> {K, X} end, KL = lists:map(KeyFun, lists:seq(1, 1000)), - {TimeMH, _HL1} = timer:tc(lists, map, [fun(K) -> magic_hash(K) end, KL]), + {TimeMH, HL1} = timer:tc(lists, map, [fun(K) -> magic_hash(K) end, KL]), io:format(user, "1000 keys magic hashed in ~w microseconds~n", [TimeMH]), {TimePH, _Hl2} = timer:tc(lists, map, [fun(K) -> erlang:phash2(K) end, KL]), io:format(user, "1000 keys phash2 hashed in ~w microseconds~n", [TimePH]), - {TimeMH2, _HL1} = timer:tc(lists, map, [fun(K) -> magic_hash(K) end, KL]), + {TimeMH2, HL1} = timer:tc(lists, map, [fun(K) -> magic_hash(K) end, 
KL]), io:format(user, "1000 keys magic hashed in ~w microseconds~n", [TimeMH2]). diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 0fe9a66..f6773f3 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -169,18 +169,7 @@ encode_maybe_binary(Bin) -> %% ================================================= sync_strategy() -> - case erlang:system_info(otp_release) of - "17" -> - sync; - "18" -> - sync; - "19" -> - sync; - _ -> - % running the sync strategy with OTP16 on macbook is - % super slow. So revert to no sync - none - end. + none. book_riakput(Pid, RiakObject, IndexSpecs) -> leveled_bookie:book_put(Pid, diff --git a/test/lookup_test.erl b/test/lookup_test.erl deleted file mode 100644 index e446b08..0000000 --- a/test/lookup_test.erl +++ /dev/null @@ -1,323 +0,0 @@ --module(lookup_test). - --export([go_dict/1, - go_ets/1, - go_gbtree/1, - go_arrayofdict/1, - go_arrayofgbtree/1, - go_arrayofdict_withcache/1, - create_blocks/3, - size_testblocks/1, - test_testblocks/2]). - --define(CACHE_SIZE, 512). - -hash(Key) -> - H = 5381, - hash1(H,Key) band 16#FFFFFFFF. - -hash1(H,[]) ->H; -hash1(H,[B|Rest]) -> - H1 = H * 33, - H2 = H1 bxor B, - hash1(H2,Rest). - -% Get the least significant 8 bits from the hash. -hash_to_index(Hash) -> - Hash band 255. - - -%% -%% Timings (microseconds): -%% -%% go_dict(200000) : 1569894 -%% go_dict(1000000) : 17191365 -%% go_dict(5000000) : forever - -go_dict(N) -> - go_dict(dict:new(), N, N). 
- -go_dict(_, 0, _) -> - {erlang:memory(), statistics(garbage_collection)}; -go_dict(D, N, M) -> - % Lookup a random key - which may not be present - LookupKey = lists:concat(["key-", leveled_rand:uniform(M)]), - LookupHash = hash(LookupKey), - dict:find(LookupHash, D), - - % Add a new key - which may be present so value to be appended - Key = lists:concat(["key-", N]), - Hash = hash(Key), - case dict:find(Hash, D) of - error -> - go_dict(dict:store(Hash, [N], D), N-1, M); - {ok, List} -> - go_dict(dict:store(Hash, [N|List], D), N-1, M) - end. - - - -%% -%% Timings (microseconds): -%% -%% go_ets(200000) : 609119 -%% go_ets(1000000) : 3520757 -%% go_ets(5000000) : 19974562 - -go_ets(N) -> - go_ets(ets:new(ets_test, [private, bag]), N, N). - -go_ets(_, 0, _) -> - {erlang:memory(), statistics(garbage_collection)}; -go_ets(Ets, N, M) -> - % Lookup a random key - which may not be present - LookupKey = lists:concat(["key-", leveled_rand:uniform(M)]), - LookupHash = hash(LookupKey), - ets:lookup(Ets, LookupHash), - - % Add a new key - which may be present so value to be appended - Key = lists:concat(["key-", N]), - Hash = hash(Key), - ets:insert(Ets, {Hash, N}), - go_ets(Ets, N - 1, M). - -%% -%% Timings (microseconds): -%% -%% go_gbtree(200000) : 1393936 -%% go_gbtree(1000000) : 8430997 -%% go_gbtree(5000000) : 45630810 - -go_gbtree(N) -> - go_gbtree(gb_trees:empty(), N, N). - -go_gbtree(_, 0, _) -> - {erlang:memory(), statistics(garbage_collection)}; -go_gbtree(Tree, N, M) -> - % Lookup a random key - which may not be present - LookupKey = lists:concat(["key-", leveled_rand:uniform(M)]), - LookupHash = hash(LookupKey), - gb_trees:lookup(LookupHash, Tree), - - % Add a new key - which may be present so value to be appended - Key = lists:concat(["key-", N]), - Hash = hash(Key), - case gb_trees:lookup(Hash, Tree) of - none -> - go_gbtree(gb_trees:insert(Hash, [N], Tree), N - 1, M); - {value, List} -> - go_gbtree(gb_trees:update(Hash, [N|List], Tree), N - 1, M) - end. 
- - -%% -%% Timings (microseconds): -%% -%% go_arrayofidict(200000) : 1266931 -%% go_arrayofidict(1000000) : 7387219 -%% go_arrayofidict(5000000) : 49511484 - -go_arrayofdict(N) -> - go_arrayofdict(array:new(256, {default, dict:new()}), N, N). - -go_arrayofdict(_, 0, _) -> - % dict:to_list(array:get(0, Array)), - % dict:to_list(array:get(1, Array)), - % dict:to_list(array:get(2, Array)), - % dict:to_list(array:get(3, Array)), - % dict:to_list(array:get(4, Array)), - % dict:to_list(array:get(5, Array)), - % dict:to_list(array:get(6, Array)), - % dict:to_list(array:get(7, Array)), - % dict:to_list(array:get(8, Array)), - % dict:to_list(array:get(9, Array)), - {erlang:memory(), statistics(garbage_collection)}; -go_arrayofdict(Array, N, M) -> - % Lookup a random key - which may not be present - LookupKey = lists:concat(["key-", leveled_rand:uniform(M)]), - LookupHash = hash(LookupKey), - LookupIndex = hash_to_index(LookupHash), - dict:find(LookupHash, array:get(LookupIndex, Array)), - - % Add a new key - which may be present so value to be appended - Key = lists:concat(["key-", N]), - Hash = hash(Key), - Index = hash_to_index(Hash), - D = array:get(Index, Array), - case dict:find(Hash, D) of - error -> - go_arrayofdict(array:set(Index, - dict:store(Hash, [N], D), Array), N-1, M); - {ok, List} -> - go_arrayofdict(array:set(Index, - dict:store(Hash, [N|List], D), Array), N-1, M) - end. - -%% -%% Timings (microseconds): -%% -%% go_arrayofgbtree(200000) : 1176224 -%% go_arrayofgbtree(1000000) : 7480653 -%% go_arrayofgbtree(5000000) : 41266701 - -go_arrayofgbtree(N) -> - go_arrayofgbtree(array:new(256, {default, gb_trees:empty()}), N, N). 
- -go_arrayofgbtree(_, 0, _) -> - % gb_trees:to_list(array:get(0, Array)), - % gb_trees:to_list(array:get(1, Array)), - % gb_trees:to_list(array:get(2, Array)), - % gb_trees:to_list(array:get(3, Array)), - % gb_trees:to_list(array:get(4, Array)), - % gb_trees:to_list(array:get(5, Array)), - % gb_trees:to_list(array:get(6, Array)), - % gb_trees:to_list(array:get(7, Array)), - % gb_trees:to_list(array:get(8, Array)), - % gb_trees:to_list(array:get(9, Array)), - {erlang:memory(), statistics(garbage_collection)}; -go_arrayofgbtree(Array, N, M) -> - % Lookup a random key - which may not be present - LookupKey = lists:concat(["key-", leveled_rand:uniform(M)]), - LookupHash = hash(LookupKey), - LookupIndex = hash_to_index(LookupHash), - gb_trees:lookup(LookupHash, array:get(LookupIndex, Array)), - - % Add a new key - which may be present so value to be appended - Key = lists:concat(["key-", N]), - Hash = hash(Key), - Index = hash_to_index(Hash), - Tree = array:get(Index, Array), - case gb_trees:lookup(Hash, Tree) of - none -> - go_arrayofgbtree(array:set(Index, - gb_trees:insert(Hash, [N], Tree), Array), N - 1, M); - {value, List} -> - go_arrayofgbtree(array:set(Index, - gb_trees:update(Hash, [N|List], Tree), Array), N - 1, M) - end. - - -%% -%% Timings (microseconds): -%% -%% go_arrayofdict_withcache(200000) : 1432951 -%% go_arrayofdict_withcache(1000000) : 9140169 -%% go_arrayofdict_withcache(5000000) : 59435511 - -go_arrayofdict_withcache(N) -> - go_arrayofdict_withcache({array:new(256, {default, dict:new()}), - array:new(256, {default, dict:new()})}, N, N). 
- -go_arrayofdict_withcache(_, 0, _) -> - {erlang:memory(), statistics(garbage_collection)}; -go_arrayofdict_withcache({MArray, CArray}, N, M) -> - % Lookup a random key - which may not be present - LookupKey = lists:concat(["key-", leveled_rand:uniform(M)]), - LookupHash = hash(LookupKey), - LookupIndex = hash_to_index(LookupHash), - dict:find(LookupHash, array:get(LookupIndex, CArray)), - dict:find(LookupHash, array:get(LookupIndex, MArray)), - - % Add a new key - which may be present so value to be appended - Key = lists:concat(["key-", N]), - Hash = hash(Key), - Index = hash_to_index(Hash), - Cache = array:get(Index, CArray), - case dict:find(Hash, Cache) of - error -> - UpdCache = dict:store(Hash, [N], Cache); - {ok, _} -> - UpdCache = dict:append(Hash, N, Cache) - end, - case dict:size(UpdCache) of - ?CACHE_SIZE -> - UpdCArray = array:set(Index, dict:new(), CArray), - UpdMArray = array:set(Index, dict:merge(fun merge_values/3, UpdCache, array:get(Index, MArray)), MArray), - go_arrayofdict_withcache({UpdMArray, UpdCArray}, N - 1, M); - _ -> - UpdCArray = array:set(Index, UpdCache, CArray), - go_arrayofdict_withcache({MArray, UpdCArray}, N - 1, M) - end. - - - -merge_values(_, Value1, Value2) -> - lists:append(Value1, Value2). - - -%% Some functions for testing options compressing term_to_binary - -create_block(N, BlockType) -> - case BlockType of - keylist -> - create_block(N, BlockType, []); - keygbtree -> - create_block(N, BlockType, gb_trees:empty()) - end. 
- -create_block(0, _, KeyStruct) -> - KeyStruct; -create_block(N, BlockType, KeyStruct) -> - Bucket = <<"pdsRecord">>, - case N of - 20 -> - Key = lists:concat(["key-20-special"]); - _ -> - Key = lists:concat(["key-", N, "-", leveled_rand:uniform(1000)]) - end, - SequenceNumber = leveled_rand:uniform(1000000000), - Indexes = [{<<"DateOfBirth_int">>, leveled_rand:uniform(10000)}, {<<"index1_bin">>, lists:concat([leveled_rand:uniform(1000), "SomeCommonText"])}, {<<"index2_bin">>, <<"RepetitionRepetitionRepetition">>}], - case BlockType of - keylist -> - Term = {o, Bucket, Key, {Indexes, SequenceNumber}}, - create_block(N-1, BlockType, [Term|KeyStruct]); - keygbtree -> - create_block(N-1, BlockType, gb_trees:insert({o, Bucket, Key}, {Indexes, SequenceNumber}, KeyStruct)) - end. - - -create_blocks(N, Compression, BlockType) -> - create_blocks(N, Compression, BlockType, 10000, []). - -create_blocks(_, _, _, 0, BlockList) -> - BlockList; -create_blocks(N, Compression, BlockType, TestLoops, BlockList) -> - NewBlock = term_to_binary(create_block(N, BlockType), [{compressed, Compression}]), - create_blocks(N, Compression, BlockType, TestLoops - 1, [NewBlock|BlockList]). - -size_testblocks(BlockList) -> - size_testblocks(BlockList,0). - -size_testblocks([], Acc) -> - Acc; -size_testblocks([H|T], Acc) -> - size_testblocks(T, Acc + byte_size(H)). - -test_testblocks([], _) -> - true; -test_testblocks([H|T], BlockType) -> - Block = binary_to_term(H), - case findkey("key-20-special", Block, BlockType) of - true -> - test_testblocks(T, BlockType); - not_found -> - false - end. - -findkey(_, [], keylist) -> - not_found; -findkey(Key, [H|T], keylist) -> - case H of - {o, <<"pdsRecord">>, Key, _} -> - true; - _ -> - findkey(Key,T, keylist) - end; -findkey(Key, Tree, keygbtree) -> - case gb_trees:lookup({o, <<"pdsRecord">>, Key}, Tree) of - none -> - not_found; - _ -> - true - end. 
- diff --git a/test/member_test.erl b/test/member_test.erl deleted file mode 100644 index 2803aea..0000000 --- a/test/member_test.erl +++ /dev/null @@ -1,51 +0,0 @@ --module(member_test). - --export([test_membership/0]). - --define(SEGMENTS_TO_CHECK, 32768). % a whole SST file --define(MEMBERSHIP_LENGTHS, [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]). - -segments(Length) -> - AllSegs = lists:seq(1, ?SEGMENTS_TO_CHECK), - AllSegsBin = - lists:foldl(fun(I, Acc) -> <> end, - <<>>, - AllSegs), - StartPos = leveled_rand:uniform(length(AllSegs) - Length), - {<>, - lists:sublist(AllSegs, StartPos, Length)}. - -test_membership(Length) -> - {AllSegsBin, TestList} = segments(Length), - ExpectedOutput = - lists:reverse(TestList ++ TestList ++ TestList ++ TestList), - - SW0 = os:timestamp(), - TestListFun = fun(I) -> lists:member(I, TestList) end, - true = test_binary(AllSegsBin, [], TestListFun) == ExpectedOutput, - ListT = timer:now_diff(os:timestamp(), SW0) / 131072, - - SW1 = os:timestamp(), - TestSet = sets:from_list(TestList), - TestSetsFun = fun(I) -> sets:is_element(I, TestSet) end, - true = test_binary(AllSegsBin, [], TestSetsFun) == ExpectedOutput, - SetsT = timer:now_diff(os:timestamp(), SW1) / 131072, - - io:format("Test with segment count ~w ..." - ++ " took ~w ms per 1000 checks with list ..." - ++ " took ~w ms per 1000 checks with set~n", [Length, ListT, SetsT]). - - -test_binary(<<>>, Acc, _TestFun) -> - Acc; -test_binary(<<0:1/integer, TestSeg:15/integer, Rest/binary>>, Acc, TestFun) -> - case TestFun(TestSeg) of - true -> - test_binary(Rest, [TestSeg|Acc], TestFun); - false -> - test_binary(Rest, Acc, TestFun) - end. - -test_membership() -> - lists:foreach(fun(I) -> test_membership(I) end, ?MEMBERSHIP_LENGTHS). 
\ No newline at end of file diff --git a/test/rice_test.erl b/test/rice_test.erl deleted file mode 100644 index 1a7c83d..0000000 --- a/test/rice_test.erl +++ /dev/null @@ -1,59 +0,0 @@ -%% Test performance and accuracy of rice-encoded bloom filters -%% -%% Calling check_negative(2048, 1000000) should return about 122 false -%% positives in around 11 seconds, with a size below 4KB -%% -%% The equivalent positive check is check_positive(2048, 488) and this -%% should take around 6 seconds. -%% -%% So a blooom with 2048 members should support o(100K) checks per second -%% on a modern CPU, whilst requiring 2 bytes per member. - --module(rice_test). - --export([check_positive/2, check_negative/2, calc_hash/2]). - - - -check_positive(KeyCount, LoopCount) -> - KeyList = produce_keylist(KeyCount), - Bloom = leveled_rice:create_bloom(KeyList), - check_positive(KeyList, Bloom, LoopCount). - -check_positive(_, Bloom, 0) -> - {ok, byte_size(Bloom)}; -check_positive(KeyList, Bloom, LoopCount) -> - true = leveled_rice:check_keys(KeyList, Bloom), - check_positive(KeyList, Bloom, LoopCount - 1). - - -produce_keylist(KeyCount) -> - KeyPrefix = lists:concat(["PositiveKey-", leveled_rand:uniform(KeyCount)]), - produce_keylist(KeyCount, [], KeyPrefix). - -produce_keylist(0, KeyList, _) -> - KeyList; -produce_keylist(KeyCount, KeyList, KeyPrefix) -> - Key = lists:concat([KeyPrefix, KeyCount]), - produce_keylist(KeyCount - 1, [Key|KeyList], KeyPrefix). - - -check_negative(KeyCount, CheckCount) -> - KeyList = produce_keylist(KeyCount), - Bloom = leveled_rice:create_bloom(KeyList), - check_negative(Bloom, CheckCount, 0). 
- -check_negative(Bloom, 0, FalsePos) -> - {byte_size(Bloom), FalsePos}; -check_negative(Bloom, CheckCount, FalsePos) -> - Key = lists:concat(["NegativeKey-", CheckCount, leveled_rand:uniform(CheckCount)]), - case leveled_rice:check_key(Key, Bloom) of - true -> check_negative(Bloom, CheckCount - 1, FalsePos + 1); - false -> check_negative(Bloom, CheckCount - 1, FalsePos) - end. - -calc_hash(_, 0) -> - ok; -calc_hash(Key, Count) -> - erlang:phash2(lists:concat([Key, Count, "sometxt"])), - calc_hash(Key, Count -1).