From ff3557d883af18d849789159c32b8c446d666d98 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Fri, 14 Oct 2011 12:29:28 -0500 Subject: [PATCH] add suport and documentation for signatures for dictionaries Signed-off-by: Jordan Wilberding --- doc/property_based_testing.md | 363 ++++++++++++++++++++++++++ doc/signatures.md | 462 ++++++++++++++++++++++++++++++++++ ebin/erlware_commons.app | 10 +- src/ec_assoc_list.erl | 103 ++++++++ src/ec_dict.erl | 107 ++++++++ src/ec_dictionary.erl | 158 ++++++++++++ src/ec_gb_trees.erl | 223 ++++++++++++++++ src/ec_orddict.erl | 107 ++++++++ src/ec_rbdict.erl | 319 +++++++++++++++++++++++ test/ec_dictionary_proper.erl | 223 ++++++++++++++++ test/mock.erl | 10 + 11 files changed, 2082 insertions(+), 3 deletions(-) create mode 100644 doc/property_based_testing.md create mode 100644 doc/signatures.md create mode 100644 src/ec_assoc_list.erl create mode 100644 src/ec_dict.erl create mode 100644 src/ec_dictionary.erl create mode 100644 src/ec_gb_trees.erl create mode 100644 src/ec_orddict.erl create mode 100644 src/ec_rbdict.erl create mode 100644 test/ec_dictionary_proper.erl create mode 100644 test/mock.erl diff --git a/doc/property_based_testing.md b/doc/property_based_testing.md new file mode 100644 index 0000000..414c962 --- /dev/null +++ b/doc/property_based_testing.md @@ -0,0 +1,363 @@ +Property based testing for unit testers +======================================= + +Main contributors: Torben Hoffmann, Raghav Karol, Eric Merritt + +The purpose of the short document is to help people who are familiar +with unit testing understand how property based testing (PBT) differs, +but also where the thinking is the same. + +This document focusses on the PBT tool +[`PropEr`](https://github.com/manopapad/proper) for Erlang since that is +what I am familiar with, but the general principles applies to all PBT +tools regardless of which language they are written in. 
+ +The approach taken here is that we hear from people who are used to +working with unit testing regarding how they think when designing +their tests and how a concrete test might look. + +These descriptions are then "converted" into the way it works with +PBT, with a clear focus on what stays the same and what is different. + +## Testing philosophies + +### A quote from Martin Logan: + +> For me unit testing is about contracts. I think about the same things +> I think about when I write statements like {ok, Resp} = +> Mod:Func(Args). Unit testing and writing specs are very close for me. +> Hypothetically speaking lets say a function should return return {ok, +> string()} | {error, term()} for all given input parameters then my +> unit tests should be able to show that for a representative set of +> input parameters that those contracts are honored. The art comes in +> thinking about what that set is. + + +The trap in writing all your own tests can often be that we think +about the set in terms of what we coded for and not what may indeed be +asked of our function. As the code is tried in further exploratory +testing and in production new input parameter sets for which the given +function does not meet the stated contract are discovered and added to +the test case once a fix has been put into place. + +This is a very good description of what the ground rules for unit +testing are: + +* Checking that contracts are obeyed. +* Creating a representative set of input parameters. + +The former is very much part of PBT - each property you write will +check a contract, so that thinking is the same. + +## xUnit vs PBT + +Unit testing has become popular for software testing with the advent +of xUnit tools like jUnit for Java. 
xUnit like tools typically +provide a testing framework with the following functionality + +* test fixture setup +* test case execution +* test fixture teardown +* test suite management +* test status reporting and management + +While xUnit tools provide a lot of functionality to execute and manage +test cases and suites, reporting results there is no focus on test +case execution step, while this is the main focus area of +property-based testing (PBT). + +Consider the following function specification + + :::erlang + sort(list::integer()) ---> list::integer() | error + +A verbal specification of this function is, + +> For all input lists of integers, the sort function returns a sorted +> list of integers. + +For any other kind of argument the function returns the atom error. + +The specification above may be a requirement of how the function +should behave or even how the function does behave. This distinction +is important; the former is the requirement for the function, the +latter is the actual API. Both should be the same and that is what our +testing should confirm. Test cases for this function might look like + + :::erlang + assertEqual(sort([5,4,3,2,1]), [1,2,3,4,5]) + assertEqual(sort([1,2,3,4,5]), [1,2,3,4,5]) + assertEqual(sort([] ), [] ) + assertEqual(sort([-1,0, 1] ), [-1, 0, 1] ) + +How many tests cases should we write to be convinced that the actual +behaviour of the function is the same as its specification? Clearly, +it is impossible to write tests cases for all possible input values, +here all lists of integers, the art of testing is finding individual +input values that are representative of a large part of the input +space. We hope that the test cases are exhaustive to cover the +specification. xUnit tools offer no support for this and this is where +PBT and PBT Tools like `PropEr` and `QuickCheck` come in. + +PBT introduces testing with a large set of random input values and +verifying that the specification holds for each input value +selected. 
Functions used to generate input values, generators, are +specified using rules and can be simply composed together to construct +complicated values. So, a property based test for the function above +may look like: + + :::erlang + FOREACH({I, J, InputList}, {nat(), nat(), integer_list()}, + SUCHTHAT(I < J andalso J < length(InputList), + SortedList = sort(InputList) + length(SortedList) == length(InputList) + andalso + lists:get(SortedList, I) =< lists:get(SortedList, J)) + + +The property above works as follows + +* Generate a random list of integers `InputList` and two natural numbers + I, J, such that I < J < size of `InputList` +* Check that size of sorted and input lists is the same. +* Check that element with smaller index I is less than or equal to + element with larger index J in `SortedList`. + +Notice in the property above, we *specify* the property. Verification of +the property based on random input values will be done by the property +based tool, therefore we can generate a large number of test cases +with random input values and have a higher level of confidence in +the function than when using unit tests alone. + +But it does not stop at generation of input parameters. If you have +more complex tests where you have to generate a series of events and +keep track of some state then your PBT tool will generate random +sequences of events which correspond to legal sequences of events and +test that your system behaves correctly for all sequences. + +So when you have written a property with associated generators you +have in fact created something that can create numerous test cases - +you just have to tell your PBT tool how many test cases you want to +check the property on. + +## Shrinking the bar + +At this point you might still have the feeling that introducing the +notion of some sort of generators to your unit testing tool of choice +would bring you on par with PBT tools, but wait there is more to +come.
+ +When a PBT tool creates a test case that fails there is a real chance +that it has created a long test case or some big input parameters - +trying to debug that is very much like receiving a humongous log from +a system in the field and trying to figure out what caused the system to +fail. + +Enter shrinking... + +When a test case fails the PBT tool will try to shrink the failing +test case down to the essentials by stripping out input elements or +events that do not cause the failure. In most cases this results in +a very short counterexample that clearly states which events and +inputs are required to break a property. + +As we go through some concrete examples later the effects of shrinking +will be shown. + +Shrinking makes it a lot easier to debug problems and is as key to the +strength of PBT as the generators. + +## Converting a unit test + +We will now take a look at one possible way of translating a unit +test into a PBT setting. + +The example comes from Eric Merritt and is about the `add/3` function in +the `ec_dictionary` instance `ec_gb_trees`. + +The add function has the following spec: + + :::erlang + -spec add(ec_dictionary:key(), ec_dictionary:value(), Object::dictionary()) -> + dictionary(). + +and it is supposed to do the obvious: add the key and value pair to +the dictionary and return a new dictionary. + +Eric states his basic expectations as follows: + +1. I can put arbitrary terms into the dictionary as keys +2. I can put arbitrary terms into the dictionary as values +3. When I put a value in the dictionary by a key, I can retrieve that same value +4. When I put a different value in the dictionary by key it does not change other key value pairs. +5. When I update a value the new value is available by the new key +6. When a value does not exist a not found exception is created + +The first two expectations regarding being able to use arbitrary +terms as keys and values are a job for generators.
+ +The latter four are prime candidates for properties and we will create +one for each of them. + +### Generators + + :::erlang + key() -> any(). + + value() -> any(). + + +For `PropEr` this approach has the drawback that creation and shrinking +become rather time consuming, so it might be better to narrow to +something like this: + + :::erlang + key() -> union([integer(),atom()]). + + value() -> union([integer(),atom(),binary(),boolean(),string()]). + +What is best depends on the situation and intended usage. + +Now, being able to generate keys and values is not enough. You also +have to tell `PropEr` how to create a dictionary and in this case we +will use a symbolic generator (detail to be explained later). + + :::erlang + sym_dict() -> + ?SIZED(N,sym_dict(N)). + + sym_dict(0) -> + {'$call',ec_dictionary,new,[ec_gb_trees]}; + sym_dict(N) -> + ?LAZY( + frequency([ + {1, {'$call',ec_dictionary,remove,[key(),sym_dict(N-1)]}}, + {2, {'$call',ec_dictionary,add,[value(),value(),sym_dict(N-1)]}} + ])). + + +`sym_dict/0` uses the `?SIZED` macro to control the size of the +generated dictionary. `PropEr` will start out with small numbers and +gradually raise it. + +`sym_dict/1` is building a dictionary by randomly adding key/value +pairs and removing keys. Eventually the base case is reached which +will create an empty dictionary. + +The `?LAZY` macro is used to defer the calculation of the +`sym_dict(N-1)` until it is needed and `frequency/1` is used +to ensure that twice as many adds compared to removes are done. This +should give rather more interesting dictionaries in the long run, if +not one can alter the frequencies accordingly. + +But does it really work? + +That is a good question and one that should always be asked when +looking at generators. Fortunately there is a way to see what a +generator produces provided that the generator functions are exported.
+ +Hint: in most cases it will not hurt to throw in a +`-compile(export_all).` in the module used to specify the +properties. And here we actually have a sub-hint: specify the +properties in a separate file to avoid peeking inside the +implementation! Base the test on the published API as this is what the +users of the code will be restricted to. + +When the test module has been loaded you can test the generators by +starting up an Erlang shell (this example uses the erlware_commons +code so get yourself a clone to play with): + + :::sh + $ erl -pz ebin -pz test + 1> proper_gen:pick(ec_dictionary_proper:key()). + {ok,4} + 2> proper_gen:pick(ec_dictionary_proper:key()). + {ok,35} + 3> proper_gen:pick(ec_dictionary_proper:key()). + {ok,-5} + 4> proper_gen:pick(ec_dictionary_proper:key()). + {ok,48} + 5> proper_gen:pick(ec_dictionary_proper:key()). + {ok,'\036\207_là´?\nc'} + 6> proper_gen:pick(ec_dictionary_proper:value()). + {ok,2} + 7> proper_gen:pick(ec_dictionary_proper:value()). + {ok,-14} + 8> proper_gen:pick(ec_dictionary_proper:value()). + {ok,-3} + 9> proper_gen:pick(ec_dictionary_proper:value()). + {ok,27} + 10> proper_gen:pick(ec_dictionary_proper:value()). + {ok,-8} + 11> proper_gen:pick(ec_dictionary_proper:value()). + {ok,[472765,17121]} + 12> proper_gen:pick(ec_dictionary_proper:value()). + {ok,true} + 13> proper_gen:pick(ec_dictionary_proper:value()). + {ok,<<>>} + 14> proper_gen:pick(ec_dictionary_proper:value()). + {ok,<<89,69,18,148,32,42,238,101>>} + 15> proper_gen:pick(ec_dictionary_proper:sym_dict()). + {ok,{'$call',ec_dictionary,add, + [[114776,1053475], + 'fª\020\227\215', + {'$call',ec_dictionary,add, + ['',true, + {'$call',ec_dictionary,add, + ['2^Ø¡', + [900408,886056], + {'$call',ec_dictionary,add,[[48618|...],<<...>>|...]}]}]}]}} + 16> proper_gen:pick(ec_dictionary_proper:sym_dict()). 
+ {ok,{'$call',ec_dictionary,add, + [10,'a¯\214\031fõC', + {'$call',ec_dictionary,add, + [false,-1, + {'$call',ec_dictionary,remove, + ['d·ÉV÷[', + {'$call',ec_dictionary,remove,[12,{'$call',...}]}]}]}]}} + +That does not look too bad, so we will continue with that for now. + + +### Properties of `add/2` + +The first expectation Eric had about how the dictionary works was that +if a key had been stored it could be retrieved. + +One way of expressing this could be with this property: + + :::erlang + prop_get_after_add_returns_correct_value() -> + ?FORALL({Dict,K,V}, {sym_dict(),key(),value()}, + begin + try ec_dictionary:get(K,ec_dictionary:add(K,V,Dict)) of + V -> + true; + _ -> + false + catch + _:_ -> + false + end + end). + +This property reads that for all dictionaries `get/2` using a key +from a key/value pair just inserted using the `add/3` function +will return that value. If that is not the case the property will +evaluate to false. + +Running the property is done using `proper:quickcheck/1`: + + :::sh + proper:quickcheck(ec_dictionary_proper:prop_get_after_add_returns_correct_value()). + .................................................................................................... + OK: Passed 100 test(s). + true + + +This was as expected, but at this point we will take a little detour +and introduce a mistake in the `ec_gb_trees` implementation and see +how that works. + + + diff --git a/doc/signatures.md b/doc/signatures.md new file mode 100644 index 0000000..cf95960 --- /dev/null +++ b/doc/signatures.md @@ -0,0 +1,462 @@ +Signatures +========== + +It often occurs in coding that we need a library, a set of +functionaly. Often there are several algorithms that could provide +this functionality. However, the code that uses it, either doesn't +care about the individual algorithm or wishes to delegate choosing +that algorithm to some higher level. Lets take the concrete example of +dictionaries. 
A dictionary provides the ability to access a value via +a key (other things as well but primarily this). There are many ways to +implement a dictionary. Just a few are: + +* [Associative Arrays](http://en.wikipedia.org/wiki/Associative_array) +* [Binary Trees](http://en.wikipedia.org/wiki/Binary_tree) +* [Hash Tables](http://en.wikipedia.org/wiki/Hash_table#Performance_analysis) +* [Skip Lists](http://en.wikipedia.org/wiki/Skip_list) +* Many, many more .... + +Each of these approaches has its own performance characteristics, +memory footprints etc. For example, a table of size n with open +addressing has no collisions and holds up to n elements, with a single +comparison for successful lookup, and a table of size n with chaining +and k keys has the minimum max(0, k-n) collisions and O(1 + k/n) +comparisons for lookup. While for skip lists the performance +characteristics are about as good as that of randomly-built binary +search trees - namely O(log n). So the choice of which to select +depends very much on memory available, insert/read characteristics, +etc. So delegating the choice to a single point in your code is a very +good idea. Unfortunately, in Erlang that's not so easy to do at the moment. + +Other languages have built-in support for this +functionality. [Java](http://en.wikipedia.org/wiki/Java_(programming_language)) +has +[Interfaces](http://download.oracle.com/javase/tutorial/java/IandI/createinterface.html), +[SML](http://en.wikipedia.org/wiki/Standard_ML) has +[Signatures](http://en.wikipedia.org/wiki/Standard_ML#Module_system). +Erlang, though, doesn't currently support this model, at least not +directly. There are a few ways you can approximate it. One way is to +pass the Module name to the calling functions along with the data that +it is going to be called on. + + :::erlang + add(ModuleToUse, Key, Value, DictData) -> + ModuleToUse:add(Key, Value, DictData). + +This works, and you can vary how you want to pass the data.
For +example, you could easily use a tuple to contain the data. That is, +you could pass in `{ModuleToUse, DictData}` and that would make it a +bit cleaner. + :::erlang + add(Key, Value, {ModuleToUse, DictData}) -> + ModuleToUse:add(Key, Value, DictData). + +Either way, there are a few problems with this approach. One of the +biggest is that you lose code locality; by looking at this bit of code +you don't know what `ModuleToUse` is at all. You would need to follow +the call chain up to figure out what it is. Also it may not be obvious +what is actually happening. The fact that `ModuleToUse` is a variable +name obscures the code making it harder to understand. The other big +problem is that the tools provided with Erlang can't help find +mistakes that you might have made. Tools like +[Xref](http://www.erlang.org/doc/man/xref.html) and +[Dialyzer](http://www.erlang.org/doc/man/dialyzer.html) have just as +hard a time figuring out what `ModuleToUse` is pointing to as you +do. So they can't give you warnings about potential problems. In fact +someone could inadvertently pass an unexpected module name as +`ModuleToUse` and you would never get any warnings, just an exception +at run time. + +Fortunately, Erlang is a pretty flexible language so we can use a +similar approach with a few adjustments to give us the best of both +worlds. Both the flexibility of ignoring a specific implementation +and keeping all the nice locality we get by using an explicit module +name. + +So what we actually want to do is something more like this: + + :::erlang + add(Key, Value, DictData) -> + dictionary:add(Key, Value, DictData). + +Doing this we retain the locality. We can easily look up the +`dictionary` Module. We immediately have a good idea what a +`dictionary` actually is and we know what functions we are +calling. Also, all the tools know what a `dictionary` is as well and +how to check that your code is calling it correctly.
For all of these +reasons, this is a much better approach to the problem. This is what +*Signatures* are all about. + +Signatures +---------- + +How do we actually do this in Erlang now that Erlang is missing what Java, SML and friends have built in? + +The first thing we need to do is to define +a [Behaviour](http://metajack.im/2008/10/29/custom-behaviors-in-erlang/) +for our functionality. To continue our example we will define a +Behaviour for dictionaries. That Behaviour looks like this: + + :::erlang + -module(ec_dictionary). + + -export([behaviour_info/1]). + + behaviour_info(callbacks) -> + [{new, 0}, + {has_key, 2}, + {get, 2}, + {add, 3}, + {remove, 2}, + {has_value, 2}, + {size, 1}, + {to_list, 1}, + {from_list, 1}, + {keys, 1}]; + behaviour_info(_) -> + undefined. + + +So we have our Behaviour now. Unfortunately, this doesn't give us much +yet. It will make sure that any dictionaries we write will have all +the functions they need to have, but it won't help us actually use the +dictionaries in an abstract way in our code. To do that we need to add +a bit of functionality. We do that by actually implementing our own +behaviour, starting with `new/1`. + + :::erlang + %% @doc create a new dictionary object from the specified module. The + %% module should implement the dictionary behaviour. + %% + %% @param ModuleName The module name. + -spec new(module()) -> dictionary(_K, _V). + new(ModuleName) when is_atom(ModuleName) -> + #dict_t{callback = ModuleName, data = ModuleName:new()}. + +This code creates a new dictionary for us. Or to be more specific it +actually creates a new dictionary Signature record, that will be used +subsequently in other calls. This might look a bit familiar from our +previous less optimal approach. We have both the module name and the +data here in the record. We call the module named in +`ModuleName` to create the initial data. We then construct the record +and return that record to the caller and we have a new +dictionary.
What about the other functions, the ones that don't create +a dictionary but make use of it? Let's take a look at the +implementations of two kinds of functions, one that updates the +dictionary and another that just retrieves data. + +The first we will look at is the one that updates the dictionary by +adding a value. + + :::erlang + %% @doc add a new value to the existing dictionary. Return a new + %% dictionary containing the value. + %% + %% @param Dict the dictionary object to add to + %% @param Key the key to add + %% @param Value the value to add + -spec add(key(K), value(V), dictionary(K, V)) -> dictionary(K, V). + add(Key, Value, #dict_t{callback = Mod, data = Data} = Dict) -> + Dict#dict_t{data = Mod:add(Key, Value, Data)}. + +There are two key things here. + +1. The dictionary is deconstructed so we can get access to the data +and the callback module. +1. We modify the dictionary record with the new data and return that +modified record. + +This is the same approach that you will use for any Signature that +updates data. As a side note, notice that we are calling the concrete +implementation to do the work itself. + +Now let's do a data retrieval function. In this case, the `get` function +of the dictionary Signature. + + :::erlang + %% @doc given a key return the value for that key from the dictionary. If the key is + %% not found throw a 'not_found' exception. + %% + %% @param Dict The dictionary object to return the value from + %% @param Key The key requested + %% @throws not_found when the key does not exist + -spec get(key(K), dictionary(K, V)) -> value(V). + get(Key, #dict_t{callback = Mod, data = Data}) -> + Mod:get(Key, Data). + +In this case, you can see a very similar approach to deconstructing +the dict record. We still need to pull out the callback module and the +data itself and call the concrete implementation of the algorithm. In +this case, we return the data returned from the call, not the record +itself.
+ +That is really all you need to define a Signature. There is a complete +implementation in +[erlware_commons/ec_dictionary](https://github.com/ericbmerritt/erlware_commons/blob/types/src/ec_dictionary.erl). + +Using Signatures +---------------- + +It's a good idea to work through an example so we have a bit better +idea of how to use these Signatures. If you are like me, you probably +have some questions about what kind of performance burden this places +on the code. At the very least we have an additional function call +along with the record deconstruction. This must add some overhead. So +let's write a little timing test, so we can get a good idea of how much +this is all costing us. + +In general, there are two kinds of concrete implementations for +Signatures. The first is a native implementation, the second is a +wrapper. + +### Native Signature Implementations + +A Native Signature Implementation is just that, a module that +implements the Behaviour defined by a Signature directly. For most +user defined Signatures this is going to be the norm. In our current +example, the +[erlware_commons/ec_rbdict](https://github.com/ericbmerritt/erlware_commons/blob/types/src/ec_rbdict.erl) +module is the best example of a Native Signature Implementation. It +implements the ec_dictionary Behaviour directly. + +### Signature Wrappers + +A Signature Wrapper is a module that wraps another module. Its +purpose is to help a preexisting module implement the Behaviour +defined by a Signature. A good example of this in our current example +is the +[erlware_commons/ec_dict](https://github.com/ericbmerritt/erlware_commons/blob/types/src/ec_dict.erl) +module. It implements the ec_dictionary Behaviour, but all the +functionality is provided by the +[stdlib/dict](http://www.erlang.org/doc/man/dict.html) module +itself. Let's take a look at one example to see how this is done. + +We will take a look at one of the functions we have already seen.
The +`get` function. The ec_dictionary `get` doesn't have quite the same +semantics as any of the functions in the dict module. So a bit of +translation needs to be done. We do that in the ec_dict module `get` function. + + :::erlang + -spec get(ec_dictionary:key(K), Object::dictionary(K, V)) -> + ec_dictionary:value(V). + get(Key, Data) -> + case dict:find(Key, Data) of + {ok, Value} -> + Value; + error -> + throw(not_found) + end. + +So the ec_dict module's purpose for existence is to help the +preexisting dict module implement the Behaviour defined by the +Signature. + + +Why do we bring this up here? Because we are going to be looking at +timings, and Signature Wrappers add an extra level of indirection to +the mix and that adds a bit of additional overhead. + +### Creating the Timing Module + +We are going to create timings for both Native Signature +Implementations and Signature Wrappers. + +Let's get started by looking at some helper functions. We want +dictionaries to have a bit of data in them. So to that end we will +create a couple of functions that create dictionaries for each type we +want to test. The first we want to time is the Signature Wrapper, so +`dict` vs `ec_dict` called as a Signature. + + :::erlang + create_dict() -> + lists:foldl(fun(El, Dict) -> + dict:store(El, El, Dict) + end, dict:new(), + lists:seq(1,100)). + +The only thing we do here is create a sequence of numbers 1 to 100, +and then add each of those to the dict as an entry. We aren't too +worried about replicating real data in the dictionary. We care about +timing the function call overhead of Signatures, not the performance +of the dictionaries themselves. + +We need to create a similar function for our Signature based +dictionary `ec_dict`. + + :::erlang + create_dictionary(Type) -> + lists:foldl(fun(El, Dict) -> + ec_dictionary:add(El, El, Dict) + end, + ec_dictionary:new(Type), + lists:seq(1,100)). + +Here we actually create everything using the Signature.
So we don't +need one function for each type. We can have one function that can +create anything that implements the Signature. That is the magic of +Signatures. Otherwise, this does the exact same thing as the dict +`create_dict/0`. + +We are going to use two function calls in our timing. One that updates +data and one that returns data, just to get good coverage. For our +dictionaries we are going to use the `size` function as well as +the `add` function. + + :::erlang + time_direct_vs_signature_dict() -> + io:format("Timing dict~n"), + Dict = create_dict(), + test_avg(fun() -> + dict:size(dict:store(some_key, some_value, Dict)) + end, + 1000000), + io:format("Timing ec_dict implementation of ec_dictionary~n"), + time_dict_type(ec_dict). + +The `test_avg` function runs the provided function the number of times +specified in the second argument and collects timing information. We +are going to run these one million times to get a good average (it's +fast so it doesn't take long). You can see in the anonymous +function that we directly call `dict:size/1` and `dict:store/3` to perform +the test. However, because we are in the wonderful world of Signatures +we don't have to hard code the calls for the Signature +implementations. Let's take a look at the `time_dict_type` function. + + + :::erlang + time_dict_type(Type) -> + io:format("Testing ~p~n", [Type]), + Dict = create_dictionary(Type), + test_avg(fun() -> + ec_dictionary:size(ec_dictionary:add(some_key, some_value, Dict)) + end, + 1000000). + +As you can see we take the type as an argument (we need it for `dict` +creation) and call our create function. Then we run the same timings +that we did for `dict`. In this case though, the type of dictionary +is never specified, we only ever call ec_dictionary, so this test will +work for anything that implements that Signature. + +#### `dict` vs `ec_dict` Results + +So we have our tests, what was the result? Well on my laptop this is +what it looked like.
+ + :::sh + Erlang R14B01 (erts-5.8.2) [source] [64-bit] [smp:4:4] [rq:4] [async-threads:0] [hipe] [kernel-poll:false] + + Eshell V5.8.2 (abort with ^G) + + 1> ec_timing:time_direct_vs_signature_dict(). + Timing dict + Range: 2 - 5621 mics + Median: 3 mics + Average: 3 mics + Timing ec_dict implementation of ec_dictionary + Testing ec_dict + Range: 3 - 6097 mics + Median: 3 mics + Average: 4 mics + 2> + +So for the direct dict call, we average about 3 mics per call, while +for the Signature Wrapper we average around 4. Thats a 25% cost for +Signature Wrappers in this example, for a very small number of +calls. Depending on what you are doing that is going to be greater or +lesser. In any case, we can see that there is some cost associated +with the Signature Wrapper Implementations. + +What about native Signatures though? Lets take a look at +`ec_rbdict`. The `ec_rbdict` also implements the `ec_dictionary` +Signature, but it is not a Signature Wrapper. It is a native +implementation of the Signature. To use `ec_rbdict` directly we have +to create a creation helper just like we did for dict. + + :::erlang + create_rbdict() -> + lists:foldl(fun(El, Dict) -> + ec_rbdict:add(El, El, Dict) + end, ec_rbdict:new(), + lists:seq(1,100)). + +This is exactly the same as `create_dict` with the exception that dict +is replaced by `ec_rbdict`. + +The timing function itself looks very similar as well. Again notice +that we have to hard code the concrete name for the concrete +implementation, but we don't for the ec_dictionary test. + + :::erlang + time_direct_vs_signature_rbdict() -> + io:format("Timing rbdict~n"), + Dict = create_rbdict(), + test_avg(fun() -> + ec_rbdict:size(ec_rbdict:add(some_key, some_value, Dict)) + end, + 1000000), + io:format("Timing ec_dict implementation of ec_dictionary~n"), + time_dict_type(ec_rbdict). + +And there we have our test. What do the results look like? 
+ +#### `ec_rbdict` vs `ec_rbdict` as an `ec_dictionary` Results + +The main thing we are timing here is the additional cost of the +dictionary Signature itself. Keep that in mind as we look at the +results. + + :::sh + Erlang R14B01 (erts-5.8.2) [source] [64-bit] [smp:4:4] [rq:4] [async-threads:0] [hipe] [kernel-poll:false] + + Eshell V5.8.2 (abort with ^G) + + 1> ec_timing:time_direct_vs_signature_rbdict(). + Timing rbdict + Range: 6 - 15070 mics + Median: 7 mics + Average: 7 mics + Timing ec_dict implementation of ec_dictionary + Testing ec_rbdict + Range: 6 - 6013 mics + Median: 7 mics + Average: 7 mics + 2> + +So no difference in time. Well the reality is that there is a +difference in timing, there must be, but we don't have enough +resolution in the timing system to be able to figure out what that +difference is. Essentially that means it's really, really small - or small +enough not to worry about at the very least. + +Conclusion +---------- + +Signatures are a viable, useful approach to the problem of interfaces +in Erlang. They have little or no overhead depending on the type of +implementation, and greatly increase the flexibility of a library +while retaining testability and locality. + +### Terminology + +Behaviour +: A normal Erlang Behaviour that defines a contract + +Signature +: A combination of a Behaviour and functionality to make the + functions callable in a concrete way + +Native Signature Implementation +: A module that implements a signature directly + +Signature Wrapper +: A module that does translation between a preexisting module and a + Signature, allowing the preexisting module to be used as a Signature + Implementation.
+ +### Code Referenced + +* [ec_dictionary Implementation](https://github.com/ericbmerritt/erlware_commons/blob/types/src/ec_dictionary.erl) +* [ec_dict Signature Wrapper](https://github.com/ericbmerritt/erlware_commons/blob/types/src/ec_dict.erl) +* [ec_rbdict Native Signature Implementation](https://github.com/ericbmerritt/erlware_commons/blob/types/src/ec_rbdict.erl) +* [ec_timing Signature Use Example and Timing Collector](https://github.com/ericbmerritt/erlware_commons/blob/types/examples/ec_timing.erl) diff --git a/ebin/erlware_commons.app b/ebin/erlware_commons.app index 792980a..0fc142e 100644 --- a/ebin/erlware_commons.app +++ b/ebin/erlware_commons.app @@ -1,14 +1,18 @@ %% -*- mode: Erlang; fill-column: 75; comment-column: 50; -*- {application, erlware_commons, [{description, "Additional standard library for Erlang"}, - {vsn, "0.5.0"}, + {vsn, "0.6.0"}, {modules, [ ec_lists, ec_plists, ec_file, ec_string, ec_semver, - ec_talk - ]}, + ec_dictionary, + ec_assoc_list, + ec_dict, + ec_gb_trees, + ec_rbdict, + ec_orddict]}, {registered, []}, {applications, [kernel, stdlib]}]}. diff --git a/src/ec_assoc_list.erl b/src/ec_assoc_list.erl new file mode 100644 index 0000000..1fdb2c4 --- /dev/null +++ b/src/ec_assoc_list.erl @@ -0,0 +1,103 @@ +%%%------------------------------------------------------------------- +%%% @author Eric Merritt +%%% @copyright 2011 Erlware, LLC. +%%% @doc +%%% provides an implementation of ec_dictionary using an association +%%% list as a base +%%% @end +%%% @see ec_dictionary +%%%------------------------------------------------------------------- +-module(ec_assoc_list). + +-behaviour(ec_dictionary). + +%% API +-export([new/0, + has_key/2, + get/2, + get/3, + add/3, + remove/2, + has_value/2, + size/1, + to_list/1, + from_list/1, + keys/1]). + +-export_type([dictionary/2]).
+
+%%%===================================================================
+%%% Types
+%%%===================================================================
+-opaque dictionary(K, V) :: {ec_assoc_list,
+                             [{ec_dictionary:key(K), ec_dictionary:value(V)}]}.
+
+%%%===================================================================
+%%% API
+%%%===================================================================
+
+%% @doc Create an empty association-list dictionary.
+-spec new() -> dictionary(_K, _V).
+new() ->
+    {ec_assoc_list, []}.
+
+%% @doc Return true when some entry's key compares equal (==) to Key.
+-spec has_key(ec_dictionary:key(K), Object::dictionary(K, _V)) -> boolean().
+has_key(Key, {ec_assoc_list, Data}) ->
+    lists:keymember(Key, 1, Data).
+
+%% @doc Return the value stored under Key.
+%%
+%% lists:keyfind/3 compares with ==, so the stored key can differ from
+%% Key in type (1.0 versus 1). The hit is therefore matched as
+%% {_, Value}: re-matching on the bound Key would turn such a hit into
+%% a case_clause error although has_key/2 reports the key as present.
+%%
+%% @throws not_found when no entry has a matching key
+-spec get(ec_dictionary:key(K), Object::dictionary(K, V)) ->
+    ec_dictionary:value(V).
+get(Key, {ec_assoc_list, Data}) ->
+    case lists:keyfind(Key, 1, Data) of
+        {_StoredKey, Value} ->
+            Value;
+        false ->
+            throw(not_found)
+    end.
+
+%% @doc Return the value stored under Key, or Default when no entry has
+%% a matching key. Keys are compared with ==, exactly as in get/2.
+-spec get(ec_dictionary:key(K),
+          ec_dictionary:value(V),
+          Object::dictionary(K, V)) ->
+    ec_dictionary:value(V).
+get(Key, Default, {ec_assoc_list, Data}) ->
+    case lists:keyfind(Key, 1, Data) of
+        {_StoredKey, Value} ->
+            Value;
+        false ->
+            Default
+    end.
+
+%% @doc Store Value under Key. An existing entry for Key is removed
+%% first and the new pair is prepended to the list.
+-spec add(ec_dictionary:key(K), ec_dictionary:value(V),
+          Object::dictionary(K, V)) ->
+    dictionary(K, V).
+add(Key, Value, {ec_assoc_list, _Data}=Dict) ->
+    {ec_assoc_list, Rest} = remove(Key,Dict),
+    {ec_assoc_list, [{Key, Value} | Rest ]}.
+
+%% @doc Delete the first entry whose key compares equal to Key, if any.
+-spec remove(ec_dictionary:key(K), Object::dictionary(K, _V)) ->
+    dictionary(K, _V).
+remove(Key, {ec_assoc_list, Data}) ->
+    {ec_assoc_list, lists:keydelete(Key, 1, Data)}.
+
+%% @doc Return true when some entry's value compares equal (==) to Value.
+-spec has_value(ec_dictionary:value(V), Object::dictionary(_K, V)) -> boolean().
+has_value(Value, {ec_assoc_list, Data}) ->
+    lists:keymember(Value, 2, Data).
+
+%% @doc Return the number of entries held in the list.
+-spec size(Object::dictionary(_K, _V)) -> integer().
+size({ec_assoc_list, Data}) ->
+    length(Data).
+
+%% @doc Return the underlying list of {Key, Value} pairs unchanged.
+-spec to_list(dictionary(K, V)) -> [{ec_dictionary:key(K),
+                                     ec_dictionary:value(V)}].
+to_list({ec_assoc_list, Data}) ->
+    Data.
+
+%% @doc Build a dictionary from a list of {Key, Value} pairs.
+%%
+%% A dictionary holds at most one entry per key, so duplicates (keys
+%% comparing equal with ==) are collapsed here: the last occurrence in
+%% List wins, as it does in ec_gb_trees:from_list/1 and
+%% ec_rbdict:from_list/1. Folding from the right keeps the surviving
+%% pairs in their original relative order, so a duplicate-free list is
+%% stored unchanged.
+-spec from_list([{ec_dictionary:key(K), ec_dictionary:value(V)}]) ->
+    dictionary(K, V).
+from_list(List) when is_list(List) ->
+    Deduped = lists:foldr(fun({Key, _Value} = Pair, Acc) ->
+                                  %% Acc holds the pairs to the right of
+                                  %% Pair; a hit means a later entry for
+                                  %% this key already won.
+                                  case lists:keymember(Key, 1, Acc) of
+                                      true ->
+                                          Acc;
+                                      false ->
+                                          [Pair | Acc]
+                                  end
+                          end, [], List),
+    {ec_assoc_list, Deduped}.
+
+%% @doc Return the key of every entry, in list order.
+-spec keys(dictionary(K, _V)) -> [ec_dictionary:key(K)].
+keys({ec_assoc_list, Data}) ->
+    lists:map(fun({Key, _Value}) ->
+                      Key
+              end, Data).
diff --git a/src/ec_dict.erl b/src/ec_dict.erl
new file mode 100644
index 0000000..b760e92
--- /dev/null
+++ b/src/ec_dict.erl
@@ -0,0 +1,107 @@
+%%%-------------------------------------------------------------------
+%%% @author Eric Merritt
+%%% @copyright 2011 Erlware, LLC.
+%%% @doc
+%%% This provides an implementation of the ec_dictionary type using
+%%% erlang dicts as a base. The function documentation for
+%%% ec_dictionary applies here as well.
+%%% @end
+%%% @see ec_dictionary
+%%% @see dict
+%%%-------------------------------------------------------------------
+-module(ec_dict).
+
+-behaviour(ec_dictionary).
+
+%% API
+-export([new/0,
+         has_key/2,
+         get/2,
+         get/3,
+         add/3,
+         remove/2,
+         has_value/2,
+         size/1,
+         to_list/1,
+         from_list/1,
+         keys/1]).
+
+-export_type([dictionary/2]).
+
+%%%===================================================================
+%%% Types
+%%%===================================================================
+-opaque dictionary(_K, _V) :: dict().
+
+%%%===================================================================
+%%% API
+%%%===================================================================
+
+-spec new() -> dictionary(_K, _V).
+new() ->
+    dict:new().
+
+-spec has_key(ec_dictionary:key(K), Object::dictionary(K, _V)) -> boolean().
+has_key(Key, Data) ->
+    dict:is_key(Key, Data).
+
+-spec get(ec_dictionary:key(K), Object::dictionary(K, V)) ->
+    ec_dictionary:value(V).
+get(Key, Data) ->
+    case dict:find(Key, Data) of
+        {ok, Value} ->
+            Value;
+        error ->
+            throw(not_found)
+    end.
+
+-spec get(ec_dictionary:key(K),
+          ec_dictionary:value(V),
+          Object::dictionary(K, V)) ->
+    ec_dictionary:value(V).
+get(Key, Default, Data) ->
+    value_or_default(dict:find(Key, Data), Default).
+
+%% Unwrap the result of dict:find/2, substituting Fallback on a miss.
+value_or_default({ok, Found}, _Fallback) ->
+    Found;
+value_or_default(error, Fallback) ->
+    Fallback.
+
+%% @doc Store Value under Key, replacing any value already held there.
+-spec add(ec_dictionary:key(K), ec_dictionary:value(V),
+          Object::dictionary(K, V)) ->
+    dictionary(K, V).
+add(Key, Value, Dict) ->
+    dict:store(Key, Value, Dict).
+
+%% @doc Drop the entry for Key; a dictionary without it is unchanged.
+-spec remove(ec_dictionary:key(K), Object::dictionary(K, V)) ->
+    dictionary(K, V).
+remove(Key, Dict) ->
+    dict:erase(Key, Dict).
+
+%% @doc Return true when any stored value compares equal (==) to Value.
+-spec has_value(ec_dictionary:value(V), Object::dictionary(_K, V)) -> boolean().
+has_value(Value, Dict) ->
+    %% Once Seen is true it stays true for the rest of the fold.
+    Matches = fun(_Key, Candidate, Seen) ->
+                      Seen orelse Candidate == Value
+              end,
+    dict:fold(Matches, false, Dict).
+
+%% @doc Return the number of entries.
+-spec size(Object::dictionary(_K, _V)) -> integer().
+size(Dict) ->
+    dict:size(Dict).
+
+%% @doc Return every entry as a {Key, Value} pair.
+-spec to_list(dictionary(K, V)) -> [{ec_dictionary:key(K),
+                                     ec_dictionary:value(V)}].
+to_list(Dict) ->
+    dict:to_list(Dict).
+
+%% @doc Build a dictionary from a list of {Key, Value} pairs.
+-spec from_list([{ec_dictionary:key(K), ec_dictionary:value(V)}]) ->
+    dictionary(K, V).
+from_list(Pairs) when is_list(Pairs) ->
+    dict:from_list(Pairs).
+
+%% @doc Return every key held in the dictionary.
+-spec keys(dictionary(K, _V)) -> [ec_dictionary:key(K)].
+keys(Dict) ->
+    dict:fetch_keys(Dict).
diff --git a/src/ec_dictionary.erl b/src/ec_dictionary.erl
new file mode 100644
index 0000000..0e0e11b
--- /dev/null
+++ b/src/ec_dictionary.erl
@@ -0,0 +1,158 @@
+%%%-------------------------------------------------------------------
+%%% @author Eric Merritt
+%%% @copyright 2011 Erlware, LLC.
+%%% @doc
+%%% A module that supports association of keys to values. A map cannot
+%%% contain duplicate keys; each key can map to at most one value.
+%%%
+%%% This interface is a member of the Erlware Commons Library.
+%%% @end
+%%%-------------------------------------------------------------------
+-module(ec_dictionary).
+
+%%% Behaviour Callbacks
+-export([behaviour_info/1]).
+
+%% API
+-export([new/1,
+         has_key/2,
+         get/2,
+         get/3,
+         add/3,
+         remove/2,
+         has_value/2,
+         size/1,
+         to_list/1,
+         from_list/2,
+         keys/1]).
+
+-export_type([dictionary/2,
+              key/1,
+              value/1]).
+
+%%%===================================================================
+%%% Types
+%%%===================================================================
+
+%% callback: the module implementing the dictionary behaviour
+%% data: that module's own representation of the contents
+-record(dict_t,
+        {callback,
+         data}).
+
+-opaque dictionary(_K, _V) :: #dict_t{}.
+-type key(T) :: T.
+-type value(T) :: T.
+
+%%%===================================================================
+%%% API
+%%%===================================================================
+
+%% @doc export the behaviour callbacks for this type. get/3 is listed
+%% because get/3 in this module dispatches to the callback module's
+%% get/3; without it an implementation could pass the behaviour check
+%% and still fail at run time.
+%% @private
+behaviour_info(callbacks) ->
+    [{new, 0},
+     {has_key, 2},
+     {get, 2},
+     {get, 3},
+     {add, 3},
+     {remove, 2},
+     {has_value, 2},
+     {size, 1},
+     {to_list, 1},
+     {from_list, 1},
+     {keys, 1}];
+behaviour_info(_) ->
+    undefined.
+
+%% @doc create a new dictionary object from the specified module. The
+%% module should implement the dictionary behaviour.
+%%
+%% @param ModuleName The module name.
+-spec new(module()) -> dictionary(_K, _V).
+new(ModuleName) when is_atom(ModuleName) ->
+    #dict_t{callback = ModuleName, data = ModuleName:new()}.
+
+%% @doc check to see if the dictionary provided has the specified key.
+%%
+%% @param Key The key to check the dictionary for
+%% @param Dict The dictionary object to check
+-spec has_key(key(K), dictionary(K, _V)) -> boolean().
+has_key(Key, #dict_t{callback = Mod, data = Data}) ->
+    Mod:has_key(Key, Data).
+
+%% @doc given a key return the value stored under it. If the key is
+%% not found throw a 'not_found' exception.
+%%
+%% @param Key The key requested
+%% @param Dict The dictionary object to return the value from
+%% @throws not_found when the key does not exist
+-spec get(key(K), dictionary(K, V)) -> value(V).
+get(Key, #dict_t{callback = Mod, data = Data}) ->
+    Mod:get(Key, Data).
+
+%% @doc given a key return the value stored under it. If the key is
+%% not found then the default value is returned.
+%%
+%% @param Key The key whose value is wanted
+%% @param Default The value handed back when the key is absent
+%% @param Dict The dictionary object to read from
+-spec get(key(K), value(V), dictionary(K, V)) -> value(V).
+get(Key, Default, #dict_t{} = Dict) ->
+    Impl = Dict#dict_t.callback,
+    Impl:get(Key, Default, Dict#dict_t.data).
+
+%% @doc Store a value under a key and return the resulting dictionary;
+%% the work is delegated to the callback module's add/3.
+%%
+%% @param Key the key to add
+%% @param Value the value to add
+%% @param Dict the dictionary object to add to
+-spec add(key(K), value(V), dictionary(K, V)) -> dictionary(K, V).
+add(Key, Value, #dict_t{} = Dict) ->
+    Impl = Dict#dict_t.callback,
+    Grown = Impl:add(Key, Value, Dict#dict_t.data),
+    Dict#dict_t{data = Grown}.
+
+%% @doc Delete a key and return the resulting dictionary; the work is
+%% delegated to the callback module's remove/2.
+%%
+%% @param Key the key of the key/value pair to remove
+%% @param Dict the dictionary object to remove the pair from
+-spec remove(key(K), dictionary(K, V)) -> dictionary(K, V).
+remove(Key, #dict_t{} = Dict) ->
+    Impl = Dict#dict_t.callback,
+    Shrunk = Impl:remove(Key, Dict#dict_t.data),
+    Dict#dict_t{data = Shrunk}.
+
+%% @doc Ask the callback module whether the value is stored anywhere
+%% in the dictionary.
+%%
+%% @param Value The value to look for
+%% @param Dict the dictionary object to check
+-spec has_value(value(V), dictionary(_K, V)) -> boolean().
+has_value(Value, #dict_t{} = Dict) ->
+    Impl = Dict#dict_t.callback,
+    Impl:has_value(Value, Dict#dict_t.data).
+
+%% @doc Return the number of key/value pairs, as reported by the
+%% callback module.
+%%
+%% @param Dict the dictionary object to measure
+-spec size(dictionary(_K, _V)) -> integer().
+size(#dict_t{} = Dict) ->
+    Impl = Dict#dict_t.callback,
+    Impl:size(Dict#dict_t.data).
+
+%% @doc Return the contents of the dictionary as a list of key/value
+%% pairs, as produced by the callback module.
+%%
+%% @param Dict the dictionary object to list
+-spec to_list(Dict::dictionary(K, V)) -> [{key(K), value(V)}].
+to_list(#dict_t{} = Dict) ->
+    Impl = Dict#dict_t.callback,
+    Impl:to_list(Dict#dict_t.data).
+
+%% @doc Build a dictionary of the given implementation, seeded with
+%% the supplied key/value pairs.
+%%
+%% @param ModuleName the implementation module to build the dictionary with
+%% @param List the key/value pairs to start from
+-spec from_list(module(), [{key(K), value(V)}]) -> dictionary(K, V).
+from_list(ModuleName, List) when is_list(List) ->
+    Contents = ModuleName:from_list(List),
+    #dict_t{callback = ModuleName, data = Contents}.
+
+%% @doc Return the keys of the dictionary as a list, as reported by
+%% the callback module.
+%%
+%% @param Dict the dictionary object to read the keys from
+-spec keys(Dict::dictionary(K, _V)) -> [key(K)].
+keys(#dict_t{} = Dict) ->
+    Impl = Dict#dict_t.callback,
+    Impl:keys(Dict#dict_t.data).
diff --git a/src/ec_gb_trees.erl b/src/ec_gb_trees.erl
new file mode 100644
index 0000000..d9fa761
--- /dev/null
+++ b/src/ec_gb_trees.erl
@@ -0,0 +1,223 @@
+%%%-------------------------------------------------------------------
+%%% @author Eric Merritt
+%%% @copyright 2011 Erlware, LLC.
+%%% @doc
+%%% This provides an implementation of the type ec_dictionary using
+%%% gb_trees as the backing store
+%%% @end
+%%% @see ec_dictionary
+%%% @see gb_trees
+%%%-------------------------------------------------------------------
+-module(ec_gb_trees).
+
+-behaviour(ec_dictionary).
+
+%% API
+-export([new/0,
+         has_key/2,
+         get/2,
+         get/3,
+         add/3,
+         remove/2,
+         has_value/2,
+         size/1,
+         to_list/1,
+         from_list/1,
+         keys/1]).
+
+-export_type([dictionary/2]).
+
+%%%===================================================================
+%%% Types
+%%%===================================================================
+-opaque dictionary(K, V) :: {non_neg_integer(), ec_gb_tree_node(K, V)}.
+
+-type ec_gb_tree_node(K, V) :: 'nil' | {K, V,
+                                        ec_gb_tree_node(K, V),
+                                        ec_gb_tree_node(K, V)}.
+
+%%%===================================================================
+%%% API
+%%%===================================================================
+
+%% @doc create a new dictionary object from the specified module.
The
+%% result is an empty tree as returned by gb_trees:empty/0; this
+%% function takes no arguments.
+-spec new() -> dictionary(_K, _V).
+new() ->
+    gb_trees:empty().
+
+%% @doc Report whether the tree holds an entry for the key.
+%%
+%% @param Key The key to look for
+%% @param Object The dictionary object to check
+-spec has_key(ec_dictionary:key(K), Object::dictionary(K, _V)) -> boolean().
+has_key(Key, Tree) ->
+    gb_trees:is_defined(Key, Tree).
+
+%% @doc Return the value stored under the key.
+%%
+%% @param Key The key requested
+%% @param Object The dictionary object to read from
+%% @throws not_found when the key does not exist
+-spec get(ec_dictionary:key(K), Object::dictionary(K, V)) ->
+    ec_dictionary:value(V).
+get(Key, Tree) ->
+    case gb_trees:lookup(Key, Tree) of
+        none ->
+            throw(not_found);
+        {value, Found} ->
+            Found
+    end.
+
+-spec get(ec_dictionary:key(K),
+          ec_dictionary:value(V),
+          Object::dictionary(K, V)) ->
+    ec_dictionary:value(V).
+get(Key, Default, Tree) ->
+    %% Same lookup as get/2, but a miss yields Default instead of a throw.
+    case gb_trees:lookup(Key, Tree) of
+        none ->
+            Default;
+        {value, Found} ->
+            Found
+    end.
+
+%% @doc Store the value under the key and return the resulting tree;
+%% gb_trees:enter/3 either inserts the key or updates its value.
+%%
+%% @param Key the key to add
+%% @param Value the value to add
+%% @param Object the dictionary object to add to
+-spec add(ec_dictionary:key(K), ec_dictionary:value(V),
+          Object::dictionary(K, V)) ->
+    dictionary(K, V).
+add(Key, Value, Tree) ->
+    gb_trees:enter(Key, Value, Tree).
+
+%% @doc Remove a value from the dictionary returning a new dictionary
+%% with the value removed.
+%%
+%% @param Key the key of the key/value pair to remove
+%% @param Object the dictionary object to remove the value from
+-spec remove(ec_dictionary:key(K), Object::dictionary(K, V)) ->
+    dictionary(K, V).
+remove(Key, Data) ->
+    gb_trees:delete_any(Key, Data).
+
+%% @doc Check to see if the value exists in the dictionary.
+%%
+%% Values are compared with ==, as ec_dict, ec_orddict, ec_rbdict and
+%% ec_assoc_list do; lists:member/2 would use exact matching and make
+%% this implementation disagree with the others on 1 versus 1.0.
+%%
+%% @param Value The value to look for
+%% @param Object the dictionary object to check
+-spec has_value(ec_dictionary:value(V), Object::dictionary(_K, V)) -> boolean().
+has_value(Value, Data) ->
+    lists:any(fun(Candidate) ->
+                      Candidate == Value
+              end,
+              gb_trees:values(Data)).
+
+%% @doc return the current number of key value pairs in the dictionary
+%%
+%% @param Object the object to return the size for.
+-spec size(Object::dictionary(_K, _V)) -> integer().
+size(Data) ->
+    gb_trees:size(Data).
+
+%% @doc Return the contents as a list of {Key, Value} pairs.
+-spec to_list(dictionary(K, V)) -> [{ec_dictionary:key(K),
+                                     ec_dictionary:value(V)}].
+to_list(Data) ->
+    gb_trees:to_list(Data).
+
+%% @doc Build a tree from a list of {Key, Value} pairs. Pairs are
+%% entered left to right, so a later pair replaces an earlier one with
+%% the same key.
+-spec from_list([{ec_dictionary:key(K), ec_dictionary:value(V)}]) ->
+    dictionary(K, V).
+from_list(List) when is_list(List) ->
+    lists:foldl(fun({Key, Value}, Dict) ->
+                        gb_trees:enter(Key, Value, Dict)
+                end,
+                gb_trees:empty(),
+                List).
+
+%% @doc Return the keys held in the tree.
+-spec keys(dictionary(K,_V)) -> [ec_dictionary:key(K)].
+keys(Data) ->
+    gb_trees:keys(Data).
+
+%%%===================================================================
+%%% Tests
+%%%===================================================================
+
+
+-ifndef(NOTEST).
+-include_lib("eunit/include/eunit.hrl").
+
+%% For me unit testing initially is about covering the obvious case. A
+%% check to make sure that what you expect the tested functionality to
+%% do, it actually does. As time goes on and people detect bugs you
+%% add tests for those specific problems to the unit test suite.
+%%
+%% However, when getting started you can only test your basic
+%% expectations. So here are the expectations I have for the add
+%% functionality.
+%%
+%% 1) Arbitrary terms can be stored as keys
+%% 2) Arbitrary terms can be stored as values
+%% 3) A value stored under a key can be read back unchanged
+%% 4) Storing a different value under one key leaves the other
+%%    key/value pairs untouched
+%% 5) After an update the new value is returned for the updated key
+%% 6) Looking up a key that was never stored throws not_found
+
+add_test() ->
+    Empty = ec_dictionary:new(ec_gb_trees),
+
+    Keys = [foo,
+            [1, 3],
+            {"super"},
+            <<"fabulous">>,
+            {"Sona", 2, <<"Zuper">>}],
+    [K1, K2, K3, K4, K5] = Keys,
+    %% The values are simply the keys again, in a different order.
+    [V1, V2, V3, V4, V5] = [K5, K4, K2, K3, K1],
+
+    %% Insert the five pairs one at a time, first key first.
+    Initial = lists:zip(Keys, [V1, V2, V3, V4, V5]),
+    Filled = lists:foldl(fun({Key, Value}, Acc) ->
+                                 ec_dictionary:add(Key, Value, Acc)
+                         end, Empty, Initial),
+    assert_stored(Initial, Filled),
+
+    %% Overwrite two of the keys; the other three must be unaffected.
+    Updated = lists:foldl(fun({Key, Value}, Acc) ->
+                                  ec_dictionary:add(Key, Value, Acc)
+                          end, Filled, [{K2, V4}, {K3, V5}]),
+    assert_stored([{K1, V1}, {K2, V4}, {K3, V5}, {K4, V4}, {K5, V5}],
+                  Updated),
+
+    ?assertThrow(not_found, ec_dictionary:get(should_blow_up, Updated)),
+    ?assertThrow(not_found, ec_dictionary:get("This should blow up too",
+                                              Updated)).
+
+%% Assert that every {Key, Value} pair in Expected can be read back
+%% from Dict.
+assert_stored(Expected, Dict) ->
+    lists:foreach(fun({Key, Value}) ->
+                          ?assertMatch(Value, ec_dictionary:get(Key, Dict))
+                  end, Expected).
+
+
+
+-endif.
diff --git a/src/ec_orddict.erl b/src/ec_orddict.erl
new file mode 100644
index 0000000..71f3d23
--- /dev/null
+++ b/src/ec_orddict.erl
@@ -0,0 +1,107 @@
+%%%-------------------------------------------------------------------
+%%% @author Eric Merritt
+%%% @copyright 2011 Erlware, LLC.
+%%% @doc
+%%% An implementation of the ec_dictionary type backed by erlang
+%%% orddicts. The function documentation for ec_dictionary applies
+%%% here as well.
+%%% @end
+%%% @see ec_dictionary
+%%% @see orddict
+%%%-------------------------------------------------------------------
+-module(ec_orddict).
+
+-behaviour(ec_dictionary).
+
+%% API
+-export([new/0,
+         has_key/2,
+         get/2,
+         get/3,
+         add/3,
+         remove/2,
+         has_value/2,
+         size/1,
+         to_list/1,
+         from_list/1,
+         keys/1]).
+
+-export_type([dictionary/2]).
+
+%%%===================================================================
+%%% Types
+%%%===================================================================
+-opaque dictionary(K, V) :: [{K, V}].
+
+%%%===================================================================
+%%% API
+%%%===================================================================
+
+%% @doc Create an empty orddict.
+-spec new() -> dictionary(_K, _V).
+new() ->
+    orddict:new().
+
+%% @doc Report whether the orddict holds an entry for Key.
+-spec has_key(ec_dictionary:key(K), Object::dictionary(K, _V)) -> boolean().
+has_key(Key, Orddict) ->
+    orddict:is_key(Key, Orddict).
+
+%% @doc Return the value stored under Key, throwing not_found on a miss.
+-spec get(ec_dictionary:key(K), Object::dictionary(K, V)) ->
+    ec_dictionary:value(V).
+get(Key, Orddict) ->
+    case orddict:find(Key, Orddict) of
+        error ->
+            throw(not_found);
+        {ok, Found} ->
+            Found
+    end.
+
+%% @doc Return the value stored under Key, or Default on a miss.
+-spec get(ec_dictionary:key(K),
+          Default::ec_dictionary:value(V),
+          Object::dictionary(K, V)) ->
+    ec_dictionary:value(V).
+get(Key, Default, Orddict) ->
+    case orddict:find(Key, Orddict) of
+        error ->
+            Default;
+        {ok, Found} ->
+            Found
+    end.
+
+%% @doc Store Value under Key, replacing any value already held there.
+-spec add(ec_dictionary:key(K), ec_dictionary:value(V),
+          Object::dictionary(K, V)) ->
+    dictionary(K, V).
+add(Key, Value, Orddict) ->
+    orddict:store(Key, Value, Orddict).
+
+%% @doc Drop the entry for Key; an orddict without it is unchanged.
+-spec remove(ec_dictionary:key(K), Object::dictionary(K, V)) ->
+    dictionary(K, V).
+remove(Key, Orddict) ->
+    orddict:erase(Key, Orddict).
+
+%% @doc Return true when any stored value compares equal (==) to Value.
+-spec has_value(ec_dictionary:value(V), Object::dictionary(_K, V)) -> boolean().
+has_value(Value, Orddict) ->
+    %% Once Seen is true it stays true for the rest of the fold.
+    Matches = fun(_Key, Candidate, Seen) ->
+                      Seen orelse Candidate == Value
+              end,
+    orddict:fold(Matches, false, Orddict).
+
+%% @doc Return the number of entries.
+-spec size(Object::dictionary(_K, _V)) -> integer().
+size(Orddict) ->
+    orddict:size(Orddict).
+
+%% @doc Return every entry as a {Key, Value} pair.
+-spec to_list(dictionary(K, V)) ->
+    [{ec_dictionary:key(K), ec_dictionary:value(V)}].
+to_list(Orddict) ->
+    orddict:to_list(Orddict).
+
+%% @doc Build an orddict from a list of {Key, Value} pairs.
+-spec from_list([{ec_dictionary:key(K), ec_dictionary:value(V)}]) ->
+    dictionary(K, V).
+from_list(Pairs) when is_list(Pairs) ->
+    orddict:from_list(Pairs).
+
+%% @doc Return every key held in the orddict.
+-spec keys(dictionary(K, _V)) -> [ec_dictionary:key(K)].
+keys(Orddict) ->
+    orddict:fetch_keys(Orddict).
diff --git a/src/ec_rbdict.erl b/src/ec_rbdict.erl
new file mode 100644
index 0000000..0e962ec
--- /dev/null
+++ b/src/ec_rbdict.erl
@@ -0,0 +1,319 @@
+%%% Copyright (c) 2008 Robert Virding. All rights reserved.
+%%%
+%%% Redistribution and use in source and binary forms, with or without
+%%% modification, are permitted provided that the following conditions
+%%% are met:
+%%%
+%%% 1. Redistributions of source code must retain the above copyright
+%%%    notice, this list of conditions and the following disclaimer.
+%%% 2. Redistributions in binary form must reproduce the above copyright
+%%%    notice, this list of conditions and the following disclaimer in the
+%%%    documentation and/or other materials provided with the distribution.
+%%%
+%%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+%%% "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+%%% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+%%% FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +%%% COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +%%% INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +%%% BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +%%% LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +%%% CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +%%% LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +%%% ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +%%% POSSIBILITY OF SUCH DAMAGE. +%%%------------------------------------------------------------------- +%%% @copyright 2008 Robert Virding +%%% +%%% @doc +%%% +%%% Rbdict implements a Key - Value dictionary. An rbdict is a +%%% representation of a dictionary, where a red-black tree is used to +%%% store the keys and values. +%%% +%%% This module implements exactly the same interface as the module +%%% ec_dictionary but with a defined representation. One difference is +%%% that while dict considers two keys as different if they do not +%%% match (=:=), this module considers two keys as different if and +%%% only if they do not compare equal (==). +%%% +%%% The algorithms here are taken directly from Okasaki and Rbset +%%% in ML/Scheme. The interface is that of the ec_dictionary behaviour, +%%% not the standard dict interface. +%%% +%%% The following structures are used to build the RB-dict: +%%% +%%% {r,Left,Key,Val,Right} +%%% {b,Left,Key,Val,Right} +%%% empty +%%% +%%% It is interesting to note that expanding out the first argument of +%%% l/rbalance, the colour, in store etc. is actually slower than not +%%% doing it. Measured. +%%% +%%% @end +%%% @see ec_dictionary +%%%------------------------------------------------------------------- +-module(ec_rbdict). + +-behaviour(ec_dictionary). + +%% Standard interface. +-export([add/3, from_list/1, get/2, get/3, has_key/2, + has_value/2, new/0, remove/2, size/1, to_list/1, + keys/1]). + +-export_type([dictionary/2]).
+
+%%%===================================================================
+%%% Types
+%%%===================================================================
+
+-opaque dictionary(K, V) :: empty | {color(),
+                                     dictionary(K, V),
+                                     ec_dictionary:key(K),
+                                     ec_dictionary:value(V),
+                                     dictionary(K, V)}.
+
+-type color() :: r | b.
+
+%%%===================================================================
+%%% API
+%%%===================================================================
+
+%% @doc Create an empty tree.
+-spec new() -> dictionary(_K, _V).
+new() -> empty.
+
+%% @doc Walk down from the root comparing keys with < and >; a node
+%% that is neither smaller nor larger counts as the key being present.
+-spec has_key(ec_dictionary:key(K), dictionary(K, _V)) -> boolean().
+has_key(_, empty) ->
+    false;
+has_key(K, {_, Left, K1, _, _}) when K < K1 ->
+    has_key(K, Left);
+has_key(K, {_, _, K1, _, Right}) when K > K1 ->
+    has_key(K, Right);
+has_key(_, {_, _, _, _, _}) ->
+    true.
+
+%% @doc Return the value stored under the key.
+%% @throws not_found when the search reaches an empty subtree
+-spec get(ec_dictionary:key(K), dictionary(K, V)) -> ec_dictionary:value(V).
+get(_, empty) ->
+    throw(not_found);
+get(K, {_, Left, K1, _, _}) when K < K1 ->
+    get(K, Left);
+get(K, {_, _, K1, _, Right}) when K > K1 ->
+    get(K, Right);
+get(_, {_, _, _, Val, _}) ->
+    Val.
+
+%% @doc Return the value stored under the key, or Default when the
+%% search reaches an empty subtree.
+-spec get(ec_dictionary:key(K),
+          ec_dictionary:value(V),
+          dictionary(K, V)) -> ec_dictionary:value(V).
+get(_, Default, empty) ->
+    Default;
+get(K, Default, {_, Left, K1, _, _}) when K < K1 ->
+    get(K, Default, Left);
+get(K, Default, {_, _, K1, _, Right}) when K > K1 ->
+    get(K, Default, Right);
+get(_, _, {_, _, _, Val, _}) ->
+    Val.
+
+%% @doc Store Value under Key. The insertion itself is done by add1/3;
+%% whatever colour the resulting root has, it is returned black.
+-spec add(ec_dictionary:key(K), ec_dictionary:value(V),
+          dictionary(K, V)) -> dictionary(K, V).
+add(Key, Value, Dict) ->
+    {_, L, K1, V1, R} = add1(Key, Value, Dict),
+    {b, L, K1, V1, R}.
+
+%% @doc Remove the entry for Key, if any. The flag returned alongside
+%% the tree by erase_aux/2 is only needed during the recursion and is
+%% dropped here.
+-spec remove(ec_dictionary:key(K), dictionary(K, V)) -> dictionary(K, V).
+remove(Key, Dictionary) ->
+    {Dict1, _} = erase_aux(Key, Dictionary), Dict1.
+
+%% @doc Return true when any stored value compares equal (==) to Value.
+-spec has_value(ec_dictionary:value(V), dictionary(_K, V)) -> boolean().
+has_value(Value, Dict) ->
+    fold(fun (_, NValue, _) when NValue == Value -> true;
+             (_, _, Acc) -> Acc
+         end,
+         false, Dict).
+
+%% @doc Return the number of entries; the counting is done by size1/1.
+-spec size(dictionary(_K, _V)) -> integer().
+size(T) ->
+    size1(T).
+
+%% @doc Return the entries as {Key, Value} pairs, collected by
+%% to_list/2 starting from an empty accumulator.
+-spec to_list(dictionary(K, V)) ->
+    [{ec_dictionary:key(K), ec_dictionary:value(V)}].
+to_list(T) ->
+    to_list(T, []).
+
+%% @doc Build a tree by adding the pairs one at a time, left to right,
+%% starting from an empty tree.
+-spec from_list([{ec_dictionary:key(K), ec_dictionary:value(V)}]) ->
+    dictionary(K, V).
+from_list(L) ->
+    lists:foldl(fun ({K, V}, D) ->
+                        add(K, V, D)
+                end, new(),
+                L).
+
+%% @doc Return the keys, collected by keys/2 starting from an empty
+%% accumulator.
+-spec keys(dictionary(K, _V)) -> [ec_dictionary:key(K)].
+keys(Dict) ->
+    keys(Dict, []).
+
+%%%===================================================================
+%%% Internal functions
+%%%===================================================================
+
+%% Prepend the keys of the tree to Tail: right subtree first, then the
+%% node's own key, then the left subtree, so the keys of the left
+%% subtree end up in front.
+-spec keys(dictionary(K, _V), [ec_dictionary:key(K)]) ->
+    [ec_dictionary:key(K)].
+keys(empty, Tail) ->
+    Tail;
+keys({_, L, K, _, R}, Tail) ->
+    keys(L, [K | keys(R, Tail)]).
+
+
+%% Remove K from the tree and return {NewTree, Flag}.
+%%
+%% Flag travels back up the recursion: when a child call returns true
+%% the parent hands the pieces to unbalright/5 or unballeft/5, otherwise
+%% it rebuilds the node unchanged and returns false. Presumably true
+%% means the subtree lost one level of black height and the parent has
+%% to compensate -- TODO confirm against the original rbdict.
+%%
+%% The black and red clauses differ only in the colour passed on and in
+%% what happens when the matching node has no right subtree: a black
+%% node returns blackify(A), a red node returns {A, false}.
+-spec erase_aux(ec_dictionary:key(K), dictionary(K, V)) ->
+    {dictionary(K, V), boolean()}.
+erase_aux(_, empty) ->
+    {empty, false};
+erase_aux(K, {b, A, Xk, Xv, B}) ->
+    if K < Xk ->
+            {A1, Dec} = erase_aux(K, A),
+            if Dec ->
+                    unbalright(b, A1, Xk, Xv, B);
+               true ->
+                    {{b, A1, Xk, Xv, B}, false}
+            end;
+       K > Xk ->
+            {B1, Dec} = erase_aux(K, B),
+            if Dec ->
+                    unballeft(b, A, Xk, Xv, B1);
+               true ->
+                    {{b, A, Xk, Xv, B1}, false}
+            end;
+       true ->
+            %% Neither smaller nor larger: this is the node to delete.
+            case B of
+                empty ->
+                    blackify(A);
+                _ ->
+                    %% Replace this node's pair with the minimum of the
+                    %% right subtree, which erase_min/1 removes from B.
+                    {B1, {Mk, Mv}, Dec} = erase_min(B),
+                    if Dec ->
+                            unballeft(b, A, Mk, Mv, B1);
+                       true ->
+                            {{b, A, Mk, Mv, B1}, false}
+                    end
+            end
+    end;
+erase_aux(K, {r, A, Xk, Xv, B}) ->
+    if K < Xk ->
+            {A1, Dec} = erase_aux(K, A),
+            if Dec ->
+                    unbalright(r, A1, Xk, Xv, B);
+               true ->
+                    {{r, A1, Xk, Xv, B}, false}
+            end;
+       K > Xk ->
+            {B1, Dec} = erase_aux(K, B),
+            if Dec ->
+                    unballeft(r, A, Xk, Xv, B1);
+               true ->
+                    {{r, A, Xk, Xv, B1}, false}
+            end;
+       true ->
+            %% Neither smaller nor larger: this is the node to delete.
+            case B of
+                empty ->
+                    {A, false};
+                _ ->
+                    {B1, {Mk, Mv}, Dec} = erase_min(B),
+                    if Dec ->
+                            unballeft(r, A, Mk, Mv, B1);
+                       true ->
+                            {{r, A, Mk, Mv, B1}, false}
+                    end
+            end
+    end.
+
+%% Remove the leftmost (smallest-key) entry of a non-empty tree.
+%% Returns {NewTree, {Key, Value}, Flag}; Flag is propagated exactly as
+%% in erase_aux/2. A black node with no left child and a black right
+%% child is treated as impossible and exits with 'boom'.
+-spec erase_min(dictionary(K, V)) ->
+    {dictionary(K, V), {ec_dictionary:key(K), ec_dictionary:value(V)}, boolean()}.
+erase_min({b, empty, Xk, Xv, empty}) ->
+    {empty, {Xk, Xv}, true};
+erase_min({b, empty, Xk, Xv, {r, A, Yk, Yv, B}}) ->
+    {{b, A, Yk, Yv, B}, {Xk, Xv}, false};
+erase_min({b, empty, _, _, {b, _, _, _, _}}) ->
+    exit(boom);
+erase_min({r, empty, Xk, Xv, A}) ->
+    {A, {Xk, Xv}, false};
+erase_min({b, A, Xk, Xv, B}) ->
+    {A1, Min, Dec} = erase_min(A),
+    if Dec ->
+            {T, Dec1} = unbalright(b, A1, Xk, Xv, B),
+            {T, Min, Dec1};
+       true -> {{b, A1, Xk, Xv, B}, Min, false}
+    end;
+erase_min({r, A, Xk, Xv, B}) ->
+    {A1, Min, Dec} = erase_min(A),
+    if Dec ->
+            {T, Dec1} = unbalright(r, A1, Xk, Xv, B),
+            {T, Min, Dec1};
+       true -> {{r, A1, Xk, Xv, B}, Min, false}
+    end.
+
+%% Turn a red node black and report false; any other tree is returned
+%% untouched together with true.
+blackify({r, A, K, V, B}) -> {{b, A, K, V, B}, false};
+blackify(Node) -> {Node, true}.
+
+%% Rebuild a node after its right subtree reported a true flag: the
+%% left sibling is recoloured and the result passed through lbalance/5.
+%% Returns {Tree, Flag} for the caller to propagate.
+unballeft(r, {b, A, Xk, Xv, B}, Yk, Yv, C) ->
+    {lbalance(b, {r, A, Xk, Xv, B}, Yk, Yv, C), false};
+unballeft(b, {b, A, Xk, Xv, B}, Yk, Yv, C) ->
+    {lbalance(b, {r, A, Xk, Xv, B}, Yk, Yv, C), true};
+unballeft(b, {r, A, Xk, Xv, {b, B, Yk, Yv, C}}, Zk, Zv,
+          D) ->
+    {{b, A, Xk, Xv,
+      lbalance(b, {r, B, Yk, Yv, C}, Zk, Zv, D)},
+     false}.
+
+%% Mirror image of unballeft/5, used after the left subtree reported a
+%% true flag; rebalances through rbalance/5.
+unbalright(r, A, Xk, Xv, {b, B, Yk, Yv, C}) ->
+    {rbalance(b, A, Xk, Xv, {r, B, Yk, Yv, C}), false};
+unbalright(b, A, Xk, Xv, {b, B, Yk, Yv, C}) ->
+    {rbalance(b, A, Xk, Xv, {r, B, Yk, Yv, C}), true};
+unbalright(b, A, Xk, Xv,
+           {r, {b, B, Yk, Yv, C}, Zk, Zv, D}) ->
+    {{b, rbalance(b, A, Xk, Xv, {r, B, Yk, Yv, C}), Zk, Zv,
+      D},
+     false}.
+
+%% Fold F over every entry, threading an accumulator of any type
+%% (has_value/2 passes a boolean). The right subtree is folded first,
+%% then the node itself, then the left subtree.
+-spec fold(fun((ec_dictionary:key(K), ec_dictionary:value(V), Acc) -> Acc),
+           Acc, dictionary(K, V)) -> Acc.
+fold(_, Acc, empty) -> Acc;
+fold(F, Acc, {_, A, Xk, Xv, B}) ->
+    fold(F, F(Xk, Xv, fold(F, Acc, B)), A).
+
+%% Insert or replace K below the given node. A new entry becomes a red
+%% leaf; on the way back up each node is rebuilt through lbalance/5 or
+%% rbalance/5. A node whose key is neither smaller nor larger than K
+%% gets K and V in place, keeping its colour and children.
+add1(K, V, empty) -> {r, empty, K, V, empty};
+add1(K, V, {C, Left, K1, V1, Right}) when K < K1 ->
+    lbalance(C, add1(K, V, Left), K1, V1, Right);
+add1(K, V, {C, Left, K1, V1, Right}) when K > K1 ->
+    rbalance(C, Left, K1, V1, add1(K, V, Right));
+add1(K, V, {C, L, _, _, R}) -> {C, L, K, V, R}.
+
+%% Count the nodes of the tree.
+size1(empty) -> 0;
+size1({_, L, _, _, R}) -> size1(L) + size1(R) + 1.
+
+%% Prepend the entries of the tree to List: right subtree first, then
+%% the node's own pair, then the left subtree.
+to_list(empty, List) -> List;
+to_list({_, A, Xk, Xv, B}, List) ->
+    to_list(A, [{Xk, Xv} | to_list(B, List)]).
+
+%% Balance a tree after (possibly) adding a node to the left/right.
+%% A black node whose left child is red and has a red child of its own
+%% is rewritten into a red node with two black children; anything else
+%% is rebuilt unchanged.
+-spec lbalance(color(), dictionary(K, V),
+               ec_dictionary:key(K), ec_dictionary:value(V),
+               dictionary(K, V)) ->
+    dictionary(K, V).
+lbalance(b, {r, {r, A, Xk, Xv, B}, Yk, Yv, C}, Zk, Zv,
+         D) ->
+    {r, {b, A, Xk, Xv, B}, Yk, Yv, {b, C, Zk, Zv, D}};
+lbalance(b, {r, A, Xk, Xv, {r, B, Yk, Yv, C}}, Zk, Zv,
+         D) ->
+    {r, {b, A, Xk, Xv, B}, Yk, Yv, {b, C, Zk, Zv, D}};
+lbalance(C, A, Xk, Xv, B) -> {C, A, Xk, Xv, B}.
+
+%% Mirror image of lbalance/5 for a red right child with a red child.
+-spec rbalance(color(), dictionary(K, V),
+               ec_dictionary:key(K), ec_dictionary:value(V),
+               dictionary(K, V)) ->
+    dictionary(K, V).
+rbalance(b, A, Xk, Xv,
+         {r, {r, B, Yk, Yv, C}, Zk, Zv, D}) ->
+    {r, {b, A, Xk, Xv, B}, Yk, Yv, {b, C, Zk, Zv, D}};
+rbalance(b, A, Xk, Xv,
+         {r, B, Yk, Yv, {r, C, Zk, Zv, D}}) ->
+    {r, {b, A, Xk, Xv, B}, Yk, Yv, {b, C, Zk, Zv, D}};
+rbalance(C, A, Xk, Xv, B) -> {C, A, Xk, Xv, B}.
diff --git a/test/ec_dictionary_proper.erl b/test/ec_dictionary_proper.erl
new file mode 100644
index 0000000..d30f542
--- /dev/null
+++ b/test/ec_dictionary_proper.erl
@@ -0,0 +1,223 @@
+%% compile with
+%% erl -pz ebin --make
+%% start test with
+%% erl -pz ebin -pz test
+%% proper:module(ec_dictionary_proper).
+-module(ec_dictionary_proper).
+
+-export([my_dict/0, dict/1, sym_dict/0, sym_dict/1, gb_tree/0, gb_tree/1, sym_dict2/0]).
+
+-include_lib("proper/include/proper.hrl").
+
+
+%%------------------------------------------------------------------------------
+%% Properties
+%%------------------------------------------------------------------------------
+
+%% Adding a key grows the dictionary by exactly one entry when the key is
+%% new and leaves the size unchanged when the key is already present.
+prop_size_increases_with_new_key() ->
+    ?FORALL({Dict,K}, {sym_dict(),integer()},
+            begin
+                Size = ec_dictionary:size(Dict),
+                case ec_dictionary:has_key(K,Dict) of
+                    true ->
+                        Size == ec_dictionary:size(ec_dictionary:add(K,0,Dict));
+                    false ->
+                        (Size + 1) == ec_dictionary:size(ec_dictionary:add(K,0,Dict))
+                end
+            end).
+
+%% Removing a key shrinks the dictionary by exactly one entry when the key
+%% is present and leaves the size unchanged when it is absent.
+prop_size_decrease_when_removing() ->
+    ?FORALL({Dict,K}, {sym_dict(),integer()},
+            begin
+                Size = ec_dictionary:size(Dict),
+                case ec_dictionary:has_key(K,Dict) of
+                    false ->
+                        Size == ec_dictionary:size(ec_dictionary:remove(K,Dict));
+                    true ->
+                        (Size - 1) == ec_dictionary:size(ec_dictionary:remove(K,Dict))
+                end
+            end).
+
+%% Looking up a key directly after adding it returns the value just added.
+%% Any exception raised by the lookup counts as a failure.
+prop_get_after_add_returns_correct_value() ->
+    ?FORALL({Dict,K,V}, {sym_dict(),key(),value()},
+            begin
+                try ec_dictionary:get(K,ec_dictionary:add(K,V,Dict)) of
+                    V ->
+                        true;
+                    _ ->
+                        false
+                catch
+                    _:_ ->
+                        false
+                end
+            end).
+
+%% get/3 returns the supplied default for a key that is not present.
+prop_get_default_returns_correct_value() ->
+    ?FORALL({Dict,K1,K2,V,Default},
+            {sym_dict(),key(),key(),value(),value()},
+            begin
+                NewDict = ec_dictionary:add(K1,V, Dict),
+                %% K2 may happen to be present already (it can equal K1 or
+                %% a key generated into Dict); the default does not apply
+                %% then, so that case passes trivially.
+                case ec_dictionary:has_key(K2, NewDict) of
+                    true ->
+                        true;
+                    false ->
+                        ec_dictionary:get(K2, Default, NewDict) == Default
+                end
+            end).
+
+
+%% Adding a key that is not yet in the dictionary leaves the value of
+%% every existing key untouched.
+prop_add_does_not_change_values_for_other_keys() ->
+    ?FORALL({Dict,K,V}, {sym_dict(),key(),value()},
+            begin
+                Keys = ec_dictionary:keys(Dict),
+                ?IMPLIES(not lists:member(K,Keys),
+                         begin
+                             Dict2 = ec_dictionary:add(K,V,Dict),
+                             try lists:all(fun(Ka) ->
+                                                   ec_dictionary:get(Ka,Dict) ==
+                                                       ec_dictionary:get(Ka,Dict2)
+                                           end, Keys)
+                             catch
+                                 %% Every Ka was reported by keys/1, so a
+                                 %% failed lookup is a defect in the
+                                 %% dictionary and must fail the property
+                                 %% instead of passing it.
+                                 throw:not_found -> false
+                             end
+                         end)
+            end).
+
+
+
+%% A key is reported by has_key/2 directly after it has been added.
+prop_key_is_present_after_add() ->
+    ?FORALL({Dict,K,V}, {sym_dict(),integer(),integer()},
+            begin
+                ec_dictionary:has_key(K,ec_dictionary:add(K,V,Dict)) end).
+
+%% A value is reported by has_value/2 directly after it has been added.
+prop_value_is_present_after_add() ->
+    ?FORALL({Dict,K,V}, {sym_dict(),integer(),integer()},
+            begin
+                ec_dictionary:has_value(V,ec_dictionary:add(K,V,Dict))
+            end).
+
+%% Every {Key, Value} pair listed by to_list/1 is also what get/2 returns
+%% for that key.
+prop_to_list_matches_get() ->
+    ?FORALL(Dict,sym_dict(),
+            begin
+                ToList = ec_dictionary:to_list(Dict),
+                try [ {K,ec_dictionary:get(K,Dict)} || {K,_V} <- ToList ] of
+                    GetList ->
+                        lists:sort(ToList) == lists:sort(GetList)
+                catch
+                    %% A key listed by to_list/1 that get/2 cannot find is
+                    %% a plain property failure.
+                    throw:not_found -> false
+                end
+            end).
+
+%% Adding the same key twice stores the first value after the first add
+%% and the second value after the second add.
+prop_value_changes_after_update() ->
+    ?FORALL({Dict, K1, V1, V2},
+            {sym_dict(),
+             key(), value(), value()},
+            begin
+                Dict1 = ec_dictionary:add(K1, V1, Dict),
+                Dict2 = ec_dictionary:add(K1, V2, Dict1),
+                V1 == ec_dictionary:get(K1, Dict1) andalso
+                    V2 == ec_dictionary:get(K1, Dict2)
+            end).
+
+%% Removing a key makes that key disappear and leaves every other entry
+%% exactly as it was.
+prop_remove_removes_only_one_key() ->
+    ?FORALL({Dict,K},
+            {sym_dict(),key()},
+            begin
+                {KeyGone,Dict2} = case ec_dictionary:has_key(K,Dict) of
+                                      true ->
+                                          D2 = ec_dictionary:remove(K,Dict),
+                                          {ec_dictionary:has_key(K,D2) == false,
+                                           D2};
+                                      false ->
+                                          {true,ec_dictionary:remove(K,Dict)}
+                                  end,
+                %% The entries expected to survive: everything but K.
+                OtherEntries = [ KV || {K1,_} = KV <- ec_dictionary:to_list(Dict),
+                                       K1 /= K ],
+                KeyGone andalso
+                    lists:sort(OtherEntries) == lists:sort(ec_dictionary:to_list(Dict2))
+            end).
+
+%% Converting a dictionary to a list and building a dictionary of any
+%% generated backend type from that list keeps the same entries.
+prop_from_list() ->
+    ?FORALL({Dict,DictType},
+            {sym_dict(),dictionary()},
+            begin
+                List = ec_dictionary:to_list(Dict),
+                D2 = ec_dictionary:from_list(DictType,List),
+                List2 = ec_dictionary:to_list(D2),
+                lists:sort(List) == lists:sort(List2)
+            end).
+
+
+%%-----------------------------------------------------------------------------
+%% Generators
+%%-----------------------------------------------------------------------------
+
+%% Keys are integers or atoms.
+key() -> union([integer(),atom()]).
+
+%% Values are integers, atoms, binaries, booleans or strings.
+value() -> union([integer(),atom(),binary(),boolean(),string()]).
+
+
+%% Sized wrapper around dict/1.
+my_dict() ->
+    ?SIZED(N,dict(N)).
+
+
+%% Non-symbolic generator: it calls ec_dictionary directly while the
+%% generator is being built, and it passes the type terms integer()
+%% themselves as key and value.
+%% NOTE(review): presumably kept as the tutorial's first, naive attempt
+%% rather than for use in the properties - confirm before changing it.
+dict(0) ->
+    ec_dictionary:new(ec_gb_trees);
+dict(N) ->
+    ?LET(D,dict(N-1),
+         frequency([
+                    {1, dict(0)},
+                    {3, ec_dictionary:remove(integer(),D)},
+                    {6, ec_dictionary:add(integer(),integer(),D)}
+                   ])).
+
+%% Sized wrapper around sym_dict/1.
+sym_dict() ->
+    ?SIZED(N,sym_dict(N)).
+
+%% This symbolic generator will create a random instance of a ec_dictionary
+%% that will be used in the properties.
+%% NOTE(review): add draws its key from value() while remove draws from
+%% key(), so a generated remove can only hit integer or atom keys; confirm
+%% that this is intended.
+sym_dict(0) ->
+    ?LET(Dict,dictionary(),
+         {'$call',ec_dictionary,new,[Dict]});
+sym_dict(N) ->
+    ?LAZY(
+       frequency([
+                  {1, sym_dict(0)},
+                  {3, {'$call',ec_dictionary,remove,[key(),sym_dict(N-1)]}},
+                  {6, {'$call',ec_dictionary,add,[value(),value(),sym_dict(N-1)]}}
+                 ])
+      ).
+
+%% The dictionary backends the properties are run against.
+%% NOTE(review): ec_rbdict is not listed although this patch adds
+%% src/ec_rbdict.erl - confirm whether that is deliberate.
+dictionary() ->
+    union([ec_gb_trees,ec_assoc_list,ec_dict,ec_orddict]).
+
+%% Sized wrapper around sym_dict2/1.
+sym_dict2() ->
+    ?SIZED(N,sym_dict2(N)).
+
+%% Symbolic generator built from {call,M,F,Args} tuples on top of an empty
+%% ec_gb_trees dictionary. The recursion goes through sym_dict2/1 itself,
+%% so the whole term stays symbolic down to the base clause.
+%% NOTE(review): unlike sym_dict/1 this uses the plain `call' tag, which
+%% presumably has to be evaluated explicitly by the caller - confirm
+%% against the PropEr documentation.
+sym_dict2(0) ->
+    {call,ec_dictionary,new,[ec_gb_trees]};
+sym_dict2(N) ->
+    D = sym_dict2(N-1),
+    frequency([
+               {1, {call,ec_dictionary,remove,[integer(),D]}},
+               {2, {call,ec_dictionary,add,[integer(),integer(),D]}}
+              ]).
+
+
+%% For the tutorial.
+%% Sized wrapper around gb_tree/1.
+gb_tree() ->
+    ?SIZED(N,gb_tree(N)).
+
+%% Builds a tree with N nested gb_trees:enter/3 calls on top of
+%% gb_trees:empty/0, passing key() and value() as the arguments.
+gb_tree(0) ->
+    gb_trees:empty();
+gb_tree(N) ->
+    gb_trees:enter(key(),value(),gb_tree(N-1)).
+
diff --git a/test/mock.erl b/test/mock.erl
new file mode 100644
index 0000000..ad7c6bd
--- /dev/null
+++ b/test/mock.erl
@@ -0,0 +1,16 @@
+%% Test helper that installs a meck mock of ec_dictionary_proper.
+-module(mock).
+
+-export([new_dictionary/0]).
+
+%% Creates a meck mock of ec_dictionary_proper and defines dictionary/0 on
+%% it so that it returns proper_types:union([ec_dict]).
+%% NOTE(review): dictionary/0 is not in ec_dictionary_proper's export list
+%% and meck:new/1 is called without options; confirm that the properties
+%% really pick up this stub.
+new_dictionary() ->
+    meck:new(ec_dictionary_proper),
+    meck:expect(ec_dictionary_proper, dictionary, fun() ->
+                                                          proper_types:union([ec_dict])
+                                                  end).
+