From 1b0138061361d0d9cb3708141f5c1347e2e48bc8 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Sun, 16 Sep 2012 13:11:41 -0500 Subject: [PATCH 01/20] compilation utilities for the implementors Signed-off-by: Jordan Wilberding --- src/ec_compile.erl | 107 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 src/ec_compile.erl diff --git a/src/ec_compile.erl b/src/ec_compile.erl new file mode 100644 index 0000000..482bdb3 --- /dev/null +++ b/src/ec_compile.erl @@ -0,0 +1,107 @@ +%%%------------------------------------------------------------------- +%%% @author Eric Merritt <> +%%% @copyright (C) 2011, Erlware, LLC. +%%% @doc +%%% These are various utility functions to help with compiling and +%%% decompiling erlang source. They are mostly useful to the +%%% language/parse transform implementor. +%%% @end +%%%------------------------------------------------------------------- +-module(ec_compile). + +-export([beam_to_erl_source/2, + erl_source_to_core_ast/1, + erl_source_to_erl_ast/1, + erl_source_to_asm/1, + erl_string_to_core_ast/1, + erl_string_to_erl_ast/1, + erl_string_to_asm/1]). + +%%%=================================================================== +%%% API +%%%=================================================================== + +%% @doc decompile a beam file that has been compiled with +debug_info +%% into an erlang source file +%% +%% @param BeamFName the name of the beamfile +%% @param ErlFName the name of the erlang file where the generated +%% source file will be output. This should *not* be the same as the +%% source file that created the beamfile unless you want to overwrite +%% it. +-spec beam_to_erl_source(string(), string()) -> ok | term(). 
+beam_to_erl_source(BeamFName, ErlFName) -> + case beam_lib:chunks(BeamFName, [abstract_code]) of + {ok, {_, [{abstract_code, {raw_abstract_v1,Forms}}]}} -> + Src = + erl_prettypr:format(erl_syntax:form_list(tl(Forms))), + {ok, Fd} = file:open(ErlFName, [write]), + io:fwrite(Fd, "~s~n", [Src]), + file:close(Fd); + Error -> + Error + end. + +%% @doc compile an erlang source file into a Core Erlang AST +%% +%% @param Path - The path to the erlang source file +-spec erl_source_to_core_ast(file:filename()) -> CoreAst::term(). +erl_source_to_core_ast(Path) -> + {ok, Contents} = file:read_file(Path), + erl_string_to_core_ast(binary_to_list(Contents)). + +%% @doc compile an erlang source file into an Erlang AST +%% +%% @param Path - The path to the erlang source file +-spec erl_source_to_erl_ast(file:filename()) -> ErlangAst::term(). +erl_source_to_erl_ast(Path) -> + {ok, Contents} = file:read_file(Path), + erl_string_to_erl_ast(binary_to_list(Contents)). + +%% @doc compile an erlang source file into erlang terms that represent +%% the relevant ASM +%% +%% @param Path - The path to the erlang source file +-spec erl_source_to_asm(file:filename()) -> ErlangAsm::term(). +erl_source_to_asm(Path) -> + {ok, Contents} = file:read_file(Path), + erl_string_to_asm(binary_to_list(Contents)). + +%% @doc compile a string representing an erlang expression into an +%% Erlang AST +%% +%% @param StringExpr - The string containing the erlang source +-spec erl_string_to_erl_ast(string()) -> ErlangAst::term(). +erl_string_to_erl_ast(StringExpr) -> + Forms0 = + lists:foldl(fun(<<>>, Acc) -> + Acc; + (<<"\n\n">>, Acc) -> + Acc; + (El, Acc) -> + {ok, Tokens, _} = + erl_scan:string(binary_to_list(El) + ++ "."), + [Tokens | Acc] + end, [], re:split(StringExpr, "\\.\n")), + %% No need to reverse. This will rereverse for us + lists:foldl(fun(Form, Forms) -> + {ok, ErlAST} = erl_parse:parse_form(Form), + [ErlAST | Forms] + end, [], Forms0). 
+ +%% @doc compile a string representing an erlang expression into a +%% Core Erlang AST +%% +%% @param StringExpr - The string containing the erlang source +-spec erl_string_to_core_ast(string()) -> CoreAst::term(). +erl_string_to_core_ast(StringExpr) -> + compile:forms(erl_string_to_erl_ast(StringExpr), [to_core]). + +%% @doc compile a string representing an erlang expression into a term +%% that represents the ASM +%% +%% @param StringExpr - The string containing the erlang source +-spec erl_string_to_asm(string()) -> ErlangAsm::term(). +erl_string_to_asm(StringExpr) -> + compile:forms(erl_string_to_erl_ast(StringExpr), ['S']). From 3a29539285786391fc5bef4230820651ba50e776 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Mon, 10 Sep 2012 18:21:58 -0500 Subject: [PATCH 02/20] fixes for edoc compilation Signed-off-by: Jordan Wilberding --- src/ec_semver.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ec_semver.erl b/src/ec_semver.erl index 4dc83e9..cdff19e 100644 --- a/src/ec_semver.erl +++ b/src/ec_semver.erl @@ -141,8 +141,8 @@ between(Vsn1, Vsn2, VsnMatch) -> %% revisions and "~> 2.6.5" is pessimistic about future minor %% revisions. %% -%% "~> 2.6" matches cookbooks >= 2.6.0 AND < 3.0.0 -%% "~> 2.6.5" matches cookbooks >= 2.6.5 AND < 2.7.0 +%% "~> 2.6" matches cookbooks >= 2.6.0 AND < 3.0.0 +%% "~> 2.6.5" matches cookbooks >= 2.6.5 AND < 2.7.0 pes(VsnA, VsnB) -> internal_pes(parse(VsnA), parse(VsnB)). 
From 0c345499018398b1f3a0cecadad76a232c08f117 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Mon, 10 Sep 2012 18:23:56 -0500 Subject: [PATCH 03/20] make sure the docs get run as part of a bare make Signed-off-by: Jordan Wilberding --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 933ce28..538d561 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ ERLWARE_COMMONS_PLT=$(CURDIR)/.erlware_commons_plt .PHONY: all compile doc clean test dialyzer typer shell distclean pdf get-deps escript -all: compile test dialyzer +all: compile test doc dialyzer get-deps: $(REBAR) get-deps From e9161d8688e7e2b902220c741ed584689243d899 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Tue, 18 Sep 2012 15:34:57 -0700 Subject: [PATCH 04/20] provide the ability to format a version into a string as well as parse a version Signed-off-by: Jordan Wilberding --- rebar.config | 2 +- src/ec_semver.erl | 54 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/rebar.config b/rebar.config index 6284cba..2c3cad3 100644 --- a/rebar.config +++ b/rebar.config @@ -2,7 +2,7 @@ %% These are all only compile time dependencies {deps, [{neotoma, "", - {git, "https://github.com/seancribbs/neotoma.git", {tag, "1.5"}}}, + {git, "https://github.com/seancribbs/neotoma.git", {tag, "1.5.1"}}}, {proper, "", {git, "https://github.com/manopapad/proper.git", {branch, master}}}]}. {erl_first_files, ["ec_dictionary"]}. diff --git a/src/ec_semver.erl b/src/ec_semver.erl index cdff19e..83bf6ef 100644 --- a/src/ec_semver.erl +++ b/src/ec_semver.erl @@ -8,6 +8,7 @@ -module(ec_semver). -export([parse/1, + format/1, eql/2, gt/2, gte/2, @@ -32,10 +33,11 @@ | {non_neg_integer(), non_neg_integer()} | {non_neg_integer(), non_neg_integer(), non_neg_integer()}. --type alpha_part() :: integer() | binary(). +-type alpha_part() :: integer() | binary() | string(). 
+-type alpha_info() :: {PreRelease::[alpha_part()], + BuildVersion::[alpha_part()]}. --type semver() :: {major_minor_patch(), {PreReleaseVersion::[alpha_part()], - BuildVersion::[alpha_part()]}}. +-type semver() :: {major_minor_patch(), alpha_info()}. -type version_string() :: string() | binary(). @@ -54,6 +56,24 @@ parse(Version) when erlang:is_binary(Version) -> parse(Version) -> Version. +-spec format(semver()) -> iolist(). +format({Maj, {AlphaPart, BuildPart}}) + when erlang:is_integer(Maj) -> + [erlang:integer_to_list(Maj), + format_vsn_rest(<<"-">>, AlphaPart), + format_vsn_rest(<<"+">>, BuildPart)]; +format({{Maj, Min}, {AlphaPart, BuildPart}}) -> + [erlang:integer_to_list(Maj), ".", + erlang:integer_to_list(Min), + format_vsn_rest(<<"-">>, AlphaPart), + format_vsn_rest(<<"+">>, BuildPart)]; +format({{Maj, Min, Patch}, {AlphaPart, BuildPart}}) -> + [erlang:integer_to_list(Maj), ".", + erlang:integer_to_list(Min), ".", + erlang:integer_to_list(Patch), + format_vsn_rest(<<"-">>, AlphaPart), + format_vsn_rest(<<"+">>, BuildPart)]. + %% @doc test for quality between semver versions -spec eql(any_version(), any_version()) -> boolean(). eql(VsnA, VsnB) -> @@ -189,6 +209,19 @@ format_alpha_part([<<".">>, AlphaPart]) -> %%%=================================================================== %%% Internal Functions %%%=================================================================== +-spec to_list(integer() | binary() | string()) -> string() | binary(). +to_list(Detail) when erlang:is_integer(Detail) -> + erlang:integer_to_list(Detail); +to_list(Detail) when erlang:is_list(Detail); erlang:is_binary(Detail) -> + Detail. + +-spec format_vsn_rest(binary() | string(), [integer() | binary()]) -> iolist(). +format_vsn_rest(_TypeMark, []) -> + []; +format_vsn_rest(TypeMark, [Head | Rest]) -> + [TypeMark, Head | + [[".", to_list(Detail)] || Detail <- Rest]]. + %% @doc normalize the semver so they can be compared -spec normalize(semver()) -> semver(). 
normalize({Vsn, Rest}) @@ -523,4 +556,19 @@ pes_test() -> ?assertMatch(true, not pes("2.7", "2.6.5")), ?assertMatch(true, not pes("2.5", "2.6.5")). +version_format_test() -> + ?assertEqual(["1", [], []], format({1, {[],[]}})), + ?assertEqual(["1", ".", "2", ".", "34", [], []], format({{1,2,34},{[],[]}})), + ?assertEqual(<<"1">>, erlang:iolist_to_binary(format({1, {[],[]}}))), + ?assertEqual(<<"1.2">>, erlang:iolist_to_binary(format({{1,2}, {[],[]}}))), + ?assertEqual(<<"1.2.2">>, erlang:iolist_to_binary(format({{1,2,2}, {[],[]}}))), + ?assertEqual(<<"1.99.2">>, erlang:iolist_to_binary(format({{1,99,2}, {[],[]}}))), + ?assertEqual(<<"1.99.2-alpha">>, erlang:iolist_to_binary(format({{1,99,2}, {[<<"alpha">>],[]}}))), + ?assertEqual(<<"1.99.2-alpha.1">>, erlang:iolist_to_binary(format({{1,99,2}, {[<<"alpha">>,1], []}}))), + ?assertEqual(<<"1.99.2+build.1.a36">>, + erlang:iolist_to_binary(format({{1,99,2}, {[], [<<"build">>, 1, <<"a36">>]}}))), + ?assertEqual(<<"1.99.2-alpha.1+build.1.a36">>, + erlang:iolist_to_binary(format({{1,99,2}, {[<<"alpha">>, 1], [<<"build">>, 1, <<"a36">>]}}))), + ?assertEqual(<<"1">>, erlang:iolist_to_binary(format({1, {[],[]}}))). + -endif. From 5105df48f956c0b9a2272a553eb452228832a644 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Sun, 16 Sep 2012 13:07:17 -0500 Subject: [PATCH 05/20] minor whitespace cleanup for ec_semver Signed-off-by: Jordan Wilberding --- src/ec_semver.erl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/ec_semver.erl b/src/ec_semver.erl index 83bf6ef..0d8b40b 100644 --- a/src/ec_semver.erl +++ b/src/ec_semver.erl @@ -245,9 +245,6 @@ internal_pes(VsnA, {{LM, LMI, LP}, _}) -> internal_pes(Vsn, LVsn) -> gte(Vsn, LVsn). - - - %%%=================================================================== %%% Test Functions %%%=================================================================== @@ -275,7 +272,6 @@ eql_test() -> ?assertMatch(true, not eql("1.0.0+build.1", "1.0.1+build.2")). 
- gt_test() -> ?assertMatch(true, gt("1.0.0-alpha.1", "1.0.0-alpha")), From b4ab414419d544d9f8989e055bed8b8f6c6bc747 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Mon, 17 Sep 2012 09:38:32 -0500 Subject: [PATCH 06/20] support four primary version numbers in parsing The OTP Versions distributed with erlang tend to have four version numbers, not three. This is a fairly minor deviation from semver that we can support. Basically, the semver parser treats the fourth version in exactly the same way as the other three. Signed-off-by: Jordan Wilberding --- src/ec_semver.erl | 99 ++++++++++++++++++++++++++++++++-------- src/ec_semver_parser.peg | 4 +- 2 files changed, 83 insertions(+), 20 deletions(-) diff --git a/src/ec_semver.erl b/src/ec_semver.erl index 0d8b40b..0566322 100644 --- a/src/ec_semver.erl +++ b/src/ec_semver.erl @@ -1,3 +1,4 @@ + %%%------------------------------------------------------------------- %%% @copyright (C) 2011, Erlware LLC %%% @doc @@ -28,16 +29,18 @@ %%% Public Types %%%=================================================================== --type major_minor_patch() :: +-type major_minor_patch_minpatch() :: non_neg_integer() | {non_neg_integer(), non_neg_integer()} - | {non_neg_integer(), non_neg_integer(), non_neg_integer()}. + | {non_neg_integer(), non_neg_integer(), non_neg_integer()} + | {non_neg_integer(), non_neg_integer(), + non_neg_integer(), non_neg_integer()}. -type alpha_part() :: integer() | binary() | string(). -type alpha_info() :: {PreRelease::[alpha_part()], BuildVersion::[alpha_part()]}. --type semver() :: {major_minor_patch(), alpha_info()}. +-type semver() :: {major_minor_patch_minpatch(), alpha_info()}. -type version_string() :: string() | binary(). 
@@ -72,6 +75,13 @@ format({{Maj, Min, Patch}, {AlphaPart, BuildPart}}) -> erlang:integer_to_list(Min), ".", erlang:integer_to_list(Patch), format_vsn_rest(<<"-">>, AlphaPart), + format_vsn_rest(<<"+">>, BuildPart)]; +format({{Maj, Min, Patch, MinPatch}, {AlphaPart, BuildPart}}) -> + [erlang:integer_to_list(Maj), ".", + erlang:integer_to_list(Min), ".", + erlang:integer_to_list(Patch), ".", + erlang:integer_to_list(MinPatch), + format_vsn_rest(<<"-">>, AlphaPart), format_vsn_rest(<<"+">>, BuildPart)]. %% @doc test for quality between semver versions @@ -172,17 +182,20 @@ pes(VsnA, VsnB) -> %% @doc helper function for the peg grammer to parse the iolist into a semver -spec internal_parse_version(iolist()) -> semver(). internal_parse_version([MMP, AlphaPart, BuildPart, _]) -> - {parse_major_minor_patch(MMP), {parse_alpha_part(AlphaPart), - parse_alpha_part(BuildPart)}}. + {parse_major_minor_patch_minpatch(MMP), {parse_alpha_part(AlphaPart), + parse_alpha_part(BuildPart)}}. %% @doc helper function for the peg grammer to parse the iolist into a major_minor_patch --spec parse_major_minor_patch(iolist()) -> major_minor_patch(). -parse_major_minor_patch([MajVsn, [], []]) -> +-spec parse_major_minor_patch_minpatch(iolist()) -> major_minor_patch_minpatch(). +parse_major_minor_patch_minpatch([MajVsn, [], [], []]) -> MajVsn; -parse_major_minor_patch([MajVsn, [<<".">>, MinVsn], []]) -> +parse_major_minor_patch_minpatch([MajVsn, [<<".">>, MinVsn], [], []]) -> {MajVsn, MinVsn}; -parse_major_minor_patch([MajVsn, [<<".">>, MinVsn], [<<".">>, PatchVsn]]) -> - {MajVsn, MinVsn, PatchVsn}. +parse_major_minor_patch_minpatch([MajVsn, [<<".">>, MinVsn], [<<".">>, PatchVsn], []]) -> + {MajVsn, MinVsn, PatchVsn}; +parse_major_minor_patch_minpatch([MajVsn, [<<".">>, MinVsn], + [<<".">>, PatchVsn], [<<".">>, MinPatch]]) -> + {MajVsn, MinVsn, PatchVsn, MinPatch}. 
%% @doc helper function for the peg grammer to parse the iolist into an alpha part -spec parse_alpha_part(iolist()) -> [alpha_part()]. @@ -226,22 +239,29 @@ format_vsn_rest(TypeMark, [Head | Rest]) -> -spec normalize(semver()) -> semver(). normalize({Vsn, Rest}) when erlang:is_integer(Vsn) -> - {{Vsn, 0, 0}, Rest}; + {{Vsn, 0, 0, 0}, Rest}; normalize({{Maj, Min}, Rest}) -> - {{Maj, Min, 0}, Rest}; -normalize(Other) -> + {{Maj, Min, 0, 0}, Rest}; +normalize({{Maj, Min, Patch}, Rest}) -> + {{Maj, Min, Patch, 0}, Rest}; +normalize(Other = {{_, _, _, _}, {_,_}}) -> Other. %% @doc to do the pessimistic compare we need a parsed semver. This is %% the internal implementation of the of the pessimistic run. The %% external just ensures that versions are parsed. +-spec internal_pes(semver(), semver()) -> boolean(). internal_pes(VsnA, {{LM, LMI}, _}) -> gte(VsnA, {{LM, LMI, 0}, {[], []}}) andalso - lt(VsnA, {{LM + 1, 0, 0}, {[], []}}); + lt(VsnA, {{LM + 1, 0, 0, 0}, {[], []}}); internal_pes(VsnA, {{LM, LMI, LP}, _}) -> gte(VsnA, {{LM, LMI, LP}, {[], []}}) andalso - lt(VsnA, {{LM, LMI + 1, 0}, {[], []}}); + lt(VsnA, {{LM, LMI + 1, 0, 0}, {[], []}}); +internal_pes(VsnA, {{LM, LMI, LP, LMP}, _}) -> + gte(VsnA, {{LM, LMI, LP, LMP}, {[], []}}) + andalso + lt(VsnA, {{LM, LMI, LP + 1, 0}, {[], []}}); internal_pes(Vsn, LVsn) -> gte(Vsn, LVsn). @@ -261,24 +281,38 @@ eql_test() -> "1.0.0")), ?assertMatch(true, eql("1.0.0", "1")), + ?assertMatch(true, eql("1.0.0.0", + "1")), ?assertMatch(true, eql("1.0+alpha.1", "1.0.0+alpha.1")), ?assertMatch(true, eql("1.0-alpha.1+build.1", "1.0.0-alpha.1+build.1")), + ?assertMatch(true, eql("1.0-alpha.1+build.1", + "1.0.0.0-alpha.1+build.1")), ?assertMatch(true, not eql("1.0.0", "1.0.1")), ?assertMatch(true, not eql("1.0.0-alpha", "1.0.1+alpha")), ?assertMatch(true, not eql("1.0.0+build.1", - "1.0.1+build.2")). + "1.0.1+build.2")), + ?assertMatch(true, not eql("1.0.0.0+build.1", + "1.0.0.1+build.2")). 
gt_test() -> ?assertMatch(true, gt("1.0.0-alpha.1", "1.0.0-alpha")), + ?assertMatch(true, gt("1.0.0.1-alpha.1", + "1.0.0.1-alpha")), + ?assertMatch(true, gt("1.0.0.4-alpha.1", + "1.0.0.2-alpha")), + ?assertMatch(true, gt("1.0.0.0-alpha.1", + "1.0.0-alpha")), ?assertMatch(true, gt("1.0.0-beta.2", "1.0.0-alpha.1")), ?assertMatch(true, gt("1.0.0-beta.11", "1.0.0-beta.2")), + ?assertMatch(true, gt("1.0.0-beta.11", + "1.0.0.0-beta.2")), ?assertMatch(true, gt("1.0.0-rc.1", "1.0.0-beta.11")), ?assertMatch(true, gt("1.0.0-rc.1+build.1", "1.0.0-rc.1")), ?assertMatch(true, gt("1.0.0", "1.0.0-rc.1+build.1")), @@ -286,10 +320,14 @@ gt_test() -> ?assertMatch(true, gt("1.3.7+build", "1.0.0+0.3.7")), ?assertMatch(true, gt("1.3.7+build.2.b8f12d7", "1.3.7+build")), + ?assertMatch(true, gt("1.3.7+build.2.b8f12d7", + "1.3.7.0+build")), ?assertMatch(true, gt("1.3.7+build.11.e0f985a", "1.3.7+build.2.b8f12d7")), ?assertMatch(true, not gt("1.0.0-alpha", "1.0.0-alpha.1")), + ?assertMatch(true, not gt("1.0.0-alpha", + "1.0.0.0-alpha.1")), ?assertMatch(true, not gt("1.0.0-alpha.1", "1.0.0-beta.2")), ?assertMatch(true, not gt("1.0.0-beta.2", @@ -324,12 +362,16 @@ gt_test() -> lt_test() -> ?assertMatch(true, lt("1.0.0-alpha", "1.0.0-alpha.1")), + ?assertMatch(true, lt("1.0.0-alpha", + "1.0.0.0-alpha.1")), ?assertMatch(true, lt("1.0.0-alpha.1", "1.0.0-beta.2")), ?assertMatch(true, lt("1.0.0-beta.2", "1.0.0-beta.11")), ?assertMatch(true, lt("1.0.0-beta.11", "1.0.0-rc.1")), + ?assertMatch(true, lt("1.0.0.1-beta.11", + "1.0.0.1-rc.1")), ?assertMatch(true, lt("1.0.0-rc.1", "1.0.0-rc.1+build.1")), ?assertMatch(true, lt("1.0.0-rc.1+build.1", @@ -346,9 +388,11 @@ lt_test() -> "1.0.0-alpha")), ?assertMatch(true, not lt("1", "1.0.0")), + ?assertMatch(true, lt("1", + "1.0.0.1")), ?assertMatch(true, not lt("1.0", "1.0.0")), - ?assertMatch(true, not lt("1.0.0", + ?assertMatch(true, not lt("1.0.0.0", "1")), ?assertMatch(true, not lt("1.0+alpha.1", "1.0.0+alpha.1")), @@ -384,12 +428,18 @@ gte_test() -> 
?assertMatch(true, gte("1.0.0", "1")), + ?assertMatch(true, gte("1.0.0.0", + "1")), + ?assertMatch(true, gte("1.0+alpha.1", "1.0.0+alpha.1")), ?assertMatch(true, gte("1.0-alpha.1+build.1", "1.0.0-alpha.1+build.1")), + ?assertMatch(true, gte("1.0.0-alpha.1+build.1", + "1.0.0.0-alpha.1+build.1")), + ?assertMatch(true, gte("1.0.0-alpha.1", "1.0.0-alpha")), ?assertMatch(true, gte("1.0.0-beta.2", @@ -458,6 +508,8 @@ lte_test() -> "1")), ?assertMatch(true, lte("1.0+alpha.1", "1.0.0+alpha.1")), + ?assertMatch(true, lte("1.0.0.0+alpha.1", + "1.0.0+alpha.1")), ?assertMatch(true, lte("1.0-alpha.1+build.1", "1.0.0-alpha.1+build.1")), ?assertMatch(true, not lt("1.0.0-alpha.1", @@ -476,7 +528,6 @@ lte_test() -> ?assertMatch(true, not lt("1.3.7+build.11.e0f985a", "1.3.7+build.2.b8f12d7")). - between_test() -> ?assertMatch(true, between("1.0.0-alpha", "1.0.0-alpha.3", @@ -493,6 +544,10 @@ between_test() -> ?assertMatch(true, between("1.0.0-rc.1", "1.0.0-rc.1+build.3", "1.0.0-rc.1+build.1")), + + ?assertMatch(true, between("1.0.0.0-rc.1", + "1.0.0-rc.1+build.3", + "1.0.0-rc.1+build.1")), ?assertMatch(true, between("1.0.0-rc.1+build.1", "1.0.0", "1.0.0-rc.33")), @@ -517,6 +572,10 @@ between_test() -> ?assertMatch(true, between("1.0", "1.0.0", "1.0.0")), + + ?assertMatch(true, between("1.0", + "1.0.0.0", + "1.0.0.0")), ?assertMatch(true, between("1.0.0", "1", "1")), @@ -549,7 +608,9 @@ pes_test() -> ?assertMatch(true, pes("2.6.7", "2.6.5")), ?assertMatch(true, pes("2.6.8", "2.6.5")), ?assertMatch(true, pes("2.6.9", "2.6.5")), + ?assertMatch(true, pes("2.6.0.9", "2.6.0.5")), ?assertMatch(true, not pes("2.7", "2.6.5")), + ?assertMatch(true, not pes("2.1.7", "2.1.6.5")), ?assertMatch(true, not pes("2.5", "2.6.5")). 
version_format_test() -> @@ -563,6 +624,8 @@ version_format_test() -> ?assertEqual(<<"1.99.2-alpha.1">>, erlang:iolist_to_binary(format({{1,99,2}, {[<<"alpha">>,1], []}}))), ?assertEqual(<<"1.99.2+build.1.a36">>, erlang:iolist_to_binary(format({{1,99,2}, {[], [<<"build">>, 1, <<"a36">>]}}))), + ?assertEqual(<<"1.99.2.44+build.1.a36">>, + erlang:iolist_to_binary(format({{1,99,2,44}, {[], [<<"build">>, 1, <<"a36">>]}}))), ?assertEqual(<<"1.99.2-alpha.1+build.1.a36">>, erlang:iolist_to_binary(format({{1,99,2}, {[<<"alpha">>, 1], [<<"build">>, 1, <<"a36">>]}}))), ?assertEqual(<<"1">>, erlang:iolist_to_binary(format({1, {[],[]}}))). diff --git a/src/ec_semver_parser.peg b/src/ec_semver_parser.peg index 9636d95..f505693 100644 --- a/src/ec_semver_parser.peg +++ b/src/ec_semver_parser.peg @@ -1,7 +1,7 @@ -semver <- major_minor_patch ("-" alpha_part ("." alpha_part)*)? ("+" alpha_part ("." alpha_part)*)? !. +semver <- major_minor_patch_min_patch ("-" alpha_part ("." alpha_part)*)? ("+" alpha_part ("." alpha_part)*)? !. ` ec_semver:internal_parse_version(Node) ` ; -major_minor_patch <- version_part ("." version_part)? ("." version_part)? ; +major_minor_patch_min_patch <- version_part ("." version_part)? ("." version_part)? ("." version_part)? 
; version_part <- [0-9]+ `erlang:list_to_integer(erlang:binary_to_list(erlang:iolist_to_binary(Node)))` ; From eab58fb6605c3354bb9530db0c2cf79e3382ea5b Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Tue, 18 Sep 2012 17:18:34 -0700 Subject: [PATCH 07/20] export mkdir_p (this should have been done already) Signed-off-by: Jordan Wilberding --- src/ec_file.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ec_file.erl b/src/ec_file.erl index c5cbe56..f815a3a 100644 --- a/src/ec_file.erl +++ b/src/ec_file.erl @@ -11,6 +11,7 @@ copy/3, insecure_mkdtemp/0, mkdir_path/1, + mkdir_p/1, find/2, is_symlink/1, remove/1, From a9f2a771f018ef5efa3864d91bf888175c22c54f Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Tue, 18 Sep 2012 17:45:27 -0700 Subject: [PATCH 08/20] fix bug in ec_file:copy/3 spec Signed-off-by: Jordan Wilberding --- src/ec_file.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ec_file.erl b/src/ec_file.erl index f815a3a..37ad1b1 100644 --- a/src/ec_file.erl +++ b/src/ec_file.erl @@ -36,12 +36,12 @@ %% Types %%============================================================================ -type option() :: recursive. --type void() :: ok. + %%%=================================================================== %%% API %%%=================================================================== %% @doc copy an entire directory to another location. --spec copy(file:name(), file:name(), Options::[option()]) -> void(). +-spec copy(file:name(), file:name(), Options::[option()]) -> ok | {error, Reason::term()}. copy(From, To, []) -> copy(From, To); copy(From, To, [recursive] = Options) -> @@ -230,7 +230,7 @@ tmp() -> end. %% Copy the subfiles of the From directory to the to directory. --spec copy_subfiles(file:name(), file:name(), [option()]) -> void(). +-spec copy_subfiles(file:name(), file:name(), [option()]) -> {error, Reason::term()} | ok. 
copy_subfiles(From, To, Options) -> Fun = fun(ChildFrom) -> From f77afd43c31dce68664358e7bf1c32b02f45b39e Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Fri, 19 Oct 2012 08:57:23 -0500 Subject: [PATCH 09/20] add exists to ec_file Signed-off-by: Jordan Wilberding --- src/ec_file.erl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/ec_file.erl b/src/ec_file.erl index 37ad1b1..45adb2a 100644 --- a/src/ec_file.erl +++ b/src/ec_file.erl @@ -7,6 +7,7 @@ -module(ec_file). -export([ + exists/1, copy/2, copy/3, insecure_mkdtemp/0, @@ -40,6 +41,15 @@ %%%=================================================================== %%% API %%%=================================================================== +-spec exists(file:filename()) -> boolean(). +exists(Filename) -> + case file:read_file_info(Filename) of + {ok, _} -> + true; + {error, _Reason} -> + false + end. + %% @doc copy an entire directory to another location. -spec copy(file:name(), file:name(), Options::[option()]) -> ok | {error, Reason::term()}. copy(From, To, []) -> @@ -314,6 +324,16 @@ setup_base_and_target() -> ok = file:write_file(NoName, DummyContents), {BaseDir, SourceDir, {Name1, Name2, Name3, NoName}}. +exists_test() -> + BaseDir = insecure_mkdtemp(), + SourceDir = filename:join([BaseDir, "source1"]), + NoName = filename:join([SourceDir, "noname"]), + ok = file:make_dir(SourceDir), + Name1 = filename:join([SourceDir, "fileone"]), + ok = file:write_file(Name1, <<"Testn">>), + ?assertMatch(true, exists(Name1)), + ?assertMatch(false, exists(NoName)). + find_test() -> %% Create a directory in /tmp for the test. Clean everything afterwards {BaseDir, _SourceDir, {Name1, Name2, Name3, _NoName}} = setup_base_and_target(), From 0db7042ff90ac420be8e4ebca21d11e153d845d6 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Wed, 19 Sep 2012 07:57:48 -0700 Subject: [PATCH 10/20] support reasonable versioning for erlware_commons You should get the latest and greatest rebar to build this. 
Signed-off-by: Jordan Wilberding --- src/erlware_commons.app.src | 2 +- src/erlware_commons.app.src.script | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 src/erlware_commons.app.src.script diff --git a/src/erlware_commons.app.src b/src/erlware_commons.app.src index 8aaa5f3..7e6111b 100644 --- a/src/erlware_commons.app.src +++ b/src/erlware_commons.app.src @@ -1,7 +1,7 @@ %% -*- mode: Erlang; fill-column: 75; comment-column: 50; -*- {application, erlware_commons, [{description, "Additional standard library for Erlang"}, - {vsn, "0.8.0"}, + {vsn, git}, {modules, []}, {registered, []}, {applications, [kernel, stdlib]}]}. diff --git a/src/erlware_commons.app.src.script b/src/erlware_commons.app.src.script new file mode 100644 index 0000000..381e7dc --- /dev/null +++ b/src/erlware_commons.app.src.script @@ -0,0 +1,22 @@ +%% -*- mode: Erlang; erlang-indent-level: 4; indent-tabs-mode: nil; fill-column: 80 -*- + +[{application, AppName, Details0}] = CONFIG, + +%% Get the tag timestamp and minimal ref from the system. The +%% timestamp is really important from an ordering perspective. +{ok, RawRef} = rebar_utils:sh("git log -n 1 --pretty=format:'%ct.%h\n' .", []), +{ok, RawTag} = rebar_utils:sh("git describe --always --abbrev=0 --tags " + "`git log -n 1 --pretty=format:%h .`", []), + +%% Clean up the tag and the Ref information. Basically, leading 'v's and +%% whitespace need to go away. +Tag = re:replace(RawTag, "(^v)|\\s", "", [global]), +Ref = re:replace(RawRef, "\\s", "", [global]), + +%% Create the valid [semver](http://semver.org) version from the tag +Vsn = erlang:binary_to_list(erlang:iolist_to_binary([Tag, "+build.", Ref])), + +%% Replace the old version with the new one +Details1 = lists:keyreplace(vsn, 1, Details0, {vsn, Vsn}), + +[{application, AppName, Details1}]. 
From 7e4ba401fd74572affcd4db13056c12c7b4b33bb Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Thu, 11 Oct 2012 10:01:45 -0500 Subject: [PATCH 11/20] support non-numeric versions in major/minor/patch/minor-patch This allows for two things. The first is support for non rigorous versions. However, it still fully supports semver. So if you have semver versions they work correctly, if you have alpha versions they also work correctly but using natural alpha ordering. Signed-off-by: Jordan Wilberding --- src/ec_semver.erl | 132 ++++++++++++++++++++++++++++----------- src/ec_semver_parser.peg | 5 +- 2 files changed, 99 insertions(+), 38 deletions(-) diff --git a/src/ec_semver.erl b/src/ec_semver.erl index 0566322..ac810e5 100644 --- a/src/ec_semver.erl +++ b/src/ec_semver.erl @@ -29,12 +29,14 @@ %%% Public Types %%%=================================================================== +-type version_element() :: non_neg_integer() | binary(). + -type major_minor_patch_minpatch() :: - non_neg_integer() - | {non_neg_integer(), non_neg_integer()} - | {non_neg_integer(), non_neg_integer(), non_neg_integer()} - | {non_neg_integer(), non_neg_integer(), - non_neg_integer(), non_neg_integer()}. + version_element() + | {version_element(), version_element()} + | {version_element(), version_element(), version_element()} + | {version_element(), version_element(), + version_element(), version_element()}. -type alpha_part() :: integer() | binary() | string(). -type alpha_info() :: {PreRelease::[alpha_part()], @@ -53,37 +55,58 @@ %% @doc parse a string or binary into a valid semver representation -spec parse(any_version()) -> semver(). 
parse(Version) when erlang:is_list(Version) -> - ec_semver_parser:parse(Version); + case ec_semver_parser:parse(Version) of + {fail, _} -> + {erlang:iolist_to_binary(Version), {[],[]}}; + Good -> + Good + end; parse(Version) when erlang:is_binary(Version) -> - ec_semver_parser:parse(Version); + case ec_semver_parser:parse(Version) of + {fail, _} -> + {Version, {[],[]}}; + Good -> + Good + end; parse(Version) -> Version. -spec format(semver()) -> iolist(). format({Maj, {AlphaPart, BuildPart}}) - when erlang:is_integer(Maj) -> - [erlang:integer_to_list(Maj), + when erlang:is_integer(Maj); + erlang:is_binary(Maj) -> + [format_version_part(Maj), format_vsn_rest(<<"-">>, AlphaPart), format_vsn_rest(<<"+">>, BuildPart)]; format({{Maj, Min}, {AlphaPart, BuildPart}}) -> - [erlang:integer_to_list(Maj), ".", - erlang:integer_to_list(Min), + [format_version_part(Maj), ".", + format_version_part(Min), format_vsn_rest(<<"-">>, AlphaPart), format_vsn_rest(<<"+">>, BuildPart)]; format({{Maj, Min, Patch}, {AlphaPart, BuildPart}}) -> - [erlang:integer_to_list(Maj), ".", - erlang:integer_to_list(Min), ".", - erlang:integer_to_list(Patch), + [format_version_part(Maj), ".", + format_version_part(Min), ".", + format_version_part(Patch), format_vsn_rest(<<"-">>, AlphaPart), format_vsn_rest(<<"+">>, BuildPart)]; format({{Maj, Min, Patch, MinPatch}, {AlphaPart, BuildPart}}) -> - [erlang:integer_to_list(Maj), ".", - erlang:integer_to_list(Min), ".", - erlang:integer_to_list(Patch), ".", - erlang:integer_to_list(MinPatch), + [format_version_part(Maj), ".", + format_version_part(Min), ".", + format_version_part(Patch), ".", + format_version_part(MinPatch), format_vsn_rest(<<"-">>, AlphaPart), format_vsn_rest(<<"+">>, BuildPart)]. +-spec format_version_part(integer() | binary()) -> iolist(). +format_version_part(Vsn) + when erlang:is_integer(Vsn) -> + erlang:integer_to_list(Vsn); +format_version_part(Vsn) + when erlang:is_binary(Vsn) -> + Vsn. 
+ + + %% @doc test for quality between semver versions -spec eql(any_version(), any_version()) -> boolean(). eql(VsnA, VsnB) -> @@ -238,7 +261,8 @@ format_vsn_rest(TypeMark, [Head | Rest]) -> %% @doc normalize the semver so they can be compared -spec normalize(semver()) -> semver(). normalize({Vsn, Rest}) - when erlang:is_integer(Vsn) -> + when erlang:is_binary(Vsn); + erlang:is_integer(Vsn) -> {{Vsn, 0, 0, 0}, Rest}; normalize({{Maj, Min}, Rest}) -> {{Maj, Min, 0, 0}, Rest}; @@ -251,14 +275,23 @@ normalize(Other = {{_, _, _, _}, {_,_}}) -> %% the internal implementation of the of the pessimistic run. The %% external just ensures that versions are parsed. -spec internal_pes(semver(), semver()) -> boolean(). -internal_pes(VsnA, {{LM, LMI}, _}) -> +internal_pes(VsnA, {{LM, LMI}, _}) + when erlang:is_integer(LM), + erlang:is_integer(LMI) -> gte(VsnA, {{LM, LMI, 0}, {[], []}}) andalso lt(VsnA, {{LM + 1, 0, 0, 0}, {[], []}}); -internal_pes(VsnA, {{LM, LMI, LP}, _}) -> +internal_pes(VsnA, {{LM, LMI, LP}, _}) + when erlang:is_integer(LM), + erlang:is_integer(LMI), + erlang:is_integer(LP) -> gte(VsnA, {{LM, LMI, LP}, {[], []}}) andalso lt(VsnA, {{LM, LMI + 1, 0, 0}, {[], []}}); -internal_pes(VsnA, {{LM, LMI, LP, LMP}, _}) -> +internal_pes(VsnA, {{LM, LMI, LP, LMP}, _}) + when erlang:is_integer(LM), + erlang:is_integer(LMI), + erlang:is_integer(LP), + erlang:is_integer(LMP) -> gte(VsnA, {{LM, LMI, LP, LMP}, {[], []}}) andalso lt(VsnA, {{LM, LMI, LP + 1, 0}, {[], []}}); @@ -289,6 +322,9 @@ eql_test() -> "1.0.0-alpha.1+build.1")), ?assertMatch(true, eql("1.0-alpha.1+build.1", "1.0.0.0-alpha.1+build.1")), + ?assertMatch(true, eql("aa", "aa")), + ?assertMatch(true, eql("AA.BB", "AA.BB")), + ?assertMatch(true, eql("BBB-super", "BBB-super")), ?assertMatch(true, not eql("1.0.0", "1.0.1")), ?assertMatch(true, not eql("1.0.0-alpha", @@ -296,7 +332,9 @@ eql_test() -> ?assertMatch(true, not eql("1.0.0+build.1", "1.0.1+build.2")), ?assertMatch(true, not eql("1.0.0.0+build.1", - 
"1.0.0.1+build.2")). + "1.0.0.1+build.2")), + ?assertMatch(true, not eql("FFF", "BBB")), + ?assertMatch(true, not eql("1", "1BBBB")). gt_test() -> ?assertMatch(true, gt("1.0.0-alpha.1", @@ -324,6 +362,8 @@ gt_test() -> "1.3.7.0+build")), ?assertMatch(true, gt("1.3.7+build.11.e0f985a", "1.3.7+build.2.b8f12d7")), + ?assertMatch(true, gt("aa.cc", + "aa.bb")), ?assertMatch(true, not gt("1.0.0-alpha", "1.0.0-alpha.1")), ?assertMatch(true, not gt("1.0.0-alpha", @@ -350,6 +390,10 @@ gt_test() -> "1.0.0-alpha")), ?assertMatch(true, not gt("1", "1.0.0")), + ?assertMatch(true, not gt("aa.bb", + "aa.bb")), + ?assertMatch(true, not gt("aa.cc", + "aa.dd")), ?assertMatch(true, not gt("1.0", "1.0.0")), ?assertMatch(true, not gt("1.0.0", @@ -390,12 +434,15 @@ lt_test() -> "1.0.0")), ?assertMatch(true, lt("1", "1.0.0.1")), + ?assertMatch(true, lt("AA.DD", + "AA.EE")), ?assertMatch(true, not lt("1.0", "1.0.0")), ?assertMatch(true, not lt("1.0.0.0", "1")), ?assertMatch(true, not lt("1.0+alpha.1", "1.0.0+alpha.1")), + ?assertMatch(true, not lt("AA.DD", "AA.CC")), ?assertMatch(true, not lt("1.0-alpha.1+build.1", "1.0.0-alpha.1+build.1")), ?assertMatch(true, not lt("1.0.0-alpha.1", @@ -414,7 +461,6 @@ lt_test() -> ?assertMatch(true, not lt("1.3.7+build.11.e0f985a", "1.3.7+build.2.b8f12d7")). 
- gte_test() -> ?assertMatch(true, gte("1.0.0-alpha", "1.0.0-alpha")), @@ -439,13 +485,14 @@ gte_test() -> ?assertMatch(true, gte("1.0.0-alpha.1+build.1", "1.0.0.0-alpha.1+build.1")), - ?assertMatch(true, gte("1.0.0-alpha.1", "1.0.0-alpha")), ?assertMatch(true, gte("1.0.0-beta.2", "1.0.0-alpha.1")), ?assertMatch(true, gte("1.0.0-beta.11", "1.0.0-beta.2")), + ?assertMatch(true, gte("aa.bb", "aa.bb")), + ?assertMatch(true, gte("dd", "aa")), ?assertMatch(true, gte("1.0.0-rc.1", "1.0.0-beta.11")), ?assertMatch(true, gte("1.0.0-rc.1+build.1", "1.0.0-rc.1")), ?assertMatch(true, gte("1.0.0", "1.0.0-rc.1+build.1")), @@ -457,6 +504,7 @@ gte_test() -> "1.3.7+build.2.b8f12d7")), ?assertMatch(true, not gte("1.0.0-alpha", "1.0.0-alpha.1")), + ?assertMatch(true, not gte("CC", "DD")), ?assertMatch(true, not gte("1.0.0-alpha.1", "1.0.0-beta.2")), ?assertMatch(true, not gte("1.0.0-beta.2", @@ -512,20 +560,23 @@ lte_test() -> "1.0.0+alpha.1")), ?assertMatch(true, lte("1.0-alpha.1+build.1", "1.0.0-alpha.1+build.1")), - ?assertMatch(true, not lt("1.0.0-alpha.1", + ?assertMatch(true, lte("aa","cc")), + ?assertMatch(true, lte("cc","cc")), + ?assertMatch(true, not lte("1.0.0-alpha.1", "1.0.0-alpha")), - ?assertMatch(true, not lt("1.0.0-beta.2", + ?assertMatch(true, not lte("cc", "aa")), + ?assertMatch(true, not lte("1.0.0-beta.2", "1.0.0-alpha.1")), - ?assertMatch(true, not lt("1.0.0-beta.11", + ?assertMatch(true, not lte("1.0.0-beta.11", "1.0.0-beta.2")), - ?assertMatch(true, not lt("1.0.0-rc.1", "1.0.0-beta.11")), - ?assertMatch(true, not lt("1.0.0-rc.1+build.1", "1.0.0-rc.1")), - ?assertMatch(true, not lt("1.0.0", "1.0.0-rc.1+build.1")), - ?assertMatch(true, not lt("1.0.0+0.3.7", "1.0.0")), - ?assertMatch(true, not lt("1.3.7+build", "1.0.0+0.3.7")), - ?assertMatch(true, not lt("1.3.7+build.2.b8f12d7", + ?assertMatch(true, not lte("1.0.0-rc.1", "1.0.0-beta.11")), + ?assertMatch(true, not lte("1.0.0-rc.1+build.1", "1.0.0-rc.1")), + ?assertMatch(true, not lte("1.0.0", 
"1.0.0-rc.1+build.1")), + ?assertMatch(true, not lte("1.0.0+0.3.7", "1.0.0")), + ?assertMatch(true, not lte("1.3.7+build", "1.0.0+0.3.7")), + ?assertMatch(true, not lte("1.3.7+build.2.b8f12d7", "1.3.7+build")), - ?assertMatch(true, not lt("1.3.7+build.11.e0f985a", + ?assertMatch(true, not lte("1.3.7+build.11.e0f985a", "1.3.7+build.2.b8f12d7")). between_test() -> @@ -585,6 +636,9 @@ between_test() -> ?assertMatch(true, between("1.0-alpha.1+build.1", "1.0.0-alpha.1+build.1", "1.0.0-alpha.1+build.1")), + ?assertMatch(true, between("aaa", + "ddd", + "cc")), ?assertMatch(true, not between("1.0.0-alpha.1", "1.0.0-alpha.22", "1.0.0")), @@ -594,13 +648,16 @@ between_test() -> ?assertMatch(true, not between("1.0.0-beta.1", "1.0.0-beta.11", "1.0.0-alpha")), - ?assertMatch(true, not between("1.0.0-beta.11", "1.0.0-rc.1", "1.0.0-rc.22")). + ?assertMatch(true, not between("1.0.0-beta.11", "1.0.0-rc.1", + "1.0.0-rc.22")), + ?assertMatch(true, not between("aaa", "ddd", "zzz")). pes_test() -> ?assertMatch(true, pes("2.6.0", "2.6")), ?assertMatch(true, pes("2.7", "2.6")), ?assertMatch(true, pes("2.8", "2.6")), ?assertMatch(true, pes("2.9", "2.6")), + ?assertMatch(true, pes("A.B", "A.A")), ?assertMatch(true, not pes("3.0.0", "2.6")), ?assertMatch(true, not pes("2.5", "2.6")), ?assertMatch(true, pes("2.6.5", "2.6.5")), @@ -611,11 +668,14 @@ pes_test() -> ?assertMatch(true, pes("2.6.0.9", "2.6.0.5")), ?assertMatch(true, not pes("2.7", "2.6.5")), ?assertMatch(true, not pes("2.1.7", "2.1.6.5")), + ?assertMatch(true, not pes("A.A", "A.B")), ?assertMatch(true, not pes("2.5", "2.6.5")). 
version_format_test() -> ?assertEqual(["1", [], []], format({1, {[],[]}})), ?assertEqual(["1", ".", "2", ".", "34", [], []], format({{1,2,34},{[],[]}})), + ?assertEqual(<<"a">>, erlang:iolist_to_binary(format({<<"a">>, {[],[]}}))), + ?assertEqual(<<"a.b">>, erlang:iolist_to_binary(format({{<<"a">>,<<"b">>}, {[],[]}}))), ?assertEqual(<<"1">>, erlang:iolist_to_binary(format({1, {[],[]}}))), ?assertEqual(<<"1.2">>, erlang:iolist_to_binary(format({{1,2}, {[],[]}}))), ?assertEqual(<<"1.2.2">>, erlang:iolist_to_binary(format({{1,2,2}, {[],[]}}))), diff --git a/src/ec_semver_parser.peg b/src/ec_semver_parser.peg index f505693..09779ae 100644 --- a/src/ec_semver_parser.peg +++ b/src/ec_semver_parser.peg @@ -3,9 +3,10 @@ semver <- major_minor_patch_min_patch ("-" alpha_part ("." alpha_part)*)? ("+" a major_minor_patch_min_patch <- version_part ("." version_part)? ("." version_part)? ("." version_part)? ; -version_part <- [0-9]+ `erlang:list_to_integer(erlang:binary_to_list(erlang:iolist_to_binary(Node)))` ; +version_part <- numeric_part / alpha_part ; -alpha_part <- [A-Za-z0-9-]+ ; +numeric_part <- [0-9]+ `erlang:list_to_integer(erlang:binary_to_list(erlang:iolist_to_binary(Node)))` ; +alpha_part <- [A-Za-z0-9]+ `erlang:iolist_to_binary(Node)` ; %% This only exists to get around a bug in erlang where if %% warnings_as_errors is specified `nowarn` directives are ignored From 1540fb16521ccec7d22268d05f1bf2fa485fea66 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Fri, 19 Oct 2012 18:20:07 -0500 Subject: [PATCH 12/20] cleanup the rebar config Signed-off-by: Jordan Wilberding --- rebar.config | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/rebar.config b/rebar.config index 2c3cad3..cd5d68a 100644 --- a/rebar.config +++ b/rebar.config @@ -1,12 +1,20 @@ %% -*- mode: Erlang; fill-column: 80; comment-column: 75; -*- -%% These are all only compile time dependencies +%% Dependencies ================================================================ 
{deps, [{neotoma, "", - {git, "https://github.com/seancribbs/neotoma.git", {tag, "1.5.1"}}}, + {git, "https://github.com/seancribbs/neotoma.git", {branch, master}}}, {proper, "", {git, "https://github.com/manopapad/proper.git", {branch, master}}}]}. {erl_first_files, ["ec_dictionary"]}. +%% Compiler Options ============================================================ {erl_opts, [debug_info, warnings_as_errors]}. + +%% EUnit ======================================================================= +{eunit_opts, [verbose, + {report, {eunit_surefire, [{dir, "."}]}}]}. + +{cover_enabled, true}. +{cover_print_enabled, true}. From c4887e202193aa21171288598d9b9ff7f40ed3a0 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Fri, 19 Oct 2012 18:22:03 -0500 Subject: [PATCH 13/20] enable the rebar semver plugin on erlware_commons Signed-off-by: Jordan Wilberding --- rebar.config | 7 ++++++- src/erlware_commons.app.src | 2 +- src/erlware_commons.app.src.script | 22 ---------------------- 3 files changed, 7 insertions(+), 24 deletions(-) delete mode 100644 src/erlware_commons.app.src.script diff --git a/rebar.config b/rebar.config index cd5d68a..08863d2 100644 --- a/rebar.config +++ b/rebar.config @@ -3,7 +3,9 @@ %% Dependencies ================================================================ {deps, [{neotoma, "", {git, "https://github.com/seancribbs/neotoma.git", {branch, master}}}, - {proper, "", {git, "https://github.com/manopapad/proper.git", {branch, master}}}]}. + {proper, "", {git, "https://github.com/manopapad/proper.git", {branch, master}}}, + {rebar_vsn_plugin, ".*", {git, "https://github.com/erlware/rebar_vsn_plugin.git", + {branch, "master"}}}]}. {erl_first_files, ["ec_dictionary"]}. @@ -18,3 +20,6 @@ {cover_enabled, true}. {cover_print_enabled, true}. + +%% Rebar Plugins ============================================================== +{plugins, [rebar_vsn_plugin]}. 
diff --git a/src/erlware_commons.app.src b/src/erlware_commons.app.src index 7e6111b..042c56a 100644 --- a/src/erlware_commons.app.src +++ b/src/erlware_commons.app.src @@ -1,7 +1,7 @@ %% -*- mode: Erlang; fill-column: 75; comment-column: 50; -*- {application, erlware_commons, [{description, "Additional standard library for Erlang"}, - {vsn, git}, + {vsn, "semver"}, {modules, []}, {registered, []}, {applications, [kernel, stdlib]}]}. diff --git a/src/erlware_commons.app.src.script b/src/erlware_commons.app.src.script deleted file mode 100644 index 381e7dc..0000000 --- a/src/erlware_commons.app.src.script +++ /dev/null @@ -1,22 +0,0 @@ -%% -*- mode: Erlang; erlang-indent-level: 4; indent-tabs-mode: nil; fill-column: 80 -*- - -[{application, AppName, Details0}] = CONFIG, - -%% Get the tag timestamp and minimal ref from the system. The -%% timestamp is really important from an ordering perspective. -{ok, RawRef} = rebar_utils:sh("git log -n 1 --pretty=format:'%ct.%h\n' .", []), -{ok, RawTag} = rebar_utils:sh("git describe --always --abbrev=0 --tags " - "`git log -n 1 --pretty=format:%h .`", []), - -%% Cleanup the tag and the Ref information. Basically leading 'v's and -%% whitespace needs to go away. -Tag = re:replace(RawTag, "(^v)|\\s", "", [global]), -Ref = re:replace(RawRef, "\\s", "", [global]), - -%% Create the valid [semver](http://semver.org) version from the tag -Vsn = erlang:binary_to_list(erlang:iolist_to_binary([Tag, "+build.", Ref])), - -%% Replace the old version with the new one -Details1 = lists:keyreplace(vsn, 1, Details0, {vsn, Vsn}), - -[{application, AppName, Details1}]. 
From b5371974d15fbf074f0e7c26597b2e95dc642c12 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Thu, 25 Oct 2012 08:54:49 -0500 Subject: [PATCH 14/20] add fullpath to the makefile Signed-off-by: Jordan Wilberding --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 538d561..55ab0fd 100644 --- a/Makefile +++ b/Makefile @@ -34,11 +34,11 @@ test: compile $(ERLWARE_COMMONS_PLT): @echo Building local plt at $(ERLWARE_COMMONS_PLT) @echo - - dialyzer --output_plt $(ERLWARE_COMMONS_PLT) --build_plt \ + - dialyzer --fullpath --output_plt $(ERLWARE_COMMONS_PLT) --build_plt \ --apps erts kernel stdlib eunit -r deps dialyzer: $(ERLWARE_COMMONS_PLT) - dialyzer --plt $(ERLWARE_COMMONS_PLT) -Wrace_conditions --src src + dialyzer --fullpath --plt $(ERLWARE_COMMONS_PLT) -Wrace_conditions --src src typer: typer --plt $(ERLWARE_COMMONS_PLT) -r ./src From 1a1b87bf53e899a5525710018ac83c1280e67191 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Thu, 25 Oct 2012 08:55:41 -0500 Subject: [PATCH 15/20] add a clean and rebuild task to makefile Signed-off-by: Jordan Wilberding --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 55ab0fd..7ee2120 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,8 @@ endif ERLWARE_COMMONS_PLT=$(CURDIR)/.erlware_commons_plt -.PHONY: all compile doc clean test dialyzer typer shell distclean pdf get-deps escript +.PHONY: all compile doc clean test dialyzer typer shell distclean pdf get-deps \ + rebuild all: compile test doc dialyzer @@ -62,3 +63,5 @@ clean: distclean: clean rm -rf $(ERLWARE_COMMONS_PLT) rm -rvf $(CURDIR)/deps/* + +rebuild: distclean all From dda4c8558653d907dc1e7c27e2509bd6cdadc2f6 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Thu, 25 Oct 2012 08:56:00 -0500 Subject: [PATCH 16/20] reorder default tasks so dialyzer is run after compile Signed-off-by: Jordan Wilberding --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/Makefile b/Makefile index 7ee2120..ab8b307 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ ERLWARE_COMMONS_PLT=$(CURDIR)/.erlware_commons_plt .PHONY: all compile doc clean test dialyzer typer shell distclean pdf get-deps \ rebuild -all: compile test doc dialyzer +all: compile dialyzer doc test get-deps: $(REBAR) get-deps From 0e10d59b3af1ba6aa6ea2dde0d19a94a07bca709 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Thu, 25 Oct 2012 11:49:26 -0500 Subject: [PATCH 17/20] add Stephen Marsh's plists to the system origin: http://code.google.com/p/plists/ detail: http://plists.wordpress.com/2007/09/20/introducing-plists-an-erlang-module-for-doing-list-operations-in-parallel/ Signed-off-by: Jordan Wilberding --- src/plists.erl | 858 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 858 insertions(+) create mode 100644 src/plists.erl diff --git a/src/plists.erl b/src/plists.erl new file mode 100644 index 0000000..688d5a5 --- /dev/null +++ b/src/plists.erl @@ -0,0 +1,858 @@ +% @author Stephen Marsh +% @copyright 2007 Stephen Marsh freeyourmind ++ [$@|gmail.com] +% @doc plists is a drop-in replacement for module +% lists, +% making most list operations parallel. It can operate on each element in +% parallel, for IO-bound operations, on sublists in parallel, for +% taking advantage of multi-core machines with CPU-bound operations, and +% across erlang nodes, for parallizing inside a cluster. It handles +% errors and node failures. It can be configured, tuned, and tweaked to +% get optimal performance while minimizing overhead. +% +% Almost all the functions are +% identical to equivalent functions in lists, returning exactly the same +% result, and having both a form with an identical syntax that operates on +% each element in parallel and a form which takes an optional "malt", +% a specification for how to parallize the operation. +% +% fold is the one exception, parallel fold is different from linear fold. 
+% This module also includes a simple mapreduce implementation, and the +% function runmany. All the other functions are implemented with runmany, +% which is a generalization of parallel list operations. +% +% == Malts == +% A malt specifies how to break a list into sublists, and can optionally +% specify a timeout, which nodes to run on, and how many processes to start +% per node. +% +% Malt = MaltComponent | [MaltComponent]
+% MaltComponent = SubListSize::integer() | {processes, integer()} | +% {processes, schedulers} | +% {timeout, Milliseconds::integer()} | {nodes, [NodeSpec]}
+% NodeSpec = Node::atom() | {Node::atom(), NumProcesses::integer()} | +% {Node::atom(), schedulers} +% +% An integer can be given to specify the exact size for +% sublists. 1 is a good choice for IO-bound operations and when +% the operation on each list element is expensive. Larger numbers +% minimize overhead and are faster for cheap operations. +% +% If the integer is omitted, and +% you have specified a {processes, X}, the list is +% split into X sublists. This is only +% useful when the time to process each element is close to identical and you +% know exactly how many lines of execution are available to you. +% +% If neither of the above applies, the sublist size defaults to 1. +% +% You can use {processes, X} to have the list processed +% by X processes on the local machine. A good choice for X is the number of +% lines of execution (cores) the machine provides. This can be done +% automatically with {processes, schedulers}, which sets +% the number of processes to the number of schedulers in the erlang virtual +% machine (probably equal to the number of cores). +% +% {timeout, Milliseconds} specifies a timeout. This is a timeout for the entire +% operation, both operating on the sublists and combining the results. +% exit(timeout) is evaluated if the timeout is exceeded. +% +% {nodes, NodeList} specifies that the operation should be done across nodes. +% Every element of NodeList is of the form {NodeName, NumProcesses} or +% NodeName, which means the same as {NodeName, 1}. plists runs +% NumProcesses processes on NodeName concurrently. A good choice for +% NumProcesses is the number of lines of execution (cores) a node provides +% plus one. This ensures the node is completely busy even when +% fetching a new sublist. This can be done automatically with +% {NodeName, schedulers}, in which case +% plists uses a cached value if it has one, and otherwise finds the number of +% schedulers in the remote node and adds one. 
This will ensure at least one +% busy process per core (assuming the node has a scheduler for each core). +% +% plists is able to recover if a node goes down. +% If all nodes go down, exit(allnodescrashed) is evaluated. +% +% Any of the above may be used as a malt, or may be combined into a list. +% {nodes, NodeList} and {processes, X} may not be combined. +% +% === Examples === +% % start a process for each element (1-element sublists)
+% 1 +% +% % start a process for each ten elements (10-element sublists)
+% 10 +% +% % split the list into two sublists and process in two processes
+% {processes, 2} +% +% % split the list into X sublists and process in X processes,
+% % where X is the number of cores in the machine
+% {processes, schedulers} +% +% % split the list into 10-element sublists and process in two processes
+% [10, {processes, 2}] +% +% % timeout after one second. Assumes that a process should be started
+% % for each element.
+% {timeout, 1000} +% +% % Runs 3 processes at a time on apple@desktop, +% and 2 on orange@laptop
+% % This is the best way to utilize all the CPU-power of a dual-core
+% % desktop and a single-core laptop. Assumes that the list should be
+% % split into 1-element sublists.
+% {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]} +% +% Like above, but makes plists figure out how many processes to use. +% {nodes, [{apple@desktop, schedulers}, {orange@laptop, schedulers}]} +% +% % Gives apple and orange three seconds to process the list as
+% % 100-element sublists.
+% [100, {timeout, 3000}, {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]}] +% +% === Aside: Why Malt? === +% I needed a word for this concept, so maybe my subconsciousness gave me one by +% making me misspell multiply. Maybe it is an acronym for Malt is A List +% Tearing Specification. Maybe it is a beer metaphor, suggesting that code +% only runs in parallel if bribed with spirits. It's jargon, learn it +% or you can't be part of the in-group. +% +% == Messages and Errors == +% plists assures that no extraneous messages are left in or will later +% enter the message queue. This is guaranteed even in the event of an error. +% +% Errors in spawned processes are caught and propagated to the calling +% process. If you invoke +% +% plists:map(fun (X) -> 1/X end, [1, 2, 3, 0]). +% +% you get a badarith error, exactly like when you use lists:map. +% +% plists uses monitors to watch the processes it spawns. It is not a good idea +% to invoke plists when you are already monitoring processes. If one of them +% does a non-normal exit, plists receives the 'DOWN' message believing it to be +% from one of its own processes. The error propagation system goes into +% effect, which results in the error occurring in the calling process. +% +% == License == +% The MIT License +% +% Copyright (c) 2007 Stephen Marsh +% +% Permission is hereby granted, free of charge, to any person obtaining a copy +% of this software and associated documentation files (the "Software"), to deal +% in the Software without restriction, including without limitation the rights +% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the Software is +% furnished to do so, subject to the following conditions: +% +% The above copyright notice and this permission notice shall be included in +% all copies or substantial portions of the Software.
+% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +% THE SOFTWARE. + + +-module(plists). +-export([all/2, all/3, any/2, any/3, filter/2, filter/3, +fold/3, fold/4, fold/5, foreach/2, foreach/3, map/2, map/3, +partition/2, partition/3, sort/1, sort/2, sort/3, +usort/1, usort/2, usort/3, mapreduce/2, mapreduce/3, mapreduce/5, +runmany/3, runmany/4]). + +% Everything here is defined in terms of runmany. +% The following methods are convient interfaces to runmany. + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> bool() +all(Fun, List) -> + all(Fun, List, 1). + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List, Malt) -> bool() +all(Fun, List, Malt) -> + try runmany(fun (L) -> + B = lists:all(Fun, L), + if B -> + nil; + true -> + exit(notall) + end + end, + fun (_A1, _A2) -> + nil + end, + List, Malt) of + _ -> + true + catch exit:notall -> + false + end. + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> bool() +any(Fun, List) -> + any(Fun, List, 1). + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List, Malt) -> bool() +any(Fun, List, Malt) -> + try runmany(fun (L) -> + B = lists:any(Fun, L), + if B -> + exit(any); + true -> + nil + end + end, + fun (_A1, _A2) -> + nil + end, + List, Malt) of + _ -> + false + catch exit:any -> + true + end. + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> list() +filter(Fun, List) -> + filter(Fun, List, 1). + +% @doc Same semantics as in module +% lists. 
+% @spec (Fun, List, Malt) -> list() +filter(Fun, List, Malt) -> + runmany(fun (L) -> + lists:filter(Fun, L) + end, + {reverse, fun (A1, A2) -> + A1 ++ A2 + end}, + List, Malt). + +% Note that with parallel fold there is not foldl and foldr, +% instead just one fold that can fuse Accumlators. + +% @doc Like below, but assumes 1 as the Malt. This function is almost useless, +% and is intended only to aid converting code from using lists to plists. +% @spec (Fun, InitAcc, List) -> term() +fold(Fun, InitAcc, List) -> + fold(Fun, Fun, InitAcc, List, 1). + +% @doc Like below, but uses the Fun as the Fuse by default. +% @spec (Fun, InitAcc, List, Malt) -> term() +fold(Fun, InitAcc, List, Malt) -> + fold(Fun, Fun, InitAcc, List, Malt). + +% @doc fold is more complex when made parallel. There is no foldl and foldr, +% accumulators aren't passed in any defined order. +% The list is split into sublists which are folded together. Fun is +% identical to the function passed to lists:fold[lr], it takes +% (an element, and the accumulator) and returns -> a new accumulator. +% It is used for the initial stage of folding sublists. Fuse fuses together +% the results, it takes (Results1, Result2) and returns -> a new result. +% By default sublists are fused left to right, each result of a fuse being +% fed into the first element of the next fuse. The result of the last fuse +% is the result. +% +% Fusing may also run in parallel using a recursive algorithm, +% by specifying the fuse as {recursive, Fuse}. See +% the discussion in {@link runmany/4}. +% +% Malt is the malt for the initial folding of sublists, and for the +% possible recursive fuse. +% @spec (Fun, Fuse, InitAcc, List, Malt) -> term() +fold(Fun, Fuse, InitAcc, List, Malt) -> + Fun2 = fun (L) -> lists:foldl(Fun, InitAcc, L) end, + runmany(Fun2, Fuse, List, Malt). + +% @doc Similiar to foreach in module +% lists +% except it makes no guarantee about the order it processes list elements. 
+% @spec (Fun, List) -> void() +foreach(Fun, List) -> + foreach(Fun, List, 1). + +% @doc Similiar to foreach in module +% lists +% except it makes no guarantee about the order it processes list elements. +% @spec (Fun, List, Malt) -> void() +foreach(Fun, List, Malt) -> + runmany(fun (L) -> + lists:foreach(Fun, L) + end, + fun (_A1, _A2) -> + ok + end, + List, Malt). + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> list() +map(Fun, List) -> + map(Fun, List, 1). + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List, Malt) -> list() +map(Fun, List, Malt) -> + runmany(fun (L) -> + lists:map(Fun, L) + end, + {reverse, fun (A1, A2) -> + A1 ++ A2 + end}, + List, Malt). + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> {list(), list()} +partition(Fun, List) -> + partition(Fun, List, 1). + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List, Malt) -> {list(), list()} +partition(Fun, List, Malt) -> + runmany(fun (L) -> + lists:partition(Fun, L) + end, + {reverse, fun ({True1, False1}, {True2, False2}) -> + {True1 ++ True2, False1 ++ False2} + end}, + List, Malt). + +% SORTMALT needs to be tuned +-define(SORTMALT, 100). + +% @doc Same semantics as in module +% lists. +% @spec (List) -> list() +sort(List) -> + sort(fun (A, B) -> + A =< B + end, + List). + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> list() +sort(Fun, List) -> + sort(Fun, List, ?SORTMALT). + +% @doc This version lets you specify your own malt for sort. +% +% sort splits the list into sublists and sorts them, and it merges the +% sorted lists together. These are done in parallel. Each sublist is +% sorted in a seperate process, and each merging of results is done in a +% seperate process. Malt defaults to 100, causing the list to be split into +% 100-element sublists. 
+% @spec (Fun, List, Malt) -> list() +sort(Fun, List, Malt) -> + Fun2 = fun (L) -> + lists:sort(Fun, L) + end, + Fuse = fun (A1, A2) -> + lists:merge(Fun, A1, A2) + end, + runmany(Fun2, {recursive, Fuse}, List, Malt). + +% @doc Same semantics as in module +% lists. +% @spec (List) -> list() +usort(List) -> + usort(fun (A, B) -> + A =< B + end, + List). + +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> list() +usort(Fun, List) -> + usort(Fun, List, ?SORTMALT). + +% @doc This version lets you specify your own malt for usort. +% +% usort splits the list into sublists and sorts them, and it merges the +% sorted lists together. These are done in parallel. Each sublist is +% sorted in a seperate process, and each merging of results is done in a +% seperate process. Malt defaults to 100, causing the list to be split into +% 100-element sublists. +% +% usort removes duplicate elments while it sorts. +% @spec (Fun, List, Malt) -> list() +usort(Fun, List, Malt) -> + Fun2 = fun (L) -> + lists:usort(Fun, L) + end, + Fuse = fun (A1, A2) -> + lists:umerge(Fun, A1, A2) + end, + runmany(Fun2, {recursive, Fuse}, List, Malt). + +% @doc Like below, assumes default MapMalt of 1. +% @spec (MapFunc, List) -> Dict +% MapFunc = (term()) -> DeepListOfKeyValuePairs +% DeepListOfKeyValuePairs = [DeepListOfKeyValuePairs] | {Key, Value} +mapreduce(MapFunc, List) -> + mapreduce(MapFunc, List, 1). + +% Like below, but uses a default reducer that collects all +% {Key, Value} pairs into a +% dict, +% with values {Key, [Value1, Value2...]}. +% This dict is returned as the result. +mapreduce(MapFunc, List, MapMalt) -> + mapreduce(MapFunc, List, dict:new(), fun add_key/3, MapMalt). + +% @doc This is a very basic mapreduce. You won't write a Google-rivaling +% search engine with it. It has no equivalent in lists. 
Each +% element in the list is run through the MapFunc, which produces either +% a {Key, Value} pair, or a lists of key value pairs, or a list of lists of +% key value pairs...etc. A reducer process runs in parallel with the mapping +% processes, collecting the key value pairs. It starts with a state given by +% InitState, and for each {Key, Value} pair that it receives it invokes +% ReduceFunc(OldState, Key, Value) to compute its new state. mapreduce returns +% the reducer's final state. +% +% MapMalt is the malt for the mapping operation, with a default value of 1, +% meaning each element of the list is mapped by a seperate process. +% +% mapreduce requires OTP R11B, or it may leave monitoring messages in the +% message queue. +% @spec (MapFunc, List, InitState, ReduceFunc, MapMalt) -> Dict +% MapFunc = (term()) -> DeepListOfKeyValuePairs +% DeepListOfKeyValuePairs = [DeepListOfKeyValuePairs] | {Key, Value} +% ReduceFunc = (OldState::term(), Key::term(), Value::term() -> NewState::term() +mapreduce(MapFunc, List, InitState, ReduceFunc, MapMalt) -> + Parent = self(), + {Reducer, ReducerRef} = + erlang:spawn_monitor(fun () -> + reducer(Parent, 0, InitState, ReduceFunc) + end), + MapFunc2 = fun (L) -> + Reducer ! lists:map(MapFunc, L), + 1 + end, + SentMessages = try runmany(MapFunc2, fun (A, B) -> A+B end, List, MapMalt) + catch + exit:Reason -> + erlang:demonitor(ReducerRef, [flush]), + Reducer ! die, + exit(Reason) + end, + Reducer ! {mappers, done, SentMessages}, + Results = receive + {Reducer, Results2} -> + Results2; + {'DOWN', _, _, Reducer, Reason2} -> + exit(Reason2) + end, + receive + {'DOWN', _, _, Reducer, normal} -> + nil + end, + Results. + +reducer(Parent, NumReceived, State, Func) -> + receive + die -> + nil; + {mappers, done, NumReceived} -> + Parent ! {self (), State}; + Keys -> + reducer(Parent, NumReceived + 1, each_key(State, Func, Keys), Func) + end. 
+ +each_key(State, Func, {Key, Value}) -> + Func(State, Key, Value); +each_key(State, Func, [List|Keys]) -> + each_key(each_key(State, Func, List), Func, Keys); +each_key(State, _, []) -> + State. + +add_key(Dict, Key, Value) -> + case dict:is_key(Key, Dict) of + true -> + dict:append(Key, Value, Dict); + false -> + dict:store(Key, [Value], Dict) + end. + +% @doc Like below, but assumes a Malt of 1, +% meaning each element of the list is processed by a seperate process. +% @spec (Fun, Fuse, List) -> term() +runmany(Fun, Fuse, List) -> + runmany(Fun, Fuse, List, 1). + +% Begin internal stuff (though runmany/4 is exported). + +% @doc All of the other functions are implemented with runmany. runmany +% takes a List, splits it into sublists, and starts processes to operate on +% each sublist, all done according to Malt. Each process passes its sublist +% into Fun and sends the result back. +% +% The results are then fused together to get the final result. There are two +% ways this can operate, lineraly and recursively. If Fuse is a function, +% a fuse is done linearly left-to-right on the sublists, the results +% of processing the first and second sublists being passed to Fuse, then +% the result of the first fuse and processing the third sublits, and so on. If +% Fuse is {reverse, FuseFunc}, then a fuse is done right-to-left, the results +% of processing the second-to-last and last sublists being passed to FuseFunc, +% then the results of processing the third-to-last sublist and +% the results of the first fuse, and and so forth. +% Both methods preserve the original order of the lists elements. +% +% To do a recursive fuse, pass Fuse as {recursive, FuseFunc}. +% The recursive fuse makes no guarantee about the order the results of +% sublists, or the results of fuses are passed to FuseFunc. It +% continues fusing pairs of results until it is down to one. 
%
% Recursive fuse is done in parallel with processing the sublists, and a
% process is spawned to fuse each pair of results. It is a parallelized
% algorithm. Linear fuse is done after all results of processing sublists
% have been collected, and can only run in a single process.
%
% Even if you pass {recursive, FuseFunc}, a recursive fuse is only done if
% the malt contains {nodes, NodeList} or {processes, X}. If this is not the
% case, a linear fuse is done.
% @spec (Fun, Fuse, List, Malt) -> term()
%     Fun = (list()) -> term()
%     Fuse = FuseFunc | {reverse, FuseFunc} | {recursive, FuseFunc}
%     FuseFunc = (term(), term()) -> term()
runmany(Fun, Fuse, List, Malt) when is_list(Malt) ->
    runmany(Fun, Fuse, List, local, no_split, Malt);
runmany(Fun, Fuse, List, Malt) ->
    % a bare malt component is wrapped into a one-element malt list
    runmany(Fun, Fuse, List, [Malt]).

% runmany/6 consumes the malt one component at a time.
%   Nodes is the atom local until a {processes, X} or {nodes, NodeList}
%         component turns it into a list with one entry per process slot.
%   Split is the atom no_split until a sublist size is known.
% Once the malt is exhausted the list is split and handed to
% local_runmany/3 (Nodes still local) or cluster_runmany/4.
% Clause order is significant.

% an integer malt component is the sublist size
runmany(Fun, Fuse, List, Nodes, no_split, [MaltTerm|Malt]) when is_integer(MaltTerm) ->
    runmany(Fun, Fuse, List, Nodes, MaltTerm, Malt);
% run a process for each scheduler
runmany(Fun, Fuse, List, local, Split, [{processes, schedulers}|Malt]) ->
    S = erlang:system_info(schedulers),
    runmany(Fun, Fuse, List, local, Split, [{processes, S}|Malt]);
% Split the list into X sublists, where X is the number of processes.
% The {processes, X} component is left in the malt so the next clause
% still sets up the process slots.
runmany(Fun, Fuse, List, local, no_split, [{processes, X}|_]=Malt) ->
    L = length(List),
    % ceiling of L / X
    Split = case L rem X of
                0 ->
                    L div X;
                _ ->
                    L div X + 1
            end,
    runmany(Fun, Fuse, List, local, Split, Malt);
% run X processes on the local machine
runmany(Fun, Fuse, List, local, Split, [{processes, X}|Malt]) ->
    Nodes = lists:duplicate(X, node()),
    runmany(Fun, Fuse, List, Nodes, Split, Malt);
% Run the rest of the malt under a timer process. The timer sends
% {timerrang, TimerPid} to this process after X milliseconds; the receive
% loops further down turn that message into exit(timeout).
runmany(Fun, Fuse, List, Nodes, Split, [{timeout, X}|Malt]) ->
    Parent = self(),
    Timer = spawn(fun () ->
                      receive
                          stoptimer ->
                              Parent ! {timerstopped, self()}
                      after X ->
                          Parent ! {timerrang, self()},
                          receive
                              stoptimer ->
                                  Parent ! {timerstopped, self()}
                          end
                      end
                  end),
    % try ... after needs no catch clause; the after block runs on both
    % the normal and the exceptional path and drains the timer's messages.
    try runmany(Fun, Fuse, List, Nodes, Split, Malt)
    after
        Timer ! stoptimer,
        cleanup_timer(Timer)
    end;
% expand the node specs into one list entry per process slot
runmany(Fun, Fuse, List, local, Split, [{nodes, NodeList}|Malt]) ->
    Nodes = lists:foldl(fun ({Node, schedulers}, A) ->
                                X = schedulers_on_node(Node) + 1,
                                lists:reverse(lists:duplicate(X, Node), A);
                            ({Node, X}, A) ->
                                lists:reverse(lists:duplicate(X, Node), A);
                            (Node, A) ->
                                [Node|A]
                        end,
                        [], NodeList),
    runmany(Fun, Fuse, List, Nodes, Split, Malt);
% local recursive fuse, for when we weren't invoked with {processes, X}
% or {nodes, NodeList}. Degenerates recursive fuse into linear fuse.
runmany(Fun, {recursive, Fuse}, List, local, Split, []) ->
    runmany(Fun, Fuse, List, local, Split, []);
% by default, operate on each element separately
runmany(Fun, Fuse, List, Nodes, no_split, []) ->
    runmany(Fun, Fuse, List, Nodes, 1, []);
runmany(Fun, Fuse, List, local, Split, []) ->
    List2 = splitmany(List, Split),
    local_runmany(Fun, Fuse, List2);
runmany(Fun, Fuse, List, Nodes, Split, []) ->
    List2 = splitmany(List, Split),
    cluster_runmany(Fun, Fuse, List2, Nodes).

% Removes the timer's messages from the mailbox: skips a pending
% {timerrang, Timer} and returns once {timerstopped, Timer} has arrived.
cleanup_timer(Timer) ->
    receive
        {timerrang, Timer} ->
            cleanup_timer(Timer);
        {timerstopped, Timer} ->
            nil
    end.

% Returns the number of schedulers on Node. The answer is cached in the
% process dictionary under plists_schedulers_on_nodes (a dict keyed by
% node name), so each node is asked at most once per calling process.
schedulers_on_node(Node) ->
    Cache = case get(plists_schedulers_on_nodes) of
                undefined ->
                    dict:new();
                Dict ->
                    Dict
            end,
    case dict:find(Node, Cache) of
        {ok, Cached} ->
            Cached;
        error ->
            Count = determine_schedulers(Node),
            put(plists_schedulers_on_nodes, dict:store(Node, Count, Cache)),
            Count
    end.

% Asks Node for erlang:system_info(schedulers) from a process spawned on
% it. Returns 0 when the spawned process goes down with a non-normal
% reason before answering.
determine_schedulers(Node) ->
    Parent = self(),
    Child = spawn(Node, fun () ->
                            Parent ! {self(), erlang:system_info(schedulers)}
                        end),
    erlang:monitor(process, Child),
    receive
        {Child, X} ->
            % consume the monitor message so it does not stay in the mailbox
            receive
                {'DOWN', _, _, Child, _Reason} ->
                    nil
            end,
            X;
        {'DOWN', _, _, Child, Reason} when Reason =/= normal ->
            0
    end.

% local runmany, for when we weren't invoked with {processes, X}
% or {nodes, NodeList}. Every sublist is processed in parallel.
local_runmany(Fun, Fuse, List) ->
    Parent = self(),
    Pids = [begin
                Worker = fun () ->
                             Parent ! {self(), Fun(L)}
                         end,
                {Pid, _Ref} = erlang:spawn_monitor(Worker),
                Pid
            end || L <- List],
    % receivefrom/1 throws {BadPid, Reason} on a crash or a timeout;
    % handle_error/3 then cleans up and exits, so it never returns here.
    Answers = try lists:map(fun receivefrom/1, Pids)
              catch throw:{BadPid, Reason} ->
                  handle_error(BadPid, Reason, Pids)
              end,
    lists:foreach(fun normal_cleanup/1, Pids),
    fuse(Fuse, Answers).

% Waits for the answer of Pid. Throws {BadPid, Reason} when any monitored
% process went down with a non-normal reason, and {nil, timeout} when the
% timer rang.
receivefrom(Pid) ->
    receive
        {Pid, R} ->
            R;
        {'DOWN', _, _, BadPid, Reason} when Reason =/= normal ->
            throw({BadPid, Reason});
        {timerrang, _} ->
            throw({nil, timeout})
    end.

% Convert List into [{Number, Sublist}], numbered from 0. The fold builds
% the task list back to front, so the last sublist is the first task.
cluster_runmany(Fun, Fuse, List, Nodes) ->
    {List2, _} = lists:foldl(fun (X, {L, Count}) ->
                                     {[{Count, X}|L], Count+1}
                             end,
                             {[], 0}, List),
    cluster_runmany(Fun, Fuse, List2, Nodes, [], []).
% cluster_runmany/6 is the scheduling loop for {processes, X} and
% {nodes, NodeList} malts.
%   TaskList - pending tasks: {Num, Sublist}, or {fuse, R1, R2} for a
%              recursive fuse step
%   Nodes    - free process slots, one list entry per slot
%   Running  - [{Pid, Node, Task}] for every task in flight
%   Results  - [{Num, Result}] collected so far; fuse results are stored
%              with the atom fuse in place of Num
% Clause order is significant.

% Add a pair of results into the TaskList as a fusing task
cluster_runmany(Fun, {recursive, Fuse}, [], Nodes, Running,
                [{_, R1}, {_, R2}|Results]) ->
    cluster_runmany(Fun, {recursive, Fuse}, [{fuse, R1, R2}], Nodes,
                    Running, Results);
% recursive fuse done, return result
cluster_runmany(_, {recursive, _Fuse}, [], _Nodes, [], [{_, Result}]) ->
    Result;
% edge case where we are asked to do nothing
cluster_runmany(_, {recursive, _Fuse}, [], _Nodes, [], []) ->
    [];
% We're done, now we just have to [linear] fuse the results
cluster_runmany(_, Fuse, [], _Nodes, [], Results) ->
    % restore sublist order before fusing
    Sorted = lists:sort(fun ({A, _}, {B, _}) ->
                                A =< B
                        end,
                        lists:reverse(Results)),
    fuse(Fuse, [R || {_, R} <- Sorted]);
% We have a ready node and a sublist or fuse to be processed, so we start
% a new process
cluster_runmany(Fun, Fuse, [Task|TaskList], [N|Nodes], Running, Results) ->
    Parent = self(),
    Work = case Task of
               {Num, L2} ->
                   fun () ->
                       Parent ! {self(), Num, Fun(L2)}
                   end;
               {fuse, R1, R2} ->
                   {recursive, FuseFunc} = Fuse,
                   fun () ->
                       Parent ! {self(), fuse, FuseFunc(R1, R2)}
                   end
           end,
    % Report failures to the parent as {Pid, error, Reason} messages.
    % The stacktrace is bound in the catch pattern because
    % erlang:get_stacktrace/0 no longer exists in current OTP releases.
    Guarded = fun () ->
                  try Work()
                  catch
                      exit:siblingdied ->
                          ok;
                      exit:Reason ->
                          Parent ! {self(), error, Reason};
                      error:R:Stacktrace ->
                          Parent ! {self(), error, {R, Stacktrace}};
                      throw:R:Stacktrace ->
                          Parent ! {self(), error, {{nocatch, R}, Stacktrace}}
                  end
              end,
    Pid = spawn(N, Guarded),
    erlang:monitor(process, Pid),
    cluster_runmany(Fun, Fuse, TaskList, Nodes, [{Pid, N, Task}|Running], Results);
% We can't start a new process, but can watch over already running ones
cluster_runmany(Fun, Fuse, TaskList, Nodes, [_|_]=Running, Results) ->
    receive
        % must stay ahead of the {Pid, Num, Result} clause, which would
        % match an error report as well
        {_Pid, error, Reason} ->
            handle_error(junkvalue, Reason, running_pids(Running));
        {Pid, Num, Result} ->
            % throw out the exit message, Reason should be
            % normal, noproc, or noconnection
            receive {'DOWN', _, _, Pid, _Reason} ->
                        nil
            end,
            {Running2, FinishedNode, _} = delete_running(Pid, Running, []),
            cluster_runmany(Fun, Fuse, TaskList,
                            [FinishedNode|Nodes], Running2, [{Num, Result}|Results]);
        {timerrang, _} ->
            handle_error(nil, timeout, running_pids(Running));
        % node failure: requeue the task and drop the dead node's slot
        {'DOWN', _, _, Pid, noconnection} ->
            {Running2, _DeadNode, Task} = delete_running(Pid, Running, []),
            cluster_runmany(Fun, Fuse, [Task|TaskList], Nodes,
                            Running2, Results);
        % could a noproc exit message come before the message from
        % the process? we are assuming it can't.
        % this clause is unlikely to get invoked due to cluster_runmany's
        % spawned processes. It will still catch errors in mapreduce's
        % reduce process, however.
        {'DOWN', _, _, BadPid, Reason} when Reason =/= normal ->
            handle_error(BadPid, Reason, running_pids(Running))
    end;
% We have data, but no nodes either available or occupied
cluster_runmany(_, _, [_Non|_Empty], []=_Nodes, []=_Running, _) ->
    exit(allnodescrashed).

% Extracts the pids from the Running list of {Pid, Node, Task} entries.
running_pids(Running) ->
    [Pid || {Pid, _, _} <- Running].

% Removes the entry for Pid from the running list. Returns
% {RemainingRunning, Node, Task} for the removed entry; the order of the
% remaining entries is not preserved.
delete_running(Pid, [{Pid, Node, List}|Running], Acc) ->
    {Running ++ Acc, Node, List};
delete_running(Pid, [R|Running], Acc) ->
    delete_running(Pid, Running, [R|Acc]).
% Aborts a parallel operation: sends every process in Pids the exit
% signal siblingdied, removes their pending messages from the mailbox
% (BadPid is skipped), then exits the calling process with Reason.
handle_error(BadPid, Reason, Pids) ->
    lists:foreach(fun (Pid) ->
                          exit(Pid, siblingdied)
                  end, Pids),
    lists:foreach(fun (Pid) ->
                          error_cleanup(Pid, BadPid)
                  end, Pids),
    exit(Reason).

% Discards result messages from Pid until its 'DOWN' message arrives.
% Nothing is awaited for BadPid itself.
error_cleanup(BadPid, BadPid) ->
    ok;
error_cleanup(Pid, BadPid) ->
    receive
        {Pid, _} ->
            error_cleanup(Pid, BadPid);
        {Pid, _, _} ->
            error_cleanup(Pid, BadPid);
        {'DOWN', _, _, Pid, _Reason} ->
            ok
    end.

% Consumes the 'DOWN' message of a process whose result was already
% received.
normal_cleanup(Pid) ->
    receive
        {'DOWN', _, _, Pid, _Reason} ->
            ok
    end.

% Linear fuse of a list of results.
%   fuse(FuseFunc, Results) combines left to right:
%       FuseFunc(FuseFunc(R1, R2), R3) ...
%   fuse({reverse, FuseFunc}, Results) combines right to left:
%       FuseFunc(R1, FuseFunc(R2, R3)) ...
% An empty result list yields [].
fuse(_, []) ->
    [];
fuse({reverse, _}=Fuse, Results) ->
    [RL|ResultsR] = lists:reverse(Results),
    fuse(Fuse, ResultsR, RL);
fuse(Fuse, [R1|Results]) ->
    fuse(Fuse, Results, R1).

% fuse/3 carries the value fused so far as its third argument.
fuse({reverse, FuseFunc}=Fuse, [R2|Results], R1) ->
    fuse(Fuse, Results, FuseFunc(R2, R1));
fuse(Fuse, [R2|Results], R1) ->
    fuse(Fuse, Results, Fuse(R1, R2));
fuse(_, [], R) ->
    R.

% Splits a list into a list of sublists, each of size Size,
% except for the last element which is less if the original list
% could not be evenly divided into Size-sized lists.
% An empty list yields [] for any Size. A non-empty list with Size 0
% raises badarg.
splitmany(List, Size) ->
    splitmany(List, [], Size).

splitmany([], Acc, _) ->
    lists:reverse(Acc);
splitmany([_|_], _Acc, 0) ->
    % a zero-sized split never consumes the list, so recursing here
    % would never terminate
    error(badarg);
splitmany(List, Acc, Size) ->
    {Top, NList} = split(Size, List),
    splitmany(NList, [Top|Acc], Size).

% Like lists:split, except it splits a list smaller than its first
% parameter. Returns {FirstSizeElements, Rest}.
split(Size, List) ->
    split(Size, List, []).

split(0, List, Acc) ->
    {lists:reverse(Acc), List};
split(Size, [H|List], Acc) ->
    split(Size - 1, List, [H|Acc]);
split(_, [], Acc) ->
    {lists:reverse(Acc), []}.
From 31ebca114ade860f2bf665ae6265f3de4b3f2cee Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Thu, 25 Oct 2012 11:52:47 -0500 Subject: [PATCH 18/20] replace ec_plists with Stephan's plists Signed-off-by: Jordan Wilberding --- src/ec_plists.erl | 1060 +++++++++++++++++++++++++++++++++++---------- src/plists.erl | 858 ------------------------------------ 2 files changed, 827 insertions(+), 1091 deletions(-) delete mode 100644 src/plists.erl diff --git a/src/ec_plists.erl b/src/ec_plists.erl index cd14697..688d5a5 100644 --- a/src/ec_plists.erl +++ b/src/ec_plists.erl @@ -1,264 +1,858 @@ -%%%------------------------------------------------------------------- -%%% @doc -%%% simple parrallel map. Originally provided by Joe Armstrong -%%% on the erlang questions mailing list. -%%% @end -%%%------------------------------------------------------------------- --module(ec_plists). +% @author Stephen Marsh +% @copyright 2007 Stephen Marsh freeyourmind ++ [$@|gmail.com] +% @doc plists is a drop-in replacement for module +% lists, +% making most list operations parallel. It can operate on each element in +% parallel, for IO-bound operations, on sublists in parallel, for +% taking advantage of multi-core machines with CPU-bound operations, and +% across erlang nodes, for parallizing inside a cluster. It handles +% errors and node failures. It can be configured, tuned, and tweaked to +% get optimal performance while minimizing overhead. +% +% Almost all the functions are +% identical to equivalent functions in lists, returning exactly the same +% result, and having both a form with an identical syntax that operates on +% each element in parallel and a form which takes an optional "malt", +% a specification for how to parallize the operation. +% +% fold is the one exception, parallel fold is different from linear fold. +% This module also include a simple mapreduce implementation, and the +% function runmany. 
All the other functions are implemented with runmany, +% which is a generalization of parallel list operations. +% +% == Malts == +% A malt specifies how to break a list into sublists, and can optionally +% specify a timeout, which nodes to run on, and how many processes to start +% per node. +% +% Malt = MaltComponent | [MaltComponent]
+% MaltComponent = SubListSize::integer() | {processes, integer()} | +% {processes, schedulers} | +% {timeout, Milliseconds::integer()} | {nodes, [NodeSpec]}
+% NodeSpec = Node::atom() | {Node::atom(), NumProcesses::integer()} | +% {Node::atom(), schedulers} +% +% An integer can be given to specify the exact size for +% sublists. 1 is a good choice for IO-bound operations and when +% the operation on each list element is expensive. Larger numbers +% minimize overhead and are faster for cheap operations. +% +% If the integer is omitted, and +% you have specified a {processes, X}, the list is +% split into X sublists. This is only +% useful when the time to process each element is close to identical and you +% know exactly how many lines of execution are available to you. +% +% If neither of the above applies, the sublist size defaults to 1. +% +% You can use {processes, X} to have the list processed +% by X processes on the local machine. A good choice for X is the number of +% lines of execution (cores) the machine provides. This can be done +% automatically with {processes, schedulers}, which sets +% the number of processes to the number of schedulers in the erlang virtual +% machine (probably equal to the number of cores). +% +% {timeout, Milliseconds} specifies a timeout. This is a timeout for the entire +% operation, both operating on the sublists and combining the results. +% exit(timeout) is evaluated if the timeout is exceeded. +% +% {nodes, NodeList} specifies that the operation should be done across nodes. +% Every element of NodeList is of the form {NodeName, NumProcesses} or +% NodeName, which means the same as {NodeName, 1}. plists runs +% NumProcesses processes on NodeName concurrently. A good choice for +% NumProcesses is the number of lines of execution (cores) a node provides +% plus one. This ensures the node is completely busy even when +% fetching a new sublist. This can be done automatically with +% {NodeName, schedulers}, in which case +% plists uses a cached value if it has one, and otherwise finds the number of +% schedulers in the remote node and adds one. 
This will ensure at least one +% busy process per core (assuming the node has a scheduler for each core). +% +% plists is able to recover if a node goes down. +% If all nodes go down, exit(allnodescrashed) is evaluated. +% +% Any of the above may be used as a malt, or may be combined into a list. +% {nodes, NodeList} and {processes, X} may not be combined. +% +% === Examples === +% % start a process for each element (1-element sublists)
+% 1 +% +% % start a process for each ten elements (10-element sublists)
+% 10 +% +% % split the list into two sublists and process in two processes
+% {processes, 2} +% +% % split the list into X sublists and process in X processes,
+% % where X is the number of cores in the machine
+% {processes, schedulers} +% +% % split the list into 10-element sublists and process in two processes
+% [10, {processes, 2}] +% +% % timeout after one second. Assumes that a process should be started
+% % for each element.
+% {timeout, 1000} +% +% % Runs 3 processes at a time on apple@desktop, +% and 2 on orange@laptop
+% % This is the best way to utilize all the CPU-power of a dual-core
+% % desktop and a single-core laptop. Assumes that the list should be
+% % split into 1-element sublists.
+% {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]} +% +% Like above, but makes plists figure out how many processes to use. +% {nodes, [{apple@desktop, schedulers}, {orange@laptop, schedulers}]} +% +% % Gives apple and orange three seconds to process the list as
+% % 100-element sublists.
+% [100, {timeout, 3000}, {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]}] +% +% === Aside: Why Malt? === +% I needed a word for this concept, so maybe my subconsciousness gave me one by +% making me misspell multiply. Maybe it is an acronym for Malt is A List +% Tearing Specification. Maybe it is a beer metaphor, suggesting that code +% only runs in parallel if bribed with spirits. It's jargon, learn it +% or you can't be part of the in-group. +% +% == Messages and Errors == +% plists assures that no extraneous messages are left in or will later +% enter the message queue. This is guaranteed even in the event of an error. +% +% Errors in spawned processes are caught and propagated to the calling +% process. If you invoke +% +% plists:map(fun (X) -> 1/X end, [1, 2, 3, 0]). +% +% you get a badarith error, exactly like when you use lists:map. +% +% plists uses monitors to watch the processes it spawns. It is not a good idea +% to invoke plists when you are already monitoring processes. If one of them +% does a non-normal exit, plists receives the 'DOWN' message believing it to be +% from one of its own processes. The error propagation system goes into +% effect, which results in the error occurring in the calling process. +% +% == License == +% The MIT License +% +% Copyright (c) 2007 Stephen Marsh +% +% Permission is hereby granted, free of charge, to any person obtaining a copy +% of this software and associated documentation files (the "Software"), to deal +% in the Software without restriction, including without limitation the rights +% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +% copies of the Software, and to permit persons to whom the Software is +% furnished to do so, subject to the following conditions: +% +% The above copyright notice and this permission notice shall be included in +% all copies or substantial portions of the Software.
+% +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +% THE SOFTWARE. --export([map/2, - map/3, - ftmap/2, - ftmap/3, - filter/2, - filter/3]). --export_type([thunk/0]). +-module(plists). +-export([all/2, all/3, any/2, any/3, filter/2, filter/3, +fold/3, fold/4, fold/5, foreach/2, foreach/3, map/2, map/3, +partition/2, partition/3, sort/1, sort/2, sort/3, +usort/1, usort/2, usort/3, mapreduce/2, mapreduce/3, mapreduce/5, +runmany/3, runmany/4]). -%%============================================================================= -%% Types -%%============================================================================= --type thunk() :: fun((any()) -> any()). +% Everything here is defined in terms of runmany. +% The following methods are convient interfaces to runmany. -%%============================================================================= -%% Public API -%%============================================================================= +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> bool() +all(Fun, List) -> + all(Fun, List, 1). -%% @doc Takes a function and produces a list of the result of the function -%% applied to each element of the argument list. A timeout is optional. -%% In the event of a timeout or an exception the entire map will fail -%% with an excption with class throw. --spec map(fun(), [any()]) -> [any()]. -map(Fun, List) -> - map(Fun, List, infinity). +% @doc Same semantics as in module +% lists. 
+% @spec (Fun, List, Malt) -> bool() +all(Fun, List, Malt) -> + try runmany(fun (L) -> + B = lists:all(Fun, L), + if B -> + nil; + true -> + exit(notall) + end + end, + fun (_A1, _A2) -> + nil + end, + List, Malt) of + _ -> + true + catch exit:notall -> + false + end. --spec map(thunk(), [any()], timeout() | infinity) -> [any()]. -map(Fun, List, Timeout) -> - run_list_fun_in_parallel(map, Fun, List, Timeout). +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> bool() +any(Fun, List) -> + any(Fun, List, 1). -%% @doc Takes a function and produces a list of the result of the function -%% applied to each element of the argument list. A timeout is optional. -%% This function differes from regular map in that it is fault tolerant. -%% If a timeout or an exception occurs while processing an element in -%% the input list the ftmap operation will continue to function. Timeouts -%% and exceptions will be reflected in the output of this function. -%% All application level results are wrapped in a tuple with the tag -%% 'value'. Exceptions will come through as they are and timeouts will -%% return as the atom timeout. -%% This is useful when the ftmap is being used for side effects. -%%
-%% 2> ftmap(fun(N) -> factorial(N) end, [1, 2, 1000000, "not num"], 100)
-%% [{value, 1}, {value, 2}, timeout, {badmatch, ...}]
-%% 
--spec ftmap(thunk(), [any()]) -> [{value, any()} | any()]. -ftmap(Fun, List) -> - ftmap(Fun, List, infinity). +% @doc Same semantics as in module +% lists. +% @spec (Fun, List, Malt) -> bool() +any(Fun, List, Malt) -> + try runmany(fun (L) -> + B = lists:any(Fun, L), + if B -> + exit(any); + true -> + nil + end + end, + fun (_A1, _A2) -> + nil + end, + List, Malt) of + _ -> + false + catch exit:any -> + true + end. --spec ftmap(thunk(), [any()], timeout() | infinity) -> [{value, any()} | any()]. -ftmap(Fun, List, Timeout) -> - run_list_fun_in_parallel(ftmap, Fun, List, Timeout). - -%% @doc Returns a list of the elements in the supplied list which -%% the function Fun returns true. A timeout is optional. In the -%% event of a timeout the filter operation fails. --spec filter(thunk(), [any()]) -> [any()]. +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> list() filter(Fun, List) -> - filter(Fun, List, infinity). + filter(Fun, List, 1). --spec filter(thunk(), [any()], timeout() | infinity) -> [any()]. -filter(Fun, List, Timeout) -> - run_list_fun_in_parallel(filter, Fun, List, Timeout). +% @doc Same semantics as in module +% lists. +% @spec (Fun, List, Malt) -> list() +filter(Fun, List, Malt) -> + runmany(fun (L) -> + lists:filter(Fun, L) + end, + {reverse, fun (A1, A2) -> + A1 ++ A2 + end}, + List, Malt). -%%============================================================================= -%% Internal API -%%============================================================================= --spec run_list_fun_in_parallel(atom(), thunk(), [any()], timeout() | infinity) -> [any()]. -run_list_fun_in_parallel(ListFun, Fun, List, Timeout) -> - LocalPid = self(), - Pids = - lists:map(fun(E) -> - Pid = - proc_lib:spawn(fun() -> - wait(LocalPid, Fun, - E, Timeout) - end), - {Pid, E} - end, List), - gather(ListFun, Pids). +% Note that with parallel fold there is not foldl and foldr, +% instead just one fold that can fuse Accumlators. 
--spec wait(pid(), thunk(), any(), timeout() | infinity) -> any(). -wait(Parent, Fun, E, Timeout) -> - WaitPid = self(), - Child = spawn(fun() -> - do_f(WaitPid, Fun, E) - end), +% @doc Like below, but assumes 1 as the Malt. This function is almost useless, +% and is intended only to aid converting code from using lists to plists. +% @spec (Fun, InitAcc, List) -> term() +fold(Fun, InitAcc, List) -> + fold(Fun, Fun, InitAcc, List, 1). - wait(Parent, Child, Timeout). +% @doc Like below, but uses the Fun as the Fuse by default. +% @spec (Fun, InitAcc, List, Malt) -> term() +fold(Fun, InitAcc, List, Malt) -> + fold(Fun, Fun, InitAcc, List, Malt). --spec wait(pid(), pid(), timeout() | infinity) -> any(). -wait(Parent, Child, Timeout) -> - receive - {Child, Ret} -> - Parent ! {self(), Ret} - after Timeout -> - exit(Child, timeout), - Parent ! {self(), timeout} - end. +% @doc fold is more complex when made parallel. There is no foldl and foldr, +% accumulators aren't passed in any defined order. +% The list is split into sublists which are folded together. Fun is +% identical to the function passed to lists:fold[lr], it takes +% (an element, and the accumulator) and returns -> a new accumulator. +% It is used for the initial stage of folding sublists. Fuse fuses together +% the results, it takes (Results1, Result2) and returns -> a new result. +% By default sublists are fused left to right, each result of a fuse being +% fed into the first element of the next fuse. The result of the last fuse +% is the result. +% +% Fusing may also run in parallel using a recursive algorithm, +% by specifying the fuse as {recursive, Fuse}. See +% the discussion in {@link runmany/4}. +% +% Malt is the malt for the initial folding of sublists, and for the +% possible recursive fuse. +% @spec (Fun, Fuse, InitAcc, List, Malt) -> term() +fold(Fun, Fuse, InitAcc, List, Malt) -> + Fun2 = fun (L) -> lists:foldl(Fun, InitAcc, L) end, + runmany(Fun2, Fuse, List, Malt). 
--spec gather(atom(), [any()]) -> [any()]. -gather(map, PidElementList) -> - map_gather(PidElementList); -gather(ftmap, PidElementList) -> - ftmap_gather(PidElementList); -gather(filter, PidElementList) -> - filter_gather(PidElementList). +% @doc Similiar to foreach in module +% lists +% except it makes no guarantee about the order it processes list elements. +% @spec (Fun, List) -> void() +foreach(Fun, List) -> + foreach(Fun, List, 1). --spec map_gather([pid()]) -> [any()]. -map_gather([{Pid, _E} | Rest]) -> - receive - {Pid, {value, Ret}} -> - [Ret|map_gather(Rest)]; - %% timeouts fall here too. Should timeouts be a return value - %% or an exception? I lean toward return value, but the code - %% is easier with the exception. Thoughts? - {Pid, Exception} -> - killall(Rest), - throw(Exception) - end; -map_gather([]) -> - []. +% @doc Similiar to foreach in module +% lists +% except it makes no guarantee about the order it processes list elements. +% @spec (Fun, List, Malt) -> void() +foreach(Fun, List, Malt) -> + runmany(fun (L) -> + lists:foreach(Fun, L) + end, + fun (_A1, _A2) -> + ok + end, + List, Malt). --spec ftmap_gather([pid()]) -> [any()]. -ftmap_gather([{Pid, _E} | Rest]) -> - receive - {Pid, Value} -> [Value|ftmap_gather(Rest)] - end; -ftmap_gather([]) -> - []. +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> list() +map(Fun, List) -> + map(Fun, List, 1). --spec filter_gather([pid()]) -> [any()]. -filter_gather([{Pid, E} | Rest]) -> - receive - {Pid, {value, false}} -> - filter_gather(Rest); - {Pid, {value, true}} -> - [E|filter_gather(Rest)]; - {Pid, {value, NotBool}} -> - killall(Rest), - throw({bad_return_value, NotBool}); - {Pid, Exception} -> - killall(Rest), - throw(Exception) - end; -filter_gather([]) -> - []. +% @doc Same semantics as in module +% lists. +% @spec (Fun, List, Malt) -> list() +map(Fun, List, Malt) -> + runmany(fun (L) -> + lists:map(Fun, L) + end, + {reverse, fun (A1, A2) -> + A1 ++ A2 + end}, + List, Malt). 
--spec do_f(pid(), thunk(), any()) -> no_return(). -do_f(Parent, F, E) -> - try - Result = F(E), - Parent ! {self(), {value, Result}} - catch - _Class:Exception -> - %% Losing class info here, but since throw does not accept - %% that arg anyhow and forces a class of throw it does not - %% matter. - Parent ! {self(), Exception} - end. +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> {list(), list()} +partition(Fun, List) -> + partition(Fun, List, 1). --spec killall([pid()]) -> ok. -killall([{Pid, _E}|T]) -> - exit(Pid, kill), - killall(T); -killall([]) -> - ok. +% @doc Same semantics as in module +% lists. +% @spec (Fun, List, Malt) -> {list(), list()} +partition(Fun, List, Malt) -> + runmany(fun (L) -> + lists:partition(Fun, L) + end, + {reverse, fun ({True1, False1}, {True2, False2}) -> + {True1 ++ True2, False1 ++ False2} + end}, + List, Malt). -%%============================================================================= -%% Tests -%%============================================================================= +% SORTMALT needs to be tuned +-define(SORTMALT, 100). --ifndef(NOTEST). --include_lib("eunit/include/eunit.hrl"). +% @doc Same semantics as in module +% lists. +% @spec (List) -> list() +sort(List) -> + sort(fun (A, B) -> + A =< B + end, + List). -map_good_test() -> - Results = map(fun(_) -> - ok - end, - lists:seq(1, 5), infinity), - ?assertMatch([ok, ok, ok, ok, ok], - Results). +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> list() +sort(Fun, List) -> + sort(Fun, List, ?SORTMALT). -ftmap_good_test() -> - Results = ftmap(fun(_) -> - ok - end, - lists:seq(1, 3), infinity), - ?assertMatch([{value, ok}, {value, ok}, {value, ok}], - Results). +% @doc This version lets you specify your own malt for sort. +% +% sort splits the list into sublists and sorts them, and it merges the +% sorted lists together. These are done in parallel. 
Each sublist is +% sorted in a seperate process, and each merging of results is done in a +% seperate process. Malt defaults to 100, causing the list to be split into +% 100-element sublists. +% @spec (Fun, List, Malt) -> list() +sort(Fun, List, Malt) -> + Fun2 = fun (L) -> + lists:sort(Fun, L) + end, + Fuse = fun (A1, A2) -> + lists:merge(Fun, A1, A2) + end, + runmany(Fun2, {recursive, Fuse}, List, Malt). -filter_good_test() -> - Results = filter(fun(X) -> - X == show - end, - [show, show, remove], infinity), - ?assertMatch([show, show], - Results). +% @doc Same semantics as in module +% lists. +% @spec (List) -> list() +usort(List) -> + usort(fun (A, B) -> + A =< B + end, + List). -map_timeout_test() -> - Results = - try - map(fun(T) -> - timer:sleep(T), - T - end, - [1, 100], 10) - catch - C:E -> {C, E} - end, - ?assertMatch({throw, timeout}, Results). +% @doc Same semantics as in module +% lists. +% @spec (Fun, List) -> list() +usort(Fun, List) -> + usort(Fun, List, ?SORTMALT). -ftmap_timeout_test() -> - Results = ftmap(fun(X) -> - timer:sleep(X), - true - end, - [100, 1], 10), - ?assertMatch([timeout, {value, true}], Results). +% @doc This version lets you specify your own malt for usort. +% +% usort splits the list into sublists and sorts them, and it merges the +% sorted lists together. These are done in parallel. Each sublist is +% sorted in a seperate process, and each merging of results is done in a +% seperate process. Malt defaults to 100, causing the list to be split into +% 100-element sublists. +% +% usort removes duplicate elments while it sorts. +% @spec (Fun, List, Malt) -> list() +usort(Fun, List, Malt) -> + Fun2 = fun (L) -> + lists:usort(Fun, L) + end, + Fuse = fun (A1, A2) -> + lists:umerge(Fun, A1, A2) + end, + runmany(Fun2, {recursive, Fuse}, List, Malt). -filter_timeout_test() -> - Results = - try - filter(fun(T) -> - timer:sleep(T), - T == 1 +% @doc Like below, assumes default MapMalt of 1. 
+% @spec (MapFunc, List) -> Dict +% MapFunc = (term()) -> DeepListOfKeyValuePairs +% DeepListOfKeyValuePairs = [DeepListOfKeyValuePairs] | {Key, Value} +mapreduce(MapFunc, List) -> + mapreduce(MapFunc, List, 1). + +% Like below, but uses a default reducer that collects all +% {Key, Value} pairs into a +% dict, +% with values {Key, [Value1, Value2...]}. +% This dict is returned as the result. +mapreduce(MapFunc, List, MapMalt) -> + mapreduce(MapFunc, List, dict:new(), fun add_key/3, MapMalt). + +% @doc This is a very basic mapreduce. You won't write a Google-rivaling +% search engine with it. It has no equivalent in lists. Each +% element in the list is run through the MapFunc, which produces either +% a {Key, Value} pair, or a lists of key value pairs, or a list of lists of +% key value pairs...etc. A reducer process runs in parallel with the mapping +% processes, collecting the key value pairs. It starts with a state given by +% InitState, and for each {Key, Value} pair that it receives it invokes +% ReduceFunc(OldState, Key, Value) to compute its new state. mapreduce returns +% the reducer's final state. +% +% MapMalt is the malt for the mapping operation, with a default value of 1, +% meaning each element of the list is mapped by a seperate process. +% +% mapreduce requires OTP R11B, or it may leave monitoring messages in the +% message queue. +% @spec (MapFunc, List, InitState, ReduceFunc, MapMalt) -> Dict +% MapFunc = (term()) -> DeepListOfKeyValuePairs +% DeepListOfKeyValuePairs = [DeepListOfKeyValuePairs] | {Key, Value} +% ReduceFunc = (OldState::term(), Key::term(), Value::term() -> NewState::term() +mapreduce(MapFunc, List, InitState, ReduceFunc, MapMalt) -> + Parent = self(), + {Reducer, ReducerRef} = + erlang:spawn_monitor(fun () -> + reducer(Parent, 0, InitState, ReduceFunc) + end), + MapFunc2 = fun (L) -> + Reducer ! 
lists:map(MapFunc, L), + 1 + end, + SentMessages = try runmany(MapFunc2, fun (A, B) -> A+B end, List, MapMalt) + catch + exit:Reason -> + erlang:demonitor(ReducerRef, [flush]), + Reducer ! die, + exit(Reason) end, - [1, 100], 10) - catch - C:E -> {C, E} - end, - ?assertMatch({throw, timeout}, Results). - -map_bad_test() -> - Results = - try - map(fun(_) -> - throw(test_exception) - end, - lists:seq(1, 5), infinity) - catch - C:E -> {C, E} - end, - ?assertMatch({throw, test_exception}, Results). - -ftmap_bad_test() -> - Results = - ftmap(fun(2) -> - throw(test_exception); - (N) -> - N + Reducer ! {mappers, done, SentMessages}, + Results = receive + {Reducer, Results2} -> + Results2; + {'DOWN', _, _, Reducer, Reason2} -> + exit(Reason2) end, - lists:seq(1, 5), infinity), - ?assertMatch([{value, 1}, test_exception, {value, 3}, - {value, 4}, {value, 5}] , Results). + receive + {'DOWN', _, _, Reducer, normal} -> + nil + end, + Results. --endif. +reducer(Parent, NumReceived, State, Func) -> + receive + die -> + nil; + {mappers, done, NumReceived} -> + Parent ! {self (), State}; + Keys -> + reducer(Parent, NumReceived + 1, each_key(State, Func, Keys), Func) + end. + +each_key(State, Func, {Key, Value}) -> + Func(State, Key, Value); +each_key(State, Func, [List|Keys]) -> + each_key(each_key(State, Func, List), Func, Keys); +each_key(State, _, []) -> + State. + +add_key(Dict, Key, Value) -> + case dict:is_key(Key, Dict) of + true -> + dict:append(Key, Value, Dict); + false -> + dict:store(Key, [Value], Dict) + end. + +% @doc Like below, but assumes a Malt of 1, +% meaning each element of the list is processed by a seperate process. +% @spec (Fun, Fuse, List) -> term() +runmany(Fun, Fuse, List) -> + runmany(Fun, Fuse, List, 1). + +% Begin internal stuff (though runmany/4 is exported). + +% @doc All of the other functions are implemented with runmany. 
runmany +% takes a List, splits it into sublists, and starts processes to operate on +% each sublist, all done according to Malt. Each process passes its sublist +% into Fun and sends the result back. +% +% The results are then fused together to get the final result. There are two +% ways this can operate, lineraly and recursively. If Fuse is a function, +% a fuse is done linearly left-to-right on the sublists, the results +% of processing the first and second sublists being passed to Fuse, then +% the result of the first fuse and processing the third sublits, and so on. If +% Fuse is {reverse, FuseFunc}, then a fuse is done right-to-left, the results +% of processing the second-to-last and last sublists being passed to FuseFunc, +% then the results of processing the third-to-last sublist and +% the results of the first fuse, and and so forth. +% Both methods preserve the original order of the lists elements. +% +% To do a recursive fuse, pass Fuse as {recursive, FuseFunc}. +% The recursive fuse makes no guarantee about the order the results of +% sublists, or the results of fuses are passed to FuseFunc. It +% continues fusing pairs of results until it is down to one. +% +% Recursive fuse is down in parallel with processing the sublists, and a +% process is spawned to fuse each pair of results. It is a parallized +% algorithm. Linear fuse is done after all results of processing sublists +% have been collected, and can only run in a single process. +% +% Even if you pass {recursive, FuseFunc}, a recursive fuse is only done if +% the malt contains {nodes, NodeList} or {processes, X}. If this is not the +% case, a linear fuse is done. +% @spec (Fun, Fuse, List, Malt) -> term() +% Fun = (list()) -> term() +% Fuse = FuseFunc | {recursive, FuseFunc} +% FuseFunc = (term(), term()) -> term() +runmany(Fun, Fuse, List, Malt) when is_list(Malt) -> + runmany(Fun, Fuse, List, local, no_split, Malt); +runmany(Fun, Fuse, List, Malt) -> + runmany(Fun, Fuse, List, [Malt]). 
+ +runmany(Fun, Fuse, List, Nodes, no_split, [MaltTerm|Malt]) when is_integer(MaltTerm) -> + runmany(Fun, Fuse, List, Nodes, MaltTerm, Malt); +% run a process for each scheduler +runmany(Fun, Fuse, List, local, Split, [{processes, schedulers}|Malt]) -> + S = erlang:system_info(schedulers), + runmany(Fun, Fuse, List, local, Split, [{processes, S}|Malt]); +% Split the list into X sublists, where X is the number of processes +runmany(Fun, Fuse, List, local, no_split, [{processes, X}|_]=Malt) -> + L = length(List), + case L rem X of + 0 -> + runmany(Fun, Fuse, List, local, L div X, Malt); + _ -> + runmany(Fun, Fuse, List, local, L div X + 1, Malt) + end; +% run X process on local machine +runmany(Fun, Fuse, List, local, Split, [{processes, X}|Malt]) -> + Nodes = lists:duplicate(X, node()), + runmany(Fun, Fuse, List, Nodes, Split, Malt); +runmany(Fun, Fuse, List, Nodes, Split, [{timeout, X}|Malt]) -> + Parent = self(), + Timer = spawn(fun () -> + receive + stoptimer -> + Parent ! {timerstopped, self()} + after X -> + Parent ! {timerrang, self()}, + receive + stoptimer -> + Parent ! {timerstopped, self()} + end + end + end), + Ans = try runmany(Fun, Fuse, List, Nodes, Split, Malt) + catch + % we really just want the after block, the syntax + % makes this catch necessary. + willneverhappen -> + nil + after + Timer ! stoptimer, + cleanup_timer(Timer) + end, + Ans; +runmany(Fun, Fuse, List, local, Split, [{nodes, NodeList}|Malt]) -> + Nodes = lists:foldl(fun ({Node, schedulers}, A) -> + X = schedulers_on_node(Node) + 1, + lists:reverse(lists:duplicate(X, Node), A); + ({Node, X}, A) -> + lists:reverse(lists:duplicate(X, Node), A); + (Node, A) -> + [Node|A] + end, + [], NodeList), + runmany(Fun, Fuse, List, Nodes, Split, Malt); +% local recursive fuse, for when we weren't invoked with {processes, X} +% or {nodes, NodeList}. Degenerates recursive fuse into linear fuse. 
+runmany(Fun, {recursive, Fuse}, List, local, Split, []) -> + runmany(Fun, Fuse, List, local, Split, []); +% by default, operate on each element seperately +runmany(Fun, Fuse, List, Nodes, no_split, []) -> + runmany(Fun, Fuse, List, Nodes, 1, []); +runmany(Fun, Fuse, List, local, Split, []) -> + List2 = splitmany(List, Split), + local_runmany(Fun, Fuse, List2); +runmany(Fun, Fuse, List, Nodes, Split, []) -> + List2 = splitmany(List, Split), + cluster_runmany(Fun, Fuse, List2, Nodes). + +cleanup_timer(Timer) -> + receive + {timerrang, Timer} -> + cleanup_timer(Timer); + {timerstopped, Timer} -> + nil + end. + +schedulers_on_node(Node) -> + case get(plists_schedulers_on_nodes) of + undefined -> + X = determine_schedulers(Node), + put(plists_schedulers_on_nodes, + dict:store(Node, X, dict:new())), + X; + Dict -> + case dict:is_key(Node, Dict) of + true -> + dict:fetch(Node, Dict); + false -> + X = determine_schedulers(Node), + put(plists_schedulers_on_nodes, + dict:store(Node, X, Dict)), + X + end + end. + +determine_schedulers(Node) -> + Parent = self(), + Child = spawn(Node, fun () -> + Parent ! {self(), erlang:system_info(schedulers)} + end), + erlang:monitor(process, Child), + receive + {Child, X} -> + receive + {'DOWN', _, _, Child, _Reason} -> + nil + end, + X; + {'DOWN', _, _, Child, Reason} when Reason =/= normal -> + 0 + end. + +% local runmany, for when we weren't invoked with {processes, X} +% or {nodes, NodeList}. Every sublist is processed in parallel. +local_runmany(Fun, Fuse, List) -> + Parent = self (), + Pids = lists:map(fun (L) -> + F = fun () -> + Parent ! + {self (), Fun(L)} + end, + {Pid, _} = erlang:spawn_monitor(F), + Pid + end, + List), + Answers = try lists:map(fun receivefrom/1, Pids) + catch throw:Message -> + {BadPid, Reason} = Message, + handle_error(BadPid, Reason, Pids) + end, + lists:foreach(fun (Pid) -> + normal_cleanup(Pid) + end, Pids), + fuse(Fuse, Answers). 
+ +receivefrom(Pid) -> + receive + {Pid, R} -> + R; + {'DOWN', _, _, BadPid, Reason} when Reason =/= normal -> + throw({BadPid, Reason}); + {timerrang, _} -> + throw({nil, timeout}) + end. + +% Convert List into [{Number, Sublist}] +cluster_runmany(Fun, Fuse, List, Nodes) -> + {List2, _} = lists:foldl(fun (X, {L, Count}) -> + {[{Count, X}|L], Count+1} + end, + {[], 0}, List), + cluster_runmany(Fun, Fuse, List2, Nodes, [], []). + +% Add a pair of results into the TaskList as a fusing task +cluster_runmany(Fun, {recursive, Fuse}, [], Nodes, Running, + [{_, R1}, {_, R2}|Results]) -> + cluster_runmany(Fun, {recursive, Fuse}, [{fuse, R1, R2}], Nodes, + Running, Results); +% recursive fuse done, return result +cluster_runmany(_, {recursive, _Fuse}, [], _Nodes, [], [{_, Result}]) -> + Result; +% edge case where we are asked to do nothing +cluster_runmany(_, {recursive, _Fuse}, [], _Nodes, [], []) -> + []; +% We're done, now we just have to [linear] fuse the results +cluster_runmany(_, Fuse, [], _Nodes, [], Results) -> + fuse(Fuse, lists:map(fun ({_, R}) -> R end, + lists:sort(fun ({A, _}, {B, _}) -> + A =< B + end, + lists:reverse(Results)))); +% We have a ready node and a sublist or fuse to be processed, so we start +% a new process +cluster_runmany(Fun, Fuse, [Task|TaskList], [N|Nodes], Running, Results) -> + Parent = self(), + case Task of + {Num, L2} -> + Fun2 = fun () -> + Parent ! {self(), Num, Fun(L2)} + end; + {fuse, R1, R2} -> + {recursive, FuseFunc} = Fuse, + Fun2 = fun () -> + Parent ! {self(), fuse, FuseFunc(R1, R2)} + end + end, + Fun3 = fun () -> + try Fun2() + catch + exit:siblingdied -> + ok; + exit:Reason -> + Parent ! {self(), error, Reason}; + error:R -> + Parent ! {self(), error, {R, erlang:get_stacktrace()}}; + throw:R -> + Parent ! 
{self(), error, {{nocatch, R}, erlang:get_stacktrace()}} + end + end, + Pid = spawn(N, Fun3), + erlang:monitor(process, Pid), + cluster_runmany(Fun, Fuse, TaskList, Nodes, [{Pid, N, Task}|Running], Results); +% We can't start a new process, but can watch over already running ones +cluster_runmany(Fun, Fuse, TaskList, Nodes, Running, Results) when length(Running) > 0 -> + receive + {_Pid, error, Reason} -> + RunningPids = lists:map(fun ({Pid, _, _}) -> + Pid + end, + Running), + handle_error(junkvalue, Reason, RunningPids); + {Pid, Num, Result} -> + % throw out the exit message, Reason should be + % normal, noproc, or noconnection + receive {'DOWN', _, _, Pid, _Reason} -> + nil + end, + {Running2, FinishedNode, _} = delete_running(Pid, Running, []), + cluster_runmany(Fun, Fuse, TaskList, + [FinishedNode|Nodes], Running2, [{Num, Result}|Results]); + {timerrang, _} -> + RunningPids = lists:map(fun ({Pid, _, _}) -> + Pid + end, + Running), + handle_error(nil, timeout, RunningPids); + % node failure + {'DOWN', _, _, Pid, noconnection} -> + {Running2, _DeadNode, Task} = delete_running(Pid, Running, []), + cluster_runmany(Fun, Fuse, [Task|TaskList], Nodes, + Running2, Results); + % could a noproc exit message come before the message from + % the process? we are assuming it can't. + % this clause is unlikely to get invoked due to cluster_runmany's + % spawned processes. It will still catch errors in mapreduce's + % reduce process, however. + {'DOWN', _, _, BadPid, Reason} when Reason =/= normal -> + RunningPids = lists:map(fun ({Pid, _, _}) -> + Pid + end, + Running), + handle_error(BadPid, Reason, RunningPids) + end; +% We have data, but no nodes either available or occupied +cluster_runmany(_, _, [_Non|_Empty], []=_Nodes, []=_Running, _) -> + exit(allnodescrashed). + +delete_running(Pid, [{Pid, Node, List}|Running], Acc) -> + {Running ++ Acc, Node, List}; +delete_running(Pid, [R|Running], Acc) -> + delete_running(Pid, Running, [R|Acc]). 
+ +handle_error(BadPid, Reason, Pids) -> + lists:foreach(fun (Pid) -> + exit(Pid, siblingdied) + end, Pids), + lists:foreach(fun (Pid) -> + error_cleanup(Pid, BadPid) + end, Pids), + exit(Reason). + +error_cleanup(BadPid, BadPid) -> + ok; +error_cleanup(Pid, BadPid) -> + receive + {Pid, _} -> + error_cleanup(Pid, BadPid); + {Pid, _, _} -> + error_cleanup(Pid, BadPid); + {'DOWN', _, _, Pid, _Reason} -> + ok + end. + +normal_cleanup(Pid) -> + receive + {'DOWN', _, _, Pid, _Reason} -> + ok + end. + +% edge case +fuse(_, []) -> + []; +fuse({reverse, _}=Fuse, Results) -> + [RL|ResultsR] = lists:reverse(Results), + fuse(Fuse, ResultsR, RL); +fuse(Fuse, [R1|Results]) -> + fuse(Fuse, Results, R1). + +fuse({reverse, FuseFunc}=Fuse, [R2|Results], R1) -> + fuse(Fuse, Results, FuseFunc(R2, R1)); +fuse(Fuse, [R2|Results], R1) -> + fuse(Fuse, Results, Fuse(R1, R2)); +fuse(_, [], R) -> + R. + +% Splits a list into a list of sublists, each of size Size, +% except for the last element which is less if the original list +% could not be evenly divided into Size-sized lists. +splitmany(List, Size) -> + splitmany(List, [], Size). + +splitmany([], Acc, _) -> + lists:reverse(Acc); +splitmany(List, Acc, Size) -> + {Top, NList} = split(Size, List), + splitmany(NList, [Top|Acc], Size). + +% Like lists:split, except it splits a list smaller than its first +% parameter +split(Size, List) -> + split(Size, List, []). + +split(0, List, Acc) -> + {lists:reverse(Acc), List}; +split(Size, [H|List], Acc) -> + split(Size - 1, List, [H|Acc]); +split(_, [], Acc) -> + {lists:reverse(Acc), []}. diff --git a/src/plists.erl b/src/plists.erl deleted file mode 100644 index 688d5a5..0000000 --- a/src/plists.erl +++ /dev/null @@ -1,858 +0,0 @@ -% @author Stephen Marsh -% @copyright 2007 Stephen Marsh freeyourmind ++ [$@|gmail.com] -% @doc plists is a drop-in replacement for module -% lists, -% making most list operations parallel. 
It can operate on each element in -% parallel, for IO-bound operations, on sublists in parallel, for -% taking advantage of multi-core machines with CPU-bound operations, and -% across erlang nodes, for parallizing inside a cluster. It handles -% errors and node failures. It can be configured, tuned, and tweaked to -% get optimal performance while minimizing overhead. -% -% Almost all the functions are -% identical to equivalent functions in lists, returning exactly the same -% result, and having both a form with an identical syntax that operates on -% each element in parallel and a form which takes an optional "malt", -% a specification for how to parallize the operation. -% -% fold is the one exception, parallel fold is different from linear fold. -% This module also include a simple mapreduce implementation, and the -% function runmany. All the other functions are implemented with runmany, -% which is as a generalization of parallel list operations. -% -% == Malts == -% A malt specifies how to break a list into sublists, and can optionally -% specify a timeout, which nodes to run on, and how many processes to start -% per node. -% -% Malt = MaltComponent | [MaltComponent]
-% MaltComponent = SubListSize::integer() | {processes, integer()} | -% {processes, schedulers} | -% {timeout, Milliseconds::integer()} | {nodes, [NodeSpec]}
-% NodeSpec = Node::atom() | {Node::atom(), NumProcesses::integer()} | -% {Node::atom(), schedulers} -% -% An integer can be given to specify the exact size for -% sublists. 1 is a good choice for IO-bound operations and when -% the operation on each list element is expensive. Larger numbers -% minimize overhead and are faster for cheap operations. -% -% If the integer is omitted, and -% you have specified a {processes, X}, the list is -% split into X sublists. This is only -% useful when the time to process each element is close to identical and you -% know exactly how many lines of execution are available to you. -% -% If neither of the above applies, the sublist size defaults to 1. -% -% You can use {processes, X} to have the list processed -% by X processes on the local machine. A good choice for X is the number of -% lines of execution (cores) the machine provides. This can be done -% automatically with {processes, schedulers}, which sets -% the number of processes to the number of schedulers in the erlang virtual -% machine (probably equal to the number of cores). -% -% {timeout, Milliseconds} specifies a timeout. This is a timeout for the entire -% operation, both operating on the sublists and combining the results. -% exit(timeout) is evaluated if the timeout is exceeded. -% -% {nodes, NodeList} specifies that the operation should be done across nodes. -% Every element of NodeList is of the form {NodeName, NumProcesses} or -% NodeName, which means the same as {NodeName, 1}. plists runs -% NumProcesses processes on NodeName concurrently. A good choice for -% NumProcesses is the number of lines of execution (cores) a node provides -% plus one. This ensures the node is completely busy even when -% fetching a new sublist. This can be done automatically with -% {NodeName, schedulers}, in which case -% plists uses a cached value if it has one, and otherwise finds the number of -% schedulers in the remote node and adds one. 
This will ensure at least one -% busy process per core (assuming the node has a scheduler for each core). -% -% plists is able to recover if a node goes down. -% If all nodes go down, exit(allnodescrashed) is evaluated. -% -% Any of the above may be used as a malt, or may be combined into a list. -% {nodes, NodeList} and {processes, X} may not be combined. -% -% === Examples === -% % start a process for each element (1-element sublists)
-% 1 -% -% % start a process for each ten elements (10-element sublists)
-% 10 -% -% % split the list into two sublists and process in two processes
-% {processes, 2} -% -% % split the list into X sublists and process in X processes,
-% % where X is the number of cores in the machine
-% {processes, schedulers} -% -% % split the list into 10-element sublists and process in two processes
-% [10, {processes, 2}] -% -% % timeout after one second. Assumes that a process should be started
-% % for each element.
-% {timeout, 1000} -% -% % Runs 3 processes at a time on apple@desktop, -% and 2 on orange@laptop
-% % This is the best way to utilize all the CPU-power of a dual-core
-% % desktop and a single-core laptop. Assumes that the list should be
-% % split into 1-element sublists.
-% {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]} -% -% Like above, but makes plists figure out how many processes to use. -% {nodes, [{apple@desktop, schedulers}, {orange@laptop, schedulers}]} -% -% % Gives apple and orange three seconds to process the list as
-% % 100-element sublists.
-% [100, {timeout, 3000}, {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]}] -% -% === Aside: Why Malt? === -% I needed a word for this concept, so maybe my subconsciousness gave me one by -% making me misspell multiply. Maybe it is an acronym for Malt is A List -% Tearing Specification. Maybe it is a beer metaphor, suggesting that code -% only runs in parallel if bribed with spirits. It's jargon, learn it -% or you can't be part of the in-group. -% -% == Messages and Errors == -% plists assures that no extraneous messages are left in or will later -% enter the message queue. This is guaranteed even in the event of an error. -% -% Errors in spawned processes are caught and propagated to the calling -% process. If you invoke -% -% plists:map(fun (X) -> 1/X end, [1, 2, 3, 0]). -% -% you get a badarith error, exactly like when you use lists:map. -% -% plists uses monitors to watch the processes it spawns. It is not a good idea -% to invoke plists when you are already monitoring processes. If one of them -% does a non-normal exit, plists receives the 'DOWN' message believing it to be -% from one of its own processes. The error propagation system goes into -% effect, which results in the error occuring in the calling process. -% -% == License == -% The MIT License -% -% Copyright (c) 2007 Stephen Marsh -% -% Permission is hereby granted, free of charge, to any person obtaining a copy -% of this software and associated documentation files (the "Software"), to deal -% in the Software without restriction, including without limitation the rights -% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -% copies of the Software, and to permit persons to whom the Software is -% furnished to do so, subject to the following conditions: -% -% The above copyright notice and this permission notice shall be included in -% all copies or substantial portions of the Software. 
-% -% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -% THE SOFTWARE. - - --module(plists). --export([all/2, all/3, any/2, any/3, filter/2, filter/3, -fold/3, fold/4, fold/5, foreach/2, foreach/3, map/2, map/3, -partition/2, partition/3, sort/1, sort/2, sort/3, -usort/1, usort/2, usort/3, mapreduce/2, mapreduce/3, mapreduce/5, -runmany/3, runmany/4]). - -% Everything here is defined in terms of runmany. -% The following methods are convient interfaces to runmany. - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> bool() -all(Fun, List) -> - all(Fun, List, 1). - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List, Malt) -> bool() -all(Fun, List, Malt) -> - try runmany(fun (L) -> - B = lists:all(Fun, L), - if B -> - nil; - true -> - exit(notall) - end - end, - fun (_A1, _A2) -> - nil - end, - List, Malt) of - _ -> - true - catch exit:notall -> - false - end. - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> bool() -any(Fun, List) -> - any(Fun, List, 1). - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List, Malt) -> bool() -any(Fun, List, Malt) -> - try runmany(fun (L) -> - B = lists:any(Fun, L), - if B -> - exit(any); - true -> - nil - end - end, - fun (_A1, _A2) -> - nil - end, - List, Malt) of - _ -> - false - catch exit:any -> - true - end. - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> list() -filter(Fun, List) -> - filter(Fun, List, 1). - -% @doc Same semantics as in module -% lists. 
-% @spec (Fun, List, Malt) -> list() -filter(Fun, List, Malt) -> - runmany(fun (L) -> - lists:filter(Fun, L) - end, - {reverse, fun (A1, A2) -> - A1 ++ A2 - end}, - List, Malt). - -% Note that with parallel fold there is not foldl and foldr, -% instead just one fold that can fuse Accumlators. - -% @doc Like below, but assumes 1 as the Malt. This function is almost useless, -% and is intended only to aid converting code from using lists to plists. -% @spec (Fun, InitAcc, List) -> term() -fold(Fun, InitAcc, List) -> - fold(Fun, Fun, InitAcc, List, 1). - -% @doc Like below, but uses the Fun as the Fuse by default. -% @spec (Fun, InitAcc, List, Malt) -> term() -fold(Fun, InitAcc, List, Malt) -> - fold(Fun, Fun, InitAcc, List, Malt). - -% @doc fold is more complex when made parallel. There is no foldl and foldr, -% accumulators aren't passed in any defined order. -% The list is split into sublists which are folded together. Fun is -% identical to the function passed to lists:fold[lr], it takes -% (an element, and the accumulator) and returns -> a new accumulator. -% It is used for the initial stage of folding sublists. Fuse fuses together -% the results, it takes (Results1, Result2) and returns -> a new result. -% By default sublists are fused left to right, each result of a fuse being -% fed into the first element of the next fuse. The result of the last fuse -% is the result. -% -% Fusing may also run in parallel using a recursive algorithm, -% by specifying the fuse as {recursive, Fuse}. See -% the discussion in {@link runmany/4}. -% -% Malt is the malt for the initial folding of sublists, and for the -% possible recursive fuse. -% @spec (Fun, Fuse, InitAcc, List, Malt) -> term() -fold(Fun, Fuse, InitAcc, List, Malt) -> - Fun2 = fun (L) -> lists:foldl(Fun, InitAcc, L) end, - runmany(Fun2, Fuse, List, Malt). - -% @doc Similiar to foreach in module -% lists -% except it makes no guarantee about the order it processes list elements. 
-% @spec (Fun, List) -> void() -foreach(Fun, List) -> - foreach(Fun, List, 1). - -% @doc Similiar to foreach in module -% lists -% except it makes no guarantee about the order it processes list elements. -% @spec (Fun, List, Malt) -> void() -foreach(Fun, List, Malt) -> - runmany(fun (L) -> - lists:foreach(Fun, L) - end, - fun (_A1, _A2) -> - ok - end, - List, Malt). - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> list() -map(Fun, List) -> - map(Fun, List, 1). - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List, Malt) -> list() -map(Fun, List, Malt) -> - runmany(fun (L) -> - lists:map(Fun, L) - end, - {reverse, fun (A1, A2) -> - A1 ++ A2 - end}, - List, Malt). - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> {list(), list()} -partition(Fun, List) -> - partition(Fun, List, 1). - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List, Malt) -> {list(), list()} -partition(Fun, List, Malt) -> - runmany(fun (L) -> - lists:partition(Fun, L) - end, - {reverse, fun ({True1, False1}, {True2, False2}) -> - {True1 ++ True2, False1 ++ False2} - end}, - List, Malt). - -% SORTMALT needs to be tuned --define(SORTMALT, 100). - -% @doc Same semantics as in module -% lists. -% @spec (List) -> list() -sort(List) -> - sort(fun (A, B) -> - A =< B - end, - List). - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> list() -sort(Fun, List) -> - sort(Fun, List, ?SORTMALT). - -% @doc This version lets you specify your own malt for sort. -% -% sort splits the list into sublists and sorts them, and it merges the -% sorted lists together. These are done in parallel. Each sublist is -% sorted in a seperate process, and each merging of results is done in a -% seperate process. Malt defaults to 100, causing the list to be split into -% 100-element sublists. 
-% @spec (Fun, List, Malt) -> list() -sort(Fun, List, Malt) -> - Fun2 = fun (L) -> - lists:sort(Fun, L) - end, - Fuse = fun (A1, A2) -> - lists:merge(Fun, A1, A2) - end, - runmany(Fun2, {recursive, Fuse}, List, Malt). - -% @doc Same semantics as in module -% lists. -% @spec (List) -> list() -usort(List) -> - usort(fun (A, B) -> - A =< B - end, - List). - -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> list() -usort(Fun, List) -> - usort(Fun, List, ?SORTMALT). - -% @doc This version lets you specify your own malt for usort. -% -% usort splits the list into sublists and sorts them, and it merges the -% sorted lists together. These are done in parallel. Each sublist is -% sorted in a seperate process, and each merging of results is done in a -% seperate process. Malt defaults to 100, causing the list to be split into -% 100-element sublists. -% -% usort removes duplicate elments while it sorts. -% @spec (Fun, List, Malt) -> list() -usort(Fun, List, Malt) -> - Fun2 = fun (L) -> - lists:usort(Fun, L) - end, - Fuse = fun (A1, A2) -> - lists:umerge(Fun, A1, A2) - end, - runmany(Fun2, {recursive, Fuse}, List, Malt). - -% @doc Like below, assumes default MapMalt of 1. -% @spec (MapFunc, List) -> Dict -% MapFunc = (term()) -> DeepListOfKeyValuePairs -% DeepListOfKeyValuePairs = [DeepListOfKeyValuePairs] | {Key, Value} -mapreduce(MapFunc, List) -> - mapreduce(MapFunc, List, 1). - -% Like below, but uses a default reducer that collects all -% {Key, Value} pairs into a -% dict, -% with values {Key, [Value1, Value2...]}. -% This dict is returned as the result. -mapreduce(MapFunc, List, MapMalt) -> - mapreduce(MapFunc, List, dict:new(), fun add_key/3, MapMalt). - -% @doc This is a very basic mapreduce. You won't write a Google-rivaling -% search engine with it. It has no equivalent in lists. 
Each -% element in the list is run through the MapFunc, which produces either -% a {Key, Value} pair, or a lists of key value pairs, or a list of lists of -% key value pairs...etc. A reducer process runs in parallel with the mapping -% processes, collecting the key value pairs. It starts with a state given by -% InitState, and for each {Key, Value} pair that it receives it invokes -% ReduceFunc(OldState, Key, Value) to compute its new state. mapreduce returns -% the reducer's final state. -% -% MapMalt is the malt for the mapping operation, with a default value of 1, -% meaning each element of the list is mapped by a seperate process. -% -% mapreduce requires OTP R11B, or it may leave monitoring messages in the -% message queue. -% @spec (MapFunc, List, InitState, ReduceFunc, MapMalt) -> Dict -% MapFunc = (term()) -> DeepListOfKeyValuePairs -% DeepListOfKeyValuePairs = [DeepListOfKeyValuePairs] | {Key, Value} -% ReduceFunc = (OldState::term(), Key::term(), Value::term() -> NewState::term() -mapreduce(MapFunc, List, InitState, ReduceFunc, MapMalt) -> - Parent = self(), - {Reducer, ReducerRef} = - erlang:spawn_monitor(fun () -> - reducer(Parent, 0, InitState, ReduceFunc) - end), - MapFunc2 = fun (L) -> - Reducer ! lists:map(MapFunc, L), - 1 - end, - SentMessages = try runmany(MapFunc2, fun (A, B) -> A+B end, List, MapMalt) - catch - exit:Reason -> - erlang:demonitor(ReducerRef, [flush]), - Reducer ! die, - exit(Reason) - end, - Reducer ! {mappers, done, SentMessages}, - Results = receive - {Reducer, Results2} -> - Results2; - {'DOWN', _, _, Reducer, Reason2} -> - exit(Reason2) - end, - receive - {'DOWN', _, _, Reducer, normal} -> - nil - end, - Results. - -reducer(Parent, NumReceived, State, Func) -> - receive - die -> - nil; - {mappers, done, NumReceived} -> - Parent ! {self (), State}; - Keys -> - reducer(Parent, NumReceived + 1, each_key(State, Func, Keys), Func) - end. 
- -each_key(State, Func, {Key, Value}) -> - Func(State, Key, Value); -each_key(State, Func, [List|Keys]) -> - each_key(each_key(State, Func, List), Func, Keys); -each_key(State, _, []) -> - State. - -add_key(Dict, Key, Value) -> - case dict:is_key(Key, Dict) of - true -> - dict:append(Key, Value, Dict); - false -> - dict:store(Key, [Value], Dict) - end. - -% @doc Like below, but assumes a Malt of 1, -% meaning each element of the list is processed by a seperate process. -% @spec (Fun, Fuse, List) -> term() -runmany(Fun, Fuse, List) -> - runmany(Fun, Fuse, List, 1). - -% Begin internal stuff (though runmany/4 is exported). - -% @doc All of the other functions are implemented with runmany. runmany -% takes a List, splits it into sublists, and starts processes to operate on -% each sublist, all done according to Malt. Each process passes its sublist -% into Fun and sends the result back. -% -% The results are then fused together to get the final result. There are two -% ways this can operate, lineraly and recursively. If Fuse is a function, -% a fuse is done linearly left-to-right on the sublists, the results -% of processing the first and second sublists being passed to Fuse, then -% the result of the first fuse and processing the third sublits, and so on. If -% Fuse is {reverse, FuseFunc}, then a fuse is done right-to-left, the results -% of processing the second-to-last and last sublists being passed to FuseFunc, -% then the results of processing the third-to-last sublist and -% the results of the first fuse, and and so forth. -% Both methods preserve the original order of the lists elements. -% -% To do a recursive fuse, pass Fuse as {recursive, FuseFunc}. -% The recursive fuse makes no guarantee about the order the results of -% sublists, or the results of fuses are passed to FuseFunc. It -% continues fusing pairs of results until it is down to one. 
-% -% Recursive fuse is down in parallel with processing the sublists, and a -% process is spawned to fuse each pair of results. It is a parallized -% algorithm. Linear fuse is done after all results of processing sublists -% have been collected, and can only run in a single process. -% -% Even if you pass {recursive, FuseFunc}, a recursive fuse is only done if -% the malt contains {nodes, NodeList} or {processes, X}. If this is not the -% case, a linear fuse is done. -% @spec (Fun, Fuse, List, Malt) -> term() -% Fun = (list()) -> term() -% Fuse = FuseFunc | {recursive, FuseFunc} -% FuseFunc = (term(), term()) -> term() -runmany(Fun, Fuse, List, Malt) when is_list(Malt) -> - runmany(Fun, Fuse, List, local, no_split, Malt); -runmany(Fun, Fuse, List, Malt) -> - runmany(Fun, Fuse, List, [Malt]). - -runmany(Fun, Fuse, List, Nodes, no_split, [MaltTerm|Malt]) when is_integer(MaltTerm) -> - runmany(Fun, Fuse, List, Nodes, MaltTerm, Malt); -% run a process for each scheduler -runmany(Fun, Fuse, List, local, Split, [{processes, schedulers}|Malt]) -> - S = erlang:system_info(schedulers), - runmany(Fun, Fuse, List, local, Split, [{processes, S}|Malt]); -% Split the list into X sublists, where X is the number of processes -runmany(Fun, Fuse, List, local, no_split, [{processes, X}|_]=Malt) -> - L = length(List), - case L rem X of - 0 -> - runmany(Fun, Fuse, List, local, L div X, Malt); - _ -> - runmany(Fun, Fuse, List, local, L div X + 1, Malt) - end; -% run X process on local machine -runmany(Fun, Fuse, List, local, Split, [{processes, X}|Malt]) -> - Nodes = lists:duplicate(X, node()), - runmany(Fun, Fuse, List, Nodes, Split, Malt); -runmany(Fun, Fuse, List, Nodes, Split, [{timeout, X}|Malt]) -> - Parent = self(), - Timer = spawn(fun () -> - receive - stoptimer -> - Parent ! {timerstopped, self()} - after X -> - Parent ! {timerrang, self()}, - receive - stoptimer -> - Parent ! 
{timerstopped, self()} - end - end - end), - Ans = try runmany(Fun, Fuse, List, Nodes, Split, Malt) - catch - % we really just want the after block, the syntax - % makes this catch necessary. - willneverhappen -> - nil - after - Timer ! stoptimer, - cleanup_timer(Timer) - end, - Ans; -runmany(Fun, Fuse, List, local, Split, [{nodes, NodeList}|Malt]) -> - Nodes = lists:foldl(fun ({Node, schedulers}, A) -> - X = schedulers_on_node(Node) + 1, - lists:reverse(lists:duplicate(X, Node), A); - ({Node, X}, A) -> - lists:reverse(lists:duplicate(X, Node), A); - (Node, A) -> - [Node|A] - end, - [], NodeList), - runmany(Fun, Fuse, List, Nodes, Split, Malt); -% local recursive fuse, for when we weren't invoked with {processes, X} -% or {nodes, NodeList}. Degenerates recursive fuse into linear fuse. -runmany(Fun, {recursive, Fuse}, List, local, Split, []) -> - runmany(Fun, Fuse, List, local, Split, []); -% by default, operate on each element seperately -runmany(Fun, Fuse, List, Nodes, no_split, []) -> - runmany(Fun, Fuse, List, Nodes, 1, []); -runmany(Fun, Fuse, List, local, Split, []) -> - List2 = splitmany(List, Split), - local_runmany(Fun, Fuse, List2); -runmany(Fun, Fuse, List, Nodes, Split, []) -> - List2 = splitmany(List, Split), - cluster_runmany(Fun, Fuse, List2, Nodes). - -cleanup_timer(Timer) -> - receive - {timerrang, Timer} -> - cleanup_timer(Timer); - {timerstopped, Timer} -> - nil - end. - -schedulers_on_node(Node) -> - case get(plists_schedulers_on_nodes) of - undefined -> - X = determine_schedulers(Node), - put(plists_schedulers_on_nodes, - dict:store(Node, X, dict:new())), - X; - Dict -> - case dict:is_key(Node, Dict) of - true -> - dict:fetch(Node, Dict); - false -> - X = determine_schedulers(Node), - put(plists_schedulers_on_nodes, - dict:store(Node, X, Dict)), - X - end - end. - -determine_schedulers(Node) -> - Parent = self(), - Child = spawn(Node, fun () -> - Parent ! 
{self(), erlang:system_info(schedulers)} - end), - erlang:monitor(process, Child), - receive - {Child, X} -> - receive - {'DOWN', _, _, Child, _Reason} -> - nil - end, - X; - {'DOWN', _, _, Child, Reason} when Reason =/= normal -> - 0 - end. - -% local runmany, for when we weren't invoked with {processes, X} -% or {nodes, NodeList}. Every sublist is processed in parallel. -local_runmany(Fun, Fuse, List) -> - Parent = self (), - Pids = lists:map(fun (L) -> - F = fun () -> - Parent ! - {self (), Fun(L)} - end, - {Pid, _} = erlang:spawn_monitor(F), - Pid - end, - List), - Answers = try lists:map(fun receivefrom/1, Pids) - catch throw:Message -> - {BadPid, Reason} = Message, - handle_error(BadPid, Reason, Pids) - end, - lists:foreach(fun (Pid) -> - normal_cleanup(Pid) - end, Pids), - fuse(Fuse, Answers). - -receivefrom(Pid) -> - receive - {Pid, R} -> - R; - {'DOWN', _, _, BadPid, Reason} when Reason =/= normal -> - throw({BadPid, Reason}); - {timerrang, _} -> - throw({nil, timeout}) - end. - -% Convert List into [{Number, Sublist}] -cluster_runmany(Fun, Fuse, List, Nodes) -> - {List2, _} = lists:foldl(fun (X, {L, Count}) -> - {[{Count, X}|L], Count+1} - end, - {[], 0}, List), - cluster_runmany(Fun, Fuse, List2, Nodes, [], []). 
- -% Add a pair of results into the TaskList as a fusing task -cluster_runmany(Fun, {recursive, Fuse}, [], Nodes, Running, - [{_, R1}, {_, R2}|Results]) -> - cluster_runmany(Fun, {recursive, Fuse}, [{fuse, R1, R2}], Nodes, - Running, Results); -% recursive fuse done, return result -cluster_runmany(_, {recursive, _Fuse}, [], _Nodes, [], [{_, Result}]) -> - Result; -% edge case where we are asked to do nothing -cluster_runmany(_, {recursive, _Fuse}, [], _Nodes, [], []) -> - []; -% We're done, now we just have to [linear] fuse the results -cluster_runmany(_, Fuse, [], _Nodes, [], Results) -> - fuse(Fuse, lists:map(fun ({_, R}) -> R end, - lists:sort(fun ({A, _}, {B, _}) -> - A =< B - end, - lists:reverse(Results)))); -% We have a ready node and a sublist or fuse to be processed, so we start -% a new process -cluster_runmany(Fun, Fuse, [Task|TaskList], [N|Nodes], Running, Results) -> - Parent = self(), - case Task of - {Num, L2} -> - Fun2 = fun () -> - Parent ! {self(), Num, Fun(L2)} - end; - {fuse, R1, R2} -> - {recursive, FuseFunc} = Fuse, - Fun2 = fun () -> - Parent ! {self(), fuse, FuseFunc(R1, R2)} - end - end, - Fun3 = fun () -> - try Fun2() - catch - exit:siblingdied -> - ok; - exit:Reason -> - Parent ! {self(), error, Reason}; - error:R -> - Parent ! {self(), error, {R, erlang:get_stacktrace()}}; - throw:R -> - Parent ! 
{self(), error, {{nocatch, R}, erlang:get_stacktrace()}} - end - end, - Pid = spawn(N, Fun3), - erlang:monitor(process, Pid), - cluster_runmany(Fun, Fuse, TaskList, Nodes, [{Pid, N, Task}|Running], Results); -% We can't start a new process, but can watch over already running ones -cluster_runmany(Fun, Fuse, TaskList, Nodes, Running, Results) when length(Running) > 0 -> - receive - {_Pid, error, Reason} -> - RunningPids = lists:map(fun ({Pid, _, _}) -> - Pid - end, - Running), - handle_error(junkvalue, Reason, RunningPids); - {Pid, Num, Result} -> - % throw out the exit message, Reason should be - % normal, noproc, or noconnection - receive {'DOWN', _, _, Pid, _Reason} -> - nil - end, - {Running2, FinishedNode, _} = delete_running(Pid, Running, []), - cluster_runmany(Fun, Fuse, TaskList, - [FinishedNode|Nodes], Running2, [{Num, Result}|Results]); - {timerrang, _} -> - RunningPids = lists:map(fun ({Pid, _, _}) -> - Pid - end, - Running), - handle_error(nil, timeout, RunningPids); - % node failure - {'DOWN', _, _, Pid, noconnection} -> - {Running2, _DeadNode, Task} = delete_running(Pid, Running, []), - cluster_runmany(Fun, Fuse, [Task|TaskList], Nodes, - Running2, Results); - % could a noproc exit message come before the message from - % the process? we are assuming it can't. - % this clause is unlikely to get invoked due to cluster_runmany's - % spawned processes. It will still catch errors in mapreduce's - % reduce process, however. - {'DOWN', _, _, BadPid, Reason} when Reason =/= normal -> - RunningPids = lists:map(fun ({Pid, _, _}) -> - Pid - end, - Running), - handle_error(BadPid, Reason, RunningPids) - end; -% We have data, but no nodes either available or occupied -cluster_runmany(_, _, [_Non|_Empty], []=_Nodes, []=_Running, _) -> - exit(allnodescrashed). - -delete_running(Pid, [{Pid, Node, List}|Running], Acc) -> - {Running ++ Acc, Node, List}; -delete_running(Pid, [R|Running], Acc) -> - delete_running(Pid, Running, [R|Acc]). 
- -handle_error(BadPid, Reason, Pids) -> - lists:foreach(fun (Pid) -> - exit(Pid, siblingdied) - end, Pids), - lists:foreach(fun (Pid) -> - error_cleanup(Pid, BadPid) - end, Pids), - exit(Reason). - -error_cleanup(BadPid, BadPid) -> - ok; -error_cleanup(Pid, BadPid) -> - receive - {Pid, _} -> - error_cleanup(Pid, BadPid); - {Pid, _, _} -> - error_cleanup(Pid, BadPid); - {'DOWN', _, _, Pid, _Reason} -> - ok - end. - -normal_cleanup(Pid) -> - receive - {'DOWN', _, _, Pid, _Reason} -> - ok - end. - -% edge case -fuse(_, []) -> - []; -fuse({reverse, _}=Fuse, Results) -> - [RL|ResultsR] = lists:reverse(Results), - fuse(Fuse, ResultsR, RL); -fuse(Fuse, [R1|Results]) -> - fuse(Fuse, Results, R1). - -fuse({reverse, FuseFunc}=Fuse, [R2|Results], R1) -> - fuse(Fuse, Results, FuseFunc(R2, R1)); -fuse(Fuse, [R2|Results], R1) -> - fuse(Fuse, Results, Fuse(R1, R2)); -fuse(_, [], R) -> - R. - -% Splits a list into a list of sublists, each of size Size, -% except for the last element which is less if the original list -% could not be evenly divided into Size-sized lists. -splitmany(List, Size) -> - splitmany(List, [], Size). - -splitmany([], Acc, _) -> - lists:reverse(Acc); -splitmany(List, Acc, Size) -> - {Top, NList} = split(Size, List), - splitmany(NList, [Top|Acc], Size). - -% Like lists:split, except it splits a list smaller than its first -% parameter -split(Size, List) -> - split(Size, List, []). - -split(0, List, Acc) -> - {lists:reverse(Acc), List}; -split(Size, [H|List], Acc) -> - split(Size - 1, List, [H|Acc]); -split(_, [], Acc) -> - {lists:reverse(Acc), []}. 
From c7717743ed72677f95fc8df4eec0b11650a04041 Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Thu, 25 Oct 2012 15:41:28 -0500 Subject: [PATCH 19/20] bring ec_plists up to erlware standards Signed-off-by: Jordan Wilberding --- src/ec_plists.erl | 962 +++++++++++++++++++++------------------ test/ec_plists_tests.erl | 75 +++ 2 files changed, 598 insertions(+), 439 deletions(-) create mode 100644 test/ec_plists_tests.erl diff --git a/src/ec_plists.erl b/src/ec_plists.erl index 688d5a5..a021d02 100644 --- a/src/ec_plists.erl +++ b/src/ec_plists.erl @@ -1,214 +1,264 @@ -% @author Stephen Marsh -% @copyright 2007 Stephen Marsh freeyourmind ++ [$@|gmail.com] -% @doc plists is a drop-in replacement for module -% lists, -% making most list operations parallel. It can operate on each element in -% parallel, for IO-bound operations, on sublists in parallel, for -% taking advantage of multi-core machines with CPU-bound operations, and -% across erlang nodes, for parallizing inside a cluster. It handles -% errors and node failures. It can be configured, tuned, and tweaked to -% get optimal performance while minimizing overhead. -% -% Almost all the functions are -% identical to equivalent functions in lists, returning exactly the same -% result, and having both a form with an identical syntax that operates on -% each element in parallel and a form which takes an optional "malt", -% a specification for how to parallize the operation. -% -% fold is the one exception, parallel fold is different from linear fold. -% This module also include a simple mapreduce implementation, and the -% function runmany. All the other functions are implemented with runmany, -% which is as a generalization of parallel list operations. -% -% == Malts == -% A malt specifies how to break a list into sublists, and can optionally -% specify a timeout, which nodes to run on, and how many processes to start -% per node. -% -% Malt = MaltComponent | [MaltComponent]
-% MaltComponent = SubListSize::integer() | {processes, integer()} | -% {processes, schedulers} | -% {timeout, Milliseconds::integer()} | {nodes, [NodeSpec]}
-% NodeSpec = Node::atom() | {Node::atom(), NumProcesses::integer()} | -% {Node::atom(), schedulers} -% -% An integer can be given to specify the exact size for -% sublists. 1 is a good choice for IO-bound operations and when -% the operation on each list element is expensive. Larger numbers -% minimize overhead and are faster for cheap operations. -% -% If the integer is omitted, and -% you have specified a {processes, X}, the list is -% split into X sublists. This is only -% useful when the time to process each element is close to identical and you -% know exactly how many lines of execution are available to you. -% -% If neither of the above applies, the sublist size defaults to 1. -% -% You can use {processes, X} to have the list processed -% by X processes on the local machine. A good choice for X is the number of -% lines of execution (cores) the machine provides. This can be done -% automatically with {processes, schedulers}, which sets -% the number of processes to the number of schedulers in the erlang virtual -% machine (probably equal to the number of cores). -% -% {timeout, Milliseconds} specifies a timeout. This is a timeout for the entire -% operation, both operating on the sublists and combining the results. -% exit(timeout) is evaluated if the timeout is exceeded. -% -% {nodes, NodeList} specifies that the operation should be done across nodes. -% Every element of NodeList is of the form {NodeName, NumProcesses} or -% NodeName, which means the same as {NodeName, 1}. plists runs -% NumProcesses processes on NodeName concurrently. A good choice for -% NumProcesses is the number of lines of execution (cores) a node provides -% plus one. This ensures the node is completely busy even when -% fetching a new sublist. This can be done automatically with -% {NodeName, schedulers}, in which case -% plists uses a cached value if it has one, and otherwise finds the number of -% schedulers in the remote node and adds one. 
This will ensure at least one -% busy process per core (assuming the node has a scheduler for each core). -% -% plists is able to recover if a node goes down. -% If all nodes go down, exit(allnodescrashed) is evaluated. -% -% Any of the above may be used as a malt, or may be combined into a list. -% {nodes, NodeList} and {processes, X} may not be combined. -% -% === Examples === -% % start a process for each element (1-element sublists)
-% 1 -% -% % start a process for each ten elements (10-element sublists)
-% 10 -% -% % split the list into two sublists and process in two processes
-% {processes, 2} -% -% % split the list into X sublists and process in X processes,
-% % where X is the number of cores in the machine
-% {processes, schedulers} -% -% % split the list into 10-element sublists and process in two processes
-% [10, {processes, 2}] -% -% % timeout after one second. Assumes that a process should be started
-% % for each element.
-% {timeout, 1000} -% -% % Runs 3 processes at a time on apple@desktop, -% and 2 on orange@laptop
-% % This is the best way to utilize all the CPU-power of a dual-core
-% % desktop and a single-core laptop. Assumes that the list should be
-% % split into 1-element sublists.
-% {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]} -% -% Like above, but makes plists figure out how many processes to use. -% {nodes, [{apple@desktop, schedulers}, {orange@laptop, schedulers}]} -% -% % Gives apple and orange three seconds to process the list as
-% % 100-element sublists.
-% [100, {timeout, 3000}, {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]}] -% -% === Aside: Why Malt? === -% I needed a word for this concept, so maybe my subconsciousness gave me one by -% making me misspell multiply. Maybe it is an acronym for Malt is A List -% Tearing Specification. Maybe it is a beer metaphor, suggesting that code -% only runs in parallel if bribed with spirits. It's jargon, learn it -% or you can't be part of the in-group. -% -% == Messages and Errors == -% plists assures that no extraneous messages are left in or will later -% enter the message queue. This is guaranteed even in the event of an error. -% -% Errors in spawned processes are caught and propagated to the calling -% process. If you invoke -% -% plists:map(fun (X) -> 1/X end, [1, 2, 3, 0]). -% -% you get a badarith error, exactly like when you use lists:map. -% -% plists uses monitors to watch the processes it spawns. It is not a good idea -% to invoke plists when you are already monitoring processes. If one of them -% does a non-normal exit, plists receives the 'DOWN' message believing it to be -% from one of its own processes. The error propagation system goes into -% effect, which results in the error occuring in the calling process. -% -% == License == -% The MIT License -% -% Copyright (c) 2007 Stephen Marsh -% -% Permission is hereby granted, free of charge, to any person obtaining a copy -% of this software and associated documentation files (the "Software"), to deal -% in the Software without restriction, including without limitation the rights -% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -% copies of the Software, and to permit persons to whom the Software is -% furnished to do so, subject to the following conditions: -% -% The above copyright notice and this permission notice shall be included in -% all copies or substantial portions of the Software. 
-% -% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -% THE SOFTWARE. +%%% -*- mode: Erlang; fill-column: 80; comment-column: 75; -*- +%%% The MIT License +%%% +%%% Copyright (c) 2007 Stephen Marsh +%%% +%%% Permission is hereby granted, free of charge, to any person obtaining a copy +%%% of this software and associated documentation files (the "Software"), to deal +%%% in the Software without restriction, including without limitation the rights +%%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +%%% copies of the Software, and to permit persons to whom the Software is +%%% furnished to do so, subject to the following conditions: +%%% +%%% The above copyright notice and this permission notice shall be included in +%%% all copies or substantial portions of the Software. +%%% +%%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +%%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +%%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +%%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +%%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +%%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +%%% THE SOFTWARE. 
+%%%--------------------------------------------------------------------------- +%%% @author Stephen Marsh +%%% @copyright 2007 Stephen Marsh freeyourmind ++ [$@|gmail.com] +%%% @doc +%%% plists is a drop-in replacement for module lists, making +%%% most list operations parallel. It can operate on each element in +%%% parallel, for IO-bound operations, on sublists in parallel, for +%%% taking advantage of multi-core machines with CPU-bound operations, +%%% and across erlang nodes, for parallelizing inside a cluster. It +%%% handles errors and node failures. It can be configured, tuned, and +%%% tweaked to get optimal performance while minimizing overhead. +%%% +%%% Almost all the functions are identical to equivalent functions in +%%% lists, returning exactly the same result, and having both a form +%%% with an identical syntax that operates on each element in parallel +%%% and a form which takes an optional "malt", a specification for how +%%% to parallelize the operation. +%%% +%%% fold is the one exception, parallel fold is different from linear +%%% fold. This module also includes a simple mapreduce implementation, +%%% and the function runmany. All the other functions are implemented +%%% with runmany, which is a generalization of parallel list +%%% operations. +%%% +%%% Malts +%%% ===== +%%% +%%% A malt specifies how to break a list into sublists, and can optionally +%%% specify a timeout, which nodes to run on, and how many processes to start +%%% per node. +%%% +%%% Malt = MaltComponent | [MaltComponent] +%%% MaltComponent = SubListSize::integer() | {processes, integer()} | +%%% {processes, schedulers} | +%%% {timeout, Milliseconds::integer()} | {nodes, [NodeSpec]}
+%%% +%%% NodeSpec = Node::atom() | {Node::atom(), NumProcesses::integer()} | +%%% {Node::atom(), schedulers} +%%% +%%% An integer can be given to specify the exact size for sublists. 1 +%%% is a good choice for IO-bound operations and when the operation on +%%% each list element is expensive. Larger numbers minimize overhead +%%% and are faster for cheap operations. +%%% +%%% If the integer is omitted, and you have specified a `{processes, +%%% X}`, the list is split into X sublists. This is only useful when +%%% the time to process each element is close to identical and you +%%% know exactly how many lines of execution are available to you. +%%% +%%% If neither of the above applies, the sublist size defaults to 1. +%%% +%%% You can use `{processes, X}` to have the list processed by `X` +%%% processes on the local machine. A good choice for `X` is the +%%% number of lines of execution (cores) the machine provides. This +%%% can be done automatically with {processes, schedulers}, which sets +%%% the number of processes to the number of schedulers in the erlang +%%% virtual machine (probably equal to the number of cores). +%%% +%%% `{timeout, Milliseconds}` specifies a timeout. This is a timeout +%%% for the entire operation, both operating on the sublists and +%%% combining the results. exit(timeout) is evaluated if the timeout +%%% is exceeded. +%%% +%%% `{nodes, NodeList}` specifies that the operation should be done +%%% across nodes. Every element of NodeList is of the form +%%% `{NodeName, NumProcesses}` or NodeName, which means the same as +%%% `{NodeName, 1}`. plists runs NumProcesses processes on NodeName +%%% concurrently. A good choice for NumProcesses is the number of +%%% lines of execution (cores) a node provides plus one. This ensures +%%% the node is completely busy even when fetching a new sublist. 
This +%%% can be done automatically with `{NodeName, schedulers}`, in which +%%% case plists uses a cached value if it has one, and otherwise finds +%%% the number of schedulers in the remote node and adds one. This +%%% will ensure at least one busy process per core (assuming the node +%%% has a scheduler for each core). +%%% +%%% plists is able to recover if a node goes down. If all nodes go +%%% down, exit(allnodescrashed) is evaluated. +%%% +%%% Any of the above may be used as a malt, or may be combined into a +%%% list. `{nodes, NodeList}` and {processes, X} may not be combined. +%%% +%%% Examples +%%% ======== +%%% +%%% %% start a process for each element (1-element sublists) +%%% 1 +%%% +%%% %% start a process for each ten elements (10-element sublists) +%%% 10 +%%% +%%% %% split the list into two sublists and process in two processes +%%% {processes, 2} +%%% +%%% %% split the list into X sublists and process in X processes, +%%% %% where X is the number of cores in the machine +%%% {processes, schedulers} +%%% +%%% %% split the list into 10-element sublists and process in two processes +%%% [10, {processes, 2}] +%%% +%%% %% timeout after one second. Assumes that a process should be started +%%% %% for each element.
+%%% {timeout, 1000} +%%% +%%% %% Runs 3 processes at a time on apple@desktop, and 2 on orange@laptop +%%% %% This is the best way to utilize all the CPU-power of a dual-core
+%%% %% desktop and a single-core laptop. Assumes that the list should be
+%%% %% split into 1-element sublists.
+%%% {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]} +%%% +%%% %% Like above, but makes plists figure out how many processes to use. +%%% {nodes, [{apple@desktop, schedulers}, {orange@laptop, schedulers}]} +%%% +%%% %% Gives apple and orange three seconds to process the list as
+%%% %% 100-element sublists.
+%%% [100, {timeout, 3000}, {nodes, [{apple@desktop, 3}, {orange@laptop, 2}]}] +%%% +%%% Aside: Why Malt? +%%% ================ +%%% +%%% I needed a word for this concept, so maybe my subconsciousness +%%% gave me one by making me misspell multiply. Maybe it is an acronym +%%% for Malt is A List Tearing Specification. Maybe it is a beer +%%% metaphor, suggesting that code only runs in parallel if bribed +%%% with spirits. It's jargon, learn it or you can't be part of the +%%% in-group. +%%% +%%% Messages and Errors +%%% =================== +%%% +%%% plists assures that no extraneous messages are left in or will +%%% later enter the message queue. This is guaranteed even in the +%%% event of an error. +%%% +%%% Errors in spawned processes are caught and propagated to the +%%% calling process. If you invoke +%%% +%%% ec_plists:map(fun (X) -> 1/X end, [1, 2, 3, 0]). +%%% +%%% you get a badarith error, exactly like when you use lists:map. +%%% +%%% plists uses monitors to watch the processes it spawns. It is not a +%%% good idea to invoke plists when you are already monitoring +%%% processes. If one of them does a non-normal exit, plists receives +%%% the 'DOWN' message believing it to be from one of its own +%%% processes. The error propagation system goes into effect, which +%%% results in the error occurring in the calling process. +%%% +-module(ec_plists). +-export([all/2, all/3, + any/2, any/3, + filter/2, filter/3, + fold/3, fold/4, fold/5, + foreach/2, foreach/3, + map/2, map/3, + ftmap/2, ftmap/3, + partition/2, partition/3, + sort/1, sort/2, sort/3, + usort/1, usort/2, usort/3, + mapreduce/2, mapreduce/3, mapreduce/5, + runmany/3, runmany/4]). --module(plists). --export([all/2, all/3, any/2, any/3, filter/2, filter/3, -fold/3, fold/4, fold/5, foreach/2, foreach/3, map/2, map/3, -partition/2, partition/3, sort/1, sort/2, sort/3, -usort/1, usort/2, usort/3, mapreduce/2, mapreduce/3, mapreduce/5, -runmany/3, runmany/4]).
+-export_type([malt/0, malt_component/0, node_spec/0, fuse/0, fuse_fun/0]). -% Everything here is defined in terms of runmany. -% The following methods are convient interfaces to runmany. +%%============================================================================ +%% types +%%============================================================================ -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> bool() +-type malt() :: malt_component() | [malt_component()]. + +-type malt_component() :: SubListSize::integer() + | {processes, integer()} + | {processes, schedulers} + | {timeout, Milliseconds::integer()} + | {nodes, [node_spec()]}. + +-type node_spec() :: Node::atom() + | {Node::atom(), NumProcesses::integer()} + | {Node::atom(), schedulers}. + +-type fuse_fun() :: fun((term(), term()) -> term()). +-type fuse() :: fuse_fun() | {recursive, fuse_fun()} | {reverse, fuse_fun()}. +-type el_fun() :: fun((term()) -> term()). + +%%============================================================================ +%% API +%%============================================================================ + +%% Everything here is defined in terms of runmany. +%% The following methods are convient interfaces to runmany. + +%% @doc Same semantics as in module +%% lists. +-spec all/2 :: (el_fun(), list()) -> boolean(). all(Fun, List) -> all(Fun, List, 1). -% @doc Same semantics as in module -% lists. -% @spec (Fun, List, Malt) -> bool() +%% @doc Same semantics as in module +%% lists. +-spec all/3 :: (el_fun(), list(), malt()) -> boolean(). all(Fun, List, Malt) -> - try runmany(fun (L) -> + try + runmany(fun (L) -> B = lists:all(Fun, L), - if B -> + if + B -> nil; - true -> - exit(notall) + true -> + erlang:throw(notall) end end, fun (_A1, _A2) -> nil end, - List, Malt) of - _ -> - true - catch exit:notall -> - false + List, Malt), + true + catch + throw:notall -> + false end. -% @doc Same semantics as in module -% lists. 
-% @spec (Fun, List) -> bool() +%% @doc Same semantics as in module +%% lists. +-spec any/2 :: (fun(), list()) -> boolean(). any(Fun, List) -> any(Fun, List, 1). -% @doc Same semantics as in module -% lists. -% @spec (Fun, List, Malt) -> bool() +%% @doc Same semantics as in module +%% lists. +-spec any/3 :: (fun(), list(), malt()) -> boolean(). any(Fun, List, Malt) -> - try runmany(fun (L) -> + try + runmany(fun (L) -> B = lists:any(Fun, L), if B -> - exit(any); + erlang:throw(any); true -> nil end @@ -219,19 +269,19 @@ any(Fun, List, Malt) -> List, Malt) of _ -> false - catch exit:any -> + catch throw:any -> true end. -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> list() +%% @doc Same semantics as in module +%% lists. +-spec filter/2 :: (fun(), list()) -> list(). filter(Fun, List) -> filter(Fun, List, 1). -% @doc Same semantics as in module -% lists. -% @spec (Fun, List, Malt) -> list() +%% @doc Same semantics as in module +%% lists. +-spec filter/3 :: (fun(), list(), malt()) -> list(). filter(Fun, List, Malt) -> runmany(fun (L) -> lists:filter(Fun, L) @@ -241,53 +291,55 @@ filter(Fun, List, Malt) -> end}, List, Malt). -% Note that with parallel fold there is not foldl and foldr, -% instead just one fold that can fuse Accumlators. +%% Note that with parallel fold there is not foldl and foldr, +%% instead just one fold that can fuse Accumlators. -% @doc Like below, but assumes 1 as the Malt. This function is almost useless, -% and is intended only to aid converting code from using lists to plists. -% @spec (Fun, InitAcc, List) -> term() +%% @doc Like below, but assumes 1 as the Malt. This function is almost useless, +%% and is intended only to aid converting code from using lists to plists. +-spec fold/3 :: (fun(), InitAcc::term(), list()) -> term(). fold(Fun, InitAcc, List) -> fold(Fun, Fun, InitAcc, List, 1). -% @doc Like below, but uses the Fun as the Fuse by default. 
-% @spec (Fun, InitAcc, List, Malt) -> term() +%% @doc Like below, but uses the Fun as the Fuse by default. +-spec fold/4 :: (fun(), InitAcc::term(), list(), malt()) -> term(). fold(Fun, InitAcc, List, Malt) -> fold(Fun, Fun, InitAcc, List, Malt). -% @doc fold is more complex when made parallel. There is no foldl and foldr, -% accumulators aren't passed in any defined order. -% The list is split into sublists which are folded together. Fun is -% identical to the function passed to lists:fold[lr], it takes -% (an element, and the accumulator) and returns -> a new accumulator. -% It is used for the initial stage of folding sublists. Fuse fuses together -% the results, it takes (Results1, Result2) and returns -> a new result. -% By default sublists are fused left to right, each result of a fuse being -% fed into the first element of the next fuse. The result of the last fuse -% is the result. -% -% Fusing may also run in parallel using a recursive algorithm, -% by specifying the fuse as {recursive, Fuse}. See -% the discussion in {@link runmany/4}. -% -% Malt is the malt for the initial folding of sublists, and for the -% possible recursive fuse. -% @spec (Fun, Fuse, InitAcc, List, Malt) -> term() +%% @doc fold is more complex when made parallel. There is no foldl and +%% foldr, accumulators aren't passed in any defined order. The list +%% is split into sublists which are folded together. Fun is identical +%% to the function passed to lists:fold[lr], it takes (an element, and +%% the accumulator) and returns -> a new accumulator. It is used for +%% the initial stage of folding sublists. Fuse fuses together the +%% results, it takes (Results1, Result2) and returns -> a new result. +%% By default sublists are fused left to right, each result of a fuse +%% being fed into the first element of the next fuse. The result of +%% the last fuse is the result. +%% +%% Fusing may also run in parallel using a recursive algorithm, +%% by specifying the fuse as {recursive, Fuse}. 
See +%% the discussion in {@link runmany/4}. +%% +%% Malt is the malt for the initial folding of sublists, and for the +%% possible recursive fuse. +-spec fold/5 :: (fun(), fuse(), InitAcc::term(), list(), malt()) -> term(). fold(Fun, Fuse, InitAcc, List, Malt) -> - Fun2 = fun (L) -> lists:foldl(Fun, InitAcc, L) end, + Fun2 = fun (L) -> + lists:foldl(Fun, InitAcc, L) + end, runmany(Fun2, Fuse, List, Malt). -% @doc Similiar to foreach in module -% lists -% except it makes no guarantee about the order it processes list elements. -% @spec (Fun, List) -> void() +%% @doc Similiar to foreach in module +%% lists +%% except it makes no guarantee about the order it processes list elements. +-spec foreach/2 :: (fun(), list()) -> ok. foreach(Fun, List) -> foreach(Fun, List, 1). -% @doc Similiar to foreach in module -% lists -% except it makes no guarantee about the order it processes list elements. -% @spec (Fun, List, Malt) -> void() +%% @doc Similiar to foreach in module +%% lists +%% except it makes no guarantee about the order it processes list elements. +-spec foreach/3 :: (fun(), list(), malt()) -> ok. foreach(Fun, List, Malt) -> runmany(fun (L) -> lists:foreach(Fun, L) @@ -297,33 +349,57 @@ foreach(Fun, List, Malt) -> end, List, Malt). -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> list() +%% @doc Same semantics as in module +%% lists. +-spec map/2 :: (fun(), list()) -> list(). map(Fun, List) -> map(Fun, List, 1). -% @doc Same semantics as in module -% lists. -% @spec (Fun, List, Malt) -> list() +%% @doc Same semantics as in module +%% lists. +-spec map/3 :: (fun(), list(), malt()) -> list(). map(Fun, List, Malt) -> runmany(fun (L) -> lists:map(Fun, L) end, {reverse, fun (A1, A2) -> - A1 ++ A2 - end}, + A1 ++ A2 + end}, List, Malt). -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> {list(), list()} +%% @doc values are returned as {value, term()}. +-spec ftmap/2 :: (fun(), list()) -> list(). 
+ftmap(Fun, List) -> + map(fun(L) -> + try + {value, Fun(L)} + catch + Class:Type -> + {error, {Class, Type}} + end + end, List). + +%% @doc values are returned as {value, term()}. +-spec ftmap/3 :: (fun(), list(), malt()) -> list(). +ftmap(Fun, List, Malt) -> + map(fun(L) -> + try + {value, Fun(L)} + catch + Class:Type -> + {error, {Class, Type}} + end + end, List, Malt). + +%% @doc Same semantics as in module +%% lists. +-spec partition/2 :: (fun(), list()) -> {list(), list()}. partition(Fun, List) -> partition(Fun, List, 1). -% @doc Same semantics as in module -% lists. -% @spec (Fun, List, Malt) -> {list(), list()} +%% @doc Same semantics as in module +%% lists. +-spec partition/3 :: (fun(), list(), malt()) -> {list(), list()}. partition(Fun, List, Malt) -> runmany(fun (L) -> lists:partition(Fun, L) @@ -333,109 +409,110 @@ partition(Fun, List, Malt) -> end}, List, Malt). -% SORTMALT needs to be tuned +%% SORTMALT needs to be tuned -define(SORTMALT, 100). -% @doc Same semantics as in module -% lists. -% @spec (List) -> list() +%% @doc Same semantics as in module +%% lists. +-spec sort/1 :: (list()) -> list(). sort(List) -> sort(fun (A, B) -> A =< B end, List). -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> list() +%% @doc Same semantics as in module +%% lists. +-spec sort/2 :: (fun(), list()) -> list(). sort(Fun, List) -> sort(Fun, List, ?SORTMALT). -% @doc This version lets you specify your own malt for sort. -% -% sort splits the list into sublists and sorts them, and it merges the -% sorted lists together. These are done in parallel. Each sublist is -% sorted in a seperate process, and each merging of results is done in a -% seperate process. Malt defaults to 100, causing the list to be split into -% 100-element sublists. -% @spec (Fun, List, Malt) -> list() +%% @doc This version lets you specify your own malt for sort. +%% +%% sort splits the list into sublists and sorts them, and it merges the +%% sorted lists together. 
These are done in parallel. Each sublist is +%% sorted in a seperate process, and each merging of results is done in a +%% seperate process. Malt defaults to 100, causing the list to be split into +%% 100-element sublists. +-spec sort/3 :: (fun(), list(), malt()) -> list(). sort(Fun, List, Malt) -> Fun2 = fun (L) -> - lists:sort(Fun, L) - end, + lists:sort(Fun, L) + end, Fuse = fun (A1, A2) -> - lists:merge(Fun, A1, A2) - end, + lists:merge(Fun, A1, A2) + end, runmany(Fun2, {recursive, Fuse}, List, Malt). -% @doc Same semantics as in module -% lists. -% @spec (List) -> list() +%% @doc Same semantics as in module +%% lists. +-spec usort/1 :: (list()) -> list(). usort(List) -> usort(fun (A, B) -> - A =< B - end, + A =< B + end, List). -% @doc Same semantics as in module -% lists. -% @spec (Fun, List) -> list() +%% @doc Same semantics as in module +%% lists. +-spec usort/2 :: (fun(), list()) -> list(). usort(Fun, List) -> usort(Fun, List, ?SORTMALT). -% @doc This version lets you specify your own malt for usort. -% -% usort splits the list into sublists and sorts them, and it merges the -% sorted lists together. These are done in parallel. Each sublist is -% sorted in a seperate process, and each merging of results is done in a -% seperate process. Malt defaults to 100, causing the list to be split into -% 100-element sublists. -% -% usort removes duplicate elments while it sorts. -% @spec (Fun, List, Malt) -> list() +%% @doc This version lets you specify your own malt for usort. +%% +%% usort splits the list into sublists and sorts them, and it merges the +%% sorted lists together. These are done in parallel. Each sublist is +%% sorted in a seperate process, and each merging of results is done in a +%% seperate process. Malt defaults to 100, causing the list to be split into +%% 100-element sublists. +%% +%% usort removes duplicate elments while it sorts. +-spec usort/3 :: (fun(), list(), malt()) -> list(). 
usort(Fun, List, Malt) -> Fun2 = fun (L) -> - lists:usort(Fun, L) - end, + lists:usort(Fun, L) + end, Fuse = fun (A1, A2) -> - lists:umerge(Fun, A1, A2) - end, + lists:umerge(Fun, A1, A2) + end, runmany(Fun2, {recursive, Fuse}, List, Malt). -% @doc Like below, assumes default MapMalt of 1. -% @spec (MapFunc, List) -> Dict -% MapFunc = (term()) -> DeepListOfKeyValuePairs -% DeepListOfKeyValuePairs = [DeepListOfKeyValuePairs] | {Key, Value} +%% @doc Like below, assumes default MapMalt of 1. +-spec mapreduce/2 :: (MapFunc, list()) -> dict() when + MapFunc :: fun((term()) -> DeepListOfKeyValuePairs), + DeepListOfKeyValuePairs :: [DeepListOfKeyValuePairs] | {Key::term(), Value::term()}. mapreduce(MapFunc, List) -> mapreduce(MapFunc, List, 1). -% Like below, but uses a default reducer that collects all -% {Key, Value} pairs into a -% dict, -% with values {Key, [Value1, Value2...]}. -% This dict is returned as the result. +%% Like below, but uses a default reducer that collects all +%% {Key, Value} pairs into a +%% dict, +%% with values {Key, [Value1, Value2...]}. +%% This dict is returned as the result. mapreduce(MapFunc, List, MapMalt) -> mapreduce(MapFunc, List, dict:new(), fun add_key/3, MapMalt). -% @doc This is a very basic mapreduce. You won't write a Google-rivaling -% search engine with it. It has no equivalent in lists. Each -% element in the list is run through the MapFunc, which produces either -% a {Key, Value} pair, or a lists of key value pairs, or a list of lists of -% key value pairs...etc. A reducer process runs in parallel with the mapping -% processes, collecting the key value pairs. It starts with a state given by -% InitState, and for each {Key, Value} pair that it receives it invokes -% ReduceFunc(OldState, Key, Value) to compute its new state. mapreduce returns -% the reducer's final state. -% -% MapMalt is the malt for the mapping operation, with a default value of 1, -% meaning each element of the list is mapped by a seperate process. 
-% -% mapreduce requires OTP R11B, or it may leave monitoring messages in the -% message queue. -% @spec (MapFunc, List, InitState, ReduceFunc, MapMalt) -> Dict -% MapFunc = (term()) -> DeepListOfKeyValuePairs -% DeepListOfKeyValuePairs = [DeepListOfKeyValuePairs] | {Key, Value} -% ReduceFunc = (OldState::term(), Key::term(), Value::term() -> NewState::term() +%% @doc This is a very basic mapreduce. You won't write a +%% Google-rivaling search engine with it. It has no equivalent in +%% lists. Each element in the list is run through the MapFunc, which +%% produces either a {Key, Value} pair, or a lists of key value pairs, +%% or a list of lists of key value pairs...etc. A reducer process runs +%% in parallel with the mapping processes, collecting the key value +%% pairs. It starts with a state given by InitState, and for each +%% {Key, Value} pair that it receives it invokes ReduceFunc(OldState, +%% Key, Value) to compute its new state. mapreduce returns the +%% reducer's final state. +%% +%% MapMalt is the malt for the mapping operation, with a default value of 1, +%% meaning each element of the list is mapped by a seperate process. +%% +%% mapreduce requires OTP R11B, or it may leave monitoring messages in the +%% message queue. +-spec mapreduce/5 :: (MapFunc, list(), InitState::term(), ReduceFunc, malt()) -> dict() when + MapFunc :: fun((term()) -> DeepListOfKeyValuePairs), + DeepListOfKeyValuePairs :: [DeepListOfKeyValuePairs] | {Key::term(), Value::term()}, + ReduceFunc :: fun((OldState::term(), Key::term(), Value::term()) -> NewState::term()). mapreduce(MapFunc, List, InitState, ReduceFunc, MapMalt) -> Parent = self(), {Reducer, ReducerRef} = @@ -446,7 +523,8 @@ mapreduce(MapFunc, List, InitState, ReduceFunc, MapMalt) -> Reducer ! 
lists:map(MapFunc, L), 1 end, - SentMessages = try runmany(MapFunc2, fun (A, B) -> A+B end, List, MapMalt) + SentMessages = try + runmany(MapFunc2, fun (A, B) -> A+B end, List, MapMalt) catch exit:Reason -> erlang:demonitor(ReducerRef, [flush]), @@ -491,94 +569,94 @@ add_key(Dict, Key, Value) -> dict:store(Key, [Value], Dict) end. -% @doc Like below, but assumes a Malt of 1, -% meaning each element of the list is processed by a seperate process. -% @spec (Fun, Fuse, List) -> term() +%% @doc Like below, but assumes a Malt of 1, +%% meaning each element of the list is processed by a seperate process. +-spec runmany/3 :: (fun(), fuse(), list()) -> term(). runmany(Fun, Fuse, List) -> runmany(Fun, Fuse, List, 1). -% Begin internal stuff (though runmany/4 is exported). +%% Begin internal stuff (though runmany/4 is exported). -% @doc All of the other functions are implemented with runmany. runmany -% takes a List, splits it into sublists, and starts processes to operate on -% each sublist, all done according to Malt. Each process passes its sublist -% into Fun and sends the result back. -% -% The results are then fused together to get the final result. There are two -% ways this can operate, lineraly and recursively. If Fuse is a function, -% a fuse is done linearly left-to-right on the sublists, the results -% of processing the first and second sublists being passed to Fuse, then -% the result of the first fuse and processing the third sublits, and so on. If -% Fuse is {reverse, FuseFunc}, then a fuse is done right-to-left, the results -% of processing the second-to-last and last sublists being passed to FuseFunc, -% then the results of processing the third-to-last sublist and -% the results of the first fuse, and and so forth. -% Both methods preserve the original order of the lists elements. -% -% To do a recursive fuse, pass Fuse as {recursive, FuseFunc}. 
-% The recursive fuse makes no guarantee about the order the results of -% sublists, or the results of fuses are passed to FuseFunc. It -% continues fusing pairs of results until it is down to one. -% -% Recursive fuse is down in parallel with processing the sublists, and a -% process is spawned to fuse each pair of results. It is a parallized -% algorithm. Linear fuse is done after all results of processing sublists -% have been collected, and can only run in a single process. -% -% Even if you pass {recursive, FuseFunc}, a recursive fuse is only done if -% the malt contains {nodes, NodeList} or {processes, X}. If this is not the -% case, a linear fuse is done. -% @spec (Fun, Fuse, List, Malt) -> term() -% Fun = (list()) -> term() -% Fuse = FuseFunc | {recursive, FuseFunc} -% FuseFunc = (term(), term()) -> term() -runmany(Fun, Fuse, List, Malt) when is_list(Malt) -> +%% @doc All of the other functions are implemented with runmany. runmany +%% takes a List, splits it into sublists, and starts processes to operate on +%% each sublist, all done according to Malt. Each process passes its sublist +%% into Fun and sends the result back. +%% +%% The results are then fused together to get the final result. There are two +%% ways this can operate, lineraly and recursively. If Fuse is a function, +%% a fuse is done linearly left-to-right on the sublists, the results +%% of processing the first and second sublists being passed to Fuse, then +%% the result of the first fuse and processing the third sublits, and so on. If +%% Fuse is {reverse, FuseFunc}, then a fuse is done right-to-left, the results +%% of processing the second-to-last and last sublists being passed to FuseFunc, +%% then the results of processing the third-to-last sublist and +%% the results of the first fuse, and and so forth. +%% Both methods preserve the original order of the lists elements. +%% +%% To do a recursive fuse, pass Fuse as {recursive, FuseFunc}. 
+%% The recursive fuse makes no guarantee about the order the results of +%% sublists, or the results of fuses are passed to FuseFunc. It +%% continues fusing pairs of results until it is down to one. +%% +%% Recursive fuse is down in parallel with processing the sublists, and a +%% process is spawned to fuse each pair of results. It is a parallized +%% algorithm. Linear fuse is done after all results of processing sublists +%% have been collected, and can only run in a single process. +%% +%% Even if you pass {recursive, FuseFunc}, a recursive fuse is only done if +%% the malt contains {nodes, NodeList} or {processes, X}. If this is not the +%% case, a linear fuse is done. +-spec runmany/4 :: (fun(([term()]) -> term()), fuse(), list(), malt()) -> term(). +runmany(Fun, Fuse, List, Malt) + when erlang:is_list(Malt) -> runmany(Fun, Fuse, List, local, no_split, Malt); runmany(Fun, Fuse, List, Malt) -> runmany(Fun, Fuse, List, [Malt]). -runmany(Fun, Fuse, List, Nodes, no_split, [MaltTerm|Malt]) when is_integer(MaltTerm) -> +runmany(Fun, Fuse, List, Nodes, no_split, [MaltTerm|Malt]) + when erlang:is_integer(MaltTerm) -> runmany(Fun, Fuse, List, Nodes, MaltTerm, Malt); -% run a process for each scheduler runmany(Fun, Fuse, List, local, Split, [{processes, schedulers}|Malt]) -> + %% run a process for each scheduler S = erlang:system_info(schedulers), runmany(Fun, Fuse, List, local, Split, [{processes, S}|Malt]); -% Split the list into X sublists, where X is the number of processes runmany(Fun, Fuse, List, local, no_split, [{processes, X}|_]=Malt) -> - L = length(List), - case L rem X of + %% Split the list into X sublists, where X is the number of processes + L = erlang:length(List), + case (L rem X) of 0 -> - runmany(Fun, Fuse, List, local, L div X, Malt); + runmany(Fun, Fuse, List, local, (L / X), Malt); _ -> - runmany(Fun, Fuse, List, local, L div X + 1, Malt) + runmany(Fun, Fuse, List, local, (L / X) + 1, Malt) end; -% run X process on local machine runmany(Fun, Fuse, 
List, local, Split, [{processes, X}|Malt]) -> + %% run X process on local machine Nodes = lists:duplicate(X, node()), runmany(Fun, Fuse, List, Nodes, Split, Malt); runmany(Fun, Fuse, List, Nodes, Split, [{timeout, X}|Malt]) -> - Parent = self(), - Timer = spawn(fun () -> - receive - stoptimer -> - Parent ! {timerstopped, self()} - after X -> - Parent ! {timerrang, self()}, - receive - stoptimer -> - Parent ! {timerstopped, self()} - end - end - end), - Ans = try runmany(Fun, Fuse, List, Nodes, Split, Malt) + Parent = erlang:self(), + Timer = proc_lib:spawn(fun () -> + receive + stoptimer -> + Parent ! {timerstopped, erlang:self()} + after X -> + Parent ! {timerrang, erlang:self()}, + receive + stoptimer -> + Parent ! {timerstopped, erlang:self()} + end + end + end), + Ans = try + runmany(Fun, Fuse, List, Nodes, Split, Malt) catch - % we really just want the after block, the syntax - % makes this catch necessary. + %% we really just want the after block, the syntax + %% makes this catch necessary. willneverhappen -> nil after - Timer ! stoptimer, - cleanup_timer(Timer) + Timer ! stoptimer, + cleanup_timer(Timer) end, Ans; runmany(Fun, Fuse, List, local, Split, [{nodes, NodeList}|Malt]) -> @@ -592,12 +670,12 @@ runmany(Fun, Fuse, List, local, Split, [{nodes, NodeList}|Malt]) -> end, [], NodeList), runmany(Fun, Fuse, List, Nodes, Split, Malt); -% local recursive fuse, for when we weren't invoked with {processes, X} -% or {nodes, NodeList}. Degenerates recursive fuse into linear fuse. runmany(Fun, {recursive, Fuse}, List, local, Split, []) -> + %% local recursive fuse, for when we weren't invoked with {processes, X} + %% or {nodes, NodeList}. Degenerates recursive fuse into linear fuse. 
runmany(Fun, Fuse, List, local, Split, []); -% by default, operate on each element seperately runmany(Fun, Fuse, List, Nodes, no_split, []) -> + %% by default, operate on each element seperately runmany(Fun, Fuse, List, Nodes, 1, []); runmany(Fun, Fuse, List, local, Split, []) -> List2 = splitmany(List, Split), @@ -615,10 +693,10 @@ cleanup_timer(Timer) -> end. schedulers_on_node(Node) -> - case get(plists_schedulers_on_nodes) of + case erlang:get(ec_plists_schedulers_on_nodes) of undefined -> X = determine_schedulers(Node), - put(plists_schedulers_on_nodes, + erlang:put(ec_plists_schedulers_on_nodes, dict:store(Node, X, dict:new())), X; Dict -> @@ -627,17 +705,17 @@ schedulers_on_node(Node) -> dict:fetch(Node, Dict); false -> X = determine_schedulers(Node), - put(plists_schedulers_on_nodes, + erlang:put(ec_plists_schedulers_on_nodes, dict:store(Node, X, Dict)), X end end. determine_schedulers(Node) -> - Parent = self(), - Child = spawn(Node, fun () -> - Parent ! {self(), erlang:system_info(schedulers)} - end), + Parent = erlang:self(), + Child = proc_lib:spawn(Node, fun () -> + Parent ! {self(), erlang:system_info(schedulers)} + end), erlang:monitor(process, Child), receive {Child, X} -> @@ -650,21 +728,22 @@ determine_schedulers(Node) -> 0 end. -% local runmany, for when we weren't invoked with {processes, X} -% or {nodes, NodeList}. Every sublist is processed in parallel. +%% @doc local runmany, for when we weren't invoked with {processes, X} +%% or {nodes, NodeList}. Every sublist is processed in parallel. local_runmany(Fun, Fuse, List) -> Parent = self (), Pids = lists:map(fun (L) -> F = fun () -> - Parent ! - {self (), Fun(L)} + Parent ! 
{self (), Fun(L)} end, {Pid, _} = erlang:spawn_monitor(F), Pid end, List), - Answers = try lists:map(fun receivefrom/1, Pids) - catch throw:Message -> + Answers = try + lists:map(fun receivefrom/1, Pids) + catch + throw:Message -> {BadPid, Reason} = Message, handle_error(BadPid, Reason, Pids) end, @@ -678,70 +757,74 @@ receivefrom(Pid) -> {Pid, R} -> R; {'DOWN', _, _, BadPid, Reason} when Reason =/= normal -> - throw({BadPid, Reason}); + erlang:throw({BadPid, Reason}); {timerrang, _} -> - throw({nil, timeout}) + erlang:throw({nil, timeout}) end. -% Convert List into [{Number, Sublist}] +%% Convert List into [{Number, Sublist}] cluster_runmany(Fun, Fuse, List, Nodes) -> {List2, _} = lists:foldl(fun (X, {L, Count}) -> - {[{Count, X}|L], Count+1} - end, - {[], 0}, List), + {[{Count, X}|L], Count+1} + end, + {[], 0}, List), cluster_runmany(Fun, Fuse, List2, Nodes, [], []). -% Add a pair of results into the TaskList as a fusing task +%% @doc Add a pair of results into the TaskList as a fusing task cluster_runmany(Fun, {recursive, Fuse}, [], Nodes, Running, [{_, R1}, {_, R2}|Results]) -> cluster_runmany(Fun, {recursive, Fuse}, [{fuse, R1, R2}], Nodes, Running, Results); -% recursive fuse done, return result cluster_runmany(_, {recursive, _Fuse}, [], _Nodes, [], [{_, Result}]) -> + %% recursive fuse done, return result Result; -% edge case where we are asked to do nothing cluster_runmany(_, {recursive, _Fuse}, [], _Nodes, [], []) -> + %% edge case where we are asked to do nothing []; -% We're done, now we just have to [linear] fuse the results cluster_runmany(_, Fuse, [], _Nodes, [], Results) -> - fuse(Fuse, lists:map(fun ({_, R}) -> R end, + %% We're done, now we just have to [linear] fuse the results + fuse(Fuse, lists:map(fun ({_, R}) -> + R + end, lists:sort(fun ({A, _}, {B, _}) -> A =< B end, lists:reverse(Results)))); -% We have a ready node and a sublist or fuse to be processed, so we start -% a new process cluster_runmany(Fun, Fuse, [Task|TaskList], [N|Nodes], 
Running, Results) -> - Parent = self(), +%% We have a ready node and a sublist or fuse to be processed, so we start +%% a new process + + Parent = erlang:self(), case Task of {Num, L2} -> Fun2 = fun () -> - Parent ! {self(), Num, Fun(L2)} + Parent ! {erlang:self(), Num, Fun(L2)} end; {fuse, R1, R2} -> {recursive, FuseFunc} = Fuse, Fun2 = fun () -> - Parent ! {self(), fuse, FuseFunc(R1, R2)} + Parent ! {erlang:self(), fuse, FuseFunc(R1, R2)} end end, Fun3 = fun () -> - try Fun2() + try + Fun2() catch - exit:siblingdied -> + exit:siblingdied -> ok; - exit:Reason -> - Parent ! {self(), error, Reason}; - error:R -> - Parent ! {self(), error, {R, erlang:get_stacktrace()}}; - throw:R -> - Parent ! {self(), error, {{nocatch, R}, erlang:get_stacktrace()}} - end + exit:Reason -> + Parent ! {erlang:self(), error, Reason}; + error:R -> + Parent ! {erlang:self(), error, {R, erlang:get_stacktrace()}}; + throw:R -> + Parent ! {erlang:self(), error, {{nocatch, R}, erlang:get_stacktrace()}} + end end, - Pid = spawn(N, Fun3), + Pid = proc_lib:spawn(N, Fun3), erlang:monitor(process, Pid), cluster_runmany(Fun, Fuse, TaskList, Nodes, [{Pid, N, Task}|Running], Results); -% We can't start a new process, but can watch over already running ones cluster_runmany(Fun, Fuse, TaskList, Nodes, Running, Results) when length(Running) > 0 -> + %% We can't start a new process, but can watch over already running ones receive {_Pid, error, Reason} -> RunningPids = lists:map(fun ({Pid, _, _}) -> @@ -750,9 +833,10 @@ cluster_runmany(Fun, Fuse, TaskList, Nodes, Running, Results) when length(Runnin Running), handle_error(junkvalue, Reason, RunningPids); {Pid, Num, Result} -> - % throw out the exit message, Reason should be - % normal, noproc, or noconnection - receive {'DOWN', _, _, Pid, _Reason} -> + %% throw out the exit message, Reason should be + %% normal, noproc, or noconnection + receive + {'DOWN', _, _, Pid, _Reason} -> nil end, {Running2, FinishedNode, _} = delete_running(Pid, Running, []), @@ 
-764,16 +848,16 @@ cluster_runmany(Fun, Fuse, TaskList, Nodes, Running, Results) when length(Runnin end, Running), handle_error(nil, timeout, RunningPids); - % node failure + %% node failure {'DOWN', _, _, Pid, noconnection} -> {Running2, _DeadNode, Task} = delete_running(Pid, Running, []), cluster_runmany(Fun, Fuse, [Task|TaskList], Nodes, Running2, Results); - % could a noproc exit message come before the message from - % the process? we are assuming it can't. - % this clause is unlikely to get invoked due to cluster_runmany's - % spawned processes. It will still catch errors in mapreduce's - % reduce process, however. + %% could a noproc exit message come before the message from + %% the process? we are assuming it can't. + %% this clause is unlikely to get invoked due to cluster_runmany's + %% spawned processes. It will still catch errors in mapreduce's + %% reduce process, however. {'DOWN', _, _, BadPid, Reason} when Reason =/= normal -> RunningPids = lists:map(fun ({Pid, _, _}) -> Pid @@ -781,9 +865,9 @@ cluster_runmany(Fun, Fuse, TaskList, Nodes, Running, Results) when length(Runnin Running), handle_error(BadPid, Reason, RunningPids) end; -% We have data, but no nodes either available or occupied cluster_runmany(_, _, [_Non|_Empty], []=_Nodes, []=_Running, _) -> - exit(allnodescrashed). +%% We have data, but no nodes either available or occupied + erlang:exit(allnodescrashed). delete_running(Pid, [{Pid, Node, List}|Running], Acc) -> {Running ++ Acc, Node, List}; @@ -792,12 +876,12 @@ delete_running(Pid, [R|Running], Acc) -> handle_error(BadPid, Reason, Pids) -> lists:foreach(fun (Pid) -> - exit(Pid, siblingdied) + erlang:exit(Pid, siblingdied) end, Pids), lists:foreach(fun (Pid) -> error_cleanup(Pid, BadPid) end, Pids), - exit(Reason). + erlang:exit(Reason). error_cleanup(BadPid, BadPid) -> ok; @@ -817,7 +901,7 @@ normal_cleanup(Pid) -> ok end. 
-% edge case +%% edge case fuse(_, []) -> []; fuse({reverse, _}=Fuse, Results) -> @@ -833,9 +917,9 @@ fuse(Fuse, [R2|Results], R1) -> fuse(_, [], R) -> R. -% Splits a list into a list of sublists, each of size Size, -% except for the last element which is less if the original list -% could not be evenly divided into Size-sized lists. +%% @doc Splits a list into a list of sublists, each of size Size, +%% except for the last element which is less if the original list +%% could not be evenly divided into Size-sized lists. splitmany(List, Size) -> splitmany(List, [], Size). @@ -845,8 +929,8 @@ splitmany(List, Acc, Size) -> {Top, NList} = split(Size, List), splitmany(NList, [Top|Acc], Size). -% Like lists:split, except it splits a list smaller than its first -% parameter +%% @doc Like lists:split, except it splits a list smaller than its first +%% parameter split(Size, List) -> split(Size, List, []). diff --git a/test/ec_plists_tests.erl b/test/ec_plists_tests.erl new file mode 100644 index 0000000..7acefe6 --- /dev/null +++ b/test/ec_plists_tests.erl @@ -0,0 +1,75 @@ +%%% @copyright Erlware, LLC. +-module(ec_plists_tests). + +-include_lib("eunit/include/eunit.hrl"). + +%%%=================================================================== +%%% Tests +%%%=================================================================== + +map_good_test() -> + Results = ec_plists:map(fun(_) -> + ok + end, + lists:seq(1, 5)), + ?assertMatch([ok, ok, ok, ok, ok], + Results). + +ftmap_good_test() -> + Results = ec_plists:ftmap(fun(_) -> + ok + end, + lists:seq(1, 3)), + ?assertMatch([{value, ok}, {value, ok}, {value, ok}], + Results). + +filter_good_test() -> + Results = ec_plists:filter(fun(X) -> + X == show + end, + [show, show, remove]), + ?assertMatch([show, show], + Results). + +map_timeout_test() -> + ?assertExit(timeout, + ec_plists:map(fun(T) -> + timer:sleep(T), + T + end, + [1, 100], {timeout, 10})). 
+ +ftmap_timeout_test() -> + ?assertExit(timeout, + ec_plists:ftmap(fun(X) -> + timer:sleep(X), + true + end, + [100, 1], {timeout, 10})). + +filter_timeout_test() -> + ?assertExit(timeout, + ec_plists:filter(fun(T) -> + timer:sleep(T), + T == 1 + end, + [1, 100], {timeout, 10})). + +map_bad_test() -> + ?assertExit({{nocatch,test_exception}, _}, + ec_plists:map(fun(_) -> + erlang:throw(test_exception) + end, + lists:seq(1, 5))). + + +ftmap_bad_test() -> + Results = + ec_plists:ftmap(fun(2) -> + erlang:throw(test_exception); + (N) -> + N + end, + lists:seq(1, 5)), + ?assertMatch([{value, 1}, {error,{throw,test_exception}}, {value, 3}, + {value, 4}, {value, 5}] , Results). From 8d300f5d02538733dadbac27add48d95654aeb8f Mon Sep 17 00:00:00 2001 From: Eric Merritt Date: Thu, 25 Oct 2012 16:36:17 -0500 Subject: [PATCH 20/20] massively expand the documentation in the README Signed-off-by: Jordan Wilberding --- README.md | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/README.md b/README.md index f2cab01..3a8ed83 100644 --- a/README.md +++ b/README.md @@ -23,3 +23,107 @@ Goals for the project * High Quality * Well Documented * Well Tested + +Currently Available Modules/Systems +------------------------------------ + +### [ec_date](https://github.com/erlware/erlware_commons/blob/master/src/ec_date.erl) + +This module formats erlang dates in the form {{Year, Month, Day}, +{Hour, Minute, Second}} to printable strings, using (almost) +equivalent formatting rules as http://uk.php.net/date, US vs European +dates are disambiguated in the same way as +http://uk.php.net/manual/en/function.strtotime.php That is, Dates in +the m/d/y or d-m-y formats are disambiguated by looking at the +separator between the various components: if the separator is a slash +(/), then the American m/d/y is assumed; whereas if the separator is a +dash (-) or a dot (.), then the European d-m-y format is assumed. 
To +avoid potential ambiguity, it's best to use ISO 8601 (YYYY-MM-DD) +dates. + +erlang has no concept of timezone so the following formats are not +implemented: B e I O P T Z. Formats c and r will also differ slightly. + +### [ec_file](https://github.com/erlware/erlware_commons/blob/master/src/ec_file.erl) + +A set of commonly defined helper functions for files that are not +included in stdlib. + +### [ec_plists](https://github.com/erlware/erlware_commons/blob/master/src/ec_plists.erl) + +plists is a drop-in replacement for module lists, making most +list operations parallel. It can operate on each element in parallel, +for IO-bound operations, on sublists in parallel, for taking advantage +of multi-core machines with CPU-bound operations, and across erlang +nodes, for parallelizing inside a cluster. It handles errors and node +failures. It can be configured, tuned, and tweaked to get optimal +performance while minimizing overhead. + +Almost all the functions are identical to equivalent functions in +lists, returning exactly the same result, and having both a form with +an identical syntax that operates on each element in parallel and a +form which takes an optional "malt", a specification for how to +parallelize the operation. + +fold is the one exception: parallel fold is different from linear +fold. This module also includes a simple mapreduce implementation, and +the function runmany. All the other functions are implemented with +runmany, which is a generalization of parallel list operations. + +### [ec_semver](https://github.com/erlware/erlware_commons/blob/master/src/ec_semver.erl) + +A complete parser for the [semver](http://semver.org/) +standard, including a complete set of conforming comparison functions. + +### [ec_lists](https://github.com/ericbmerritt/erlware_commons/blob/master/src/ec_lists.erl) + +A set of additional list manipulation functions designed to supplement +the `lists` module in stdlib.
+ +### [ec_talk](https://github.com/erlware/erlware_commons/blob/master/src/ec_talk.erl) + +A set of simple utility functions to facilitate command line +communication with a user. + +Signatures +----------- + +Other languages have built-in support for **Interface** or +**signature** functionality. Java has Interfaces, SML has +Signatures. Erlang, though, doesn't currently support this model, at +least not directly. There are a few ways you can approximate it. We +have defined a mechanism called *signatures* and several modules that +serve as examples and provide a good set of *dictionary* +signatures. More information about signatures can be found at +[signature](https://github.com/erlware/erlware_commons/blob/master/doc/signatures.md). + + +### [ec_dictionary](https://github.com/erlware/erlware_commons/blob/master/src/ec_dictionary.erl) + +A signature that supports association of keys to values. A map cannot +contain duplicate keys; each key can map to at most one value. + +### [ec_dict](https://github.com/erlware/erlware_commons/blob/master/src/ec_dict.erl) + +This provides an implementation of the ec_dictionary signature using +erlang's dicts as a base. The function documentation for ec_dictionary +applies here as well. + +### [ec_gb_trees](https://github.com/ericbmerritt/erlware_commons/blob/master/src/ec_gb_trees.erl) + +This provides an implementation of the ec_dictionary signature using +erlang's gb_trees as a base. The function documentation for +ec_dictionary applies here as well. + +### [ec_orddict](https://github.com/ericbmerritt/erlware_commons/blob/master/src/ec_orddict.erl) + +This provides an implementation of the ec_dictionary signature using +erlang's orddict as a base. The function documentation for +ec_dictionary applies here as well.
+ +### [ec_rbdict](https://github.com/ericbmerritt/erlware_commons/blob/master/src/ec_rbdict.erl) + +This provides an implementation of the ec_dictionary signature using +Robert Virding's rbdict module as a base. The function documentation +for ec_dictionary applies here as well.