Rewrite the token list parsing into separate, modulable functions

Introduce cowboy_http's list/2, nonempty_list/2, token/2 functions.
2025-07-14 20:30:23 +00:00 · 2011-10-17 09:15:01 +02:00 · 2011-10-17 09:15:01 +02:00 · 1e7338bdd4
commit 1e7338bdd4
parent 81cc99d10b
2 changed files with 64 additions and 42 deletions
--- a/src/cowboy_http.erl
+++ b/src/cowboy_http.erl
@ -12,10 +12,11 @@
 %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

+%% @doc Core HTTP parsing API.
 -module(cowboy_http).

 %% Parsing.
-export([parse_tokens_list/1]).
+-export([list/2, nonempty_list/2, token/2]).

 %% Interpretation.
 -export([connection_to_atom/1]).
@ -25,53 +26,74 @@

 %% Parsing.

-%% @doc Parse a list of tokens, as is often found in HTTP headers.
-%%
+%% @doc Parse a non-empty list of the given type.
+-spec nonempty_list(binary(), fun()) -> [any(), ...] | {error, badarg}.
+nonempty_list(Data, Fun) ->
+	case list(Data, Fun, []) of
+		{error, badarg} -> {error, badarg};
+		[] -> {error, badarg};
+		L -> lists:reverse(L)
+	end.
+
+%% @doc Parse a list of the given type.
+-spec list(binary(), fun()) -> list() | {error, badarg}.
+list(Data, Fun) ->
+	case list(Data, Fun, []) of
+		{error, badarg} -> {error, badarg};
+		L -> lists:reverse(L)
+	end.
+
+-spec list(binary(), fun(), [binary()]) -> [any()] | {error, badarg}.
+list(<<>>, _Fun, Acc) ->
+	Acc;
 %% From the RFC:
 %% <blockquote>Wherever this construct is used, null elements are allowed,
 %% but do not contribute to the count of elements present.
 %% That is, "(element), , (element) " is permitted, but counts
 %% as only two elements. Therefore, where at least one element is required,
 %% at least one non-null element MUST be present.</blockquote>
-spec parse_tokens_list(binary()) -> [binary()] | {error, badarg}.
-parse_tokens_list(Value) ->
-	case parse_tokens_list(Value, ws_or_sep, <<>>, []) of
-		{error, badarg} ->
-			{error, badarg};
-		L when length(L) =:= 0 ->
-			{error, badarg};
-		L ->
-			lists:reverse(L)
-	end.
+list(<< $,, Rest/bits >>, Fun, Acc) ->
+	list(Rest, Fun, Acc);
+list(Data, Fun, Acc) ->
+	Fun(Data,
+		fun (R, <<>>) -> list_separator(R,
+				fun (D) -> list(D, Fun, Acc) end);
+			(R, I) -> list_separator(R,
+				fun (D) -> list(D, Fun, [I|Acc]) end)
+		end).

-spec parse_tokens_list(binary(), token | ws | ws_or_sep, binary(),
-	[binary()]) -> [binary()] | {error, badarg}.
-parse_tokens_list(<<>>, token, Token, Acc) ->
-	[Token|Acc];
-parse_tokens_list(<< C, Rest/bits >>, token, Token, Acc)
+-spec list_separator(binary(), fun()) -> any().
+list_separator(<<>>, Fun) ->
+	Fun(<<>>);
+list_separator(<< $,, Rest/bits >>, Fun) ->
+	Fun(Rest);
+list_separator(<< C, Rest/bits >>, Fun)
 		when C =:= $\s; C =:= $\t ->
-	parse_tokens_list(Rest, ws, <<>>, [Token|Acc]);
-parse_tokens_list(<< $,, Rest/bits >>, token, Token, Acc) ->
-	parse_tokens_list(Rest, ws_or_sep, <<>>, [Token|Acc]);
-parse_tokens_list(<< C, Rest/bits >>, token, Token, Acc) ->
-	parse_tokens_list(Rest, token, << Token/binary, C >>, Acc);
-parse_tokens_list(<< C, Rest/bits >>, ws, <<>>, Acc)
-		when C =:= $\s; C =:= $\t ->
-	parse_tokens_list(Rest, ws, <<>>, Acc);
-parse_tokens_list(<< $,, Rest/bits >>, ws, <<>>, Acc) ->
-	parse_tokens_list(Rest, ws_or_sep, <<>>, Acc);
-parse_tokens_list(<<>>, ws_or_sep, <<>>, Acc) ->
-	Acc;
-parse_tokens_list(<< C, Rest/bits >>, ws_or_sep, <<>>, Acc)
-		when C =:= $\s; C =:= $\t ->
-	parse_tokens_list(Rest, ws_or_sep, <<>>, Acc);
-parse_tokens_list(<< $,, Rest/bits >>, ws_or_sep, <<>>, Acc) ->
-	parse_tokens_list(Rest, ws_or_sep, <<>>, Acc);
-parse_tokens_list(<< C, Rest/bits >>, ws_or_sep, <<>>, Acc) ->
-	parse_tokens_list(Rest, token, << C >>, Acc);
-parse_tokens_list(_Value, _State, _Token, _Acc) ->
+	list_separator(Rest, Fun);
+list_separator(_Data, _Fun) ->
 	{error, badarg}.

+%% @doc Parse a token.
+-spec token(binary(), fun()) -> any().
+token(<< C, Rest/bits >>, Fun)
+		when C =:= $\s; C =:= $\t ->
+	token(Rest, Fun);
+token(Data, Fun) ->
+	token(Data, Fun, <<>>).
+
+-spec token(binary(), fun(), binary()) -> any().
+token(<<>>, Fun, Acc) ->
+	Fun(<<>>, Acc);
+token(Data = << C, _Rest/bits >>, Fun, Acc)
+		when C =:= $(; C =:= $); C =:= $<; C =:= $>; C =:= $@;
+			 C =:= $,; C =:= $;; C =:= $:; C =:= $\\; C =:= $";
+			 C =:= $/; C =:= $[; C =:= $]; C =:= $?; C =:= $=;
+			 C =:= ${; C =:= $}; C =:= $\s; C =:= $\t;
+			 C < 32; C =:= 127 ->
+	Fun(Data, Acc);
+token(<< C, Rest/bits >>, Fun, Acc) ->
+	token(Rest, Fun, << Acc/binary, C >>).
+
 %% Interpretation.

 %% @doc Walk through a tokens list and return whether
@ -94,7 +116,7 @@ connection_to_atom([Connection|Tail]) ->

 -ifdef(TEST).

-parse_tokens_list_test_() ->
+nonempty_token_list_test_() ->
 	%% {Value, Result}
 	Tests = [
 		{<<>>, {error, badarg}},
@ -108,7 +130,7 @@ parse_tokens_list_test_() ->
 		{<<"close">>, [<<"close">>]},
 		{<<"keep-alive, upgrade">>, [<<"keep-alive">>, <<"upgrade">>]}
 	],
-	[{V, fun() -> R = parse_tokens_list(V) end} || {V, R} <- Tests].
+	[{V, fun() -> R = nonempty_list(V, fun token/2) end} || {V, R} <- Tests].

 connection_to_atom_test_() ->
 	%% {Tokens, Result}
--- a/src/cowboy_http_req.erl
+++ b/src/cowboy_http_req.erl
@ -220,7 +220,7 @@ parse_header(Name, Req=#http_req{p_headers=PHeaders}, Default)
 	case header(Name, Req) of
 		{undefined, Req2} -> {tokens, Default, Req2};
 		{Value, Req2} ->
-			case cowboy_http:parse_tokens_list(Value) of
+			case cowboy_http:nonempty_list(Value, fun cowboy_http:token/2) of
 				{error, badarg} ->
 					{error, badarg};
 				P ->
@ -417,7 +417,7 @@ response_connection([{Name, Value}|Tail], Connection) ->

 -spec response_connection_parse(binary()) -> keepalive | close.
 response_connection_parse(ReplyConn) ->
-	Tokens = cowboy_http:parse_tokens_list(ReplyConn),
+	Tokens = cowboy_http:nonempty_list(ReplyConn, fun cowboy_http:token/2),
 	cowboy_http:connection_to_atom(Tokens).

 -spec response_head(http_status(), http_headers(), http_headers()) -> iolist().