0
Fork 0
mirror of https://github.com/ninenines/cowboy.git synced 2025-07-14 12:20:24 +00:00

Graceful shutdown

Note: This commit makes cowboy depend on cowlib master.

Graceful shutdown for HTTP/2:

1. A GOAWAY frame with the last stream id set to 2^31-1 is sent and a
   timer is started (goaway_initial_timeout, default 1000ms), to wait
   for any in-flight requests sent by the client, and the status is set
   to 'closing_initiated'. If the client responds with GOAWAY and closes
   the connection, we're done.
2. A second GOAWAY frame is sent with the actual last stream id and the
   status is set to 'closing'. If no streams exist, the connection
   terminates. Otherwise a second timer (goaway_complete_timeout,
   default 3000ms) is started, to wait for the streams to complete. New
   streams are not accepted when status is 'closing'.
3. If not all streams have completed when the second timer expires,
   the connection is forcefully terminated.

Graceful shutdown for HTTP/1.x:

1. If a request is currently being handled, it is waited for and the
   response is sent back to the client with the header "Connection:
   close". Then, the connection is closed.
2. If the current request handler is not finished within the time
   configured in transport option 'shutdown' (default 5000ms), the
   connection process is killed by its supervisor (ranch).

Implemented for HTTP/1.x and HTTP/2 in the following scenarios:

* When receiving exit signal 'shutdown' from the supervisor (e.g. when
  cowboy:stop_listener/3 is called).
* When a connection process is requested to terminate using
  sys:terminate/2,3.

LH: Edited tests a bit and added todos for useful tests to add.
This commit is contained in:
Viktor Söderqvist 2020-10-08 17:53:25 +02:00 committed by Loïc Hoguin
parent fa9c8ad832
commit 059d58d39f
No known key found for this signature in database
GPG key ID: 8A9DF795F6FED764
9 changed files with 397 additions and 64 deletions

View file

@ -4,6 +4,14 @@
-export([init/2]).
init(Req, Delay) ->
%% Handler that replies "Hello world!" after sleeping for a configurable delay.
%%
%% The handler options are either a bare integer delay (in milliseconds, as
%% passed to timer:sleep/1) or a map with a mandatory 'delay' key and an
%% optional 'notify_received' key holding a pid. When that pid is given,
%% it is sent {request_received, Path} before the handler starts sleeping.
init(Req, Delay) when is_integer(Delay) ->
	%% Normalize the legacy integer form into the map form.
	init(Req, #{delay => Delay});
init(Req, Opts=#{delay := Delay}) ->
	ok = notify_received(Opts, Req),
	timer:sleep(Delay),
	Resp = cowboy_req:reply(200, #{}, <<"Hello world!">>, Req),
	{ok, Resp, Delay}.

%% Tell the process listed under 'notify_received' (if any) that the
%% request has reached the handler.
notify_received(#{notify_received := Pid}, Req) ->
	Pid ! {request_received, maps:get(path, Req)},
	ok;
notify_received(_, _) ->
	ok.

View file

@ -284,3 +284,135 @@ settings_timeout_infinity(Config) ->
after
cowboy:stop_listener(?FUNCTION_NAME)
end.
%% Start a listener with a 500ms handler, issue one HTTP/2 request, ask the
%% server-side connection process to terminate via sys:terminate/2 while the
%% handler is still running, and verify that the response is still delivered
%% before the connection process goes down.
graceful_shutdown_connection(Config) ->
	doc("Check that ongoing requests are handled before gracefully shutting down a connection."),
	Dispatch = cowboy_router:compile([{"localhost", [
		{"/delay_hello", delay_hello_h,
			#{delay => 500, notify_received => self()}}
	]}]),
	ProtoOpts = #{
		env => #{dispatch => Dispatch}
	},
	{ok, _} = cowboy:start_clear(?FUNCTION_NAME, [{port, 0}], ProtoOpts),
	Port = ranch:get_port(?FUNCTION_NAME),
	try
		ConnPid = gun_open([{type, tcp}, {protocol, http2}, {port, Port}|Config]),
		Ref = gun:get(ConnPid, "/delay_hello"),
		%% Make sure the request is received. Bounded so that a request
		%% that never reaches the handler fails the test right away
		%% instead of blocking it indefinitely.
		receive
			{request_received, <<"/delay_hello">>} ->
				ok
		after 5000 ->
			error(request_not_received)
		end,
		%% Tell the connection to shutdown while the handler is working.
		[CowboyConnPid] = ranch:procs(?FUNCTION_NAME, connections),
		MRef = monitor(process, CowboyConnPid),
		ok = sys:terminate(CowboyConnPid, goaway),
		%% Check that the response is sent to the client before the
		%% connection goes down.
		{response, nofin, 200, _RespHeaders} = gun:await(ConnPid, Ref),
		{ok, RespBody} = gun:await_body(ConnPid, Ref),
		<<"Hello world!">> = iolist_to_binary(RespBody),
		%% Check that the connection is gone soon afterwards. (The exit
		%% reason is supposed to be 'goaway' as passed to
		%% sys:terminate/2, but it is {shutdown, closed}.)
		%% The monitor reference is matched so that only the 'DOWN'
		%% message from our own monitor is accepted. The timeout is kept
		%% well above the default goaway timers.
		receive
			{'DOWN', MRef, process, CowboyConnPid, _Reason} ->
				ok
		after 5000 ->
			error(still_alive)
		end,
		[] = ranch:procs(?FUNCTION_NAME, connections),
		gun:close(ConnPid)
	after
		cowboy:stop_listener(?FUNCTION_NAME)
	end.
%% Start a listener with a 10s handler and short goaway timers (200ms and
%% 500ms), ask the server-side connection process to terminate while the
%% handler is running, and verify that the stream is aborted and the
%% connection process goes down without waiting for the handler.
graceful_shutdown_timeout(Config) ->
	doc("Check that a connection is closed when gracefully shutting down times out."),
	Dispatch = cowboy_router:compile([{"localhost", [
		{"/long_delay_hello", delay_hello_h,
			#{delay => 10000, notify_received => self()}}
	]}]),
	ProtoOpts = #{
		env => #{dispatch => Dispatch},
		goaway_initial_timeout => 200,
		goaway_complete_timeout => 500
	},
	{ok, _} = cowboy:start_clear(?FUNCTION_NAME, [{port, 0}], ProtoOpts),
	Port = ranch:get_port(?FUNCTION_NAME),
	try
		ConnPid = gun_open([{type, tcp}, {protocol, http2}, {port, Port}|Config]),
		Ref = gun:get(ConnPid, "/long_delay_hello"),
		%% Make sure the request is received. Bounded so that a request
		%% that never reaches the handler fails the test right away
		%% instead of blocking it indefinitely.
		receive
			{request_received, <<"/long_delay_hello">>} ->
				ok
		after 5000 ->
			error(request_not_received)
		end,
		%% Tell the connection to shutdown while the handler is working.
		[CowboyConnPid] = ranch:procs(?FUNCTION_NAME, connections),
		MRef = monitor(process, CowboyConnPid),
		ok = sys:terminate(CowboyConnPid, goaway),
		%% Check that connection didn't wait for the slow handler.
		{error, {stream_error, closed}} = gun:await(ConnPid, Ref),
		%% Check that the connection is gone. (The exit reason is
		%% supposed to be 'goaway' as passed to sys:terminate/2, but it
		%% is {shutdown, {stop, {exit, goaway}, 'Graceful shutdown timed
		%% out.'}}.)
		%% The monitor reference is matched so that only the 'DOWN'
		%% message from our own monitor is accepted.
		receive
			{'DOWN', MRef, process, CowboyConnPid, _Reason} ->
				ok
		after 100 ->
			error(still_alive)
		end,
		[] = ranch:procs(?FUNCTION_NAME, connections),
		gun:close(ConnPid)
	after
		cowboy:stop_listener(?FUNCTION_NAME)
	end.
%% Start a listener with a 500ms handler, issue one HTTP/2 request, stop the
%% listener while the handler is running, and verify that the client still
%% gets the complete response.
graceful_shutdown_listener(Config) ->
	doc("Check that connections are shut down gracefully when stopping a listener."),
	Dispatch = cowboy_router:compile([{"localhost", [
		{"/delay_hello", delay_hello_h,
			#{delay => 500, notify_received => self()}}
	]}]),
	ProtoOpts = #{
		env => #{dispatch => Dispatch}
	},
	{ok, Listener} = cowboy:start_clear(?FUNCTION_NAME, [{port, 0}], ProtoOpts),
	Port = ranch:get_port(?FUNCTION_NAME),
	ConnPid = gun_open([{type, tcp}, {protocol, http2}, {port, Port}|Config]),
	Ref = gun:get(ConnPid, "/delay_hello"),
	%% Shutdown listener while the handlers are working.
	%% The wait is bounded so that a request that never reaches the
	%% handler fails the test instead of blocking it indefinitely.
	receive
		{request_received, <<"/delay_hello">>} ->
			ok
	after 5000 ->
		error(request_not_received)
	end,
	ListenerMonitorRef = monitor(process, Listener),
	ok = cowboy:stop_listener(?FUNCTION_NAME),
	receive
		{'DOWN', ListenerMonitorRef, process, Listener, _Reason} ->
			ok
	after 5000 ->
		error(listener_still_alive)
	end,
	%% Check that the request is handled before shutting down.
	{response, nofin, 200, _RespHeaders} = gun:await(ConnPid, Ref),
	{ok, RespBody} = gun:await_body(ConnPid, Ref),
	<<"Hello world!">> = iolist_to_binary(RespBody),
	gun:close(ConnPid).
%% Start a listener with a 10s handler and short goaway timers (200ms and
%% 500ms), stop the listener while the handler is running, and verify that
%% the slow request is aborted rather than waited for.
graceful_shutdown_listener_timeout(Config) ->
	doc("Check that connections are shut down when gracefully stopping a listener times out."),
	Dispatch = cowboy_router:compile([{"localhost", [
		{"/long_delay_hello", delay_hello_h,
			#{delay => 10000, notify_received => self()}}
	]}]),
	ProtoOpts = #{
		env => #{dispatch => Dispatch},
		goaway_initial_timeout => 200,
		goaway_complete_timeout => 500
	},
	{ok, Listener} = cowboy:start_clear(?FUNCTION_NAME, [{port, 0}], ProtoOpts),
	Port = ranch:get_port(?FUNCTION_NAME),
	ConnPid = gun_open([{type, tcp}, {protocol, http2}, {port, Port}|Config]),
	Ref = gun:get(ConnPid, "/long_delay_hello"),
	%% Shutdown listener while the handlers are working.
	%% The wait is bounded so that a request that never reaches the
	%% handler fails the test instead of blocking it indefinitely.
	receive
		{request_received, <<"/long_delay_hello">>} ->
			ok
	after 5000 ->
		error(request_not_received)
	end,
	ListenerMonitorRef = monitor(process, Listener),
	ok = cowboy:stop_listener(?FUNCTION_NAME),
	receive
		{'DOWN', ListenerMonitorRef, process, Listener, _Reason} ->
			ok
	after 5000 ->
		error(listener_still_alive)
	end,
	%% Check that the slow request is aborted.
	{error, {stream_error, closed}} = gun:await(ConnPid, Ref),
	gun:close(ConnPid).

View file

@ -20,6 +20,7 @@
-import(ct_helper, [doc/1]).
-import(ct_helper, [get_remote_pid_tcp/1]).
-import(cowboy_test, [gun_open/1]).
-import(cowboy_test, [gun_down/1]).
-import(cowboy_test, [raw_open/1]).
-import(cowboy_test, [raw_send/2]).
-import(cowboy_test, [raw_recv_head/1]).
@ -443,3 +444,73 @@ switch_protocol_flush(Config) ->
after
cowboy:stop_listener(?FUNCTION_NAME)
end.
%% Send two pipelined HTTP/1.1 requests to a 500ms handler, ask the
%% server-side connection process to terminate once both have reached the
%% handler, and verify that the first request gets a full response carrying
%% "connection: close" while the second one fails with a stream error.
graceful_shutdown_connection(Config) ->
	doc("Check that the current request is handled before gracefully "
		"shutting down a connection."),
	Dispatch = cowboy_router:compile([{"localhost", [
		{"/delay_hello", delay_hello_h,
			#{delay => 500, notify_received => self()}},
		{"/long_delay_hello", delay_hello_h,
			#{delay => 10000, notify_received => self()}}
	]}]),
	ProtoOpts = #{
		env => #{dispatch => Dispatch}
	},
	{ok, _} = cowboy:start_clear(?FUNCTION_NAME, [{port, 0}], ProtoOpts),
	Port = ranch:get_port(?FUNCTION_NAME),
	try
		ConnPid = gun_open([{type, tcp}, {protocol, http}, {port, Port}|Config]),
		{ok, http} = gun:await_up(ConnPid),
		#{socket := Socket} = gun:info(ConnPid),
		CowboyConnPid = get_remote_pid_tcp(Socket),
		CowboyConnRef = erlang:monitor(process, CowboyConnPid),
		Ref1 = gun:get(ConnPid, "/delay_hello"),
		Ref2 = gun:get(ConnPid, "/delay_hello"),
		%% Wait until both requests have reached the handler. Each wait
		%% is bounded so that a request that never arrives fails the
		%% test instead of blocking it indefinitely.
		receive
			{request_received, <<"/delay_hello">>} ->
				ok
		after 5000 ->
			error(first_request_not_received)
		end,
		receive
			{request_received, <<"/delay_hello">>} ->
				ok
		after 5000 ->
			error(second_request_not_received)
		end,
		ok = sys:terminate(CowboyConnPid, system_is_going_down),
		%% The first request is completed and tells the client that the
		%% connection is about to be closed.
		{response, nofin, 200, RespHeaders} = gun:await(ConnPid, Ref1),
		<<"close">> = proplists:get_value(<<"connection">>, RespHeaders),
		{ok, RespBody} = gun:await_body(ConnPid, Ref1),
		<<"Hello world!">> = iolist_to_binary(RespBody),
		%% The second request is not answered.
		{error, {stream_error, _}} = gun:await(ConnPid, Ref2),
		ok = gun_down(ConnPid),
		receive
			{'DOWN', CowboyConnRef, process, CowboyConnPid, _Reason} ->
				ok
		after 5000 ->
			error(still_alive)
		end
	after
		cowboy:stop_listener(?FUNCTION_NAME)
	end.
%% Open two HTTP/1.1 connections, one hitting a 500ms handler and one
%% hitting a 10s handler, stop the listener while both handlers are running,
%% and verify that the fast request completes with "connection: close"
%% while the slow one is aborted.
graceful_shutdown_listener(Config) ->
	doc("Check that connections are shut down gracefully when stopping a listener."),
	Dispatch = cowboy_router:compile([{"localhost", [
		{"/delay_hello", delay_hello_h,
			#{delay => 500, notify_received => self()}},
		{"/long_delay_hello", delay_hello_h,
			#{delay => 10000, notify_received => self()}}
	]}]),
	ProtoOpts = #{
		env => #{dispatch => Dispatch}
	},
	{ok, _} = cowboy:start_clear(?FUNCTION_NAME, [{port, 0}], ProtoOpts),
	Port = ranch:get_port(?FUNCTION_NAME),
	ConnPid1 = gun_open([{type, tcp}, {protocol, http}, {port, Port}|Config]),
	Ref1 = gun:get(ConnPid1, "/delay_hello"),
	ConnPid2 = gun_open([{type, tcp}, {protocol, http}, {port, Port}|Config]),
	Ref2 = gun:get(ConnPid2, "/long_delay_hello"),
	%% Shutdown listener while the handlers are working.
	%% Each wait is bounded so that a request that never reaches its
	%% handler fails the test instead of blocking it indefinitely.
	receive
		{request_received, <<"/delay_hello">>} ->
			ok
	after 5000 ->
		error(request_not_received)
	end,
	receive
		{request_received, <<"/long_delay_hello">>} ->
			ok
	after 5000 ->
		error(long_request_not_received)
	end,
	ok = cowboy:stop_listener(?FUNCTION_NAME),
	%% Check that the 1st request is handled before shutting down.
	{response, nofin, 200, RespHeaders} = gun:await(ConnPid1, Ref1),
	<<"close">> = proplists:get_value(<<"connection">>, RespHeaders),
	{ok, RespBody} = gun:await_body(ConnPid1, Ref1),
	<<"Hello world!">> = iolist_to_binary(RespBody),
	gun:close(ConnPid1),
	%% Check that the 2nd (very slow) request is not handled.
	{error, {stream_error, closed}} = gun:await(ConnPid2, Ref2),
	gun:close(ConnPid2).

View file

@ -18,6 +18,7 @@
-import(ct_helper, [config/2]).
-import(ct_helper, [doc/1]).
-import(ct_helper, [get_remote_pid_tcp/1]).
-import(cowboy_test, [gun_open/1]).
-import(cowboy_test, [raw_open/1]).
-import(cowboy_test, [raw_send/2]).
@ -52,6 +53,7 @@ init_routes(_) -> [
{"localhost", [
{"/", hello_h, []},
{"/echo/:key", echo_h, []},
{"/delay_hello", delay_hello_h, 1200},
{"/long_polling", long_polling_h, []},
{"/loop_handler_abort", loop_handler_abort_h, []},
{"/resp/:key[/:arg]", resp_h, []}
@ -2955,39 +2957,64 @@ client_settings_disable_push(Config) ->
%% (RFC7540 6.8) GOAWAY
% @todo GOAWAY frames have a reserved bit in the payload that must be ignored.
%
%% @todo We should eventually implement the mechanism for gracefully
%% shutting down of the connection. (Send the GOAWAY, finish processing
%% the current set of streams, give up after a certain timeout.)
%
%% @todo If we graceful shutdown and receive a GOAWAY, we give up too.
% A GOAWAY frame might not immediately precede closing of the
% connection; a receiver of a GOAWAY that has no more use for the
% connection SHOULD still send a GOAWAY frame before terminating the
% connection.
%
%% @todo And it gets more complex when you think about h1 to h2 proxies.
% A server that is attempting to gracefully shut down a
% connection SHOULD send an initial GOAWAY frame with the last stream
% identifier set to 2^31-1 and a NO_ERROR code. This signals to the
% client that a shutdown is imminent and that initiating further
% requests is prohibited. After allowing time for any in-flight stream
% creation (at least one round-trip time), the server can send another
% GOAWAY frame with an updated last stream identifier. This ensures
% that a connection can be cleanly shut down without losing requests.
%
%% @todo And of course even if we shutdown we need to be careful about
%% the connection state.
% After sending a GOAWAY frame, the sender can discard frames for
% streams initiated by the receiver with identifiers higher than the
% identified last stream. However, any frames that alter connection
% state cannot be completely ignored. For instance, HEADERS,
% PUSH_PROMISE, and CONTINUATION frames MUST be minimally processed to
% ensure the state maintained for header compression is consistent (see
% Section 4.3); similarly, DATA frames MUST be counted toward the
% connection flow-control window. Failure to process these frames can
% cause flow control or header compression state to become
% unsynchronized.
%
%% The client performs the handshake and then stays idle while the server
%% connection process is told to terminate. The server is expected to send
%% two GOAWAY frames and then close the socket.
graceful_shutdown_client_stays(Config) ->
	doc("A server gracefully shutting down must send a GOAWAY frame with the "
		"last stream identifier set to 2^31-1 and a NO_ERROR code. After allowing "
		"time for any in-flight stream creation the server can send another GOAWAY "
		"frame with an updated last stream identifier. (RFC7540 6.8)"),
	{ok, Socket} = do_handshake(Config),
	ok = sys:terminate(get_remote_pid_tcp(Socket), whatever),
	%% Initial GOAWAY (frame type 7 on stream 0): the last stream id is
	%% 2^31-1 and the error code is 0.
	{ok, InitialGoaway} = gen_tcp:recv(Socket, 17, 500),
	<<_:24, 7:8, 0:8, 0:1, 0:31, 0:1, 16#7fffffff:31, 0:32>> = InitialGoaway,
	%% Final GOAWAY: no stream was ever opened, so the last stream id is 0.
	{ok, FinalGoaway} = gen_tcp:recv(Socket, 17, 1500),
	<<_:24, 7:8, 0:8, 0:1, 0:31, 0:1, 0:31, 0:32>> = FinalGoaway,
	%% Nothing else is sent; the server closes the connection.
	{error, closed} = gen_tcp:recv(Socket, 3, 1000),
	ok.
%% @todo We should add this test also for discarded DATA and CONTINUATION frames.
%% The test can be the same for CONTINUATION (just send headers differently) but
%% the DATA test should make sure the global window is not corrupted.
%%
%% @todo We should extend this test to have two requests: one initiated before
%% the second GOAWAY, but not terminated; another initiated after the GOAWAY, terminated.
%% Finally the first request is terminated by sending a body and a trailing
%% HEADERS frame. This way we know for sure that the connection state is not corrupt.
%% The client sends one request between the two GOAWAY frames and a second
%% one after the final GOAWAY. Only the first request (stream 1) is expected
%% to be answered before the server closes the socket.
graceful_shutdown_race_condition(Config) ->
	doc("A server in the process of gracefully shutting down must discard frames "
		"for streams initiated by the receiver with identifiers higher than the "
		"identified last stream. This may include frames that alter connection "
		"state such as HEADERS frames. (RFC7540 6.8)"),
	{ok, Socket} = do_handshake(Config),
	ok = sys:terminate(get_remote_pid_tcp(Socket), whatever),
	%% Initial GOAWAY: the last stream id is 2^31-1 and the error code is 0.
	{ok, InitialGoaway} = gen_tcp:recv(Socket, 17, 500),
	<<_:24, 7:8, 0:8, 0:1, 0:31, 0:1, 16#7fffffff:31, 0:32>> = InitialGoaway,
	%% Simulate an in-flight request, sent by the client before the
	%% GOAWAY frame arrived to the client.
	ReqHeaders = [
		{<<":method">>, <<"GET">>},
		{<<":scheme">>, <<"http">>},
		{<<":authority">>, <<"localhost">>}, %% @todo Correct port number.
		{<<":path">>, <<"/delay_hello">>}
	],
	{HeadersBlock, _} = cow_hpack:encode(ReqHeaders),
	ok = gen_tcp:send(Socket, cow_http2:headers(1, fin, HeadersBlock)),
	%% Final GOAWAY: the last stream id is now 1, the in-flight request.
	{ok, FinalGoaway} = gen_tcp:recv(Socket, 17, 2000),
	<<_:24, 7:8, 0:8, 0:1, 0:31, 0:1, 1:31, 0:32>> = FinalGoaway,
	%% The client tries to send another request, ignoring the GOAWAY.
	ok = gen_tcp:send(Socket, cow_http2:headers(3, fin, HeadersBlock)),
	%% The server responds to the first request (streamid 1) and closes.
	%% First the 9-byte header of the response HEADERS frame...
	{ok, RespFrameHead} = gen_tcp:recv(Socket, 9, 1000),
	<<RespHeadersPayloadLength:24, 1, 4, 0:1, 1:31>> = RespFrameHead,
	%% ...then its payload, whose contents are not inspected...
	{ok, _RespHeaders} = gen_tcp:recv(Socket, RespHeadersPayloadLength, 1000),
	%% ...and finally the DATA frame carrying the 12-byte body.
	{ok, RespData} = gen_tcp:recv(Socket, 21, 1000),
	<<12:24, 0, 1, 0:1, 1:31, "Hello world!">> = RespData,
	{error, closed} = gen_tcp:recv(Socket, 3, 1000),
	ok.
% The GOAWAY frame applies to the connection, not a specific stream.
% An endpoint MUST treat a GOAWAY frame with a stream identifier other
% than 0x0 as a connection error (Section 5.4.1) of type