0
Fork 0
mirror of https://github.com/ninenines/cowboy.git synced 2025-07-14 12:20:24 +00:00
cowboy/test/ws_perf_SUITE.erl

309 lines
9.8 KiB
Erlang
Raw Normal View History

%% Copyright (c) Loïc Hoguin <essen@ninenines.eu>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-module(ws_perf_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-import(ct_helper, [config/2]).
-import(ct_helper, [doc/1]).
-import(cowboy_test, [gun_open/2]).
-import(cowboy_test, [gun_down/1]).
%% ct.
%% Top-level groups, one per kind of payload exercised by the suite.
all() ->
	[{group, DataGroup} || DataGroup <- [binary, ascii, mixed, japanese]].
%% Group definitions.
%%
%% Every data group (binary, ascii, mixed, japanese) contains the same set of
%% transport subgroups: the entries returned by cowboy_test:common_groups/2
%% whose name is http, h2c, http_compress or h2c_compress. Entries that are
%% not 3-tuples are ignored.
groups() ->
	Wanted = [http, h2c, http_compress, h2c_compress],
	Candidates = cowboy_test:common_groups(ct_helper:all(?MODULE), no_parallel),
	Transports = lists:filter(fun
		({GroupName, _, _}) -> lists:member(GroupName, Wanted);
		(_) -> false
	end, Candidates),
	[{DataGroup, [], Transports} || DataGroup <- [binary, ascii, mixed, japanese]].
%% Suite setup: nothing to prepare, Config is handed back untouched.
init_per_suite(Config) ->
	%% To profile the current node with `perf`, uncomment the line below.
	% spawn(fun() -> ct:pal(os:cmd("perf record -g -F 9999 -o /tmp/ws_perf.data -p " ++ os:getpid() ++ " -- sleep 60")) end),
	Config.
%% Suite teardown: nothing to release.
end_per_suite(_Config) ->
	ok.
%% Per-group setup.
%%
%% Transport groups (http, http_compress, h2c, h2c_compress) log what is about
%% to run via init_info/2 and then delegate to cowboy_test to prepare the
%% group. Data groups (ascii, mixed, japanese, binary) only record in Config
%% which kind of payload the tests will send.
init_per_group(Name, Config) when Name =:= http; Name =:= http_compress ->
	init_info(Name, Config),
	cowboy_test:init_common_groups(Name, Config, ?MODULE);
init_per_group(Name, Config) when Name =:= h2c; Name =:= h2c_compress ->
	init_info(Name, Config),
	%% The compress flavor adds cowboy_compress_h in front of the stream handler.
	{Flavor, Opts} = case Name of
		h2c -> {vanilla, #{}};
		h2c_compress -> {compress, #{stream_handlers => [cowboy_compress_h, cowboy_stream_h]}}
	end,
	Config1 = cowboy_test:init_http(Name, Opts#{
		connection_window_margin_size => 64*1024,
		%% Needed for Websocket over HTTP/2: do_gun_open_ws/2 waits for
		%% this setting to be advertised before upgrading.
		enable_connect_protocol => true,
		env => #{dispatch => init_dispatch(Config)},
		max_frame_size_sent => 64*1024,
		max_frame_size_received => 16384 * 1024 - 1,
		max_received_frame_rate => {10_000_000, 1},
		stream_window_data_threshold => 1024,
		stream_window_margin_size => 64*1024
	}, [{flavor, Flavor}|Config]),
	%% Replace the existing protocol entry (if any) so that it reads http2,
	%% which is the value do_gun_open_ws/2 matches on.
	lists:keyreplace(protocol, 1, Config1, {protocol, http2});
init_per_group(ascii, Config) ->
	init_text_data("ascii.txt", Config);
init_per_group(mixed, Config) ->
	init_text_data("grok_segond.txt", Config);
init_per_group(japanese, Config) ->
	init_text_data("japanese.txt", Config);
init_per_group(binary, Config) ->
	[{frame_type, binary}|Config].
%% Print a banner naming the transport group being started and the payload
%% in use: the atom binary for binary frames, otherwise the name of the text
%% data file stored in Config.
init_info(Name, Config) ->
	Payload =
		case config(frame_type, Config) of
			binary -> binary;
			text -> config(text_data_filename, Config)
		end,
	Transport =
		case Name of
			h2c_compress -> "cleartext HTTP/2 with compression";
			h2c -> "cleartext HTTP/2";
			http_compress -> "cleartext HTTP/1.1 with compression";
			http -> "cleartext HTTP/1.1"
		end,
	ct:pal("Websocket over ~s (~s)", [Transport, Payload]).
%% Read Filename from the suite's data_dir and prepend the text frame
%% settings to Config: the frame type, the file contents and the file name.
%% Crashes with a badmatch when the file cannot be read.
init_text_data(Filename, Config) ->
	Path = filename:join(config(data_dir, Config), Filename),
	{ok, Contents} = file:read_file(Path),
	[{frame_type, text}, {text_data, Contents}, {text_data_filename, Filename} | Config].
%% Group teardown, delegated to cowboy_test:stop_group/1 for every group.
end_per_group(Name, _Config) ->
	cowboy_test:stop_group(Name).
%% Dispatch configuration.
%% Compile the routing table used by the HTTP/2 groups: on host "localhost",
%% /ws_echo is routed to ws_echo and /ws_ignore to ws_ignore, both with []
%% as handler options. The Config argument is not used.
init_dispatch(_Config) ->
	cowboy_router:compile([
		{"localhost", [
			{"/ws_echo", ws_echo, []},
			{"/ws_ignore", ws_ignore, []}
		]}
	]).
%% Support functions for testing using Gun.
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depends on the underlying protocol the improvements are not all equal. TLS and compression also impact the results. 
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Open a Gun connection and upgrade it to Websocket on Path.
%%
%% Compression is requested from Gun only when the flavor stored in Config
%% is compress. Returns {ok, ConnPid, StreamRef} once the upgrade has been
%% confirmed. Exits with {ws_upgrade_failed, ...} when the server replies
%% with a regular response or the stream errors, and raises error:timeout
%% when none of those messages arrives within one second.
do_gun_open_ws(Path, Config) ->
	ConnPid = gun_open(Config, #{
		http2_opts => #{
			connection_window_margin_size => 64*1024,
			max_frame_size_sent => 64*1024,
			max_frame_size_received => 16384 * 1024 - 1,
			notify_settings_changed => true,
			stream_window_data_threshold => 1024,
			stream_window_margin_size => 64*1024
		},
		tcp_opts => [{nodelay, true}],
		ws_opts => #{compress => config(flavor, Config) =:= compress}
	}),
	%% Over HTTP/2 the upgrade can only be attempted after the server has
	%% announced enable_connect_protocol in its settings; wait for that
	%% notification (requested via notify_settings_changed above).
	case config(protocol, Config) of
		http -> ok;
		http2 ->
			{notify, settings_changed, #{enable_connect_protocol := true}}
				= gun:await(ConnPid, undefined) %% @todo Maybe have a gun:await/1?
	end,
	StreamRef = gun:ws_upgrade(ConnPid, Path),
	receive
		{gun_upgrade, ConnPid, StreamRef, [<<"websocket">>], _} ->
			{ok, ConnPid, StreamRef};
		{gun_response, ConnPid, _, _, Status, Headers} ->
			exit({ws_upgrade_failed, Status, Headers});
		{gun_error, ConnPid, StreamRef, Reason} ->
			exit({ws_upgrade_failed, Reason})
	after 1000 ->
		error(timeout)
	end.
%% Wait for the next Websocket frame delivered by Gun for this connection
%% and stream. Returns {ok, Frame}, or {error, timeout} when no matching
%% message shows up within 30 seconds.
receive_ws(ConnPid, StreamRef) ->
	receive
		{gun_ws, ConnPid, StreamRef, Received} -> {ok, Received}
	after 30000 -> {error, timeout}
	end.
%% Tests.
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depends on the underlying protocol the improvements are not all equal. TLS and compression also impact the results. 
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Echo test with a single 64KiB frame. The work is delegated to do_echo/4,
%% which is defined outside this view; the arguments are presumably the test
%% label, the number of repetitions and the frame size in bytes (to confirm).
echo_1_00064KiB(Config) ->
	doc("Send and receive a 64KiB frame."),
	do_echo(Config, echo_1, 1, 64 * 1024).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depends on the underlying protocol the improvements are not all equal. TLS and compression also impact the results. 
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Echo test with a single 256KiB frame. The work is delegated to do_echo/4,
%% which is defined outside this view; the arguments are presumably the test
%% label, the number of repetitions and the frame size in bytes (to confirm).
echo_1_00256KiB(Config) ->
	doc("Send and receive a 256KiB frame."),
	do_echo(Config, echo_1, 1, 256 * 1024).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depends on the underlying protocol the improvements are not all equal. TLS and compression also impact the results. 
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Echo test with a single 1024KiB frame. The work is delegated to do_echo/4,
%% which is defined outside this view; the arguments are presumably the test
%% label, the number of repetitions and the frame size in bytes (to confirm).
echo_1_01024KiB(Config) ->
	doc("Send and receive a 1024KiB frame."),
	do_echo(Config, echo_1, 1, 1024 * 1024).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depends on the underlying protocol the improvements are not all equal. TLS and compression also impact the results.
The improvements when reading a large request body, with the requests repeated in a fast loop, are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal; it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens with randomly generated binary data as well, so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Echo test case for a single 4096KiB frame.
%%
%% Delegates to do_echo/4 (defined elsewhere in this suite) with the
%% echo_1 tag. The last two arguments appear to be the number of
%% round trips (1) and the frame size in bytes (4096 * 1024).
echo_1_04096KiB(Config) ->
	doc("Send and receive a 4096KiB frame."),
	do_echo(Config, echo_1, 1, 4096 * 1024).
%% Echo test case for a single frame of 16384KiB minus one byte.
%%
%% Minus one because frames can only get so big.
%%
%% Delegates to do_echo/4 (defined elsewhere in this suite) with the
%% echo_1 tag. The last two arguments appear to be the number of
%% round trips (1) and the frame size in bytes (16384 * 1024 - 1).
echo_1_16384KiB(Config) ->
	doc("Send and receive a 16384KiB - 1 frame."),
	do_echo(Config, echo_1, 1, 16384 * 1024 - 1).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depends on the underlying protocol the improvements are not all equal. TLS and compression also impact the results.
The improvements when reading a large request body, with the requests repeated in a fast loop, are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal; it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens with randomly generated binary data as well, so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Echo test case for an empty (0 byte) frame repeated 1000 times.
%%
%% Delegates to do_echo/4 (defined elsewhere in this suite) with the
%% echo_N tag. The last two arguments appear to be the number of
%% round trips (1000) and the frame size in bytes (0).
echo_N_00000B(Config) ->
	doc("Send and receive a 0B frame 1000 times."),
	do_echo(Config, echo_N, 1000, 0).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depends on the underlying protocol the improvements are not all equal. TLS and compression also impact the results.
The improvements when reading a large request body, with the requests repeated in a fast loop, are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal; it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens with randomly generated binary data as well, so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Echo test case for a 256 byte frame repeated 1000 times.
%%
%% Delegates to do_echo/4 (defined elsewhere in this suite) with the
%% echo_N tag. The last two arguments appear to be the number of
%% round trips (1000) and the frame size in bytes (256).
echo_N_00256B(Config) ->
	doc("Send and receive a 256B frame 1000 times."),
	do_echo(Config, echo_N, 1000, 256).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depends on the underlying protocol the improvements are not all equal. TLS and compression also impact the results.
The improvements when reading a large request body, with the requests repeated in a fast loop, are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal; it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens with randomly generated binary data as well, so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Echo test case for a 1024 byte frame repeated 1000 times.
%%
%% Delegates to do_echo/4 (defined elsewhere in this suite) with the
%% echo_N tag. The last two arguments appear to be the number of
%% round trips (1000) and the frame size in bytes (1024).
echo_N_01024B(Config) ->
	doc("Send and receive a 1024B frame 1000 times."),
	do_echo(Config, echo_N, 1000, 1024).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depends on the underlying protocol the improvements are not all equal. TLS and compression also impact the results.
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Echo test case: runs do_echo/4 with 1000 iterations of a 4096-byte frame.
echo_N_04096B(Config) ->
	doc("Send and receive a 4096B frame 1000 times."),
	do_echo(Config, echo_N, 1000, 4096).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depend on the underlying protocol the improvements are no all equal. TLS and compression also impact the results. 
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Echo test case: runs do_echo/4 with 1000 iterations of a 16384-byte frame.
echo_N_16384B(Config) ->
	doc("Send and receive a 16384B frame 1000 times."),
	do_echo(Config, echo_N, 1000, 16384).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depend on the underlying protocol the improvements are no all equal. TLS and compression also impact the results. 
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%echo_N_16384B_10K(Config) ->
% doc("Send and receive a 16384B frame 10000 times."),
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depend on the underlying protocol the improvements are no all equal. TLS and compression also impact the results. 
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
% do_echo(Config, echo_N, 10000, 16384).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depend on the underlying protocol the improvements are no all equal. TLS and compression also impact the results. 
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Time Num send/receive round trips of a FrameSize-byte frame.
%%
%% Opens a Websocket connection on "/ws_echo", builds the frame payload
%% according to the frame_type found in Config (text data comes from
%% do_text_data/2, binary data is random bytes), times do_echo_loop/5
%% with timer:tc/3 and logs the elapsed microseconds under the label What.
%% The connection is then closed and the test waits for it to go down.
do_echo(Config, What, Num, FrameSize) ->
	{ok, ConnPid, StreamRef} = do_gun_open_ws("/ws_echo", Config),
	FrameType = config(frame_type, Config),
	FrameData = case FrameType of
		text -> do_text_data(Config, FrameSize);
		binary -> rand:bytes(FrameSize)
	end,
	%% Optional warm-up pass, currently disabled. Uncomment to heat up
	%% the processes before doing the real run.
%	do_echo_loop(ConnPid, StreamRef, Num, FrameType, FrameData),
	%% timer:tc/3 calls the loop through ?MODULE, so do_echo_loop/5 must be exported.
	{Time, _} = timer:tc(?MODULE, do_echo_loop, [ConnPid, StreamRef, Num, FrameType, FrameData]),
	do_log("~-6s ~-6s ~6s: ~8bµs", [What, FrameType, do_format_size(FrameSize), Time]),
	gun:ws_send(ConnPid, StreamRef, close),
	{ok, close} = receive_ws(ConnPid, StreamRef),
	gun_down(ConnPid).
Implement dynamic socket buffer sizes Cowboy will set the socket's buffer size dynamically to better fit the current workload. When the incoming data is small, a low buffer size reduces the memory footprint and improves responsiveness and therefore performance. When the incoming data is large, such as large HTTP request bodies, a larger buffer size helps us avoid doing too many binary appends and related allocations. Setting a large buffer size for all use cases is sub-optimal because allocating more than needed necessarily results in a performance hit (not just increased memory usage). By default Cowboy starts with a buffer size of 8192 bytes. It then doubles or halves the buffer size depending on the size of the data it receives from the socket. It stops decreasing at 8192 and increasing at 131072 by default. To keep track of the size of the incoming data Cowboy maintains a moving average. It allows Cowboy to avoid changing the buffer too often but still react quickly when necessary. Cowboy will increase the buffer size when the moving average is above 90% of the current buffer size, and decrease when the moving average is below 40% of the current buffer size. The current buffer size and moving average are propagated when switching protocols. The dynamic buffer is implemented in HTTP/1, HTTP/2 and HTTP/1 Websocket. HTTP/2 Websocket has it disabled because it doesn't interact directly with the socket; in that case it is HTTP/2 that has a dynamic buffer. The dynamic buffer provides a very large performance improvement in many scenarios, at minimal cost for others. Because it largely depend on the underlying protocol the improvements are no all equal. TLS and compression also impact the results. 
The improvement when reading a large request body, with the requests repeated in a fast loop are: * HTTP: 6x to 20x faster * HTTPS: 2x to 6x faster * H2: 4x to 5x faster * H2C: 20x to 40x faster I am not sure why H2C's performance was so bad, especially compared to H2, when using default buffer sizes. Dynamic buffers make H2C a lot more viable with default settings. The performance impact on "hello world" type requests is minimal, it goes from -5% to +5% roughly. Websocket improvements vary again depending on the protocol, but also depending on whether compression is enabled: * HTTP echo: roughly 2x faster * HTTP send: roughly 4x faster * H2C echo: roughly 2x faster * H2C send: 3x to 4x faster In the echo test we reply back, and Gun doesn't have the dynamic buffer optimisation, so that probably explains the x2 difference. With compression however there isn't much improvement. The results are roughly within -10% to +10% of each other. Zlib compression seems to be a bottleneck, or at least to modify the performance profile to such an extent that the size of the buffer does not matter. This happens to randomly generated binary data as well so it is probably not caused by the test data.
2025-02-03 15:36:16 +01:00
%% Send the frame and wait for the identical frame to come back,
%% Num times in a row. Returns ok once the counter reaches 0.
%% A reply that differs from what was sent fails the match and crashes the test.
do_echo_loop(_, _, 0, _, _) ->
	ok;
do_echo_loop(ConnPid, StreamRef, Num, FrameType, FrameData) ->
	gun:ws_send(ConnPid, StreamRef, {FrameType, FrameData}),
	{ok, {FrameType, FrameData}} = receive_ws(ConnPid, StreamRef),
	do_echo_loop(ConnPid, StreamRef, Num - 1, FrameType, FrameData).
%% Test case: time the sending of a single frame of 64KiB.
send_1_00064KiB(Config) ->
	doc("Send a 64KiB frame."),
	FrameSize = 64 * 1024,
	do_send(Config, send_1, 1, FrameSize).
%% Test case: time the sending of a single frame of 256KiB.
send_1_00256KiB(Config) ->
	doc("Send a 256KiB frame."),
	FrameSize = 256 * 1024,
	do_send(Config, send_1, 1, FrameSize).
%% Test case: time the sending of a single frame of 1024KiB.
send_1_01024KiB(Config) ->
	doc("Send a 1024KiB frame."),
	FrameSize = 1024 * 1024,
	do_send(Config, send_1, 1, FrameSize).
%% Test case: time the sending of a single frame of 4096KiB.
send_1_04096KiB(Config) ->
	doc("Send a 4096KiB frame."),
	FrameSize = 4096 * 1024,
	do_send(Config, send_1, 1, FrameSize).
%% Test case: time the sending of a single frame of 16384KiB minus one byte.
%% Minus one because frames can only get so big.
send_1_16384KiB(Config) ->
	doc("Send a 16384KiB - 1 frame."),
	FrameSize = 16384 * 1024 - 1,
	do_send(Config, send_1, 1, FrameSize).
%% Test case: time the sending of 10000 empty frames.
send_N_00000B(Config) ->
	doc("Send a 0B frame 10000 times."),
	Count = 10000,
	do_send(Config, send_N, Count, 0).
%% Test case: time the sending of 10000 frames of 256 bytes each.
send_N_00256B(Config) ->
	doc("Send a 256B frame 10000 times."),
	Count = 10000,
	do_send(Config, send_N, Count, 256).
%% Test case: time the sending of 10000 frames of 1024 bytes each.
send_N_01024B(Config) ->
	doc("Send a 1024B frame 10000 times."),
	Count = 10000,
	do_send(Config, send_N, Count, 1024).
%% Test case: time the sending of 10000 frames of 4096 bytes each.
send_N_04096B(Config) ->
	doc("Send a 4096B frame 10000 times."),
	Count = 10000,
	do_send(Config, send_N, Count, 4096).
%% Test case: time the sending of 10000 frames of 16384 bytes each.
send_N_16384B(Config) ->
	doc("Send a 16384B frame 10000 times."),
	Count = 10000,
	do_send(Config, send_N, Count, 16384).
%send_N_16384B_10K(Config) ->
% doc("Send and receive a 16384B frame 10000 times."),
% do_send(Config, send_N, 10000, 16384).
%% Open a Websocket connection on the "/ws_ignore" path, measure how long
%% do_send_loop/5 takes to send Num frames of FrameSize bytes, and log
%% the measurement.
%%
%% The frame type comes from the frame_type key of Config. Text frames
%% get their payload from do_text_data/2; binary frames use random bytes.
%% What is only used as a label in the logged line.
%%
%% After logging, a close frame is sent, a close is expected back, and
%% the function finishes by calling gun_down/1 on the connection.
do_send(Config, What, Num, FrameSize) ->
	{ok, ConnPid, StreamRef} = do_gun_open_ws("/ws_ignore", Config),
	FrameType = config(frame_type, Config),
	FrameData = case FrameType of
		binary -> rand:bytes(FrameSize);
		text -> do_text_data(Config, FrameSize)
	end,
	%% Heat up the processes before doing the real run.
%	do_send_loop(ConnPid, StreamRef, Num, FrameType, FrameData),
	LoopArgs = [ConnPid, StreamRef, Num, FrameType, FrameData],
	{Elapsed, _} = timer:tc(?MODULE, do_send_loop, LoopArgs),
	SizeLabel = do_format_size(FrameSize),
	do_log("~-6s ~-6s ~6s: ~8bµs", [What, FrameType, SizeLabel, Elapsed]),
	gun:ws_send(ConnPid, StreamRef, close),
	{ok, close} = receive_ws(ConnPid, StreamRef),
	gun_down(ConnPid).
%% Send the frame {FrameType, FrameData} Num times in a row without
%% waiting for anything in between.
%%
%% Once the counter reaches 0, a "CHECK" text frame is sent and the same
%% frame must be received back before the function returns ok.
%% NOTE(review): presumably this round trip confirms that the peer has
%% gone through all previously sent frames -- confirm against the handler.
do_send_loop(ConnPid, StreamRef, Num, FrameType, FrameData) ->
	case Num of
		0 ->
			Check = {text, <<"CHECK">>},
			gun:ws_send(ConnPid, StreamRef, Check),
			{ok, Check} = receive_ws(ConnPid, StreamRef),
			ok;
		_ ->
			gun:ws_send(ConnPid, StreamRef, {FrameType, FrameData}),
			do_send_loop(ConnPid, StreamRef, Num - 1, FrameType, FrameData)
	end.
%% Internal.
%% Return FrameSize bytes of text, built from the binary stored under
%% the text_data key of Config.
do_text_data(Config, FrameSize) ->
	SourceText = config(text_data, Config),
	do_text_data1(SourceText, FrameSize).
%% Return the first FrameSize bytes of LargeText, doubling LargeText
%% as many times as needed until it is at least FrameSize bytes long.
%%
%% An empty LargeText can never grow by being doubled, so asking for a
%% non-empty result from it fails immediately with badarg instead of
%% recursing forever.
%%
%% The truncation is done on a byte offset.
%% NOTE(review): for multi-byte text this may cut the last character in
%% the middle -- confirm this is acceptable for the text frames sent.
do_text_data1(<<>>, FrameSize) when FrameSize > 0 ->
	error(badarg);
do_text_data1(LargeText, FrameSize) when byte_size(LargeText) >= FrameSize ->
	binary:part(LargeText, 0, FrameSize);
do_text_data1(LargeText, FrameSize) ->
	do_text_data1(<<LargeText/binary, LargeText/binary>>, FrameSize).
%% Format a size in bytes as a short string using the largest unit
%% among B, KiB and MiB that does not exceed it. The value is truncated
%% to a whole number of units (integer division), not rounded.
do_format_size(Size) ->
	KiB = 1024,
	MiB = KiB * KiB,
	{Value, Unit} = if
		Size < KiB -> {Size, "B"};
		Size < MiB -> {Size div KiB, "KiB"};
		true -> {Size div MiB, "MiB"}
	end,
	integer_to_list(Value) ++ Unit.
%% Write a formatted message to the Common Test log and also print it,
%% followed by a newline, through the ct_default_gl IO device.
do_log(Str, Args) ->
	ct:log(Str, Args),
	ConsoleFormat = Str ++ "~n",
	io:format(ct_default_gl, ConsoleFormat, Args).