From 18edcab170087be90f348d41d9294a6f9a7bc238 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Fri, 1 Dec 2023 16:00:47 +0200 Subject: [PATCH 01/12] Work in progress on GSO --- src/send_udp.cpp | 100 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 23 deletions(-) diff --git a/src/send_udp.cpp b/src/send_udp.cpp index f31a78d6d..5fc5a87d3 100644 --- a/src/send_udp.cpp +++ b/src/send_udp.cpp @@ -17,11 +17,18 @@ #include #include #include +#include #include #include #include #include #include +#if SPEAD2_USE_SENDMMSG +# include +# include +# include +# include +#endif namespace spead2::send { @@ -38,6 +45,7 @@ class udp_writer : public writer virtual void wakeup() override final; static constexpr int max_batch = 64; + static constexpr int max_gso_message_size = 65535; // maximum size the kernel will accept #if SPEAD2_USE_SENDMMSG struct mmsghdr msgvec[max_batch]; std::vector msg_iov; @@ -45,9 +53,11 @@ class udp_writer : public writer { transmit_packet packet; std::unique_ptr scratch; + bool merged; // packet is part of the same message as the previous packet } packets[max_batch]; + int current_gso_size = -1; - void send_packets(int first, int last); + void send_packets(int first, int last, int first_msg, int last_msg); #else std::unique_ptr scratch; #endif @@ -65,40 +75,49 @@ class udp_writer : public writer #if SPEAD2_USE_SENDMMSG -void udp_writer::send_packets(int first, int last) +void udp_writer::send_packets(int first, int last, int first_msg, int last_msg) { // Try sending - int sent = sendmmsg(socket.native_handle(), msgvec + first, last - first, MSG_DONTWAIT); + int sent = sendmmsg(socket.native_handle(), msgvec + first_msg, last_msg - first_msg, MSG_DONTWAIT); int groups = 0; if (sent < 0 && errno != EAGAIN && errno != EWOULDBLOCK) { - auto *item = packets[first].packet.item; - if (!item->result) - item->result = boost::system::error_code(errno, boost::asio::error::get_system_category()); - groups += packets[first].packet.last; - first++; + boost::system::error_code result(errno, boost::asio::error::get_system_category()); + do + { + auto *item = packets[first].packet.item; + if (!item->result) + item->result = result; + groups += packets[first].packet.last; + first++; + } while (first < last && packets[first].merged); + first_msg++; } else if (sent > 0) { for (int i = 0; i < sent; i++) { - auto *item = packets[first].packet.item; - item->bytes_sent += packets[first].packet.size; - groups += packets[first].packet.last; - first++; + do + { + auto *item = packets[first].packet.item; + item->bytes_sent += packets[first].packet.size; + groups += packets[first].packet.last; + first++; + } while (first < last && packets[first].merged); } + first_msg += sent; } if (groups > 0) groups_completed(groups); - if (first < last) + if (first_msg < last_msg) { // We didn't manage to send it all: schedule a new attempt once there is // buffer space. socket.async_send( boost::asio::null_buffers(), - [this, first, last](const boost::system::error_code &, std::size_t) { - send_packets(first, last); + [this, first, last, first_msg, last_msg](const boost::system::error_code &, std::size_t) { + send_packets(first, last, first_msg, last_msg); }); } else @@ -125,21 +144,60 @@ void udp_writer::wakeup() // We have at least one packet to send. See if we can get some more. int n; std::size_t n_iov = packets[0].packet.buffers.size(); + std::size_t max_size = packets[0].packet.size; for (n = 1; n < max_batch; n++) { result = get_packet(packets[n].packet, packets[n].scratch.get()); if (result != packet_result::SUCCESS) break; n_iov += packets[n].packet.buffers.size(); + max_size = std::max(max_size, packets[n].packet.size); + } + + int new_gso_size = max_size; + if (new_gso_size != current_gso_size) + { + int ret = setsockopt(socket.native_handle(), IPPROTO_UDP, UDP_SEGMENT, &new_gso_size, sizeof(new_gso_size)); + if (ret != -1) + { + current_gso_size = new_gso_size; + } + // TODO: handle case where it fails and we're left with a GSO size that's too small + // but > 0. Particularly handle case where packet size is too large. } msg_iov.resize(n_iov); std::size_t offset = 0; + int msgs = 0; + std::size_t merged_size = 0; for (int i = 0; i < n; i++) { - auto &hdr = msgvec[i].msg_hdr; - hdr.msg_iov = &msg_iov[offset]; - hdr.msg_iovlen = packets[i].packet.buffers.size(); + /* Check if we can merge with the previous packet using generalised + * segmentation offload. */ + if (i == 0 + || packets[i].packet.substream_index != packets[i - 1].packet.substream_index + || packets[i - 1].packet.size != current_gso_size + || merged_size + packets[i].packet.size > max_gso_message_size) + // TODO: also use UDP_MAX_SEGMENTS + { + // Can't merge, so initialise a new header + auto &hdr = msgvec[msgs].msg_hdr; + hdr.msg_iov = &msg_iov[offset]; + hdr.msg_iovlen = 0; + const auto &endpoint = endpoints[packets[i].packet.substream_index]; + hdr.msg_name = (void *) endpoint.data(); + hdr.msg_namelen = endpoint.size(); + msgs++; + packets[i].merged = false; + merged_size = 0; + } + else + { + packets[i].merged = true; + } + auto &hdr = msgvec[msgs - 1].msg_hdr; + hdr.msg_iovlen += packets[i].packet.buffers.size(); + merged_size += packets[i].packet.size; for (const auto &buffer : packets[i].packet.buffers) { msg_iov[offset].iov_base = const_cast( @@ -147,12 +205,8 @@ void udp_writer::wakeup() msg_iov[offset].iov_len = boost::asio::buffer_size(buffer); offset++; } - const auto &endpoint = endpoints[packets[i].packet.substream_index]; - hdr.msg_name = (void *) endpoint.data(); - hdr.msg_namelen = endpoint.size(); } - - send_packets(0, n); + send_packets(0, n, 0, msgs); } #else // SPEAD2_USE_SENDMMSG From fc8dea9e15b3b5f46abfaa0e64dab384a4e50384 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Sat, 2 Dec 2023 10:27:58 +0200 Subject: [PATCH 02/12] Make GSO more robust - Compile-time detection of the socket option - Run-time detection of ENOPROTOOPT - Best effort (and logging) to deal with other errors --- include/spead2/common_features.h.in | 1 + meson.build | 8 +++++++ meson.options | 1 + src/send_udp.cpp | 34 +++++++++++++++++++++++------ 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/include/spead2/common_features.h.in b/include/spead2/common_features.h.in index 0e1c48dcf..a89551e7a 100644 --- a/include/spead2/common_features.h.in +++ b/include/spead2/common_features.h.in @@ -33,6 +33,7 @@ #define SPEAD2_USE_MLX5DV @SPEAD2_USE_MLX5DV@ #define SPEAD2_USE_RECVMMSG @SPEAD2_USE_RECVMMSG@ #define SPEAD2_USE_SENDMMSG @SPEAD2_USE_SENDMMSG@ +#define SPEAD2_USE_GSO @SPEAD2_USE_GSO@ #define SPEAD2_USE_EVENTFD @SPEAD2_USE_EVENTFD@ #define SPEAD2_USE_PTHREAD_SETAFFINITY_NP @SPEAD2_USE_PTHREAD_SETAFFINITY_NP@ #define SPEAD2_USE_FMV @SPEAD2_USE_FMV@ diff --git a/meson.build b/meson.build index 442127a74..c492b9aee 100644 --- a/meson.build +++ b/meson.build @@ -160,6 +160,13 @@ use_sendmmsg = get_option('sendmmsg').require( prefix : '#include ' ) ).allowed() +use_gso = get_option('gso').require( + compiler.get_define( + 'UDP_SEGMENT', + args : '-D_GNU_SOURCE', + prefix : '#include ' + ) != '' +).allowed() use_eventfd = get_option('eventfd').require( compiler.has_function( 'eventfd', @@ -260,6 +267,7 @@ conf.set10('SPEAD2_USE_IBV_HW_RATE_LIMIT', use_ibv_hw_rate_limit) conf.set10('SPEAD2_USE_MLX5DV', mlx5_dep.found()) conf.set10('SPEAD2_USE_RECVMMSG', use_recvmmsg) conf.set10('SPEAD2_USE_SENDMMSG', use_sendmmsg) +conf.set10('SPEAD2_USE_GSO', use_gso) conf.set10('SPEAD2_USE_EVENTFD', use_eventfd) conf.set10('SPEAD2_USE_POSIX_SEMAPHORES', use_posix_semaphores) conf.set10('SPEAD2_USE_PTHREAD_SETAFFINITY_NP', use_pthread_setaffinity_np) diff --git a/meson.options b/meson.options index 65187a277..2e02bafce 100644 --- a/meson.options +++ b/meson.options @@ -21,6 +21,7 @@ option('pcap', type : 'feature', description : 'Support reading from pcap files' option('cap', type : 'feature', description : 'Use libcap') option('recvmmsg', type : 'feature', description : 'Use recvmmsg system call') option('sendmmsg', type : 'feature', description : 'Use sendmmsg system call') +option('gso', type : 'feature', description : 'Use generic segmentation offload') option('eventfd', type : 'feature', description : 'Use eventfd system call for semaphores') option('posix_semaphores', type : 'feature', description : 'Use POSIX semaphores') option('pthread_setaffinity_np', type : 'feature', description : 'Use pthread_setaffinity_np to set thread affinity') diff --git a/src/send_udp.cpp b/src/send_udp.cpp index 5fc5a87d3..397f5d3ba 100644 --- a/src/send_udp.cpp +++ b/src/send_udp.cpp @@ -45,8 +45,8 @@ class udp_writer : public writer virtual void wakeup() override final; static constexpr int max_batch = 64; - static constexpr int max_gso_message_size = 65535; // maximum size the kernel will accept #if SPEAD2_USE_SENDMMSG + static constexpr int max_gso_message_size = 65535; // maximum size the kernel will accept struct mmsghdr msgvec[max_batch]; std::vector msg_iov; struct @@ -55,7 +55,8 @@ class udp_writer : public writer std::unique_ptr scratch; bool merged; // packet is part of the same message as the previous packet } packets[max_batch]; - int current_gso_size = -1; + // -1 means not supported at runtime, 0 means supported but not in use + int current_gso_size = 0; void send_packets(int first, int last, int first_msg, int last_msg); #else @@ -154,17 +155,35 @@ void udp_writer::wakeup() max_size = std::max(max_size, packets[n].packet.size); } +#if SPEAD2_USE_GSO int new_gso_size = max_size; - if (new_gso_size != current_gso_size) + if (new_gso_size != current_gso_size && current_gso_size != -1) { int ret = setsockopt(socket.native_handle(), IPPROTO_UDP, UDP_SEGMENT, &new_gso_size, sizeof(new_gso_size)); if (ret != -1) { current_gso_size = new_gso_size; } - // TODO: handle case where it fails and we're left with a GSO size that's too small - // but > 0. Particularly handle case where packet size is too large. + else if (errno == ENOPROTOOPT) + { + /* Socket option is not supported on this platform. Just + * disable GSO in our code. + */ + current_gso_size = -1; + } + else + { + /* Something else has gone wrong. Make a best effort to disable + * GSO on the socket. + */ + std::error_code code(errno, std::system_category()); + log_warning("failed to set UDP_SEGMENT socket option to %1%: %2% (%3%)", + new_gso_size, code.value(), code.message()); + current_gso_size = new_gso_size = 0; + setsockopt(socket.native_handle(), IPPROTO_UDP, UDP_SEGMENT, &new_gso_size, sizeof(new_gso_size)); + } } +#endif msg_iov.resize(n_iov); std::size_t offset = 0; @@ -174,9 +193,10 @@ void udp_writer::wakeup() { /* Check if we can merge with the previous packet using generalised * segmentation offload. */ - if (i == 0 + if (!SPEAD2_USE_GSO + || i == 0 || packets[i].packet.substream_index != packets[i - 1].packet.substream_index - || packets[i - 1].packet.size != current_gso_size + || (int) packets[i - 1].packet.size != current_gso_size || merged_size + packets[i].packet.size > max_gso_message_size) // TODO: also use UDP_MAX_SEGMENTS { From 53d4b5a777a54bd77d622887c08359931945d033 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Sat, 2 Dec 2023 11:03:04 +0200 Subject: [PATCH 03/12] UDP: document that max_batch is limited to UDP_MAX_SEGMENTS --- src/send_udp.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/send_udp.cpp b/src/send_udp.cpp index 397f5d3ba..8c0de0064 100644 --- a/src/send_udp.cpp +++ b/src/send_udp.cpp @@ -44,6 +44,11 @@ class udp_writer : public writer virtual void wakeup() override final; + /* NB: Linux has a maximum of 64 segments for GSO (UDP_MAX_SEGMENTS in the + * kernel, but it doesn't seem to be exposed to userspace). If max_batch + * is increased, logic will need to be added to the GSO merging to prevent + * creating messages bigger than this. + */ static constexpr int max_batch = 64; #if SPEAD2_USE_SENDMMSG static constexpr int max_gso_message_size = 65535; // maximum size the kernel will accept @@ -198,7 +203,6 @@ void udp_writer::wakeup() || packets[i].packet.substream_index != packets[i - 1].packet.substream_index || (int) packets[i - 1].packet.size != current_gso_size || merged_size + packets[i].packet.size > max_gso_message_size) - // TODO: also use UDP_MAX_SEGMENTS { // Can't merge, so initialise a new header auto &hdr = msgvec[msgs].msg_hdr; From dc4e96a496f836f8b73c353335e357feb5521f5d Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 4 Dec 2023 08:13:35 +0200 Subject: [PATCH 04/12] Make GSO robust to non-supporting devices If sendmmsg fails with GSO, try again without GSO, and if that succeeds, disable GSO. --- src/send_udp.cpp | 204 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 144 insertions(+), 60 deletions(-) diff --git a/src/send_udp.cpp b/src/send_udp.cpp index 8c0de0064..95980bc3c 100644 --- a/src/send_udp.cpp +++ b/src/send_udp.cpp @@ -39,6 +39,11 @@ namespace class udp_writer : public writer { private: + // Some magic values for current_gso_size + static constexpr int gso_inactive = 0; ///< GSO allowed, but socket option not currently set + static constexpr int gso_disabled = -1; ///< GSO failed; do not try again + static constexpr int gso_probe = -2; ///< Last send with GSO failed; retrying without GSO + boost::asio::ip::udp::socket socket; std::vector endpoints; @@ -60,10 +65,27 @@ class udp_writer : public writer std::unique_ptr scratch; bool merged; // packet is part of the same message as the previous packet } packets[max_batch]; - // -1 means not supported at runtime, 0 means supported but not in use - int current_gso_size = 0; + int current_gso_size = gso_inactive; - void send_packets(int first, int last, int first_msg, int last_msg); +#if SPEAD2_USE_GSO + /// Set the socket option + void set_gso_size(int size, boost::system::error_code &result); +#endif + + /** + * Set up @ref msgvec from @ref msg_iov. + * + * The packets in [first_packet, last_packet) are assumed to have already + * been set in @ref msg_iov, starting from @a first_iov. If @a gso_size is + * positive, then multiple packets may be concatenated into a single + * element of @ref msgvec, provided that all but the last have size + * @a gso_size. Otherwise, each packet gets its own entry in @ref msgvec. + * + * @return The past-the-end index into @ref msgvec after the packets are + * filled in. + */ + int prepare_msgvec(int first_packet, int last_packet, int first_msg, int first_iov, int gso_size); + void send_packets(int first_packet, int last_packet, int first_msg, int last_msg); #else std::unique_ptr scratch; #endif @@ -81,35 +103,84 @@ class udp_writer : public writer #if SPEAD2_USE_SENDMMSG -void udp_writer::send_packets(int first, int last, int first_msg, int last_msg) +#if SPEAD2_USE_GSO +void udp_writer::set_gso_size(int size, boost::system::error_code &result) +{ + if (setsockopt(socket.native_handle(), IPPROTO_UDP, UDP_SEGMENT, + &size, sizeof(size)) == -1) + { + result.assign(errno, boost::asio::error::get_system_category()); + } + else + { + result.clear(); + } +} +#endif + +void udp_writer::send_packets(int first_packet, int last_packet, int first_msg, int last_msg) { +restart: // Try sending int sent = sendmmsg(socket.native_handle(), msgvec + first_msg, last_msg - first_msg, MSG_DONTWAIT); int groups = 0; + boost::system::error_code result; if (sent < 0 && errno != EAGAIN && errno != EWOULDBLOCK) { - boost::system::error_code result(errno, boost::asio::error::get_system_category()); + /* Not all device drivers support GSO. If we were trying with GSO, try again + * without. + */ + result.assign(errno, boost::asio::error::get_system_category()); +#if SPEAD2_USE_GSO + if (current_gso_size == gso_probe) + { + /* We tried sending with GSO and it failed, but resending without GSO + * also failed, so the fault is probably not lack of GSO support. Allow + * GSO to be used again. + */ + current_gso_size = gso_inactive; + } + else if (current_gso_size > 0) + { + set_gso_size(0, result); + if (!result) + { + /* Re-compute msgvec without GSO */ + current_gso_size = gso_probe; + last_msg = prepare_msgvec(first_packet, last_packet, first_msg, + msgvec[first_msg].msg_hdr.msg_iov - msg_iov.data(), + 0); + goto restart; + } + } +#endif do { - auto *item = packets[first].packet.item; + auto *item = packets[first_packet].packet.item; if (!item->result) item->result = result; - groups += packets[first].packet.last; - first++; - } while (first < last && packets[first].merged); + groups += packets[first_packet].packet.last; + first_packet++; + } while (first_packet < last_packet && packets[first_packet].merged); first_msg++; } else if (sent > 0) { + if (current_gso_size == gso_probe) + { + // Sending with GSO failed and without GSO succeeded. The network + // device probably does not support it, so don't try again. + current_gso_size = gso_disabled; + } for (int i = 0; i < sent; i++) { do { - auto *item = packets[first].packet.item; - item->bytes_sent += packets[first].packet.size; - groups += packets[first].packet.last; - first++; - } while (first < last && packets[first].merged); + auto *item = packets[first_packet].packet.item; + item->bytes_sent += packets[first_packet].packet.size; + groups += packets[first_packet].packet.last; + first_packet++; + } while (first_packet < last_packet && packets[first_packet].merged); } first_msg += sent; } @@ -122,8 +193,10 @@ void udp_writer::send_packets(int first, int last, int first_msg, int last_msg) // buffer space. socket.async_send( boost::asio::null_buffers(), - [this, first, last, first_msg, last_msg](const boost::system::error_code &, std::size_t) { - send_packets(first, last, first_msg, last_msg); + [this, first_packet, last_packet, first_msg, last_msg]( + const boost::system::error_code &, std::size_t + ) { + send_packets(first_packet, last_packet, first_msg, last_msg); }); } else @@ -132,6 +205,44 @@ void udp_writer::send_packets(int first, int last, int first_msg, int last_msg) } } +int udp_writer::prepare_msgvec(int first_packet, int last_packet, int first_msg, int first_iov, int gso_size) +{ + int merged_size = 0; + int iov = first_iov; + int msg = first_msg; + for (int i = first_packet; i < last_packet; i++) + { + /* Check if we can merge with the previous packet using generalised + * segmentation offload. */ + if (!SPEAD2_USE_GSO + || i == first_packet + || (int) packets[i - 1].packet.size != gso_size + || packets[i].packet.substream_index != packets[i - 1].packet.substream_index + || merged_size + packets[i].packet.size > max_gso_message_size) + { + // Can't merge, so initialise a new header + auto &hdr = msgvec[msg].msg_hdr; + hdr.msg_iov = &msg_iov[iov]; + hdr.msg_iovlen = 0; + const auto &endpoint = endpoints[packets[i].packet.substream_index]; + hdr.msg_name = (void *) endpoint.data(); + hdr.msg_namelen = endpoint.size(); + msg++; + packets[i].merged = false; + merged_size = 0; + } + else + { + packets[i].merged = true; + } + auto &hdr = msgvec[msg - 1].msg_hdr; + hdr.msg_iovlen += packets[i].packet.buffers.size(); + merged_size += packets[i].packet.size; + iov += packets[i].packet.buffers.size(); + } + return msg; +} + void udp_writer::wakeup() { packet_result result = get_packet(packets[0].packet, packets[0].scratch.get()); @@ -162,75 +273,48 @@ void udp_writer::wakeup() #if SPEAD2_USE_GSO int new_gso_size = max_size; - if (new_gso_size != current_gso_size && current_gso_size != -1) + if (new_gso_size != current_gso_size && current_gso_size >= 0) { - int ret = setsockopt(socket.native_handle(), IPPROTO_UDP, UDP_SEGMENT, &new_gso_size, sizeof(new_gso_size)); - if (ret != -1) - { + boost::system::error_code result; + set_gso_size(new_gso_size, result); + if (!result) current_gso_size = new_gso_size; - } - else if (errno == ENOPROTOOPT) + else if (result == boost::system::errc::no_protocol_option) // ENOPROTOOPT { /* Socket option is not supported on this platform. Just * disable GSO in our code. */ - current_gso_size = -1; + current_gso_size = gso_disabled; } else { /* Something else has gone wrong. Make a best effort to disable * GSO on the socket. */ - std::error_code code(errno, std::system_category()); log_warning("failed to set UDP_SEGMENT socket option to %1%: %2% (%3%)", - new_gso_size, code.value(), code.message()); - current_gso_size = new_gso_size = 0; - setsockopt(socket.native_handle(), IPPROTO_UDP, UDP_SEGMENT, &new_gso_size, sizeof(new_gso_size)); + new_gso_size, result.value(), result.message()); + set_gso_size(0, result); + if (!result) + current_gso_size = 0; } } #endif + /* Fill in msg_iov from the packets */ msg_iov.resize(n_iov); - std::size_t offset = 0; - int msgs = 0; - std::size_t merged_size = 0; + int iov = 0; for (int i = 0; i < n; i++) { - /* Check if we can merge with the previous packet using generalised - * segmentation offload. */ - if (!SPEAD2_USE_GSO - || i == 0 - || packets[i].packet.substream_index != packets[i - 1].packet.substream_index - || (int) packets[i - 1].packet.size != current_gso_size - || merged_size + packets[i].packet.size > max_gso_message_size) - { - // Can't merge, so initialise a new header - auto &hdr = msgvec[msgs].msg_hdr; - hdr.msg_iov = &msg_iov[offset]; - hdr.msg_iovlen = 0; - const auto &endpoint = endpoints[packets[i].packet.substream_index]; - hdr.msg_name = (void *) endpoint.data(); - hdr.msg_namelen = endpoint.size(); - msgs++; - packets[i].merged = false; - merged_size = 0; - } - else - { - packets[i].merged = true; - } - auto &hdr = msgvec[msgs - 1].msg_hdr; - hdr.msg_iovlen += packets[i].packet.buffers.size(); - merged_size += packets[i].packet.size; for (const auto &buffer : packets[i].packet.buffers) { - msg_iov[offset].iov_base = const_cast( + msg_iov[iov].iov_base = const_cast( boost::asio::buffer_cast(buffer)); - msg_iov[offset].iov_len = boost::asio::buffer_size(buffer); - offset++; + msg_iov[iov].iov_len = boost::asio::buffer_size(buffer); + iov++; } } - send_packets(0, n, 0, msgs); + int n_msgs = prepare_msgvec(0, n, 0, 0, current_gso_size); + send_packets(0, n, 0, n_msgs); } #else // SPEAD2_USE_SENDMMSG From 493dca5446ad525371f85cabb942d21f212ec1df Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 4 Dec 2023 08:48:27 +0200 Subject: [PATCH 05/12] Add some debug logging to GSO state machine transitions --- src/send_udp.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/send_udp.cpp b/src/send_udp.cpp index 95980bc3c..ad38cbf1e 100644 --- a/src/send_udp.cpp +++ b/src/send_udp.cpp @@ -168,6 +168,7 @@ void udp_writer::send_packets(int first_packet, int last_packet, int first_msg, { if (current_gso_size == gso_probe) { + log_debug("disabling GSO because sending with it failed and without succeeded"); // Sending with GSO failed and without GSO succeeded. The network // device probably does not support it, so don't try again. current_gso_size = gso_disabled; @@ -284,6 +285,7 @@ void udp_writer::wakeup() /* Socket option is not supported on this platform. Just * disable GSO in our code. */ + log_debug("disabling GSO because socket option is not supported"); current_gso_size = gso_disabled; } else From ca5294d43dc166a84a4895205a02f61ac4242bb1 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 4 Dec 2023 10:28:50 +0200 Subject: [PATCH 06/12] Mark gso_* constants as [[maybe_unused]] --- src/send_udp.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/send_udp.cpp b/src/send_udp.cpp index ad38cbf1e..b8a9a882b 100644 --- a/src/send_udp.cpp +++ b/src/send_udp.cpp @@ -39,11 +39,6 @@ namespace class udp_writer : public writer { private: - // Some magic values for current_gso_size - static constexpr int gso_inactive = 0; ///< GSO allowed, but socket option not currently set - static constexpr int gso_disabled = -1; ///< GSO failed; do not try again - static constexpr int gso_probe = -2; ///< Last send with GSO failed; retrying without GSO - boost::asio::ip::udp::socket socket; std::vector endpoints; @@ -56,6 +51,14 @@ class udp_writer : public writer */ static constexpr int max_batch = 64; #if SPEAD2_USE_SENDMMSG + // Some magic values for current_gso_size + ///< GSO allowed, but socket option not currently set + [[maybe_unused]] static constexpr int gso_inactive = 0; + ///< GSO failed; do not try again + [[maybe_unused]] static constexpr int gso_disabled = -1; + ///< Last send with GSO failed; retrying without GSO + [[maybe_unused]] static constexpr int gso_probe = -2; + static constexpr int max_gso_message_size = 65535; // maximum size the kernel will accept struct mmsghdr msgvec[max_batch]; std::vector msg_iov; From e2e43fc6422294e2a68cdc50f75a3bbd27875eba Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 4 Dec 2023 10:29:07 +0200 Subject: [PATCH 07/12] Add some dev docs on GSO --- doc/dev-send-gso.rst | 49 ++++++++++++++++++++++++++++++++++++++++++++ doc/developer.rst | 1 + 2 files changed, 50 insertions(+) create mode 100644 doc/dev-send-gso.rst diff --git a/doc/dev-send-gso.rst b/doc/dev-send-gso.rst new file mode 100644 index 000000000..620833d21 --- /dev/null +++ b/doc/dev-send-gso.rst @@ -0,0 +1,49 @@ +Generic segmentation offload +============================ +Linux supports a mechanism called :dfn:`generic segmentation offload` (GSO) to +reduce packet overheads when transmitting UDP data through the kernel +networking stack. A good overview can be found on `Cloudflare's blog`_, but +the basic idea is this: + +1. Userspace concatenates multiple smaller packets into one mega-packet for + submission to the kernel. +2. Most of the networking stack operates on the mega-packet. +3. As late as possible (and possibly on the NIC) the mega-packet is + re-segmented into the original packets. + +The re-segmentation uses a user-supplied parameter (socket option) indicating +the size of the original packets. This imposes a limitation that the original +packets were all the same size, except perhaps for the last one in the +mega-packet. + +The support for this in spead2 is dependent on the :manpage:`sendmmsg(2)` +support. While there is no fundamental reason GSO can't be used without +:manpage:`sendmmsg(2)`, supporting it would complicate the code significantly, +and GSO is a much more recent feature so it is unlikely that this combination +would ever be needed. + +Run-time detection of support is unfortunately rather complicated. The simple +part is that an older kernel will not support the socket option. If that +occurs, we simply disable GSO for the stream. A more tricky problem is that +actually sending the message may fail for several reasons: + +- Fragmentation doesn't seem to be supported, so if the segment size is bigger + than the MTU, it will fail. +- If hardware checksumming is disabled (or presumably if it is not supported), + it will fail. + +To cope with this complication, a state machine is used. It has four possible +states: + +- **active**: the socket option is set to a positive value +- **inactive**: the socket option is set to zero, but we may still transition + to active +- **probe**: the last send in active state failed; the socket option is now + set to zero and we're retrying +- **disabled**: the socket option is set to zero, and we will never try to set + it again. + +If send fails while in state **active**, we switch to state **probe** and try +again (without GSO). If that succeeds, we conclude that GSO is non-functional +for this stream and permanently go to **disabled**. If that also fails, we +conclude that the problem was unrelated to GSO and return to **inactive**. diff --git a/doc/developer.rst b/doc/developer.rst index 5562e9a00..0698c979e 100644 --- a/doc/developer.rst +++ b/doc/developer.rst @@ -27,4 +27,5 @@ the C++ classes to extend functionality. dev-recv-destruction dev-recv-chunk-group dev-send-rate-limit + dev-send-gso dev-ibverbs-linking From a5baeeac4fc70feb852fc53a0c9901423b3641fb Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 4 Dec 2023 11:37:29 +0200 Subject: [PATCH 08/12] Test combinations of sendmmsg and gso in all-builds.sh --- .ci/all-builds.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.ci/all-builds.sh b/.ci/all-builds.sh index 2f0916f0c..a093eb3b2 100755 --- a/.ci/all-builds.sh +++ b/.ci/all-builds.sh @@ -17,4 +17,11 @@ for python in true false; do done done done + # sendmmsg and gso don't interact with ibv, so don't test them jointly + for sendmmsg in auto disabled; do + for gso in auto disabled; do + meson configure -Dpython=$python -Dsendmmsg=$sendmmsg -Dgso=$gso + meson compile + done + done done From ed4c8e3a2be80f1473c1d319d0a4d9f01733714e Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 4 Dec 2023 11:43:00 +0200 Subject: [PATCH 09/12] Fix unused label warning --- src/send_udp.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/send_udp.cpp b/src/send_udp.cpp index b8a9a882b..bef5cbe75 100644 --- a/src/send_udp.cpp +++ b/src/send_udp.cpp @@ -123,7 +123,9 @@ void udp_writer::set_gso_size(int size, boost::system::error_code &result) void udp_writer::send_packets(int first_packet, int last_packet, int first_msg, int last_msg) { +#if SPEAD2_USE_GSO restart: +#endif // Try sending int sent = sendmmsg(socket.native_handle(), msgvec + first_msg, last_msg - first_msg, MSG_DONTWAIT); int groups = 0; From c1d5ad9cfbaf7d47e2f84afee74116b609bd1ad4 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 5 Dec 2023 09:04:36 +0200 Subject: [PATCH 10/12] Fix generalised -> generic for GSO in comment --- src/send_udp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/send_udp.cpp b/src/send_udp.cpp index bef5cbe75..a755c7010 100644 --- a/src/send_udp.cpp +++ b/src/send_udp.cpp @@ -218,7 +218,7 @@ int udp_writer::prepare_msgvec(int first_packet, int last_packet, int first_msg, int msg = first_msg; for (int i = first_packet; i < last_packet; i++) { - /* Check if we can merge with the previous packet using generalised + /* Check if we can merge with the previous packet using generic * segmentation offload. */ if (!SPEAD2_USE_GSO || i == first_packet From 4fdaf3e930591304db6c6413964655be27758f8f Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 5 Dec 2023 11:33:38 +0200 Subject: [PATCH 11/12] Fix some incorrect Doxygen markers --- src/send_udp.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/send_udp.cpp b/src/send_udp.cpp index a755c7010..ea0932ad9 100644 --- a/src/send_udp.cpp +++ b/src/send_udp.cpp @@ -52,11 +52,11 @@ class udp_writer : public writer static constexpr int max_batch = 64; #if SPEAD2_USE_SENDMMSG // Some magic values for current_gso_size - ///< GSO allowed, but socket option not currently set + /// GSO allowed, but socket option not currently set [[maybe_unused]] static constexpr int gso_inactive = 0; - ///< GSO failed; do not try again + /// GSO failed; do not try again [[maybe_unused]] static constexpr int gso_disabled = -1; - ///< Last send with GSO failed; retrying without GSO + /// Last send with GSO failed; retrying without GSO [[maybe_unused]] static constexpr int gso_probe = -2; static constexpr int max_gso_message_size = 65535; // maximum size the kernel will accept From c380a0f7043c1fa5117be4543141381f6629d36b Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 5 Dec 2023 11:40:02 +0200 Subject: [PATCH 12/12] Fix missing link in documentation --- doc/dev-send-gso.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/dev-send-gso.rst b/doc/dev-send-gso.rst index 620833d21..832a2e1b3 100644 --- a/doc/dev-send-gso.rst +++ b/doc/dev-send-gso.rst @@ -5,6 +5,8 @@ reduce packet overheads when transmitting UDP data through the kernel networking stack. A good overview can be found on `Cloudflare's blog`_, but the basic idea is this: +.. _Cloudflare's blog: https://blog.cloudflare.com/accelerating-udp-packet-transmission-for-quic/ + 1. Userspace concatenates multiple smaller packets into one mega-packet for submission to the kernel. 2. Most of the networking stack operates on the mega-packet.