Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cb586c63 authored by David S. Miller
Browse files

Merge branch 'udp-gso'

Willem de Bruijn says:

====================
udp gso

Segmentation offload reduces cycles/byte for large packets by
amortizing the cost of protocol stack traversal.

This patchset implements GSO for UDP. A process can concatenate and
submit multiple datagrams to the same destination in one send call
by setting socket option SOL_UDP/UDP_SEGMENT with the segment size,
or passing an analogous cmsg at send time.

The stack will send the entire large (up to network layer max size)
datagram through the protocol layer. At the GSO layer, it is broken
up into individual segments. All receive the same network layer header
and UDP src and dst port. All but the last segment have the same UDP
header, but the last may differ in length and checksum.

Initial results show a significant reduction in UDP cycles/byte.
See the main patch for more details and benchmark results.

        udp
          876 MB/s 14873 msg/s 624666 calls/s
            11,205,777,429      cycles

        udp gso
         2139 MB/s 36282 msg/s 36282 calls/s
            11,204,374,561      cycles

The patch set is broken down as follows:
- patch 1 is a prerequisite: code rearrangement, noop otherwise
- patch 2 implements the gso logic
- patch 3 adds protocol stack support for UDP_SEGMENT
- patch 4,5,7 are refinements
- patch 6 adds the cmsg interface
- patch 8..11 are tests

This idea was presented previously at netconf 2017-2
http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf



Changes v1 -> v2
  - Convert __udp_gso_segment to modify headers after skb_segment
  - Split main patch into two, one for gso logic, one for UDP_SEGMENT

Changes RFC -> v1
  - MSG_MORE:
      fixed, by allowing checksum offload with corking if gso
  - SKB_GSO_UDP_L4:
      made independent from SKB_GSO_UDP
      and removed skb_is_ufo() wrapper
  - NETIF_F_GSO_UDP_L4:
      add to netdev_features_string
      and to netdev-features.txt
      add BUILD_BUG_ON to match SKB_GSO_UDP_L4 value
  - UDP_MAX_SEGMENTS:
      introduce limit on number of segments per gso skb
      to avoid extreme cases like IP_MAX_MTU/IPV4_MIN_MTU
  - CHECKSUM_PARTIAL:
      test against missing feature after ndo_features_check
      if not supported return error, analogous to udp_send_check
  - MSG_ZEROCOPY: removed, deferred for now
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents a9537c93 3a687bef
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -113,6 +113,13 @@ whatever headers there might be.
NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).

 * Transmit UDP segmentation offload

NETIF_F_GSO_UDP_L4 accepts a single UDP header with a payload that exceeds
gso_size. On segmentation, it segments the payload on gso_size boundaries and
replicates the network and UDP headers (fixing up the last one if less than
gso_size).

 * Transmit DMA from high memory

On platforms where this is relevant, NETIF_F_HIGHDMA signals that
+4 −1
Original line number Diff line number Diff line
@@ -55,8 +55,9 @@ enum {
	NETIF_F_GSO_SCTP_BIT,		/* ... SCTP fragmentation */
	NETIF_F_GSO_ESP_BIT,		/* ... ESP with TSO */
	NETIF_F_GSO_UDP_BIT,		/* ... UFO, deprecated except tuntap */
	NETIF_F_GSO_UDP_L4_BIT,		/* ... UDP payload GSO (not UFO) */
	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
-		NETIF_F_GSO_UDP_BIT,
+		NETIF_F_GSO_UDP_L4_BIT,

	NETIF_F_FCOE_CRC_BIT,		/* FCoE CRC32 */
	NETIF_F_SCTP_CRC_BIT,		/* SCTP checksum offload */
@@ -147,6 +148,7 @@ enum {
#define NETIF_F_HW_ESP_TX_CSUM	__NETIF_F(HW_ESP_TX_CSUM)
#define	NETIF_F_RX_UDP_TUNNEL_PORT  __NETIF_F(RX_UDP_TUNNEL_PORT)
#define NETIF_F_HW_TLS_RECORD	__NETIF_F(HW_TLS_RECORD)
#define NETIF_F_GSO_UDP_L4	__NETIF_F(GSO_UDP_L4)

#define for_each_netdev_feature(mask_addr, bit)	\
	for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
@@ -216,6 +218,7 @@ enum {
				 NETIF_F_GSO_GRE_CSUM |			\
				 NETIF_F_GSO_IPXIP4 |			\
				 NETIF_F_GSO_IPXIP6 |			\
				 NETIF_F_GSO_UDP_L4 |			\
				 NETIF_F_GSO_UDP_TUNNEL |		\
				 NETIF_F_GSO_UDP_TUNNEL_CSUM)

+1 −0
Original line number Diff line number Diff line
@@ -4186,6 +4186,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
	BUILD_BUG_ON(SKB_GSO_SCTP    != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
	BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT));
	BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
	BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT));

	return (features & feature) == feature;
}
+2 −0
Original line number Diff line number Diff line
@@ -573,6 +573,8 @@ enum {
	SKB_GSO_ESP = 1 << 15,

	SKB_GSO_UDP = 1 << 16,

	SKB_GSO_UDP_L4 = 1 << 17,
};

#if BITS_PER_LONG > 32
+3 −0
Original line number Diff line number Diff line
@@ -55,6 +55,7 @@ struct udp_sock {
	 * when the socket is uncorked.
	 */
	__u16		 len;		/* total length of pending frames */
	__u16		 gso_size;
	/*
	 * Fields specific to UDP-Lite.
	 */
@@ -87,6 +88,8 @@ struct udp_sock {
	int		forward_deficit;
};

#define UDP_MAX_SEGMENTS	(1 << 6UL)

static inline struct udp_sock *udp_sk(const struct sock *sk)
{
	return (struct udp_sock *)sk;
Loading