Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5ef49597 authored by Paolo Abeni, committed by Sean Tranchetti
Browse files

udp: implement GRO for plain UDP sockets.



This is the RX counterpart of commit bec1f6f6 ("udp: generate gso
with UDP_SEGMENT"). When UDP_GRO is enabled on a socket, that socket
also becomes eligible for GRO in the rx path: UDP segments directed to
it are assembled into a larger GSO_UDP_L4 packet.

The core UDP GRO support is enabled with setsockopt(UDP_GRO).

Initial benchmark numbers:

Before:
udp rx:   1079 MB/s   769065 calls/s

After:
udp rx:   1466 MB/s    24877 calls/s

This change introduces a side effect with respect to UDP tunnels:
once a UDP tunnel has been created, the kernel performs a socket lookup
for every ingress UDP packet, whereas before that lookup happened only
if the ingress packet carried a valid internal header csum.

rfc v2 -> rfc v3:
 - fixed typos in macro name and comments
 - really enforce UDP_GRO_CNT_MAX, instead of UDP_GRO_CNT_MAX + 1
 - acquire socket lock in UDP_GRO setsockopt

rfc v1 -> rfc v2:
 - use a new option to enable UDP GRO
 - use static keys to protect the UDP GRO socket lookup

Change-Id: I02a5c84af7e2e0e87b44bdb3ef6aefe0336943c8
CRs-Fixed: 2351793
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Git-repo: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git


Git-commit: e20cf8d3f1f763ad28a9cb3b41305b8a8a42653e
Signed-off-by: Sean Tranchetti <stranche@codeaurora.org>
parent 1ec4c359
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -50,11 +50,12 @@ struct udp_sock {
	__u8		 encap_type;	/* Is this an Encapsulation socket? */
	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
			 no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
			 encap_enabled:1;/* This socket enabled encap
			 encap_enabled:1,/* This socket enabled encap
					  * processing; UDP tunnels and
					  * different encapsulation layers set
					  * this
					  */
			 gro_enabled:1; /* Can accept GRO packets */
	/*
	 * Following member retains the information to create a UDP header
	 * when the socket is uncorked.
+1 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ struct udphdr {
#define UDP_NO_CHECK6_TX 101	/* Disable sending checksum for UDP6X */
#define UDP_NO_CHECK6_RX 102	/* Disable accepting checksum for UDP6 */
#define UDP_SEGMENT	103	/* Set GSO segmentation size */
#define UDP_GRO		104	/* This socket can receive UDP GRO packets */

/* UDP encapsulation types */
#define UDP_ENCAP_ESPINUDP_NON_IKE	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
+8 −0
Original line number Diff line number Diff line
@@ -2464,6 +2464,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
		up->gso_size = val;
		break;

	case UDP_GRO:
		lock_sock(sk);
		if (valbool)
			udp_tunnel_encap_enable(sk->sk_socket);
		up->gro_enabled = valbool;
		release_sock(sk);
		break;

	/*
	 * 	UDP-Lite's partial checksum coverage (RFC 3828).
	 */
+88 −22
Original line number Diff line number Diff line
@@ -335,6 +335,55 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
	return segs;
}

#define UDP_GRO_CNT_MAX 64
/*
 * udp_gro_receive_segment - try to aggregate a plain UDP datagram into a
 * held GRO packet belonging to the same flow.
 * @head: list of packets currently held by GRO
 * @skb:  incoming datagram, positioned at its UDP header
 *
 * Datagrams without a UDP checksum are never aggregated: they are marked
 * for flush and NULL is returned.
 *
 * Otherwise the held list is scanned for an entry still flagged same_flow
 * whose UDP source/destination ports match. The first such entry decides
 * the outcome; entries with different ports get same_flow cleared.
 *
 * Return: the list slot of the held packet that must be flushed now, or
 * NULL when nothing has to be flushed (including when no flow matched).
 */
static struct sk_buff **udp_gro_receive_segment(struct sk_buff **head,
						struct sk_buff *skb)
{
	struct udphdr *uh = udp_hdr(skb);
	struct sk_buff **pp = NULL;
	struct udphdr *uh2;
	struct sk_buff *p;
	unsigned int ulen;

	/* requires non zero csum, for symmetry with GSO */
	if (!uh->check) {
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	/* length comparisons below must be done in host byte order */
	ulen = ntohs(uh->len);

	/* pull encapsulating udp header */
	skb_gro_pull(skb, sizeof(struct udphdr));
	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));

	for (; (p = *head); head = &p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		uh2 = udp_hdr(p);

		/* Match ports only, as csum is always non zero */
		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* Terminate the flow on len mismatch or if it grows "too
		 * much". Under small packet flood the GRO count could
		 * otherwise grow a lot, leading to excessive truesize values.
		 *
		 * The checks are ordered so that a datagram longer than the
		 * flow's segment size is never merged: it only flushes the
		 * held packet. A shorter datagram is merged as the last
		 * segment and then the held packet is flushed. A failed
		 * merge (non-zero return from skb_gro_receive()) flushes the
		 * held packet as well.
		 */
		if (ulen > ntohs(uh2->len) || skb_gro_receive(head, skb) ||
		    ulen != ntohs(uh2->len) ||
		    NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
			pp = head;

		return pp;
	}

	/* mismatch, but we never need to flush */
	return NULL;
}


struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
				 struct udphdr *uh, udp_lookup_t lookup)
{
@@ -344,23 +393,27 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
	int flush = 1;
	struct sock *sk;

	rcu_read_lock();
	sk = (*lookup)(skb, uh->source, uh->dest);
	if (!sk)
		goto out_unlock;

	if (udp_sk(sk)->gro_enabled) {
		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
		rcu_read_unlock();
		return pp;
	}

	if (NAPI_GRO_CB(skb)->encap_mark ||
	    (skb->ip_summed != CHECKSUM_PARTIAL &&
	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
	     !NAPI_GRO_CB(skb)->csum_valid))
		goto out;
	     !NAPI_GRO_CB(skb)->csum_valid) ||
	    !udp_sk(sk)->gro_receive)
		goto out_unlock;

	/* mark that this skb passed once through the tunnel gro layer */
	NAPI_GRO_CB(skb)->encap_mark = 1;

	rcu_read_lock();
	sk = (*lookup)(skb, uh->source, uh->dest);

	if (sk && udp_sk(sk)->gro_receive)
		goto unflush;
	goto out_unlock;

unflush:
	flush = 0;

	for (p = *head; p; p = p->next) {
@@ -385,7 +438,6 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,

out_unlock:
	rcu_read_unlock();
out:
	skb_gro_flush_final(skb, pp, flush);
	return pp;
}
@@ -418,6 +470,19 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
	return NULL;
}

/*
 * Finalize a packet aggregated by the plain-UDP GRO path.
 *
 * Tags the skb as a GSO_UDP_L4 aggregate carrying as many segments as were
 * counted in its GRO control block, and sets it up as CHECKSUM_PARTIAL with
 * the checksum location pointing at the 'check' field of its UDP header.
 *
 * Always returns 0.
 */
static int udp_gro_complete_segment(struct sk_buff *skb)
{
	unsigned char *l4_start = (unsigned char *)udp_hdr(skb);

	/* GSO metadata: segment count comes from the GRO control block */
	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;

	/* checksum metadata: csum_start is an offset from skb->head */
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_offset = offsetof(struct udphdr, check);
	skb->csum_start = l4_start - skb->head;

	return 0;
}

int udp_gro_complete(struct sk_buff *skb, int nhoff,
		     udp_lookup_t lookup)
{
@@ -428,16 +493,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,

	uh->len = newlen;

	/* Set encapsulation before calling into inner gro_complete() functions
	 * to make them set up the inner offsets.
	 */
	skb->encapsulation = 1;

	rcu_read_lock();
	sk = (*lookup)(skb, uh->source, uh->dest);
	if (sk && udp_sk(sk)->gro_complete)
	if (sk && udp_sk(sk)->gro_enabled) {
		err = udp_gro_complete_segment(skb);
	} else if (sk && udp_sk(sk)->gro_complete) {
		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
					: SKB_GSO_UDP_TUNNEL;

		/* Set encapsulation before calling into inner gro_complete()
		 * functions to make them set up the inner offsets.
		 */
		skb->encapsulation = 1;
		err = udp_sk(sk)->gro_complete(sk, skb,
				nhoff + sizeof(struct udphdr));
	}
	rcu_read_unlock();

	if (skb->remcsum_offload)
@@ -452,13 +522,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
	const struct iphdr *iph = ip_hdr(skb);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

	if (uh->check) {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
	if (uh->check)
		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
					  iph->daddr, 0);
	} else {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
	}

	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}
+1 −5
Original line number Diff line number Diff line
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

	if (uh->check) {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
	if (uh->check)
		uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
					  &ipv6h->daddr, 0);
	} else {
		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
	}

	return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}