Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2eba61d5 authored by Paul Durrant's avatar Paul Durrant Committed by David S. Miller
Browse files

xen-netback: add support for IPv6 checksum offload from guest



For performance of VM to VM traffic on a single host it is better to avoid
calculation of TCP/UDP checksum in the sending frontend. To allow this, this
patch adds the code necessary to set up partial checksum for IPv6 packets
and xenstore flag feature-ipv6-csum-offload to advertise that fact to
frontends.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 146c8a77
Loading
Loading
Loading
Loading
+196 −39
Original line number Diff line number Diff line
@@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}

/*
 * This is the amount of packet we copy rather than map, so that the
 * guest can't fiddle with the contents of the headers while we do
 * packet processing on them (netfilter, routing, etc).
/* This is a minimum size for the linear area to avoid lots of
 * calls to __pskb_pull_tail() as we set up checksum offsets. The
 * value 128 was chosen as it covers all IPv4 and most likely
 * IPv6 headers.
 */
#define PKT_PROT_LEN    (ETH_HLEN + \
			 VLAN_HLEN + \
			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
#define PKT_PROT_LEN 128

static u16 frag_get_pending_idx(skb_frag_t *frag)
{
@@ -1118,61 +1115,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
	return 0;
}

static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
{
	struct iphdr *iph;
	int err = -EPROTO;
	int recalculate_partial_csum = 0;

	/*
	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
		/* If we need to pullup then pullup to the max, so we
		 * won't need to do it again.
		 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		vif->rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = 1;
		int target = min_t(int, skb->len, MAX_TCP_HEADER);
		__pskb_pull_tail(skb, target - skb_headlen(skb));
	}
}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;
static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
			     int recalculate_partial_csum)
{
	struct iphdr *iph = (void *)skb->data;
	unsigned int header_size;
	unsigned int off;
	int err = -EPROTO;

	if (skb->protocol != htons(ETH_P_IP))
		goto out;
	off = sizeof(struct iphdr);

	header_size = skb->network_header + off + MAX_IPOPTLEN;
	maybe_pull_tail(skb, header_size);

	off = iph->ihl * 4;

	iph = (void *)skb->data;
	switch (iph->protocol) {
	case IPPROTO_TCP:
		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct tcphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct tcphdr *tcph = tcp_hdr(skb);

			header_size = skb->network_header +
				off +
				sizeof(struct tcphdr);
			maybe_pull_tail(skb, header_size);

			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 skb->len - off,
							 IPPROTO_TCP, 0);
		}
		break;
	case IPPROTO_UDP:
		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct udphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct udphdr *udph = udp_hdr(skb);

			header_size = skb->network_header +
				off +
				sizeof(struct udphdr);
			maybe_pull_tail(skb, header_size);

			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - iph->ihl*4,
							 skb->len - off,
							 IPPROTO_UDP, 0);
		}
		break;
	default:
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
				   "Attempting to checksum a non-TCP/UDP packet, "
				   "dropping a protocol %d packet\n",
				   iph->protocol);
		goto out;
	}
@@ -1183,6 +1193,158 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
	return err;
}

static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
			       int recalculate_partial_csum)
{
	int err = -EPROTO;
	struct ipv6hdr *ipv6h = (void *)skb->data;
	u8 nexthdr;
	unsigned int header_size;
	unsigned int off;
	bool fragment;
	bool done;

	done = false;

	off = sizeof(struct ipv6hdr);

	header_size = skb->network_header + off;
	maybe_pull_tail(skb, header_size);

	nexthdr = ipv6h->nexthdr;

	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
	       !done) {
		switch (nexthdr) {
		case IPPROTO_DSTOPTS:
		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING: {
			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);

			header_size = skb->network_header +
				off +
				sizeof(struct ipv6_opt_hdr);
			maybe_pull_tail(skb, header_size);

			nexthdr = hp->nexthdr;
			off += ipv6_optlen(hp);
			break;
		}
		case IPPROTO_AH: {
			struct ip_auth_hdr *hp = (void *)(skb->data + off);

			header_size = skb->network_header +
				off +
				sizeof(struct ip_auth_hdr);
			maybe_pull_tail(skb, header_size);

			nexthdr = hp->nexthdr;
			off += (hp->hdrlen+2)<<2;
			break;
		}
		case IPPROTO_FRAGMENT:
			fragment = true;
			/* fall through */
		default:
			done = true;
			break;
		}
	}

	if (!done) {
		if (net_ratelimit())
			netdev_err(vif->dev, "Failed to parse packet header\n");
		goto out;
	}

	if (fragment) {
		if (net_ratelimit())
			netdev_err(vif->dev, "Packet is a fragment!\n");
		goto out;
	}

	switch (nexthdr) {
	case IPPROTO_TCP:
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct tcphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct tcphdr *tcph = tcp_hdr(skb);

			header_size = skb->network_header +
				off +
				sizeof(struct tcphdr);
			maybe_pull_tail(skb, header_size);

			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
						       &ipv6h->daddr,
						       skb->len - off,
						       IPPROTO_TCP, 0);
		}
		break;
	case IPPROTO_UDP:
		if (!skb_partial_csum_set(skb, off,
					  offsetof(struct udphdr, check)))
			goto out;

		if (recalculate_partial_csum) {
			struct udphdr *udph = udp_hdr(skb);

			header_size = skb->network_header +
				off +
				sizeof(struct udphdr);
			maybe_pull_tail(skb, header_size);

			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
						       &ipv6h->daddr,
						       skb->len - off,
						       IPPROTO_UDP, 0);
		}
		break;
	default:
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Attempting to checksum a non-TCP/UDP packet, "
				   "dropping a protocol %d packet\n",
				   nexthdr);
		goto out;
	}

	err = 0;

out:
	return err;
}

/* Prepare checksum offload state for a packet, dispatching on the L3
 * protocol.
 *
 * Returns 0 if the skb needs no setup or setup succeeded, -EPROTO if the
 * protocol is neither IPv4 nor IPv6, otherwise whatever the per-protocol
 * helper returns.
 */
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
	int recalc = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		/* A non-CHECKSUM_PARTIAL, non-GSO SKB needs no setup. */
		if (!skb_is_gso(skb))
			return 0;

		/* A GSO SKB must be CHECKSUM_PARTIAL, but some buggy peers
		 * fail to set NETRXF_csum_blank when sending a GSO frame.
		 * Force CHECKSUM_PARTIAL and ask the helper to recompute
		 * the partial checksum.
		 */
		vif->rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalc = 1;
	}

	if (skb->protocol == htons(ETH_P_IP))
		return checksum_setup_ip(vif, skb, recalc);

	if (skb->protocol == htons(ETH_P_IPV6))
		return checksum_setup_ipv6(vif, skb, recalc);

	/* Anything else cannot have its checksum set up here. */
	return -EPROTO;
}

static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
	unsigned long now = jiffies;
@@ -1428,12 +1590,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget)

		xenvif_fill_frags(vif, skb);

		/*
		 * If the initial fragment was < PKT_PROT_LEN then
		 * pull through some bytes from the other fragments to
		 * increase the linear region to PKT_PROT_LEN bytes.
		 */
		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
			int target = min_t(int, skb->len, PKT_PROT_LEN);
			__pskb_pull_tail(skb, target - skb_headlen(skb));
		}
+9 −0
Original line number Diff line number Diff line
@@ -105,6 +105,15 @@ static int netback_probe(struct xenbus_device *dev,
			goto abort_transaction;
		}

		/* We support partial checksum setup for IPv6 packets */
		err = xenbus_printf(xbt, dev->nodename,
				    "feature-ipv6-csum-offload",
				    "%d", 1);
		if (err) {
			message = "writing feature-ipv6-csum-offload";
			goto abort_transaction;
		}

		/* We support rx-copy path. */
		err = xenbus_printf(xbt, dev->nodename,
				    "feature-rx-copy", "%d", 1);