Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 40ca1988 authored by Eli Cohen, committed by Roland Dreier
Browse files

IPoIB: Add LSO support



For HCAs that support TCP segmentation offload (IB_DEVICE_UD_TSO), set
NETIF_F_TSO and use HW LSO to offload TCP segmentation.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent c93570f2
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -319,6 +319,7 @@ struct ipoib_dev_priv {
	struct dentry *mcg_dentry;
	struct dentry *path_dentry;
#endif
	int	hca_caps;
};

struct ipoib_ah {
+5 −2
Original line number Diff line number Diff line
@@ -1384,7 +1384,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
		ipoib_warn(priv, "enabling connected mode "
			   "will cause multicast packet drops\n");

		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG);
		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;

		ipoib_flush_paths(dev);
@@ -1396,8 +1396,11 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
		dev->mtu = min(priv->mcast_mtu, dev->mtu);
		ipoib_flush_paths(dev);

		if (test_bit(IPOIB_FLAG_CSUM, &priv->flags))
		if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
			dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
			if (priv->hca_caps & IB_DEVICE_UD_TSO)
				dev->features |= NETIF_F_TSO;
		}

		return count;
	}
+76 −31
Original line number Diff line number Diff line
@@ -39,6 +39,8 @@
#include <linux/dma-mapping.h>

#include <rdma/ib_cache.h>
#include <linux/ip.h>
#include <linux/tcp.h>

#include "ipoib.h"

@@ -249,29 +251,37 @@ static int ipoib_dma_map_tx(struct ib_device *ca,
	struct sk_buff *skb = tx_req->skb;
	u64 *mapping = tx_req->mapping;
	int i;
	int off;

	if (skb_headlen(skb)) {
		mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
					       DMA_TO_DEVICE);
		if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
			return -EIO;

		off = 1;
	} else
		off = 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		mapping[i + 1] = ib_dma_map_page(ca, frag->page,
		mapping[i + off] = ib_dma_map_page(ca, frag->page,
						 frag->page_offset, frag->size,
						 DMA_TO_DEVICE);
		if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
		if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
			goto partial_error;
	}
	return 0;

partial_error:
	ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);

	for (; i > 0; --i) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
		ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
		ib_dma_unmap_page(ca, mapping[i - !off], frag->size, DMA_TO_DEVICE);
	}

	if (off)
		ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);

	return -EIO;
}

@@ -281,12 +291,17 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
	struct sk_buff *skb = tx_req->skb;
	u64 *mapping = tx_req->mapping;
	int i;
	int off;

	if (skb_headlen(skb)) {
		ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
		off = 1;
	} else
		off = 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
		ib_dma_unmap_page(ca, mapping[i + off], frag->size,
				  DMA_TO_DEVICE);
	}
}
@@ -392,24 +407,40 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
static inline int post_send(struct ipoib_dev_priv *priv,
			    unsigned int wr_id,
			    struct ib_ah *address, u32 qpn,
			    u64 *mapping, int headlen,
			    skb_frag_t *frags,
			    int nr_frags)
			    struct ipoib_tx_buf *tx_req,
			    void *head, int hlen)
{
	struct ib_send_wr *bad_wr;
	int i;
	int i, off;
	struct sk_buff *skb = tx_req->skb;
	skb_frag_t *frags = skb_shinfo(skb)->frags;
	int nr_frags = skb_shinfo(skb)->nr_frags;
	u64 *mapping = tx_req->mapping;

	if (skb_headlen(skb)) {
		priv->tx_sge[0].addr         = mapping[0];
	priv->tx_sge[0].length       = headlen;
		priv->tx_sge[0].length       = skb_headlen(skb);
		off = 1;
	} else
		off = 0;

	for (i = 0; i < nr_frags; ++i) {
		priv->tx_sge[i + 1].addr = mapping[i + 1];
		priv->tx_sge[i + 1].length = frags[i].size;
		priv->tx_sge[i + off].addr = mapping[i + off];
		priv->tx_sge[i + off].length = frags[i].size;
	}
	priv->tx_wr.num_sge	     = nr_frags + 1;
	priv->tx_wr.num_sge	     = nr_frags + off;
	priv->tx_wr.wr_id 	     = wr_id;
	priv->tx_wr.wr.ud.remote_qpn = qpn;
	priv->tx_wr.wr.ud.ah 	     = address;

	if (head) {
		priv->tx_wr.wr.ud.mss	 = skb_shinfo(skb)->gso_size;
		priv->tx_wr.wr.ud.header = head;
		priv->tx_wr.wr.ud.hlen	 = hlen;
		priv->tx_wr.opcode	 = IB_WR_LSO;
	} else
		priv->tx_wr.opcode	 = IB_WR_SEND;

	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
}

@@ -418,7 +449,20 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_tx_buf *tx_req;

	int hlen;
	void *phead;

	if (skb_is_gso(skb)) {
		hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
		phead = skb->data;
		if (unlikely(!skb_pull(skb, hlen))) {
			ipoib_warn(priv, "linear data too small\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return;
		}
	} else {
		if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
			ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
				   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
@@ -427,6 +471,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
			ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
			return;
		}
		phead = NULL;
		hlen  = 0;
	}

	ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
		       skb->len, address, qpn);
@@ -452,9 +499,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;

	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
			       address->ah, qpn,
			       tx_req->mapping, skb_headlen(skb),
			       skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
			       address->ah, qpn, tx_req, phead, hlen))) {
		ipoib_warn(priv, "post_send failed\n");
		++dev->stats.tx_errors;
		ipoib_dma_unmap_tx(priv->ca, tx_req);
+7 −3
Original line number Diff line number Diff line
@@ -1134,14 +1134,15 @@ static struct net_device *ipoib_add_port(const char *format,
		kfree(device_attr);
		goto device_init_failed;
	}
	priv->hca_caps = device_attr->device_cap_flags;

	if (device_attr->device_cap_flags & IB_DEVICE_UD_IP_CSUM) {
	kfree(device_attr);

	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
		priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
	}

	kfree(device_attr);

	/*
	 * Set the full membership bit, so that we join the right
	 * broadcast group, etc.
@@ -1176,6 +1177,9 @@ static struct net_device *ipoib_add_port(const char *format,
		goto event_failed;
	}

	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
		priv->dev->features |= NETIF_F_TSO;

	result = register_netdev(priv->dev);
	if (result) {
		printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
+3 −0
Original line number Diff line number Diff line
@@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
	init_attr.send_cq = priv->cq;
	init_attr.recv_cq = priv->cq;

	if (priv->hca_caps & IB_DEVICE_UD_TSO)
		init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;

	if (dev->features & NETIF_F_SG)
		init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;