Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit af40da89 authored by Vladimir Sokolovsky's avatar Vladimir Sokolovsky Committed by Roland Dreier
Browse files

IPoIB: add LRO support



Add "ipoib_use_lro" module parameter to enable LRO and an
"ipoib_lro_max_aggr" module parameter to set the max number of packets
to be aggregated.  Make LRO controllable and LRO statistics accessible
through ethtool.

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 12406734
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
config INFINIBAND_IPOIB
	tristate "IP-over-InfiniBand"
	depends on NETDEVICES && INET && (IPV6 || IPV6=n)
	select INET_LRO
	---help---
	  Support for the IP-over-InfiniBand protocol (IPoIB). This
	  transports IP packets over InfiniBand so you can use your IB
+11 −0
Original line number Diff line number Diff line
@@ -50,6 +50,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
#include <linux/inet_lro.h>

/* constants */

@@ -94,6 +95,9 @@ enum {
	IPOIB_MCAST_FLAG_BUSY	  = 2,	/* joining or already joined */
	IPOIB_MCAST_FLAG_ATTACHED = 3,

	IPOIB_MAX_LRO_DESCRIPTORS = 8,
	IPOIB_LRO_MAX_AGGR 	  = 64,

	MAX_SEND_CQE		  = 16,
	IPOIB_CM_COPYBREAK	  = 256,
};
@@ -248,6 +252,11 @@ struct ipoib_ethtool_st {
	u16     max_coalesced_frames;
};

/*
 * Per-device LRO state: the inet_lro aggregation manager together with
 * the fixed pool of descriptors it hands out (one per in-flight flow).
 */
struct ipoib_lro {
	struct net_lro_mgr lro_mgr;
	struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
};

/*
 * Device private locking: tx_lock protects members used in TX fast
 * path (and we use LLTX so upper layers don't do extra locking).
@@ -334,6 +343,8 @@ struct ipoib_dev_priv {
	int	hca_caps;
	struct ipoib_ethtool_st ethtool;
	struct timer_list poll_timer;

	struct ipoib_lro lro;
};

struct ipoib_ah {
+46 −0
Original line number Diff line number Diff line
@@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev,
	return 0;
}

/*
 * Names reported for ETH_SS_STATS; the order here must match the order
 * in which ipoib_get_ethtool_stats() fills in the data[] array.
 */
static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = {
	"LRO aggregated", "LRO flushed",
	"LRO avg aggr", "LRO no desc"
};

/* ethtool: copy out the statistics name table for ETH_SS_STATS */
static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
	if (stringset == ETH_SS_STATS)
		memcpy(data, ipoib_stats_keys, sizeof(ipoib_stats_keys));
}

/* ethtool: number of statistics strings/values we export */
static int ipoib_get_sset_count(struct net_device *dev, int sset)
{
	if (sset != ETH_SS_STATS)
		return -EOPNOTSUPP;

	return ARRAY_SIZE(ipoib_stats_keys);
}

/*
 * ethtool: report the LRO counters maintained by inet_lro.
 * Values are written in the order declared in ipoib_stats_keys[].
 */
static void ipoib_get_ethtool_stats(struct net_device *dev,
				struct ethtool_stats *stats, uint64_t *data)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	uint64_t aggregated = priv->lro.lro_mgr.stats.aggregated;
	uint64_t flushed    = priv->lro.lro_mgr.stats.flushed;
	int i = 0;

	data[i++] = aggregated;
	data[i++] = flushed;
	/* average aggregation; 0 when nothing has been flushed yet */
	data[i++] = flushed ? aggregated / flushed : 0;
	data[i++] = priv->lro.lro_mgr.stats.no_desc;
}

/*
 * ethtool entry points: driver info, TSO query, interrupt coalescing,
 * feature flags (lets userspace toggle NETIF_F_LRO at runtime), and the
 * LRO statistics export above.
 */
static const struct ethtool_ops ipoib_ethtool_ops = {
	.get_drvinfo		= ipoib_get_drvinfo,
	.get_tso		= ethtool_op_get_tso,
	.get_coalesce		= ipoib_get_coalesce,
	.set_coalesce		= ipoib_set_coalesce,
	.get_flags		= ethtool_op_get_flags,
	.set_flags		= ethtool_op_set_flags,
	.get_strings		= ipoib_get_strings,
	.get_sset_count		= ipoib_get_sset_count,
	.get_ethtool_stats	= ipoib_get_ethtool_stats,
};

void ipoib_set_ethtool_ops(struct net_device *dev)
+7 −1
Original line number Diff line number Diff line
@@ -288,6 +288,9 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
	if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (dev->features & NETIF_F_LRO)
		lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
	else
		netif_receive_skb(skb);

repost:
@@ -440,6 +443,9 @@ int ipoib_poll(struct napi_struct *napi, int budget)
	}

	if (done < budget) {
		if (dev->features & NETIF_F_LRO)
			lro_flush_all(&priv->lro.lro_mgr);

		netif_rx_complete(dev, napi);
		if (unlikely(ib_req_notify_cq(priv->recv_cq,
					      IB_CQ_NEXT_COMP |
+62 −0
Original line number Diff line number Diff line
@@ -60,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

/*
 * "lro" enables NETIF_F_LRO when a port is added; read-only (0444) so it
 * cannot be flipped via sysfs after load (use ethtool for that).
 * NOTE(review): int backing a bool module_param looks odd, but on this
 * kernel generation param_set_bool writes int-sized storage — confirm
 * before converting the variable to bool.
 */
static int lro;
module_param(lro, bool, 0444);
MODULE_PARM_DESC(lro,  "Enable LRO (Large Receive Offload)");

/* Per-flow cap on packets merged into one super-packet (writable, 0644) */
static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
module_param(lro_max_aggr, int, 0644);
MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
		"(default = 64)");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
int ipoib_debug_level;

@@ -936,6 +945,54 @@ static const struct header_ops ipoib_header_ops = {
	.create	= ipoib_hard_header,
};

/*
 * inet_lro callback: locate the IP and TCP headers of a received skb.
 * Returns 0 and fills *iphdr/*tcph/*hdr_flags if the packet is eligible
 * for aggregation, -1 otherwise.
 */
static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
		       void **tcph, u64 *hdr_flags, void *priv)
{
	struct iphdr *iph;
	unsigned int iph_len;

	/* Only IPv4 traffic is eligible for inet_lro aggregation */
	if (unlikely(skb->protocol != htons(ETH_P_IP)))
		return -1;

	/*
	 * The HW must already have validated the checksum; some day we
	 * could verify it here ourselves so devices without checksum
	 * offload can use LRO too.
	 */
	if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
		return -1;

	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	if (iph->protocol != IPPROTO_TCP)
		return -1;

	iph_len = ip_hdrlen(skb);
	skb_set_transport_header(skb, iph_len);
	*tcph = tcp_hdr(skb);

	/* Both headers must be fully contained in the IP datagram */
	if (ntohs(iph->tot_len) < iph_len + tcp_hdrlen(skb))
		return -1;

	*hdr_flags = LRO_IPV4 | LRO_TCP;
	*iphdr = iph;

	return 0;
}

static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
{
	priv->lro.lro_mgr.max_aggr	 = lro_max_aggr;
	priv->lro.lro_mgr.max_desc	 = IPOIB_MAX_LRO_DESCRIPTORS;
	priv->lro.lro_mgr.lro_arr	 = priv->lro.lro_desc;
	priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
	priv->lro.lro_mgr.features	 = LRO_F_NAPI;
	priv->lro.lro_mgr.dev		 = priv->dev;
	priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
}

static void ipoib_setup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -975,6 +1032,8 @@ static void ipoib_setup(struct net_device *dev)

	priv->dev = dev;

	ipoib_lro_setup(priv);

	spin_lock_init(&priv->lock);
	spin_lock_init(&priv->tx_lock);

@@ -1152,6 +1211,9 @@ static struct net_device *ipoib_add_port(const char *format,
		priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
	}

	if (lro)
		priv->dev->features |= NETIF_F_LRO;

	/*
	 * Set the full membership bit, so that we join the right
	 * broadcast group, etc.