Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0f485251 authored by Shirley Ma, committed by Roland Dreier
Browse files

IPoIB: Make send and receive queue sizes tunable



Make IPoIB's send and receive queue sizes tunable via module
parameters ("send_queue_size" and "recv_queue_size").  This allows the
queue sizes to be enlarged to fix disastrously bad performance on some
platforms and workloads, without bloating memory usage when large
queues aren't needed.

Signed-off-by: Shirley Ma <xma@us.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent f2de3b06
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -65,6 +65,8 @@ enum {

	IPOIB_RX_RING_SIZE 	  = 128,
	IPOIB_TX_RING_SIZE 	  = 64,
	IPOIB_MAX_QUEUE_SIZE	  = 8192,
	IPOIB_MIN_QUEUE_SIZE	  = 2,

	IPOIB_NUM_WC 		  = 4,

@@ -332,6 +334,8 @@ static inline void ipoib_unregister_debugfs(void) { }
#define ipoib_warn(priv, format, arg...)		\
	ipoib_printk(KERN_WARNING, priv, format , ## arg)

extern int ipoib_sendq_size;
extern int ipoib_recvq_size;

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
extern int ipoib_debug_level;
+11 −11
Original line number Diff line number Diff line
@@ -161,7 +161,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int i;

	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
	for (i = 0; i < ipoib_recvq_size; ++i) {
		if (ipoib_alloc_rx_skb(dev, i)) {
			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
			return -ENOMEM;
@@ -187,7 +187,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
	if (wr_id & IPOIB_OP_RECV) {
		wr_id &= ~IPOIB_OP_RECV;

		if (wr_id < IPOIB_RX_RING_SIZE) {
		if (wr_id < ipoib_recvq_size) {
			struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
			dma_addr_t      addr = priv->rx_ring[wr_id].mapping;

@@ -252,9 +252,9 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
		struct ipoib_tx_buf *tx_req;
		unsigned long flags;

		if (wr_id >= IPOIB_TX_RING_SIZE) {
		if (wr_id >= ipoib_sendq_size) {
			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
				   wr_id, IPOIB_TX_RING_SIZE);
				   wr_id, ipoib_sendq_size);
			return;
		}

@@ -275,7 +275,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
		spin_lock_irqsave(&priv->tx_lock, flags);
		++priv->tx_tail;
		if (netif_queue_stopped(dev) &&
		    priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
		    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
			netif_wake_queue(dev);
		spin_unlock_irqrestore(&priv->tx_lock, flags);

@@ -344,13 +344,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
	 * means we have to make sure everything is properly recorded and
	 * our state is consistent before we call post_send().
	 */
	tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)];
	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
	tx_req->skb = skb;
	addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
			      DMA_TO_DEVICE);
	pci_unmap_addr_set(tx_req, mapping, addr);

	if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
			       address->ah, qpn, addr, skb->len))) {
		ipoib_warn(priv, "post_send failed\n");
		++priv->stats.tx_errors;
@@ -363,7 +363,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
		address->last_send = priv->tx_head;
		++priv->tx_head;

		if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
			netif_stop_queue(dev);
		}
@@ -488,7 +488,7 @@ static int recvs_pending(struct net_device *dev)
	int pending = 0;
	int i;

	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
	for (i = 0; i < ipoib_recvq_size; ++i)
		if (priv->rx_ring[i].skb)
			++pending;

@@ -527,7 +527,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
			 */
			while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
				tx_req = &priv->tx_ring[priv->tx_tail &
							(IPOIB_TX_RING_SIZE - 1)];
							(ipoib_sendq_size - 1)];
				dma_unmap_single(priv->ca->dma_device,
						 pci_unmap_addr(tx_req, mapping),
						 tx_req->skb->len,
@@ -536,7 +536,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
				++priv->tx_tail;
			}

			for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
			for (i = 0; i < ipoib_recvq_size; ++i)
				if (priv->rx_ring[i].skb) {
					dma_unmap_single(priv->ca->dma_device,
							 pci_unmap_addr(&priv->rx_ring[i],
+22 −6
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kernel.h>

#include <linux/if_arp.h>	/* For ARPHRD_xxx */

@@ -53,6 +54,14 @@ MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");

int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;

module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
int ipoib_debug_level;

@@ -795,20 +804,19 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	/* Allocate RX/TX "rings" to hold queued skbs */

	priv->rx_ring =	kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
				GFP_KERNEL);
	if (!priv->rx_ring) {
		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
		       ca->name, IPOIB_RX_RING_SIZE);
		       ca->name, ipoib_recvq_size);
		goto out;
	}

	priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
				GFP_KERNEL);
	if (!priv->tx_ring) {
		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
		       ca->name, IPOIB_TX_RING_SIZE);
		       ca->name, ipoib_sendq_size);
		goto out_rx_ring_cleanup;
	}

@@ -876,7 +884,7 @@ static void ipoib_setup(struct net_device *dev)
	dev->hard_header_len 	 = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
	dev->addr_len 		 = INFINIBAND_ALEN;
	dev->type 		 = ARPHRD_INFINIBAND;
	dev->tx_queue_len 	 = IPOIB_TX_RING_SIZE * 2;
	dev->tx_queue_len 	 = ipoib_sendq_size * 2;
	dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;

	/* MTU will be reset when mcast join happens */
@@ -1128,6 +1136,14 @@ static int __init ipoib_init_module(void)
{
	int ret;

	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);

	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);

	ret = ipoib_register_debugfs();
	if (ret)
		return ret;
+3 −3
Original line number Diff line number Diff line
@@ -159,8 +159,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_qp_init_attr init_attr = {
		.cap = {
			.max_send_wr  = IPOIB_TX_RING_SIZE,
			.max_recv_wr  = IPOIB_RX_RING_SIZE,
			.max_send_wr  = ipoib_sendq_size,
			.max_recv_wr  = ipoib_recvq_size,
			.max_send_sge = 1,
			.max_recv_sge = 1
		},
@@ -175,7 +175,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
	}

	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
				IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1);
				ipoib_sendq_size + ipoib_recvq_size + 1);
	if (IS_ERR(priv->cq)) {
		printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
		goto out_free_pd;