Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3dca0f42 authored by Matan Barak, committed by David S. Miller
Browse files

net/mlx4_core: Use tasklet for user-space CQ completion events



Previously, we've fired all our completion callbacks straight from our ISR.

Some of those callbacks were lightweight (for example, mlx4_en's and
IPoIB napi callbacks), but some of them did more work (for example,
the user-space RDMA stack uverbs' completion handler). Besides that,
doing more than the minimal work in an ISR is generally considered wrong,
and it could even lead to a hard lockup of the system: when the hardware
generates a lot of completion events, the loop over those events could
run for so long that the system watchdog reports a hard lockup.

In order to avoid that, add a new way of invoking completion event
callbacks. In the interrupt itself, we add the CQs that received a
completion event to a per-EQ list and schedule a tasklet. In the tasklet
context, we loop over all the CQs in the list and invoke the user callback.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 383677da
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -233,6 +233,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
	if (err)
		goto err_dbmap;

	if (context)
		cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp;
	else
		cq->mcq.comp = mlx4_ib_cq_comp;
	cq->mcq.event = mlx4_ib_cq_event;

+50 −0
Original line number Diff line number Diff line
@@ -52,6 +52,51 @@
#define MLX4_CQ_STATE_ARMED_SOL		( 6 <<  8)
#define MLX4_EQ_STATE_FIRED		(10 <<  8)

/* Time budget, in milliseconds, for a single run of mlx4_cq_tasklet_cb(). */
#define TASKLET_MAX_TIME 2
/* The same budget converted to jiffies, for comparison against 'jiffies'. */
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)

/*
 * Tasklet handler that runs the deferred completion callbacks of one EQ.
 *
 * @data: address of the EQ's struct mlx4_eq_tasklet, carried as an
 *        unsigned long.
 *
 * Every CQ waiting on the pending list is moved to the processing list
 * while holding the lock. Each CQ on the processing list is then unlinked,
 * has its tasklet_ctx.comp callback invoked, and loses the reference that
 * mlx4_add_cq_to_tasklet() took when queueing it. The walk stops once the
 * TASKLET_MAX_TIME_JIFFIES budget is exceeded; if CQs are left over, the
 * tasklet reschedules itself to finish them later.
 */
void mlx4_cq_tasklet_cb(unsigned long data)
{
	unsigned long deadline = jiffies + TASKLET_MAX_TIME_JIFFIES;
	struct mlx4_eq_tasklet *eq_ctx = (struct mlx4_eq_tasklet *)data;
	struct mlx4_cq *cq, *next_cq;
	unsigned long irq_flags;

	/* Grab everything queued so far; new arrivals keep landing on ->list. */
	spin_lock_irqsave(&eq_ctx->lock, irq_flags);
	list_splice_tail_init(&eq_ctx->list, &eq_ctx->process_list);
	spin_unlock_irqrestore(&eq_ctx->lock, irq_flags);

	list_for_each_entry_safe(cq, next_cq, &eq_ctx->process_list,
				 tasklet_ctx.list) {
		/* Unlink first so the CQ can be queued again from here on. */
		list_del_init(&cq->tasklet_ctx.list);
		cq->tasklet_ctx.comp(cq);

		/* Drop the queueing reference; wake a waiter on the last put. */
		if (atomic_dec_and_test(&cq->refcount))
			complete(&cq->free);

		/* Out of budget: leave the rest for the next run. */
		if (time_after(jiffies, deadline))
			break;
	}

	if (list_empty(&eq_ctx->process_list))
		return;

	tasklet_schedule(&eq_ctx->task);
}

/*
 * Queue @cq for deferred completion handling on its EQ's tasklet list.
 *
 * @cq: completion queue whose tasklet_ctx.priv points at the owning EQ's
 *      struct mlx4_eq_tasklet.
 *
 * If the CQ is not already linked on a list, a reference is taken on it
 * and it is appended to the EQ's pending list; mlx4_cq_tasklet_cb() drops
 * that reference after running the callback. A CQ that is still linked is
 * left untouched.
 */
static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq)
{
	struct mlx4_eq_tasklet *eq_ctx = cq->tasklet_ctx.priv;
	unsigned long irq_flags;

	spin_lock_irqsave(&eq_ctx->lock, irq_flags);

	/*
	 * NOTE for whoever implements CQ migration between EQs: this point
	 * must be synchronized, because completions from the old EQ may
	 * still arrive while the CQ is being migrated.
	 */
	if (!list_empty_careful(&cq->tasklet_ctx.list))
		goto unlock;

	atomic_inc(&cq->refcount);
	list_add_tail(&cq->tasklet_ctx.list, &eq_ctx->list);

unlock:
	spin_unlock_irqrestore(&eq_ctx->lock, irq_flags);
}

void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
{
	struct mlx4_cq *cq;
@@ -292,6 +337,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
	cq->uar        = uar;
	atomic_set(&cq->refcount, 1);
	init_completion(&cq->free);
	cq->comp = mlx4_add_cq_to_tasklet;
	cq->tasklet_ctx.priv =
		&priv->eq_table.eq[cq->vector].tasklet_ctx;
	INIT_LIST_HEAD(&cq->tasklet_ctx.list);


	cq->irq = priv->eq_table.eq[cq->vector].irq;
	return 0;
+15 −1
Original line number Diff line number Diff line
@@ -450,7 +450,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_eqe *eqe;
	int cqn;
	int cqn = -1;
	int eqes_found = 0;
	int set_ci = 0;
	int port;
@@ -758,6 +758,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)

	eq_set_ci(eq, 1);

	/* cqn is 24bit wide but is initialized such that its higher bits
	 * are ones too. Thus, if we got any event, cqn's high bits should be off
	 * and we need to schedule the tasklet.
	 */
	if (!(cqn & ~0xffffff))
		tasklet_schedule(&eq->tasklet_ctx.task);

	return eqes_found;
}

@@ -971,6 +978,12 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,

	eq->cons_index = 0;

	INIT_LIST_HEAD(&eq->tasklet_ctx.list);
	INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
	spin_lock_init(&eq->tasklet_ctx.lock);
	tasklet_init(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb,
		     (unsigned long)&eq->tasklet_ctx);

	return err;

err_out_free_mtt:
@@ -1027,6 +1040,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
		}
	}
	synchronize_irq(eq->irq);
	tasklet_disable(&eq->tasklet_ctx.task);

	mlx4_mtt_cleanup(dev, &eq->mtt);
	for (i = 0; i < npages; ++i)
+12 −0
Original line number Diff line number Diff line
@@ -43,6 +43,8 @@
#include <linux/timer.h>
#include <linux/semaphore.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/driver.h>
@@ -373,6 +375,14 @@ struct mlx4_srq_context {
	__be64			db_rec_addr;
};

/* Per-EQ state used to defer CQ completion callbacks to a tasklet. */
struct mlx4_eq_tasklet {
	/* CQs queued by mlx4_add_cq_to_tasklet(); accessed under 'lock' */
	struct list_head list;
	/* CQs spliced off 'list' and walked by mlx4_cq_tasklet_cb() */
	struct list_head process_list;
	/* tasklet that runs mlx4_cq_tasklet_cb() for this EQ */
	struct tasklet_struct task;
	/* lock on completion tasklet list */
	spinlock_t lock;
};

struct mlx4_eq {
	struct mlx4_dev	       *dev;
	void __iomem	       *doorbell;
@@ -383,6 +393,7 @@ struct mlx4_eq {
	int			nent;
	struct mlx4_buf_list   *page_list;
	struct mlx4_mtt		mtt;
	struct mlx4_eq_tasklet	tasklet_ctx;
};

struct mlx4_slave_eqe {
@@ -1146,6 +1157,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev);
int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
		  unsigned long timeout);

void mlx4_cq_tasklet_cb(unsigned long data);
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);

+5 −0
Original line number Diff line number Diff line
@@ -621,6 +621,11 @@ struct mlx4_cq {

	atomic_t		refcount;
	struct completion	free;
	struct {
		struct list_head list;
		void (*comp)(struct mlx4_cq *);
		void		*priv;
	} tasklet_ctx;
};

struct mlx4_qp {