
Commit af8aab71 authored by Sebastian Sanchez, committed by Doug Ledford

IB/hfi1: Optimize kthread pointer locking when queuing CQ entries



All threads queuing CQ entries on different CQs are unnecessarily
synchronized by a spin lock to check that the CQ kthread worker hasn't
been destroyed before queuing a CQ entry.

The lock used in 6efaf10f ("IB/rdmavt: Avoid queuing work into a
destroyed cq kthread worker") is a device-global lock and will perform
poorly at scale, as completions are entered from a large number of
CPUs.

Convert to RCU: the read side, in rvt_cq_enter(), determines that the
worker is alive prior to triggering the completion event; the write
side, in rvt_driver_cq_init() and rvt_cq_exit(), publishes and retracts
the worker pointer.
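
In outline, the resulting pattern looks like the sketch below
(hypothetical names: cq_worker, cq_worker_lock, cq_queue(), cq_setup(),
cq_teardown(); the driver's actual read side is rvt_cq_enter(), and the
write side lives in rvt_driver_cq_init() and rvt_cq_exit()):

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

static struct kthread_worker __rcu *cq_worker;	/* RCU-protected pointer */
static DEFINE_SPINLOCK(cq_worker_lock);		/* serializes writers only */

/* Read side (hot path): per-CPU readers no longer contend on a lock. */
static void cq_queue(struct kthread_work *work)
{
	struct kthread_worker *w;

	rcu_read_lock();
	w = rcu_dereference(cq_worker);	/* NULL once teardown has begun */
	if (likely(w))
		kthread_queue_work(w, work);
	rcu_read_unlock();
}

/* Write side, init: publish the worker.  RCU_INIT_POINTER() suffices
 * here because no reader can run before the device is registered. */
static int cq_setup(void)
{
	struct kthread_worker *w = kthread_create_worker(0, "cq_sketch");

	if (IS_ERR(w))
		return PTR_ERR(w);
	RCU_INIT_POINTER(cq_worker, w);
	return 0;
}

/* Write side, teardown: retract the pointer, wait out readers, destroy. */
static void cq_teardown(void)
{
	struct kthread_worker *w;

	spin_lock(&cq_worker_lock);
	w = rcu_dereference_protected(cq_worker,
				      lockdep_is_held(&cq_worker_lock));
	RCU_INIT_POINTER(cq_worker, NULL);
	spin_unlock(&cq_worker_lock);

	synchronize_rcu();	/* every reader that saw w has finished */
	if (w)
		kthread_destroy_worker(w);
}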

Fixes: 6efaf10f ("IB/rdmavt: Avoid queuing work into a destroyed cq kthread worker")
Cc: <stable@vger.kernel.org> # 4.14.x
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent c872a1f9
drivers/infiniband/sw/rdmavt/cq.c  +19 −12
@@ -120,17 +120,20 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED &&
 	     (solicited || entry->status != IB_WC_SUCCESS))) {
+		struct kthread_worker *worker;
+
 		/*
 		 * This will cause send_complete() to be called in
 		 * another thread.
 		 */
-		spin_lock(&cq->rdi->n_cqs_lock);
-		if (likely(cq->rdi->worker)) {
+		rcu_read_lock();
+		worker = rcu_dereference(cq->rdi->worker);
+		if (likely(worker)) {
 			cq->notify = RVT_CQ_NONE;
 			cq->triggered++;
-			kthread_queue_work(cq->rdi->worker, &cq->comptask);
+			kthread_queue_work(worker, &cq->comptask);
 		}
-		spin_unlock(&cq->rdi->n_cqs_lock);
+		rcu_read_unlock();
 	}
 
 	spin_unlock_irqrestore(&cq->lock, flags);
@@ -512,7 +515,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
 	int cpu;
 	struct kthread_worker *worker;
 
-	if (rdi->worker)
+	if (rcu_access_pointer(rdi->worker))
 		return 0;
 
 	spin_lock_init(&rdi->n_cqs_lock);
@@ -524,7 +527,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
 		return PTR_ERR(worker);
 
 	set_user_nice(worker->task, MIN_NICE);
-	rdi->worker = worker;
+	RCU_INIT_POINTER(rdi->worker, worker);
 	return 0;
 }

@@ -536,15 +539,19 @@ void rvt_cq_exit(struct rvt_dev_info *rdi)
 {
 	struct kthread_worker *worker;
 
-	/* block future queuing from send_complete() */
-	spin_lock_irq(&rdi->n_cqs_lock);
-	worker = rdi->worker;
+	if (!rcu_access_pointer(rdi->worker))
+		return;
+
+	spin_lock(&rdi->n_cqs_lock);
+	worker = rcu_dereference_protected(rdi->worker,
+					   lockdep_is_held(&rdi->n_cqs_lock));
 	if (!worker) {
-		spin_unlock_irq(&rdi->n_cqs_lock);
+		spin_unlock(&rdi->n_cqs_lock);
 		return;
 	}
-	rdi->worker = NULL;
-	spin_unlock_irq(&rdi->n_cqs_lock);
+	RCU_INIT_POINTER(rdi->worker, NULL);
+	spin_unlock(&rdi->n_cqs_lock);
+	synchronize_rcu();
 
 	kthread_destroy_worker(worker);
 }
include/rdma/rdma_vt.h  +1 −1
@@ -402,7 +402,7 @@ struct rvt_dev_info {
 	spinlock_t pending_lock; /* protect pending mmap list */
 
 	/* CQ */
-	struct kthread_worker *worker; /* per device cq worker */
+	struct kthread_worker __rcu *worker; /* per device cq worker */
 	u32 n_cqs_allocated;    /* number of CQs allocated for device */
 	spinlock_t n_cqs_lock; /* protect count of in use cqs */
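
For reference, __rcu is a sparse annotation (checked with make C=1):
after this change a plain rdi->worker load is flagged, so every access
goes through an accessor: rcu_dereference() on the read side,
rcu_dereference_protected() for updaters holding n_cqs_lock,
rcu_access_pointer() for pointer tests that never dereference the
result, and RCU_INIT_POINTER() for stores that need no memory ordering
against readers.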