
Commit a19aa5c5 authored by Roland Dreier

IB/mthca: Fix potential AB-BA deadlock with CQ locks



When destroying a QP, mthca locks both the QP's send CQ and receive
CQ.  However, the following scenario is perfectly valid:

    QP_a: send_cq == CQ_x, recv_cq == CQ_y
    QP_b: send_cq == CQ_y, recv_cq == CQ_x

The old mthca code simply locked send_cq and then recv_cq, which in
this case could lead to an AB-BA deadlock if QP_a and QP_b were
destroyed simultaneously.
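
To make the interleaving concrete (the CPU labels below are
illustrative, not taken from an actual lockdep report):

    CPU 0: destroy QP_a             CPU 1: destroy QP_b
    lock CQ_x (QP_a's send_cq)      lock CQ_y (QP_b's send_cq)
    lock CQ_y ... blocked           lock CQ_x ... blocked

Each CPU holds the lock the other needs, so neither can make progress.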

We can fix this by changing the locking code to lock the CQ with the
lower CQ number first, which will create a consistent lock ordering.
Also, the second CQ is locked with spin_lock_nested() to tell lockdep
that we know what we're doing with the lock nesting.

This bug was found by lockdep.
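
For illustration only, the same lock-by-lower-number idiom can be
sketched as self-contained userspace C, with pthread mutexes standing
in for the kernel spinlocks; fake_cq, cqn, lock_cqs() and unlock_cqs()
are hypothetical names here, not the actual mthca definitions:

    #include <pthread.h>

    struct fake_cq {
            int cqn;                /* CQ number, used to order lock acquisition */
            pthread_mutex_t lock;
    };

    static void lock_cqs(struct fake_cq *send_cq, struct fake_cq *recv_cq)
    {
            if (send_cq == recv_cq)
                    pthread_mutex_lock(&send_cq->lock);     /* one CQ: one lock */
            else if (send_cq->cqn < recv_cq->cqn) {
                    pthread_mutex_lock(&send_cq->lock);     /* lower cqn first... */
                    pthread_mutex_lock(&recv_cq->lock);
            } else {
                    pthread_mutex_lock(&recv_cq->lock);     /* ...whichever role it plays */
                    pthread_mutex_lock(&send_cq->lock);
            }
    }

    static void unlock_cqs(struct fake_cq *send_cq, struct fake_cq *recv_cq)
    {
            if (send_cq == recv_cq) {
                    pthread_mutex_unlock(&send_cq->lock);
            } else {
                    /* Unlock order does not matter for deadlock avoidance. */
                    pthread_mutex_unlock(&send_cq->lock);
                    pthread_mutex_unlock(&recv_cq->lock);
            }
    }

Because every path acquires the two locks in cqn order, the CQ_x/CQ_y
cycle above cannot form.  Pthread mutexes have no lockdep, so the
spin_lock_nested() annotation has no equivalent in this sketch.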

Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent e54b82d7
drivers/infiniband/hw/mthca/mthca_provider.h  +2 −2
@@ -136,8 +136,8 @@ struct mthca_ah {
  * We have one global lock that protects dev->cq/qp_table.  Each
  * struct mthca_cq/qp also has its own lock.  An individual qp lock
  * may be taken inside of an individual cq lock.  Both cqs attached to
- * a qp may be locked, with the send cq locked first.  No other
- * nesting should be done.
+ * a qp may be locked, with the cq with the lower cqn locked first.
+ * No other nesting should be done.
  *
  * Each struct mthca_cq/qp also has an ref count, protected by the
  * corresponding table lock.  The pointer from the cq/qp_table to the
drivers/infiniband/hw/mthca/mthca_qp.c  +30 −12
@@ -1263,6 +1263,32 @@ int mthca_alloc_qp(struct mthca_dev *dev,
 	return 0;
 }
 
+static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+{
+	if (send_cq == recv_cq)
+		spin_lock_irq(&send_cq->lock);
+	else if (send_cq->cqn < recv_cq->cqn) {
+		spin_lock_irq(&send_cq->lock);
+		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+	} else {
+		spin_lock_irq(&recv_cq->lock);
+		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+	}
+}
+
+static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+{
+	if (send_cq == recv_cq)
+		spin_unlock_irq(&send_cq->lock);
+	else if (send_cq->cqn < recv_cq->cqn) {
+		spin_unlock(&recv_cq->lock);
+		spin_unlock_irq(&send_cq->lock);
+	} else {
+		spin_unlock(&send_cq->lock);
+		spin_unlock_irq(&recv_cq->lock);
+	}
+}
+
 int mthca_alloc_sqp(struct mthca_dev *dev,
 		    struct mthca_pd *pd,
 		    struct mthca_cq *send_cq,
@@ -1315,17 +1341,13 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
 	 * Lock CQs here, so that CQ polling code can do QP lookup
 	 * without taking a lock.
 	 */
-	spin_lock_irq(&send_cq->lock);
-	if (send_cq != recv_cq)
-		spin_lock(&recv_cq->lock);
+	mthca_lock_cqs(send_cq, recv_cq);
 
 	spin_lock(&dev->qp_table.lock);
 	mthca_array_clear(&dev->qp_table.qp, mqpn);
 	spin_unlock(&dev->qp_table.lock);
 
-	if (send_cq != recv_cq)
-		spin_unlock(&recv_cq->lock);
-	spin_unlock_irq(&send_cq->lock);
+	mthca_unlock_cqs(send_cq, recv_cq);
 
 err_out:
 	dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1359,9 +1381,7 @@ void mthca_free_qp(struct mthca_dev *dev,
 	 * Lock CQs here, so that CQ polling code can do QP lookup
 	 * without taking a lock.
 	 */
-	spin_lock_irq(&send_cq->lock);
-	if (send_cq != recv_cq)
-		spin_lock(&recv_cq->lock);
+	mthca_lock_cqs(send_cq, recv_cq);
 
 	spin_lock(&dev->qp_table.lock);
 	mthca_array_clear(&dev->qp_table.qp,
@@ -1369,9 +1389,7 @@ void mthca_free_qp(struct mthca_dev *dev,
 	--qp->refcount;
 	spin_unlock(&dev->qp_table.lock);
 
-	if (send_cq != recv_cq)
-		spin_unlock(&recv_cq->lock);
-	spin_unlock_irq(&send_cq->lock);
+	mthca_unlock_cqs(send_cq, recv_cq);
 
 	wait_event(qp->wait, !get_qp_refcount(dev, qp));