
Commit a3285aa4 authored by Roland Dreier

IB/mthca: Fix race in reference counting



Fix races in destroying various objects.  If a destroy routine
waits for an object to become free by doing

	wait_event(obj->wait, !atomic_read(&obj->refcount));
	/* now clean up and destroy the object */

and another place drops a reference to the object by doing

	if (atomic_dec_and_test(&obj->refcount))
		wake_up(&obj->wait);

then this is susceptible to a race where the wait_event() and final
freeing of the object occur between the atomic_dec_and_test() and the
wake_up().  And this is a use-after-free, since wake_up() will be
called on part of the already-freed object.
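
Concretely, the bad interleaving looks like this (the CPU labels and
step ordering are an editorial illustration of the race just described,
not part of the original message):

	CPU 0					CPU 1
	atomic_dec_and_test()  /* count -> 0 */
						wait_event() sees count == 0
						frees the object
	wake_up(&obj->wait)    /* use-after-free */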

Fix this in mthca by replacing the atomic_t refcounts with plain old
integers protected by a spinlock.  This makes it possible to do the
decrement of the reference count and the wake_up() under the lock, so
that they appear as a single atomic operation to the code waiting on
the wait queue.
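
As a sketch, the fixed pattern looks like this (obj, table_lock and
get_refcount() are generic placeholder names for illustration; the
driver itself uses its per-table locks and helpers such as
get_cq_refcount(), as the diffs below show):

	/* Dropping a reference: the decrement and the wake_up() happen
	 * under table_lock, so a waiter can never free the object while
	 * a wake_up() on it is still pending. */
	spin_lock(&table_lock);
	if (!--obj->refcount)
		wake_up(&obj->wait);
	spin_unlock(&table_lock);

	/* The destroy path reads the count under the same lock: */
	static inline int get_refcount(struct obj *obj)
	{
		int c;

		spin_lock_irq(&table_lock);
		c = obj->refcount;
		spin_unlock_irq(&table_lock);

		return c;
	}

	wait_event(obj->wait, !get_refcount(obj));
	/* now clean up and destroy the object */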

While touching this code, also simplify mthca_cq_clean(): the CQ being
cleaned cannot go away, because it still has a QP attached to it.  So
there's no reason to be paranoid and look up the CQ by number; it's
perfectly safe to use the pointer that the callers already have.
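
As the mthca_dev.h hunk below shows, mthca_cq_clean() therefore takes
the CQ pointer instead of a CQ number:

	void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
			    struct mthca_srq *srq);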

Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent d945e1df
drivers/infiniband/hw/mthca/mthca_cq.c: +21 −20

@@ -238,9 +238,9 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
 	spin_lock(&dev->cq_table.lock);
 
 	cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
-
 	if (cq)
-		atomic_inc(&cq->refcount);
+		++cq->refcount;
+
 	spin_unlock(&dev->cq_table.lock);
 
 	if (!cq) {
@@ -254,8 +254,10 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
 	if (cq->ibcq.event_handler)
 		cq->ibcq.event_handler(&event, cq->ibcq.cq_context);
 
-	if (atomic_dec_and_test(&cq->refcount))
+	spin_lock(&dev->cq_table.lock);
+	if (!--cq->refcount)
 		wake_up(&cq->wait);
+	spin_unlock(&dev->cq_table.lock);
 }
 
 static inline int is_recv_cqe(struct mthca_cqe *cqe)
@@ -267,23 +269,13 @@ static inline int is_recv_cqe(struct mthca_cqe *cqe)
 		return !(cqe->is_send & 0x80);
 }
 
-void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
 		    struct mthca_srq *srq)
 {
-	struct mthca_cq *cq;
 	struct mthca_cqe *cqe;
 	u32 prod_index;
 	int nfreed = 0;
 
-	spin_lock_irq(&dev->cq_table.lock);
-	cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
-	if (cq)
-		atomic_inc(&cq->refcount);
-	spin_unlock_irq(&dev->cq_table.lock);
-
-	if (!cq)
-		return;
-
 	spin_lock_irq(&cq->lock);
 
 	/*
@@ -301,7 +293,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
 
 	if (0)
 		mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n",
-			  qpn, cqn, cq->cons_index, prod_index);
+			  qpn, cq->cqn, cq->cons_index, prod_index);
 
 	/*
 	 * Now sweep backwards through the CQ, removing CQ entries
@@ -325,8 +317,6 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
 	}
 
 	spin_unlock_irq(&cq->lock);
-	if (atomic_dec_and_test(&cq->refcount))
-		wake_up(&cq->wait);
 }
 
 void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
@@ -821,7 +811,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 	}
 
 	spin_lock_init(&cq->lock);
-	atomic_set(&cq->refcount, 1);
+	cq->refcount = 1;
 	init_waitqueue_head(&cq->wait);
 
 	memset(cq_context, 0, sizeof *cq_context);
@@ -896,6 +886,17 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 	return err;
 }
 
+static inline int get_cq_refcount(struct mthca_dev *dev, struct mthca_cq *cq)
+{
+	int c;
+
+	spin_lock_irq(&dev->cq_table.lock);
+	c = cq->refcount;
+	spin_unlock_irq(&dev->cq_table.lock);
+
+	return c;
+}
+
 void mthca_free_cq(struct mthca_dev *dev,
 		   struct mthca_cq *cq)
 {
@@ -929,6 +930,7 @@ void mthca_free_cq(struct mthca_dev *dev,
 	spin_lock_irq(&dev->cq_table.lock);
 	mthca_array_clear(&dev->cq_table.cq,
 			  cq->cqn & (dev->limits.num_cqs - 1));
+	--cq->refcount;
 	spin_unlock_irq(&dev->cq_table.lock);
 
 	if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
@@ -936,8 +938,7 @@ void mthca_free_cq(struct mthca_dev *dev,
 	else
 		synchronize_irq(dev->pdev->irq);
 
-	atomic_dec(&cq->refcount);
-	wait_event(cq->wait, !atomic_read(&cq->refcount));
+	wait_event(cq->wait, !get_cq_refcount(dev, cq));
 
 	if (cq->is_kernel) {
 		mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
drivers/infiniband/hw/mthca/mthca_dev.h: +1 −1

@@ -496,7 +496,7 @@ void mthca_free_cq(struct mthca_dev *dev,
 void mthca_cq_completion(struct mthca_dev *dev, u32 cqn);
 void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
 		    enum ib_event_type event_type);
-void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
 		    struct mthca_srq *srq);
 void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
 int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
drivers/infiniband/hw/mthca/mthca_provider.h: +12 −10

@@ -139,11 +139,12 @@ struct mthca_ah {
  * a qp may be locked, with the send cq locked first.  No other
  * nesting should be done.
  *
- * Each struct mthca_cq/qp also has an atomic_t ref count.  The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
+ * Each struct mthca_cq/qp also has an ref count, protected by the
+ * corresponding table lock.  The pointer from the cq/qp_table to the
+ * struct counts as one reference.  This reference also is good for
+ * access through the consumer API, so modifying the CQ/QP etc doesn't
+ * need to take another reference.  Access to a QP because of a
+ * completion being polled does not need a reference either.
 *
  * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the
  * destroy function to sleep on.
@@ -159,8 +160,9 @@ struct mthca_ah {
  * - decrement ref count; if zero, wake up waiters
  *
  * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
+ * - lock cq/qp_table
+ * - remove pointer and decrement ref count
+ * - unlock cq/qp_table lock
  * - wait_event until ref count is zero
 *
  * It is the consumer's responsibilty to make sure that no QP
@@ -197,7 +199,7 @@ struct mthca_cq_resize {
 struct mthca_cq {
 	struct ib_cq		ibcq;
 	spinlock_t		lock;
-	atomic_t		refcount;
+	int			refcount;
 	int			cqn;
 	u32			cons_index;
 	struct mthca_cq_buf	buf;
@@ -217,7 +219,7 @@ struct mthca_cq {
 struct mthca_srq {
 	struct ib_srq		ibsrq;
 	spinlock_t		lock;
-	atomic_t		refcount;
+	int			refcount;
 	int			srqn;
 	int			max;
 	int			max_gs;
@@ -254,7 +256,7 @@ struct mthca_wq {
 
 struct mthca_qp {
 	struct ib_qp           ibqp;
-	atomic_t               refcount;
+	int                    refcount;
 	u32                    qpn;
 	int                    is_direct;
 	u8                     port; /* for SQP and memfree use only */
drivers/infiniband/hw/mthca/mthca_qp.c: +22 −9

@@ -240,7 +240,7 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
 	spin_lock(&dev->qp_table.lock);
 	qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
 	if (qp)
-		atomic_inc(&qp->refcount);
+		++qp->refcount;
 	spin_unlock(&dev->qp_table.lock);
 
 	if (!qp) {
@@ -257,8 +257,10 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
 	if (qp->ibqp.event_handler)
 		qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
 
-	if (atomic_dec_and_test(&qp->refcount))
+	spin_lock(&dev->qp_table.lock);
+	if (!--qp->refcount)
 		wake_up(&qp->wait);
+	spin_unlock(&dev->qp_table.lock);
 }
 
 static int to_mthca_state(enum ib_qp_state ib_state)
@@ -833,10 +835,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	 * entries and reinitialize the QP.
 	 */
 	if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
+		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
 			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
 		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn,
+			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
 				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
 
 		mthca_wq_init(&qp->sq);
@@ -1096,7 +1098,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
 	int ret;
 	int i;
 
-	atomic_set(&qp->refcount, 1);
+	qp->refcount = 1;
 	init_waitqueue_head(&qp->wait);
 	qp->state    	 = IB_QPS_RESET;
 	qp->atomic_rd_en = 0;
@@ -1318,6 +1320,17 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
 	return err;
 }
 
+static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+	int c;
+
+	spin_lock_irq(&dev->qp_table.lock);
+	c = qp->refcount;
+	spin_unlock_irq(&dev->qp_table.lock);
+
+	return c;
+}
+
 void mthca_free_qp(struct mthca_dev *dev,
 		   struct mthca_qp *qp)
 {
@@ -1339,14 +1352,14 @@ void mthca_free_qp(struct mthca_dev *dev,
 	spin_lock(&dev->qp_table.lock);
 	mthca_array_clear(&dev->qp_table.qp,
 			  qp->qpn & (dev->limits.num_qps - 1));
+	--qp->refcount;
 	spin_unlock(&dev->qp_table.lock);
 
 	if (send_cq != recv_cq)
 		spin_unlock(&recv_cq->lock);
 	spin_unlock_irq(&send_cq->lock);
 
-	atomic_dec(&qp->refcount);
-	wait_event(qp->wait, !atomic_read(&qp->refcount));
+	wait_event(qp->wait, !get_qp_refcount(dev, qp));
 
 	if (qp->state != IB_QPS_RESET)
 		mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
@@ -1358,10 +1371,10 @@ void mthca_free_qp(struct mthca_dev *dev,
 	 * unref the mem-free tables and free the QPN in our table.
 	 */
 	if (!qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn,
+		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
 			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
 		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn,
+			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
 				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
 
 		mthca_free_memfree(dev, qp);
drivers/infiniband/hw/mthca/mthca_srq.c: +18 −5

@@ -241,7 +241,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 		goto err_out_mailbox;
 
 	spin_lock_init(&srq->lock);
-	atomic_set(&srq->refcount, 1);
+	srq->refcount = 1;
 	init_waitqueue_head(&srq->wait);
 
 	if (mthca_is_memfree(dev))
@@ -308,6 +308,17 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 	return err;
 }
 
+static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+	int c;
+
+	spin_lock_irq(&dev->srq_table.lock);
+	c = srq->refcount;
+	spin_unlock_irq(&dev->srq_table.lock);
+
+	return c;
+}
+
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
 {
 	struct mthca_mailbox *mailbox;
@@ -329,10 +340,10 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
 	spin_lock_irq(&dev->srq_table.lock);
 	mthca_array_clear(&dev->srq_table.srq,
 			  srq->srqn & (dev->limits.num_srqs - 1));
+	--srq->refcount;
 	spin_unlock_irq(&dev->srq_table.lock);
 
-	atomic_dec(&srq->refcount);
-	wait_event(srq->wait, !atomic_read(&srq->refcount));
+	wait_event(srq->wait, !get_srq_refcount(dev, srq));
 
 	if (!srq->ibsrq.uobject) {
 		mthca_free_srq_buf(dev, srq);
@@ -414,7 +425,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 	spin_lock(&dev->srq_table.lock);
 	srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1));
 	if (srq)
-		atomic_inc(&srq->refcount);
+		++srq->refcount;
 	spin_unlock(&dev->srq_table.lock);
 
 	if (!srq) {
@@ -431,8 +442,10 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 	srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
 
 out:
-	if (atomic_dec_and_test(&srq->refcount))
+	spin_lock(&dev->srq_table.lock);
+	if (!--srq->refcount)
 		wake_up(&srq->wait);
+	spin_unlock(&dev->srq_table.lock);
 }
 
 /*