Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ed23a727 authored by Roland Dreier
Browse files

IB: Return "maybe missed event" hint from ib_req_notify_cq()



The semantics defined by the InfiniBand specification say that
completion events are only generated when a completion is added to a
completion queue (CQ) after completion notification is requested.  In
other words, this means that the following race is possible:

	while (CQ is not empty)
		ib_poll_cq(CQ);
	// new completion is added after while loop is exited
	ib_req_notify_cq(CQ);
	// no event is generated for the existing completion

To close this race, the IB spec recommends doing another poll of the
CQ after requesting notification.

However, it is not always possible to arrange code this way (for
example, we have found that NAPI for IPoIB cannot poll after
requesting notification).  Also, some hardware (e.g. Mellanox HCAs)
actually will generate an event for completions added before the call
to ib_req_notify_cq() -- which is allowed by the spec, since there's
no way for any upper-layer consumer to know exactly when a completion
was really added -- so the extra poll of the CQ is just a waste.

Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for
ib_req_notify_cq() so that it can return a hint about whether a
completion may have been added before the request for notification.
The return value of ib_req_notify_cq() is extended so:

	 < 0	means an error occurred while requesting notification
	== 0	means notification was requested successfully, and if
		IB_CQ_REPORT_MISSED_EVENTS was passed in, then no
		events were missed and it is safe to wait for another
		event.
	 > 0	is only returned if IB_CQ_REPORT_MISSED_EVENTS was
		passed in.  It means that the consumer must poll the
		CQ again to make sure it is empty to avoid the race
		described above.

We add a flag to enable this behavior rather than turning it on
unconditionally, because checking for missed events may incur
significant overhead for some low-level drivers, and consumers that
don't care about the results of this test shouldn't be forced to pay
for the test.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent f4fd0b22
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -519,7 +519,7 @@ extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);

/* CM */
extern int c2_llp_connect(struct iw_cm_id *cm_id,
+12 −4
Original line number Diff line number Diff line
@@ -217,17 +217,19 @@ int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
	return npolled;
}

int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
	struct c2_mq_shared __iomem *shared;
	struct c2_cq *cq;
	unsigned long flags;
	int ret = 0;

	cq = to_c2cq(ibcq);
	shared = cq->mq.peer;

	if (notify == IB_CQ_NEXT_COMP)
	if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
	else if (notify == IB_CQ_SOLICITED)
	else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
	else
		return -EINVAL;
@@ -241,7 +243,13 @@ int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
	 */
	readb(&shared->armed);

	return 0;
	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
		spin_lock_irqsave(&cq->lock, flags);
		ret = !c2_mq_empty(&cq->mq);
		spin_unlock_irqrestore(&cq->lock, flags);
	}

	return ret;
}

static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
+3 −0
Original line number Diff line number Diff line
@@ -114,7 +114,10 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
				return -EIO;
			}
		}

		return 1;
	}

	return 0;
}

+5 −3
Original line number Diff line number Diff line
@@ -292,7 +292,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
#endif
}

static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct iwch_dev *rhp;
	struct iwch_cq *chp;
@@ -303,7 +303,7 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)

	chp = to_iwch_cq(ibcq);
	rhp = chp->rhp;
	if (notify == IB_CQ_SOLICITED)
	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
		cq_op = CQ_ARM_SE;
	else
		cq_op = CQ_ARM_AN;
@@ -317,9 +317,11 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
	PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr);
	err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
	spin_unlock_irqrestore(&chp->lock, flag);
	if (err)
	if (err < 0)
		printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
		       chp->cq.cqid);
	if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
		err = 0;
	return err;
}

+1 −1
Original line number Diff line number Diff line
@@ -135,7 +135,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);

int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);

int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);

struct ib_qp *ehca_create_qp(struct ib_pd *pd,
			     struct ib_qp_init_attr *init_attr,
Loading