
Commit 46a80d62 authored by Mike Marciniszyn, committed by Doug Ledford

IB/qib, staging/rdma/hfi1: add s_hlock for use in post send



This patch adds an additional lock, s_hlock, to reduce contention on the
s_lock.

The new lock is used in post_send() so that posting a send is not
serialized with the send engine and other send-related processing.
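
To make the locking split concrete, below is a minimal user-space sketch
of the idea. It is illustrative only, not the driver's code: the ring
layout, the GCC/C11 atomics, the pthread mutexes, and names such as
post_one_send() and RING_SIZE are assumptions for the example (the real
fields live in struct rvt_qp, and real PSNs are 24-bit values):

#include <errno.h>
#include <pthread.h>
#include <stdint.h>

#define RING_SIZE 256u	/* power of two: free-running counters wrap safely */

struct swqe {
	uint32_t psn;
	uint32_t lpsn;
	uint32_t length;
};

struct qp {
	/* post_send() side: its own lock, on its own cache line */
	pthread_mutex_t s_hlock;	/* serializes posters only */
	uint32_t s_head;		/* next free ring slot */
	uint32_t s_avail;		/* cached count of free slots */
	uint32_t s_next_psn;		/* now advanced at post time */
	/* send-engine side */
	pthread_mutex_t s_lock;		/* serializes engine and completions */
	uint32_t s_last;		/* oldest not-yet-completed slot */
	struct swqe wq[RING_SIZE];
};

/*
 * Producer path: runs entirely under s_hlock, so posting no longer
 * contends with the engine's s_lock.  s_avail is refilled from the
 * engine-owned s_last only when it runs out, which is the "mitigate
 * trading cache lines" point in the commit message.
 */
static int post_one_send(struct qp *qp, uint32_t len, uint32_t pmtu)
{
	struct swqe *wqe;
	int ret = 0;

	pthread_mutex_lock(&qp->s_hlock);
	if (qp->s_avail == 0) {
		/* the only read of an engine-owned field on this path */
		uint32_t last = __atomic_load_n(&qp->s_last, __ATOMIC_ACQUIRE);

		qp->s_avail = RING_SIZE - 1 - (qp->s_head - last);
	}
	if (qp->s_avail == 0) {
		ret = -ENOMEM;
		goto out;
	}
	wqe = &qp->wq[qp->s_head % RING_SIZE];
	wqe->length = len;
	wqe->psn = qp->s_next_psn;	/* psn assigned at post time now */
	wqe->lpsn = wqe->psn + (len > pmtu ? (len - 1) / pmtu : 0);
	qp->s_next_psn = wqe->lpsn + 1;
	qp->s_avail--;
	/* release: publish the wqe contents before the new head is visible */
	__atomic_store_n(&qp->s_head, qp->s_head + 1, __ATOMIC_RELEASE);
out:
	pthread_mutex_unlock(&qp->s_hlock);
	return ret;
}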

To do this, s_next_psn is now maintained by post_send(), and the
post_send()-related fields are moved to a new cache line.  An s_avail
count is maintained for post_send() to mitigate trading cache lines
with the send engine.  The s_lock is released and reacquired around
handing each just-built packet to the egress mechanism.
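
Continuing the same sketch, the engine side shows the pairing: an acquire
load of s_head matches the release store in post_one_send() (the patch
itself expresses analogous ordering with smp_read_barrier_depends() and
ACCESS_ONCE()), and the lock is dropped only around the egress call,
mirroring the qib_do_send() hunk below.  send_to_wire() is a hypothetical
stand-in for the driver's egress path:

/* Stand-in for the real egress path (PIO/DMA in the driver). */
static void send_to_wire(struct swqe *wqe)
{
	(void)wqe;
}

/*
 * Consumer path: runs under s_lock.  The lock is released and
 * reacquired around the (slow) egress step, so posters holding
 * s_hlock are never blocked behind the wire.
 */
static void do_send(struct qp *qp)
{
	pthread_mutex_lock(&qp->s_lock);
	for (;;) {
		/* paired with the release store of s_head in post_one_send() */
		uint32_t head = __atomic_load_n(&qp->s_head, __ATOMIC_ACQUIRE);
		struct swqe *wqe;

		if (qp->s_last == head)
			break;		/* ring empty */
		wqe = &qp->wq[qp->s_last % RING_SIZE];

		/* build the packet under the lock, then drop it around egress */
		pthread_mutex_unlock(&qp->s_lock);
		send_to_wire(wqe);
		pthread_mutex_lock(&qp->s_lock);

		/* retire the slot; posters re-read s_last when refilling s_avail */
		__atomic_store_n(&qp->s_last, qp->s_last + 1, __ATOMIC_RELEASE);
	}
	pthread_mutex_unlock(&qp->s_lock);
}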

Reviewed-by: Jubin John <jubin.john@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 20f333b6
+36 −0
@@ -474,6 +474,42 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth)
 	}
 }
 
+/**
+ * qib_check_send_wqe - validate wr/wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wr/wqe.  This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 0 on success, -EINVAL on failure
+ */
+int qib_check_send_wqe(struct rvt_qp *qp,
+		       struct rvt_swqe *wqe)
+{
+	struct rvt_ah *ah;
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		if (wqe->length > 0x80000000U)
+			return -EINVAL;
+		break;
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
 #ifdef CONFIG_DEBUG_FS
 
 struct qib_qp_iter {
+9 −35
@@ -226,6 +226,8 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
  * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * @qp: a pointer to the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
 int qib_make_rc_req(struct rvt_qp *qp)
@@ -241,7 +243,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 	u32 bth2;
 	u32 pmtu = qp->pmtu;
 	char newreq;
-	unsigned long flags;
 	int ret = 0;
 	int delta;
 
@@ -249,12 +250,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
 		ohdr = &priv->s_hdr->u.l.oth;
 
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout resends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
-
 	/* Sending responses has higher priority over sending requests. */
 	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
 	    qib_make_rc_ack(dev, qp, ohdr, pmtu))
@@ -264,7 +259,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
 		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
 		if (atomic_read(&priv->s_dma_busy)) {
@@ -321,8 +317,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				qp->s_flags |= RVT_S_WAIT_FENCE;
 				goto bail;
 			}
-			wqe->psn = qp->s_next_psn;
 			newreq = 1;
+			qp->s_psn = wqe->psn;
 		}
 		/*
 		 * Note that we have to be careful not to modify the
@@ -341,9 +337,7 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
 				break;
@@ -381,9 +375,7 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(RDMA_WRITE_FIRST);
 				len = pmtu;
 				break;
@@ -418,13 +410,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				qp->s_num_rd_atomic++;
 				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
 			}
 
 			ohdr->u.rc.reth.vaddr =
@@ -456,7 +441,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 				qp->s_num_rd_atomic++;
 				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
 			}
 			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
@@ -499,11 +483,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
 		}
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			qp->s_psn = wqe->lpsn + 1;
-		else {
+		else
 			qp->s_psn++;
-			if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
@@ -523,8 +504,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -564,8 +543,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -630,13 +607,9 @@ int qib_make_rc_req(struct rvt_qp *qp)
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
 	qp->s_flags &= ~RVT_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
@@ -1454,7 +1427,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 		goto ack_done;
 
 	/* Ignore invalid responses. */
-	if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+	smp_read_barrier_depends(); /* see post_one_send */
+	if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
 		goto ack_done;
 
 	/* Ignore duplicate responses. */
+7 −4
@@ -391,7 +391,8 @@ static void qib_ruc_loopback(struct rvt_qp *sqp)
 	sqp->s_flags |= RVT_S_BUSY;
 
 again:
-	if (sqp->s_last == sqp->s_head)
+	smp_read_barrier_depends(); /* see post_one_send() */
+	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
 		goto clr_busy;
 	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
 
@@ -765,22 +766,24 @@ void qib_do_send(struct rvt_qp *qp)

 	qp->s_flags |= RVT_S_BUSY;
 
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (qp->s_hdrwords != 0) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
 			/*
 			 * If the packet cannot be sent now, return and
 			 * the send tasklet will be woken up later.
 			 */
 			if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
 					   qp->s_cur_sge, qp->s_cur_size))
-				break;
+				return;
 			/* Record that s_hdr is empty. */
 			qp->s_hdrwords = 0;
+			spin_lock_irqsave(&qp->s_lock, flags);
 		}
 	} while (make_req(qp));
+
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /*
+9 −13
@@ -41,6 +41,8 @@
  * qib_make_uc_req - construct a request packet (SEND, RDMA write)
  * @qp: a pointer to the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
 int qib_make_uc_req(struct rvt_qp *qp)
@@ -48,20 +50,18 @@ int qib_make_uc_req(struct rvt_qp *qp)
 	struct qib_qp_priv *priv = qp->priv;
 	struct qib_other_headers *ohdr;
 	struct rvt_swqe *wqe;
-	unsigned long flags;
 	u32 hwords;
 	u32 bth0;
 	u32 len;
 	u32 pmtu = qp->pmtu;
 	int ret = 0;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
 		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
 		if (atomic_read(&priv->s_dma_busy)) {
@@ -90,13 +90,13 @@ int qib_make_uc_req(struct rvt_qp *qp)
 		    RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/* Check if send work queue is empty. */
-		if (qp->s_cur == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/*
 		 * Start a new request.
 		 */
-		wqe->psn = qp->s_next_psn;
-		qp->s_psn = qp->s_next_psn;
+		qp->s_psn = wqe->psn;
 		qp->s_sge.sge = wqe->sg_list[0];
 		qp->s_sge.sg_list = wqe->sg_list + 1;
 		qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -215,15 +215,11 @@ int qib_make_uc_req(struct rvt_qp *qp)
 	qp->s_cur_sge = &qp->s_sge;
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
-			    qp->s_next_psn++ & QIB_PSN_MASK);
+			    qp->s_psn++ & QIB_PSN_MASK);
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
 	qp->s_flags &= ~RVT_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
+11 −11
@@ -234,6 +234,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
  * qib_make_ud_req - construct a UD request packet
  * @qp: the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
 int qib_make_ud_req(struct rvt_qp *qp)
@@ -244,7 +246,6 @@ int qib_make_ud_req(struct rvt_qp *qp)
 	struct qib_pportdata *ppd;
 	struct qib_ibport *ibp;
 	struct rvt_swqe *wqe;
-	unsigned long flags;
 	u32 nwords;
 	u32 extra_bytes;
 	u32 bth0;
@@ -253,13 +254,12 @@ int qib_make_ud_req(struct rvt_qp *qp)
 	int ret = 0;
 	int next_cur;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
 		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
 		if (atomic_read(&priv->s_dma_busy)) {
@@ -271,7 +271,9 @@ int qib_make_ud_req(struct rvt_qp *qp)
 		goto done;
 	}
 
-	if (qp->s_cur == qp->s_head)
+	/* see post_one_send() */
+	smp_read_barrier_depends();
+	if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 		goto bail;
 
 	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
@@ -292,6 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
 		this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
 		if (unlikely(lid == ppd->lid)) {
+			unsigned long flags;
 			/*
 			 * If DMAs are in progress, we can't generate
 			 * a completion for the loopback packet since
@@ -304,6 +307,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
 				goto bail;
 			}
 			qp->s_cur = next_cur;
+			local_irq_save(flags);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			qib_ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, flags);
@@ -378,7 +382,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
 		ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
 		cpu_to_be32(QIB_MULTICAST_QPN) :
 		cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+	ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
 	/*
 	 * Qkeys with the high order bit set mean use the
 	 * qkey from the QP context instead of the WR (see 10.2.5).
@@ -388,13 +392,9 @@ int qib_make_ud_req(struct rvt_qp *qp)
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
 	qp->s_flags &= ~RVT_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 