Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2549f307 authored by Trond Myklebust
Browse files

Merge tag 'nfs-rdma-4.10-1' of git://git.linux-nfs.org/projects/anna/nfs-rdma

NFS: NFSoRDMA Client Side Changes

New Features:
- Support for SG_GAP devices

Bugfixes and cleanups:
- Cap size of callback buffer resources
- Improve send queue and RPC metric accounting
- Fix coverity warning
- Avoid calls to ro_unmap_safe()
- Refactor FRMR invalidation
- Error message improvements
parents 1cded9d2 3a72dc77
Loading
Loading
Loading
Loading
+6 −4
Original line number Diff line number Diff line
@@ -157,15 +157,17 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
	spin_lock(&op_metrics->om_lock);

	op_metrics->om_ops++;
	op_metrics->om_ntrans += req->rq_ntrans;
	/* kernel API: om_ops must never become larger than om_ntrans */
	op_metrics->om_ntrans += max(req->rq_ntrans, 1);
	op_metrics->om_timeouts += task->tk_timeouts;

	op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
	op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;

	if (ktime_to_ns(req->rq_xtime)) {
		delta = ktime_sub(req->rq_xtime, task->tk_start);
		op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);

	}
	op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);

	delta = ktime_sub(now, task->tk_start);
+3 −1
Original line number Diff line number Diff line
@@ -55,7 +55,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
	if (IS_ERR(rb))
		goto out_fail;
	req->rl_sendbuf = rb;
	xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size);
	xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
		     min_t(size_t, size, PAGE_SIZE));
	rpcrdma_set_xprtdata(rqst, req);
	return 0;

@@ -191,6 +192,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
	size_t maxmsg;

	maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize);
	maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
	return maxmsg - RPCRDMA_HDRLEN_MIN;
}

+44 −50
Original line number Diff line number Diff line
@@ -101,7 +101,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
	struct rpcrdma_frmr *f = &r->frmr;
	int rc;

	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth);
	f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
	if (IS_ERR(f->fr_mr))
		goto out_mr_err;

@@ -157,7 +157,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
		return rc;
	}

	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG,
	f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
			       ia->ri_max_frmr_depth);
	if (IS_ERR(f->fr_mr)) {
		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
@@ -171,10 +171,6 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
}

/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
 *
 * There's no recovery if this fails. The FRMR is abandoned, but
 * remains in rb_all. It will be cleaned up when the transport is
 * destroyed.
 */
static void
frwr_op_recover_mr(struct rpcrdma_mw *mw)
@@ -210,11 +206,16 @@ static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	     struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *attrs = &ia->ri_device->attrs;
	int depth, delta;

	ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;

	ia->ri_max_frmr_depth =
			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
			      ia->ri_device->attrs.max_fast_reg_page_list_len);
			      attrs->max_fast_reg_page_list_len);
	dprintk("RPC:       %s: device's max FR page list len = %u\n",
		__func__, ia->ri_max_frmr_depth);

@@ -241,8 +242,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	}

	ep->rep_attr.cap.max_send_wr *= depth;
	if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
		cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
	if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) {
		cdata->max_requests = attrs->max_qp_wr / depth;
		if (!cdata->max_requests)
			return -EINVAL;
		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
@@ -348,6 +349,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	    int nsegs, bool writing, struct rpcrdma_mw **out)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
	struct rpcrdma_mw *mw;
	struct rpcrdma_frmr *frmr;
	struct ib_mr *mr;
@@ -383,8 +385,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,

		++seg;
		++i;

		/* Check for holes */
		if (holes_ok)
			continue;
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
@@ -421,7 +423,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	DECR_CQCOUNT(&r_xprt->rx_ep);
	rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
	if (rc)
		goto out_senderr;
@@ -451,26 +453,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	return -ENOTCONN;
}

/* Build a LOCAL_INV work request for the FRMR owned by @mw.
 *
 * The FRMR is flagged FRMR_IS_INVALID and its completion handler is
 * pointed at frwr_wc_localinv before the WR is returned to the caller
 * for posting.
 */
static struct ib_send_wr *
__frwr_prepare_linv_wr(struct rpcrdma_mw *mw)
{
	struct rpcrdma_frmr *frmr = &mw->frmr;
	struct ib_send_wr *wr = &frmr->fr_invwr;

	dprintk("RPC:       %s: invalidating frmr %p\n", __func__, frmr);

	frmr->fr_state = FRMR_IS_INVALID;

	memset(wr, 0, sizeof(*wr));
	frmr->fr_cqe.done = frwr_wc_localinv;
	wr->wr_cqe = &frmr->fr_cqe;
	wr->opcode = IB_WR_LOCAL_INV;
	wr->ex.invalidate_rkey = frmr->fr_mr->rkey;

	return wr;
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
@@ -481,12 +463,12 @@ __frwr_prepare_linv_wr(struct rpcrdma_mw *mw)
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
	struct ib_send_wr *first, **prev, *last, *bad_wr;
	struct rpcrdma_rep *rep = req->rl_reply;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mw *mw, *tmp;
	struct rpcrdma_frmr *f;
	int rc;
	int count, rc;

	dprintk("RPC:       %s: req %p\n", __func__, req);

@@ -496,22 +478,29 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
	 * a single ib_post_send() call.
	 */
	f = NULL;
	invalidate_wrs = pos = prev = NULL;
	count = 0;
	prev = &first;
	list_for_each_entry(mw, &req->rl_registered, mw_list) {
		if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
		    (mw->mw_handle == rep->rr_inv_rkey)) {
		mw->frmr.fr_state = FRMR_IS_INVALID;
			continue;
		}

		pos = __frwr_prepare_linv_wr(mw);
		if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
		    (mw->mw_handle == rep->rr_inv_rkey))
			continue;

		if (!invalidate_wrs)
			invalidate_wrs = pos;
		else
			prev->next = pos;
		prev = pos;
		f = &mw->frmr;
		dprintk("RPC:       %s: invalidating frmr %p\n",
			__func__, f);

		f->fr_cqe.done = frwr_wc_localinv;
		last = &f->fr_invwr;
		memset(last, 0, sizeof(*last));
		last->wr_cqe = &f->fr_cqe;
		last->opcode = IB_WR_LOCAL_INV;
		last->ex.invalidate_rkey = mw->mw_handle;
		count++;

		*prev = last;
		prev = &last->next;
	}
	if (!f)
		goto unmap;
@@ -520,17 +509,22 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
	f->fr_invwr.send_flags = IB_SEND_SIGNALED;
	last->send_flags = IB_SEND_SIGNALED;
	f->fr_cqe.done = frwr_wc_localinv_wake;
	reinit_completion(&f->fr_linv_done);
	INIT_CQCOUNT(&r_xprt->rx_ep);

	/* Initialize CQ count, since there is always a signaled
	 * WR being posted here.  The new cqcount depends on how
	 * many SQEs are about to be consumed.
	 */
	rpcrdma_init_cqcount(&r_xprt->rx_ep, count);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless ri_id->qp is a valid pointer.
	 */
	r_xprt->rx_stats.local_inv_needed++;
	rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
	rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
	if (rc)
		goto reset_mrs;

@@ -541,7 +535,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
	 */
unmap:
	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
		dprintk("RPC:       %s: unmapping frmr %p\n",
		dprintk("RPC:       %s: DMA unmapping frmr %p\n",
			__func__, &mw->frmr);
		list_del_init(&mw->mw_list);
		ib_dma_unmap_sg(ia->ri_device,
@@ -559,7 +553,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
	 */
	list_for_each_entry(mw, &req->rl_registered, mw_list) {
		f = &mw->frmr;
		if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) {
		if (mw->mw_handle == bad_wr->ex.invalidate_rkey) {
			__frwr_reset_mr(ia, mw);
			bad_wr = bad_wr->next;
		}
+1 −35
Original line number Diff line number Diff line
@@ -786,7 +786,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, int wrchunk, __be32 **iptrp)
		ifdebug(FACILITY) {
			u64 off;
			xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
			dprintk("RPC:       %s: chunk %d@0x%llx:0x%x\n",
			dprintk("RPC:       %s: chunk %d@0x%016llx:0x%08x\n",
				__func__,
				be32_to_cpu(seg->rs_length),
				(unsigned long long)off,
@@ -906,28 +906,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
	return fixup_copy_count;
}

/* Workqueue callback: propagate the RDMA endpoint's connection state
 * (ep->rep_connected) to the generic rpc_xprt, and wake any RPC tasks
 * waiting on the transport.
 */
void
rpcrdma_connect_worker(struct work_struct *work)
{
	struct rpcrdma_ep *ep =
		container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
	struct rpcrdma_xprt *r_xprt =
		container_of(ep, struct rpcrdma_xprt, rx_ep);
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock_bh(&xprt->transport_lock);

	/* Bump the cookie; zero is a reserved value, so skip it on wrap. */
	xprt->connect_cookie++;
	if (xprt->connect_cookie == 0)
		xprt->connect_cookie++;

	if (ep->rep_connected > 0) {
		/* Transport is up: wake waiters with success. */
		if (!xprt_test_and_set_connected(xprt))
			xprt_wake_pending_tasks(xprt, 0);
	} else if (xprt_test_and_clear_connected(xprt)) {
		/* Transport went down: fail waiters with ENOTCONN. */
		xprt_wake_pending_tasks(xprt, -ENOTCONN);
	}

	spin_unlock_bh(&xprt->transport_lock);
}

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/* By convention, backchannel calls arrive via rdma_msg type
 * messages, and never populate the chunk lists. This makes
@@ -959,18 +937,6 @@ rpcrdma_is_bcall(struct rpcrdma_msg *headerp)
}
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

/*
 * This function is called when an async event is posted to
 * the connection which changes the connection state. All it
 * does at this point is schedule deferred work to mark the
 * connection up or down; the RPC timers do the rest.
 */
void
rpcrdma_conn_func(struct rpcrdma_ep *ep)
{
	schedule_delayed_work(&ep->rep_connect_worker, 0);
}

/* Process received RPC/RDMA messages.
 *
 * Errors must result in the RPC task either being awakened, or
+32 −2
Original line number Diff line number Diff line
@@ -219,6 +219,34 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt)
		}
}

/* Handle a connection state change on @ep: defer the rpc_xprt state
 * update to rep_connect_worker. A delay of 0 means "run as soon as
 * the workqueue can get to it".
 */
void
rpcrdma_conn_func(struct rpcrdma_ep *ep)
{
	schedule_delayed_work(&ep->rep_connect_worker, 0);
}

/* Deferred work for rpcrdma_conn_func: mirror the endpoint's current
 * connection state into the owning rpc_xprt and wake pending tasks
 * accordingly.
 */
void
rpcrdma_connect_worker(struct work_struct *work)
{
	struct rpcrdma_ep *ep =
		container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
	struct rpcrdma_xprt *r_xprt =
		container_of(ep, struct rpcrdma_xprt, rx_ep);
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock_bh(&xprt->transport_lock);

	/* Advance connect_cookie, stepping over the reserved value 0. */
	xprt->connect_cookie++;
	if (xprt->connect_cookie == 0)
		xprt->connect_cookie++;

	if (ep->rep_connected > 0) {
		/* Connected: report success to waiting RPC tasks. */
		if (!xprt_test_and_set_connected(xprt))
			xprt_wake_pending_tasks(xprt, 0);
	} else if (xprt_test_and_clear_connected(xprt)) {
		/* Disconnected: fail waiting RPC tasks. */
		xprt_wake_pending_tasks(xprt, -ENOTCONN);
	}

	spin_unlock_bh(&xprt->transport_lock);
}

static void
xprt_rdma_connect_worker(struct work_struct *work)
{
@@ -621,6 +649,7 @@ xprt_rdma_free(struct rpc_task *task)

	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);

	if (unlikely(!list_empty(&req->rl_registered)))
		ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
	rpcrdma_unmap_sges(ia, req);
	rpcrdma_buffer_put(req);
@@ -657,6 +686,7 @@ xprt_rdma_send_request(struct rpc_task *task)
	int rc = 0;

	/* On retransmit, remove any previously registered chunks */
	if (unlikely(!list_empty(&req->rl_registered)))
		r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);

	rc = rpcrdma_marshal_req(rqst);
Loading