Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cc886c9f authored by Chuck Lever, committed by Doug Ledford
Browse files

svcrdma: Improve allocation of struct svc_rdma_op_ctxt



When the maximum payload size of NFS READ and WRITE was increased
by commit cc9a903d ("svcrdma: Change maximum server payload back
to RPCSVC_MAXPAYLOAD"), the size of struct svc_rdma_op_ctxt
increased to over 6KB (on x86_64). That makes allocating one of
these from a kmem_cache more likely to fail in situations when
system memory is exhausted.

Since I'm about to add a caller where this allocation must always
work _and_ it cannot sleep, pre-allocate ctxts for each connection.

Another motivation for this change is that NFSv4.x servers are
required by specification not to drop NFS requests. Pre-allocating
memory resources reduces the likelihood of a drop.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Acked-by: Bruce Fields <bfields@fieldses.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent ced4ac0c
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -69,6 +69,7 @@ extern atomic_t rdma_stat_sq_prod;
 * completes.
 */
struct svc_rdma_op_ctxt {
	struct list_head free;
	struct svc_rdma_op_ctxt *read_hdr;
	struct svc_rdma_fastreg_mr *frmr;
	int hdr_count;
@@ -141,7 +142,10 @@ struct svcxprt_rdma {
	struct ib_pd         *sc_pd;

	atomic_t	     sc_dma_used;
	atomic_t	     sc_ctxt_used;
	spinlock_t	     sc_ctxt_lock;
	struct list_head     sc_ctxts;
	int		     sc_ctxt_used;

	struct list_head     sc_rq_dto_q;
	spinlock_t	     sc_rq_dto_lock;
	struct ib_qp         *sc_qp;
+89 −13
Original line number Diff line number Diff line
@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
}
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
/*
 * Allocate a single svc_rdma_op_ctxt using the caller-supplied gfp
 * flags and bind it to @xprt.
 *
 * On success only ctxt->xprt and the two list heads (free, dto_q) are
 * set up here; every other field is left for the caller to fill in.
 *
 * Returns the new ctxt, or NULL when the allocation fails.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers.
 * The kmem_cache_alloc() assignment below is presumably the removed
 * line and the kmalloc() assignment its replacement -- confirm against
 * the real tree before relying on either.
 */
static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
					   gfp_t flags)
{
	struct svc_rdma_op_ctxt *ctxt;

	ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
				GFP_KERNEL | __GFP_NOFAIL);
	ctxt = kmalloc(sizeof(*ctxt), flags);
	if (ctxt) {
		ctxt->xprt = xprt;
		/* Self-linked heads so the ctxt can be list_add()ed later. */
		INIT_LIST_HEAD(&ctxt->free);
		INIT_LIST_HEAD(&ctxt->dto_q);
	}
	return ctxt;
}

/*
 * Fill xprt->sc_ctxts with pre-allocated ctxts.
 *
 * The number allocated is sc_sq_depth + sc_max_requests: each RPC/RDMA
 * credit can consume a number of send and receive WQEs, and one ctxt is
 * set aside for each of them.
 *
 * Returns true when every ctxt was allocated. Returns false on the
 * first allocation failure; ctxts already linked onto sc_ctxts are left
 * there and are not released by this function.
 */
static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_op_ctxt *ctxt;
	int remaining;

	for (remaining = xprt->sc_sq_depth + xprt->sc_max_requests;
	     remaining; remaining--) {
		ctxt = alloc_ctxt(xprt, GFP_KERNEL);
		if (ctxt) {
			list_add(&ctxt->free, &xprt->sc_ctxts);
			continue;
		}

		dprintk("svcrdma: No memory for RDMA ctxt\n");
		return false;
	}

	return true;
}

/*
 * Hand out a ctxt for use on @xprt.
 *
 * Fast path: take the first entry off xprt->sc_ctxts under
 * sc_ctxt_lock. If the list is empty, fall back to alloc_ctxt() with
 * GFP_NOIO. In both cases ctxt->count and ctxt->frmr are reset before
 * the ctxt is returned.
 *
 * Returns the ctxt, or NULL (after a one-time warning) when the list is
 * empty and the fallback allocation also fails.
 */
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_op_ctxt *ctxt = NULL;

	spin_lock_bh(&xprt->sc_ctxt_lock);
	/* Counted up front; undone below if no ctxt can be obtained. */
	xprt->sc_ctxt_used++;
	if (list_empty(&xprt->sc_ctxts))
		goto out_empty;

	ctxt = list_first_entry(&xprt->sc_ctxts,
				struct svc_rdma_op_ctxt, free);
	list_del_init(&ctxt->free);
	spin_unlock_bh(&xprt->sc_ctxt_lock);

	/* Reached both by fall-through and from the fallback allocation. */
out:
	ctxt->count = 0;
	ctxt->frmr = NULL;
	/*
	 * NOTE(review): sc_ctxt_used is incremented as a plain int under
	 * sc_ctxt_lock above, so this atomic_inc() looks like a removed
	 * line left over from the diff rendering -- confirm.
	 */
	atomic_inc(&xprt->sc_ctxt_used);
	return ctxt;

out_empty:
	/* Either pre-allocation missed the mark, or send
	 * queue accounting is broken.
	 */
	/* Drop the lock before allocating. */
	spin_unlock_bh(&xprt->sc_ctxt_lock);

	ctxt = alloc_ctxt(xprt, GFP_NOIO);
	if (ctxt)
		goto out;

	/* Nothing to hand out: roll back the usage count taken above. */
	spin_lock_bh(&xprt->sc_ctxt_lock);
	xprt->sc_ctxt_used--;
	spin_unlock_bh(&xprt->sc_ctxt_lock);
	WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
	return NULL;
}

void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
@@ -190,16 +248,29 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)

/*
 * Return @ctxt to its transport's free list.
 *
 * When @free_pages is non-zero, put_page() is called on the first
 * ctxt->count entries of ctxt->pages before the ctxt is released. The
 * ctxt is then linked back onto xprt->sc_ctxts and sc_ctxt_used is
 * decremented, both under sc_ctxt_lock.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers.
 * The first "xprt" declaration, the separate "xprt = ctxt->xprt"
 * assignment, and the kmem_cache_free()/atomic_dec() pair are
 * presumably the removed lines -- confirm against the real tree.
 */
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
{
	struct svcxprt_rdma *xprt;
	struct svcxprt_rdma *xprt = ctxt->xprt;
	int i;

	xprt = ctxt->xprt;
	if (free_pages)
		for (i = 0; i < ctxt->count; i++)
			put_page(ctxt->pages[i]);

	kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
	atomic_dec(&xprt->sc_ctxt_used);
	/* Recycle the ctxt instead of freeing it. */
	spin_lock_bh(&xprt->sc_ctxt_lock);
	xprt->sc_ctxt_used--;
	list_add(&ctxt->free, &xprt->sc_ctxts);
	spin_unlock_bh(&xprt->sc_ctxt_lock);
}

/*
 * Release every ctxt currently sitting on xprt->sc_ctxts.
 *
 * Each entry is unlinked from the list and handed to kfree() until the
 * list is empty. No lock is taken here.
 * NOTE(review): presumably only safe once nothing else can touch the
 * list (the visible caller is the transport free path) -- confirm.
 */
static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
{
	struct list_head *pool = &xprt->sc_ctxts;
	struct svc_rdma_op_ctxt *victim;

	for (;;) {
		if (list_empty(pool))
			break;

		victim = list_first_entry(pool, struct svc_rdma_op_ctxt, free);
		list_del(&victim->free);
		kfree(victim);
	}
}

/*
@@ -521,11 +592,13 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
	INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
	init_waitqueue_head(&cma_xprt->sc_send_wait);

	spin_lock_init(&cma_xprt->sc_lock);
	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
	spin_lock_init(&cma_xprt->sc_ctxt_lock);

	if (listener)
		set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
@@ -913,6 +986,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
				   (size_t)svcrdma_max_requests);
	newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;

	if (!svc_rdma_prealloc_ctxts(newxprt))
		goto errout;

	/*
	 * Limit ORD based on client limit, local device limit, and
	 * configured svcrdma limit.
@@ -1174,15 +1250,15 @@ static void __svc_rdma_free(struct work_struct *work)
	}

	/* Warn if we leaked a resource or under-referenced */
	if (atomic_read(&rdma->sc_ctxt_used) != 0)
	if (rdma->sc_ctxt_used != 0)
		pr_err("svcrdma: ctxt still in use? (%d)\n",
		       atomic_read(&rdma->sc_ctxt_used));
		       rdma->sc_ctxt_used);
	if (atomic_read(&rdma->sc_dma_used) != 0)
		pr_err("svcrdma: dma still in use? (%d)\n",
		       atomic_read(&rdma->sc_dma_used));

	/* De-allocate fastreg mr */
	rdma_dealloc_frmr_q(rdma);
	svc_rdma_destroy_ctxts(rdma);

	/* Destroy the QP if present (not a listener) */
	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))