Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d1e1cda8 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

   - massive cleanup of the NFS read/write code by Anna and Dros
   - support multiple NFS read/write requests per page in order to deal
     with non-page aligned pNFS striping.  Also cleans up the r/wsize <
     page size code nicely.
   - stable fix for ensuring inode is declared uptodate only after all
     the attributes have been checked.
   - stable fix for a kernel Oops when remounting
   - NFS over RDMA client fixes
   - move the pNFS files layout driver into its own subdirectory"

* tag 'nfs-for-3.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits)
  NFS: populate ->net in mount data when remounting
  pnfs: fix lockup caused by pnfs_generic_pg_test
  NFSv4.1: Fix typo in dprintk
  NFSv4.1: Comment is now wrong and redundant to code
  NFS: Use raw_write_seqcount_begin/end int nfs4_reclaim_open_state
  xprtrdma: Disconnect on registration failure
  xprtrdma: Remove BUG_ON() call sites
  xprtrdma: Avoid deadlock when credit window is reset
  SUNRPC: Move congestion window constants to header file
  xprtrdma: Reset connection timeout after successful reconnect
  xprtrdma: Use macros for reconnection timeout constants
  xprtrdma: Allocate missing pagelist
  xprtrdma: Remove Tavor MTU setting
  xprtrdma: Ensure ia->ri_id->qp is not NULL when reconnecting
  xprtrdma: Reduce the number of hardway buffer allocations
  xprtrdma: Limit work done by completion handler
  xprtrmda: Reduce calls to ib_poll_cq() in completion handlers
  xprtrmda: Reduce lock contention in completion handlers
  xprtrdma: Split the completion queue
  xprtrdma: Make rpcrdma_ep_destroy() return void
  ...
parents 07888238 a914722f
Loading
Loading
Loading
Loading
+1 −3
Original line number Diff line number Diff line
@@ -29,8 +29,6 @@ nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
nfsv4-$(CONFIG_SYSCTL)	+= nfs4sysctl.o
nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o

obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o

obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
+23 −15
Original line number Diff line number Diff line
@@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
			SetPageUptodate(bvec->bv_page);

	if (err) {
		struct nfs_read_data *rdata = par->data;
		struct nfs_pgio_data *rdata = par->data;
		struct nfs_pgio_header *header = rdata->header;

		if (!header->pnfs_error)
@@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err)
static void bl_read_cleanup(struct work_struct *work)
{
	struct rpc_task *task;
	struct nfs_read_data *rdata;
	struct nfs_pgio_data *rdata;
	dprintk("%s enter\n", __func__);
	task = container_of(work, struct rpc_task, u.tk_work);
	rdata = container_of(task, struct nfs_read_data, task);
	rdata = container_of(task, struct nfs_pgio_data, task);
	pnfs_ld_read_done(rdata);
}

static void
bl_end_par_io_read(void *data, int unused)
{
	struct nfs_read_data *rdata = data;
	struct nfs_pgio_data *rdata = data;

	rdata->task.tk_status = rdata->header->pnfs_error;
	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused)
}

static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
bl_read_pagelist(struct nfs_pgio_data *rdata)
{
	struct nfs_pgio_header *header = rdata->header;
	int i, hole;
@@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
	}

	if (unlikely(err)) {
		struct nfs_write_data *data = par->data;
		struct nfs_pgio_data *data = par->data;
		struct nfs_pgio_header *header = data->header;

		if (!header->pnfs_error)
@@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
{
	struct parallel_io *par = bio->bi_private;
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct nfs_write_data *data = par->data;
	struct nfs_pgio_data *data = par->data;
	struct nfs_pgio_header *header = data->header;

	if (!uptodate) {
@@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
static void bl_write_cleanup(struct work_struct *work)
{
	struct rpc_task *task;
	struct nfs_write_data *wdata;
	struct nfs_pgio_data *wdata;
	dprintk("%s enter\n", __func__);
	task = container_of(work, struct rpc_task, u.tk_work);
	wdata = container_of(task, struct nfs_write_data, task);
	wdata = container_of(task, struct nfs_pgio_data, task);
	if (likely(!wdata->header->pnfs_error)) {
		/* Marks for LAYOUTCOMMIT */
		mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se)
{
	struct nfs_write_data *wdata = data;
	struct nfs_pgio_data *wdata = data;

	if (unlikely(wdata->header->pnfs_error)) {
		bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -673,7 +673,7 @@ check_page:
}

static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync)
bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
{
	struct nfs_pgio_header *header = wdata->header;
	int i, ret, npg_zero, pg_index, last = 0;
@@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
		pnfs_generic_pg_init_read(pgio, req);
}

static bool
/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		struct nfs_page *req)
{
	if (pgio->pg_dreq != NULL &&
	    !is_aligned_req(req, SECTOR_SIZE))
		return false;
		return 0;

	return pnfs_generic_pg_test(pgio, prev, req);
}
@@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
	}
}

static bool
/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		 struct nfs_page *req)
{
	if (pgio->pg_dreq != NULL &&
	    !is_aligned_req(req, PAGE_CACHE_SIZE))
		return false;
		return 0;

	return pnfs_generic_pg_test(pgio, prev, req);
}
+105 −12
Original line number Diff line number Diff line
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
	return atomic_dec_and_test(&dreq->io_count);
}

/*
 * nfs_direct_select_verf - pick the verifier slot that applies to a server
 * @dreq - direct request possibly spanning multiple servers
 * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
 * @ds_idx - index of data server in data server list, only valid if ds_clp set
 *
 * Returns &dreq->verf unless CONFIG_NFS_V4_1 is enabled, @ds_clp is non-NULL
 * and @ds_idx lies within dreq->ds_cinfo.buckets[]; in that case the
 * bucket's direct_verf is returned instead.  An out-of-range @ds_idx emits a
 * one-time warning and falls back to &dreq->verf.
 */
static struct nfs_writeverf *
nfs_direct_select_verf(struct nfs_direct_req *dreq,
		       struct nfs_client *ds_clp,
		       int ds_idx)
{
#ifdef CONFIG_NFS_V4_1
	if (ds_clp != NULL) {
		/* pNFS is in use, use the DS verf */
		if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
			return &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
		WARN_ON_ONCE(1);
	}
#endif
	return &dreq->verf;
}


/*
 * nfs_direct_set_hdr_verf - record the write/commit verifier
 * @dreq - direct request possibly spanning multiple servers
 * @hdr - pageio header whose verifier is to be recorded
 *
 * Copies @hdr->verf into the "seen" verifier slot chosen by
 * nfs_direct_select_verf() for the server (MDS or DS) that handled @hdr.
 * Warns once if the slot already looked set before the copy
 * (committed >= 0), or still looks unset after it (committed < 0).
 */
static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
				    struct nfs_pgio_header *hdr)
{
	struct nfs_writeverf *verfp =
		nfs_direct_select_verf(dreq, hdr->data->ds_clp,
				       hdr->data->ds_idx);

	/* the slot is expected to be unset on entry ... */
	WARN_ON_ONCE(verfp->committed >= 0);
	memcpy(verfp, &hdr->verf, sizeof(*verfp));
	/* ... and to hold a valid value on exit */
	WARN_ON_ONCE(verfp->committed < 0);
}

/*
 * nfs_direct_set_or_cmp_hdr_verf - set or compare verifier for pgio header
 * @dreq - direct request possibly spanning multiple servers
 * @hdr - pageio header to validate against previously seen verf
 *
 * If the "seen" verf of the server used by @hdr (DS or MDS) is still unset
 * (committed < 0), record @hdr->verf there and return 0.  Otherwise return
 * the memcmp() result of the "seen" verf against @hdr->verf, i.e. non-zero
 * when they differ.
 */
static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
					  struct nfs_pgio_header *hdr)
{
	struct nfs_writeverf *verfp =
		nfs_direct_select_verf(dreq, hdr->data->ds_clp,
				       hdr->data->ds_idx);

	/* a verifier was already recorded for this server: compare */
	if (verfp->committed >= 0)
		return memcmp(verfp, &hdr->verf, sizeof(*verfp));

	/* first reply seen from this server: remember its verifier */
	nfs_direct_set_hdr_verf(dreq, hdr);
	return 0;
}

#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
/*
 * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
 * @dreq - direct request possibly spanning multiple servers
 * @data - commit data to validate against previously seen verf
 *
 * Looks up the verf recorded for the server used by @data (DS or MDS),
 * warns once if that verf is still unset (committed < 0), and returns the
 * memcmp() result of the recorded verf against @data->verf.
 */
static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
					   struct nfs_commit_data *data)
{
	struct nfs_writeverf *verfp =
		nfs_direct_select_verf(dreq, data->ds_clp,
				       data->ds_commit_index);
	int diff;

	WARN_ON_ONCE(verfp->committed < 0);
	diff = memcmp(verfp, &data->verf, sizeof(*verfp));
	return diff;
}
#endif

/**
 * nfs_direct_IO - NFS address space operation for direct I/O
 * @rw: direction (read or write)
@@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
	kref_get(&dreq->kref);
	init_completion(&dreq->completion);
	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */
	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
	spin_lock_init(&dreq->lock);

@@ -380,8 +472,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
			struct nfs_page *req;
			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
			/* XXX do we need to do the eof zeroing found in async_filler? */
			req = nfs_create_request(dreq->ctx, dreq->inode,
						 pagevec[i],
			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
						 pgbase, req_len);
			if (IS_ERR(req)) {
				result = PTR_ERR(req);
@@ -424,7 +515,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
	size_t requested_bytes = 0;
	unsigned long seg;

	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
	nfs_pageio_init_read(&desc, dreq->inode, false,
			     &nfs_direct_read_completion_ops);
	get_dreq(dreq);
	desc.pg_dreq = dreq;
@@ -564,7 +655,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
	dreq->count = 0;
	get_dreq(dreq);

	NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
			      &nfs_direct_write_completion_ops);
	desc.pg_dreq = dreq;

@@ -603,7 +694,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
		dprintk("NFS: %5u commit failed with error %d.\n",
			data->task.tk_pid, status);
		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
	} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
	}
@@ -750,8 +841,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
			struct nfs_page *req;
			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);

			req = nfs_create_request(dreq->ctx, dreq->inode,
						 pagevec[i],
			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
						 pgbase, req_len);
			if (IS_ERR(req)) {
				result = PTR_ERR(req);
@@ -813,13 +903,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
				bit = NFS_IOHDR_NEED_RESCHED;
			else if (dreq->flags == 0) {
				memcpy(&dreq->verf, hdr->verf,
				       sizeof(dreq->verf));
				nfs_direct_set_hdr_verf(dreq, hdr);
				bit = NFS_IOHDR_NEED_COMMIT;
				dreq->flags = NFS_ODIRECT_DO_COMMIT;
			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
				if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
				if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
					dreq->flags =
						NFS_ODIRECT_RESCHED_WRITES;
					bit = NFS_IOHDR_NEED_RESCHED;
				} else
					bit = NFS_IOHDR_NEED_COMMIT;
@@ -829,6 +919,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
	spin_unlock(&dreq->lock);

	while (!list_empty(&hdr->pages)) {
		bool do_destroy = true;

		req = nfs_list_entry(hdr->pages.next);
		nfs_list_remove_request(req);
		switch (bit) {
@@ -836,6 +928,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
		case NFS_IOHDR_NEED_COMMIT:
			kref_get(&req->wb_kref);
			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
			do_destroy = false;
		}
		nfs_unlock_and_release_request(req);
	}
@@ -874,7 +967,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
	size_t requested_bytes = 0;
	unsigned long seg;

	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
	nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
			      &nfs_direct_write_completion_ops);
	desc.pg_dreq = dreq;
	get_dreq(dreq);
+5 −0
Original line number Diff line number Diff line
#
# Makefile for the pNFS Files Layout Driver kernel module
#
obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o
+109 −94
Original line number Diff line number Diff line
@@ -35,11 +35,11 @@

#include <linux/sunrpc/metrics.h>

#include "nfs4session.h"
#include "internal.h"
#include "delegation.h"
#include "nfs4filelayout.h"
#include "nfs4trace.h"
#include "../nfs4session.h"
#include "../internal.h"
#include "../delegation.h"
#include "filelayout.h"
#include "../nfs4trace.h"

#define NFSDBG_FACILITY         NFSDBG_PNFS_LD

@@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
	BUG();
}

static void filelayout_reset_write(struct nfs_write_data *data)
static void filelayout_reset_write(struct nfs_pgio_data *data)
{
	struct nfs_pgio_header *hdr = data->header;
	struct rpc_task *task = &data->task;
@@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data)
	}
}

static void filelayout_reset_read(struct nfs_read_data *data)
static void filelayout_reset_read(struct nfs_pgio_data *data)
{
	struct nfs_pgio_header *hdr = data->header;
	struct rpc_task *task = &data->task;
@@ -243,7 +243,7 @@ wait_on_recovery:
/* NFS_PROTO call done callback routines */

static int filelayout_read_done_cb(struct rpc_task *task,
				struct nfs_read_data *data)
				struct nfs_pgio_data *data)
{
	struct nfs_pgio_header *hdr = data->header;
	int err;
@@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
 * rfc5661 is not clear about which credential should be used.
 */
static void
filelayout_set_layoutcommit(struct nfs_write_data *wdata)
filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
{
	struct nfs_pgio_header *hdr = wdata->header;

@@ -279,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
		return;

	pnfs_set_layoutcommit(wdata);
	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}

@@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
 */
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
	struct nfs_read_data *rdata = data;
	struct nfs_pgio_data *rdata = data;

	if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
		rpc_exit(task, -EIO);
@@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
		rpc_exit(task, 0);
		return;
	}
	rdata->read_done_cb = filelayout_read_done_cb;
	rdata->pgio_done_cb = filelayout_read_done_cb;

	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
			&rdata->args.seq_args,
@@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)

static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
	struct nfs_read_data *rdata = data;
	struct nfs_pgio_data *rdata = data;

	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

@@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)

static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_read_data *rdata = data;
	struct nfs_pgio_data *rdata = data;

	rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
}

static void filelayout_read_release(void *data)
{
	struct nfs_read_data *rdata = data;
	struct nfs_pgio_data *rdata = data;
	struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;

	filelayout_fenceme(lo->plh_inode, lo);
@@ -363,7 +363,7 @@ static void filelayout_read_release(void *data)
}

static int filelayout_write_done_cb(struct rpc_task *task,
				struct nfs_write_data *data)
				struct nfs_pgio_data *data)
{
	struct nfs_pgio_header *hdr = data->header;
	int err;
@@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,

static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
	struct nfs_write_data *wdata = data;
	struct nfs_pgio_data *wdata = data;

	if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
		rpc_exit(task, -EIO);
@@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)

static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
	struct nfs_write_data *wdata = data;
	struct nfs_pgio_data *wdata = data;

	if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
	    task->tk_status == 0) {
@@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)

static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_write_data *wdata = data;
	struct nfs_pgio_data *wdata = data;

	rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
}

static void filelayout_write_release(void *data)
{
	struct nfs_write_data *wdata = data;
	struct nfs_pgio_data *wdata = data;
	struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;

	filelayout_fenceme(lo->plh_inode, lo);
@@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
};

static enum pnfs_try_status
filelayout_read_pagelist(struct nfs_read_data *data)
filelayout_read_pagelist(struct nfs_pgio_data *data)
{
	struct nfs_pgio_header *hdr = data->header;
	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
	/* No multipath support. Use first DS */
	atomic_inc(&ds->ds_clp->cl_count);
	data->ds_clp = ds->ds_clp;
	data->ds_idx = idx;
	fh = nfs4_fl_select_ds_fh(lseg, j);
	if (fh)
		data->args.fh = fh;
@@ -568,14 +569,14 @@ filelayout_read_pagelist(struct nfs_read_data *data)
	data->mds_offset = offset;

	/* Perform an asynchronous read to ds */
	nfs_initiate_read(ds_clnt, data,
				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
	nfs_initiate_pgio(ds_clnt, data,
			    &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
	return PNFS_ATTEMPTED;
}

/* Perform async writes. */
static enum pnfs_try_status
filelayout_write_pagelist(struct nfs_write_data *data, int sync)
filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
{
	struct nfs_pgio_header *hdr = data->header;
	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -600,20 +601,18 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));

	data->write_done_cb = filelayout_write_done_cb;
	data->pgio_done_cb = filelayout_write_done_cb;
	atomic_inc(&ds->ds_clp->cl_count);
	data->ds_clp = ds->ds_clp;
	data->ds_idx = idx;
	fh = nfs4_fl_select_ds_fh(lseg, j);
	if (fh)
		data->args.fh = fh;
	/*
	 * Get the file offset on the dserver. Set the write offset to
	 * this offset and save the original offset.
	 */

	data->args.offset = filelayout_get_dserver_offset(lseg, offset);

	/* Perform an asynchronous write */
	nfs_initiate_write(ds_clnt, data,
	nfs_initiate_pgio(ds_clnt, data,
				    &filelayout_write_call_ops, sync,
				    RPC_TASK_SOFTCONN);
	return PNFS_ATTEMPTED;
@@ -637,7 +636,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
	struct nfs4_deviceid_node *d;
	struct nfs4_file_layout_dsaddr *dsaddr;
	int status = -EINVAL;
	struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);

	dprintk("--> %s\n", __func__);

@@ -655,7 +653,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
		goto out;
	}

	if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
	if (!fl->stripe_unit) {
		dprintk("%s Invalid stripe unit (%u)\n",
			__func__, fl->stripe_unit);
		goto out;
@@ -692,12 +690,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
		goto out_put;
	}

	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
			nfss->wsize);
	}

	status = 0;
out:
	dprintk("--> %s returns %d\n", __func__, status);
@@ -850,11 +842,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
{
	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
	struct pnfs_commit_bucket *buckets;
	int size;
	int size, i;

	if (fl->commit_through_mds)
		return 0;
	if (cinfo->ds->nbuckets != 0) {

	size = (fl->stripe_type == STRIPE_SPARSE) ?
		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;

	if (cinfo->ds->nbuckets >= size) {
		/* This assumes there is only one IOMODE_RW lseg.  What
		 * we really want to do is have a layout_hdr level
		 * dictionary of <multipath_list4, fh> keys, each
@@ -864,31 +860,37 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
		return 0;
	}

	size = (fl->stripe_type == STRIPE_SPARSE) ?
		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;

	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
			  gfp_flags);
	if (!buckets)
		return -ENOMEM;
	else {
		int i;

		spin_lock(cinfo->lock);
		if (cinfo->ds->nbuckets != 0)
			kfree(buckets);
		else {
			cinfo->ds->buckets = buckets;
			cinfo->ds->nbuckets = size;
	for (i = 0; i < size; i++) {
		INIT_LIST_HEAD(&buckets[i].written);
		INIT_LIST_HEAD(&buckets[i].committing);
		/* mark direct verifier as unset */
		buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
	}
		}

	spin_lock(cinfo->lock);
	if (cinfo->ds->nbuckets >= size)
		goto out;
	for (i = 0; i < cinfo->ds->nbuckets; i++) {
		list_splice(&cinfo->ds->buckets[i].written,
			    &buckets[i].written);
		list_splice(&cinfo->ds->buckets[i].committing,
			    &buckets[i].committing);
		buckets[i].direct_verf.committed =
			cinfo->ds->buckets[i].direct_verf.committed;
		buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
		buckets[i].clseg = cinfo->ds->buckets[i].clseg;
	}
	swap(cinfo->ds->buckets, buckets);
	cinfo->ds->nbuckets = size;
out:
	spin_unlock(cinfo->lock);
	kfree(buckets);
	return 0;
}
}

static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
@@ -915,46 +917,50 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
/*
 * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
 *
 * return true  : coalesce page
 * return false : don't coalesce page
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static bool
static size_t
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		   struct nfs_page *req)
{
	unsigned int size;
	u64 p_stripe, r_stripe;
	u32 stripe_unit;
	u32 stripe_offset;
	u64 segment_offset = pgio->pg_lseg->pls_range.offset;
	u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;

	if (!pnfs_generic_pg_test(pgio, prev, req) ||
	    !nfs_generic_pg_test(pgio, prev, req))
		return false;

	p_stripe = (u64)req_offset(prev);
	r_stripe = (u64)req_offset(req);
	stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
	/* calls nfs_generic_pg_test */
	size = pnfs_generic_pg_test(pgio, prev, req);
	if (!size)
		return 0;

	/* see if req and prev are in the same stripe */
	if (prev) {
		p_stripe = (u64)req_offset(prev) - segment_offset;
		r_stripe = (u64)req_offset(req) - segment_offset;
		do_div(p_stripe, stripe_unit);
		do_div(r_stripe, stripe_unit);

	return (p_stripe == r_stripe);
		if (p_stripe != r_stripe)
			return 0;
	}

	/* calculate remaining bytes in the current stripe */
	div_u64_rem((u64)req_offset(req) - segment_offset,
			stripe_unit,
			&stripe_offset);
	WARN_ON_ONCE(stripe_offset > stripe_unit);
	if (stripe_offset >= stripe_unit)
		return 0;
	return min(stripe_unit - (unsigned int)stripe_offset, size);
}

static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
{
	WARN_ON_ONCE(pgio->pg_lseg != NULL);

	if (req->wb_offset != req->wb_pgbase) {
		/*
		 * Handling unaligned pages is difficult, because have to
		 * somehow split a req in two in certain cases in the
		 * pg.test code.  Avoid this by just not using pnfs
		 * in this case.
		 */
		nfs_pageio_reset_read_mds(pgio);
		return;
	}
	if (!pgio->pg_lseg)
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
					   req->wb_context,
					   0,
@@ -973,10 +979,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
	struct nfs_commit_info cinfo;
	int status;

	WARN_ON_ONCE(pgio->pg_lseg != NULL);

	if (req->wb_offset != req->wb_pgbase)
		goto out_mds;
	if (!pgio->pg_lseg)
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
					   req->wb_context,
					   0,
@@ -1067,6 +1070,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
	 */
	j = nfs4_fl_calc_j_index(lseg, req_offset(req));
	i = select_bucket_index(fl, j);
	spin_lock(cinfo->lock);
	buckets = cinfo->ds->buckets;
	list = &buckets[i].written;
	if (list_empty(list)) {
@@ -1080,6 +1084,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
	}
	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
	cinfo->ds->nwritten++;
	spin_unlock(cinfo->lock);
	return list;
}

@@ -1176,6 +1181,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst,
	return ret;
}

/* Note: called with cinfo->lock held. */
static int
filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
			       struct nfs_commit_info *cinfo,
@@ -1220,15 +1226,18 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
					   struct nfs_commit_info *cinfo)
{
	struct pnfs_commit_bucket *b;
	struct pnfs_layout_segment *freeme;
	int i;

restart:
	spin_lock(cinfo->lock);
	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
		if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
			spin_unlock(cinfo->lock);
			pnfs_put_lseg(b->wlseg);
			freeme = b->wlseg;
			b->wlseg = NULL;
			spin_lock(cinfo->lock);
			spin_unlock(cinfo->lock);
			pnfs_put_lseg(freeme);
			goto restart;
		}
	}
	cinfo->ds->nwritten = 0;
@@ -1243,6 +1252,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
	struct nfs_commit_data *data;
	int i, j;
	unsigned int nreq = 0;
	struct pnfs_layout_segment *freeme;

	fl_cinfo = cinfo->ds;
	bucket = fl_cinfo->buckets;
@@ -1253,8 +1263,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
		if (!data)
			break;
		data->ds_commit_index = i;
		spin_lock(cinfo->lock);
		data->lseg = bucket->clseg;
		bucket->clseg = NULL;
		spin_unlock(cinfo->lock);
		list_add(&data->pages, list);
		nreq++;
	}
@@ -1264,8 +1276,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
		if (list_empty(&bucket->committing))
			continue;
		nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
		pnfs_put_lseg(bucket->clseg);
		spin_lock(cinfo->lock);
		freeme = bucket->clseg;
		bucket->clseg = NULL;
		spin_unlock(cinfo->lock);
		pnfs_put_lseg(freeme);
	}
	/* Caller will clean up entries put on list */
	return nreq;
@@ -1330,7 +1345,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
	struct nfs4_filelayout *flo;

	flo = kzalloc(sizeof(*flo), gfp_flags);
	return &flo->generic_hdr;
	return flo != NULL ? &flo->generic_hdr : NULL;
}

static void
Loading