Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e2c63e09 authored by Trond Myklebust's avatar Trond Myklebust
Browse files

Merge branch 'flexfiles'

* flexfiles: (53 commits)
  pnfs: lookup new lseg at lseg boundary
  nfs41: .init_read and .init_write can be called with valid pg_lseg
  pnfs: Update documentation on the Layout Drivers
  pnfs/flexfiles: Add the FlexFile Layout Driver
  nfs: count DIO good bytes correctly with mirroring
  nfs41: wait for LAYOUTRETURN before retrying LAYOUTGET
  nfs: add a helper to set NFS_ODIRECT_RESCHED_WRITES to direct writes
  nfs41: add NFS_LAYOUT_RETRY_LAYOUTGET to layout header flags
  nfs/flexfiles: send layoutreturn before freeing lseg
  nfs41: introduce NFS_LAYOUT_RETURN_BEFORE_CLOSE
  nfs41: allow async version layoutreturn
  nfs41: add range to layoutreturn args
  pnfs: allow LD to ask to resend read through pnfs
  nfs: add nfs_pgio_current_mirror helper
  nfs: only reset desc->pg_mirror_idx when mirroring is supported
  nfs41: add a debug warning if we destroy an unempty layout
  pnfs: fail comparison when bucket verifier not set
  nfs: mirroring support for direct io
  nfs: add mirroring support to pgio layer
  pnfs: pass ds_commit_idx through the commit path
  ...

Conflicts:
	fs/nfs/pnfs.c
	fs/nfs/pnfs.h
parents cc3ea893 7c13789e
Loading
Loading
Loading
Loading
+7 −6
Original line number Original line Diff line number Diff line
@@ -57,15 +57,16 @@ bit is set, preventing any new lsegs from being added.
layout drivers
layout drivers
--------------
--------------


PNFS utilizes what is called layout drivers. The STD defines 3 basic
PNFS utilizes what is called layout drivers. The STD defines 4 basic
layout types: "files" "objects" and "blocks". For each of these types
layout types: "files", "objects", "blocks", and "flexfiles". For each
there is a layout-driver with a common function-vectors table which
of these types there is a layout-driver with a common function-vectors
are called by the nfs-client pnfs-core to implement the different layout
table which are called by the nfs-client pnfs-core to implement the
types.
different layout types.


Files-layout-driver code is in: fs/nfs/nfs4filelayout.c && nfs4filelayoutdev.c
Files-layout-driver code is in: fs/nfs/filelayout/.. directory
Objects-layout-driver code is in: fs/nfs/objlayout/.. directory
Objects-layout-driver code is in: fs/nfs/objlayout/.. directory
Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory
Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory
Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory


objects-layout setup
objects-layout setup
--------------------
--------------------
+5 −0
Original line number Original line Diff line number Diff line
@@ -128,6 +128,11 @@ config PNFS_OBJLAYOUT
	depends on NFS_V4_1 && SCSI_OSD_ULD
	depends on NFS_V4_1 && SCSI_OSD_ULD
	default NFS_V4
	default NFS_V4


config PNFS_FLEXFILE_LAYOUT
	tristate
	depends on NFS_V4_1 && NFS_V3
	default m

config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
	string "NFSv4.1 Implementation ID Domain"
	string "NFSv4.1 Implementation ID Domain"
	depends on NFS_V4_1
	depends on NFS_V4_1
+2 −1
Original line number Original line Diff line number Diff line
@@ -27,9 +27,10 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o
	  dns_resolve.o nfs4trace.o
	  dns_resolve.o nfs4trace.o
nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
nfsv4-$(CONFIG_SYSCTL)	+= nfs4sysctl.o
nfsv4-$(CONFIG_SYSCTL)	+= nfs4sysctl.o
nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o
nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o pnfs_nfs.o
nfsv4-$(CONFIG_NFS_V4_2)	+= nfs42proc.o
nfsv4-$(CONFIG_NFS_V4_2)	+= nfs42proc.o


obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/
+2 −0
Original line number Original line Diff line number Diff line
@@ -860,12 +860,14 @@ static const struct nfs_pageio_ops bl_pg_read_ops = {
	.pg_init = bl_pg_init_read,
	.pg_init = bl_pg_init_read,
	.pg_test = bl_pg_test_read,
	.pg_test = bl_pg_test_read,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};
};


static const struct nfs_pageio_ops bl_pg_write_ops = {
static const struct nfs_pageio_ops bl_pg_write_ops = {
	.pg_init = bl_pg_init_write,
	.pg_init = bl_pg_init_write,
	.pg_test = bl_pg_test_write,
	.pg_test = bl_pg_test_write,
	.pg_doio = pnfs_generic_pg_writepages,
	.pg_doio = pnfs_generic_pg_writepages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};
};


static struct pnfs_layoutdriver_type blocklayout_type = {
static struct pnfs_layoutdriver_type blocklayout_type = {
+95 −17
Original line number Original line Diff line number Diff line
@@ -66,6 +66,10 @@ static struct kmem_cache *nfs_direct_cachep;
/*
/*
 * This represents a set of asynchronous requests that we're waiting on
 * This represents a set of asynchronous requests that we're waiting on
 */
 */
/*
 * Per-mirror byte accounting for a direct I/O request.  One entry lives
 * in nfs_direct_req.mirrors[] for each mirror the request is tracking.
 */
struct nfs_direct_mirror {
	ssize_t count;	/* good bytes seen on this mirror, relative to the dreq's io_start */
};

struct nfs_direct_req {
struct nfs_direct_req {
	struct kref		kref;		/* release manager */
	struct kref		kref;		/* release manager */


@@ -78,8 +82,13 @@ struct nfs_direct_req {
	/* completion state */
	/* completion state */
	atomic_t		io_count;	/* i/os we're waiting for */
	atomic_t		io_count;	/* i/os we're waiting for */
	spinlock_t		lock;		/* protect completion state */
	spinlock_t		lock;		/* protect completion state */

	struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
	int			mirror_count;

	ssize_t			count,		/* bytes actually processed */
	ssize_t			count,		/* bytes actually processed */
				bytes_left,	/* bytes left to be sent */
				bytes_left,	/* bytes left to be sent */
				io_start,	/* start of IO */
				error;		/* any reported error */
				error;		/* any reported error */
	struct completion	completion;	/* wait for i/o completion */
	struct completion	completion;	/* wait for i/o completion */


@@ -108,26 +117,56 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
	return atomic_dec_and_test(&dreq->io_count);
	return atomic_dec_and_test(&dreq->io_count);
}
}


/*
 * nfs_direct_set_resched_writes - mark a direct request for write rescheduling
 * @dreq - direct request to flag
 *
 * Assigns NFS_ODIRECT_RESCHED_WRITES to dreq->flags, replacing whatever
 * value was stored there.  Exported (GPL-only) so that code outside this
 * file can set the flag.
 */
void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq)
{
	dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes);

/*
 * nfs_direct_good_bytes - fold a completed I/O into the request's byte count
 * @dreq - direct request being accounted
 * @hdr - completed pgio header; supplies the mirror index, the I/O start
 *        offset and the number of good bytes
 *
 * Each mirror remembers the furthest good byte it has reached, expressed
 * relative to dreq->io_start.  dreq->count is then set to the minimum of
 * those values over the first dreq->mirror_count mirrors, i.e. the amount
 * every mirror agrees on.
 *
 * A header whose mirror index is not below dreq->mirror_count triggers a
 * one-time warning and is ignored, so that mirrors[] is never indexed
 * with an untracked (possibly out-of-range) mirror.
 *
 * NOTE(review): the read completion path appears to call this with
 * dreq->lock held - confirm for all callers.
 */
static void
nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
{
	int i;
	ssize_t count;

	/* never touch mirrors[] with an index we are not tracking */
	if (WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count))
		return;

	/* only ever move this mirror's high-water mark forward */
	count = dreq->mirrors[hdr->pgio_mirror_idx].count;
	if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
		count = hdr->io_start + hdr->good_bytes - dreq->io_start;
		dreq->mirrors[hdr->pgio_mirror_idx].count = count;
	}

	/* update the dreq->count by finding the minimum agreed count from all
	 * mirrors */
	count = dreq->mirrors[0].count;

	for (i = 1; i < dreq->mirror_count; i++)
		count = min(count, dreq->mirrors[i].count);

	dreq->count = count;
}

/*
/*
 * nfs_direct_select_verf - select the right verifier
 * nfs_direct_select_verf - select the right verifier
 * @dreq - direct request possibly spanning multiple servers
 * @dreq - direct request possibly spanning multiple servers
 * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
 * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
 * @ds_idx - index of data server in data server list, only valid if ds_clp set
 * @commit_idx - commit bucket index for the DS
 *
 *
 * returns the correct verifier to use given the role of the server
 * returns the correct verifier to use given the role of the server
 */
 */
static struct nfs_writeverf *
static struct nfs_writeverf *
nfs_direct_select_verf(struct nfs_direct_req *dreq,
nfs_direct_select_verf(struct nfs_direct_req *dreq,
		       struct nfs_client *ds_clp,
		       struct nfs_client *ds_clp,
		       int ds_idx)
		       int commit_idx)
{
{
	struct nfs_writeverf *verfp = &dreq->verf;
	struct nfs_writeverf *verfp = &dreq->verf;


#ifdef CONFIG_NFS_V4_1
#ifdef CONFIG_NFS_V4_1
	if (ds_clp) {
	if (ds_clp) {
		/* pNFS is in use, use the DS verf */
		/* pNFS is in use, use the DS verf */
		if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
		if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets)
			verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
			verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf;
		else
		else
			WARN_ON_ONCE(1);
			WARN_ON_ONCE(1);
	}
	}
@@ -148,8 +187,7 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
{
{
	struct nfs_writeverf *verfp;
	struct nfs_writeverf *verfp;


	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx);
				      hdr->ds_idx);
	WARN_ON_ONCE(verfp->committed >= 0);
	WARN_ON_ONCE(verfp->committed >= 0);
	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
	WARN_ON_ONCE(verfp->committed < 0);
	WARN_ON_ONCE(verfp->committed < 0);
@@ -169,8 +207,7 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
{
{
	struct nfs_writeverf *verfp;
	struct nfs_writeverf *verfp;


	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx);
					 hdr->ds_idx);
	if (verfp->committed < 0) {
	if (verfp->committed < 0) {
		nfs_direct_set_hdr_verf(dreq, hdr);
		nfs_direct_set_hdr_verf(dreq, hdr);
		return 0;
		return 0;
@@ -193,7 +230,11 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,


	verfp = nfs_direct_select_verf(dreq, data->ds_clp,
	verfp = nfs_direct_select_verf(dreq, data->ds_clp,
					 data->ds_commit_index);
					 data->ds_commit_index);
	WARN_ON_ONCE(verfp->committed < 0);

	/* verifier not set so always fail */
	if (verfp->committed < 0)
		return 1;

	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
}
}


@@ -249,6 +290,18 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
	cinfo->completion_ops = &nfs_direct_commit_completion_ops;
	cinfo->completion_ops = &nfs_direct_commit_completion_ops;
}
}


/*
 * nfs_direct_setup_mirroring - record how many mirrors a request will use
 * @dreq - direct request whose mirror_count is updated
 * @pgio - pageio descriptor whose pg_ops may provide pg_get_mirror_count
 * @req - request handed to pg_get_mirror_count
 *
 * When the descriptor's ops do not implement pg_get_mirror_count the
 * request is treated as having a single mirror.
 */
static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq,
					     struct nfs_pageio_descriptor *pgio,
					     struct nfs_page *req)
{
	/* no mirror-count hook: plain, unmirrored I/O */
	if (!pgio->pg_ops->pg_get_mirror_count) {
		dreq->mirror_count = 1;
		return;
	}

	dreq->mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
}

static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
{
	struct nfs_direct_req *dreq;
	struct nfs_direct_req *dreq;
@@ -263,6 +316,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */
	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */
	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
	dreq->mirror_count = 1;
	spin_lock_init(&dreq->lock);
	spin_lock_init(&dreq->lock);


	return dreq;
	return dreq;
@@ -369,7 +423,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
		dreq->error = hdr->error;
		dreq->error = hdr->error;
	else
	else
		dreq->count += hdr->good_bytes;
		nfs_direct_good_bytes(dreq, hdr);

	spin_unlock(&dreq->lock);
	spin_unlock(&dreq->lock);


	while (!list_empty(&hdr->pages)) {
	while (!list_empty(&hdr->pages)) {
@@ -547,6 +602,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,


	dreq->inode = inode;
	dreq->inode = inode;
	dreq->bytes_left = count;
	dreq->bytes_left = count;
	dreq->io_start = pos;
	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
	l_ctx = nfs_get_lock_context(dreq->ctx);
	l_ctx = nfs_get_lock_context(dreq->ctx);
	if (IS_ERR(l_ctx)) {
	if (IS_ERR(l_ctx)) {
@@ -579,6 +635,20 @@ out:
	return result;
	return result;
}
}


/*
 * nfs_direct_write_scan_commit_list - collect requests from the commit lists
 * @inode - inode whose server's current layout driver may be consulted
 * @list - list head handed to the recover/scan helpers
 * @cinfo - commit info supplying the lock and the mds/ds commit state
 *
 * Both steps below run under cinfo->lock.
 * NOTE(review): the helpers presumably move the found requests onto
 * @list - confirm against nfs_scan_commit_list() and the layout
 * driver's recover_commit_reqs().
 */
static void
nfs_direct_write_scan_commit_list(struct inode *inode,
				  struct list_head *list,
				  struct nfs_commit_info *cinfo)
{
	spin_lock(cinfo->lock);
#ifdef CONFIG_NFS_V4_1
	/* only involve the layout driver when ds state exists and nwritten != 0 */
	if (cinfo->ds != NULL && cinfo->ds->nwritten != 0)
		NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
#endif
	/* always scan the mds commit list as well */
	nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0);
	spin_unlock(cinfo->lock);
}

static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
{
	struct nfs_pageio_descriptor desc;
	struct nfs_pageio_descriptor desc;
@@ -586,20 +656,23 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
	LIST_HEAD(reqs);
	LIST_HEAD(reqs);
	struct nfs_commit_info cinfo;
	struct nfs_commit_info cinfo;
	LIST_HEAD(failed);
	LIST_HEAD(failed);
	int i;


	nfs_init_cinfo_from_dreq(&cinfo, dreq);
	nfs_init_cinfo_from_dreq(&cinfo, dreq);
	pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
	spin_lock(cinfo.lock);
	nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
	spin_unlock(cinfo.lock);


	dreq->count = 0;
	dreq->count = 0;
	for (i = 0; i < dreq->mirror_count; i++)
		dreq->mirrors[i].count = 0;
	get_dreq(dreq);
	get_dreq(dreq);


	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
			      &nfs_direct_write_completion_ops);
			      &nfs_direct_write_completion_ops);
	desc.pg_dreq = dreq;
	desc.pg_dreq = dreq;


	req = nfs_list_entry(reqs.next);
	nfs_direct_setup_mirroring(dreq, &desc, req);

	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
		if (!nfs_pageio_add_request(&desc, req)) {
		if (!nfs_pageio_add_request(&desc, req)) {
			nfs_list_remove_request(req);
			nfs_list_remove_request(req);
@@ -646,7 +719,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
		nfs_list_remove_request(req);
		nfs_list_remove_request(req);
		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
			/* Note the rewrite will go through mds */
			/* Note the rewrite will go through mds */
			nfs_mark_request_commit(req, NULL, &cinfo);
			nfs_mark_request_commit(req, NULL, &cinfo, 0);
		} else
		} else
			nfs_release_request(req);
			nfs_release_request(req);
		nfs_unlock_and_release_request(req);
		nfs_unlock_and_release_request(req);
@@ -721,7 +794,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
		dreq->error = hdr->error;
		dreq->error = hdr->error;
	}
	}
	if (dreq->error == 0) {
	if (dreq->error == 0) {
		dreq->count += hdr->good_bytes;
		nfs_direct_good_bytes(dreq, hdr);
		if (nfs_write_need_commit(hdr)) {
		if (nfs_write_need_commit(hdr)) {
			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
				request_commit = true;
				request_commit = true;
@@ -745,7 +818,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
		nfs_list_remove_request(req);
		nfs_list_remove_request(req);
		if (request_commit) {
		if (request_commit) {
			kref_get(&req->wb_kref);
			kref_get(&req->wb_kref);
			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
			nfs_mark_request_commit(req, hdr->lseg, &cinfo,
				hdr->ds_commit_idx);
		}
		}
		nfs_unlock_and_release_request(req);
		nfs_unlock_and_release_request(req);
	}
	}
@@ -826,6 +900,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
				result = PTR_ERR(req);
				result = PTR_ERR(req);
				break;
				break;
			}
			}

			nfs_direct_setup_mirroring(dreq, &desc, req);

			nfs_lock_request(req);
			nfs_lock_request(req);
			req->wb_index = pos >> PAGE_SHIFT;
			req->wb_index = pos >> PAGE_SHIFT;
			req->wb_offset = pos & ~PAGE_MASK;
			req->wb_offset = pos & ~PAGE_MASK;
@@ -934,6 +1011,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,


	dreq->inode = inode;
	dreq->inode = inode;
	dreq->bytes_left = count;
	dreq->bytes_left = count;
	dreq->io_start = pos;
	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
	l_ctx = nfs_get_lock_context(dreq->ctx);
	l_ctx = nfs_get_lock_context(dreq->ctx);
	if (IS_ERR(l_ctx)) {
	if (IS_ERR(l_ctx)) {
Loading