Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6860c981 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:

   - SUNRPC: Ensure bvecs are re-synced when we re-encode the RPC
     request

   - Fix an Oops in ff_layout_track_ds_error due to a PTR_ERR()
     dereference

   - Revert buggy NFS readdirplus optimisation

   - NFSv4: Handle the special Linux file open access mode

   - pnfs: Fix a problem where we gratuitously start doing I/O through
     the MDS

  Features:

   - Allow NFS client to set up multiple TCP connections to the server
     using a new 'nconnect=X' mount option. Queue length is used to
     balance load.

   - Enhance statistics reporting to report on all transports when using
     multiple connections.

   - Speed up SUNRPC by removing bh-safe spinlocks

   - Add a mechanism to allow NFSv4 to request that containers set a
     unique per-host identifier for when the hostname is not set.

   - Ensure NFSv4 updates the lease_time after a clientid update

  Bugfixes and cleanup:

   - Fix use-after-free in rpcrdma_post_recvs

   - Fix a memory leak when nfs_match_client() is interrupted

   - Fix buggy file access checking in NFSv4 open for execute

   - disable unsupported client side deduplication

   - Fix spurious client disconnections

   - Fix occasional RDMA transport deadlock

   - Various RDMA cleanups

   - Various tracepoint fixes

   - Fix the TCP callback channel to guarantee the server can actually
     send the number of callback requests that was negotiated at mount
     time"

* tag 'nfs-for-5.3-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (68 commits)
  pnfs/flexfiles: Add tracepoints for detecting pnfs fallback to MDS
  pnfs: Fix a problem where we gratuitously start doing I/O through the MDS
  SUNRPC: Optimise transport balancing code
  SUNRPC: Ensure the bvecs are reset when we re-encode the RPC request
  pnfs/flexfiles: Fix PTR_ERR() dereferences in ff_layout_track_ds_error
  NFSv4: Don't use the zero stateid with layoutget
  SUNRPC: Fix up backchannel slot table accounting
  SUNRPC: Fix initialisation of struct rpc_xprt_switch
  SUNRPC: Skip zero-refcount transports
  SUNRPC: Replace division by multiplication in calculation of queue length
  NFSv4: Validate the stateid before applying it to state recovery
  nfs4.0: Refetch lease_time after clientid update
  nfs4: Rename nfs41_setup_state_renewal
  nfs4: Make nfs4_proc_get_lease_time available for nfs4.0
  nfs: Fix copy-and-paste error in debug message
  NFS: Replace 16 seq_printf() calls by seq_puts()
  NFS: Use seq_putc() in nfs_show_stats()
  Revert "NFS: readdirplus optimization by cache mechanism" (memleak)
  SUNRPC: Fix transport accounting when caller specifies an rpc_xprt
  NFS: Record task, client ID, and XID in xdr_status trace points
  ...
parents 0570bc8b d5b9216f
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -8,7 +8,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
CFLAGS_nfstrace.o += -I$(src)
nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \
			   io.o direct.o pagelist.o read.o symlink.o unlink.o \
			   write.o namespace.o mount_clnt.o nfstrace.o export.o
			   write.o namespace.o mount_clnt.o nfstrace.o \
			   export.o sysfs.o
nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
nfs-$(CONFIG_SYSCTL)	+= sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
+20 −8
Original line number Diff line number Diff line
@@ -414,27 +414,39 @@ static __be32
validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
		const struct cb_sequenceargs * args)
{
	__be32 ret;

	ret = cpu_to_be32(NFS4ERR_BADSLOT);
	if (args->csa_slotid > tbl->server_highest_slotid)
		return htonl(NFS4ERR_BADSLOT);
		goto out_err;

	/* Replay */
	if (args->csa_sequenceid == slot->seq_nr) {
		ret = cpu_to_be32(NFS4ERR_DELAY);
		if (nfs4_test_locked_slot(tbl, slot->slot_nr))
			return htonl(NFS4ERR_DELAY);
			goto out_err;

		/* Signal process_op to set this error on next op */
		ret = cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP);
		if (args->csa_cachethis == 0)
			return htonl(NFS4ERR_RETRY_UNCACHED_REP);
			goto out_err;

		/* Liar! We never allowed you to set csa_cachethis != 0 */
		return htonl(NFS4ERR_SEQ_FALSE_RETRY);
		ret = cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY);
		goto out_err;
	}

	/* Note: wraparound relies on seq_nr being of type u32 */
	if (likely(args->csa_sequenceid == slot->seq_nr + 1))
		return htonl(NFS4_OK);

	/* Misordered request */
	return htonl(NFS4ERR_SEQ_MISORDERED);
	ret = cpu_to_be32(NFS4ERR_SEQ_MISORDERED);
	if (args->csa_sequenceid != slot->seq_nr + 1)
		goto out_err;

	return cpu_to_be32(NFS4_OK);

out_err:
	trace_nfs4_cb_seqid_err(args, ret);
	return ret;
}

/*
+20 −4
Original line number Diff line number Diff line
@@ -49,6 +49,7 @@
#include "pnfs.h"
#include "nfs.h"
#include "netns.h"
#include "sysfs.h"

#define NFSDBG_FACILITY		NFSDBG_CLIENT

@@ -175,6 +176,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
	clp->cl_rpcclient = ERR_PTR(-EINVAL);

	clp->cl_proto = cl_init->proto;
	clp->cl_nconnect = cl_init->nconnect;
	clp->cl_net = get_net(cl_init->net);

	clp->cl_principal = "*";
@@ -192,7 +194,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
EXPORT_SYMBOL_GPL(nfs_alloc_client);

#if IS_ENABLED(CONFIG_NFS_V4)
void nfs_cleanup_cb_ident_idr(struct net *net)
static void nfs_cleanup_cb_ident_idr(struct net *net)
{
	struct nfs_net *nn = net_generic(net, nfs_net_id);

@@ -214,7 +216,7 @@ static void pnfs_init_server(struct nfs_server *server)
}

#else
void nfs_cleanup_cb_ident_idr(struct net *net)
static void nfs_cleanup_cb_ident_idr(struct net *net)
{
}

@@ -406,10 +408,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
		clp = nfs_match_client(cl_init);
		if (clp) {
			spin_unlock(&nn->nfs_client_lock);
			if (IS_ERR(clp))
				return clp;
			if (new)
				new->rpc_ops->free_client(new);
			if (IS_ERR(clp))
				return clp;
			return nfs_found_client(cl_init, clp);
		}
		if (new) {
@@ -493,6 +495,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
	struct rpc_create_args args = {
		.net		= clp->cl_net,
		.protocol	= clp->cl_proto,
		.nconnect	= clp->cl_nconnect,
		.address	= (struct sockaddr *)&clp->cl_addr,
		.addrsize	= clp->cl_addrlen,
		.timeout	= cl_init->timeparms,
@@ -658,6 +661,7 @@ static int nfs_init_server(struct nfs_server *server,
		.net = data->net,
		.timeparms = &timeparms,
		.cred = server->cred,
		.nconnect = data->nfs_server.nconnect,
	};
	struct nfs_client *clp;
	int error;
@@ -1072,6 +1076,18 @@ void nfs_clients_init(struct net *net)
#endif
	spin_lock_init(&nn->nfs_client_lock);
	nn->boot_time = ktime_get_real();

	nfs_netns_sysfs_setup(nn, net);
}

void nfs_clients_exit(struct net *net)
{
	struct nfs_net *nn = net_generic(net, nfs_net_id);

	nfs_netns_sysfs_destroy(nn);
	nfs_cleanup_cb_ident_idr(net);
	WARN_ON_ONCE(!list_empty(&nn->nfs_client_list));
	WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list));
}

#ifdef CONFIG_PROC_FS
+10 −84
Original line number Diff line number Diff line
@@ -80,6 +80,10 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
		ctx->dup_cookie = 0;
		ctx->cred = get_cred(cred);
		spin_lock(&dir->i_lock);
		if (list_empty(&nfsi->open_files) &&
		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
			nfsi->cache_validity |= NFS_INO_INVALID_DATA |
				NFS_INO_REVAL_FORCED;
		list_add(&ctx->list, &nfsi->open_files);
		spin_unlock(&dir->i_lock);
		return ctx;
@@ -140,19 +144,12 @@ struct nfs_cache_array {
	struct nfs_cache_array_entry array[0];
};

struct readdirvec {
	unsigned long nr;
	unsigned long index;
	struct page *pages[NFS_MAX_READDIR_RAPAGES];
};

typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
typedef struct {
	struct file	*file;
	struct page	*page;
	struct dir_context *ctx;
	unsigned long	page_index;
	struct readdirvec pvec;
	u64		*dir_cookie;
	u64		last_cookie;
	loff_t		current_index;
@@ -532,10 +529,6 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
	struct nfs_cache_array *array;
	unsigned int count = 0;
	int status;
	int max_rapages = NFS_MAX_READDIR_RAPAGES;

	desc->pvec.index = desc->page_index;
	desc->pvec.nr = 0;

	scratch = alloc_page(GFP_KERNEL);
	if (scratch == NULL)
@@ -560,40 +553,20 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
		if (desc->plus)
			nfs_prime_dcache(file_dentry(desc->file), entry);

		status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
		if (status == -ENOSPC) {
			desc->pvec.nr++;
			if (desc->pvec.nr == max_rapages)
				break;
			status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
		}
		status = nfs_readdir_add_to_array(entry, page);
		if (status != 0)
			break;
	} while (!entry->eof);

	/*
	 * page and desc->pvec.pages[0] are valid, don't need to check
	 * whether or not to be NULL.
	 */
	copy_highpage(page, desc->pvec.pages[0]);

out_nopages:
	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
		array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]);
		array = kmap(page);
		array->eof_index = array->size;
		status = 0;
		kunmap_atomic(array);
		kunmap(page);
	}

	put_page(scratch);

	/*
	 * desc->pvec.nr > 0 means at least one page was completely filled,
	 * we should return -ENOSPC. Otherwise function
	 * nfs_readdir_xdr_to_array will enter infinite loop.
	 */
	if (desc->pvec.nr > 0)
		return -ENOSPC;
	return status;
}

@@ -627,24 +600,6 @@ int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
	return -ENOMEM;
}

/*
 * nfs_readdir_rapages_init initialize rapages by nfs_cache_array structure.
 */
static
void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc)
{
	struct nfs_cache_array *array;
	int max_rapages = NFS_MAX_READDIR_RAPAGES;
	int index;

	for (index = 0; index < max_rapages; index++) {
		array = kmap_atomic(desc->pvec.pages[index]);
		memset(array, 0, sizeof(struct nfs_cache_array));
		array->eof_index = -1;
		kunmap_atomic(array);
	}
}

static
int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
{
@@ -655,12 +610,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
	int status = -ENOMEM;
	unsigned int array_size = ARRAY_SIZE(pages);

	/*
	 * This means we hit readdir rdpages miss, the preallocated rdpages
	 * are useless, the preallocate rdpages should be reinitialized.
	 */
	nfs_readdir_rapages_init(desc);

	entry.prev_cookie = 0;
	entry.cookie = desc->last_cookie;
	entry.eof = 0;
@@ -721,24 +670,9 @@ int nfs_readdir_filler(void *data, struct page* page)
	struct inode	*inode = file_inode(desc->file);
	int ret;

	/*
	 * If desc->page_index in range desc->pvec.index and
	 * desc->pvec.index + desc->pvec.nr, we get readdir cache hit.
	 */
	if (desc->page_index >= desc->pvec.index &&
		desc->page_index < (desc->pvec.index + desc->pvec.nr)) {
		/*
		 * page and desc->pvec.pages[x] are valid, don't need to check
		 * whether or not to be NULL.
		 */
		copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]);
		ret = 0;
	} else {
	ret = nfs_readdir_xdr_to_array(desc, page, inode);
	if (ret < 0)
		goto error;
	}

	SetPageUptodate(page);

	if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
@@ -903,7 +837,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
			*desc = &my_desc;
	struct nfs_open_dir_context *dir_ctx = file->private_data;
	int res = 0;
	int max_rapages = NFS_MAX_READDIR_RAPAGES;

	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
			file, (long long)ctx->pos);
@@ -923,12 +856,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
	desc->decode = NFS_PROTO(inode)->decode_dirent;
	desc->plus = nfs_use_readdirplus(inode, ctx);

	res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages);
	if (res < 0)
		return -ENOMEM;

	nfs_readdir_rapages_init(desc);

	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
		res = nfs_revalidate_mapping(inode, file->f_mapping);
	if (res < 0)
@@ -964,7 +891,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
			break;
	} while (!desc->eof);
out:
	nfs_readdir_free_pages(desc->pvec.pages, max_rapages);
	if (res > 0)
		res = 0;
	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
+26 −0
Original line number Diff line number Diff line
@@ -934,6 +934,10 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
	if (pgio->pg_error < 0)
		return;
out_mds:
	trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
			0, NFS4_MAX_UINT64, IOMODE_READ,
			NFS_I(pgio->pg_inode)->layout,
			pgio->pg_lseg);
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_read_mds(pgio);
@@ -1000,6 +1004,10 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
	return;

out_mds:
	trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode,
			0, NFS4_MAX_UINT64, IOMODE_RW,
			NFS_I(pgio->pg_inode)->layout,
			pgio->pg_lseg);
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_write_mds(pgio);
@@ -1026,6 +1034,10 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
	if (pgio->pg_lseg)
		return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);

	trace_pnfs_mds_fallback_pg_get_mirror_count(pgio->pg_inode,
			0, NFS4_MAX_UINT64, IOMODE_RW,
			NFS_I(pgio->pg_inode)->layout,
			pgio->pg_lseg);
	/* no lseg means that pnfs is not in use, so no mirroring here */
	nfs_pageio_reset_write_mds(pgio);
out:
@@ -1075,6 +1087,10 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		trace_pnfs_mds_fallback_write_done(hdr->inode,
				hdr->args.offset, hdr->args.count,
				IOMODE_RW, NFS_I(hdr->inode)->layout,
				hdr->lseg);
		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
	}
}
@@ -1094,6 +1110,10 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		trace_pnfs_mds_fallback_read_done(hdr->inode,
				hdr->args.offset, hdr->args.count,
				IOMODE_READ, NFS_I(hdr->inode)->layout,
				hdr->lseg);
		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
	}
}
@@ -1827,6 +1847,9 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
out_failed:
	if (ff_layout_avoid_mds_available_ds(lseg))
		return PNFS_TRY_AGAIN;
	trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
			hdr->args.offset, hdr->args.count,
			IOMODE_READ, NFS_I(hdr->inode)->layout, lseg);
	return PNFS_NOT_ATTEMPTED;
}

@@ -1892,6 +1915,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
out_failed:
	if (ff_layout_avoid_mds_available_ds(lseg))
		return PNFS_TRY_AGAIN;
	trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
			hdr->args.offset, hdr->args.count,
			IOMODE_RW, NFS_I(hdr->inode)->layout, lseg);
	return PNFS_NOT_ATTEMPTED;
}

Loading