Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c7a2c49e authored by Linus Torvalds
Browse files
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - Fix the NFSv4.1 r/wsize sanity checking
   - Reset the RPC/RDMA credit grant properly after a disconnect
   - Fix a missed page unlock after pg_doio()

  Features and optimisations:
   - Overhaul of the RPC client socket code to eliminate a locking
     bottleneck and reduce the latency when transmitting lots of
     requests in parallel.
   - Allow parallelisation of the RPCSEC_GSS encoding of an RPC request.
   - Convert the RPC client socket receive code to use iov_iter() for
     improved efficiency.
   - Convert several NFS and RPC lookup operations to use RCU instead of
     taking global locks.
   - Avoid the need for BH-safe locks in the RPC/RDMA back channel.

  Bugfixes and cleanups:
   - Fix lock recovery during NFSv4 delegation recalls
   - Fix the NFSv4 + NFSv4.1 "lookup revalidate + open file" case.
   - Fixes for the RPC connection metrics
   - Various RPC client layer cleanups to consolidate stream based
     sockets
   - RPC/RDMA connection cleanups
   - Simplify the RPC/RDMA cleanup after memory operation failures
   - Clean ups for NFS v4.2 copy completion and NFSv4 open state
     reclaim"

* tag 'nfs-for-4.20-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (97 commits)
  SUNRPC: Convert the auth cred cache to use refcount_t
  SUNRPC: Convert auth creds to use refcount_t
  SUNRPC: Simplify lookup code
  SUNRPC: Clean up the AUTH cache code
  NFS: change sign of nfs_fh length
  sunrpc: safely reallow resvport min/max inversion
  nfs: remove redundant call to nfs_context_set_write_error()
  nfs: Fix a missed page unlock after pg_doio()
  SUNRPC: Fix a compile warning for cmpxchg64()
  NFSv4.x: fix lock recovery during delegation recall
  SUNRPC: use cmpxchg64() in gss_seq_send64_fetch_and_inc()
  xprtrdma: Squelch a sparse warning
  xprtrdma: Clean up xprt_rdma_disconnect_inject
  xprtrdma: Add documenting comments
  xprtrdma: Report when there were zero posted Receives
  xprtrdma: Move rb_flags initialization
  xprtrdma: Don't disable BH's in backchannel server
  xprtrdma: Remove memory address of "ep" from an error message
  xprtrdma: Rename rpcrdma_qp_async_error_upcall
  xprtrdma: Simplify RPC wake-ups on connect
  ...
parents 033078a9 331bc71c
Loading
Loading
Loading
Loading
+9 −8
Original line number Diff line number Diff line
@@ -93,7 +93,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags)
	return nfs4_do_check_delegation(inode, flags, false);
}

static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
{
	struct inode *inode = state->inode;
	struct file_lock *fl;
@@ -108,7 +108,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
	spin_lock(&flctx->flc_lock);
restart:
	list_for_each_entry(fl, list, fl_list) {
		if (nfs_file_open_context(fl->fl_file) != ctx)
		if (nfs_file_open_context(fl->fl_file)->state != state)
			continue;
		spin_unlock(&flctx->flc_lock);
		status = nfs4_lock_delegation_recall(fl, state, stateid);
@@ -136,8 +136,8 @@ static int nfs_delegation_claim_opens(struct inode *inode,
	int err;

again:
	spin_lock(&inode->i_lock);
	list_for_each_entry(ctx, &nfsi->open_files, list) {
	rcu_read_lock();
	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
		state = ctx->state;
		if (state == NULL)
			continue;
@@ -147,15 +147,16 @@ static int nfs_delegation_claim_opens(struct inode *inode,
			continue;
		if (!nfs4_stateid_match(&state->stateid, stateid))
			continue;
		get_nfs_open_context(ctx);
		spin_unlock(&inode->i_lock);
		if (!get_nfs_open_context(ctx))
			continue;
		rcu_read_unlock();
		sp = state->owner;
		/* Block nfs4_proc_unlck */
		mutex_lock(&sp->so_delegreturn_mutex);
		seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
		err = nfs4_open_delegation_recall(ctx, state, stateid, type);
		if (!err)
			err = nfs_delegation_claim_locks(ctx, state, stateid);
			err = nfs_delegation_claim_locks(state, stateid);
		if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
			err = -EAGAIN;
		mutex_unlock(&sp->so_delegreturn_mutex);
@@ -164,7 +165,7 @@ static int nfs_delegation_claim_opens(struct inode *inode,
			return err;
		goto again;
	}
	spin_unlock(&inode->i_lock);
	rcu_read_unlock();
	return 0;
}

+162 −133
Original line number Diff line number Diff line
@@ -1072,6 +1072,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
	return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}

/*
 * Common tail for the lookup revalidation helpers: log the verdict and
 * translate the intermediate result @error into the final return value.
 *
 *   @error == 1  the dentry is valid; returns 1.
 *   @error == 0  the dentry is invalid; nfs_mark_for_revalidate() is
 *                called on @dir and 0 is returned, unless @inode is a
 *                directory and @dentry is IS_ROOT(), in which case 1 is
 *                returned (see the comment in the body).
 *   otherwise    @error is returned unchanged.
 *
 * @inode may be NULL (negative dentry).
 */
static int
nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
			   struct inode *inode, int error)
{
	if (error == 1) {
		dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
			__func__, dentry);
		return 1;
	}

	if (error != 0) {
		dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
					__func__, dentry, error);
		return error;
	}

	/* error == 0: the dentry failed revalidation */
	nfs_mark_for_revalidate(dir);
	if (inode && S_ISDIR(inode->i_mode)) {
		/* Purge readdir caches. */
		nfs_zap_caches(inode);
		/*
		 * We can't d_drop the root of a disconnected tree:
		 * its d_hash is on the s_anon list and d_drop() would hide
		 * it from shrink_dcache_for_unmount(), leading to busy
		 * inodes on unmount and further oopses.
		 */
		if (IS_ROOT(dentry))
			return 1;
	}
	dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
			__func__, dentry);
	return 0;
}

/*
 * Revalidate a negative dentry (one with no inode).
 *
 * If nfs_neg_need_reval() says no revalidation is needed, the dentry is
 * reported as valid.  If revalidation is needed, -ECHILD is returned
 * when LOOKUP_RCU is set in @flags; otherwise the dentry is reported as
 * invalid.  The valid/invalid verdicts go through
 * nfs_lookup_revalidate_done() with a NULL inode.
 */
static int
nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
			       unsigned int flags)
{
	if (!nfs_neg_need_reval(dir, dentry, flags))
		return nfs_lookup_revalidate_done(dir, dentry, NULL, 1);

	if (flags & LOOKUP_RCU)
		return -ECHILD;

	return nfs_lookup_revalidate_done(dir, dentry, NULL, 0);
}

/*
 * Accept @dentry as valid without doing a lookup.
 *
 * The callers in this file use it when
 * NFS_PROTO(dir)->have_delegation(inode, FMODE_READ) is true.  The
 * dentry's verifier is set to the value nfs_save_change_attribute()
 * returns for @dir, and the result is reported through
 * nfs_lookup_revalidate_done() with a verdict of 1 (valid).
 */
static int
nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
				struct inode *inode)
{
	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}

/*
 * Revalidate a positive dentry by issuing a fresh lookup of its name in
 * @dir and comparing the result against the cached @inode.
 *
 * The intermediate verdict is handed to nfs_lookup_revalidate_done(),
 * whose return value is returned to the caller:
 *   - allocation failure of the file handle, attributes or label
 *     gives -ENOMEM;
 *   - a lookup error of -ESTALE or -ENOENT gives "invalid" (0), any other
 *     negative lookup error is passed through;
 *   - a non-zero nfs_compare_fh() result, or a negative return from
 *     nfs_refresh_inode(), gives "invalid" (0);
 *   - otherwise the security label and the dentry verifier are updated,
 *     a readdirplus hint is set on @dir, and the verdict is "valid" (1).
 */
static int
nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
			     struct inode *inode)
{
	struct nfs_fh *fhandle;
	struct nfs_fattr *fattr;
	struct nfs4_label *label;
	int ret;

	ret = -ENOMEM;
	fhandle = nfs_alloc_fhandle();
	fattr = nfs_alloc_fattr();
	label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
	if (fhandle == NULL || fattr == NULL || IS_ERR(label))
		goto out;

	ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
	if (ret < 0) {
		/* The object is gone on the server: treat as invalid */
		if (ret == -ESTALE || ret == -ENOENT)
			ret = 0;
		goto out;
	}
	ret = 0;
	if (nfs_compare_fh(NFS_FH(inode), fhandle))
		goto out;
	if (nfs_refresh_inode(inode, fattr) < 0)
		goto out;

	nfs_setsecurity(inode, fattr, label);
	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));

	/* set a readdirplus hint that we had a cache miss */
	nfs_force_use_readdirplus(dir);
	ret = 1;
out:
	nfs_free_fattr(fattr);
	nfs_free_fhandle(fhandle);
	/*
	 * On allocation failure @label holds an ERR_PTR-encoded errno, not
	 * an object: it must not be passed to the free helper.
	 */
	if (!IS_ERR(label))
		nfs4_label_free(label);
	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
}

/*
 * This is called every time the dcache has a lookup hit,
 * and we should check whether we can really trust that
@@ -1083,58 +1177,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
 * If the parent directory is seen to have changed, we throw out the
 * cached dentry and do a new lookup.
 */
static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
static int
nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
			 unsigned int flags)
{
	struct inode *dir;
	struct inode *inode;
	struct dentry *parent;
	struct nfs_fh *fhandle = NULL;
	struct nfs_fattr *fattr = NULL;
	struct nfs4_label *label = NULL;
	int error;

	if (flags & LOOKUP_RCU) {
		parent = READ_ONCE(dentry->d_parent);
		dir = d_inode_rcu(parent);
		if (!dir)
			return -ECHILD;
	} else {
		parent = dget_parent(dentry);
		dir = d_inode(parent);
	}
	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
	inode = d_inode(dentry);

	if (!inode) {
		if (nfs_neg_need_reval(dir, dentry, flags)) {
			if (flags & LOOKUP_RCU)
				return -ECHILD;
			goto out_bad;
		}
		goto out_valid;
	}
	if (!inode)
		return nfs_lookup_revalidate_negative(dir, dentry, flags);

	if (is_bad_inode(inode)) {
		if (flags & LOOKUP_RCU)
			return -ECHILD;
		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
				__func__, dentry);
		goto out_bad;
	}

	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
		goto out_set_verifier;
		return nfs_lookup_revalidate_delegated(dir, dentry, inode);

	/* Force a full look up iff the parent directory has changed */
	if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
		error = nfs_lookup_verify_inode(inode, flags);
		if (error) {
			if (flags & LOOKUP_RCU)
				return -ECHILD;
			if (error == -ESTALE)
				goto out_zap_parent;
			goto out_error;
				nfs_zap_caches(dir);
			goto out_bad;
		}
		nfs_advise_use_readdirplus(dir);
		goto out_valid;
@@ -1146,81 +1218,45 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
	if (NFS_STALE(inode))
		goto out_bad;

	error = -ENOMEM;
	fhandle = nfs_alloc_fhandle();
	fattr = nfs_alloc_fattr();
	if (fhandle == NULL || fattr == NULL)
		goto out_error;

	label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
	if (IS_ERR(label))
		goto out_error;

	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
	error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
	trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
	if (error == -ESTALE || error == -ENOENT)
		goto out_bad;
	if (error)
		goto out_error;
	if (nfs_compare_fh(NFS_FH(inode), fhandle))
		goto out_bad;
	if ((error = nfs_refresh_inode(inode, fattr)) != 0)
		goto out_bad;

	nfs_setsecurity(inode, fattr, label);

	nfs_free_fattr(fattr);
	nfs_free_fhandle(fhandle);
	nfs4_label_free(label);
	return error;
out_valid:
	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
out_bad:
	if (flags & LOOKUP_RCU)
		return -ECHILD;
	return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
}

	/* set a readdirplus hint that we had a cache miss */
	nfs_force_use_readdirplus(dir);
static int
__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
			int (*reval)(struct inode *, struct dentry *, unsigned int))
{
	struct dentry *parent;
	struct inode *dir;
	int ret;

out_set_verifier:
	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out_valid:
	if (flags & LOOKUP_RCU) {
		parent = READ_ONCE(dentry->d_parent);
		dir = d_inode_rcu(parent);
		if (!dir)
			return -ECHILD;
		ret = reval(dir, dentry, flags);
		if (parent != READ_ONCE(dentry->d_parent))
			return -ECHILD;
	} else
	} else {
		parent = dget_parent(dentry);
		ret = reval(d_inode(parent), dentry, flags);
		dput(parent);
	dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
			__func__, dentry);
	return 1;
out_zap_parent:
	nfs_zap_caches(dir);
 out_bad:
	WARN_ON(flags & LOOKUP_RCU);
	nfs_free_fattr(fattr);
	nfs_free_fhandle(fhandle);
	nfs4_label_free(label);
	nfs_mark_for_revalidate(dir);
	if (inode && S_ISDIR(inode->i_mode)) {
		/* Purge readdir caches. */
		nfs_zap_caches(inode);
		/*
		 * We can't d_drop the root of a disconnected tree:
		 * its d_hash is on the s_anon list and d_drop() would hide
		 * it from shrink_dcache_for_unmount(), leading to busy
		 * inodes on unmount and further oopses.
		 */
		if (IS_ROOT(dentry))
			goto out_valid;
	}
	dput(parent);
	dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
			__func__, dentry);
	return 0;
out_error:
	WARN_ON(flags & LOOKUP_RCU);
	nfs_free_fattr(fattr);
	nfs_free_fhandle(fhandle);
	nfs4_label_free(label);
	dput(parent);
	dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
			__func__, dentry, error);
	return error;
	return ret;
}

/*
 * Revalidate @dentry by running nfs_do_lookup_revalidate() through the
 * shared __nfs_lookup_revalidate() wrapper, which supplies the parent
 * directory inode to the callback.
 */
static int
nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
	return __nfs_lookup_revalidate(dentry, flags,
				       nfs_do_lookup_revalidate);
}

/*
@@ -1579,62 +1615,55 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);

static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
static int
nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
			  unsigned int flags)
{
	struct inode *inode;
	int ret = 0;

	if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
		goto no_open;
		goto full_reval;
	if (d_mountpoint(dentry))
		goto no_open;
	if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
		goto no_open;
		goto full_reval;

	inode = d_inode(dentry);

	/* We can't create new files in nfs_open_revalidate(), so we
	 * optimize away revalidation of negative dentries.
	 */
	if (inode == NULL) {
		struct dentry *parent;
		struct inode *dir;
	if (inode == NULL)
		goto full_reval;

		if (flags & LOOKUP_RCU) {
			parent = READ_ONCE(dentry->d_parent);
			dir = d_inode_rcu(parent);
			if (!dir)
				return -ECHILD;
		} else {
			parent = dget_parent(dentry);
			dir = d_inode(parent);
		}
		if (!nfs_neg_need_reval(dir, dentry, flags))
			ret = 1;
		else if (flags & LOOKUP_RCU)
			ret = -ECHILD;
		if (!(flags & LOOKUP_RCU))
			dput(parent);
		else if (parent != READ_ONCE(dentry->d_parent))
			return -ECHILD;
		goto out;
	}
	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
		return nfs_lookup_revalidate_delegated(dir, dentry, inode);

	/* NFS only supports OPEN on regular files */
	if (!S_ISREG(inode->i_mode))
		goto no_open;
		goto full_reval;

	/* We cannot do exclusive creation on a positive dentry */
	if (flags & LOOKUP_EXCL)
		goto no_open;
	if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
		goto reval_dentry;

	/* Check if the directory changed */
	if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
		goto reval_dentry;

	/* Let f_op->open() actually open (and revalidate) the file */
	ret = 1;
	return 1;
reval_dentry:
	if (flags & LOOKUP_RCU)
		return -ECHILD;
	return nfs_lookup_revalidate_dentry(dir, dentry, inode);;

out:
	return ret;
full_reval:
	return nfs_do_lookup_revalidate(dir, dentry, flags);
}

no_open:
	return nfs_lookup_revalidate(dentry, flags);
/*
 * NFSv4 flavour of dentry revalidation: runs nfs4_do_lookup_revalidate()
 * through the shared __nfs_lookup_revalidate() wrapper, which supplies
 * the parent directory inode to the callback.
 */
static int
nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
	return __nfs_lookup_revalidate(dentry, flags, nfs4_do_lookup_revalidate);
}

#endif /* CONFIG_NFSV4 */
+1 −0
Original line number Diff line number Diff line
@@ -1164,6 +1164,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
	.id			= LAYOUT_NFSV4_1_FILES,
	.name			= "LAYOUT_NFSV4_1_FILES",
	.owner			= THIS_MODULE,
	.max_layoutget_response	= 4096, /* 1 page or so... */
	.alloc_layout_hdr	= filelayout_alloc_layout_hdr,
	.free_layout_hdr	= filelayout_free_layout_hdr,
	.alloc_lseg		= filelayout_alloc_lseg,
+1 −0
Original line number Diff line number Diff line
@@ -2356,6 +2356,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
	.name			= "LAYOUT_FLEX_FILES",
	.owner			= THIS_MODULE,
	.flags			= PNFS_LAYOUTGET_ON_OPEN,
	.max_layoutget_response	= 4096, /* 1 page or so... */
	.set_layoutdriver	= ff_layout_set_layoutdriver,
	.alloc_layout_hdr	= ff_layout_alloc_layout_hdr,
	.free_layout_hdr	= ff_layout_free_layout_hdr,
+1 −1
Original line number Diff line number Diff line
@@ -453,7 +453,7 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
	struct rpc_cred *cred;

	if (mirror) {
	if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
		cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
		if (!cred)
			cred = get_rpccred(mdscred);
Loading