Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7f27392c authored by Trond Myklebust
Browse files

pNFS: Fix races between return-on-close and layoutreturn.



If one or more of the layout segments reports an error during I/O, then
we may have to send a layoutreturn to report the error back to the NFS
metadata server.
This patch ensures that the return-on-close code can detect the
outstanding layoutreturn, and not preempt it.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
parent df9cecc1
Loading
Loading
Loading
Loading
+0 −2
Original line number Original line Diff line number Diff line
@@ -7972,8 +7972,6 @@ static void nfs4_layoutreturn_release(void *calldata)
		pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
		pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
	pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
	pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
	pnfs_clear_layoutreturn_waitbit(lo);
	pnfs_clear_layoutreturn_waitbit(lo);
	clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
	lo->plh_block_lgets--;
	lo->plh_block_lgets--;
	spin_unlock(&lo->plh_inode->i_lock);
	spin_unlock(&lo->plh_inode->i_lock);
	pnfs_free_lseg_list(&freeme);
	pnfs_free_lseg_list(&freeme);
+35 −28
Original line number Original line Diff line number Diff line
@@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
{
{
	struct pnfs_layout_segment *s;
	struct pnfs_layout_segment *s;


	if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
	if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
		return false;
		return false;


	list_for_each_entry(s, &lo->plh_segs, pls_list)
	list_for_each_entry(s, &lo->plh_segs, pls_list)
@@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
	return true;
	return true;
}
}


/*
 * pnfs_prepare_layoutreturn - claim the right to send a layoutreturn.
 * @lo: layout header whose flags and counters are updated
 *
 * Atomically sets NFS_LAYOUT_RETURN in lo->plh_flags. If the bit was
 * already set, nothing else is touched and false is returned, so the
 * caller must not send a layoutreturn of its own. Otherwise the header
 * is prepared for the send and true is returned.
 *
 * The callers visible in this change invoke it before dropping the
 * inode's i_lock and only call pnfs_send_layoutreturn() when it
 * returned true.
 *
 * NOTE(review): plh_block_lgets is decremented again in
 * nfs4_layoutreturn_release(); the pnfs_get_layout_hdr() call presumably
 * takes a reference that the layoutreturn path drops - confirm.
 */
static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
{
	/* Bit already set: this caller backs off. */
	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
		return false;
	/* Reset the recorded return iomode; callers snapshot it beforehand. */
	lo->plh_return_iomode = 0;
	lo->plh_block_lgets++;
	pnfs_get_layout_hdr(lo);
	/* A layoutreturn is now being prepared, so drop the return-before-close request. */
	clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
	return true;
}

static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
		struct pnfs_layout_hdr *lo, struct inode *inode)
		struct pnfs_layout_hdr *lo, struct inode *inode)
{
{
@@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
	if (pnfs_layout_need_return(lo, lseg)) {
	if (pnfs_layout_need_return(lo, lseg)) {
		nfs4_stateid stateid;
		nfs4_stateid stateid;
		enum pnfs_iomode iomode;
		enum pnfs_iomode iomode;
		bool send;


		stateid = lo->plh_stateid;
		stateid = lo->plh_stateid;
		iomode = lo->plh_return_iomode;
		iomode = lo->plh_return_iomode;
		/* decreased in pnfs_send_layoutreturn() */
		send = pnfs_prepare_layoutreturn(lo);
		lo->plh_block_lgets++;
		lo->plh_return_iomode = 0;
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode->i_lock);
		pnfs_get_layout_hdr(lo);
		if (send) {

			/* Send an async layoutreturn so we dont deadlock */
			/* Send an async layoutreturn so we dont deadlock */
			pnfs_send_layoutreturn(lo, stateid, iomode, false);
			pnfs_send_layoutreturn(lo, stateid, iomode, false);
		}
	} else
	} else
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode->i_lock);
}
}
@@ -924,6 +935,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
	smp_mb__after_atomic();
	smp_mb__after_atomic();
	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}
}


static int
static int
@@ -978,6 +990,7 @@ _pnfs_return_layout(struct inode *ino)
	LIST_HEAD(tmp_list);
	LIST_HEAD(tmp_list);
	nfs4_stateid stateid;
	nfs4_stateid stateid;
	int status = 0, empty;
	int status = 0, empty;
	bool send;


	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);


@@ -1007,17 +1020,18 @@ _pnfs_return_layout(struct inode *ino)
	/* Don't send a LAYOUTRETURN if list was initially empty */
	/* Don't send a LAYOUTRETURN if list was initially empty */
	if (empty) {
	if (empty) {
		spin_unlock(&ino->i_lock);
		spin_unlock(&ino->i_lock);
		pnfs_put_layout_hdr(lo);
		dprintk("NFS: %s no layout segments to return\n", __func__);
		dprintk("NFS: %s no layout segments to return\n", __func__);
		goto out;
		goto out_put_layout_hdr;
	}
	}


	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
	lo->plh_block_lgets++;
	send = pnfs_prepare_layoutreturn(lo);
	spin_unlock(&ino->i_lock);
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	pnfs_free_lseg_list(&tmp_list);

	if (send)
		status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
		status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
out_put_layout_hdr:
	pnfs_put_layout_hdr(lo);
out:
out:
	dprintk("<-- %s status: %d\n", __func__, status);
	dprintk("<-- %s status: %d\n", __func__, status);
	return status;
	return status;
@@ -1097,13 +1111,9 @@ bool pnfs_roc(struct inode *ino)
out_noroc:
out_noroc:
	if (lo) {
	if (lo) {
		stateid = lo->plh_stateid;
		stateid = lo->plh_stateid;
		layoutreturn =
		if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
			test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
					   &lo->plh_flags))
					   &lo->plh_flags);
			layoutreturn = pnfs_prepare_layoutreturn(lo);
		if (layoutreturn) {
			lo->plh_block_lgets++;
			pnfs_get_layout_hdr(lo);
		}
	}
	}
	spin_unlock(&ino->i_lock);
	spin_unlock(&ino->i_lock);
	if (layoutreturn) {
	if (layoutreturn) {
@@ -1163,16 +1173,14 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
	 */
	 */
	*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
	*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
	stateid = lo->plh_stateid;
	stateid = lo->plh_stateid;
	layoutreturn = test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
	if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
					   &lo->plh_flags);
					   &lo->plh_flags))
	if (layoutreturn) {
		layoutreturn = pnfs_prepare_layoutreturn(lo);
		lo->plh_block_lgets++;
	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
		pnfs_get_layout_hdr(lo);
		rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
	}


	spin_unlock(&ino->i_lock);
	spin_unlock(&ino->i_lock);
	if (layoutreturn) {
	if (layoutreturn) {
		rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
		pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false);
		pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false);
		return true;
		return true;
	}
	}
@@ -1693,7 +1701,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
	spin_lock(&inode->i_lock);
	spin_lock(&inode->i_lock);
	/* set failure bit so that pnfs path will be retried later */
	/* set failure bit so that pnfs path will be retried later */
	pnfs_layout_set_fail_bit(lo, iomode);
	pnfs_layout_set_fail_bit(lo, iomode);
	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
	if (lo->plh_return_iomode == 0)
	if (lo->plh_return_iomode == 0)
		lo->plh_return_iomode = range.iomode;
		lo->plh_return_iomode = range.iomode;
	else if (lo->plh_return_iomode != range.iomode)
	else if (lo->plh_return_iomode != range.iomode)