Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4c46819a authored by Christoph Hellwig, committed by Ben Myers
Browse files

xfs: do not write the buffer from xfs_iflush



Instead of writing the buffer directly from inside xfs_iflush return it to
the caller and let the caller decide what to do with the buffer.  Also
remove the pincount check in xfs_iflush that all non-blocking callers already
implement and the now unused flags parameter.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
parent 8a48088f
Loading
Loading
Loading
Loading
+16 −38
Original line number Diff line number Diff line
@@ -2384,22 +2384,22 @@ xfs_iflush_cluster(
}

/*
 * xfs_iflush() will write a modified inode's changes out to the
 * inode's on disk home.  The caller must have the inode lock held
 * in at least shared mode and the inode flush completion must be
 * active as well.  The inode lock will still be held upon return from
 * the call and the caller is free to unlock it.
 * The inode flush will be completed when the inode reaches the disk.
 * The flags indicate how the inode's buffer should be written out.
 * Flush dirty inode metadata into the backing buffer.
 *
 * The caller must have the inode lock and the inode flush lock held.  The
 * inode lock will still be held upon return to the caller, and the inode
 * flush lock will be released after the inode has reached the disk.
 *
 * The caller must write out the buffer returned in *bpp and release it.
 */
int
xfs_iflush(
	xfs_inode_t		*ip,
	uint			flags)
	struct xfs_inode	*ip,
	struct xfs_buf		**bpp)
{
	xfs_buf_t		*bp;
	xfs_dinode_t		*dip;
	xfs_mount_t		*mp;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buf		*bp;
	struct xfs_dinode	*dip;
	int			error;

	XFS_STATS_INC(xs_iflush_count);
@@ -2409,24 +2409,8 @@ xfs_iflush(
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));

	mp = ip->i_mount;
	*bpp = NULL;

	/*
	 * We can't flush the inode until it is unpinned, so wait for it if we
	 * are allowed to block.  We know no one new can pin it, because we are
	 * holding the inode lock shared and you need to hold it exclusively to
	 * pin the inode.
	 *
	 * If we are not allowed to block, force the log out asynchronously so
	 * that when we come back the inode will be unpinned. If other inodes
	 * in the same cluster are dirty, they will probably write the inode
	 * out for us if they occur after the log force completes.
	 */
	if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
		xfs_iunpin(ip);
		xfs_ifunlock(ip);
		return EAGAIN;
	}
	xfs_iunpin_wait(ip);

	/*
@@ -2458,8 +2442,7 @@ xfs_iflush(
	/*
	 * Get the buffer containing the on-disk inode.
	 */
	error = xfs_itobp(mp, NULL, ip, &dip, &bp,
				(flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
	error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK);
	if (error || !bp) {
		xfs_ifunlock(ip);
		return error;
@@ -2487,13 +2470,8 @@ xfs_iflush(
	if (error)
		goto cluster_corrupt_out;

	if (flags & SYNC_WAIT)
		error = xfs_bwrite(bp);
	else
		xfs_buf_delwri_queue(bp);

	xfs_buf_relse(bp);
	return error;
	*bpp = bp;
	return 0;

corrupt_out:
	xfs_buf_relse(bp);
+1 −1
Original line number Diff line number Diff line
@@ -529,7 +529,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);

void		xfs_iext_realloc(xfs_inode_t *, int, int);
void		xfs_iunpin_wait(xfs_inode_t *);
int		xfs_iflush(xfs_inode_t *, uint);
int		xfs_iflush(struct xfs_inode *, struct xfs_buf **);
void		xfs_promote_inode(struct xfs_inode *);
void		xfs_lock_inodes(xfs_inode_t **, int, uint);
void		xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
+16 −1
Original line number Diff line number Diff line
@@ -506,6 +506,15 @@ xfs_inode_item_trylock(
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return XFS_ITEM_LOCKED;

	/*
	 * Re-check the pincount now that we stabilized the value by
	 * taking the ilock.
	 */
	if (xfs_ipincount(ip) > 0) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return XFS_ITEM_PINNED;
	}

	if (!xfs_iflock_nowait(ip)) {
		/*
		 * inode has already been flushed to the backing buffer,
@@ -666,6 +675,8 @@ xfs_inode_item_push(
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	struct xfs_buf		*bp = NULL;
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
	ASSERT(xfs_isiflocked(ip));
@@ -689,7 +700,11 @@ xfs_inode_item_push(
	 * will pull the inode from the AIL, mark it clean and unlock the flush
	 * lock.
	 */
	(void) xfs_iflush(ip, SYNC_TRYLOCK);
	error = xfs_iflush(ip, &bp);
	if (!error) {
		xfs_buf_delwri_queue(bp);
		xfs_buf_relse(bp);
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
}

+15 −14
Original line number Diff line number Diff line
@@ -648,10 +648,6 @@ xfs_reclaim_inode_grab(
 * (*) dgc: I don't think the clean, pinned state is possible but it gets
 * handled anyway given the order of checks implemented.
 *
 * As can be seen from the table, the return value of xfs_iflush() is not
 * sufficient to correctly decide the reclaim action here. The checks in
 * xfs_iflush() might look like duplicates, but they are not.
 *
 * Also, because we get the flush lock first, we know that any inode that has
 * been flushed delwri has had the flush completed by the time we check that
 * the inode is clean.
@@ -679,6 +675,7 @@ xfs_reclaim_inode(
	struct xfs_perag	*pag,
	int			sync_mode)
{
	struct xfs_buf		*bp = NULL;
	int			error;

restart:
@@ -728,29 +725,33 @@ xfs_reclaim_inode(
	/*
	 * Now we have an inode that needs flushing.
	 *
	 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
	 * reclaim as we can deadlock with inode cluster removal.
	 * Note that xfs_iflush will never block on the inode buffer lock, as
	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
	 * ip->i_lock, and we are doing the exact opposite here.  As a result,
	 * doing a blocking xfs_itobp() to get the cluster buffer will result
	 * doing a blocking xfs_itobp() to get the cluster buffer would result
	 * in an ABBA deadlock with xfs_ifree_cluster().
	 *
	 * As xfs_ifree_cluser() must gather all inodes that are active in the
	 * cache to mark them stale, if we hit this case we don't actually want
	 * to do IO here - we want the inode marked stale so we can simply
	 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
	 * just unlock the inode, back off and try again. Hopefully the next
	 * pass through will see the stale flag set on the inode.
	 * reclaim it.  Hence if we get an EAGAIN error here,  just unlock the
	 * inode, back off and try again.  Hopefully the next pass through will
	 * see the stale flag set on the inode.
	 */
	error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
	error = xfs_iflush(ip, &bp);
	if (error == EAGAIN) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/* backoff longer than in xfs_ifree_cluster */
		delay(2);
		goto restart;
	}
	xfs_iflock(ip);

	if (!error) {
		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
	}

	xfs_iflock(ip);
reclaim:
	xfs_ifunlock(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);