Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4c46819a authored by Christoph Hellwig, committed by Ben Myers
Browse files

xfs: do not write the buffer from xfs_iflush



Instead of writing the buffer directly from inside xfs_iflush return it to
the caller and let the caller decide what to do with the buffer.  Also
remove the pincount check in xfs_iflush that all non-blocking callers already
implement and the now unused flags parameter.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
parent 8a48088f
Loading
Loading
Loading
Loading
+16 −38
Original line number Diff line number Diff line
@@ -2384,22 +2384,22 @@ xfs_iflush_cluster(
}

/*
 * xfs_iflush() will write a modified inode's changes out to the
 * inode's on disk home.  The caller must have the inode lock held
 * in at least shared mode and the inode flush completion must be
 * active as well.  The inode lock will still be held upon return from
 * the call and the caller is free to unlock it.
 * The inode flush will be completed when the inode reaches the disk.
 * The flags indicate how the inode's buffer should be written out.
 * Flush dirty inode metadata into the backing buffer.
 *
 * The caller must have the inode lock and the inode flush lock held.  The
 * inode lock will still be held upon return to the caller, and the inode
 * flush lock will be released after the inode has reached the disk.
 *
 * The caller must write out the buffer returned in *bpp and release it.
 */
int
xfs_iflush(
	xfs_inode_t		*ip,
	uint			flags)
	struct xfs_inode	*ip,
	struct xfs_buf		**bpp)
{
	xfs_buf_t		*bp;
	xfs_dinode_t		*dip;
	xfs_mount_t		*mp;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buf		*bp;
	struct xfs_dinode	*dip;
	int			error;

	XFS_STATS_INC(xs_iflush_count);
@@ -2409,24 +2409,8 @@ xfs_iflush(
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));

	mp = ip->i_mount;
	*bpp = NULL;

	/*
	 * We can't flush the inode until it is unpinned, so wait for it if we
	 * are allowed to block.  We know no one new can pin it, because we are
	 * holding the inode lock shared and you need to hold it exclusively to
	 * pin the inode.
	 *
	 * If we are not allowed to block, force the log out asynchronously so
	 * that when we come back the inode will be unpinned. If other inodes
	 * in the same cluster are dirty, they will probably write the inode
	 * out for us if they occur after the log force completes.
	 */
	if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
		xfs_iunpin(ip);
		xfs_ifunlock(ip);
		return EAGAIN;
	}
	xfs_iunpin_wait(ip);

	/*
@@ -2458,8 +2442,7 @@ xfs_iflush(
	/*
	 * Get the buffer containing the on-disk inode.
	 */
	error = xfs_itobp(mp, NULL, ip, &dip, &bp,
				(flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
	error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK);
	if (error || !bp) {
		xfs_ifunlock(ip);
		return error;
@@ -2487,13 +2470,8 @@ xfs_iflush(
	if (error)
		goto cluster_corrupt_out;

	if (flags & SYNC_WAIT)
		error = xfs_bwrite(bp);
	else
		xfs_buf_delwri_queue(bp);

	xfs_buf_relse(bp);
	return error;
	*bpp = bp;
	return 0;

corrupt_out:
	xfs_buf_relse(bp);
+1 −1
Original line number Diff line number Diff line
@@ -529,7 +529,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);

void		xfs_iext_realloc(xfs_inode_t *, int, int);
void		xfs_iunpin_wait(xfs_inode_t *);
int		xfs_iflush(xfs_inode_t *, uint);
int		xfs_iflush(struct xfs_inode *, struct xfs_buf **);
void		xfs_promote_inode(struct xfs_inode *);
void		xfs_lock_inodes(xfs_inode_t **, int, uint);
void		xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
+16 −1
Original line number Diff line number Diff line
@@ -506,6 +506,15 @@ xfs_inode_item_trylock(
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return XFS_ITEM_LOCKED;

	/*
	 * Re-check the pincount now that we stabilized the value by
	 * taking the ilock.
	 */
	if (xfs_ipincount(ip) > 0) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return XFS_ITEM_PINNED;
	}

	if (!xfs_iflock_nowait(ip)) {
		/*
		 * inode has already been flushed to the backing buffer,
@@ -666,6 +675,8 @@ xfs_inode_item_push(
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	struct xfs_buf		*bp = NULL;
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
	ASSERT(xfs_isiflocked(ip));
@@ -689,7 +700,11 @@ xfs_inode_item_push(
	 * will pull the inode from the AIL, mark it clean and unlock the flush
	 * lock.
	 */
	(void) xfs_iflush(ip, SYNC_TRYLOCK);
	error = xfs_iflush(ip, &bp);
	if (!error) {
		xfs_buf_delwri_queue(bp);
		xfs_buf_relse(bp);
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
}

+15 −14
Original line number Diff line number Diff line
@@ -648,10 +648,6 @@ xfs_reclaim_inode_grab(
 * (*) dgc: I don't think the clean, pinned state is possible but it gets
 * handled anyway given the order of checks implemented.
 *
 * As can be seen from the table, the return value of xfs_iflush() is not
 * sufficient to correctly decide the reclaim action here. The checks in
 * xfs_iflush() might look like duplicates, but they are not.
 *
 * Also, because we get the flush lock first, we know that any inode that has
 * been flushed delwri has had the flush completed by the time we check that
 * the inode is clean.
@@ -679,6 +675,7 @@ xfs_reclaim_inode(
	struct xfs_perag	*pag,
	int			sync_mode)
{
	struct xfs_buf		*bp = NULL;
	int			error;

restart:
@@ -728,29 +725,33 @@ xfs_reclaim_inode(
	/*
	 * Now we have an inode that needs flushing.
	 *
	 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
	 * reclaim as we can deadlock with inode cluster removal.
	 * Note that xfs_iflush will never block on the inode buffer lock, as
	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
	 * ip->i_lock, and we are doing the exact opposite here.  As a result,
	 * doing a blocking xfs_itobp() to get the cluster buffer will result
	 * doing a blocking xfs_itobp() to get the cluster buffer would result
	 * in an ABBA deadlock with xfs_ifree_cluster().
	 *
	 * As xfs_ifree_cluser() must gather all inodes that are active in the
	 * cache to mark them stale, if we hit this case we don't actually want
	 * to do IO here - we want the inode marked stale so we can simply
	 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
	 * just unlock the inode, back off and try again. Hopefully the next
	 * pass through will see the stale flag set on the inode.
	 * reclaim it.  Hence if we get an EAGAIN error here,  just unlock the
	 * inode, back off and try again.  Hopefully the next pass through will
	 * see the stale flag set on the inode.
	 */
	error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
	error = xfs_iflush(ip, &bp);
	if (error == EAGAIN) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/* backoff longer than in xfs_ifree_cluster */
		delay(2);
		goto restart;
	}
	xfs_iflock(ip);

	if (!error) {
		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
	}

	xfs_iflock(ip);
reclaim:
	xfs_ifunlock(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);