xfs: don't flush inodes from background inode reclaim (8a48088f) · Commits · e / devices / android_kernel_oneplus_sm8150

fs/xfs/xfs_sync.c

+42 −60

Original line number	Diff line number	Diff line
		@@ -631,11 +631,8 @@ xfs_reclaim_inode_grab(
		}

		/*
		* Inodes in different states need to be treated differently, and the return
		* value of xfs_iflush is not sufficient to get this right. The following table
		* lists the inode states and the reclaim actions necessary for non-blocking
		* reclaim:
		*
		* Inodes in different states need to be treated differently. The following
		* table lists the inode states and the reclaim actions necessary:
		*
		* inode state iflush ret required action
		* --------------- ---------- ---------------
		@@ -645,9 +642,8 @@ xfs_reclaim_inode_grab(
		* stale, unpinned 0 reclaim
		* clean, pinned(*) 0 requeue
		* stale, pinned EAGAIN requeue
		* dirty, delwri ok 0 requeue
		* dirty, delwri blocked EAGAIN requeue
		* dirty, sync flush 0 reclaim
		* dirty, async - requeue
		* dirty, sync 0 reclaim
		*
		* (*) dgc: I don't think the clean, pinned state is possible but it gets
		* handled anyway given the order of checks implemented.
		@@ -658,26 +654,23 @@ xfs_reclaim_inode_grab(
		*
		* Also, because we get the flush lock first, we know that any inode that has
		* been flushed delwri has had the flush completed by the time we check that
		* the inode is clean. The clean inode check needs to be done before flushing
		* the inode delwri otherwise we would loop forever requeuing clean inodes as
		* we cannot tell apart a successful delwri flush and a clean inode from the
		* return value of xfs_iflush().
		* the inode is clean.
		*
		* Note that because the inode is flushed delayed write by background
		* writeback, the flush lock may already be held here and waiting on it can
		* result in very long latencies. Hence for sync reclaims, where we wait on the
		* flush lock, the caller should push out delayed write inodes first before
		* trying to reclaim them to minimise the amount of time spent waiting. For
		* background relaim, we just requeue the inode for the next pass.
		* Note that because the inode is flushed delayed write by AIL pushing, the
		* flush lock may already be held here and waiting on it can result in very
		* long latencies. Hence for sync reclaims, where we wait on the flush lock,
		* the caller should push the AIL first before trying to reclaim inodes to
		* minimise the amount of time spent waiting. For background relaim, we only
		* bother to reclaim clean inodes anyway.
		*
		* Hence the order of actions after gaining the locks should be:
		* bad => reclaim
		* shutdown => unpin and reclaim
		* pinned, delwri => requeue
		* pinned, async => requeue
		* pinned, sync => unpin
		* stale => reclaim
		* clean => reclaim
		* dirty, delwri => flush and requeue
		* dirty, async => requeue
		* dirty, sync => flush, wait and reclaim
		*/
		STATIC int
		@@ -716,10 +709,8 @@ xfs_reclaim_inode(
		goto reclaim;
		}
		if (xfs_ipincount(ip)) {
		if (!(sync_mode & SYNC_WAIT)) {
		xfs_ifunlock(ip);
		goto out;
		}
		if (!(sync_mode & SYNC_WAIT))
		goto out_ifunlock;
		xfs_iunpin_wait(ip);
		}
		if (xfs_iflags_test(ip, XFS_ISTALE))
		@@ -727,6 +718,13 @@ xfs_reclaim_inode(
		if (xfs_inode_clean(ip))
		goto reclaim;

		/*
		* Never flush out dirty data during non-blocking reclaim, as it would
		* just contend with AIL pushing trying to do the same job.
		*/
		if (!(sync_mode & SYNC_WAIT))
		goto out_ifunlock;

		/*
		* Now we have an inode that needs flushing.
		*
		@@ -745,7 +743,6 @@ xfs_reclaim_inode(
		* pass through will see the stale flag set on the inode.
		*/
		error = xfs_iflush(ip, SYNC_TRYLOCK \| sync_mode);
		if (sync_mode & SYNC_WAIT) {
		if (error == EAGAIN) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/* backoff longer than in xfs_ifree_cluster */
		@@ -753,34 +750,6 @@ xfs_reclaim_inode(
		goto restart;
		}
		xfs_iflock(ip);
		goto reclaim;
		}

		/*
		* When we have to flush an inode but don't have SYNC_WAIT set, we
		* flush the inode out using a delwri buffer and wait for the next
		* call into reclaim to find it in a clean state instead of waiting for
		* it now. We also don't return errors here - if the error is transient
		* then the next reclaim pass will flush the inode, and if the error
		* is permanent then the next sync reclaim will reclaim the inode and
		* pass on the error.
		*/
		if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_warn(ip->i_mount,
		"inode 0x%llx background reclaim flush failed with %d",
		(long long)ip->i_ino, error);
		}
		out:
		xfs_iflags_clear(ip, XFS_IRECLAIM);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/*
		* We could return EAGAIN here to make reclaim rescan the inode tree in
		* a short while. However, this just burns CPU time scanning the tree
		* waiting for IO to complete and xfssyncd never goes back to the idle
		* state. Instead, return 0 to let the next scheduled background reclaim
		* attempt to reclaim the inode again.
		*/
		return 0;

		reclaim:
		xfs_ifunlock(ip);
		@@ -814,8 +783,21 @@ xfs_reclaim_inode(
		xfs_iunlock(ip, XFS_ILOCK_EXCL);

		xfs_inode_free(ip);

		return error;

		out_ifunlock:
		xfs_ifunlock(ip);
		out:
		xfs_iflags_clear(ip, XFS_IRECLAIM);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/*
		* We could return EAGAIN here to make reclaim rescan the inode tree in
		* a short while. However, this just burns CPU time scanning the tree
		* waiting for IO to complete and xfssyncd never goes back to the idle
		* state. Instead, return 0 to let the next scheduled background reclaim
		* attempt to reclaim the inode again.
		*/
		return 0;
		}

		/*