Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dcd79a14 authored by Dave Chinner's avatar Dave Chinner Committed by Alex Elder
Browse files

xfs: don't use vfs writeback for pure metadata modifications



Under heavy multi-way parallel create workloads, the VFS struggles
to write back all the inodes that have been changed in age order.
The bdi flusher thread becomes CPU bound, spending 85% of it's time
in the VFS code, mostly traversing the superblock dirty inode list
to separate dirty inodes old enough to flush.

We already keep an index of all metadata changes in age order - in
the AIL - and continued log pressure will do age ordered writeback
without any extra overhead at all. If there is no pressure on the
log, the xfssyncd will periodically write back metadata in ascending
disk address offset order so will be very efficient.

Hence we can stop marking VFS inodes dirty during transaction commit
or when changing timestamps during transactions. This will keep the
inodes in the superblock dirty list to those containing data or
unlogged metadata changes.

However, the timstamp changes are slightly more complex than this -
there are a couple of places that do unlogged updates of the
timestamps, and the VFS need to be informed of these. Hence add a
new function xfs_trans_ichgtime() for transactional changes,
and leave xfs_ichgtime() for the non-transactional changes.

Signed-off-by: default avatarDave Chinner <dchinner@redhat.com>
Reviewed-by: default avatarAlex Elder <aelder@sgi.com>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
parent e176579e
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -1088,8 +1088,8 @@ xfs_ioctl_setattr(
		xfs_diflags_to_linux(ip);
		xfs_diflags_to_linux(ip);
	}
	}


	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	xfs_ichgtime(ip, XFS_ICHGTIME_CHG);


	XFS_STATS_INC(xs_ig_attrchg);
	XFS_STATS_INC(xs_ig_attrchg);


+0 −35
Original line number Original line Diff line number Diff line
@@ -94,41 +94,6 @@ xfs_mark_inode_dirty(
		mark_inode_dirty(inode);
		mark_inode_dirty(inode);
}
}


/*
 * Change the requested timestamp in the given inode.
 * We don't lock across timestamp updates, and we don't log them but
 * we do record the fact that there is dirty information in core.
 */
void
xfs_ichgtime(
	xfs_inode_t	*ip,
	int		flags)
{
	struct inode	*inode = VFS_I(ip);
	timespec_t	tv;
	int		sync_it = 0;

	tv = current_fs_time(inode->i_sb);

	if ((flags & XFS_ICHGTIME_MOD) &&
	    !timespec_equal(&inode->i_mtime, &tv)) {
		inode->i_mtime = tv;
		sync_it = 1;
	}
	if ((flags & XFS_ICHGTIME_CHG) &&
	    !timespec_equal(&inode->i_ctime, &tv)) {
		inode->i_ctime = tv;
		sync_it = 1;
	}

	/*
	 * Update complete - now make sure everyone knows that the inode
	 * is dirty.
	 */
	if (sync_it)
		xfs_mark_inode_dirty_sync(ip);
}

/*
/*
 * Hook in SELinux.  This is not quite correct yet, what we really need
 * Hook in SELinux.  This is not quite correct yet, what we really need
 * here (as we do for default ACLs) is a mechanism by which creation of
 * here (as we do for default ACLs) is a mechanism by which creation of
+1 −6
Original line number Original line Diff line number Diff line
@@ -972,12 +972,7 @@ xfs_fs_inode_init_once(


/*
/*
 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
 * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
 * we catch unlogged VFS level updates to the inode. Care must be taken
 * we catch unlogged VFS level updates to the inode.
 * here - the transaction code calls mark_inode_dirty_sync() to mark the
 * VFS inode dirty in a transaction and clears the i_update_core field;
 * it must clear the field after calling mark_inode_dirty_sync() to
 * correctly indicate that the dirty state has been propagated into the
 * inode log item.
 *
 *
 * We need the barrier() to maintain correct ordering between unlogged
 * We need the barrier() to maintain correct ordering between unlogged
 * updates and the transaction commit code that clears the i_update_core
 * updates and the transaction commit code that clears the i_update_core
+1 −1
Original line number Original line Diff line number Diff line
@@ -276,7 +276,7 @@ xfs_qm_scall_trunc_qfile(
		goto out_unlock;
		goto out_unlock;
	}
	}


	xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);


out_unlock:
out_unlock:
+11 −20
Original line number Original line Diff line number Diff line
@@ -355,16 +355,15 @@ xfs_attr_set_int(
			if (mp->m_flags & XFS_MOUNT_WSYNC) {
			if (mp->m_flags & XFS_MOUNT_WSYNC) {
				xfs_trans_set_sync(args.trans);
				xfs_trans_set_sync(args.trans);
			}
			}

			if (!error && (flags & ATTR_KERNOTIME) == 0) {
				xfs_trans_ichgtime(args.trans, dp,
							XFS_ICHGTIME_CHG);
			}
			err2 = xfs_trans_commit(args.trans,
			err2 = xfs_trans_commit(args.trans,
						 XFS_TRANS_RELEASE_LOG_RES);
						 XFS_TRANS_RELEASE_LOG_RES);
			xfs_iunlock(dp, XFS_ILOCK_EXCL);
			xfs_iunlock(dp, XFS_ILOCK_EXCL);


			/*
			 * Hit the inode change time.
			 */
			if (!error && (flags & ATTR_KERNOTIME) == 0) {
				xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
			}
			return(error == 0 ? err2 : error);
			return(error == 0 ? err2 : error);
		}
		}


@@ -420,6 +419,9 @@ xfs_attr_set_int(
		xfs_trans_set_sync(args.trans);
		xfs_trans_set_sync(args.trans);
	}
	}


	if ((flags & ATTR_KERNOTIME) == 0)
		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);

	/*
	/*
	 * Commit the last in the sequence of transactions.
	 * Commit the last in the sequence of transactions.
	 */
	 */
@@ -427,13 +429,6 @@ xfs_attr_set_int(
	error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
	error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);


	/*
	 * Hit the inode change time.
	 */
	if (!error && (flags & ATTR_KERNOTIME) == 0) {
		xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
	}

	return(error);
	return(error);


out:
out:
@@ -567,6 +562,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
		xfs_trans_set_sync(args.trans);
		xfs_trans_set_sync(args.trans);
	}
	}


	if ((flags & ATTR_KERNOTIME) == 0)
		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);

	/*
	/*
	 * Commit the last in the sequence of transactions.
	 * Commit the last in the sequence of transactions.
	 */
	 */
@@ -574,13 +572,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
	error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
	error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);


	/*
	 * Hit the inode change time.
	 */
	if (!error && (flags & ATTR_KERNOTIME) == 0) {
		xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
	}

	return(error);
	return(error);


out:
out:
Loading