
Commit 487f84f3 authored by Dave Chinner, committed by Dave Chinner

xfs: introduce xfs_rw_lock() helpers for locking the inode



We need to obtain the i_mutex, i_iolock and i_ilock during the read
and write paths. Add a set of wrapper functions to neatly
encapsulate the lock ordering and shared/exclusive semantics to make
the locking easier to follow and get right.

Note that this changes some of the exclusive locking serialisation in
that serialisation will occur against the i_mutex instead of the
XFS_IOLOCK_EXCL. This does not change any behaviour, and it is
arguably more efficient to use the mutex for such serialisation than
the rw_sem.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 4c5cfd1b
+80 −51
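The pattern the new xfs_rw_ilock()/xfs_rw_iunlock()/xfs_rw_ilock_demote() helpers encapsulate can be sketched in user space roughly as follows. This is only an illustration, not XFS code: the pthread mutex stands in for inode->i_mutex, the pthread rwlock stands in for the rw_sem-backed iolock, and struct fake_inode and the IOLOCK_* macros are made up for the sketch (the real helpers also take ip->i_ilock via xfs_ilock()). The point is the ordering rule: the mutex, when needed, is taken before the inner lock and released after it.

#include <pthread.h>
#include <stdio.h>

#define IOLOCK_SHARED	0x1
#define IOLOCK_EXCL	0x2

/* Stand-ins for the VFS inode mutex and the rw_sem-backed XFS iolock. */
struct fake_inode {
	pthread_mutex_t		i_mutex;	/* serialises exclusive IO */
	pthread_rwlock_t	iolock;		/* shared/exclusive IO lock */
};

/* Ordering rule: take i_mutex (exclusive IO only) before the iolock. */
static void rw_ilock(struct fake_inode *ip, int type)
{
	if (type & IOLOCK_EXCL) {
		pthread_mutex_lock(&ip->i_mutex);
		pthread_rwlock_wrlock(&ip->iolock);
	} else {
		pthread_rwlock_rdlock(&ip->iolock);
	}
}

/* Unlock in the opposite order: iolock first, then i_mutex. */
static void rw_iunlock(struct fake_inode *ip, int type)
{
	pthread_rwlock_unlock(&ip->iolock);
	if (type & IOLOCK_EXCL)
		pthread_mutex_unlock(&ip->i_mutex);
}

/*
 * Demote exclusive to shared and drop the serialising mutex.  pthreads has
 * no atomic downgrade, so this sketch unlocks and relocks; the real
 * xfs_ilock_demote() downgrades the rw_sem without ever dropping it.
 */
static void rw_ilock_demote(struct fake_inode *ip, int type)
{
	if (type & IOLOCK_EXCL) {
		pthread_rwlock_unlock(&ip->iolock);
		pthread_rwlock_rdlock(&ip->iolock);
		pthread_mutex_unlock(&ip->i_mutex);
	}
}

int main(void)
{
	struct fake_inode ip;

	pthread_mutex_init(&ip.i_mutex, NULL);
	pthread_rwlock_init(&ip.iolock, NULL);

	/* Direct IO with cached pages: start exclusive, flush, then demote. */
	rw_ilock(&ip, IOLOCK_EXCL);
	printf("flush and invalidate cached pages\n");
	rw_ilock_demote(&ip, IOLOCK_EXCL);
	printf("do the IO under the shared lock\n");
	rw_iunlock(&ip, IOLOCK_SHARED);
	return 0;
}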
@@ -40,6 +40,40 @@
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
+/*
+ * Locking primitives for read and write IO paths to ensure we consistently use
+ * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
+ */
+static inline void
+xfs_rw_ilock(
+	struct xfs_inode	*ip,
+	int			type)
+{
+	if (type & XFS_IOLOCK_EXCL)
+		mutex_lock(&VFS_I(ip)->i_mutex);
+	xfs_ilock(ip, type);
+}
+
+static inline void
+xfs_rw_iunlock(
+	struct xfs_inode	*ip,
+	int			type)
+{
+	xfs_iunlock(ip, type);
+	if (type & XFS_IOLOCK_EXCL)
+		mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+static inline void
+xfs_rw_ilock_demote(
+	struct xfs_inode	*ip,
+	int			type)
+{
+	xfs_ilock_demote(ip, type);
+	if (type & XFS_IOLOCK_EXCL)
+		mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
 /*
  *	xfs_iozero
  *
@@ -262,22 +296,21 @@ xfs_file_aio_read(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if (unlikely(ioflags & IO_ISDIRECT))
-		mutex_lock(&inode->i_mutex);
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
 	if (unlikely(ioflags & IO_ISDIRECT)) {
+		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+
 		if (inode->i_mapping->nrpages) {
 			ret = -xfs_flushinval_pages(ip,
 					(iocb->ki_pos & PAGE_CACHE_MASK),
 					-1, FI_REMAPF_LOCKED);
-		}
-		mutex_unlock(&inode->i_mutex);
-		if (ret) {
-			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-			return ret;
+			if (ret) {
+				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
+				return ret;
+			}
 		}
-	}
+		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+	} else
+		xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 
 	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
 
@@ -285,7 +318,7 @@ xfs_file_aio_read(
 	if (ret > 0)
 		XFS_STATS_ADD(xs_read_bytes, ret);
 
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
@@ -309,7 +342,7 @@ xfs_file_splice_read(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 
 	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
 
@@ -317,7 +350,7 @@ xfs_file_splice_read(
 	if (ret > 0)
 		XFS_STATS_ADD(xs_read_bytes, ret);
 
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
@@ -338,10 +371,10 @@ xfs_aio_write_isize_update(
 		*ppos = isize;
 
 	if (*ppos > ip->i_size) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
 		if (*ppos > ip->i_size)
 			ip->i_size = *ppos;
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 }
 
@@ -356,14 +389,22 @@ xfs_aio_write_newsize_update(
 	struct xfs_inode	*ip)
 {
 	if (ip->i_new_size) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
 		ip->i_new_size = 0;
 		if (ip->i_d.di_size > ip->i_size)
 			ip->i_d.di_size = ip->i_size;
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 }
 
+/*
+ * xfs_file_splice_write() does not use xfs_rw_ilock() because
+ * generic_file_splice_write() takes the i_mutex itself. This, in theory,
+ * could cause lock inversions between the aio_write path and the splice path
+ * if someone is doing concurrent splice(2) based writes and write(2) based
+ * writes to the same inode. The only real way to fix this is to re-implement
+ * the generic code here with correct locking orders.
+ */
 STATIC ssize_t
 xfs_file_splice_write(
 	struct pipe_inode_info	*pipe,
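The comment added above about xfs_file_splice_write() describes a classic ABBA ordering hazard: the write(2) path now takes i_mutex before the iolock, while the splice path takes the iolock and then generic_file_splice_write() grabs i_mutex. As a rough stand-alone illustration (plain pthreads, nothing here is XFS code), two threads taking two locks in opposite orders can deadlock:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;	/* plays i_mutex */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;	/* plays iolock  */

/* write(2)-like path: A then B */
static void *writer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock_a);
	pthread_mutex_lock(&lock_b);
	puts("writer: got A then B");
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return NULL;
}

/*
 * splice(2)-like path: B then A -- the opposite order, so each thread can
 * end up holding one lock while waiting forever on the other (ABBA deadlock).
 */
static void *splicer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock_b);
	pthread_mutex_lock(&lock_a);
	puts("splicer: got B then A");
	pthread_mutex_unlock(&lock_a);
	pthread_mutex_unlock(&lock_b);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, writer, NULL);
	pthread_create(&t2, NULL, splicer, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;	/* may never be reached if the deadlock hits */
}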
@@ -604,7 +645,6 @@ xfs_file_aio_write(
 	xfs_fsize_t		new_size;
 	int			iolock;
 	size_t			ocount = 0, count;
-	int			need_i_mutex;
 
 	XFS_STATS_INC(xs_write_calls);
 
@@ -631,21 +671,17 @@ xfs_file_aio_write(
 relock:
 	if (ioflags & IO_ISDIRECT) {
 		iolock = XFS_IOLOCK_SHARED;
-		need_i_mutex = 0;
 	} else {
 		iolock = XFS_IOLOCK_EXCL;
-		need_i_mutex = 1;
-		mutex_lock(&inode->i_mutex);
 	}
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
-
 start:
+	xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock);
 	ret = generic_write_checks(file, &pos, &count,
 					S_ISBLK(inode->i_mode));
 	if (ret) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
-		goto out_unlock_mutex;
+		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
+		return ret;
 	}
 
 	if (ioflags & IO_ISDIRECT) {
@@ -654,16 +690,20 @@ xfs_file_aio_write(
 				mp->m_rtdev_targp : mp->m_ddev_targp;
 
 		if ((pos & target->bt_smask) || (count & target->bt_smask)) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
+			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
 			return XFS_ERROR(-EINVAL);
 		}
 
-		if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
+		/*
+		 * For direct I/O, if there are cached pages or we're extending
+		 * the file, we need IOLOCK_EXCL until we're sure the bytes at
+		 * the new EOF have been zeroed and/or the cached pages are
+		 * flushed out.  Upgrade the I/O lock and start again.
+		 */
+		if (iolock != XFS_IOLOCK_EXCL &&
+		    (mapping->nrpages || pos > ip->i_size)) {
+			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
 			iolock = XFS_IOLOCK_EXCL;
-			need_i_mutex = 1;
-			mutex_lock(&inode->i_mutex);
-			xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
 			goto start;
 		}
 	}
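The comment and the goto start retry added above follow a common locking pattern: take the cheaper shared lock optimistically, and if the work turns out to need exclusion (cached pages to flush, EOF zeroing), drop the locks, switch to the exclusive mode and re-run the checks from the top. A minimal user-space sketch of that upgrade-and-retry loop, with a pthreads rwlock standing in for the iolock and a made-up needs_exclusive() check (none of this is XFS code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;

/* Stand-in for "there are cached pages or the write extends the file". */
static bool needs_exclusive(void)
{
	return true;	/* pretend the slow path is required */
}

static void do_write(void)
{
	bool excl = false;	/* start with the shared (cheap) lock */

start:
	if (excl)
		pthread_rwlock_wrlock(&iolock);
	else
		pthread_rwlock_rdlock(&iolock);

	/*
	 * Re-run the check every time round, precisely because the state may
	 * have changed while the lock was dropped for the upgrade.
	 */
	if (!excl && needs_exclusive()) {
		pthread_rwlock_unlock(&iolock);
		excl = true;
		goto start;
	}

	printf("writing with %s lock\n", excl ? "exclusive" : "shared");
	pthread_rwlock_unlock(&iolock);
}

int main(void)
{
	do_write();
	return 0;
}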
@@ -687,11 +727,11 @@ xfs_file_aio_write(
 	if (pos > ip->i_size) {
 		ret = -xfs_zero_eof(ip, pos, ip->i_size);
 		if (ret) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 			goto out_unlock_internal;
 		}
 	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 
 	/*
 	 * If we're writing the file then make sure to clear the
@@ -708,7 +748,7 @@ xfs_file_aio_write(
 
 	if ((ioflags & IO_ISDIRECT)) {
 		if (mapping->nrpages) {
-			WARN_ON(need_i_mutex == 0);
+			WARN_ON(iolock != XFS_IOLOCK_EXCL);
 			ret = -xfs_flushinval_pages(ip,
 					(pos & PAGE_CACHE_MASK),
 					-1, FI_REMAPF_LOCKED);
@@ -716,13 +756,10 @@ xfs_file_aio_write(
 				goto out_unlock_internal;
 		}
 
-		if (need_i_mutex) {
+		if (iolock == XFS_IOLOCK_EXCL) {
 			/* demote the lock now the cached pages are gone */
-			xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
-			mutex_unlock(&inode->i_mutex);
-
+			xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 			iolock = XFS_IOLOCK_SHARED;
-			need_i_mutex = 0;
 		}
 
 		trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
@@ -740,7 +777,7 @@ xfs_file_aio_write(
 			count -= ret;
 
 			ioflags &= ~IO_ISDIRECT;
-			xfs_iunlock(ip, iolock);
+			xfs_rw_iunlock(ip, iolock);
 			goto relock;
 		}
 	} else {
@@ -775,14 +812,9 @@ xfs_file_aio_write(
 		loff_t end = pos + ret - 1;
 		int error, error2;
 
-		xfs_iunlock(ip, iolock);
-		if (need_i_mutex)
-			mutex_unlock(&inode->i_mutex);
-
+		xfs_rw_iunlock(ip, iolock);
 		error = filemap_write_and_wait_range(mapping, pos, end);
-		if (need_i_mutex)
-			mutex_lock(&inode->i_mutex);
-		xfs_ilock(ip, iolock);
+		xfs_rw_ilock(ip, iolock);
 
 		error2 = -xfs_file_fsync(file,
 					 (file->f_flags & __O_SYNC) ? 0 : 1);
@@ -794,10 +826,7 @@ xfs_file_aio_write(
 
  out_unlock_internal:
 	xfs_aio_write_newsize_update(ip);
-	xfs_iunlock(ip, iolock);
+	xfs_rw_iunlock(ip, iolock);
- out_unlock_mutex:
-	if (need_i_mutex)
-		mutex_unlock(&inode->i_mutex);
 	return ret;
 }
 