Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2813d682 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Ben Myers
Browse files

xfs: remove the i_new_size field in struct xfs_inode



Now that we use the VFS i_size field throughout XFS there is no need for the
i_new_size field any more given that the VFS i_size field gets updated
in ->write_end before unlocking the page, and thus is always uptodate when
writeback could see a page.  Removing i_new_size also has the advantage that
we will never have to trim back di_size during a failed buffered write,
given that it never gets updated past i_size.

Note that currently the generic direct I/O code only updates i_size after
calling our end_io handler, which requires a small workaround to make
sure di_size actually makes it to disk.  I hope to fix this properly in
the generic code.

A downside is that we lose the support for parallel non-overlapping O_DIRECT
appending writes that recently was added.  I don't think keeping the complex
and fragile i_new_size infrastructure for this is a good tradeoff - if we
really care about parallel appending writers we should investigate turning
the iolock into a range lock, which would also allow for parallel
non-overlapping buffered writers.

Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarBen Myers <bpm@sgi.com>
parent ce7ae151
Loading
Loading
Loading
Loading
+16 −13
Original line number Diff line number Diff line
@@ -111,8 +111,7 @@ xfs_ioend_new_eof(
	xfs_fsize_t		bsize;

	bsize = ioend->io_offset + ioend->io_size;
	isize = MAX(i_size_read(VFS_I(ip)), ip->i_new_size);
	isize = MIN(isize, bsize);
	isize = MIN(i_size_read(VFS_I(ip)), bsize);
	return isize > ip->i_d.di_size ? isize : 0;
}

@@ -126,11 +125,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
}

/*
 * Update on-disk file size now that data has been written to disk.  The
 * current in-memory file size is i_size.  If a write is beyond eof i_new_size
 * will be the intended file size until i_size is updated.  If this write does
 * not extend all the way to the valid file size then restrict this update to
 * the end of the write.
 * Update on-disk file size now that data has been written to disk.
 *
 * This function does not block as blocking on the inode lock in IO completion
 * can lead to IO completion order dependency deadlocks.. If it can't get the
@@ -1278,6 +1273,15 @@ xfs_end_io_direct_write(
{
	struct xfs_ioend	*ioend = iocb->private;

	/*
	 * While the generic direct I/O code updates the inode size, it does
	 * so only after the end_io handler is called, which means our
	 * end_io handler thinks the on-disk size is outside the in-core
	 * size.  To prevent this just update it a little bit earlier here.
	 */
	if (offset + size > i_size_read(ioend->io_inode))
		i_size_write(ioend->io_inode, offset + size);

	/*
	 * blockdev_direct_IO can return an error even after the I/O
	 * completion handler was called.  Thus we need to protect
@@ -1340,12 +1344,11 @@ xfs_vm_write_failed(

	if (to > inode->i_size) {
		/*
		 * punch out the delalloc blocks we have already allocated. We
		 * don't call xfs_setattr() to do this as we may be in the
		 * middle of a multi-iovec write and so the vfs inode->i_size
		 * will not match the xfs ip->i_size and so it will zero too
		 * much. Hence we jus truncate the page cache to zero what is
		 * necessary and punch the delalloc blocks directly.
		 * Punch out the delalloc blocks we have already allocated.
		 *
		 * Don't bother with xfs_setattr given that nothing can have
		 * made it to disk yet as the page is still locked at this
		 * point.
		 */
		struct xfs_inode	*ip = XFS_I(inode);
		xfs_fileoff_t		start_fsb;
+10 −62
Original line number Diff line number Diff line
@@ -412,27 +412,6 @@ xfs_file_splice_read(
	return ret;
}

/*
 * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
 * part of the I/O may have been written to disk before the error occurred.  In
 * this case the on-disk file size may have been adjusted beyond the in-memory
 * file size and now needs to be truncated back.
 */
STATIC void
xfs_aio_write_newsize_update(
	struct xfs_inode	*ip,
	xfs_fsize_t		new_size)
{
	if (new_size == ip->i_new_size) {
		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
		if (new_size == ip->i_new_size)
			ip->i_new_size = 0;
		if (ip->i_d.di_size > i_size_read(VFS_I(ip)))
			ip->i_d.di_size = i_size_read(VFS_I(ip));
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	}
}

/*
 * xfs_file_splice_write() does not use xfs_rw_ilock() because
 * generic_file_splice_write() takes the i_mutex itself. This, in theory,
@@ -451,7 +430,6 @@ xfs_file_splice_write(
{
	struct inode		*inode = outfilp->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	xfs_fsize_t		new_size;
	int			ioflags = 0;
	ssize_t			ret;

@@ -465,20 +443,12 @@ xfs_file_splice_write(

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	new_size = *ppos + count;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (new_size > i_size_read(inode))
		ip->i_new_size = new_size;
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);

	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
	if (ret > 0)
		XFS_STATS_ADD(xs_write_bytes, ret);

	xfs_aio_write_newsize_update(ip, new_size);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return ret;
}
@@ -673,16 +643,13 @@ xfs_file_aio_write_checks(
	struct file		*file,
	loff_t			*pos,
	size_t			*count,
	xfs_fsize_t		*new_sizep,
	int			*iolock)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	xfs_fsize_t		new_size;
	int			error = 0;

	xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
	*new_sizep = 0;
restart:
	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
	if (error) {
@@ -697,15 +664,13 @@ xfs_file_aio_write_checks(
	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write. There is no need to issue zeroing if another in-flght IO ends
	 * at or before this one If zeronig is needed and we are currently
	 * holding the iolock shared, we need to update it to exclusive which
	 * involves dropping all locks and relocking to maintain correct locking
	 * order. If we do this, restart the function to ensure all checks and
	 * values are still valid.
	 * write.  If zeroing is needed and we are currently holding the
	 * iolock shared, we need to update it to exclusive which involves
	 * dropping all locks and relocking to maintain correct locking order.
	 * If we do this, restart the function to ensure all checks and values
	 * are still valid.
	 */
	if ((ip->i_new_size && *pos > ip->i_new_size) ||
	    (!ip->i_new_size && *pos > i_size_read(inode))) {
	if (*pos > i_size_read(inode)) {
		if (*iolock == XFS_IOLOCK_SHARED) {
			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
			*iolock = XFS_IOLOCK_EXCL;
@@ -714,19 +679,6 @@ xfs_file_aio_write_checks(
		}
		error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
	}

	/*
	 * If this IO extends beyond EOF, we may need to update ip->i_new_size.
	 * We have already zeroed space beyond EOF (if necessary).  Only update
	 * ip->i_new_size if this IO ends beyond any other in-flight writes.
	 */
	new_size = *pos + *count;
	if (new_size > i_size_read(inode)) {
		if (new_size > ip->i_new_size)
			ip->i_new_size = new_size;
		*new_sizep = new_size;
	}

	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;
@@ -772,7 +724,6 @@ xfs_file_dio_aio_write(
	unsigned long		nr_segs,
	loff_t			pos,
	size_t			ocount,
	xfs_fsize_t		*new_size,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
@@ -817,7 +768,7 @@ xfs_file_dio_aio_write(
		xfs_rw_ilock(ip, *iolock);
	}

	ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
	if (ret)
		return ret;

@@ -855,7 +806,6 @@ xfs_file_buffered_aio_write(
	unsigned long		nr_segs,
	loff_t			pos,
	size_t			ocount,
	xfs_fsize_t		*new_size,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
@@ -869,7 +819,7 @@ xfs_file_buffered_aio_write(
	*iolock = XFS_IOLOCK_EXCL;
	xfs_rw_ilock(ip, *iolock);

	ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
	if (ret)
		return ret;

@@ -909,7 +859,6 @@ xfs_file_aio_write(
	ssize_t			ret;
	int			iolock;
	size_t			ocount = 0;
	xfs_fsize_t		new_size = 0;

	XFS_STATS_INC(xs_write_calls);

@@ -929,10 +878,10 @@ xfs_file_aio_write(

	if (unlikely(file->f_flags & O_DIRECT))
		ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
						ocount, &new_size, &iolock);
						ocount, &iolock);
	else
		ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
						ocount, &new_size, &iolock);
						ocount, &iolock);

	if (ret <= 0)
		goto out_unlock;
@@ -953,7 +902,6 @@ xfs_file_aio_write(
	}

out_unlock:
	xfs_aio_write_newsize_update(ip, new_size);
	xfs_rw_iunlock(ip, iolock);
	return ret;
}
+0 −1
Original line number Diff line number Diff line
@@ -94,7 +94,6 @@ xfs_inode_alloc(
	ip->i_update_core = 0;
	ip->i_delayed_blks = 0;
	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
	ip->i_new_size = 0;

	return ip;
}
+0 −2
Original line number Diff line number Diff line
@@ -246,8 +246,6 @@ typedef struct xfs_inode {

	xfs_icdinode_t		i_d;		/* most of ondisk inode */

	xfs_fsize_t		i_new_size;	/* size when write completes */

	/* VFS inode */
	struct inode		i_vnode;	/* embedded VFS inode */
} xfs_inode_t;
+4 −14
Original line number Diff line number Diff line
@@ -891,7 +891,6 @@ DECLARE_EVENT_CLASS(xfs_file_class,
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_fsize_t, size)
		__field(xfs_fsize_t, new_size)
		__field(loff_t, offset)
		__field(size_t, count)
		__field(int, flags)
@@ -900,17 +899,15 @@ DECLARE_EVENT_CLASS(xfs_file_class,
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->size = ip->i_d.di_size;
		__entry->new_size = ip->i_new_size;
		__entry->offset = offset;
		__entry->count = count;
		__entry->flags = flags;
	),
	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
	TP_printk("dev %d:%d ino 0x%llx size 0x%llx "
		  "offset 0x%llx count 0x%zx ioflags %s",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->size,
		  __entry->new_size,
		  __entry->offset,
		  __entry->count,
		  __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
@@ -978,7 +975,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(loff_t, size)
		__field(loff_t, new_size)
		__field(loff_t, offset)
		__field(size_t, count)
		__field(int, type)
@@ -990,7 +986,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->size = ip->i_d.di_size;
		__entry->new_size = ip->i_new_size;
		__entry->offset = offset;
		__entry->count = count;
		__entry->type = type;
@@ -998,13 +993,11 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
		__entry->startblock = irec ? irec->br_startblock : 0;
		__entry->blockcount = irec ? irec->br_blockcount : 0;
	),
	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
		  "offset 0x%llx count %zd type %s "
		  "startoff 0x%llx startblock %lld blockcount 0x%llx",
	TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
		  "type %s startoff 0x%llx startblock %lld blockcount 0x%llx",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->size,
		  __entry->new_size,
		  __entry->offset,
		  __entry->count,
		  __print_symbolic(__entry->type, XFS_IO_TYPES),
@@ -1031,7 +1024,6 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
		__field(xfs_ino_t, ino)
		__field(loff_t, isize)
		__field(loff_t, disize)
		__field(loff_t, new_size)
		__field(loff_t, offset)
		__field(size_t, count)
	),
@@ -1040,17 +1032,15 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
		__entry->ino = ip->i_ino;
		__entry->isize = VFS_I(ip)->i_size;
		__entry->disize = ip->i_d.di_size;
		__entry->new_size = ip->i_new_size;
		__entry->offset = offset;
		__entry->count = count;
	),
	TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
	TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
		  "offset 0x%llx count %zd",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->isize,
		  __entry->disize,
		  __entry->new_size,
		  __entry->offset,
		  __entry->count)
);