Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 49d99a2f authored by Linus Torvalds
Browse files

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

Pull XFS updates from Ben Myers:
 "Scalability improvements for dquots, log grant code cleanups, plus
  bugfixes and cleanups large and small"

Fix up various trivial conflicts that were due to some of the earlier
patches already having been integrated into v3.3 as bugfixes, and then
there were development patches on top of those.  Easily merged by just
taking the newer version from the pulled branch.

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (45 commits)
  xfs: fallback to vmalloc for large buffers in xfs_getbmap
  xfs: fallback to vmalloc for large buffers in xfs_attrmulti_attr_get
  xfs: remove remaining scraps of struct xfs_iomap
  xfs: fix inode lookup race
  xfs: clean up minor sparse warnings
  xfs: remove the global xfs_Gqm structure
  xfs: remove the per-filesystem list of dquots
  xfs: use per-filesystem radix trees for dquot lookup
  xfs: per-filesystem dquot LRU lists
  xfs: use common code for quota statistics
  xfs: reimplement fdatasync support
  xfs: split in-core and on-disk inode log item fields
  xfs: make xfs_inode_item_size idempotent
  xfs: log timestamp updates
  xfs: log file size updates at I/O completion time
  xfs: log file size updates as part of unwritten extent conversion
  xfs: do not require an ioend for new EOF calculation
  xfs: use per-filesystem I/O completion workqueues
  quota: make Q_XQUOTASYNC a noop
  xfs: include reservations in quota reporting
  ...
parents 1c3ddfe5 f074211f
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -282,10 +282,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
	case Q_XGETQUOTA:
		return quota_getxquota(sb, type, id, addr);
	case Q_XQUOTASYNC:
		/* caller already holds s_umount */
		if (sb->s_flags & MS_RDONLY)
			return -EROFS;
		writeback_inodes_sb(sb, WB_REASON_SYNC);
		/* XFS quotas are fully coherent now, making this call a noop */
		return 0;
	default:
		return -EINVAL;
+0 −3
Original line number Diff line number Diff line
@@ -96,9 +96,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
				   xfs_qm_bhv.o \
				   xfs_qm.o \
				   xfs_quotaops.o
ifeq ($(CONFIG_XFS_QUOTA),y)
xfs-$(CONFIG_PROC_FS)		+= xfs_qm_stats.o
endif
xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
xfs-$(CONFIG_PROC_FS)		+= xfs_stats.o
+117 −66
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
@@ -99,57 +100,73 @@ xfs_destroy_ioend(
}

/*
 * If the end of the current ioend is beyond the current EOF,
 * return the new EOF value, otherwise zero.
 * Fast and loose check if this write could update the on-disk inode size.
 */
STATIC xfs_fsize_t
xfs_ioend_new_eof(
	xfs_ioend_t		*ioend)
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
{
	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
	xfs_fsize_t		isize;
	xfs_fsize_t		bsize;
	return ioend->io_offset + ioend->io_size >
		XFS_I(ioend->io_inode)->i_d.di_size;
}

	bsize = ioend->io_offset + ioend->io_size;
	isize = MIN(i_size_read(VFS_I(ip)), bsize);
	return isize > ip->i_d.di_size ? isize : 0;
STATIC int
xfs_setfilesize_trans_alloc(
	struct xfs_ioend	*ioend)
{
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);

	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	ioend->io_append_trans = tp;

	/*
 * Fast and loose check if this write could update the on-disk inode size.
	 * We hand off the transaction to the completion thread now, so
	 * clear the flag here.
	 */
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
{
	return ioend->io_offset + ioend->io_size >
		XFS_I(ioend->io_inode)->i_d.di_size;
	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
	return 0;
}

/*
 * Update on-disk file size now that data has been written to disk.
 *
 * This function does not block as blocking on the inode lock in IO completion
 * can lead to IO completion order dependency deadlocks. If it can't get the
 * inode ilock it will return EAGAIN. Callers must handle this.
 */
STATIC int
xfs_setfilesize(
	xfs_ioend_t		*ioend)
	struct xfs_ioend	*ioend)
{
	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	struct xfs_trans	*tp = ioend->io_append_trans;
	xfs_fsize_t		isize;

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
		return EAGAIN;
	/*
	 * The transaction was allocated in the I/O submission thread,
	 * thus we need to mark ourselves as being in a transaction
	 * manually.
	 */
	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
	if (!isize) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp, 0);
		return 0;
	}

	isize = xfs_ioend_new_eof(ioend);
	if (isize) {
	trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);

	ip->i_d.di_size = isize;
		xfs_mark_inode_dirty(ip);
	}
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return 0;
	return xfs_trans_commit(tp, 0);
}

/*
@@ -163,10 +180,12 @@ xfs_finish_ioend(
	struct xfs_ioend	*ioend)
{
	if (atomic_dec_and_test(&ioend->io_remaining)) {
		struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;

		if (ioend->io_type == IO_UNWRITTEN)
			queue_work(xfsconvertd_workqueue, &ioend->io_work);
		else if (xfs_ioend_is_append(ioend))
			queue_work(xfsdatad_workqueue, &ioend->io_work);
			queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
		else if (ioend->io_append_trans)
			queue_work(mp->m_data_workqueue, &ioend->io_work);
		else
			xfs_destroy_ioend(ioend);
	}
@@ -195,36 +214,37 @@ xfs_end_io(
	 * range to normal written extents after the data I/O has finished.
	 */
	if (ioend->io_type == IO_UNWRITTEN) {
		/*
		 * For buffered I/O we never preallocate a transaction when
		 * doing the unwritten extent conversion, but for direct I/O
		 * we do not know if we are converting an unwritten extent
		 * or not at the point where we preallocate the transaction.
		 */
		if (ioend->io_append_trans) {
			ASSERT(ioend->io_isdirect);

			current_set_flags_nested(
				&ioend->io_append_trans->t_pflags, PF_FSTRANS);
			xfs_trans_cancel(ioend->io_append_trans, 0);
		}

		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
						 ioend->io_size);
		if (error) {
			ioend->io_error = -error;
			goto done;
		}
	}

	/*
	 * We might have to update the on-disk file size after extending
	 * writes.
	 */
	} else if (ioend->io_append_trans) {
		error = xfs_setfilesize(ioend);
	ASSERT(!error || error == EAGAIN);
		if (error)
			ioend->io_error = -error;
	} else {
		ASSERT(!xfs_ioend_is_append(ioend));
	}

done:
	/*
	 * If we didn't complete processing of the ioend, requeue it to the
	 * tail of the workqueue for another attempt later. Otherwise destroy
	 * it.
	 */
	if (error == EAGAIN) {
		atomic_inc(&ioend->io_remaining);
		xfs_finish_ioend(ioend);
		/* ensure we don't spin on blocked ioends */
		delay(1);
	} else {
	xfs_destroy_ioend(ioend);
}
}

/*
 * Call IO completion handling in caller context on the final put of an ioend.
@@ -259,6 +279,7 @@ xfs_alloc_ioend(
	 */
	atomic_set(&ioend->io_remaining, 1);
	ioend->io_isasync = 0;
	ioend->io_isdirect = 0;
	ioend->io_error = 0;
	ioend->io_list = NULL;
	ioend->io_type = type;
@@ -269,6 +290,7 @@ xfs_alloc_ioend(
	ioend->io_size = 0;
	ioend->io_iocb = NULL;
	ioend->io_result = 0;
	ioend->io_append_trans = NULL;

	INIT_WORK(&ioend->io_work, xfs_end_io);
	return ioend;
@@ -379,14 +401,6 @@ xfs_submit_ioend_bio(
	atomic_inc(&ioend->io_remaining);
	bio->bi_private = ioend;
	bio->bi_end_io = xfs_end_bio;

	/*
	 * If the I/O is beyond EOF we mark the inode dirty immediately
	 * but don't update the inode size until I/O completion.
	 */
	if (xfs_ioend_new_eof(ioend))
		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));

	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
}

@@ -1033,8 +1047,20 @@ xfs_vm_writepage(
				  wbc, end_index);
	}

	if (iohead)
	if (iohead) {
		/*
		 * Reserve log space if we might write beyond the on-disk
		 * inode size.
		 */
		if (ioend->io_type != IO_UNWRITTEN &&
		    xfs_ioend_is_append(ioend)) {
			err = xfs_setfilesize_trans_alloc(ioend);
			if (err)
				goto error;
		}

		xfs_submit_ioend(wbc, iohead);
	}

	return 0;

@@ -1314,17 +1340,32 @@ xfs_vm_direct_IO(
{
	struct inode		*inode = iocb->ki_filp->f_mapping->host;
	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
	struct xfs_ioend	*ioend = NULL;
	ssize_t			ret;

	if (rw & WRITE) {
		iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
		size_t size = iov_length(iov, nr_segs);

		/*
		 * We need to preallocate a transaction for a size update
		 * here.  In the case that this write both updates the size
		 * and converts at least one unwritten extent we will cancel
		 * the still clean transaction after the I/O has finished.
		 */
		iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
		if (offset + size > XFS_I(inode)->i_d.di_size) {
			ret = xfs_setfilesize_trans_alloc(ioend);
			if (ret)
				goto out_destroy_ioend;
			ioend->io_isdirect = 1;
		}

		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
					    offset, nr_segs,
					    xfs_get_blocks_direct,
					    xfs_end_io_direct_write, NULL, 0);
		if (ret != -EIOCBQUEUED && iocb->private)
			xfs_destroy_ioend(iocb->private);
			goto out_trans_cancel;
	} else {
		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
					    offset, nr_segs,
@@ -1333,6 +1374,16 @@ xfs_vm_direct_IO(
	}

	return ret;

out_trans_cancel:
	if (ioend->io_append_trans) {
		current_set_flags_nested(&ioend->io_append_trans->t_pflags,
					 PF_FSTRANS);
		xfs_trans_cancel(ioend->io_append_trans, 0);
	}
out_destroy_ioend:
	xfs_destroy_ioend(ioend);
	return ret;
}

STATIC void
+2 −2
Original line number Diff line number Diff line
@@ -18,8 +18,6 @@
#ifndef __XFS_AOPS_H__
#define __XFS_AOPS_H__

extern struct workqueue_struct *xfsdatad_workqueue;
extern struct workqueue_struct *xfsconvertd_workqueue;
extern mempool_t *xfs_ioend_pool;

/*
@@ -48,12 +46,14 @@ typedef struct xfs_ioend {
	int			io_error;	/* I/O error code */
	atomic_t		io_remaining;	/* hold count */
	unsigned int		io_isasync : 1;	/* needs aio_complete */
	unsigned int		io_isdirect : 1;/* direct I/O */
	struct inode		*io_inode;	/* file being written to */
	struct buffer_head	*io_buffer_head;/* buffer linked list head */
	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
	size_t			io_size;	/* size of the extent */
	xfs_off_t		io_offset;	/* offset in the file */
	struct work_struct	io_work;	/* xfsdatad work queue */
	struct xfs_trans	*io_append_trans;/* xact. for size update */
	struct kiocb		*io_iocb;
	int			io_result;
} xfs_ioend_t;
+10 −3
Original line number Diff line number Diff line
@@ -5536,8 +5536,12 @@ xfs_getbmap(
	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
		return XFS_ERROR(ENOMEM);
	out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
	if (!out) {
		out = kmem_zalloc_large(bmv->bmv_count *
					sizeof(struct getbmapx));
		if (!out)
			return XFS_ERROR(ENOMEM);
	}

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
@@ -5661,6 +5665,9 @@ xfs_getbmap(
			break;
	}

	if (is_vmalloc_addr(out))
		kmem_free_large(out);
	else
		kmem_free(out);
	return error;
}
Loading