
Commit b305956a authored by Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (52 commits)
  fs/xfs: Correct NULL test
  xfs: optimize log flushing in xfs_fsync
  xfs: only clear the suid bit once in xfs_write
  xfs: kill xfs_bawrite
  xfs: log changed inodes instead of writing them synchronously
  xfs: remove invalid barrier optimization from xfs_fsync
  xfs: kill the unused XFS_QMOPT_* flush flags V2
  xfs: Use delay write promotion for dquot flushing
  xfs: Sort delayed write buffers before dispatch
  xfs: Don't issue buffer IO direct from AIL push V2
  xfs: Use delayed write for inodes rather than async V2
  xfs: Make inode reclaim states explicit
  xfs: more reserved blocks fixups
  xfs: turn off sign warnings
  xfs: don't hold onto reserved blocks on remount,ro
  xfs: quota limit statvfs available blocks
  xfs: replace KM_LARGE with explicit vmalloc use
  xfs: cleanup up xfs_log_force calling conventions
  xfs: kill XLOG_VEC_SET_TYPE
  xfs: remove duplicate buffer flags
  ...
parents 41630959 398007f8
+1 −1
@@ -16,7 +16,7 @@
# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#

-EXTRA_CFLAGS +=	 -I$(src) -I$(src)/linux-2.6 -funsigned-char
+EXTRA_CFLAGS +=	 -I$(src) -I$(src)/linux-2.6

XFS_LINUX := linux-2.6
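
The first hunk drops -funsigned-char from the XFS build flags, so the code can no longer assume plain char behaves as unsigned; related hunks in this merge (the ACL attribute-name pointers below) switch to explicit unsigned char instead. As a standalone illustration of why that matters (not part of the commit): plain char signedness is implementation-defined in C, so a byte value of 0x80 or above may come back negative when read through plain char.

#include <stdio.h>

int main(void)
{
	char c = (char)0x80;		/* plain char: signed or unsigned, per the ABI */
	unsigned char u = 0x80;		/* explicitly unsigned: always 128 */

	/*
	 * Built without -funsigned-char on a typical x86 toolchain this
	 * prints -128 for the plain char; with the flag (or on targets
	 * where char is unsigned) it prints 128.
	 */
	printf("plain char: %d, unsigned char: %d\n", c, u);
	return 0;
}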

+20 −36
@@ -16,7 +16,6 @@
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include <linux/mm.h>
-#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/blkdev.h>
@@ -24,8 +23,25 @@
#include "time.h"
#include "kmem.h"

-#define MAX_VMALLOCS	6
-#define MAX_SLAB_SIZE	0x20000
+/*
+ * Greedy allocation.  May fail and may return vmalloced memory.
+ *
+ * Must be freed using kmem_free_large.
+ */
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
+{
+	void		*ptr;
+	size_t		kmsize = maxsize;

+	while (!(ptr = kmem_zalloc_large(kmsize))) {
+		if ((kmsize >>= 1) <= minsize)
+			kmsize = minsize;
+	}
+	if (ptr)
+		*size = kmsize;
+	return ptr;
+}

void *
kmem_alloc(size_t size, unsigned int __nocast flags)
@@ -34,19 +50,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
	gfp_t	lflags = kmem_flags_convert(flags);
	void	*ptr;

-#ifdef DEBUG
-	if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
-		printk(KERN_WARNING "Large %s attempt, size=%ld\n",
-			__func__, (long)size);
-		dump_stack();
-	}
-#endif

	do {
-		if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
-			ptr = kmalloc(size, lflags);
-		else
-			ptr = __vmalloc(size, lflags, PAGE_KERNEL);
+		ptr = kmalloc(size, lflags);
		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
			return ptr;
		if (!(++retries % 100))
@@ -68,27 +73,6 @@ kmem_zalloc(size_t size, unsigned int __nocast flags)
	return ptr;
}

-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
-		   unsigned int __nocast flags)
-{
-	void		*ptr;
-	size_t		kmsize = maxsize;
-	unsigned int	kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP;
-
-	while (!(ptr = kmem_zalloc(kmsize, kmflags))) {
-		if ((kmsize <= minsize) && (flags & KM_NOSLEEP))
-			break;
-		if ((kmsize >>= 1) <= minsize) {
-			kmsize = minsize;
-			kmflags = flags;
-		}
-	}
-	if (ptr)
-		*size = kmsize;
-	return ptr;
-}

void
kmem_free(const void *ptr)
{
+18 −3
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/vmalloc.h>

/*
 * General memory allocation interfaces
@@ -30,7 +31,6 @@
#define KM_NOSLEEP	0x0002u
#define KM_NOFS		0x0004u
#define KM_MAYFAIL	0x0008u
-#define KM_LARGE	0x0010u

/*
 * We use a special process flag to avoid recursive callbacks into
@@ -42,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags)
{
	gfp_t	lflags;

-	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE));
+	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));

	if (flags & KM_NOSLEEP) {
		lflags = GFP_ATOMIC | __GFP_NOWARN;
@@ -56,10 +56,25 @@ kmem_flags_convert(unsigned int __nocast flags)

extern void *kmem_alloc(size_t, unsigned int __nocast);
extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
extern void  kmem_free(const void *);

+static inline void *kmem_zalloc_large(size_t size)
+{
+	void *ptr;
+
+	ptr = vmalloc(size);
+	if (ptr)
+		memset(ptr, 0, size);
+	return ptr;
+}
+static inline void kmem_free_large(void *ptr)
+{
+	vfree(ptr);
+}

+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);

/*
 * Zone interfaces
 */
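
With KM_LARGE gone, callers that may need more than kmalloc() can deliver use the vmalloc-backed kmem_zalloc_large()/kmem_free_large() pair directly, and size-flexible callers use the new three-argument kmem_zalloc_greedy(), which starts at maxsize and halves the request until an allocation succeeds, clamping at minsize. Below is a small userspace sketch of that fallback loop, not XFS code: calloc() stands in for kmem_zalloc_large() and free() for kmem_free_large(), and the function name is made up for illustration.

#include <stdio.h>
#include <stdlib.h>

/*
 * Userspace illustration of the kmem_zalloc_greedy() fallback loop shown
 * in the diff above: try the largest size first, halve on failure, and
 * never go below minsize.
 */
static void *zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
{
	void	*ptr;
	size_t	kmsize = maxsize;

	while (!(ptr = calloc(1, kmsize))) {
		if ((kmsize >>= 1) <= minsize)
			kmsize = minsize;
	}
	if (ptr)
		*size = kmsize;
	return ptr;
}

int main(void)
{
	size_t got = 0;
	/* Ask for up to 1 MiB, but settle for as little as 4 KiB. */
	void *buf = zalloc_greedy(&got, 4096, 1024 * 1024);

	printf("allocated %zu bytes\n", got);
	free(buf);
	return 0;
}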
+6 −5
@@ -106,7 +106,7 @@ xfs_get_acl(struct inode *inode, int type)
	struct posix_acl *acl;
	struct xfs_acl *xfs_acl;
	int len = sizeof(struct xfs_acl);
-	char *ea_name;
+	unsigned char *ea_name;
	int error;

	acl = get_cached_acl(inode, type);
@@ -133,7 +133,8 @@ xfs_get_acl(struct inode *inode, int type)
	if (!xfs_acl)
		return ERR_PTR(-ENOMEM);

-	error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
+	error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+							&len, ATTR_ROOT);
	if (error) {
		/*
		 * If the attribute doesn't exist make sure we have a negative
@@ -162,7 +163,7 @@ STATIC int
xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
{
	struct xfs_inode *ip = XFS_I(inode);
-	char *ea_name;
+	unsigned char *ea_name;
	int error;

	if (S_ISLNK(inode->i_mode))
@@ -194,7 +195,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
			(sizeof(struct xfs_acl_entry) *
			 (XFS_ACL_MAX_ENTRIES - acl->a_count));

-		error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
+		error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
				len, ATTR_ROOT);

		kfree(xfs_acl);
@@ -262,7 +263,7 @@ xfs_set_mode(struct inode *inode, mode_t mode)
}

static int
-xfs_acl_exists(struct inode *inode, char *name)
+xfs_acl_exists(struct inode *inode, unsigned char *name)
{
	int len = sizeof(struct xfs_acl);

+245 −45
@@ -33,6 +33,7 @@
#include <linux/migrate.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
#include <linux/list_sort.h>

#include "xfs_sb.h"
#include "xfs_inum.h"
@@ -1072,22 +1073,30 @@ xfs_buf_ioerror(
}

int
-xfs_bawrite(
-	void			*mp,
+xfs_bwrite(
+	struct xfs_mount	*mp,
	struct xfs_buf		*bp)
{
-	trace_xfs_buf_bawrite(bp, _RET_IP_);
+	int			iowait = (bp->b_flags & XBF_ASYNC) == 0;
+	int			error = 0;

-	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+	bp->b_strat = xfs_bdstrat_cb;
+	bp->b_mount = mp;
+	bp->b_flags |= XBF_WRITE;
+	if (!iowait)
+		bp->b_flags |= _XBF_RUN_QUEUES;

	xfs_buf_delwri_dequeue(bp);
+	xfs_buf_iostrategy(bp);

-	bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
-	bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
+	if (iowait) {
+		error = xfs_buf_iowait(bp);
+		if (error)
+			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+		xfs_buf_relse(bp);
+	}

-	bp->b_mount = mp;
-	bp->b_strat = xfs_bdstrat_cb;
-	return xfs_bdstrat_cb(bp);
+	return error;
}

void
@@ -1106,6 +1115,126 @@ xfs_bdwrite(
	xfs_buf_delwri_queue(bp, 1);
}

/*
 * Called when we want to stop a buffer from getting written or read.
 * We attach the EIO error, muck with its flags, and call biodone
 * so that the proper iodone callbacks get called.
 */
STATIC int
xfs_bioerror(
	xfs_buf_t *bp)
{
#ifdef XFSERRORDEBUG
	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
#endif

	/*
	 * No need to wait until the buffer is unpinned, we aren't flushing it.
	 */
	XFS_BUF_ERROR(bp, EIO);

	/*
	 * We're calling biodone, so delete XBF_DONE flag.
	 */
	XFS_BUF_UNREAD(bp);
	XFS_BUF_UNDELAYWRITE(bp);
	XFS_BUF_UNDONE(bp);
	XFS_BUF_STALE(bp);

	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
	xfs_biodone(bp);

	return EIO;
}

/*
 * Same as xfs_bioerror, except that we are releasing the buffer
 * here ourselves, and avoiding the biodone call.
 * This is meant for userdata errors; metadata bufs come with
 * iodone functions attached, so that we can track down errors.
 */
STATIC int
xfs_bioerror_relse(
	struct xfs_buf	*bp)
{
	int64_t		fl = XFS_BUF_BFLAGS(bp);
	/*
	 * No need to wait until the buffer is unpinned.
	 * We aren't flushing it.
	 *
	 * chunkhold expects B_DONE to be set, whether
	 * we actually finish the I/O or not. We don't want to
	 * change that interface.
	 */
	XFS_BUF_UNREAD(bp);
	XFS_BUF_UNDELAYWRITE(bp);
	XFS_BUF_DONE(bp);
	XFS_BUF_STALE(bp);
	XFS_BUF_CLR_IODONE_FUNC(bp);
	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
	if (!(fl & XBF_ASYNC)) {
		/*
		 * Mark b_error and B_ERROR _both_.
		 * Lot's of chunkcache code assumes that.
		 * There's no reason to mark error for
		 * ASYNC buffers.
		 */
		XFS_BUF_ERROR(bp, EIO);
		XFS_BUF_FINISH_IOWAIT(bp);
	} else {
		xfs_buf_relse(bp);
	}

	return EIO;
}


/*
 * All xfs metadata buffers except log state machine buffers
 * get this attached as their b_bdstrat callback function.
 * This is so that we can catch a buffer
 * after prematurely unpinning it to forcibly shutdown the filesystem.
 */
int
xfs_bdstrat_cb(
	struct xfs_buf	*bp)
{
	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
		trace_xfs_bdstrat_shut(bp, _RET_IP_);
		/*
		 * Metadata write that didn't get logged but
		 * written delayed anyway. These aren't associated
		 * with a transaction, and can be ignored.
		 */
		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
			return xfs_bioerror_relse(bp);
		else
			return xfs_bioerror(bp);
	}

	xfs_buf_iorequest(bp);
	return 0;
}

/*
 * Wrapper around bdstrat so that we can stop data from going to disk in case
 * we are shutting down the filesystem.  Typically user data goes thru this
 * path; one of the exceptions is the superblock.
 */
void
xfsbdstrat(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp)
{
	if (XFS_FORCED_SHUTDOWN(mp)) {
		trace_xfs_bdstrat_shut(bp, _RET_IP_);
		xfs_bioerror_relse(bp);
		return;
	}

	xfs_buf_iorequest(bp);
}

STATIC void
_xfs_buf_ioend(
	xfs_buf_t		*bp,
@@ -1324,7 +1453,7 @@ xfs_buf_iomove(
	xfs_buf_t		*bp,	/* buffer to process		*/
	size_t			boff,	/* starting buffer offset	*/
	size_t			bsize,	/* length to copy		*/
-	caddr_t			data,	/* data address			*/
+	void			*data,	/* data address			*/
	xfs_buf_rw_t		mode)	/* read/write/zero flag		*/
{
	size_t			bend, cpoff, csize;
@@ -1406,8 +1535,8 @@ xfs_alloc_bufhash(

	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
-	btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
-					sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE);
+	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
+					 sizeof(xfs_bufhash_t));
	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
		spin_lock_init(&btp->bt_hash[i].bh_lock);
		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
@@ -1418,7 +1547,7 @@ STATIC void
xfs_free_bufhash(
	xfs_buftarg_t		*btp)
{
-	kmem_free(btp->bt_hash);
+	kmem_free_large(btp->bt_hash);
	btp->bt_hash = NULL;
}

@@ -1623,6 +1752,11 @@ xfs_buf_delwri_queue(
		list_del(&bp->b_list);
	}

	if (list_empty(dwq)) {
		/* start xfsbufd as it is about to have something to do */
		wake_up_process(bp->b_target->bt_task);
	}

	bp->b_flags |= _XBF_DELWRI_Q;
	list_add_tail(&bp->b_list, dwq);
	bp->b_queuetime = jiffies;
@@ -1654,6 +1788,35 @@ xfs_buf_delwri_dequeue(
	trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
}

/*
 * If a delwri buffer needs to be pushed before it has aged out, then promote
 * it to the head of the delwri queue so that it will be flushed on the next
 * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
 * than the age currently needed to flush the buffer. Hence the next time the
 * xfsbufd sees it is guaranteed to be considered old enough to flush.
 */
void
xfs_buf_delwri_promote(
	struct xfs_buf	*bp)
{
	struct xfs_buftarg *btp = bp->b_target;
	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;

	ASSERT(bp->b_flags & XBF_DELWRI);
	ASSERT(bp->b_flags & _XBF_DELWRI_Q);

	/*
	 * Check the buffer age before locking the delayed write queue as we
	 * don't need to promote buffers that are already past the flush age.
	 */
	if (bp->b_queuetime < jiffies - age)
		return;
	bp->b_queuetime = jiffies - age;
	spin_lock(&btp->bt_delwrite_lock);
	list_move(&bp->b_list, &btp->bt_delwrite_queue);
	spin_unlock(&btp->bt_delwrite_lock);
}

STATIC void
xfs_buf_runall_queues(
	struct workqueue_struct	*queue)
@@ -1672,6 +1835,8 @@ xfsbufd_wakeup(
	list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
			continue;
		if (list_empty(&btp->bt_delwrite_queue))
			continue;
		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
		wake_up_process(btp->bt_task);
	}
@@ -1722,20 +1887,53 @@ xfs_buf_delwri_split(

}

/*
 * Compare function is more complex than it needs to be because
 * the return value is only 32 bits and we are doing comparisons
 * on 64 bit values
 */
static int
xfs_buf_cmp(
	void		*priv,
	struct list_head *a,
	struct list_head *b)
{
	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
	xfs_daddr_t		diff;

	diff = ap->b_bn - bp->b_bn;
	if (diff < 0)
		return -1;
	if (diff > 0)
		return 1;
	return 0;
}

void
xfs_buf_delwri_sort(
	xfs_buftarg_t	*target,
	struct list_head *list)
{
	list_sort(NULL, list, xfs_buf_cmp);
}

STATIC int
xfsbufd(
	void		*data)
{
-	struct list_head tmp;
	xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
-	int		count;
-	xfs_buf_t	*bp;

	current->flags |= PF_MEMALLOC;

	set_freezable();

	do {
+		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
+		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
+		int	count = 0;
+		struct list_head tmp;

		if (unlikely(freezing(current))) {
			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
			refrigerator();
@@ -1743,17 +1941,16 @@ xfsbufd(
			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
		}

-		schedule_timeout_interruptible(
-			xfs_buf_timer_centisecs * msecs_to_jiffies(10));

-		xfs_buf_delwri_split(target, &tmp,
-				xfs_buf_age_centisecs * msecs_to_jiffies(10));
+		/* sleep for a long time if there is nothing to do. */
+		if (list_empty(&target->bt_delwrite_queue))
+			tout = MAX_SCHEDULE_TIMEOUT;
+		schedule_timeout_interruptible(tout);

-		count = 0;
+		xfs_buf_delwri_split(target, &tmp, age);
+		list_sort(NULL, &tmp, xfs_buf_cmp);
		while (!list_empty(&tmp)) {
-			bp = list_entry(tmp.next, xfs_buf_t, b_list);
-			ASSERT(target == bp->b_target);

+			struct xfs_buf *bp;
+			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
			list_del_init(&bp->b_list);
			xfs_buf_iostrategy(bp);
			count++;
@@ -1779,43 +1976,46 @@ xfs_flush_buftarg(
	xfs_buftarg_t	*target,
	int		wait)
{
-	struct list_head tmp;
-	xfs_buf_t	*bp, *n;
+	xfs_buf_t	*bp;
	int		pincount = 0;
+	LIST_HEAD(tmp_list);
+	LIST_HEAD(wait_list);

	xfs_buf_runall_queues(xfsconvertd_workqueue);
	xfs_buf_runall_queues(xfsdatad_workqueue);
	xfs_buf_runall_queues(xfslogd_workqueue);

	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	pincount = xfs_buf_delwri_split(target, &tmp, 0);
+	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);

	/*
-	 * Dropped the delayed write list lock, now walk the temporary list
+	 * Dropped the delayed write list lock, now walk the temporary list.
+	 * All I/O is issued async and then if we need to wait for completion
+	 * we do that after issuing all the IO.
	 */
-	list_for_each_entry_safe(bp, n, &tmp, b_list) {
+	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+	while (!list_empty(&tmp_list)) {
+		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
		ASSERT(target == bp->b_target);
		if (wait)
			bp->b_flags &= ~XBF_ASYNC;
		else
		list_del_init(&bp->b_list);

		if (wait) {
			bp->b_flags &= ~XBF_ASYNC;
			list_add(&bp->b_list, &wait_list);
		}
		xfs_buf_iostrategy(bp);
	}

-	if (wait)
+	if (wait) {
+		/* Expedite and wait for IO to complete. */
		blk_run_address_space(target->bt_mapping);

-	/*
-	 * Remaining list items must be flushed before returning
-	 */
-	while (!list_empty(&tmp)) {
-		bp = list_entry(tmp.next, xfs_buf_t, b_list);
+		while (!list_empty(&wait_list)) {
+			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);

			list_del_init(&bp->b_list);
			xfs_iowait(bp);
			xfs_buf_relse(bp);
		}
	}

	return pincount;
}
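
The delayed-write rework above sorts buffers by disk block number before dispatch so the elevator sees mostly ascending I/O. As the comment above xfs_buf_cmp() explains, the callback must return a plain int even though block numbers are 64 bits wide, so the result has to come from the sign of the 64-bit comparison rather than from returning a (possibly truncated) difference. The userspace sketch below shows the same idea with qsort() in place of the kernel's list_sort() and a made-up struct; it is only an analogy, not XFS code.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for a buffer keyed by a 64-bit block number. */
struct buf {
	int64_t	blockno;
};

/*
 * Same principle as xfs_buf_cmp() in the diff above: never return the
 * 64-bit difference directly, since it would be truncated to the 32-bit
 * int return value; derive -1/0/1 from explicit comparisons instead.
 */
static int buf_cmp(const void *a, const void *b)
{
	const struct buf *ap = a;
	const struct buf *bp = b;

	if (ap->blockno < bp->blockno)
		return -1;
	if (ap->blockno > bp->blockno)
		return 1;
	return 0;
}

int main(void)
{
	struct buf bufs[] = { { 1LL << 40 }, { 7 }, { 1LL << 33 }, { 42 } };
	size_t i;

	/* Sorting ascending by block number approximates the delwri sort. */
	qsort(bufs, sizeof(bufs) / sizeof(bufs[0]), sizeof(bufs[0]), buf_cmp);

	for (i = 0; i < sizeof(bufs) / sizeof(bufs[0]); i++)
		printf("%lld\n", (long long)bufs[i].blockno);
	return 0;
}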